mirror of
https://github.com/MariaDB/server.git
synced 2025-12-01 17:39:21 +03:00
Merge branch 'merge-tokudb-5.6' into 10.0
This commit is contained in:
51
.gitignore
vendored
51
.gitignore
vendored
@@ -166,31 +166,32 @@ storage/myisam/myisamlog
|
|||||||
storage/myisam/myisampack
|
storage/myisam/myisampack
|
||||||
storage/myisam/rt_test
|
storage/myisam/rt_test
|
||||||
storage/myisam/sp_test
|
storage/myisam/sp_test
|
||||||
storage/tokudb/ft-index/buildheader/db.h
|
storage/tokudb/PerconaFT/buildheader/db.h
|
||||||
storage/tokudb/ft-index/buildheader/make_tdb
|
storage/tokudb/PerconaFT/buildheader/make_tdb
|
||||||
storage/tokudb/ft-index/buildheader/runcat.sh
|
storage/tokudb/PerconaFT/buildheader/runcat.sh
|
||||||
storage/tokudb/ft-index/ft/log_code.cc
|
storage/tokudb/PerconaFT/ft/log_code.cc
|
||||||
storage/tokudb/ft-index/ft/log_header.h
|
storage/tokudb/PerconaFT/ft/log_header.h
|
||||||
storage/tokudb/ft-index/ft/log_print.cc
|
storage/tokudb/PerconaFT/ft/log_print.cc
|
||||||
storage/tokudb/ft-index/ft/logformat
|
storage/tokudb/PerconaFT/ft/logformat
|
||||||
storage/tokudb/ft-index/ft/ftverify
|
storage/tokudb/PerconaFT/ft/ftverify
|
||||||
storage/tokudb/ft-index/ft/tdb-recover
|
storage/tokudb/PerconaFT/ft/tdb-recover
|
||||||
storage/tokudb/ft-index/ft/tokuftdump
|
storage/tokudb/PerconaFT/ft/tokuftdump
|
||||||
storage/tokudb/ft-index/portability/merge_archives_tokuportability_static.cmake
|
storage/tokudb/PerconaFT/portability/merge_archives_tokuportability_static.cmake
|
||||||
storage/tokudb/ft-index/portability/toku_config.h
|
storage/tokudb/PerconaFT/portability/toku_config.h
|
||||||
storage/tokudb/ft-index/portability/tokuportability_static_depends.cc
|
storage/tokudb/PerconaFT/portability/tokuportability_static_depends.cc
|
||||||
storage/tokudb/ft-index/src/merge_archives_tokufractaltree_static.cmake
|
storage/tokudb/PerconaFT/snappy/
|
||||||
storage/tokudb/ft-index/src/tokufractaltree_static_depends.cc
|
storage/tokudb/PerconaFT/src/merge_archives_tokufractaltree_static.cmake
|
||||||
storage/tokudb/ft-index/toku_include/toku_config.h
|
storage/tokudb/PerconaFT/src/tokufractaltree_static_depends.cc
|
||||||
storage/tokudb/ft-index/tools/ba_replay
|
storage/tokudb/PerconaFT/toku_include/toku_config.h
|
||||||
storage/tokudb/ft-index/tools/ftverify
|
storage/tokudb/PerconaFT/tools/ba_replay
|
||||||
storage/tokudb/ft-index/tools/tdb-recover
|
storage/tokudb/PerconaFT/tools/ftverify
|
||||||
storage/tokudb/ft-index/tools/tokudb_dump
|
storage/tokudb/PerconaFT/tools/tdb-recover
|
||||||
storage/tokudb/ft-index/tools/tokudb_gen
|
storage/tokudb/PerconaFT/tools/tokudb_dump
|
||||||
storage/tokudb/ft-index/tools/tokudb_load
|
storage/tokudb/PerconaFT/tools/tokudb_gen
|
||||||
storage/tokudb/ft-index/tools/tokuftdump
|
storage/tokudb/PerconaFT/tools/tokudb_load
|
||||||
storage/tokudb/ft-index/tools/tokuft_logprint
|
storage/tokudb/PerconaFT/tools/tokuftdump
|
||||||
storage/tokudb/ft-index/xz/
|
storage/tokudb/PerconaFT/tools/tokuft_logprint
|
||||||
|
storage/tokudb/PerconaFT/xz/
|
||||||
support-files/MySQL-shared-compat.spec
|
support-files/MySQL-shared-compat.spec
|
||||||
support-files/binary-configure
|
support-files/binary-configure
|
||||||
support-files/config.huge.ini
|
support-files/config.huge.ini
|
||||||
|
|||||||
@@ -1,4 +1,5 @@
|
|||||||
# ft-index only supports x86-64 and cmake-2.8.9+
|
SET(TOKUDB_VERSION 5.6.26-74.0)
|
||||||
|
# PerconaFT only supports x86-64 and cmake-2.8.9+
|
||||||
IF(CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND
|
IF(CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND
|
||||||
NOT CMAKE_VERSION VERSION_LESS "2.8.9")
|
NOT CMAKE_VERSION VERSION_LESS "2.8.9")
|
||||||
CHECK_CXX_SOURCE_COMPILES(
|
CHECK_CXX_SOURCE_COMPILES(
|
||||||
@@ -32,19 +33,20 @@ IF (HAVE_WVLA)
|
|||||||
ENDIF()
|
ENDIF()
|
||||||
|
|
||||||
############################################
|
############################################
|
||||||
SET(TOKUDB_VERSION "tokudb-7.5.7")
|
|
||||||
SET(TOKUDB_DEB_FILES "usr/lib/mysql/plugin/ha_tokudb.so\netc/mysql/conf.d/tokudb.cnf\nusr/bin/tokuftdump\nusr/share/doc/mariadb-server-10.0/README-TOKUDB\nusr/share/doc/mariadb-server-10.0/README.md" PARENT_SCOPE)
|
SET(TOKUDB_DEB_FILES "usr/lib/mysql/plugin/ha_tokudb.so\netc/mysql/conf.d/tokudb.cnf\nusr/bin/tokuftdump\nusr/share/doc/mariadb-server-10.0/README-TOKUDB\nusr/share/doc/mariadb-server-10.0/README.md" PARENT_SCOPE)
|
||||||
SET(USE_BDB OFF CACHE BOOL "")
|
|
||||||
MARK_AS_ADVANCED(BUILDNAME)
|
MARK_AS_ADVANCED(BUILDNAME)
|
||||||
MARK_AS_ADVANCED(BUILD_TESTING)
|
MARK_AS_ADVANCED(BUILD_TESTING)
|
||||||
MARK_AS_ADVANCED(CMAKE_TOKUDB_REVISION)
|
MARK_AS_ADVANCED(CMAKE_TOKUDB_REVISION)
|
||||||
MARK_AS_ADVANCED(LIBTOKUDB)
|
MARK_AS_ADVANCED(LIBTOKUDB)
|
||||||
MARK_AS_ADVANCED(LIBTOKUPORTABILITY)
|
MARK_AS_ADVANCED(LIBTOKUPORTABILITY)
|
||||||
|
MARK_AS_ADVANCED(PROFILING)
|
||||||
|
MARK_AS_ADVANCED(SNAPPY_SOURCE_DIR)
|
||||||
MARK_AS_ADVANCED(TOKUDB_DATA)
|
MARK_AS_ADVANCED(TOKUDB_DATA)
|
||||||
MARK_AS_ADVANCED(TOKU_DEBUG_PARANOID)
|
MARK_AS_ADVANCED(TOKU_DEBUG_PARANOID)
|
||||||
MARK_AS_ADVANCED(USE_BDB)
|
|
||||||
MARK_AS_ADVANCED(USE_VALGRIND)
|
MARK_AS_ADVANCED(USE_VALGRIND)
|
||||||
MARK_AS_ADVANCED(XZ_SOURCE_DIR)
|
MARK_AS_ADVANCED(XZ_SOURCE_DIR)
|
||||||
|
MARK_AS_ADVANCED(gcc_ar)
|
||||||
|
MARK_AS_ADVANCED(gcc_ranlib)
|
||||||
############################################
|
############################################
|
||||||
|
|
||||||
SET(BUILD_TESTING OFF CACHE BOOL "")
|
SET(BUILD_TESTING OFF CACHE BOOL "")
|
||||||
@@ -108,14 +110,26 @@ endmacro(append_cflags_if_supported)
|
|||||||
set_cflags_if_supported(-Wno-missing-field-initializers)
|
set_cflags_if_supported(-Wno-missing-field-initializers)
|
||||||
append_cflags_if_supported(-Wno-vla)
|
append_cflags_if_supported(-Wno-vla)
|
||||||
|
|
||||||
ADD_SUBDIRECTORY(ft-index)
|
IF (EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/PerconaFT/")
|
||||||
|
IF (EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/ft-index/")
|
||||||
|
MESSAGE(FATAL_ERROR "Found both PerconaFT and ft-index sources. Don't know which to use.")
|
||||||
|
ENDIF ()
|
||||||
|
SET(TOKU_FT_DIR_NAME "PerconaFT")
|
||||||
|
|
||||||
INCLUDE_DIRECTORIES(ft-index)
|
ELSEIF (EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/ft-index/")
|
||||||
INCLUDE_DIRECTORIES(ft-index/portability)
|
MESSAGE(WARNING "Found ft-index sources, ft-index is deprecated and replaced with PerconaFT.")
|
||||||
INCLUDE_DIRECTORIES(ft-index/util)
|
SET(TOKU_FT_DIR_NAME "ft-index")
|
||||||
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}/ft-index)
|
ELSE ()
|
||||||
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}/ft-index/buildheader)
|
MESSAGE(FATAL_ERROR "Could not find PerconaFT sources.")
|
||||||
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}/ft-index/portability)
|
ENDIF ()
|
||||||
|
|
||||||
|
ADD_SUBDIRECTORY(${TOKU_FT_DIR_NAME})
|
||||||
|
INCLUDE_DIRECTORIES(${TOKU_FT_DIR_NAME})
|
||||||
|
INCLUDE_DIRECTORIES(${TOKU_FT_DIR_NAME}/portability)
|
||||||
|
INCLUDE_DIRECTORIES(${TOKU_FT_DIR_NAME}/util)
|
||||||
|
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}/${TOKU_FT_DIR_NAME})
|
||||||
|
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}/${TOKU_FT_DIR_NAME}/buildheader)
|
||||||
|
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}/${TOKU_FT_DIR_NAME}/portability)
|
||||||
|
|
||||||
SET(TOKUDB_PLUGIN_DYNAMIC "ha_tokudb")
|
SET(TOKUDB_PLUGIN_DYNAMIC "ha_tokudb")
|
||||||
SET(TOKUDB_SOURCES ha_tokudb.cc)
|
SET(TOKUDB_SOURCES ha_tokudb.cc)
|
||||||
|
|||||||
84
storage/tokudb/PerconaFT/CMakeLists.txt
Normal file
84
storage/tokudb/PerconaFT/CMakeLists.txt
Normal file
@@ -0,0 +1,84 @@
|
|||||||
|
set(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake_modules")
|
||||||
|
|
||||||
|
project(TokuDB)
|
||||||
|
|
||||||
|
# suppress -rdynamic
|
||||||
|
set(CMAKE_SHARED_LIBRARY_LINK_C_FLAGS "")
|
||||||
|
set(CMAKE_SHARED_LIBRARY_LINK_CXX_FLAGS "")
|
||||||
|
|
||||||
|
## Versions of gcc >= 4.9.0 require special versions of 'ar' and 'ranlib' for
|
||||||
|
## link-time optimizations to work properly.
|
||||||
|
##
|
||||||
|
## From https://gcc.gnu.org/gcc-4.9/changes.html:
|
||||||
|
##
|
||||||
|
## When using a linker plugin, compiling with the -flto option now
|
||||||
|
## generates slim objects files (.o) which only contain intermediate
|
||||||
|
## language representation for LTO. Use -ffat-lto-objects to create
|
||||||
|
## files which contain additionally the object code. To generate
|
||||||
|
## static libraries suitable for LTO processing, use gcc-ar and
|
||||||
|
## gcc-ranlib; to list symbols from a slim object file use
|
||||||
|
## gcc-nm. (Requires that ar, ranlib and nm have been compiled with
|
||||||
|
## plugin support.)
|
||||||
|
if ((CMAKE_CXX_COMPILER_ID STREQUAL GNU) AND
|
||||||
|
NOT (CMAKE_CXX_COMPILER_VERSION VERSION_LESS "4.9.0"))
|
||||||
|
find_program(gcc_ar "gcc-ar")
|
||||||
|
if (gcc_ar)
|
||||||
|
set(CMAKE_AR "${gcc_ar}")
|
||||||
|
endif ()
|
||||||
|
find_program(gcc_ranlib "gcc-ranlib")
|
||||||
|
if (gcc_ranlib)
|
||||||
|
set(CMAKE_RANLIB "${gcc_ranlib}")
|
||||||
|
endif ()
|
||||||
|
endif()
|
||||||
|
|
||||||
|
include(TokuFeatureDetection)
|
||||||
|
include(TokuSetupCompiler)
|
||||||
|
include(TokuSetupCTest)
|
||||||
|
include(TokuThirdParty)
|
||||||
|
|
||||||
|
set(TOKU_CMAKE_SCRIPT_DIR "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
|
||||||
|
include(TokuMergeLibs)
|
||||||
|
|
||||||
|
## need a way to change the name of libs we build
|
||||||
|
set(LIBTOKUPORTABILITY "tokuportability" CACHE STRING "Name of libtokuportability.so")
|
||||||
|
set(LIBTOKUDB "tokufractaltree" CACHE STRING "Name of libtokufractaltree.so")
|
||||||
|
|
||||||
|
set(INSTALL_LIBDIR "lib" CACHE STRING "where to install libs")
|
||||||
|
|
||||||
|
if (USE_VALGRIND AND NOT VALGRIND_INCLUDE_DIR MATCHES NOTFOUND)
|
||||||
|
include_directories(
|
||||||
|
${VALGRIND_INCLUDE_DIR}
|
||||||
|
)
|
||||||
|
endif()
|
||||||
|
include_directories(
|
||||||
|
${CMAKE_CURRENT_SOURCE_DIR}/portability
|
||||||
|
${CMAKE_CURRENT_SOURCE_DIR} ## so you can include <ft/ft-ops.h> from inside src/
|
||||||
|
${CMAKE_CURRENT_BINARY_DIR} ## for logging code
|
||||||
|
)
|
||||||
|
## include where config.h will be generated
|
||||||
|
include_directories(${CMAKE_CURRENT_BINARY_DIR}/portability)
|
||||||
|
|
||||||
|
## build db.h and include where it will be generated
|
||||||
|
add_subdirectory(buildheader)
|
||||||
|
include_directories(BEFORE ${CMAKE_CURRENT_BINARY_DIR}/buildheader)
|
||||||
|
|
||||||
|
## default includes and libraries
|
||||||
|
include_directories(SYSTEM
|
||||||
|
/usr/local/include
|
||||||
|
${ZLIB_INCLUDE_DIRS}
|
||||||
|
)
|
||||||
|
|
||||||
|
## add subdirectories
|
||||||
|
add_subdirectory(util)
|
||||||
|
add_subdirectory(portability)
|
||||||
|
add_subdirectory(ft)
|
||||||
|
add_subdirectory(locktree)
|
||||||
|
add_subdirectory(src)
|
||||||
|
add_subdirectory(ftcxx)
|
||||||
|
add_subdirectory(tools)
|
||||||
|
|
||||||
|
INSTALL_DOCUMENTATION(README.md COPYING.AGPLv3 COPYING.GPLv2 PATENTS
|
||||||
|
COMPONENT Server)
|
||||||
|
|
||||||
|
## build tags
|
||||||
|
#include(TokuBuildTagDatabases)
|
||||||
661
storage/tokudb/PerconaFT/COPYING.AGPLv3
Normal file
661
storage/tokudb/PerconaFT/COPYING.AGPLv3
Normal file
@@ -0,0 +1,661 @@
|
|||||||
|
GNU AFFERO GENERAL PUBLIC LICENSE
|
||||||
|
Version 3, 19 November 2007
|
||||||
|
|
||||||
|
Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
|
||||||
|
Everyone is permitted to copy and distribute verbatim copies
|
||||||
|
of this license document, but changing it is not allowed.
|
||||||
|
|
||||||
|
Preamble
|
||||||
|
|
||||||
|
The GNU Affero General Public License is a free, copyleft license for
|
||||||
|
software and other kinds of works, specifically designed to ensure
|
||||||
|
cooperation with the community in the case of network server software.
|
||||||
|
|
||||||
|
The licenses for most software and other practical works are designed
|
||||||
|
to take away your freedom to share and change the works. By contrast,
|
||||||
|
our General Public Licenses are intended to guarantee your freedom to
|
||||||
|
share and change all versions of a program--to make sure it remains free
|
||||||
|
software for all its users.
|
||||||
|
|
||||||
|
When we speak of free software, we are referring to freedom, not
|
||||||
|
price. Our General Public Licenses are designed to make sure that you
|
||||||
|
have the freedom to distribute copies of free software (and charge for
|
||||||
|
them if you wish), that you receive source code or can get it if you
|
||||||
|
want it, that you can change the software or use pieces of it in new
|
||||||
|
free programs, and that you know you can do these things.
|
||||||
|
|
||||||
|
Developers that use our General Public Licenses protect your rights
|
||||||
|
with two steps: (1) assert copyright on the software, and (2) offer
|
||||||
|
you this License which gives you legal permission to copy, distribute
|
||||||
|
and/or modify the software.
|
||||||
|
|
||||||
|
A secondary benefit of defending all users' freedom is that
|
||||||
|
improvements made in alternate versions of the program, if they
|
||||||
|
receive widespread use, become available for other developers to
|
||||||
|
incorporate. Many developers of free software are heartened and
|
||||||
|
encouraged by the resulting cooperation. However, in the case of
|
||||||
|
software used on network servers, this result may fail to come about.
|
||||||
|
The GNU General Public License permits making a modified version and
|
||||||
|
letting the public access it on a server without ever releasing its
|
||||||
|
source code to the public.
|
||||||
|
|
||||||
|
The GNU Affero General Public License is designed specifically to
|
||||||
|
ensure that, in such cases, the modified source code becomes available
|
||||||
|
to the community. It requires the operator of a network server to
|
||||||
|
provide the source code of the modified version running there to the
|
||||||
|
users of that server. Therefore, public use of a modified version, on
|
||||||
|
a publicly accessible server, gives the public access to the source
|
||||||
|
code of the modified version.
|
||||||
|
|
||||||
|
An older license, called the Affero General Public License and
|
||||||
|
published by Affero, was designed to accomplish similar goals. This is
|
||||||
|
a different license, not a version of the Affero GPL, but Affero has
|
||||||
|
released a new version of the Affero GPL which permits relicensing under
|
||||||
|
this license.
|
||||||
|
|
||||||
|
The precise terms and conditions for copying, distribution and
|
||||||
|
modification follow.
|
||||||
|
|
||||||
|
TERMS AND CONDITIONS
|
||||||
|
|
||||||
|
0. Definitions.
|
||||||
|
|
||||||
|
"This License" refers to version 3 of the GNU Affero General Public License.
|
||||||
|
|
||||||
|
"Copyright" also means copyright-like laws that apply to other kinds of
|
||||||
|
works, such as semiconductor masks.
|
||||||
|
|
||||||
|
"The Program" refers to any copyrightable work licensed under this
|
||||||
|
License. Each licensee is addressed as "you". "Licensees" and
|
||||||
|
"recipients" may be individuals or organizations.
|
||||||
|
|
||||||
|
To "modify" a work means to copy from or adapt all or part of the work
|
||||||
|
in a fashion requiring copyright permission, other than the making of an
|
||||||
|
exact copy. The resulting work is called a "modified version" of the
|
||||||
|
earlier work or a work "based on" the earlier work.
|
||||||
|
|
||||||
|
A "covered work" means either the unmodified Program or a work based
|
||||||
|
on the Program.
|
||||||
|
|
||||||
|
To "propagate" a work means to do anything with it that, without
|
||||||
|
permission, would make you directly or secondarily liable for
|
||||||
|
infringement under applicable copyright law, except executing it on a
|
||||||
|
computer or modifying a private copy. Propagation includes copying,
|
||||||
|
distribution (with or without modification), making available to the
|
||||||
|
public, and in some countries other activities as well.
|
||||||
|
|
||||||
|
To "convey" a work means any kind of propagation that enables other
|
||||||
|
parties to make or receive copies. Mere interaction with a user through
|
||||||
|
a computer network, with no transfer of a copy, is not conveying.
|
||||||
|
|
||||||
|
An interactive user interface displays "Appropriate Legal Notices"
|
||||||
|
to the extent that it includes a convenient and prominently visible
|
||||||
|
feature that (1) displays an appropriate copyright notice, and (2)
|
||||||
|
tells the user that there is no warranty for the work (except to the
|
||||||
|
extent that warranties are provided), that licensees may convey the
|
||||||
|
work under this License, and how to view a copy of this License. If
|
||||||
|
the interface presents a list of user commands or options, such as a
|
||||||
|
menu, a prominent item in the list meets this criterion.
|
||||||
|
|
||||||
|
1. Source Code.
|
||||||
|
|
||||||
|
The "source code" for a work means the preferred form of the work
|
||||||
|
for making modifications to it. "Object code" means any non-source
|
||||||
|
form of a work.
|
||||||
|
|
||||||
|
A "Standard Interface" means an interface that either is an official
|
||||||
|
standard defined by a recognized standards body, or, in the case of
|
||||||
|
interfaces specified for a particular programming language, one that
|
||||||
|
is widely used among developers working in that language.
|
||||||
|
|
||||||
|
The "System Libraries" of an executable work include anything, other
|
||||||
|
than the work as a whole, that (a) is included in the normal form of
|
||||||
|
packaging a Major Component, but which is not part of that Major
|
||||||
|
Component, and (b) serves only to enable use of the work with that
|
||||||
|
Major Component, or to implement a Standard Interface for which an
|
||||||
|
implementation is available to the public in source code form. A
|
||||||
|
"Major Component", in this context, means a major essential component
|
||||||
|
(kernel, window system, and so on) of the specific operating system
|
||||||
|
(if any) on which the executable work runs, or a compiler used to
|
||||||
|
produce the work, or an object code interpreter used to run it.
|
||||||
|
|
||||||
|
The "Corresponding Source" for a work in object code form means all
|
||||||
|
the source code needed to generate, install, and (for an executable
|
||||||
|
work) run the object code and to modify the work, including scripts to
|
||||||
|
control those activities. However, it does not include the work's
|
||||||
|
System Libraries, or general-purpose tools or generally available free
|
||||||
|
programs which are used unmodified in performing those activities but
|
||||||
|
which are not part of the work. For example, Corresponding Source
|
||||||
|
includes interface definition files associated with source files for
|
||||||
|
the work, and the source code for shared libraries and dynamically
|
||||||
|
linked subprograms that the work is specifically designed to require,
|
||||||
|
such as by intimate data communication or control flow between those
|
||||||
|
subprograms and other parts of the work.
|
||||||
|
|
||||||
|
The Corresponding Source need not include anything that users
|
||||||
|
can regenerate automatically from other parts of the Corresponding
|
||||||
|
Source.
|
||||||
|
|
||||||
|
The Corresponding Source for a work in source code form is that
|
||||||
|
same work.
|
||||||
|
|
||||||
|
2. Basic Permissions.
|
||||||
|
|
||||||
|
All rights granted under this License are granted for the term of
|
||||||
|
copyright on the Program, and are irrevocable provided the stated
|
||||||
|
conditions are met. This License explicitly affirms your unlimited
|
||||||
|
permission to run the unmodified Program. The output from running a
|
||||||
|
covered work is covered by this License only if the output, given its
|
||||||
|
content, constitutes a covered work. This License acknowledges your
|
||||||
|
rights of fair use or other equivalent, as provided by copyright law.
|
||||||
|
|
||||||
|
You may make, run and propagate covered works that you do not
|
||||||
|
convey, without conditions so long as your license otherwise remains
|
||||||
|
in force. You may convey covered works to others for the sole purpose
|
||||||
|
of having them make modifications exclusively for you, or provide you
|
||||||
|
with facilities for running those works, provided that you comply with
|
||||||
|
the terms of this License in conveying all material for which you do
|
||||||
|
not control copyright. Those thus making or running the covered works
|
||||||
|
for you must do so exclusively on your behalf, under your direction
|
||||||
|
and control, on terms that prohibit them from making any copies of
|
||||||
|
your copyrighted material outside their relationship with you.
|
||||||
|
|
||||||
|
Conveying under any other circumstances is permitted solely under
|
||||||
|
the conditions stated below. Sublicensing is not allowed; section 10
|
||||||
|
makes it unnecessary.
|
||||||
|
|
||||||
|
3. Protecting Users' Legal Rights From Anti-Circumvention Law.
|
||||||
|
|
||||||
|
No covered work shall be deemed part of an effective technological
|
||||||
|
measure under any applicable law fulfilling obligations under article
|
||||||
|
11 of the WIPO copyright treaty adopted on 20 December 1996, or
|
||||||
|
similar laws prohibiting or restricting circumvention of such
|
||||||
|
measures.
|
||||||
|
|
||||||
|
When you convey a covered work, you waive any legal power to forbid
|
||||||
|
circumvention of technological measures to the extent such circumvention
|
||||||
|
is effected by exercising rights under this License with respect to
|
||||||
|
the covered work, and you disclaim any intention to limit operation or
|
||||||
|
modification of the work as a means of enforcing, against the work's
|
||||||
|
users, your or third parties' legal rights to forbid circumvention of
|
||||||
|
technological measures.
|
||||||
|
|
||||||
|
4. Conveying Verbatim Copies.
|
||||||
|
|
||||||
|
You may convey verbatim copies of the Program's source code as you
|
||||||
|
receive it, in any medium, provided that you conspicuously and
|
||||||
|
appropriately publish on each copy an appropriate copyright notice;
|
||||||
|
keep intact all notices stating that this License and any
|
||||||
|
non-permissive terms added in accord with section 7 apply to the code;
|
||||||
|
keep intact all notices of the absence of any warranty; and give all
|
||||||
|
recipients a copy of this License along with the Program.
|
||||||
|
|
||||||
|
You may charge any price or no price for each copy that you convey,
|
||||||
|
and you may offer support or warranty protection for a fee.
|
||||||
|
|
||||||
|
5. Conveying Modified Source Versions.
|
||||||
|
|
||||||
|
You may convey a work based on the Program, or the modifications to
|
||||||
|
produce it from the Program, in the form of source code under the
|
||||||
|
terms of section 4, provided that you also meet all of these conditions:
|
||||||
|
|
||||||
|
a) The work must carry prominent notices stating that you modified
|
||||||
|
it, and giving a relevant date.
|
||||||
|
|
||||||
|
b) The work must carry prominent notices stating that it is
|
||||||
|
released under this License and any conditions added under section
|
||||||
|
7. This requirement modifies the requirement in section 4 to
|
||||||
|
"keep intact all notices".
|
||||||
|
|
||||||
|
c) You must license the entire work, as a whole, under this
|
||||||
|
License to anyone who comes into possession of a copy. This
|
||||||
|
License will therefore apply, along with any applicable section 7
|
||||||
|
additional terms, to the whole of the work, and all its parts,
|
||||||
|
regardless of how they are packaged. This License gives no
|
||||||
|
permission to license the work in any other way, but it does not
|
||||||
|
invalidate such permission if you have separately received it.
|
||||||
|
|
||||||
|
d) If the work has interactive user interfaces, each must display
|
||||||
|
Appropriate Legal Notices; however, if the Program has interactive
|
||||||
|
interfaces that do not display Appropriate Legal Notices, your
|
||||||
|
work need not make them do so.
|
||||||
|
|
||||||
|
A compilation of a covered work with other separate and independent
|
||||||
|
works, which are not by their nature extensions of the covered work,
|
||||||
|
and which are not combined with it such as to form a larger program,
|
||||||
|
in or on a volume of a storage or distribution medium, is called an
|
||||||
|
"aggregate" if the compilation and its resulting copyright are not
|
||||||
|
used to limit the access or legal rights of the compilation's users
|
||||||
|
beyond what the individual works permit. Inclusion of a covered work
|
||||||
|
in an aggregate does not cause this License to apply to the other
|
||||||
|
parts of the aggregate.
|
||||||
|
|
||||||
|
6. Conveying Non-Source Forms.
|
||||||
|
|
||||||
|
You may convey a covered work in object code form under the terms
|
||||||
|
of sections 4 and 5, provided that you also convey the
|
||||||
|
machine-readable Corresponding Source under the terms of this License,
|
||||||
|
in one of these ways:
|
||||||
|
|
||||||
|
a) Convey the object code in, or embodied in, a physical product
|
||||||
|
(including a physical distribution medium), accompanied by the
|
||||||
|
Corresponding Source fixed on a durable physical medium
|
||||||
|
customarily used for software interchange.
|
||||||
|
|
||||||
|
b) Convey the object code in, or embodied in, a physical product
|
||||||
|
(including a physical distribution medium), accompanied by a
|
||||||
|
written offer, valid for at least three years and valid for as
|
||||||
|
long as you offer spare parts or customer support for that product
|
||||||
|
model, to give anyone who possesses the object code either (1) a
|
||||||
|
copy of the Corresponding Source for all the software in the
|
||||||
|
product that is covered by this License, on a durable physical
|
||||||
|
medium customarily used for software interchange, for a price no
|
||||||
|
more than your reasonable cost of physically performing this
|
||||||
|
conveying of source, or (2) access to copy the
|
||||||
|
Corresponding Source from a network server at no charge.
|
||||||
|
|
||||||
|
c) Convey individual copies of the object code with a copy of the
|
||||||
|
written offer to provide the Corresponding Source. This
|
||||||
|
alternative is allowed only occasionally and noncommercially, and
|
||||||
|
only if you received the object code with such an offer, in accord
|
||||||
|
with subsection 6b.
|
||||||
|
|
||||||
|
d) Convey the object code by offering access from a designated
|
||||||
|
place (gratis or for a charge), and offer equivalent access to the
|
||||||
|
Corresponding Source in the same way through the same place at no
|
||||||
|
further charge. You need not require recipients to copy the
|
||||||
|
Corresponding Source along with the object code. If the place to
|
||||||
|
copy the object code is a network server, the Corresponding Source
|
||||||
|
may be on a different server (operated by you or a third party)
|
||||||
|
that supports equivalent copying facilities, provided you maintain
|
||||||
|
clear directions next to the object code saying where to find the
|
||||||
|
Corresponding Source. Regardless of what server hosts the
|
||||||
|
Corresponding Source, you remain obligated to ensure that it is
|
||||||
|
available for as long as needed to satisfy these requirements.
|
||||||
|
|
||||||
|
e) Convey the object code using peer-to-peer transmission, provided
|
||||||
|
you inform other peers where the object code and Corresponding
|
||||||
|
Source of the work are being offered to the general public at no
|
||||||
|
charge under subsection 6d.
|
||||||
|
|
||||||
|
A separable portion of the object code, whose source code is excluded
|
||||||
|
from the Corresponding Source as a System Library, need not be
|
||||||
|
included in conveying the object code work.
|
||||||
|
|
||||||
|
A "User Product" is either (1) a "consumer product", which means any
|
||||||
|
tangible personal property which is normally used for personal, family,
|
||||||
|
or household purposes, or (2) anything designed or sold for incorporation
|
||||||
|
into a dwelling. In determining whether a product is a consumer product,
|
||||||
|
doubtful cases shall be resolved in favor of coverage. For a particular
|
||||||
|
product received by a particular user, "normally used" refers to a
|
||||||
|
typical or common use of that class of product, regardless of the status
|
||||||
|
of the particular user or of the way in which the particular user
|
||||||
|
actually uses, or expects or is expected to use, the product. A product
|
||||||
|
is a consumer product regardless of whether the product has substantial
|
||||||
|
commercial, industrial or non-consumer uses, unless such uses represent
|
||||||
|
the only significant mode of use of the product.
|
||||||
|
|
||||||
|
"Installation Information" for a User Product means any methods,
|
||||||
|
procedures, authorization keys, or other information required to install
|
||||||
|
and execute modified versions of a covered work in that User Product from
|
||||||
|
a modified version of its Corresponding Source. The information must
|
||||||
|
suffice to ensure that the continued functioning of the modified object
|
||||||
|
code is in no case prevented or interfered with solely because
|
||||||
|
modification has been made.
|
||||||
|
|
||||||
|
If you convey an object code work under this section in, or with, or
|
||||||
|
specifically for use in, a User Product, and the conveying occurs as
|
||||||
|
part of a transaction in which the right of possession and use of the
|
||||||
|
User Product is transferred to the recipient in perpetuity or for a
|
||||||
|
fixed term (regardless of how the transaction is characterized), the
|
||||||
|
Corresponding Source conveyed under this section must be accompanied
|
||||||
|
by the Installation Information. But this requirement does not apply
|
||||||
|
if neither you nor any third party retains the ability to install
|
||||||
|
modified object code on the User Product (for example, the work has
|
||||||
|
been installed in ROM).
|
||||||
|
|
||||||
|
The requirement to provide Installation Information does not include a
|
||||||
|
requirement to continue to provide support service, warranty, or updates
|
||||||
|
for a work that has been modified or installed by the recipient, or for
|
||||||
|
the User Product in which it has been modified or installed. Access to a
|
||||||
|
network may be denied when the modification itself materially and
|
||||||
|
adversely affects the operation of the network or violates the rules and
|
||||||
|
protocols for communication across the network.
|
||||||
|
|
||||||
|
Corresponding Source conveyed, and Installation Information provided,
|
||||||
|
in accord with this section must be in a format that is publicly
|
||||||
|
documented (and with an implementation available to the public in
|
||||||
|
source code form), and must require no special password or key for
|
||||||
|
unpacking, reading or copying.
|
||||||
|
|
||||||
|
7. Additional Terms.
|
||||||
|
|
||||||
|
"Additional permissions" are terms that supplement the terms of this
|
||||||
|
License by making exceptions from one or more of its conditions.
|
||||||
|
Additional permissions that are applicable to the entire Program shall
|
||||||
|
be treated as though they were included in this License, to the extent
|
||||||
|
that they are valid under applicable law. If additional permissions
|
||||||
|
apply only to part of the Program, that part may be used separately
|
||||||
|
under those permissions, but the entire Program remains governed by
|
||||||
|
this License without regard to the additional permissions.
|
||||||
|
|
||||||
|
When you convey a copy of a covered work, you may at your option
|
||||||
|
remove any additional permissions from that copy, or from any part of
|
||||||
|
it. (Additional permissions may be written to require their own
|
||||||
|
removal in certain cases when you modify the work.) You may place
|
||||||
|
additional permissions on material, added by you to a covered work,
|
||||||
|
for which you have or can give appropriate copyright permission.
|
||||||
|
|
||||||
|
Notwithstanding any other provision of this License, for material you
|
||||||
|
add to a covered work, you may (if authorized by the copyright holders of
|
||||||
|
that material) supplement the terms of this License with terms:
|
||||||
|
|
||||||
|
a) Disclaiming warranty or limiting liability differently from the
|
||||||
|
terms of sections 15 and 16 of this License; or
|
||||||
|
|
||||||
|
b) Requiring preservation of specified reasonable legal notices or
|
||||||
|
author attributions in that material or in the Appropriate Legal
|
||||||
|
Notices displayed by works containing it; or
|
||||||
|
|
||||||
|
c) Prohibiting misrepresentation of the origin of that material, or
|
||||||
|
requiring that modified versions of such material be marked in
|
||||||
|
reasonable ways as different from the original version; or
|
||||||
|
|
||||||
|
d) Limiting the use for publicity purposes of names of licensors or
|
||||||
|
authors of the material; or
|
||||||
|
|
||||||
|
e) Declining to grant rights under trademark law for use of some
|
||||||
|
trade names, trademarks, or service marks; or
|
||||||
|
|
||||||
|
f) Requiring indemnification of licensors and authors of that
|
||||||
|
material by anyone who conveys the material (or modified versions of
|
||||||
|
it) with contractual assumptions of liability to the recipient, for
|
||||||
|
any liability that these contractual assumptions directly impose on
|
||||||
|
those licensors and authors.
|
||||||
|
|
||||||
|
All other non-permissive additional terms are considered "further
|
||||||
|
restrictions" within the meaning of section 10. If the Program as you
|
||||||
|
received it, or any part of it, contains a notice stating that it is
|
||||||
|
governed by this License along with a term that is a further
|
||||||
|
restriction, you may remove that term. If a license document contains
|
||||||
|
a further restriction but permits relicensing or conveying under this
|
||||||
|
License, you may add to a covered work material governed by the terms
|
||||||
|
of that license document, provided that the further restriction does
|
||||||
|
not survive such relicensing or conveying.
|
||||||
|
|
||||||
|
If you add terms to a covered work in accord with this section, you
|
||||||
|
must place, in the relevant source files, a statement of the
|
||||||
|
additional terms that apply to those files, or a notice indicating
|
||||||
|
where to find the applicable terms.
|
||||||
|
|
||||||
|
Additional terms, permissive or non-permissive, may be stated in the
|
||||||
|
form of a separately written license, or stated as exceptions;
|
||||||
|
the above requirements apply either way.
|
||||||
|
|
||||||
|
8. Termination.
|
||||||
|
|
||||||
|
You may not propagate or modify a covered work except as expressly
|
||||||
|
provided under this License. Any attempt otherwise to propagate or
|
||||||
|
modify it is void, and will automatically terminate your rights under
|
||||||
|
this License (including any patent licenses granted under the third
|
||||||
|
paragraph of section 11).
|
||||||
|
|
||||||
|
However, if you cease all violation of this License, then your
|
||||||
|
license from a particular copyright holder is reinstated (a)
|
||||||
|
provisionally, unless and until the copyright holder explicitly and
|
||||||
|
finally terminates your license, and (b) permanently, if the copyright
|
||||||
|
holder fails to notify you of the violation by some reasonable means
|
||||||
|
prior to 60 days after the cessation.
|
||||||
|
|
||||||
|
Moreover, your license from a particular copyright holder is
|
||||||
|
reinstated permanently if the copyright holder notifies you of the
|
||||||
|
violation by some reasonable means, this is the first time you have
|
||||||
|
received notice of violation of this License (for any work) from that
|
||||||
|
copyright holder, and you cure the violation prior to 30 days after
|
||||||
|
your receipt of the notice.
|
||||||
|
|
||||||
|
Termination of your rights under this section does not terminate the
|
||||||
|
licenses of parties who have received copies or rights from you under
|
||||||
|
this License. If your rights have been terminated and not permanently
|
||||||
|
reinstated, you do not qualify to receive new licenses for the same
|
||||||
|
material under section 10.
|
||||||
|
|
||||||
|
9. Acceptance Not Required for Having Copies.
|
||||||
|
|
||||||
|
You are not required to accept this License in order to receive or
|
||||||
|
run a copy of the Program. Ancillary propagation of a covered work
|
||||||
|
occurring solely as a consequence of using peer-to-peer transmission
|
||||||
|
to receive a copy likewise does not require acceptance. However,
|
||||||
|
nothing other than this License grants you permission to propagate or
|
||||||
|
modify any covered work. These actions infringe copyright if you do
|
||||||
|
not accept this License. Therefore, by modifying or propagating a
|
||||||
|
covered work, you indicate your acceptance of this License to do so.
|
||||||
|
|
||||||
|
10. Automatic Licensing of Downstream Recipients.
|
||||||
|
|
||||||
|
Each time you convey a covered work, the recipient automatically
|
||||||
|
receives a license from the original licensors, to run, modify and
|
||||||
|
propagate that work, subject to this License. You are not responsible
|
||||||
|
for enforcing compliance by third parties with this License.
|
||||||
|
|
||||||
|
An "entity transaction" is a transaction transferring control of an
|
||||||
|
organization, or substantially all assets of one, or subdividing an
|
||||||
|
organization, or merging organizations. If propagation of a covered
|
||||||
|
work results from an entity transaction, each party to that
|
||||||
|
transaction who receives a copy of the work also receives whatever
|
||||||
|
licenses to the work the party's predecessor in interest had or could
|
||||||
|
give under the previous paragraph, plus a right to possession of the
|
||||||
|
Corresponding Source of the work from the predecessor in interest, if
|
||||||
|
the predecessor has it or can get it with reasonable efforts.
|
||||||
|
|
||||||
|
You may not impose any further restrictions on the exercise of the
|
||||||
|
rights granted or affirmed under this License. For example, you may
|
||||||
|
not impose a license fee, royalty, or other charge for exercise of
|
||||||
|
rights granted under this License, and you may not initiate litigation
|
||||||
|
(including a cross-claim or counterclaim in a lawsuit) alleging that
|
||||||
|
any patent claim is infringed by making, using, selling, offering for
|
||||||
|
sale, or importing the Program or any portion of it.
|
||||||
|
|
||||||
|
11. Patents.
|
||||||
|
|
||||||
|
A "contributor" is a copyright holder who authorizes use under this
|
||||||
|
License of the Program or a work on which the Program is based. The
|
||||||
|
work thus licensed is called the contributor's "contributor version".
|
||||||
|
|
||||||
|
A contributor's "essential patent claims" are all patent claims
|
||||||
|
owned or controlled by the contributor, whether already acquired or
|
||||||
|
hereafter acquired, that would be infringed by some manner, permitted
|
||||||
|
by this License, of making, using, or selling its contributor version,
|
||||||
|
but do not include claims that would be infringed only as a
|
||||||
|
consequence of further modification of the contributor version. For
|
||||||
|
purposes of this definition, "control" includes the right to grant
|
||||||
|
patent sublicenses in a manner consistent with the requirements of
|
||||||
|
this License.
|
||||||
|
|
||||||
|
Each contributor grants you a non-exclusive, worldwide, royalty-free
|
||||||
|
patent license under the contributor's essential patent claims, to
|
||||||
|
make, use, sell, offer for sale, import and otherwise run, modify and
|
||||||
|
propagate the contents of its contributor version.
|
||||||
|
|
||||||
|
In the following three paragraphs, a "patent license" is any express
|
||||||
|
agreement or commitment, however denominated, not to enforce a patent
|
||||||
|
(such as an express permission to practice a patent or covenant not to
|
||||||
|
sue for patent infringement). To "grant" such a patent license to a
|
||||||
|
party means to make such an agreement or commitment not to enforce a
|
||||||
|
patent against the party.
|
||||||
|
|
||||||
|
If you convey a covered work, knowingly relying on a patent license,
|
||||||
|
and the Corresponding Source of the work is not available for anyone
|
||||||
|
to copy, free of charge and under the terms of this License, through a
|
||||||
|
publicly available network server or other readily accessible means,
|
||||||
|
then you must either (1) cause the Corresponding Source to be so
|
||||||
|
available, or (2) arrange to deprive yourself of the benefit of the
|
||||||
|
patent license for this particular work, or (3) arrange, in a manner
|
||||||
|
consistent with the requirements of this License, to extend the patent
|
||||||
|
license to downstream recipients. "Knowingly relying" means you have
|
||||||
|
actual knowledge that, but for the patent license, your conveying the
|
||||||
|
covered work in a country, or your recipient's use of the covered work
|
||||||
|
in a country, would infringe one or more identifiable patents in that
|
||||||
|
country that you have reason to believe are valid.
|
||||||
|
|
||||||
|
If, pursuant to or in connection with a single transaction or
|
||||||
|
arrangement, you convey, or propagate by procuring conveyance of, a
|
||||||
|
covered work, and grant a patent license to some of the parties
|
||||||
|
receiving the covered work authorizing them to use, propagate, modify
|
||||||
|
or convey a specific copy of the covered work, then the patent license
|
||||||
|
you grant is automatically extended to all recipients of the covered
|
||||||
|
work and works based on it.
|
||||||
|
|
||||||
|
A patent license is "discriminatory" if it does not include within
|
||||||
|
the scope of its coverage, prohibits the exercise of, or is
|
||||||
|
conditioned on the non-exercise of one or more of the rights that are
|
||||||
|
specifically granted under this License. You may not convey a covered
|
||||||
|
work if you are a party to an arrangement with a third party that is
|
||||||
|
in the business of distributing software, under which you make payment
|
||||||
|
to the third party based on the extent of your activity of conveying
|
||||||
|
the work, and under which the third party grants, to any of the
|
||||||
|
parties who would receive the covered work from you, a discriminatory
|
||||||
|
patent license (a) in connection with copies of the covered work
|
||||||
|
conveyed by you (or copies made from those copies), or (b) primarily
|
||||||
|
for and in connection with specific products or compilations that
|
||||||
|
contain the covered work, unless you entered into that arrangement,
|
||||||
|
or that patent license was granted, prior to 28 March 2007.
|
||||||
|
|
||||||
|
Nothing in this License shall be construed as excluding or limiting
|
||||||
|
any implied license or other defenses to infringement that may
|
||||||
|
otherwise be available to you under applicable patent law.
|
||||||
|
|
||||||
|
12. No Surrender of Others' Freedom.
|
||||||
|
|
||||||
|
If conditions are imposed on you (whether by court order, agreement or
|
||||||
|
otherwise) that contradict the conditions of this License, they do not
|
||||||
|
excuse you from the conditions of this License. If you cannot convey a
|
||||||
|
covered work so as to satisfy simultaneously your obligations under this
|
||||||
|
License and any other pertinent obligations, then as a consequence you may
|
||||||
|
not convey it at all. For example, if you agree to terms that obligate you
|
||||||
|
to collect a royalty for further conveying from those to whom you convey
|
||||||
|
the Program, the only way you could satisfy both those terms and this
|
||||||
|
License would be to refrain entirely from conveying the Program.
|
||||||
|
|
||||||
|
13. Remote Network Interaction; Use with the GNU General Public License.
|
||||||
|
|
||||||
|
Notwithstanding any other provision of this License, if you modify the
|
||||||
|
Program, your modified version must prominently offer all users
|
||||||
|
interacting with it remotely through a computer network (if your version
|
||||||
|
supports such interaction) an opportunity to receive the Corresponding
|
||||||
|
Source of your version by providing access to the Corresponding Source
|
||||||
|
from a network server at no charge, through some standard or customary
|
||||||
|
means of facilitating copying of software. This Corresponding Source
|
||||||
|
shall include the Corresponding Source for any work covered by version 3
|
||||||
|
of the GNU General Public License that is incorporated pursuant to the
|
||||||
|
following paragraph.
|
||||||
|
|
||||||
|
Notwithstanding any other provision of this License, you have
|
||||||
|
permission to link or combine any covered work with a work licensed
|
||||||
|
under version 3 of the GNU General Public License into a single
|
||||||
|
combined work, and to convey the resulting work. The terms of this
|
||||||
|
License will continue to apply to the part which is the covered work,
|
||||||
|
but the work with which it is combined will remain governed by version
|
||||||
|
3 of the GNU General Public License.
|
||||||
|
|
||||||
|
14. Revised Versions of this License.
|
||||||
|
|
||||||
|
The Free Software Foundation may publish revised and/or new versions of
|
||||||
|
the GNU Affero General Public License from time to time. Such new versions
|
||||||
|
will be similar in spirit to the present version, but may differ in detail to
|
||||||
|
address new problems or concerns.
|
||||||
|
|
||||||
|
Each version is given a distinguishing version number. If the
|
||||||
|
Program specifies that a certain numbered version of the GNU Affero General
|
||||||
|
Public License "or any later version" applies to it, you have the
|
||||||
|
option of following the terms and conditions either of that numbered
|
||||||
|
version or of any later version published by the Free Software
|
||||||
|
Foundation. If the Program does not specify a version number of the
|
||||||
|
GNU Affero General Public License, you may choose any version ever published
|
||||||
|
by the Free Software Foundation.
|
||||||
|
|
||||||
|
If the Program specifies that a proxy can decide which future
|
||||||
|
versions of the GNU Affero General Public License can be used, that proxy's
|
||||||
|
public statement of acceptance of a version permanently authorizes you
|
||||||
|
to choose that version for the Program.
|
||||||
|
|
||||||
|
Later license versions may give you additional or different
|
||||||
|
permissions. However, no additional obligations are imposed on any
|
||||||
|
author or copyright holder as a result of your choosing to follow a
|
||||||
|
later version.
|
||||||
|
|
||||||
|
15. Disclaimer of Warranty.
|
||||||
|
|
||||||
|
THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
|
||||||
|
APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
|
||||||
|
HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
|
||||||
|
OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
|
||||||
|
IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
|
||||||
|
ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
|
||||||
|
|
||||||
|
16. Limitation of Liability.
|
||||||
|
|
||||||
|
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
|
||||||
|
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
|
||||||
|
THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
|
||||||
|
GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
|
||||||
|
USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
|
||||||
|
DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
|
||||||
|
PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
|
||||||
|
EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
|
||||||
|
SUCH DAMAGES.
|
||||||
|
|
||||||
|
17. Interpretation of Sections 15 and 16.
|
||||||
|
|
||||||
|
If the disclaimer of warranty and limitation of liability provided
|
||||||
|
above cannot be given local legal effect according to their terms,
|
||||||
|
reviewing courts shall apply local law that most closely approximates
|
||||||
|
an absolute waiver of all civil liability in connection with the
|
||||||
|
Program, unless a warranty or assumption of liability accompanies a
|
||||||
|
copy of the Program in return for a fee.
|
||||||
|
|
||||||
|
END OF TERMS AND CONDITIONS
|
||||||
|
|
||||||
|
How to Apply These Terms to Your New Programs
|
||||||
|
|
||||||
|
If you develop a new program, and you want it to be of the greatest
|
||||||
|
possible use to the public, the best way to achieve this is to make it
|
||||||
|
free software which everyone can redistribute and change under these terms.
|
||||||
|
|
||||||
|
To do so, attach the following notices to the program. It is safest
|
||||||
|
to attach them to the start of each source file to most effectively
|
||||||
|
state the exclusion of warranty; and each file should have at least
|
||||||
|
the "copyright" line and a pointer to where the full notice is found.
|
||||||
|
|
||||||
|
<one line to give the program's name and a brief idea of what it does.>
|
||||||
|
Copyright (C) <year> <name of author>
|
||||||
|
|
||||||
|
This program is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU Affero General Public License as published by
|
||||||
|
the Free Software Foundation, either version 3 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Affero General Public License
|
||||||
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
Also add information on how to contact you by electronic and paper mail.
|
||||||
|
|
||||||
|
If your software can interact with users remotely through a computer
|
||||||
|
network, you should also make sure that it provides a way for users to
|
||||||
|
get its source. For example, if your program is a web application, its
|
||||||
|
interface could display a "Source" link that leads users to an archive
|
||||||
|
of the code. There are many ways you could offer source, and different
|
||||||
|
solutions will be better for different programs; see section 13 for the
|
||||||
|
specific requirements.
|
||||||
|
|
||||||
|
You should also get your employer (if you work as a programmer) or school,
|
||||||
|
if any, to sign a "copyright disclaimer" for the program, if necessary.
|
||||||
|
For more information on this, and how to apply and follow the GNU AGPL, see
|
||||||
|
<http://www.gnu.org/licenses/>.
|
||||||
@@ -11,7 +11,6 @@ list(APPEND CTEST_CUSTOM_MEMCHECK_IGNORE
|
|||||||
portability/try-leak-lost
|
portability/try-leak-lost
|
||||||
portability/try-leak-reachable
|
portability/try-leak-reachable
|
||||||
portability/try-leak-uninit
|
portability/try-leak-uninit
|
||||||
util/helgrind_test_circular_buffer
|
|
||||||
util/helgrind_test_partitioned_counter
|
util/helgrind_test_partitioned_counter
|
||||||
util/helgrind_test_partitioned_counter_5833
|
util/helgrind_test_partitioned_counter_5833
|
||||||
ydb/diskfull.tdb
|
ydb/diskfull.tdb
|
||||||
@@ -53,7 +52,6 @@ list(APPEND CTEST_CUSTOM_MEMCHECK_IGNORE
|
|||||||
|
|
||||||
if (NOT @RUN_HELGRIND_TESTS@)
|
if (NOT @RUN_HELGRIND_TESTS@)
|
||||||
list(APPEND CTEST_CUSTOM_TESTS_IGNORE
|
list(APPEND CTEST_CUSTOM_TESTS_IGNORE
|
||||||
util/helgrind_test_circular_buffer
|
|
||||||
util/helgrind_test_partitioned_counter
|
util/helgrind_test_partitioned_counter
|
||||||
util/helgrind_test_partitioned_counter_5833
|
util/helgrind_test_partitioned_counter_5833
|
||||||
ydb/helgrind_helgrind1.tdb
|
ydb/helgrind_helgrind1.tdb
|
||||||
37
storage/tokudb/PerconaFT/PATENTS
Normal file
37
storage/tokudb/PerconaFT/PATENTS
Normal file
@@ -0,0 +1,37 @@
|
|||||||
|
UNIVERSITY PATENT NOTICE:
|
||||||
|
The technology is licensed by the Massachusetts Institute of
|
||||||
|
Technology, Rutgers State University of New Jersey, and the Research
|
||||||
|
Foundation of State University of New York at Stony Brook under
|
||||||
|
United States of America Serial No. 11/760379 and to the patents
|
||||||
|
and/or patent applications resulting from it.
|
||||||
|
PATENT MARKING NOTICE:
|
||||||
|
This software is covered by US Patent No. 8,185,551.
|
||||||
|
This software is covered by US Patent No. 8,489,638.
|
||||||
|
PATENT RIGHTS GRANT:
|
||||||
|
"THIS IMPLEMENTATION" means the copyrightable works distributed by
|
||||||
|
Percona as part of the Fractal Tree project.
|
||||||
|
"PATENT CLAIMS" means the claims of patents that are owned or
|
||||||
|
licensable by Percona, both currently or in the future; and that in
|
||||||
|
the absence of this license would be infringed by THIS
|
||||||
|
IMPLEMENTATION or by using or running THIS IMPLEMENTATION.
|
||||||
|
"PATENT CHALLENGE" shall mean a challenge to the validity,
|
||||||
|
patentability, enforceability and/or non-infringement of any of the
|
||||||
|
PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS.
|
||||||
|
Percona hereby grants to you, for the term and geographical scope of
|
||||||
|
the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free,
|
||||||
|
irrevocable (except as stated in this section) patent license to
|
||||||
|
make, have made, use, offer to sell, sell, import, transfer, and
|
||||||
|
otherwise run, modify, and propagate the contents of THIS
|
||||||
|
IMPLEMENTATION, where such license applies only to the PATENT
|
||||||
|
CLAIMS. This grant does not include claims that would be infringed
|
||||||
|
only as a consequence of further modifications of THIS
|
||||||
|
IMPLEMENTATION. If you or your agent or licensee institute or order
|
||||||
|
or agree to the institution of patent litigation against any entity
|
||||||
|
(including a cross-claim or counterclaim in a lawsuit) alleging that
|
||||||
|
THIS IMPLEMENTATION constitutes direct or contributory patent
|
||||||
|
infringement, or inducement of patent infringement, then any rights
|
||||||
|
granted to you under this License shall terminate as of the date
|
||||||
|
such litigation is filed. If you or your agent or exclusive
|
||||||
|
licensee institute or order or agree to the institution of a PATENT
|
||||||
|
CHALLENGE, then Percona may terminate any rights granted to you
|
||||||
|
under this License.
|
||||||
123
storage/tokudb/PerconaFT/README.md
Normal file
123
storage/tokudb/PerconaFT/README.md
Normal file
@@ -0,0 +1,123 @@
|
|||||||
|
PerconaFT
|
||||||
|
======
|
||||||
|
|
||||||
|
PerconaFT is a high-performance, transactional key-value store, used in the
|
||||||
|
TokuDB storage engine for Percona Server and MySQL, and in TokuMX, the
|
||||||
|
high-performance MongoDB distribution.
|
||||||
|
|
||||||
|
PerconaFT is provided as a shared library with an interface similar to
|
||||||
|
Berkeley DB.
|
||||||
|
|
||||||
|
To build the full MySQL product, see the instructions for
|
||||||
|
[Percona/tokudb-engine][tokudb-engine]. To build TokuMX, see the instructions
|
||||||
|
for [Percona/percona-server-mongodb][mongo]. This document covers PerconaFT only.
|
||||||
|
|
||||||
|
[tokudb-engine]: https://github.com/Percona/tokudb-engine
|
||||||
|
[mongo]: https://github.com/Percona/percona-server-mongodb
|
||||||
|
|
||||||
|
|
||||||
|
Building
|
||||||
|
--------
|
||||||
|
|
||||||
|
PerconaFT is built using CMake >= 2.8.9. Out-of-source builds are
|
||||||
|
recommended. You need a C++11 compiler, though only GCC >= 4.7 and
|
||||||
|
Apple's Clang are tested. You also need zlib development packages
|
||||||
|
(`yum install zlib-devel` or `apt-get install zlib1g-dev`).
|
||||||
|
|
||||||
|
You will also need the source code for jemalloc, checked out in
|
||||||
|
`third_party/`.
|
||||||
|
|
||||||
|
```sh
|
||||||
|
git clone git://github.com/Percona/PerconaFT.git percona-ft
|
||||||
|
cd percona-ft
|
||||||
|
git clone git://github.com/Percona/jemalloc.git third_party/jemalloc
|
||||||
|
mkdir build
|
||||||
|
cd build
|
||||||
|
CC=gcc47 CXX=g++47 cmake \
|
||||||
|
-D CMAKE_BUILD_TYPE=Debug \
|
||||||
|
-D BUILD_TESTING=OFF \
|
||||||
|
-D USE_VALGRIND=OFF \
|
||||||
|
-D CMAKE_INSTALL_PREFIX=../prefix/ \
|
||||||
|
..
|
||||||
|
cmake --build . --target install
|
||||||
|
```
|
||||||
|
|
||||||
|
This will build `libtokudb.so` and `libtokuportability.so` and install them,
|
||||||
|
some header files, and some examples to `percona-ft/prefix/`. It will also
|
||||||
|
build jemalloc and install it alongside these libraries; you should link
|
||||||
|
to that if you are planning to run benchmarks or to run in production.
|
||||||
|
|
||||||
|
### Platforms
|
||||||
|
|
||||||
|
PerconaFT is supported on 64-bit CentOS, should work on other 64-bit Linux
|
||||||
|
distributions, and may work on OSX 10.8 and FreeBSD. PerconaFT is not
|
||||||
|
supported on 32-bit systems.
|
||||||
|
|
||||||
|
[Transparent hugepages][transparent-hugepages] is a feature in newer Linux
|
||||||
|
kernel versions that causes problems for the memory usage tracking
|
||||||
|
calculations in PerconaFT and can lead to memory overcommit. If you have
|
||||||
|
this feature enabled, PerconaFT will not start, and you should turn it off.
|
||||||
|
If you want to run with transparent hugepages on, you can set an
|
||||||
|
environment variable `TOKU_HUGE_PAGES_OK=1`, but only do this for testing,
|
||||||
|
and only with a small cache size.
|
||||||
|
|
||||||
|
[transparent-hugepages]: https://access.redhat.com/site/documentation/en-US/Red_Hat_Enterprise_Linux/6/html/Performance_Tuning_Guide/s-memory-transhuge.html
|
||||||
|
|
||||||
|
|
||||||
|
Testing
|
||||||
|
-------
|
||||||
|
|
||||||
|
PerconaFT uses CTest for testing. The CDash testing dashboard is not
|
||||||
|
currently public, but you can run the tests without submitting them.
|
||||||
|
|
||||||
|
There are some large data files not stored in the git repository, that
|
||||||
|
will be made available soon. For now, the tests that use these files will
|
||||||
|
not run.
|
||||||
|
|
||||||
|
In the build directory from above:
|
||||||
|
|
||||||
|
```sh
|
||||||
|
cmake -D BUILD_TESTING=ON ..
|
||||||
|
ctest -D ExperimentalStart \
|
||||||
|
-D ExperimentalConfigure \
|
||||||
|
-D ExperimentalBuild \
|
||||||
|
-D ExperimentalTest
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
Contributing
|
||||||
|
------------
|
||||||
|
|
||||||
|
Please report bugs in PerconaFT to the [issue tracker][jira].
|
||||||
|
|
||||||
|
We have two publicly accessible mailing lists for TokuDB:
|
||||||
|
|
||||||
|
- tokudb-user@googlegroups.com is for general and support related
|
||||||
|
questions about the use of TokuDB.
|
||||||
|
- tokudb-dev@googlegroups.com is for discussion of the development of
|
||||||
|
TokuDB.
|
||||||
|
|
||||||
|
and two for TokuMX:
|
||||||
|
|
||||||
|
- tokumx-user@googlegroups.com is for general and support related
|
||||||
|
questions about the use of TokuMX.
|
||||||
|
- tokumx-dev@googlegroups.com is for discussion of the development of
|
||||||
|
TokuMX.
|
||||||
|
|
||||||
|
All source code and test contributions must be provided under a [BSD 2-Clause][bsd-2] license. For any small change set, the license text may be contained within the commit comment and the pull request. For larger contributions, the license must be presented in a `COPYING.<feature_name>` file in the root of the PerconaFT project. Please see the [BSD 2-Clause license template][bsd-2] for the content of the license text.
|
||||||
|
|
||||||
|
[jira]: https://tokutek.atlassian.net/browse/FT/
|
||||||
|
[bsd-2]: http://opensource.org/licenses/BSD-2-Clause/
|
||||||
|
|
||||||
|
|
||||||
|
License
|
||||||
|
-------
|
||||||
|
|
||||||
|
PerconaFT is available under the GPL version 2, and AGPL version 3, with slight modifications.
|
||||||
|
See [COPYING.AGPLv3][agpllicense],
|
||||||
|
[COPYING.GPLv2][gpllicense], and
|
||||||
|
[PATENTS][patents].
|
||||||
|
|
||||||
|
[agpllicense]: http://github.com/Percona/PerconaFT/blob/master/COPYING.AGPLv3
|
||||||
|
[gpllicense]: http://github.com/Percona/PerconaFT/blob/master/COPYING.GPLv2
|
||||||
|
[patents]: http://github.com/Percona/PerconaFT/blob/master/PATENTS
|
||||||
29
storage/tokudb/PerconaFT/buildheader/CMakeLists.txt
Normal file
29
storage/tokudb/PerconaFT/buildheader/CMakeLists.txt
Normal file
@@ -0,0 +1,29 @@
|
|||||||
|
set_directory_properties(PROPERTIES INCLUDE_DIRECTORIES "")
|
||||||
|
|
||||||
|
file(WRITE "${CMAKE_CURRENT_BINARY_DIR}/runcat.sh" "#!/bin/bash
|
||||||
|
out=$1; shift
|
||||||
|
exec \"$@\" >$out")
|
||||||
|
|
||||||
|
add_executable(make_tdb make_tdb.cc)
|
||||||
|
set_property(TARGET make_tdb APPEND PROPERTY COMPILE_DEFINITIONS _GNU_SOURCE)
|
||||||
|
add_custom_command(
|
||||||
|
OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/db.h"
|
||||||
|
COMMAND bash runcat.sh "${CMAKE_CURRENT_BINARY_DIR}/db.h" $<TARGET_FILE:make_tdb>
|
||||||
|
DEPENDS make_tdb)
|
||||||
|
add_custom_target(install_tdb_h DEPENDS
|
||||||
|
"${CMAKE_CURRENT_BINARY_DIR}/db.h")
|
||||||
|
|
||||||
|
# detect when we are being built as a subproject
|
||||||
|
if (NOT DEFINED MYSQL_PROJECT_NAME_DOCSTRING)
|
||||||
|
install(
|
||||||
|
FILES "${CMAKE_CURRENT_BINARY_DIR}/db.h"
|
||||||
|
DESTINATION include
|
||||||
|
RENAME tokudb.h
|
||||||
|
COMPONENT tokukv_headers
|
||||||
|
)
|
||||||
|
install(
|
||||||
|
FILES "${CMAKE_CURRENT_BINARY_DIR}/db.h"
|
||||||
|
DESTINATION include
|
||||||
|
COMPONENT tokukv_headers
|
||||||
|
)
|
||||||
|
endif ()
|
||||||
@@ -1,93 +1,40 @@
|
|||||||
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||||
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||||
/*
|
#ident "$Id$"
|
||||||
COPYING CONDITIONS NOTICE:
|
/*======
|
||||||
|
This file is part of PerconaFT.
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or modify
|
|
||||||
it under the terms of version 2 of the GNU General Public License as
|
|
||||||
published by the Free Software Foundation, and provided that the
|
|
||||||
following conditions are met:
|
|
||||||
|
|
||||||
* Redistributions of source code must retain this COPYING
|
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||||
CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the
|
|
||||||
DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the
|
|
||||||
PATENT MARKING NOTICE (below), and the PATENT RIGHTS
|
|
||||||
GRANT (below).
|
|
||||||
|
|
||||||
* Redistributions in binary form must reproduce this COPYING
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the
|
it under the terms of the GNU General Public License, version 2,
|
||||||
DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the
|
as published by the Free Software Foundation.
|
||||||
PATENT MARKING NOTICE (below), and the PATENT RIGHTS
|
|
||||||
GRANT (below) in the documentation and/or other materials
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
provided with the distribution.
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
You should have received a copy of the GNU General Public License
|
You should have received a copy of the GNU General Public License
|
||||||
along with this program; if not, write to the Free Software
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
|
||||||
02110-1301, USA.
|
|
||||||
|
|
||||||
COPYRIGHT NOTICE:
|
----------------------------------------
|
||||||
|
|
||||||
TokuFT, Tokutek Fractal Tree Indexing Library.
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
Copyright (C) 2007-2013 Tokutek, Inc.
|
it under the terms of the GNU Affero General Public License, version 3,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
DISCLAIMER:
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
This program is distributed in the hope that it will be useful, but
|
You should have received a copy of the GNU Affero General Public License
|
||||||
WITHOUT ANY WARRANTY; without even the implied warranty of
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
======= */
|
||||||
General Public License for more details.
|
|
||||||
|
|
||||||
UNIVERSITY PATENT NOTICE:
|
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||||
|
|
||||||
The technology is licensed by the Massachusetts Institute of
|
|
||||||
Technology, Rutgers State University of New Jersey, and the Research
|
|
||||||
Foundation of State University of New York at Stony Brook under
|
|
||||||
United States of America Serial No. 11/760379 and to the patents
|
|
||||||
and/or patent applications resulting from it.
|
|
||||||
|
|
||||||
PATENT MARKING NOTICE:
|
|
||||||
|
|
||||||
This software is covered by US Patent No. 8,185,551.
|
|
||||||
This software is covered by US Patent No. 8,489,638.
|
|
||||||
|
|
||||||
PATENT RIGHTS GRANT:
|
|
||||||
|
|
||||||
"THIS IMPLEMENTATION" means the copyrightable works distributed by
|
|
||||||
Tokutek as part of the Fractal Tree project.
|
|
||||||
|
|
||||||
"PATENT CLAIMS" means the claims of patents that are owned or
|
|
||||||
licensable by Tokutek, both currently or in the future; and that in
|
|
||||||
the absence of this license would be infringed by THIS
|
|
||||||
IMPLEMENTATION or by using or running THIS IMPLEMENTATION.
|
|
||||||
|
|
||||||
"PATENT CHALLENGE" shall mean a challenge to the validity,
|
|
||||||
patentability, enforceability and/or non-infringement of any of the
|
|
||||||
PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS.
|
|
||||||
|
|
||||||
Tokutek hereby grants to you, for the term and geographical scope of
|
|
||||||
the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free,
|
|
||||||
irrevocable (except as stated in this section) patent license to
|
|
||||||
make, have made, use, offer to sell, sell, import, transfer, and
|
|
||||||
otherwise run, modify, and propagate the contents of THIS
|
|
||||||
IMPLEMENTATION, where such license applies only to the PATENT
|
|
||||||
CLAIMS. This grant does not include claims that would be infringed
|
|
||||||
only as a consequence of further modifications of THIS
|
|
||||||
IMPLEMENTATION. If you or your agent or licensee institute or order
|
|
||||||
or agree to the institution of patent litigation against any entity
|
|
||||||
(including a cross-claim or counterclaim in a lawsuit) alleging that
|
|
||||||
THIS IMPLEMENTATION constitutes direct or contributory patent
|
|
||||||
infringement, or inducement of patent infringement, then any rights
|
|
||||||
granted to you under this License shall terminate as of the date
|
|
||||||
such litigation is filed. If you or your agent or exclusive
|
|
||||||
licensee institute or order or agree to the institution of a PATENT
|
|
||||||
CHALLENGE, then Tokutek may terminate any rights granted to you
|
|
||||||
under this License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved."
|
|
||||||
#ident "$Id$"
|
|
||||||
/* LICENSE: This file is licensed under the GPL or from Tokutek. */
|
|
||||||
|
|
||||||
/* Make a db.h that will be link-time compatible with Sleepycat's Berkeley DB. */
|
/* Make a db.h that will be link-time compatible with Sleepycat's Berkeley DB. */
|
||||||
|
|
||||||
@@ -339,10 +286,11 @@ static void print_defines (void) {
|
|||||||
dodefine_from_track(txn_flags, DB_INHERIT_ISOLATION);
|
dodefine_from_track(txn_flags, DB_INHERIT_ISOLATION);
|
||||||
dodefine_from_track(txn_flags, DB_SERIALIZABLE);
|
dodefine_from_track(txn_flags, DB_SERIALIZABLE);
|
||||||
dodefine_from_track(txn_flags, DB_TXN_READ_ONLY);
|
dodefine_from_track(txn_flags, DB_TXN_READ_ONLY);
|
||||||
|
dodefine_from_track(txn_flags, DB_READ_COMMITTED_ALWAYS);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* TokuFT specific error codes*/
|
/* PerconaFT specific error codes*/
|
||||||
printf("/* TokuFT specific error codes */\n");
|
printf("/* PerconaFT specific error codes */\n");
|
||||||
dodefine(TOKUDB_OUT_OF_LOCKS);
|
dodefine(TOKUDB_OUT_OF_LOCKS);
|
||||||
dodefine(TOKUDB_SUCCEEDED_EARLY);
|
dodefine(TOKUDB_SUCCEEDED_EARLY);
|
||||||
dodefine(TOKUDB_FOUND_BUT_REJECTED);
|
dodefine(TOKUDB_FOUND_BUT_REJECTED);
|
||||||
@@ -421,6 +369,8 @@ static void print_db_env_struct (void) {
|
|||||||
"int (*cleaner_get_period) (DB_ENV*, uint32_t*) /* Retrieve the delay between automatic cleaner attempts. 0 means disabled. */",
|
"int (*cleaner_get_period) (DB_ENV*, uint32_t*) /* Retrieve the delay between automatic cleaner attempts. 0 means disabled. */",
|
||||||
"int (*cleaner_set_iterations) (DB_ENV*, uint32_t) /* Change the number of attempts on each cleaner invokation. 0 means disabled. */",
|
"int (*cleaner_set_iterations) (DB_ENV*, uint32_t) /* Change the number of attempts on each cleaner invokation. 0 means disabled. */",
|
||||||
"int (*cleaner_get_iterations) (DB_ENV*, uint32_t*) /* Retrieve the number of attempts on each cleaner invokation. 0 means disabled. */",
|
"int (*cleaner_get_iterations) (DB_ENV*, uint32_t*) /* Retrieve the number of attempts on each cleaner invokation. 0 means disabled. */",
|
||||||
|
"int (*evictor_set_enable_partial_eviction) (DB_ENV*, bool) /* Enables or disabled partial eviction of nodes from cachetable. */",
|
||||||
|
"int (*evictor_get_enable_partial_eviction) (DB_ENV*, bool*) /* Retrieve the status of partial eviction of nodes from cachetable. */",
|
||||||
"int (*checkpointing_postpone) (DB_ENV*) /* Use for 'rename table' or any other operation that must be disjoint from a checkpoint */",
|
"int (*checkpointing_postpone) (DB_ENV*) /* Use for 'rename table' or any other operation that must be disjoint from a checkpoint */",
|
||||||
"int (*checkpointing_resume) (DB_ENV*) /* Alert tokuft that 'postpone' is no longer necessary */",
|
"int (*checkpointing_resume) (DB_ENV*) /* Alert tokuft that 'postpone' is no longer necessary */",
|
||||||
"int (*checkpointing_begin_atomic_operation) (DB_ENV*) /* Begin a set of operations (that must be atomic as far as checkpoints are concerned). i.e. inserting into every index in one table */",
|
"int (*checkpointing_begin_atomic_operation) (DB_ENV*) /* Begin a set of operations (that must be atomic as far as checkpoints are concerned). i.e. inserting into every index in one table */",
|
||||||
@@ -457,6 +407,7 @@ static void print_db_env_struct (void) {
|
|||||||
"int (*set_lock_timeout_callback) (DB_ENV *env, lock_timeout_callback callback)",
|
"int (*set_lock_timeout_callback) (DB_ENV *env, lock_timeout_callback callback)",
|
||||||
"int (*txn_xa_recover) (DB_ENV*, TOKU_XA_XID list[/*count*/], long count, /*out*/ long *retp, uint32_t flags)",
|
"int (*txn_xa_recover) (DB_ENV*, TOKU_XA_XID list[/*count*/], long count, /*out*/ long *retp, uint32_t flags)",
|
||||||
"int (*get_txn_from_xid) (DB_ENV*, /*in*/ TOKU_XA_XID *, /*out*/ DB_TXN **)",
|
"int (*get_txn_from_xid) (DB_ENV*, /*in*/ TOKU_XA_XID *, /*out*/ DB_TXN **)",
|
||||||
|
"DB* (*get_db_for_directory) (DB_ENV*)",
|
||||||
"int (*get_cursor_for_directory) (DB_ENV*, /*in*/ DB_TXN *, /*out*/ DBC **)",
|
"int (*get_cursor_for_directory) (DB_ENV*, /*in*/ DB_TXN *, /*out*/ DBC **)",
|
||||||
"int (*get_cursor_for_persistent_environment)(DB_ENV*, /*in*/ DB_TXN *, /*out*/ DBC **)",
|
"int (*get_cursor_for_persistent_environment)(DB_ENV*, /*in*/ DB_TXN *, /*out*/ DBC **)",
|
||||||
"void (*change_fsync_log_period) (DB_ENV*, uint32_t)",
|
"void (*change_fsync_log_period) (DB_ENV*, uint32_t)",
|
||||||
@@ -466,6 +417,9 @@ static void print_db_env_struct (void) {
|
|||||||
"uint64_t (*get_loader_memory_size)(DB_ENV *env)",
|
"uint64_t (*get_loader_memory_size)(DB_ENV *env)",
|
||||||
"void (*set_killed_callback)(DB_ENV *env, uint64_t default_killed_time_msec, uint64_t (*get_killed_time_callback)(uint64_t default_killed_time_msec), int (*killed_callback)(void))",
|
"void (*set_killed_callback)(DB_ENV *env, uint64_t default_killed_time_msec, uint64_t (*get_killed_time_callback)(uint64_t default_killed_time_msec), int (*killed_callback)(void))",
|
||||||
"void (*do_backtrace) (DB_ENV *env)",
|
"void (*do_backtrace) (DB_ENV *env)",
|
||||||
|
"int (*set_client_pool_threads)(DB_ENV *, uint32_t)",
|
||||||
|
"int (*set_cachetable_pool_threads)(DB_ENV *, uint32_t)",
|
||||||
|
"int (*set_checkpoint_pool_threads)(DB_ENV *, uint32_t)",
|
||||||
NULL};
|
NULL};
|
||||||
|
|
||||||
sort_and_dump_fields("db_env", true, extra);
|
sort_and_dump_fields("db_env", true, extra);
|
||||||
@@ -481,7 +435,9 @@ static void print_db_key_range_struct (void) {
|
|||||||
|
|
||||||
static void print_db_lsn_struct (void) {
|
static void print_db_lsn_struct (void) {
|
||||||
field_counter=0;
|
field_counter=0;
|
||||||
sort_and_dump_fields("db_lsn", false, NULL);
|
/* A dummy field to make sizeof(DB_LSN) equal in C and C++ */
|
||||||
|
const char *extra[] = { "char dummy", NULL };
|
||||||
|
sort_and_dump_fields("db_lsn", false, extra);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void print_dbt_struct (void) {
|
static void print_dbt_struct (void) {
|
||||||
@@ -575,7 +531,7 @@ static void print_db_txn_struct (void) {
|
|||||||
STRUCT_SETUP(DB_TXN, prepare, "int (*%s) (DB_TXN*, uint8_t gid[DB_GID_SIZE], uint32_t flags)");
|
STRUCT_SETUP(DB_TXN, prepare, "int (*%s) (DB_TXN*, uint8_t gid[DB_GID_SIZE], uint32_t flags)");
|
||||||
STRUCT_SETUP(DB_TXN, discard, "int (*%s) (DB_TXN*, uint32_t)");
|
STRUCT_SETUP(DB_TXN, discard, "int (*%s) (DB_TXN*, uint32_t)");
|
||||||
STRUCT_SETUP(DB_TXN, id, "uint32_t (*%s) (DB_TXN *)");
|
STRUCT_SETUP(DB_TXN, id, "uint32_t (*%s) (DB_TXN *)");
|
||||||
STRUCT_SETUP(DB_TXN, mgrp, "DB_ENV *%s /* In TokuFT, mgrp is a DB_ENV, not a DB_TXNMGR */");
|
STRUCT_SETUP(DB_TXN, mgrp, "DB_ENV *%s /* In PerconaFT, mgrp is a DB_ENV, not a DB_TXNMGR */");
|
||||||
STRUCT_SETUP(DB_TXN, parent, "DB_TXN *%s");
|
STRUCT_SETUP(DB_TXN, parent, "DB_TXN *%s");
|
||||||
const char *extra[] = {
|
const char *extra[] = {
|
||||||
"int (*txn_stat)(DB_TXN *, struct txn_stat **)",
|
"int (*txn_stat)(DB_TXN *, struct txn_stat **)",
|
||||||
@@ -618,6 +574,7 @@ static void print_dbc_struct (void) {
|
|||||||
"int (*c_set_bounds)(DBC*, const DBT*, const DBT*, bool pre_acquire, int out_of_range_error)",
|
"int (*c_set_bounds)(DBC*, const DBT*, const DBT*, bool pre_acquire, int out_of_range_error)",
|
||||||
"void (*c_set_check_interrupt_callback)(DBC*, bool (*)(void*, uint64_t deleted_rows), void *)",
|
"void (*c_set_check_interrupt_callback)(DBC*, bool (*)(void*, uint64_t deleted_rows), void *)",
|
||||||
"void (*c_remove_restriction)(DBC*)",
|
"void (*c_remove_restriction)(DBC*)",
|
||||||
|
"void (*c_set_txn)(DBC*, DB_TXN*)",
|
||||||
"char _internal[512]",
|
"char _internal[512]",
|
||||||
NULL};
|
NULL};
|
||||||
sort_and_dump_fields("dbc", false, extra);
|
sort_and_dump_fields("dbc", false, extra);
|
||||||
@@ -629,8 +586,8 @@ int main (int argc, char *const argv[] __attribute__((__unused__))) {
|
|||||||
|
|
||||||
printf("#ifndef _DB_H\n");
|
printf("#ifndef _DB_H\n");
|
||||||
printf("#define _DB_H\n");
|
printf("#define _DB_H\n");
|
||||||
printf("/* This code generated by make_db_h. Copyright (c) 2007-2013 Tokutek */\n");
|
printf("/* This code generated by make_db_h. Copyright (c) 2006, 2015, Percona and/or its affiliates. */\n");
|
||||||
printf("#ident \"Copyright (c) 2007-2013 Tokutek Inc. All rights reserved.\"\n");
|
printf("#ident \"Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.\"\n");
|
||||||
printf("#include <sys/types.h>\n");
|
printf("#include <sys/types.h>\n");
|
||||||
printf("/*stdio is needed for the FILE* in db->verify*/\n");
|
printf("/*stdio is needed for the FILE* in db->verify*/\n");
|
||||||
printf("#include <stdio.h>\n");
|
printf("#include <stdio.h>\n");
|
||||||
@@ -642,9 +599,9 @@ int main (int argc, char *const argv[] __attribute__((__unused__))) {
|
|||||||
|
|
||||||
printf("#define DB_VERSION_MAJOR %d\n", DB_VERSION_MAJOR);
|
printf("#define DB_VERSION_MAJOR %d\n", DB_VERSION_MAJOR);
|
||||||
printf("#define DB_VERSION_MINOR %d\n", DB_VERSION_MINOR);
|
printf("#define DB_VERSION_MINOR %d\n", DB_VERSION_MINOR);
|
||||||
printf("/* As of r40364 (post TokuFT 5.2.7), the patch version number is 100+ the BDB header patch version number.*/\n");
|
printf("/* As of r40364 (post PerconaFT 5.2.7), the patch version number is 100+ the BDB header patch version number.*/\n");
|
||||||
printf("#define DB_VERSION_PATCH %d\n", 100+DB_VERSION_PATCH);
|
printf("#define DB_VERSION_PATCH %d\n", 100+DB_VERSION_PATCH);
|
||||||
printf("#define DB_VERSION_STRING \"Tokutek: TokuFT %d.%d.%d\"\n", DB_VERSION_MAJOR, DB_VERSION_MINOR, 100+DB_VERSION_PATCH);
|
printf("#define DB_VERSION_STRING \"Percona: PerconaFT %d.%d.%d\"\n", DB_VERSION_MAJOR, DB_VERSION_MINOR, 100+DB_VERSION_PATCH);
|
||||||
|
|
||||||
#ifndef DB_GID_SIZE
|
#ifndef DB_GID_SIZE
|
||||||
#define DB_GID_SIZE DB_XIDDATASIZE
|
#define DB_GID_SIZE DB_XIDDATASIZE
|
||||||
@@ -696,6 +653,7 @@ int main (int argc, char *const argv[] __attribute__((__unused__))) {
|
|||||||
// compression methods
|
// compression methods
|
||||||
printf("typedef enum toku_compression_method {\n");
|
printf("typedef enum toku_compression_method {\n");
|
||||||
printf(" TOKU_NO_COMPRESSION = 0,\n"); // "identity" compression
|
printf(" TOKU_NO_COMPRESSION = 0,\n"); // "identity" compression
|
||||||
|
printf(" TOKU_SNAPPY_METHOD = 7,\n"); // google snappy
|
||||||
printf(" TOKU_ZLIB_METHOD = 8,\n"); // RFC 1950 says use 8 for zlib. It reserves 15 to allow more bytes.
|
printf(" TOKU_ZLIB_METHOD = 8,\n"); // RFC 1950 says use 8 for zlib. It reserves 15 to allow more bytes.
|
||||||
printf(" TOKU_QUICKLZ_METHOD = 9,\n"); // We use 9 for QUICKLZ (the QLZ compression level is stored int he high-order nibble). I couldn't find any standard for any other numbers, so I just use 9. -Bradley
|
printf(" TOKU_QUICKLZ_METHOD = 9,\n"); // We use 9 for QUICKLZ (the QLZ compression level is stored int he high-order nibble). I couldn't find any standard for any other numbers, so I just use 9. -Bradley
|
||||||
printf(" TOKU_LZMA_METHOD = 10,\n"); // We use 10 for LZMA. (Note the compression level is stored in the high-order nibble).
|
printf(" TOKU_LZMA_METHOD = 10,\n"); // We use 10 for LZMA. (Note the compression level is stored in the high-order nibble).
|
||||||
@@ -0,0 +1,126 @@
|
|||||||
|
## set up lists of sources and headers for tags
|
||||||
|
file(GLOB_RECURSE all_srcs
|
||||||
|
buildheader/*.cc
|
||||||
|
db-benchmark-test/*.cc
|
||||||
|
ft/*.cc
|
||||||
|
include/*.cc
|
||||||
|
locktree/*.cc
|
||||||
|
portability/*.cc
|
||||||
|
src/*.cc
|
||||||
|
utils/*.cc
|
||||||
|
util/*.cc
|
||||||
|
db-benchmark-test/*.cc
|
||||||
|
)
|
||||||
|
list(APPEND all_srcs
|
||||||
|
${CMAKE_CURRENT_BINARY_DIR}/ft/log_code.cc
|
||||||
|
${CMAKE_CURRENT_BINARY_DIR}/ft/log_print.cc
|
||||||
|
)
|
||||||
|
file(GLOB_RECURSE all_hdrs
|
||||||
|
buildheader/*.h
|
||||||
|
db-benchmark-test/*.h
|
||||||
|
ft/*.h
|
||||||
|
include/*.h
|
||||||
|
locktree/*.h
|
||||||
|
portability/*.h
|
||||||
|
src/*.h
|
||||||
|
utils/*.h
|
||||||
|
util/*.h
|
||||||
|
db-benchmark-test/*.h
|
||||||
|
)
|
||||||
|
list(APPEND all_hdrs
|
||||||
|
${CMAKE_CURRENT_BINARY_DIR}/portability/toku_config.h
|
||||||
|
${CMAKE_CURRENT_BINARY_DIR}/buildheader/db.h
|
||||||
|
${CMAKE_CURRENT_BINARY_DIR}/ft/log_header.h
|
||||||
|
)
|
||||||
|
|
||||||
|
option(USE_ETAGS "Build the etags database." ON)
|
||||||
|
if (USE_ETAGS)
|
||||||
|
find_program(ETAGS "etags")
|
||||||
|
if (NOT ETAGS MATCHES NOTFOUND)
|
||||||
|
add_custom_command(
|
||||||
|
OUTPUT "${CMAKE_CURRENT_SOURCE_DIR}/TAGS"
|
||||||
|
OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/etags-stamp"
|
||||||
|
COMMAND ${ETAGS} -o TAGS ${all_srcs} ${all_hdrs}
|
||||||
|
COMMAND touch "${CMAKE_CURRENT_BINARY_DIR}/etags-stamp"
|
||||||
|
DEPENDS ${all_srcs} ${all_hdrs} install_tdb_h generate_config_h generate_log_code
|
||||||
|
WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}")
|
||||||
|
add_custom_target(build_etags ALL DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/TAGS" etags-stamp)
|
||||||
|
endif ()
|
||||||
|
endif ()
|
||||||
|
|
||||||
|
option(USE_CTAGS "Build the ctags database." ON)
|
||||||
|
if (USE_CTAGS AND
|
||||||
|
# Macs by default are not case-sensitive, so tags and TAGS clobber each other. Do etags and not ctags in that case, because Emacs is superior. :P
|
||||||
|
(NOT APPLE OR NOT USE_ETAGS))
|
||||||
|
find_program(CTAGS "ctags")
|
||||||
|
if (NOT CTAGS MATCHES NOTFOUND)
|
||||||
|
add_custom_command(
|
||||||
|
OUTPUT "${CMAKE_CURRENT_SOURCE_DIR}/tags"
|
||||||
|
OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/ctags-stamp"
|
||||||
|
COMMAND ${CTAGS} -o tags ${all_srcs} ${all_hdrs}
|
||||||
|
COMMAND touch "${CMAKE_CURRENT_BINARY_DIR}/ctags-stamp"
|
||||||
|
DEPENDS ${all_srcs} ${all_hdrs} install_tdb_h generate_config_h generate_log_code
|
||||||
|
WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}")
|
||||||
|
add_custom_target(build_ctags ALL DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/tags" ctags-stamp)
|
||||||
|
endif ()
|
||||||
|
endif ()
|
||||||
|
|
||||||
|
option(USE_CSCOPE "Build the cscope database." ON)
|
||||||
|
if (USE_CSCOPE)
|
||||||
|
find_program(CSCOPE "cscope")
|
||||||
|
if (NOT CSCOPE MATCHES NOTFOUND)
|
||||||
|
file(WRITE "${CMAKE_CURRENT_BINARY_DIR}/cscope.files" "")
|
||||||
|
foreach(file ${all_srcs} ${all_hdrs})
|
||||||
|
file(APPEND "${CMAKE_CURRENT_BINARY_DIR}/cscope.files" "${file}\n")
|
||||||
|
endforeach(file)
|
||||||
|
add_custom_command(
|
||||||
|
OUTPUT "${CMAKE_CURRENT_SOURCE_DIR}/cscope.out"
|
||||||
|
OUTPUT "${CMAKE_CURRENT_SOURCE_DIR}/cscope.in.out"
|
||||||
|
OUTPUT "${CMAKE_CURRENT_SOURCE_DIR}/cscope.po.out"
|
||||||
|
COMMAND ${CSCOPE} -b -q -R -i"${CMAKE_CURRENT_BINARY_DIR}/cscope.files" -I"${CMAKE_CURRENT_SOURCE_DIR}" -I"${CMAKE_CURRENT_SOURCE_DIR}/include" -I"${CMAKE_CURRENT_SOURCE_DIR}/portability" -I"${CMAKE_CURRENT_SOURCE_DIR}/portability" -I"${CMAKE_CURRENT_SOURCE_DIR}/ft" -I"${CMAKE_CURRENT_SOURCE_DIR}/src" -I"${CMAKE_CURRENT_SOURCE_DIR}/locktree" -I"${CMAKE_CURRENT_SOURCE_DIR}/utils" -I"${CMAKE_CURRENT_SOURCE_DIR}/db-benchmark-test" -I"${CMAKE_CURRENT_BINARY_DIR}" -I"${CMAKE_CURRENT_BINARY_DIR}/portability" -I"${CMAKE_CURRENT_BINARY_DIR}/buildheader"
|
||||||
|
DEPENDS ${all_srcs} ${all_hdrs} install_tdb_h generate_config_h generate_log_code
|
||||||
|
WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}")
|
||||||
|
add_custom_target(build_cscope.out ALL DEPENDS
|
||||||
|
"${CMAKE_CURRENT_SOURCE_DIR}/cscope.out"
|
||||||
|
"${CMAKE_CURRENT_SOURCE_DIR}/cscope.in.out"
|
||||||
|
"${CMAKE_CURRENT_SOURCE_DIR}/cscope.po.out")
|
||||||
|
endif ()
|
||||||
|
endif ()
|
||||||
|
|
||||||
|
option(USE_GTAGS "Build the gtags database." ON)
|
||||||
|
if (USE_GTAGS)
|
||||||
|
find_program(GTAGS "gtags")
|
||||||
|
if (NOT GTAGS MATCHES NOTFOUND)
|
||||||
|
file(WRITE "${CMAKE_CURRENT_BINARY_DIR}/gtags.files" "")
|
||||||
|
foreach(file ${all_srcs} ${all_hdrs})
|
||||||
|
file(APPEND "${CMAKE_CURRENT_BINARY_DIR}/gtags.files" "${file}\n")
|
||||||
|
endforeach(file)
|
||||||
|
add_custom_command(
|
||||||
|
OUTPUT "${CMAKE_CURRENT_SOURCE_DIR}/GTAGS"
|
||||||
|
OUTPUT "${CMAKE_CURRENT_SOURCE_DIR}/GRTAGS"
|
||||||
|
OUTPUT "${CMAKE_CURRENT_SOURCE_DIR}/GPATH"
|
||||||
|
OUTPUT "${CMAKE_CURRENT_SOURCE_DIR}/GSYMS"
|
||||||
|
COMMAND ${GTAGS} -f "${CMAKE_CURRENT_BINARY_DIR}/gtags.files"
|
||||||
|
DEPENDS ${all_srcs} ${all_hdrs} install_tdb_h generate_config_h generate_log_code
|
||||||
|
WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}")
|
||||||
|
add_custom_target(build_GTAGS ALL DEPENDS
|
||||||
|
"${CMAKE_CURRENT_SOURCE_DIR}/GTAGS"
|
||||||
|
"${CMAKE_CURRENT_SOURCE_DIR}/GRTAGS"
|
||||||
|
"${CMAKE_CURRENT_SOURCE_DIR}/GPATH"
|
||||||
|
"${CMAKE_CURRENT_SOURCE_DIR}/GSYMS")
|
||||||
|
endif ()
|
||||||
|
endif ()
|
||||||
|
|
||||||
|
option(USE_MKID "Build the idutils database." ON)
|
||||||
|
if (USE_MKID)
|
||||||
|
find_program(MKID "mkid")
|
||||||
|
if (NOT MKID MATCHES NOTFOUND)
|
||||||
|
add_custom_command(
|
||||||
|
OUTPUT "${CMAKE_CURRENT_SOURCE_DIR}/ID"
|
||||||
|
COMMAND ${MKID} ${all_srcs} ${all_hdrs}
|
||||||
|
DEPENDS ${all_srcs} ${all_hdrs} install_tdb_h generate_config_h generate_log_code
|
||||||
|
WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}")
|
||||||
|
add_custom_target(build_MKID ALL DEPENDS
|
||||||
|
"${CMAKE_CURRENT_SOURCE_DIR}/ID")
|
||||||
|
endif ()
|
||||||
|
endif ()
|
||||||
@@ -14,6 +14,7 @@ include(CheckIncludeFiles)
|
|||||||
## check for some include files
|
## check for some include files
|
||||||
check_include_files(alloca.h HAVE_ALLOCA_H)
|
check_include_files(alloca.h HAVE_ALLOCA_H)
|
||||||
check_include_files(arpa/inet.h HAVE_ARPA_INET_H)
|
check_include_files(arpa/inet.h HAVE_ARPA_INET_H)
|
||||||
|
check_include_files(bits/functexcept.h HAVE_BITS_FUNCTEXCEPT_H)
|
||||||
check_include_files(byteswap.h HAVE_BYTESWAP_H)
|
check_include_files(byteswap.h HAVE_BYTESWAP_H)
|
||||||
check_include_files(endian.h HAVE_ENDIAN_H)
|
check_include_files(endian.h HAVE_ENDIAN_H)
|
||||||
check_include_files(fcntl.h HAVE_FCNTL_H)
|
check_include_files(fcntl.h HAVE_FCNTL_H)
|
||||||
@@ -110,13 +111,14 @@ check_function_exists(pthread_yield HAVE_PTHREAD_YIELD)
|
|||||||
check_function_exists(pthread_yield_np HAVE_PTHREAD_YIELD_NP)
|
check_function_exists(pthread_yield_np HAVE_PTHREAD_YIELD_NP)
|
||||||
## check if we have pthread_getthreadid_np() (i.e. freebsd)
|
## check if we have pthread_getthreadid_np() (i.e. freebsd)
|
||||||
check_function_exists(pthread_getthreadid_np HAVE_PTHREAD_GETTHREADID_NP)
|
check_function_exists(pthread_getthreadid_np HAVE_PTHREAD_GETTHREADID_NP)
|
||||||
|
check_function_exists(sched_getcpu HAVE_SCHED_GETCPU)
|
||||||
|
|
||||||
include(CheckCSourceCompiles)
|
include(CheckCSourceCompiles)
|
||||||
|
|
||||||
if (HAVE_PTHREAD_YIELD)
|
if (HAVE_PTHREAD_YIELD)
|
||||||
include(CheckPrototypeDefinition)
|
include(CheckPrototypeDefinition)
|
||||||
|
|
||||||
check_prototype_definition(pthread_yield "void pthread_yield(void)" 0 "pthread.h" PTHREAD_YIELD_RETURNS_VOID)
|
check_prototype_definition(pthread_yield "void pthread_yield(void)" "(void)0" "pthread.h" PTHREAD_YIELD_RETURNS_VOID)
|
||||||
check_c_source_compiles("#include <pthread.h>
|
check_c_source_compiles("#include <pthread.h>
|
||||||
int main(void) {
|
int main(void) {
|
||||||
int r = pthread_yield();
|
int r = pthread_yield();
|
||||||
@@ -96,6 +96,7 @@ set_cflags_if_supported(
|
|||||||
-Wno-error=address-of-array-temporary
|
-Wno-error=address-of-array-temporary
|
||||||
-Wno-error=tautological-constant-out-of-range-compare
|
-Wno-error=tautological-constant-out-of-range-compare
|
||||||
-Wno-ignored-attributes
|
-Wno-ignored-attributes
|
||||||
|
-Wno-error=extern-c-compat
|
||||||
-fno-rtti
|
-fno-rtti
|
||||||
-fno-exceptions
|
-fno-exceptions
|
||||||
)
|
)
|
||||||
@@ -116,6 +117,13 @@ if (NOT CMAKE_CXX_COMPILER_ID MATCHES Clang)
|
|||||||
)
|
)
|
||||||
endif ()
|
endif ()
|
||||||
|
|
||||||
|
option (PROFILING "Allow profiling and debug" ON)
|
||||||
|
if (PROFILING)
|
||||||
|
set_cflags_if_supported(
|
||||||
|
-fno-omit-frame-pointer
|
||||||
|
)
|
||||||
|
endif ()
|
||||||
|
|
||||||
## this hits with optimized builds somewhere in ftleaf_split, we don't
|
## this hits with optimized builds somewhere in ftleaf_split, we don't
|
||||||
## know why but we don't think it's a big deal
|
## know why but we don't think it's a big deal
|
||||||
set_cflags_if_supported(
|
set_cflags_if_supported(
|
||||||
@@ -105,3 +105,38 @@ add_library(lzma STATIC IMPORTED)
|
|||||||
set_target_properties(lzma PROPERTIES IMPORTED_LOCATION
|
set_target_properties(lzma PROPERTIES IMPORTED_LOCATION
|
||||||
"${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_CFG_INTDIR}/xz/lib/liblzma.a")
|
"${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_CFG_INTDIR}/xz/lib/liblzma.a")
|
||||||
add_dependencies(lzma build_lzma)
|
add_dependencies(lzma build_lzma)
|
||||||
|
|
||||||
|
|
||||||
|
## add snappy with an external project
|
||||||
|
set(SNAPPY_SOURCE_DIR "${TokuDB_SOURCE_DIR}/third_party/snappy-1.1.2" CACHE FILEPATH "Where to find sources for snappy.")
|
||||||
|
if (NOT EXISTS "${SNAPPY_SOURCE_DIR}/CMakeLists.txt")
|
||||||
|
message(FATAL_ERROR "Can't find the snappy sources. Please check them out to ${SNAPPY_SOURCE_DIR} or modify SNAPPY_SOURCE_DIR.")
|
||||||
|
endif ()
|
||||||
|
|
||||||
|
FILE(GLOB SNAPPY_ALL_FILES ${SNAPPY_SOURCE_DIR}/*)
|
||||||
|
ExternalProject_Add(build_snappy
|
||||||
|
PREFIX snappy
|
||||||
|
DOWNLOAD_COMMAND
|
||||||
|
cp -a "${SNAPPY_ALL_FILES}" "<SOURCE_DIR>/"
|
||||||
|
CMAKE_ARGS
|
||||||
|
-DCMAKE_INSTALL_PREFIX=<INSTALL_DIR>
|
||||||
|
-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
|
||||||
|
-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
|
||||||
|
-DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}
|
||||||
|
-DCMAKE_C_FLAGS=${CMAKE_C_FLAGS}
|
||||||
|
-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
|
||||||
|
${USE_PROJECT_CMAKE_MODULE_PATH}
|
||||||
|
)
|
||||||
|
FILE(GLOB_RECURSE SNAPPY_ALL_FILES_RECURSIVE ${SNAPPY_SOURCE_DIR}/*)
|
||||||
|
ExternalProject_Add_Step(build_snappy reclone_src # Names of project and custom step
|
||||||
|
COMMENT "(re)cloning snappy source..." # Text printed when step executes
|
||||||
|
DEPENDERS download configure # Steps that depend on this step
|
||||||
|
DEPENDS ${SNAPPY_ALL_FILES_RECURSIVE} # Files on which this step depends
|
||||||
|
)
|
||||||
|
|
||||||
|
include_directories("${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_CFG_INTDIR}/snappy/include")
|
||||||
|
|
||||||
|
add_library(snappy STATIC IMPORTED)
|
||||||
|
set_target_properties(snappy PROPERTIES IMPORTED_LOCATION
|
||||||
|
"${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_CFG_INTDIR}/snappy/lib/libsnappy.a")
|
||||||
|
add_dependencies(snappy build_snappy)
|
||||||
95
storage/tokudb/PerconaFT/ft/CMakeLists.txt
Normal file
95
storage/tokudb/PerconaFT/ft/CMakeLists.txt
Normal file
@@ -0,0 +1,95 @@
|
|||||||
|
include_directories(${CMAKE_CURRENT_SOURCE_DIR})
|
||||||
|
|
||||||
|
## generate log_code.cc, log_print.cc, log_header.h
|
||||||
|
set_source_files_properties(
|
||||||
|
"${CMAKE_CURRENT_BINARY_DIR}/log_code"
|
||||||
|
"${CMAKE_CURRENT_BINARY_DIR}/log_print"
|
||||||
|
"${CMAKE_CURRENT_BINARY_DIR}/log_header.h"
|
||||||
|
PROPERTIES GENERATED TRUE)
|
||||||
|
|
||||||
|
add_executable(logformat logger/logformat.cc)
|
||||||
|
target_link_libraries(logformat ${LIBTOKUPORTABILITY}_static)
|
||||||
|
if (USE_GCOV)
|
||||||
|
add_space_separated_property(TARGET logformat LINK_FLAGS --coverage)
|
||||||
|
endif (USE_GCOV)
|
||||||
|
|
||||||
|
add_custom_command(
|
||||||
|
OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/log_code.cc"
|
||||||
|
OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/log_print.cc"
|
||||||
|
OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/log_header.h"
|
||||||
|
COMMAND $<TARGET_FILE:logformat> .
|
||||||
|
DEPENDS logger/logformat
|
||||||
|
)
|
||||||
|
add_custom_target(
|
||||||
|
generate_log_code
|
||||||
|
DEPENDS "${CMAKE_CURRENT_BINARY_DIR}/log_code.cc" "${CMAKE_CURRENT_BINARY_DIR}/log_print.cc" "${CMAKE_CURRENT_BINARY_DIR}/log_header.h"
|
||||||
|
)
|
||||||
|
|
||||||
|
set(FT_SOURCES
|
||||||
|
bndata
|
||||||
|
cachetable/background_job_manager
|
||||||
|
cachetable/cachetable
|
||||||
|
cachetable/checkpoint
|
||||||
|
cursor
|
||||||
|
ft
|
||||||
|
ft-cachetable-wrappers
|
||||||
|
ft-flusher
|
||||||
|
ft-hot-flusher
|
||||||
|
ft-ops
|
||||||
|
ft-status
|
||||||
|
ft-test-helpers
|
||||||
|
ft-verify
|
||||||
|
loader/callbacks
|
||||||
|
loader/dbufio
|
||||||
|
loader/loader
|
||||||
|
loader/pqueue
|
||||||
|
leafentry
|
||||||
|
le-cursor
|
||||||
|
logger/logcursor
|
||||||
|
logger/logfilemgr
|
||||||
|
logger/logger
|
||||||
|
logger/log_upgrade
|
||||||
|
logger/recover
|
||||||
|
msg
|
||||||
|
msg_buffer
|
||||||
|
node
|
||||||
|
pivotkeys
|
||||||
|
serialize/block_allocator
|
||||||
|
serialize/block_allocator_strategy
|
||||||
|
serialize/block_table
|
||||||
|
serialize/compress
|
||||||
|
serialize/ft_node-serialize
|
||||||
|
serialize/ft-node-deserialize
|
||||||
|
serialize/ft-serialize
|
||||||
|
serialize/quicklz
|
||||||
|
serialize/sub_block
|
||||||
|
txn/rollback
|
||||||
|
txn/rollback-apply
|
||||||
|
txn/rollback-ct-callbacks
|
||||||
|
txn/rollback_log_node_cache
|
||||||
|
txn/roll
|
||||||
|
txn/txn
|
||||||
|
txn/txn_child_manager
|
||||||
|
txn/txn_manager
|
||||||
|
txn/xids
|
||||||
|
ule
|
||||||
|
"${CMAKE_CURRENT_BINARY_DIR}/log_code"
|
||||||
|
"${CMAKE_CURRENT_BINARY_DIR}/log_print"
|
||||||
|
)
|
||||||
|
|
||||||
|
add_library(ft SHARED ${FT_SOURCES})
|
||||||
|
add_library(ft_static STATIC ${FT_SOURCES})
|
||||||
|
## we're going to link this into libtokudb.so so it needs to have PIC
|
||||||
|
set_target_properties(ft_static PROPERTIES POSITION_INDEPENDENT_CODE ON)
|
||||||
|
maybe_add_gcov_to_libraries(ft ft_static)
|
||||||
|
|
||||||
|
## depend on other generated targets
|
||||||
|
add_dependencies(ft install_tdb_h generate_log_code build_lzma build_snappy)
|
||||||
|
add_dependencies(ft_static install_tdb_h generate_log_code build_lzma build_snappy)
|
||||||
|
|
||||||
|
## link with lzma and snappy (which should be static) and link dependers with zlib
|
||||||
|
target_link_libraries(ft LINK_PRIVATE util_static lzma snappy ${LIBTOKUPORTABILITY})
|
||||||
|
target_link_libraries(ft LINK_PUBLIC z)
|
||||||
|
target_link_libraries(ft_static LINK_PRIVATE lzma snappy)
|
||||||
|
|
||||||
|
add_subdirectory(tests)
|
||||||
@@ -1,95 +1,40 @@
|
|||||||
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||||
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||||
#ident "$Id$"
|
#ident "$Id$"
|
||||||
/*
|
/*======
|
||||||
COPYING CONDITIONS NOTICE:
|
This file is part of PerconaFT.
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or modify
|
|
||||||
it under the terms of version 2 of the GNU General Public License as
|
|
||||||
published by the Free Software Foundation, and provided that the
|
|
||||||
following conditions are met:
|
|
||||||
|
|
||||||
* Redistributions of source code must retain this COPYING
|
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||||
CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the
|
|
||||||
DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the
|
|
||||||
PATENT MARKING NOTICE (below), and the PATENT RIGHTS
|
|
||||||
GRANT (below).
|
|
||||||
|
|
||||||
* Redistributions in binary form must reproduce this COPYING
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the
|
it under the terms of the GNU General Public License, version 2,
|
||||||
DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the
|
as published by the Free Software Foundation.
|
||||||
PATENT MARKING NOTICE (below), and the PATENT RIGHTS
|
|
||||||
GRANT (below) in the documentation and/or other materials
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
provided with the distribution.
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
You should have received a copy of the GNU General Public License
|
You should have received a copy of the GNU General Public License
|
||||||
along with this program; if not, write to the Free Software
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
|
||||||
02110-1301, USA.
|
|
||||||
|
|
||||||
COPYRIGHT NOTICE:
|
----------------------------------------
|
||||||
|
|
||||||
TokuFT, Tokutek Fractal Tree Indexing Library.
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
Copyright (C) 2007-2013 Tokutek, Inc.
|
it under the terms of the GNU Affero General Public License, version 3,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
DISCLAIMER:
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
This program is distributed in the hope that it will be useful, but
|
You should have received a copy of the GNU Affero General Public License
|
||||||
WITHOUT ANY WARRANTY; without even the implied warranty of
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
======= */
|
||||||
General Public License for more details.
|
|
||||||
|
|
||||||
UNIVERSITY PATENT NOTICE:
|
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||||
|
|
||||||
The technology is licensed by the Massachusetts Institute of
|
|
||||||
Technology, Rutgers State University of New Jersey, and the Research
|
|
||||||
Foundation of State University of New York at Stony Brook under
|
|
||||||
United States of America Serial No. 11/760379 and to the patents
|
|
||||||
and/or patent applications resulting from it.
|
|
||||||
|
|
||||||
PATENT MARKING NOTICE:
|
|
||||||
|
|
||||||
This software is covered by US Patent No. 8,185,551.
|
|
||||||
This software is covered by US Patent No. 8,489,638.
|
|
||||||
|
|
||||||
PATENT RIGHTS GRANT:
|
|
||||||
|
|
||||||
"THIS IMPLEMENTATION" means the copyrightable works distributed by
|
|
||||||
Tokutek as part of the Fractal Tree project.
|
|
||||||
|
|
||||||
"PATENT CLAIMS" means the claims of patents that are owned or
|
|
||||||
licensable by Tokutek, both currently or in the future; and that in
|
|
||||||
the absence of this license would be infringed by THIS
|
|
||||||
IMPLEMENTATION or by using or running THIS IMPLEMENTATION.
|
|
||||||
|
|
||||||
"PATENT CHALLENGE" shall mean a challenge to the validity,
|
|
||||||
patentability, enforceability and/or non-infringement of any of the
|
|
||||||
PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS.
|
|
||||||
|
|
||||||
Tokutek hereby grants to you, for the term and geographical scope of
|
|
||||||
the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free,
|
|
||||||
irrevocable (except as stated in this section) patent license to
|
|
||||||
make, have made, use, offer to sell, sell, import, transfer, and
|
|
||||||
otherwise run, modify, and propagate the contents of THIS
|
|
||||||
IMPLEMENTATION, where such license applies only to the PATENT
|
|
||||||
CLAIMS. This grant does not include claims that would be infringed
|
|
||||||
only as a consequence of further modifications of THIS
|
|
||||||
IMPLEMENTATION. If you or your agent or licensee institute or order
|
|
||||||
or agree to the institution of patent litigation against any entity
|
|
||||||
(including a cross-claim or counterclaim in a lawsuit) alleging that
|
|
||||||
THIS IMPLEMENTATION constitutes direct or contributory patent
|
|
||||||
infringement, or inducement of patent infringement, then any rights
|
|
||||||
granted to you under this License shall terminate as of the date
|
|
||||||
such litigation is filed. If you or your agent or exclusive
|
|
||||||
licensee institute or order or agree to the institution of a PATENT
|
|
||||||
CHALLENGE, then Tokutek may terminate any rights granted to you
|
|
||||||
under this License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved."
|
|
||||||
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it."
|
|
||||||
|
|
||||||
#include <config.h>
|
|
||||||
|
|
||||||
#include <ft/bndata.h>
|
#include <ft/bndata.h>
|
||||||
#include <ft/ft-internal.h>
|
#include <ft/ft-internal.h>
|
||||||
@@ -1,92 +1,39 @@
|
|||||||
/* -*- mode: C; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
/* -*- mode: C; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||||
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||||
/*
|
/*======
|
||||||
COPYING CONDITIONS NOTICE:
|
This file is part of PerconaFT.
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or modify
|
|
||||||
it under the terms of version 2 of the GNU General Public License as
|
|
||||||
published by the Free Software Foundation, and provided that the
|
|
||||||
following conditions are met:
|
|
||||||
|
|
||||||
* Redistributions of source code must retain this COPYING
|
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||||
CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the
|
|
||||||
DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the
|
|
||||||
PATENT MARKING NOTICE (below), and the PATENT RIGHTS
|
|
||||||
GRANT (below).
|
|
||||||
|
|
||||||
* Redistributions in binary form must reproduce this COPYING
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the
|
it under the terms of the GNU General Public License, version 2,
|
||||||
DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the
|
as published by the Free Software Foundation.
|
||||||
PATENT MARKING NOTICE (below), and the PATENT RIGHTS
|
|
||||||
GRANT (below) in the documentation and/or other materials
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
provided with the distribution.
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
You should have received a copy of the GNU General Public License
|
You should have received a copy of the GNU General Public License
|
||||||
along with this program; if not, write to the Free Software
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
|
||||||
02110-1301, USA.
|
|
||||||
|
|
||||||
COPYRIGHT NOTICE:
|
----------------------------------------
|
||||||
|
|
||||||
TokuFT, Tokutek Fractal Tree Indexing Library.
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
Copyright (C) 2007-2013 Tokutek, Inc.
|
it under the terms of the GNU Affero General Public License, version 3,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
DISCLAIMER:
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
This program is distributed in the hope that it will be useful, but
|
You should have received a copy of the GNU Affero General Public License
|
||||||
WITHOUT ANY WARRANTY; without even the implied warranty of
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
======= */
|
||||||
General Public License for more details.
|
|
||||||
|
|
||||||
UNIVERSITY PATENT NOTICE:
|
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||||
|
|
||||||
The technology is licensed by the Massachusetts Institute of
|
|
||||||
Technology, Rutgers State University of New Jersey, and the Research
|
|
||||||
Foundation of State University of New York at Stony Brook under
|
|
||||||
United States of America Serial No. 11/760379 and to the patents
|
|
||||||
and/or patent applications resulting from it.
|
|
||||||
|
|
||||||
PATENT MARKING NOTICE:
|
|
||||||
|
|
||||||
This software is covered by US Patent No. 8,185,551.
|
|
||||||
This software is covered by US Patent No. 8,489,638.
|
|
||||||
|
|
||||||
PATENT RIGHTS GRANT:
|
|
||||||
|
|
||||||
"THIS IMPLEMENTATION" means the copyrightable works distributed by
|
|
||||||
Tokutek as part of the Fractal Tree project.
|
|
||||||
|
|
||||||
"PATENT CLAIMS" means the claims of patents that are owned or
|
|
||||||
licensable by Tokutek, both currently or in the future; and that in
|
|
||||||
the absence of this license would be infringed by THIS
|
|
||||||
IMPLEMENTATION or by using or running THIS IMPLEMENTATION.
|
|
||||||
|
|
||||||
"PATENT CHALLENGE" shall mean a challenge to the validity,
|
|
||||||
patentability, enforceability and/or non-infringement of any of the
|
|
||||||
PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS.
|
|
||||||
|
|
||||||
Tokutek hereby grants to you, for the term and geographical scope of
|
|
||||||
the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free,
|
|
||||||
irrevocable (except as stated in this section) patent license to
|
|
||||||
make, have made, use, offer to sell, sell, import, transfer, and
|
|
||||||
otherwise run, modify, and propagate the contents of THIS
|
|
||||||
IMPLEMENTATION, where such license applies only to the PATENT
|
|
||||||
CLAIMS. This grant does not include claims that would be infringed
|
|
||||||
only as a consequence of further modifications of THIS
|
|
||||||
IMPLEMENTATION. If you or your agent or licensee institute or order
|
|
||||||
or agree to the institution of patent litigation against any entity
|
|
||||||
(including a cross-claim or counterclaim in a lawsuit) alleging that
|
|
||||||
THIS IMPLEMENTATION constitutes direct or contributory patent
|
|
||||||
infringement, or inducement of patent infringement, then any rights
|
|
||||||
granted to you under this License shall terminate as of the date
|
|
||||||
such litigation is filed. If you or your agent or exclusive
|
|
||||||
licensee institute or order or agree to the institution of a PATENT
|
|
||||||
CHALLENGE, then Tokutek may terminate any rights granted to you
|
|
||||||
under this License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved."
|
|
||||||
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it."
|
|
||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
@@ -182,7 +129,7 @@ public:
|
|||||||
void deserialize_from_rbuf(uint32_t num_entries, struct rbuf *rb, uint32_t data_size, uint32_t version);
|
void deserialize_from_rbuf(uint32_t num_entries, struct rbuf *rb, uint32_t data_size, uint32_t version);
|
||||||
|
|
||||||
// Retrieve the memory footprint of this basement node.
|
// Retrieve the memory footprint of this basement node.
|
||||||
// May over or under count: see Tokutek/ft-index#136
|
// May over or under count: see Percona/PerconaFT#136
|
||||||
// Also see dmt's implementation.
|
// Also see dmt's implementation.
|
||||||
uint64_t get_memory_size(void);
|
uint64_t get_memory_size(void);
|
||||||
|
|
||||||
106
storage/tokudb/PerconaFT/ft/cachetable/background_job_manager.cc
Normal file
106
storage/tokudb/PerconaFT/ft/cachetable/background_job_manager.cc
Normal file
@@ -0,0 +1,106 @@
|
|||||||
|
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||||
|
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||||
|
#ident "$Id$"
|
||||||
|
/*======
|
||||||
|
This file is part of PerconaFT.
|
||||||
|
|
||||||
|
|
||||||
|
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||||
|
|
||||||
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License, version 2,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
----------------------------------------
|
||||||
|
|
||||||
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU Affero General Public License, version 3,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Affero General Public License
|
||||||
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
======= */
|
||||||
|
|
||||||
|
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||||
|
|
||||||
|
#include <portability/toku_config.h>
|
||||||
|
#include <memory.h>
|
||||||
|
#include <toku_pthread.h>
|
||||||
|
|
||||||
|
#include "cachetable/background_job_manager.h"
|
||||||
|
|
||||||
|
// State of one background job manager.
//   accepting_jobs - while true, bjm_add_background_job admits new jobs
//   num_jobs       - number of jobs added and not yet removed
//   jobs_wait      - condition waited on in bjm_wait_for_jobs_to_finish
//   jobs_lock      - mutex held around accesses to the fields above
struct background_job_manager_struct {
|
||||||
|
bool accepting_jobs;
|
||||||
|
uint32_t num_jobs;
|
||||||
|
toku_cond_t jobs_wait;
|
||||||
|
toku_mutex_t jobs_lock;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Creates a background job manager and stores it in *pbjm.
// The new manager starts out accepting jobs, with a job count of zero.
// NOTE(review): XCALLOC is presumably a declare-and-zero-allocate macro
// (the object is later released with toku_free in bjm_destroy) -- confirm.
void bjm_init(BACKGROUND_JOB_MANAGER* pbjm) {
|
||||||
|
BACKGROUND_JOB_MANAGER XCALLOC(bjm);
|
||||||
|
toku_mutex_init(&bjm->jobs_lock, 0);
|
||||||
|
toku_cond_init(&bjm->jobs_wait, NULL);
|
||||||
|
bjm->accepting_jobs = true;
|
||||||
|
bjm->num_jobs = 0;
|
||||||
|
*pbjm = bjm;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Tears down a background job manager: destroys its condition variable and
// mutex, then frees the manager itself.
// Asserts that no jobs are outstanding (num_jobs == 0).
void bjm_destroy(BACKGROUND_JOB_MANAGER bjm) {
|
||||||
|
assert(bjm->num_jobs == 0);
|
||||||
|
toku_cond_destroy(&bjm->jobs_wait);
|
||||||
|
toku_mutex_destroy(&bjm->jobs_lock);
|
||||||
|
toku_free(bjm);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Makes the manager accept background jobs again by setting accepting_jobs
// back to true while holding jobs_lock.
// Asserts that no jobs are outstanding (num_jobs == 0).
void bjm_reset(BACKGROUND_JOB_MANAGER bjm) {
|
||||||
|
toku_mutex_lock(&bjm->jobs_lock);
|
||||||
|
assert(bjm->num_jobs == 0);
|
||||||
|
bjm->accepting_jobs = true;
|
||||||
|
toku_mutex_unlock(&bjm->jobs_lock);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Registers one background job with the manager.
// Returns 0 and increments num_jobs if the manager is accepting jobs;
// returns -1 and leaves num_jobs unchanged otherwise.
// The check and the increment are both done while holding jobs_lock.
int bjm_add_background_job(BACKGROUND_JOB_MANAGER bjm) {
|
||||||
|
int ret_val;
|
||||||
|
toku_mutex_lock(&bjm->jobs_lock);
|
||||||
|
if (bjm->accepting_jobs) {
|
||||||
|
bjm->num_jobs++;
|
||||||
|
ret_val = 0;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
ret_val = -1;
|
||||||
|
}
|
||||||
|
toku_mutex_unlock(&bjm->jobs_lock);
|
||||||
|
return ret_val;
|
||||||
|
}
|
||||||
|
// Unregisters one background job: decrements num_jobs while holding jobs_lock.
// Asserts that at least one job is registered (num_jobs > 0).
void bjm_remove_background_job(BACKGROUND_JOB_MANAGER bjm){
|
||||||
|
toku_mutex_lock(&bjm->jobs_lock);
|
||||||
|
assert(bjm->num_jobs > 0);
|
||||||
|
bjm->num_jobs--;
|
||||||
|
// Broadcast only once the last job is gone and the manager has stopped
// accepting jobs, i.e. the state bjm_wait_for_jobs_to_finish waits for.
if (bjm->num_jobs == 0 && !bjm->accepting_jobs) {
|
||||||
|
toku_cond_broadcast(&bjm->jobs_wait);
|
||||||
|
}
|
||||||
|
toku_mutex_unlock(&bjm->jobs_lock);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Stops the manager from accepting new jobs (accepting_jobs = false), then
// blocks on jobs_wait until num_jobs has dropped to zero.
// accepting_jobs is left false on return; bjm_reset sets it back to true.
void bjm_wait_for_jobs_to_finish(BACKGROUND_JOB_MANAGER bjm) {
|
||||||
|
toku_mutex_lock(&bjm->jobs_lock);
|
||||||
|
bjm->accepting_jobs = false;
|
||||||
|
// Re-check the count after every wakeup before leaving the loop.
while (bjm->num_jobs > 0) {
|
||||||
|
toku_cond_wait(&bjm->jobs_wait, &bjm->jobs_lock);
|
||||||
|
}
|
||||||
|
toku_mutex_unlock(&bjm->jobs_lock);
|
||||||
|
}
|
||||||
|
|
||||||
@@ -0,0 +1,78 @@
|
|||||||
|
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||||
|
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||||
|
#ident "$Id$"
|
||||||
|
/*======
|
||||||
|
This file is part of PerconaFT.
|
||||||
|
|
||||||
|
|
||||||
|
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||||
|
|
||||||
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License, version 2,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
----------------------------------------
|
||||||
|
|
||||||
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU Affero General Public License, version 3,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Affero General Public License
|
||||||
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
======= */
|
||||||
|
|
||||||
|
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
//
|
||||||
|
// The background job manager keeps track of the existence of
|
||||||
|
// background jobs running. We use the background job manager
|
||||||
|
// to allow threads to perform background jobs on various pieces
|
||||||
|
// of the system (e.g. cachefiles and cloned pairs being written out
|
||||||
|
// for checkpoint)
|
||||||
|
//
|
||||||
|
|
||||||
|
typedef struct background_job_manager_struct *BACKGROUND_JOB_MANAGER;
|
||||||
|
|
||||||
|
|
||||||
|
void bjm_init(BACKGROUND_JOB_MANAGER* bjm);
|
||||||
|
void bjm_destroy(BACKGROUND_JOB_MANAGER bjm);
|
||||||
|
|
||||||
|
//
|
||||||
|
// Re-allows a background job manager to accept background jobs
|
||||||
|
//
|
||||||
|
void bjm_reset(BACKGROUND_JOB_MANAGER bjm);
|
||||||
|
|
||||||
|
//
|
||||||
|
// add a background job. If return value is 0, then the addition of the job
|
||||||
|
// was successful and the user may perform the background job. If return
|
||||||
|
// value is non-zero, then adding the background job failed and the user
|
||||||
|
// may not perform the background job.
|
||||||
|
//
|
||||||
|
int bjm_add_background_job(BACKGROUND_JOB_MANAGER bjm);
|
||||||
|
|
||||||
|
//
|
||||||
|
// remove a background job
|
||||||
|
//
|
||||||
|
void bjm_remove_background_job(BACKGROUND_JOB_MANAGER bjm);
|
||||||
|
|
||||||
|
//
|
||||||
|
// This function waits for all current background jobs to be removed. If the user
|
||||||
|
// calls bjm_add_background_job while this function is running, or after this function
|
||||||
|
// has completed, bjm_add_background_job returns an error.
|
||||||
|
//
|
||||||
|
void bjm_wait_for_jobs_to_finish(BACKGROUND_JOB_MANAGER bjm);
|
||||||
@@ -1,97 +1,43 @@
|
|||||||
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||||
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||||
|
|
||||||
#ident "$Id$"
|
#ident "$Id$"
|
||||||
/*
|
/*======
|
||||||
COPYING CONDITIONS NOTICE:
|
This file is part of PerconaFT.
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or modify
|
|
||||||
it under the terms of version 2 of the GNU General Public License as
|
|
||||||
published by the Free Software Foundation, and provided that the
|
|
||||||
following conditions are met:
|
|
||||||
|
|
||||||
* Redistributions of source code must retain this COPYING
|
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||||
CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the
|
|
||||||
DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the
|
|
||||||
PATENT MARKING NOTICE (below), and the PATENT RIGHTS
|
|
||||||
GRANT (below).
|
|
||||||
|
|
||||||
* Redistributions in binary form must reproduce this COPYING
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the
|
it under the terms of the GNU General Public License, version 2,
|
||||||
DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the
|
as published by the Free Software Foundation.
|
||||||
PATENT MARKING NOTICE (below), and the PATENT RIGHTS
|
|
||||||
GRANT (below) in the documentation and/or other materials
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
provided with the distribution.
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
You should have received a copy of the GNU General Public License
|
You should have received a copy of the GNU General Public License
|
||||||
along with this program; if not, write to the Free Software
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
|
||||||
02110-1301, USA.
|
|
||||||
|
|
||||||
COPYRIGHT NOTICE:
|
----------------------------------------
|
||||||
|
|
||||||
TokuFT, Tokutek Fractal Tree Indexing Library.
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
Copyright (C) 2007-2013 Tokutek, Inc.
|
it under the terms of the GNU Affero General Public License, version 3,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
DISCLAIMER:
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
This program is distributed in the hope that it will be useful, but
|
You should have received a copy of the GNU Affero General Public License
|
||||||
WITHOUT ANY WARRANTY; without even the implied warranty of
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
======= */
|
||||||
General Public License for more details.
|
|
||||||
|
|
||||||
UNIVERSITY PATENT NOTICE:
|
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||||
|
|
||||||
The technology is licensed by the Massachusetts Institute of
|
|
||||||
Technology, Rutgers State University of New Jersey, and the Research
|
|
||||||
Foundation of State University of New York at Stony Brook under
|
|
||||||
United States of America Serial No. 11/760379 and to the patents
|
|
||||||
and/or patent applications resulting from it.
|
|
||||||
|
|
||||||
PATENT MARKING NOTICE:
|
|
||||||
|
|
||||||
This software is covered by US Patent No. 8,185,551.
|
|
||||||
This software is covered by US Patent No. 8,489,638.
|
|
||||||
|
|
||||||
PATENT RIGHTS GRANT:
|
|
||||||
|
|
||||||
"THIS IMPLEMENTATION" means the copyrightable works distributed by
|
|
||||||
Tokutek as part of the Fractal Tree project.
|
|
||||||
|
|
||||||
"PATENT CLAIMS" means the claims of patents that are owned or
|
|
||||||
licensable by Tokutek, both currently or in the future; and that in
|
|
||||||
the absence of this license would be infringed by THIS
|
|
||||||
IMPLEMENTATION or by using or running THIS IMPLEMENTATION.
|
|
||||||
|
|
||||||
"PATENT CHALLENGE" shall mean a challenge to the validity,
|
|
||||||
patentability, enforceability and/or non-infringement of any of the
|
|
||||||
PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS.
|
|
||||||
|
|
||||||
Tokutek hereby grants to you, for the term and geographical scope of
|
|
||||||
the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free,
|
|
||||||
irrevocable (except as stated in this section) patent license to
|
|
||||||
make, have made, use, offer to sell, sell, import, transfer, and
|
|
||||||
otherwise run, modify, and propagate the contents of THIS
|
|
||||||
IMPLEMENTATION, where such license applies only to the PATENT
|
|
||||||
CLAIMS. This grant does not include claims that would be infringed
|
|
||||||
only as a consequence of further modifications of THIS
|
|
||||||
IMPLEMENTATION. If you or your agent or licensee institute or order
|
|
||||||
or agree to the institution of patent litigation against any entity
|
|
||||||
(including a cross-claim or counterclaim in a lawsuit) alleging that
|
|
||||||
THIS IMPLEMENTATION constitutes direct or contributory patent
|
|
||||||
infringement, or inducement of patent infringement, then any rights
|
|
||||||
granted to you under this License shall terminate as of the date
|
|
||||||
such litigation is filed. If you or your agent or exclusive
|
|
||||||
licensee institute or order or agree to the institution of a PATENT
|
|
||||||
CHALLENGE, then Tokutek may terminate any rights granted to you
|
|
||||||
under this License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved."
|
|
||||||
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it."
|
|
||||||
|
|
||||||
#include "cachetable/background_job_manager.h"
|
#include "cachetable/background_job_manager.h"
|
||||||
#include <portability/toku_random.h>
|
#include <portability/toku_random.h>
|
||||||
#include <util/frwlock.h>
|
#include <util/frwlock.h>
|
||||||
@@ -525,11 +471,14 @@ public:
|
|||||||
void evict_pair(PAIR p, bool checkpoint_pending);
|
void evict_pair(PAIR p, bool checkpoint_pending);
|
||||||
void wait_for_cache_pressure_to_subside();
|
void wait_for_cache_pressure_to_subside();
|
||||||
void signal_eviction_thread();
|
void signal_eviction_thread();
|
||||||
|
void signal_eviction_thread_locked();
|
||||||
bool should_client_thread_sleep();
|
bool should_client_thread_sleep();
|
||||||
bool should_client_wake_eviction_thread();
|
bool should_client_wake_eviction_thread();
|
||||||
// function needed for testing
|
// function needed for testing
|
||||||
void get_state(long *size_current_ptr, long *size_limit_ptr);
|
void get_state(long *size_current_ptr, long *size_limit_ptr);
|
||||||
void fill_engine_status();
|
void fill_engine_status();
|
||||||
|
void set_enable_partial_eviction(bool enabled);
|
||||||
|
bool get_enable_partial_eviction(void) const;
|
||||||
private:
|
private:
|
||||||
void add_to_size_current(long size);
|
void add_to_size_current(long size);
|
||||||
void remove_from_size_current(long size);
|
void remove_from_size_current(long size);
|
||||||
@@ -560,6 +509,8 @@ private:
|
|||||||
int64_t m_high_size_watermark; // if cachetable grows to this size, client threads sleep upon adding data
|
int64_t m_high_size_watermark; // if cachetable grows to this size, client threads sleep upon adding data
|
||||||
int64_t m_high_size_hysteresis; // if > cachetable size, then sleeping client threads may wake up
|
int64_t m_high_size_hysteresis; // if > cachetable size, then sleeping client threads may wake up
|
||||||
|
|
||||||
|
bool m_enable_partial_eviction; // true if partial evictions are permitted
|
||||||
|
|
||||||
// used to calculate random numbers
|
// used to calculate random numbers
|
||||||
struct random_data m_random_data;
|
struct random_data m_random_data;
|
||||||
char m_random_statebuf[64];
|
char m_random_statebuf[64];
|
||||||
@@ -1,95 +1,40 @@
|
|||||||
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||||
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||||
#ident "$Id$"
|
#ident "$Id$"
|
||||||
/*
|
/*======
|
||||||
COPYING CONDITIONS NOTICE:
|
This file is part of PerconaFT.
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or modify
|
|
||||||
it under the terms of version 2 of the GNU General Public License as
|
|
||||||
published by the Free Software Foundation, and provided that the
|
|
||||||
following conditions are met:
|
|
||||||
|
|
||||||
* Redistributions of source code must retain this COPYING
|
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||||
CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the
|
|
||||||
DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the
|
|
||||||
PATENT MARKING NOTICE (below), and the PATENT RIGHTS
|
|
||||||
GRANT (below).
|
|
||||||
|
|
||||||
* Redistributions in binary form must reproduce this COPYING
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the
|
it under the terms of the GNU General Public License, version 2,
|
||||||
DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the
|
as published by the Free Software Foundation.
|
||||||
PATENT MARKING NOTICE (below), and the PATENT RIGHTS
|
|
||||||
GRANT (below) in the documentation and/or other materials
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
provided with the distribution.
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
You should have received a copy of the GNU General Public License
|
You should have received a copy of the GNU General Public License
|
||||||
along with this program; if not, write to the Free Software
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
|
||||||
02110-1301, USA.
|
|
||||||
|
|
||||||
COPYRIGHT NOTICE:
|
----------------------------------------
|
||||||
|
|
||||||
TokuFT, Tokutek Fractal Tree Indexing Library.
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
Copyright (C) 2007-2013 Tokutek, Inc.
|
it under the terms of the GNU Affero General Public License, version 3,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
DISCLAIMER:
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
This program is distributed in the hope that it will be useful, but
|
You should have received a copy of the GNU Affero General Public License
|
||||||
WITHOUT ANY WARRANTY; without even the implied warranty of
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
======= */
|
||||||
General Public License for more details.
|
|
||||||
|
|
||||||
UNIVERSITY PATENT NOTICE:
|
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||||
|
|
||||||
The technology is licensed by the Massachusetts Institute of
|
|
||||||
Technology, Rutgers State University of New Jersey, and the Research
|
|
||||||
Foundation of State University of New York at Stony Brook under
|
|
||||||
United States of America Serial No. 11/760379 and to the patents
|
|
||||||
and/or patent applications resulting from it.
|
|
||||||
|
|
||||||
PATENT MARKING NOTICE:
|
|
||||||
|
|
||||||
This software is covered by US Patent No. 8,185,551.
|
|
||||||
This software is covered by US Patent No. 8,489,638.
|
|
||||||
|
|
||||||
PATENT RIGHTS GRANT:
|
|
||||||
|
|
||||||
"THIS IMPLEMENTATION" means the copyrightable works distributed by
|
|
||||||
Tokutek as part of the Fractal Tree project.
|
|
||||||
|
|
||||||
"PATENT CLAIMS" means the claims of patents that are owned or
|
|
||||||
licensable by Tokutek, both currently or in the future; and that in
|
|
||||||
the absence of this license would be infringed by THIS
|
|
||||||
IMPLEMENTATION or by using or running THIS IMPLEMENTATION.
|
|
||||||
|
|
||||||
"PATENT CHALLENGE" shall mean a challenge to the validity,
|
|
||||||
patentability, enforceability and/or non-infringement of any of the
|
|
||||||
PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS.
|
|
||||||
|
|
||||||
Tokutek hereby grants to you, for the term and geographical scope of
|
|
||||||
the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free,
|
|
||||||
irrevocable (except as stated in this section) patent license to
|
|
||||||
make, have made, use, offer to sell, sell, import, transfer, and
|
|
||||||
otherwise run, modify, and propagate the contents of THIS
|
|
||||||
IMPLEMENTATION, where such license applies only to the PATENT
|
|
||||||
CLAIMS. This grant does not include claims that would be infringed
|
|
||||||
only as a consequence of further modifications of THIS
|
|
||||||
IMPLEMENTATION. If you or your agent or licensee institute or order
|
|
||||||
or agree to the institution of patent litigation against any entity
|
|
||||||
(including a cross-claim or counterclaim in a lawsuit) alleging that
|
|
||||||
THIS IMPLEMENTATION constitutes direct or contributory patent
|
|
||||||
infringement, or inducement of patent infringement, then any rights
|
|
||||||
granted to you under this License shall terminate as of the date
|
|
||||||
such litigation is filed. If you or your agent or exclusive
|
|
||||||
licensee institute or order or agree to the institution of a PATENT
|
|
||||||
CHALLENGE, then Tokutek may terminate any rights granted to you
|
|
||||||
under this License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved."
|
|
||||||
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it."
|
|
||||||
|
|
||||||
#include <config.h>
|
|
||||||
|
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <time.h>
|
#include <time.h>
|
||||||
@@ -127,43 +72,9 @@ static uint64_t cachetable_prefetches; // how many times has a block been pre
|
|||||||
static uint64_t cachetable_evictions;
|
static uint64_t cachetable_evictions;
|
||||||
static uint64_t cleaner_executions; // number of times the cleaner thread's loop has executed
|
static uint64_t cleaner_executions; // number of times the cleaner thread's loop has executed
|
||||||
|
|
||||||
static CACHETABLE_STATUS_S ct_status;
|
|
||||||
|
|
||||||
// Note, toku_cachetable_get_status() is below, after declaration of cachetable.
|
// Note, toku_cachetable_get_status() is below, after declaration of cachetable.
|
||||||
|
|
||||||
#define STATUS_INIT(k,c,t,l,inc) TOKUFT_STATUS_INIT(ct_status, k, c, t, "cachetable: " l, inc)
|
|
||||||
|
|
||||||
static void
|
|
||||||
status_init(void) {
|
|
||||||
// Note, this function initializes the keyname, type, and legend fields.
|
|
||||||
// Value fields are initialized to zero by compiler.
|
|
||||||
|
|
||||||
STATUS_INIT(CT_MISS, CACHETABLE_MISS, UINT64, "miss", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
STATUS_INIT(CT_MISSTIME, CACHETABLE_MISS_TIME, UINT64, "miss time", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
STATUS_INIT(CT_PREFETCHES, CACHETABLE_PREFETCHES, UINT64, "prefetches", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
STATUS_INIT(CT_SIZE_CURRENT, CACHETABLE_SIZE_CURRENT, UINT64, "size current", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
STATUS_INIT(CT_SIZE_LIMIT, CACHETABLE_SIZE_LIMIT, UINT64, "size limit", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
STATUS_INIT(CT_SIZE_WRITING, CACHETABLE_SIZE_WRITING, UINT64, "size writing", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
STATUS_INIT(CT_SIZE_NONLEAF, CACHETABLE_SIZE_NONLEAF, UINT64, "size nonleaf", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
STATUS_INIT(CT_SIZE_LEAF, CACHETABLE_SIZE_LEAF, UINT64, "size leaf", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
STATUS_INIT(CT_SIZE_ROLLBACK, CACHETABLE_SIZE_ROLLBACK, UINT64, "size rollback", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
STATUS_INIT(CT_SIZE_CACHEPRESSURE, CACHETABLE_SIZE_CACHEPRESSURE, UINT64, "size cachepressure", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
STATUS_INIT(CT_SIZE_CLONED, CACHETABLE_SIZE_CLONED, UINT64, "size currently cloned data for checkpoint", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
STATUS_INIT(CT_EVICTIONS, CACHETABLE_EVICTIONS, UINT64, "evictions", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
STATUS_INIT(CT_CLEANER_EXECUTIONS, CACHETABLE_CLEANER_EXECUTIONS, UINT64, "cleaner executions", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
STATUS_INIT(CT_CLEANER_PERIOD, CACHETABLE_CLEANER_PERIOD, UINT64, "cleaner period", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
STATUS_INIT(CT_CLEANER_ITERATIONS, CACHETABLE_CLEANER_ITERATIONS, UINT64, "cleaner iterations", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
|
|
||||||
STATUS_INIT(CT_WAIT_PRESSURE_COUNT, CACHETABLE_WAIT_PRESSURE_COUNT, UINT64, "number of waits on cache pressure", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
STATUS_INIT(CT_WAIT_PRESSURE_TIME, CACHETABLE_WAIT_PRESSURE_TIME, UINT64, "time waiting on cache pressure", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
STATUS_INIT(CT_LONG_WAIT_PRESSURE_COUNT, CACHETABLE_LONG_WAIT_PRESSURE_COUNT, UINT64, "number of long waits on cache pressure", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
STATUS_INIT(CT_LONG_WAIT_PRESSURE_TIME, CACHETABLE_LONG_WAIT_PRESSURE_TIME, UINT64, "long time waiting on cache pressure", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
ct_status.initialized = true;
|
|
||||||
}
|
|
||||||
#undef STATUS_INIT
|
|
||||||
|
|
||||||
#define STATUS_VALUE(x) ct_status.status[x].value.num
|
|
||||||
|
|
||||||
static void * const zero_value = nullptr;
|
static void * const zero_value = nullptr;
|
||||||
static PAIR_ATTR const zero_attr = {
|
static PAIR_ATTR const zero_attr = {
|
||||||
.size = 0,
|
.size = 0,
|
||||||
@@ -221,16 +132,35 @@ bool toku_ctpair_is_write_locked(PAIR pair) {
|
|||||||
|
|
||||||
void
|
void
|
||||||
toku_cachetable_get_status(CACHETABLE ct, CACHETABLE_STATUS statp) {
|
toku_cachetable_get_status(CACHETABLE ct, CACHETABLE_STATUS statp) {
|
||||||
if (!ct_status.initialized) {
|
ct_status.init();
|
||||||
status_init();
|
CT_STATUS_VAL(CT_MISS) = cachetable_miss;
|
||||||
}
|
CT_STATUS_VAL(CT_MISSTIME) = cachetable_misstime;
|
||||||
STATUS_VALUE(CT_MISS) = cachetable_miss;
|
CT_STATUS_VAL(CT_PREFETCHES) = cachetable_prefetches;
|
||||||
STATUS_VALUE(CT_MISSTIME) = cachetable_misstime;
|
CT_STATUS_VAL(CT_EVICTIONS) = cachetable_evictions;
|
||||||
STATUS_VALUE(CT_PREFETCHES) = cachetable_prefetches;
|
CT_STATUS_VAL(CT_CLEANER_EXECUTIONS) = cleaner_executions;
|
||||||
STATUS_VALUE(CT_EVICTIONS) = cachetable_evictions;
|
CT_STATUS_VAL(CT_CLEANER_PERIOD) = toku_get_cleaner_period_unlocked(ct);
|
||||||
STATUS_VALUE(CT_CLEANER_EXECUTIONS) = cleaner_executions;
|
CT_STATUS_VAL(CT_CLEANER_ITERATIONS) = toku_get_cleaner_iterations_unlocked(ct);
|
||||||
STATUS_VALUE(CT_CLEANER_PERIOD) = toku_get_cleaner_period_unlocked(ct);
|
toku_kibbutz_get_status(ct->client_kibbutz,
|
||||||
STATUS_VALUE(CT_CLEANER_ITERATIONS) = toku_get_cleaner_iterations_unlocked(ct);
|
&CT_STATUS_VAL(CT_POOL_CLIENT_NUM_THREADS),
|
||||||
|
&CT_STATUS_VAL(CT_POOL_CLIENT_NUM_THREADS_ACTIVE),
|
||||||
|
&CT_STATUS_VAL(CT_POOL_CLIENT_QUEUE_SIZE),
|
||||||
|
&CT_STATUS_VAL(CT_POOL_CLIENT_MAX_QUEUE_SIZE),
|
||||||
|
&CT_STATUS_VAL(CT_POOL_CLIENT_TOTAL_ITEMS_PROCESSED),
|
||||||
|
&CT_STATUS_VAL(CT_POOL_CLIENT_TOTAL_EXECUTION_TIME));
|
||||||
|
toku_kibbutz_get_status(ct->ct_kibbutz,
|
||||||
|
&CT_STATUS_VAL(CT_POOL_CACHETABLE_NUM_THREADS),
|
||||||
|
&CT_STATUS_VAL(CT_POOL_CACHETABLE_NUM_THREADS_ACTIVE),
|
||||||
|
&CT_STATUS_VAL(CT_POOL_CACHETABLE_QUEUE_SIZE),
|
||||||
|
&CT_STATUS_VAL(CT_POOL_CACHETABLE_MAX_QUEUE_SIZE),
|
||||||
|
&CT_STATUS_VAL(CT_POOL_CACHETABLE_TOTAL_ITEMS_PROCESSED),
|
||||||
|
&CT_STATUS_VAL(CT_POOL_CACHETABLE_TOTAL_EXECUTION_TIME));
|
||||||
|
toku_kibbutz_get_status(ct->checkpointing_kibbutz,
|
||||||
|
&CT_STATUS_VAL(CT_POOL_CHECKPOINT_NUM_THREADS),
|
||||||
|
&CT_STATUS_VAL(CT_POOL_CHECKPOINT_NUM_THREADS_ACTIVE),
|
||||||
|
&CT_STATUS_VAL(CT_POOL_CHECKPOINT_QUEUE_SIZE),
|
||||||
|
&CT_STATUS_VAL(CT_POOL_CHECKPOINT_MAX_QUEUE_SIZE),
|
||||||
|
&CT_STATUS_VAL(CT_POOL_CHECKPOINT_TOTAL_ITEMS_PROCESSED),
|
||||||
|
&CT_STATUS_VAL(CT_POOL_CHECKPOINT_TOTAL_EXECUTION_TIME));
|
||||||
ct->ev.fill_engine_status();
|
ct->ev.fill_engine_status();
|
||||||
*statp = ct_status;
|
*statp = ct_status;
|
||||||
}
|
}
|
||||||
@@ -294,10 +224,22 @@ uint32_t toku_get_cleaner_iterations_unlocked (CACHETABLE ct) {
|
|||||||
return ct->cl.get_iterations();
|
return ct->cl.get_iterations();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void toku_set_enable_partial_eviction (CACHETABLE ct, bool enabled) {
|
||||||
|
ct->ev.set_enable_partial_eviction(enabled);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool toku_get_enable_partial_eviction (CACHETABLE ct) {
|
||||||
|
return ct->ev.get_enable_partial_eviction();
|
||||||
|
}
|
||||||
|
|
||||||
// reserve 25% as "unreservable". The loader cannot have it.
|
// reserve 25% as "unreservable". The loader cannot have it.
|
||||||
#define unreservable_memory(size) ((size)/4)
|
#define unreservable_memory(size) ((size)/4)
|
||||||
|
|
||||||
int toku_cachetable_create(CACHETABLE *ct_result, long size_limit, LSN UU(initial_lsn), TOKULOGGER logger) {
|
int toku_cachetable_create_ex(CACHETABLE *ct_result, long size_limit,
|
||||||
|
unsigned long client_pool_threads,
|
||||||
|
unsigned long cachetable_pool_threads,
|
||||||
|
unsigned long checkpoint_pool_threads,
|
||||||
|
LSN UU(initial_lsn), TOKULOGGER logger) {
|
||||||
int result = 0;
|
int result = 0;
|
||||||
int r;
|
int r;
|
||||||
|
|
||||||
@@ -311,17 +253,20 @@ int toku_cachetable_create(CACHETABLE *ct_result, long size_limit, LSN UU(initia
|
|||||||
|
|
||||||
int num_processors = toku_os_get_number_active_processors();
|
int num_processors = toku_os_get_number_active_processors();
|
||||||
int checkpointing_nworkers = (num_processors/4) ? num_processors/4 : 1;
|
int checkpointing_nworkers = (num_processors/4) ? num_processors/4 : 1;
|
||||||
r = toku_kibbutz_create(num_processors, &ct->client_kibbutz);
|
r = toku_kibbutz_create(client_pool_threads ? client_pool_threads : num_processors,
|
||||||
|
&ct->client_kibbutz);
|
||||||
if (r != 0) {
|
if (r != 0) {
|
||||||
result = r;
|
result = r;
|
||||||
goto cleanup;
|
goto cleanup;
|
||||||
}
|
}
|
||||||
r = toku_kibbutz_create(2*num_processors, &ct->ct_kibbutz);
|
r = toku_kibbutz_create(cachetable_pool_threads ? cachetable_pool_threads : 2*num_processors,
|
||||||
|
&ct->ct_kibbutz);
|
||||||
if (r != 0) {
|
if (r != 0) {
|
||||||
result = r;
|
result = r;
|
||||||
goto cleanup;
|
goto cleanup;
|
||||||
}
|
}
|
||||||
r = toku_kibbutz_create(checkpointing_nworkers, &ct->checkpointing_kibbutz);
|
r = toku_kibbutz_create(checkpoint_pool_threads ? checkpoint_pool_threads : checkpointing_nworkers,
|
||||||
|
&ct->checkpointing_kibbutz);
|
||||||
if (r != 0) {
|
if (r != 0) {
|
||||||
result = r;
|
result = r;
|
||||||
goto cleanup;
|
goto cleanup;
|
||||||
@@ -2516,7 +2461,7 @@ toku_cachetable_minicron_shutdown(CACHETABLE ct) {
|
|||||||
|
|
||||||
void toku_cachetable_prepare_close(CACHETABLE ct UU()) {
|
void toku_cachetable_prepare_close(CACHETABLE ct UU()) {
|
||||||
extern bool toku_serialize_in_parallel;
|
extern bool toku_serialize_in_parallel;
|
||||||
toku_drd_unsafe_set(&toku_serialize_in_parallel, true);
|
toku_unsafe_set(&toku_serialize_in_parallel, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Requires that it all be flushed. */
|
/* Requires that it all be flushed. */
|
||||||
@@ -3643,6 +3588,8 @@ int evictor::init(long _size_limit, pair_list* _pl, cachefile_list* _cf_list, KI
|
|||||||
m_high_size_watermark = m_high_size_hysteresis + max_diff;
|
m_high_size_watermark = m_high_size_hysteresis + max_diff;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
m_enable_partial_eviction = true;
|
||||||
|
|
||||||
m_size_reserved = unreservable_memory(_size_limit);
|
m_size_reserved = unreservable_memory(_size_limit);
|
||||||
m_size_current = 0;
|
m_size_current = 0;
|
||||||
m_size_cloned_data = 0;
|
m_size_cloned_data = 0;
|
||||||
@@ -3703,7 +3650,7 @@ void evictor::destroy() {
|
|||||||
if (m_ev_thread_init) {
|
if (m_ev_thread_init) {
|
||||||
toku_mutex_lock(&m_ev_thread_lock);
|
toku_mutex_lock(&m_ev_thread_lock);
|
||||||
m_run_thread = false;
|
m_run_thread = false;
|
||||||
this->signal_eviction_thread();
|
this->signal_eviction_thread_locked();
|
||||||
toku_mutex_unlock(&m_ev_thread_lock);
|
toku_mutex_unlock(&m_ev_thread_lock);
|
||||||
void *ret;
|
void *ret;
|
||||||
int r = toku_pthread_join(m_ev_thread, &ret);
|
int r = toku_pthread_join(m_ev_thread, &ret);
|
||||||
@@ -3810,7 +3757,7 @@ uint64_t evictor::reserve_memory(double fraction, uint64_t upper_bound) {
|
|||||||
}
|
}
|
||||||
m_size_reserved += reserved_memory;
|
m_size_reserved += reserved_memory;
|
||||||
(void) toku_sync_fetch_and_add(&m_size_current, reserved_memory);
|
(void) toku_sync_fetch_and_add(&m_size_current, reserved_memory);
|
||||||
this->signal_eviction_thread();
|
this->signal_eviction_thread_locked();
|
||||||
toku_mutex_unlock(&m_ev_thread_lock);
|
toku_mutex_unlock(&m_ev_thread_lock);
|
||||||
|
|
||||||
if (this->should_client_thread_sleep()) {
|
if (this->should_client_thread_sleep()) {
|
||||||
@@ -3828,7 +3775,7 @@ void evictor::release_reserved_memory(uint64_t reserved_memory){
|
|||||||
m_size_reserved -= reserved_memory;
|
m_size_reserved -= reserved_memory;
|
||||||
// signal the eviction thread in order to possibly wake up sleeping clients
|
// signal the eviction thread in order to possibly wake up sleeping clients
|
||||||
if (m_num_sleepers > 0) {
|
if (m_num_sleepers > 0) {
|
||||||
this->signal_eviction_thread();
|
this->signal_eviction_thread_locked();
|
||||||
}
|
}
|
||||||
toku_mutex_unlock(&m_ev_thread_lock);
|
toku_mutex_unlock(&m_ev_thread_lock);
|
||||||
}
|
}
|
||||||
@@ -4020,6 +3967,8 @@ bool evictor::run_eviction_on_pair(PAIR curr_in_clock) {
|
|||||||
curr_in_clock->count--;
|
curr_in_clock->count--;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (m_enable_partial_eviction) {
|
||||||
// call the partial eviction callback
|
// call the partial eviction callback
|
||||||
curr_in_clock->value_rwlock.write_lock(true);
|
curr_in_clock->value_rwlock.write_lock(true);
|
||||||
|
|
||||||
@@ -4028,20 +3977,15 @@ bool evictor::run_eviction_on_pair(PAIR curr_in_clock) {
|
|||||||
void *write_extraargs = curr_in_clock->write_extraargs;
|
void *write_extraargs = curr_in_clock->write_extraargs;
|
||||||
enum partial_eviction_cost cost;
|
enum partial_eviction_cost cost;
|
||||||
long bytes_freed_estimate = 0;
|
long bytes_freed_estimate = 0;
|
||||||
curr_in_clock->pe_est_callback(
|
curr_in_clock->pe_est_callback(value, disk_data,
|
||||||
value,
|
&bytes_freed_estimate, &cost,
|
||||||
disk_data,
|
write_extraargs);
|
||||||
&bytes_freed_estimate,
|
|
||||||
&cost,
|
|
||||||
write_extraargs
|
|
||||||
);
|
|
||||||
if (cost == PE_CHEAP) {
|
if (cost == PE_CHEAP) {
|
||||||
pair_unlock(curr_in_clock);
|
pair_unlock(curr_in_clock);
|
||||||
curr_in_clock->size_evicting_estimate = 0;
|
curr_in_clock->size_evicting_estimate = 0;
|
||||||
this->do_partial_eviction(curr_in_clock);
|
this->do_partial_eviction(curr_in_clock);
|
||||||
bjm_remove_background_job(cf->bjm);
|
bjm_remove_background_job(cf->bjm);
|
||||||
}
|
} else if (cost == PE_EXPENSIVE) {
|
||||||
else if (cost == PE_EXPENSIVE) {
|
|
||||||
// only bother running an expensive partial eviction
|
// only bother running an expensive partial eviction
|
||||||
// if it is expected to free space
|
// if it is expected to free space
|
||||||
if (bytes_freed_estimate > 0) {
|
if (bytes_freed_estimate > 0) {
|
||||||
@@ -4050,23 +3994,21 @@ bool evictor::run_eviction_on_pair(PAIR curr_in_clock) {
|
|||||||
toku_mutex_lock(&m_ev_thread_lock);
|
toku_mutex_lock(&m_ev_thread_lock);
|
||||||
m_size_evicting += bytes_freed_estimate;
|
m_size_evicting += bytes_freed_estimate;
|
||||||
toku_mutex_unlock(&m_ev_thread_lock);
|
toku_mutex_unlock(&m_ev_thread_lock);
|
||||||
toku_kibbutz_enq(
|
toku_kibbutz_enq(m_kibbutz, cachetable_partial_eviction,
|
||||||
m_kibbutz,
|
curr_in_clock);
|
||||||
cachetable_partial_eviction,
|
} else {
|
||||||
curr_in_clock
|
|
||||||
);
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
curr_in_clock->value_rwlock.write_unlock();
|
curr_in_clock->value_rwlock.write_unlock();
|
||||||
pair_unlock(curr_in_clock);
|
pair_unlock(curr_in_clock);
|
||||||
bjm_remove_background_job(cf->bjm);
|
bjm_remove_background_job(cf->bjm);
|
||||||
}
|
}
|
||||||
}
|
} else {
|
||||||
else {
|
|
||||||
assert(false);
|
assert(false);
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
pair_unlock(curr_in_clock);
|
||||||
|
bjm_remove_background_job(cf->bjm);
|
||||||
}
|
}
|
||||||
else {
|
} else {
|
||||||
toku::context pe_ctx(CTX_FULL_EVICTION);
|
toku::context pe_ctx(CTX_FULL_EVICTION);
|
||||||
|
|
||||||
// responsibility of try_evict_pair to eventually remove background job
|
// responsibility of try_evict_pair to eventually remove background job
|
||||||
@@ -4246,7 +4188,7 @@ void evictor::decrease_size_evicting(long size_evicting_estimate) {
|
|||||||
m_size_evicting -= size_evicting_estimate;
|
m_size_evicting -= size_evicting_estimate;
|
||||||
assert(m_size_evicting >= 0);
|
assert(m_size_evicting >= 0);
|
||||||
if (need_to_signal_ev_thread) {
|
if (need_to_signal_ev_thread) {
|
||||||
this->signal_eviction_thread();
|
this->signal_eviction_thread_locked();
|
||||||
}
|
}
|
||||||
toku_mutex_unlock(&m_ev_thread_lock);
|
toku_mutex_unlock(&m_ev_thread_lock);
|
||||||
}
|
}
|
||||||
@@ -4261,7 +4203,7 @@ void evictor::wait_for_cache_pressure_to_subside() {
|
|||||||
uint64_t t0 = toku_current_time_microsec();
|
uint64_t t0 = toku_current_time_microsec();
|
||||||
toku_mutex_lock(&m_ev_thread_lock);
|
toku_mutex_lock(&m_ev_thread_lock);
|
||||||
m_num_sleepers++;
|
m_num_sleepers++;
|
||||||
this->signal_eviction_thread();
|
this->signal_eviction_thread_locked();
|
||||||
toku_cond_wait(&m_flow_control_cond, &m_ev_thread_lock);
|
toku_cond_wait(&m_flow_control_cond, &m_ev_thread_lock);
|
||||||
m_num_sleepers--;
|
m_num_sleepers--;
|
||||||
toku_mutex_unlock(&m_ev_thread_lock);
|
toku_mutex_unlock(&m_ev_thread_lock);
|
||||||
@@ -4295,6 +4237,12 @@ void evictor::get_state(long *size_current_ptr, long *size_limit_ptr) {
|
|||||||
// As a result, scheduling is not guaranteed, but that is tolerable.
|
// As a result, scheduling is not guaranteed, but that is tolerable.
|
||||||
//
|
//
|
||||||
void evictor::signal_eviction_thread() {
|
void evictor::signal_eviction_thread() {
|
||||||
|
toku_mutex_lock(&m_ev_thread_lock);
|
||||||
|
toku_cond_signal(&m_ev_thread_cond);
|
||||||
|
toku_mutex_unlock(&m_ev_thread_lock);
|
||||||
|
}
|
||||||
|
|
||||||
|
void evictor::signal_eviction_thread_locked() {
|
||||||
toku_cond_signal(&m_ev_thread_cond);
|
toku_cond_signal(&m_ev_thread_cond);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -4353,18 +4301,26 @@ inline int64_t evictor::unsafe_read_size_current(void) const {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void evictor::fill_engine_status() {
|
void evictor::fill_engine_status() {
|
||||||
STATUS_VALUE(CT_SIZE_CURRENT) = m_size_current;
|
CT_STATUS_VAL(CT_SIZE_CURRENT) = m_size_current;
|
||||||
STATUS_VALUE(CT_SIZE_LIMIT) = m_low_size_hysteresis;
|
CT_STATUS_VAL(CT_SIZE_LIMIT) = m_low_size_hysteresis;
|
||||||
STATUS_VALUE(CT_SIZE_WRITING) = m_size_evicting;
|
CT_STATUS_VAL(CT_SIZE_WRITING) = m_size_evicting;
|
||||||
STATUS_VALUE(CT_SIZE_NONLEAF) = read_partitioned_counter(m_size_nonleaf);
|
CT_STATUS_VAL(CT_SIZE_NONLEAF) = read_partitioned_counter(m_size_nonleaf);
|
||||||
STATUS_VALUE(CT_SIZE_LEAF) = read_partitioned_counter(m_size_leaf);
|
CT_STATUS_VAL(CT_SIZE_LEAF) = read_partitioned_counter(m_size_leaf);
|
||||||
STATUS_VALUE(CT_SIZE_ROLLBACK) = read_partitioned_counter(m_size_rollback);
|
CT_STATUS_VAL(CT_SIZE_ROLLBACK) = read_partitioned_counter(m_size_rollback);
|
||||||
STATUS_VALUE(CT_SIZE_CACHEPRESSURE) = read_partitioned_counter(m_size_cachepressure);
|
CT_STATUS_VAL(CT_SIZE_CACHEPRESSURE) = read_partitioned_counter(m_size_cachepressure);
|
||||||
STATUS_VALUE(CT_SIZE_CLONED) = m_size_cloned_data;
|
CT_STATUS_VAL(CT_SIZE_CLONED) = m_size_cloned_data;
|
||||||
STATUS_VALUE(CT_WAIT_PRESSURE_COUNT) = read_partitioned_counter(m_wait_pressure_count);
|
CT_STATUS_VAL(CT_WAIT_PRESSURE_COUNT) = read_partitioned_counter(m_wait_pressure_count);
|
||||||
STATUS_VALUE(CT_WAIT_PRESSURE_TIME) = read_partitioned_counter(m_wait_pressure_time);
|
CT_STATUS_VAL(CT_WAIT_PRESSURE_TIME) = read_partitioned_counter(m_wait_pressure_time);
|
||||||
STATUS_VALUE(CT_LONG_WAIT_PRESSURE_COUNT) = read_partitioned_counter(m_long_wait_pressure_count);
|
CT_STATUS_VAL(CT_LONG_WAIT_PRESSURE_COUNT) = read_partitioned_counter(m_long_wait_pressure_count);
|
||||||
STATUS_VALUE(CT_LONG_WAIT_PRESSURE_TIME) = read_partitioned_counter(m_long_wait_pressure_time);
|
CT_STATUS_VAL(CT_LONG_WAIT_PRESSURE_TIME) = read_partitioned_counter(m_long_wait_pressure_time);
|
||||||
|
}
|
||||||
|
|
||||||
|
void evictor::set_enable_partial_eviction(bool enabled) {
|
||||||
|
m_enable_partial_eviction = enabled;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool evictor::get_enable_partial_eviction(void) const {
|
||||||
|
return m_enable_partial_eviction;
|
||||||
}
|
}
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
@@ -4852,6 +4808,7 @@ void cachefile_list::remove_stale_cf_unlocked(CACHEFILE cf) {
|
|||||||
|
|
||||||
FILENUM cachefile_list::reserve_filenum() {
|
FILENUM cachefile_list::reserve_filenum() {
|
||||||
// taking a write lock because we are modifying next_filenum_to_use
|
// taking a write lock because we are modifying next_filenum_to_use
|
||||||
|
FILENUM filenum = FILENUM_NONE;
|
||||||
write_lock();
|
write_lock();
|
||||||
while (1) {
|
while (1) {
|
||||||
int r = m_active_filenum.find_zero<FILENUM, cachefile_find_by_filenum>(m_next_filenum_to_use, nullptr, nullptr);
|
int r = m_active_filenum.find_zero<FILENUM, cachefile_find_by_filenum>(m_next_filenum_to_use, nullptr, nullptr);
|
||||||
@@ -4860,10 +4817,17 @@ FILENUM cachefile_list::reserve_filenum() {
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
assert(r == DB_NOTFOUND);
|
assert(r == DB_NOTFOUND);
|
||||||
|
|
||||||
|
// skip the reserved value UINT32_MAX and wrap around to zero
|
||||||
|
if (m_next_filenum_to_use.fileid == FILENUM_NONE.fileid) {
|
||||||
|
m_next_filenum_to_use.fileid = 0;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
filenum = m_next_filenum_to_use;
|
||||||
|
m_next_filenum_to_use.fileid++;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
FILENUM filenum = m_next_filenum_to_use;
|
|
||||||
m_next_filenum_to_use.fileid++;
|
|
||||||
write_unlock();
|
write_unlock();
|
||||||
return filenum;
|
return filenum;
|
||||||
}
|
}
|
||||||
@@ -4971,5 +4935,3 @@ toku_cachetable_helgrind_ignore(void) {
|
|||||||
TOKU_VALGRIND_HG_DISABLE_CHECKING(&cleaner_executions, sizeof cleaner_executions);
|
TOKU_VALGRIND_HG_DISABLE_CHECKING(&cleaner_executions, sizeof cleaner_executions);
|
||||||
TOKU_VALGRIND_HG_DISABLE_CHECKING(&ct_status, sizeof ct_status);
|
TOKU_VALGRIND_HG_DISABLE_CHECKING(&ct_status, sizeof ct_status);
|
||||||
}
|
}
|
||||||
|
|
||||||
#undef STATUS_VALUE
|
|
||||||
@@ -1,102 +1,49 @@
|
|||||||
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||||
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||||
|
|
||||||
#ident "$Id$"
|
#ident "$Id$"
|
||||||
/*
|
/*======
|
||||||
COPYING CONDITIONS NOTICE:
|
This file is part of PerconaFT.
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or modify
|
|
||||||
it under the terms of version 2 of the GNU General Public License as
|
|
||||||
published by the Free Software Foundation, and provided that the
|
|
||||||
following conditions are met:
|
|
||||||
|
|
||||||
* Redistributions of source code must retain this COPYING
|
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||||
CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the
|
|
||||||
DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the
|
|
||||||
PATENT MARKING NOTICE (below), and the PATENT RIGHTS
|
|
||||||
GRANT (below).
|
|
||||||
|
|
||||||
* Redistributions in binary form must reproduce this COPYING
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the
|
it under the terms of the GNU General Public License, version 2,
|
||||||
DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the
|
as published by the Free Software Foundation.
|
||||||
PATENT MARKING NOTICE (below), and the PATENT RIGHTS
|
|
||||||
GRANT (below) in the documentation and/or other materials
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
provided with the distribution.
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
You should have received a copy of the GNU General Public License
|
You should have received a copy of the GNU General Public License
|
||||||
along with this program; if not, write to the Free Software
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
|
||||||
02110-1301, USA.
|
|
||||||
|
|
||||||
COPYRIGHT NOTICE:
|
----------------------------------------
|
||||||
|
|
||||||
TokuFT, Tokutek Fractal Tree Indexing Library.
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
Copyright (C) 2007-2013 Tokutek, Inc.
|
it under the terms of the GNU Affero General Public License, version 3,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
DISCLAIMER:
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
This program is distributed in the hope that it will be useful, but
|
You should have received a copy of the GNU Affero General Public License
|
||||||
WITHOUT ANY WARRANTY; without even the implied warranty of
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
======= */
|
||||||
General Public License for more details.
|
|
||||||
|
|
||||||
UNIVERSITY PATENT NOTICE:
|
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||||
|
|
||||||
The technology is licensed by the Massachusetts Institute of
|
|
||||||
Technology, Rutgers State University of New Jersey, and the Research
|
|
||||||
Foundation of State University of New York at Stony Brook under
|
|
||||||
United States of America Serial No. 11/760379 and to the patents
|
|
||||||
and/or patent applications resulting from it.
|
|
||||||
|
|
||||||
PATENT MARKING NOTICE:
|
|
||||||
|
|
||||||
This software is covered by US Patent No. 8,185,551.
|
|
||||||
This software is covered by US Patent No. 8,489,638.
|
|
||||||
|
|
||||||
PATENT RIGHTS GRANT:
|
|
||||||
|
|
||||||
"THIS IMPLEMENTATION" means the copyrightable works distributed by
|
|
||||||
Tokutek as part of the Fractal Tree project.
|
|
||||||
|
|
||||||
"PATENT CLAIMS" means the claims of patents that are owned or
|
|
||||||
licensable by Tokutek, both currently or in the future; and that in
|
|
||||||
the absence of this license would be infringed by THIS
|
|
||||||
IMPLEMENTATION or by using or running THIS IMPLEMENTATION.
|
|
||||||
|
|
||||||
"PATENT CHALLENGE" shall mean a challenge to the validity,
|
|
||||||
patentability, enforceability and/or non-infringement of any of the
|
|
||||||
PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS.
|
|
||||||
|
|
||||||
Tokutek hereby grants to you, for the term and geographical scope of
|
|
||||||
the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free,
|
|
||||||
irrevocable (except as stated in this section) patent license to
|
|
||||||
make, have made, use, offer to sell, sell, import, transfer, and
|
|
||||||
otherwise run, modify, and propagate the contents of THIS
|
|
||||||
IMPLEMENTATION, where such license applies only to the PATENT
|
|
||||||
CLAIMS. This grant does not include claims that would be infringed
|
|
||||||
only as a consequence of further modifications of THIS
|
|
||||||
IMPLEMENTATION. If you or your agent or licensee institute or order
|
|
||||||
or agree to the institution of patent litigation against any entity
|
|
||||||
(including a cross-claim or counterclaim in a lawsuit) alleging that
|
|
||||||
THIS IMPLEMENTATION constitutes direct or contributory patent
|
|
||||||
infringement, or inducement of patent infringement, then any rights
|
|
||||||
granted to you under this License shall terminate as of the date
|
|
||||||
such litigation is filed. If you or your agent or exclusive
|
|
||||||
licensee institute or order or agree to the institution of a PATENT
|
|
||||||
CHALLENGE, then Tokutek may terminate any rights granted to you
|
|
||||||
under this License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved."
|
|
||||||
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it."
|
|
||||||
|
|
||||||
#include <fcntl.h>
|
#include <fcntl.h>
|
||||||
|
|
||||||
#include "ft/logger/logger.h"
|
#include "ft/logger/logger.h"
|
||||||
#include "ft/serialize/block_table.h"
|
#include "ft/serialize/block_table.h"
|
||||||
#include "ft/txn/txn.h"
|
#include "ft/txn/txn.h"
|
||||||
|
#include "ft/ft-status.h"
|
||||||
#include "util/minicron.h"
|
#include "util/minicron.h"
|
||||||
|
|
||||||
// Maintain a cache mapping from cachekeys to values (void*)
|
// Maintain a cache mapping from cachekeys to values (void*)
|
||||||
@@ -155,13 +102,22 @@ uint32_t toku_get_cleaner_period_unlocked (CACHETABLE ct);
|
|||||||
void toku_set_cleaner_iterations (CACHETABLE ct, uint32_t new_iterations);
|
void toku_set_cleaner_iterations (CACHETABLE ct, uint32_t new_iterations);
|
||||||
uint32_t toku_get_cleaner_iterations (CACHETABLE ct);
|
uint32_t toku_get_cleaner_iterations (CACHETABLE ct);
|
||||||
uint32_t toku_get_cleaner_iterations_unlocked (CACHETABLE ct);
|
uint32_t toku_get_cleaner_iterations_unlocked (CACHETABLE ct);
|
||||||
|
void toku_set_enable_partial_eviction (CACHETABLE ct, bool enabled);
|
||||||
|
bool toku_get_enable_partial_eviction (CACHETABLE ct);
|
||||||
|
|
||||||
// cachetable operations
|
// cachetable operations
|
||||||
|
|
||||||
// create and initialize a cache table
|
// create and initialize a cache table
|
||||||
// size_limit is the upper limit on the total size of the values in the table
|
// size_limit is the upper limit on the total size of the values in the table
|
||||||
// pass 0 if you want the default
|
// pass 0 if you want the default
|
||||||
int toku_cachetable_create(CACHETABLE *result, long size_limit, LSN initial_lsn, struct tokulogger *logger);
|
int toku_cachetable_create_ex(CACHETABLE *result, long size_limit,
|
||||||
|
unsigned long client_pool_threads,
|
||||||
|
unsigned long cachetable_pool_threads,
|
||||||
|
unsigned long checkpoint_pool_threads,
|
||||||
|
LSN initial_lsn, struct tokulogger *logger);
|
||||||
|
|
||||||
|
// Convenience wrapper around toku_cachetable_create_ex() that passes 0 for
// each of the three thread-pool size arguments (client, cachetable,
// checkpoint).
//   r - CACHETABLE *result
//   s - size_limit
//   l - initial_lsn
//   o - logger
// The expansion is a single expression with no trailing semicolon, so the
// macro can be used wherever a function call can (the caller supplies the
// terminating ';'). Arguments are parenthesized to keep their own precedence.
#define toku_cachetable_create(r, s, l, o) \
    toku_cachetable_create_ex((r), (s), 0, 0, 0, (l), (o))
|
||||||
|
|
||||||
// Create a new cachetable.
|
// Create a new cachetable.
|
||||||
// Effects: a new cachetable is created and initialized.
|
// Effects: a new cachetable is created and initialized.
|
||||||
@@ -590,34 +546,6 @@ void toku_cachetable_maybe_flush_some(CACHETABLE ct);
|
|||||||
// for stat64
|
// for stat64
|
||||||
uint64_t toku_cachefile_size(CACHEFILE cf);
|
uint64_t toku_cachefile_size(CACHEFILE cf);
|
||||||
|
|
||||||
typedef enum {
|
|
||||||
CT_MISS = 0,
|
|
||||||
CT_MISSTIME, // how many usec spent waiting for disk read because of cache miss
|
|
||||||
CT_PREFETCHES, // how many times has a block been prefetched into the cachetable?
|
|
||||||
CT_SIZE_CURRENT, // the sum of the sizes of the nodes represented in the cachetable
|
|
||||||
CT_SIZE_LIMIT, // the limit to the sum of the node sizes
|
|
||||||
CT_SIZE_WRITING, // the sum of the sizes of the nodes being written
|
|
||||||
CT_SIZE_NONLEAF, // number of bytes in cachetable belonging to nonleaf nodes
|
|
||||||
CT_SIZE_LEAF, // number of bytes in cachetable belonging to leaf nodes
|
|
||||||
CT_SIZE_ROLLBACK, // number of bytes in cachetable belonging to rollback nodes
|
|
||||||
CT_SIZE_CACHEPRESSURE, // number of bytes causing cache pressure (sum of buffers and workdone counters)
|
|
||||||
CT_SIZE_CLONED, // number of bytes of cloned data in the system
|
|
||||||
CT_EVICTIONS,
|
|
||||||
CT_CLEANER_EXECUTIONS, // number of times the cleaner thread's loop has executed
|
|
||||||
CT_CLEANER_PERIOD,
|
|
||||||
CT_CLEANER_ITERATIONS, // number of times the cleaner thread runs the cleaner per period
|
|
||||||
CT_WAIT_PRESSURE_COUNT,
|
|
||||||
CT_WAIT_PRESSURE_TIME,
|
|
||||||
CT_LONG_WAIT_PRESSURE_COUNT,
|
|
||||||
CT_LONG_WAIT_PRESSURE_TIME,
|
|
||||||
CT_STATUS_NUM_ROWS
|
|
||||||
} ct_status_entry;
|
|
||||||
|
|
||||||
typedef struct {
|
|
||||||
bool initialized;
|
|
||||||
TOKU_ENGINE_STATUS_ROW_S status[CT_STATUS_NUM_ROWS];
|
|
||||||
} CACHETABLE_STATUS_S, *CACHETABLE_STATUS;
|
|
||||||
|
|
||||||
void toku_cachetable_get_status(CACHETABLE ct, CACHETABLE_STATUS s);
|
void toku_cachetable_get_status(CACHETABLE ct, CACHETABLE_STATUS s);
|
||||||
|
|
||||||
void toku_cachetable_set_env_dir(CACHETABLE ct, const char *env_dir);
|
void toku_cachetable_set_env_dir(CACHETABLE ct, const char *env_dir);
|
||||||
316
storage/tokudb/PerconaFT/ft/cachetable/checkpoint.cc
Normal file
316
storage/tokudb/PerconaFT/ft/cachetable/checkpoint.cc
Normal file
@@ -0,0 +1,316 @@
|
|||||||
|
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||||
|
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||||
|
#ident "$Id$"
|
||||||
|
/*======
|
||||||
|
This file is part of PerconaFT.
|
||||||
|
|
||||||
|
|
||||||
|
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||||
|
|
||||||
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License, version 2,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
----------------------------------------
|
||||||
|
|
||||||
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU Affero General Public License, version 3,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Affero General Public License
|
||||||
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
======= */
|
||||||
|
|
||||||
|
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||||
|
|
||||||
|
/***********
|
||||||
|
* The purpose of this file is to implement the high-level logic for
|
||||||
|
* taking a checkpoint.
|
||||||
|
*
|
||||||
|
* There are three locks used for taking a checkpoint. They are listed below.
|
||||||
|
*
|
||||||
|
* NOTE: The reader-writer locks may be held by either multiple clients
|
||||||
|
* or the checkpoint function. (The checkpoint function has the role
|
||||||
|
* of the writer, the clients have the reader roles.)
|
||||||
|
*
|
||||||
|
* - multi_operation_lock
|
||||||
|
* This is a new reader-writer lock.
|
||||||
|
* This lock is held by the checkpoint function only for as long as is required to
|
||||||
|
* set all the "pending" bits and to create the checkpoint-in-progress versions
|
||||||
|
* of the header and translation table (btt).
|
||||||
|
* The following operations must take the multi_operation_lock:
|
||||||
|
* - any set of operations that must be atomic with respect to begin checkpoint
|
||||||
|
*
|
||||||
|
* - checkpoint_safe_lock
|
||||||
|
* This is a new reader-writer lock.
|
||||||
|
* This lock is held for the entire duration of the checkpoint.
|
||||||
|
* It is used to prevent more than one checkpoint from happening at a time
|
||||||
|
* (the checkpoint function is non-re-entrant), and to prevent certain operations
|
||||||
|
* that should not happen during a checkpoint.
|
||||||
|
* The following operations must take the checkpoint_safe lock:
|
||||||
|
* - delete a dictionary
|
||||||
|
* - rename a dictionary
|
||||||
|
* The application can use this lock to disable checkpointing during other sensitive
|
||||||
|
* operations, such as making a backup copy of the database.
|
||||||
|
*
|
||||||
|
* Once the "pending" bits are set and the snapshots are taken of the header and btt,
|
||||||
|
* most normal database operations are permitted to resume.
|
||||||
|
*
|
||||||
|
*
|
||||||
|
*
|
||||||
|
*****/
|
||||||
|
|
||||||
|
#include <time.h>
|
||||||
|
|
||||||
|
#include "portability/toku_portability.h"
|
||||||
|
#include "portability/toku_atomic.h"
|
||||||
|
|
||||||
|
#include "ft/cachetable/cachetable.h"
|
||||||
|
#include "ft/cachetable/checkpoint.h"
|
||||||
|
#include "ft/ft.h"
|
||||||
|
#include "ft/logger/log-internal.h"
|
||||||
|
#include "ft/logger/recover.h"
|
||||||
|
#include "util/frwlock.h"
|
||||||
|
#include "util/status.h"
|
||||||
|
|
||||||
|
void
|
||||||
|
toku_checkpoint_get_status(CACHETABLE ct, CHECKPOINT_STATUS statp) {
|
||||||
|
cp_status.init();
|
||||||
|
CP_STATUS_VAL(CP_PERIOD) = toku_get_checkpoint_period_unlocked(ct);
|
||||||
|
*statp = cp_status;
|
||||||
|
}
|
||||||
|
|
||||||
|
static LSN last_completed_checkpoint_lsn;
|
||||||
|
|
||||||
|
static toku_mutex_t checkpoint_safe_mutex;
|
||||||
|
static toku::frwlock checkpoint_safe_lock;
|
||||||
|
static toku_pthread_rwlock_t multi_operation_lock;
|
||||||
|
static toku_pthread_rwlock_t low_priority_multi_operation_lock;
|
||||||
|
|
||||||
|
static bool initialized = false; // sanity check
|
||||||
|
static volatile bool locked_mo = false; // true when the multi_operation write lock is held (by checkpoint)
|
||||||
|
static volatile bool locked_cs = false; // true when the checkpoint_safe write lock is held (by checkpoint)
|
||||||
|
static volatile uint64_t toku_checkpoint_begin_long_threshold = 1000000; // 1 second
|
||||||
|
static volatile uint64_t toku_checkpoint_end_long_threshold = 1000000 * 60; // 1 minute
|
||||||
|
|
||||||
|
// Note following static functions are called from checkpoint internal logic only,
|
||||||
|
// and use the "writer" calls for locking and unlocking.
|
||||||
|
|
||||||
|
static void
|
||||||
|
multi_operation_lock_init(void) {
|
||||||
|
pthread_rwlockattr_t attr;
|
||||||
|
pthread_rwlockattr_init(&attr);
|
||||||
|
#if defined(HAVE_PTHREAD_RWLOCKATTR_SETKIND_NP)
|
||||||
|
pthread_rwlockattr_setkind_np(&attr, PTHREAD_RWLOCK_PREFER_WRITER_NONRECURSIVE_NP);
|
||||||
|
#else
|
||||||
|
// TODO: need to figure out how to make writer-preferential rwlocks
|
||||||
|
// happen on osx
|
||||||
|
#endif
|
||||||
|
toku_pthread_rwlock_init(&multi_operation_lock, &attr);
|
||||||
|
toku_pthread_rwlock_init(&low_priority_multi_operation_lock, &attr);
|
||||||
|
pthread_rwlockattr_destroy(&attr);
|
||||||
|
locked_mo = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
multi_operation_lock_destroy(void) {
|
||||||
|
toku_pthread_rwlock_destroy(&multi_operation_lock);
|
||||||
|
toku_pthread_rwlock_destroy(&low_priority_multi_operation_lock);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
multi_operation_checkpoint_lock(void) {
|
||||||
|
toku_pthread_rwlock_wrlock(&low_priority_multi_operation_lock);
|
||||||
|
toku_pthread_rwlock_wrlock(&multi_operation_lock);
|
||||||
|
locked_mo = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
multi_operation_checkpoint_unlock(void) {
|
||||||
|
locked_mo = false;
|
||||||
|
toku_pthread_rwlock_wrunlock(&multi_operation_lock);
|
||||||
|
toku_pthread_rwlock_wrunlock(&low_priority_multi_operation_lock);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
checkpoint_safe_lock_init(void) {
|
||||||
|
toku_mutex_init(&checkpoint_safe_mutex, NULL);
|
||||||
|
checkpoint_safe_lock.init(&checkpoint_safe_mutex);
|
||||||
|
locked_cs = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
checkpoint_safe_lock_destroy(void) {
|
||||||
|
checkpoint_safe_lock.deinit();
|
||||||
|
toku_mutex_destroy(&checkpoint_safe_mutex);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
checkpoint_safe_checkpoint_lock(void) {
|
||||||
|
toku_mutex_lock(&checkpoint_safe_mutex);
|
||||||
|
checkpoint_safe_lock.write_lock(false);
|
||||||
|
toku_mutex_unlock(&checkpoint_safe_mutex);
|
||||||
|
locked_cs = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
checkpoint_safe_checkpoint_unlock(void) {
|
||||||
|
locked_cs = false;
|
||||||
|
toku_mutex_lock(&checkpoint_safe_mutex);
|
||||||
|
checkpoint_safe_lock.write_unlock();
|
||||||
|
toku_mutex_unlock(&checkpoint_safe_mutex);
|
||||||
|
}
|
||||||
|
|
||||||
|
// toku_xxx_client_(un)lock() functions are only called from client code,
|
||||||
|
// never from checkpoint code, and use the "reader" interface to the lock functions.
|
||||||
|
|
||||||
|
void
|
||||||
|
toku_multi_operation_client_lock(void) {
|
||||||
|
if (locked_mo)
|
||||||
|
(void) toku_sync_fetch_and_add(&CP_STATUS_VAL(CP_CLIENT_WAIT_ON_MO), 1);
|
||||||
|
toku_pthread_rwlock_rdlock(&multi_operation_lock);
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
toku_multi_operation_client_unlock(void) {
|
||||||
|
toku_pthread_rwlock_rdunlock(&multi_operation_lock);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Client (reader) acquire of the low-priority multi_operation lock.
void toku_low_priority_multi_operation_client_lock(void) {
    toku_pthread_rwlock_rdlock(&low_priority_multi_operation_lock);
}
|
||||||
|
|
||||||
|
// Client (reader) release of the low-priority multi_operation lock.
void toku_low_priority_multi_operation_client_unlock(void) {
    toku_pthread_rwlock_rdunlock(&low_priority_multi_operation_lock);
}
|
||||||
|
|
||||||
|
void
|
||||||
|
toku_checkpoint_safe_client_lock(void) {
|
||||||
|
if (locked_cs)
|
||||||
|
(void) toku_sync_fetch_and_add(&CP_STATUS_VAL(CP_CLIENT_WAIT_ON_CS), 1);
|
||||||
|
toku_mutex_lock(&checkpoint_safe_mutex);
|
||||||
|
checkpoint_safe_lock.read_lock();
|
||||||
|
toku_mutex_unlock(&checkpoint_safe_mutex);
|
||||||
|
toku_multi_operation_client_lock();
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
toku_checkpoint_safe_client_unlock(void) {
|
||||||
|
toku_mutex_lock(&checkpoint_safe_mutex);
|
||||||
|
checkpoint_safe_lock.read_unlock();
|
||||||
|
toku_mutex_unlock(&checkpoint_safe_mutex);
|
||||||
|
toku_multi_operation_client_unlock();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Initialize the checkpoint mechanism, must be called before any client operations.
|
||||||
|
void
|
||||||
|
toku_checkpoint_init(void) {
|
||||||
|
multi_operation_lock_init();
|
||||||
|
checkpoint_safe_lock_init();
|
||||||
|
initialized = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
toku_checkpoint_destroy(void) {
|
||||||
|
multi_operation_lock_destroy();
|
||||||
|
checkpoint_safe_lock_destroy();
|
||||||
|
initialized = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Record progress through toku_checkpoint() in the CP_FOOTPRINT status value.
// Relies on a local variable named `footprint_offset` being in scope at the
// point of use. The argument and the whole expansion are parenthesized so
// that an argument containing lower-precedence operators (e.g. `c ? a : b`)
// is added to the offset as a unit.
#define SET_CHECKPOINT_FOOTPRINT(x) (CP_STATUS_VAL(CP_FOOTPRINT) = footprint_offset + (x))
|
||||||
|
|
||||||
|
|
||||||
|
// Take a checkpoint of all currently open dictionaries
|
||||||
|
int
|
||||||
|
toku_checkpoint(CHECKPOINTER cp, TOKULOGGER logger,
|
||||||
|
void (*callback_f)(void*), void * extra,
|
||||||
|
void (*callback2_f)(void*), void * extra2,
|
||||||
|
checkpoint_caller_t caller_id) {
|
||||||
|
int footprint_offset = (int) caller_id * 1000;
|
||||||
|
|
||||||
|
assert(initialized);
|
||||||
|
|
||||||
|
(void) toku_sync_fetch_and_add(&CP_STATUS_VAL(CP_WAITERS_NOW), 1);
|
||||||
|
checkpoint_safe_checkpoint_lock();
|
||||||
|
(void) toku_sync_fetch_and_sub(&CP_STATUS_VAL(CP_WAITERS_NOW), 1);
|
||||||
|
|
||||||
|
if (CP_STATUS_VAL(CP_WAITERS_NOW) > CP_STATUS_VAL(CP_WAITERS_MAX))
|
||||||
|
CP_STATUS_VAL(CP_WAITERS_MAX) = CP_STATUS_VAL(CP_WAITERS_NOW); // threadsafe, within checkpoint_safe lock
|
||||||
|
|
||||||
|
SET_CHECKPOINT_FOOTPRINT(10);
|
||||||
|
multi_operation_checkpoint_lock();
|
||||||
|
SET_CHECKPOINT_FOOTPRINT(20);
|
||||||
|
toku_ft_open_close_lock();
|
||||||
|
|
||||||
|
SET_CHECKPOINT_FOOTPRINT(30);
|
||||||
|
CP_STATUS_VAL(CP_TIME_LAST_CHECKPOINT_BEGIN) = time(NULL);
|
||||||
|
uint64_t t_checkpoint_begin_start = toku_current_time_microsec();
|
||||||
|
toku_cachetable_begin_checkpoint(cp, logger);
|
||||||
|
uint64_t t_checkpoint_begin_end = toku_current_time_microsec();
|
||||||
|
|
||||||
|
toku_ft_open_close_unlock();
|
||||||
|
multi_operation_checkpoint_unlock();
|
||||||
|
|
||||||
|
SET_CHECKPOINT_FOOTPRINT(40);
|
||||||
|
if (callback_f) {
|
||||||
|
callback_f(extra); // callback is called with checkpoint_safe_lock still held
|
||||||
|
}
|
||||||
|
|
||||||
|
uint64_t t_checkpoint_end_start = toku_current_time_microsec();
|
||||||
|
toku_cachetable_end_checkpoint(cp, logger, callback2_f, extra2);
|
||||||
|
uint64_t t_checkpoint_end_end = toku_current_time_microsec();
|
||||||
|
|
||||||
|
SET_CHECKPOINT_FOOTPRINT(50);
|
||||||
|
if (logger) {
|
||||||
|
last_completed_checkpoint_lsn = logger->last_completed_checkpoint_lsn;
|
||||||
|
toku_logger_maybe_trim_log(logger, last_completed_checkpoint_lsn);
|
||||||
|
CP_STATUS_VAL(CP_LAST_LSN) = last_completed_checkpoint_lsn.lsn;
|
||||||
|
}
|
||||||
|
|
||||||
|
SET_CHECKPOINT_FOOTPRINT(60);
|
||||||
|
CP_STATUS_VAL(CP_TIME_LAST_CHECKPOINT_END) = time(NULL);
|
||||||
|
CP_STATUS_VAL(CP_TIME_LAST_CHECKPOINT_BEGIN_COMPLETE) = CP_STATUS_VAL(CP_TIME_LAST_CHECKPOINT_BEGIN);
|
||||||
|
CP_STATUS_VAL(CP_CHECKPOINT_COUNT)++;
|
||||||
|
uint64_t duration = t_checkpoint_begin_end - t_checkpoint_begin_start;
|
||||||
|
CP_STATUS_VAL(CP_BEGIN_TIME) += duration;
|
||||||
|
if (duration >= toku_checkpoint_begin_long_threshold) {
|
||||||
|
CP_STATUS_VAL(CP_LONG_BEGIN_TIME) += duration;
|
||||||
|
CP_STATUS_VAL(CP_LONG_BEGIN_COUNT) += 1;
|
||||||
|
}
|
||||||
|
duration = t_checkpoint_end_end - t_checkpoint_end_start;
|
||||||
|
CP_STATUS_VAL(CP_END_TIME) += duration;
|
||||||
|
if (duration >= toku_checkpoint_end_long_threshold) {
|
||||||
|
CP_STATUS_VAL(CP_LONG_END_TIME) += duration;
|
||||||
|
CP_STATUS_VAL(CP_LONG_END_COUNT) += 1;
|
||||||
|
}
|
||||||
|
CP_STATUS_VAL(CP_TIME_CHECKPOINT_DURATION) += (uint64_t) ((time_t) CP_STATUS_VAL(CP_TIME_LAST_CHECKPOINT_END)) - ((time_t) CP_STATUS_VAL(CP_TIME_LAST_CHECKPOINT_BEGIN));
|
||||||
|
CP_STATUS_VAL(CP_TIME_CHECKPOINT_DURATION_LAST) = (uint64_t) ((time_t) CP_STATUS_VAL(CP_TIME_LAST_CHECKPOINT_END)) - ((time_t) CP_STATUS_VAL(CP_TIME_LAST_CHECKPOINT_BEGIN));
|
||||||
|
CP_STATUS_VAL(CP_FOOTPRINT) = 0;
|
||||||
|
|
||||||
|
checkpoint_safe_checkpoint_unlock();
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
#include <toku_race_tools.h>
|
||||||
|
void __attribute__((__constructor__)) toku_checkpoint_helgrind_ignore(void);
|
||||||
|
void
|
||||||
|
toku_checkpoint_helgrind_ignore(void) {
|
||||||
|
TOKU_VALGRIND_HG_DISABLE_CHECKING(&cp_status, sizeof cp_status);
|
||||||
|
TOKU_VALGRIND_HG_DISABLE_CHECKING(&locked_mo, sizeof locked_mo);
|
||||||
|
TOKU_VALGRIND_HG_DISABLE_CHECKING(&locked_cs, sizeof locked_cs);
|
||||||
|
}
|
||||||
|
|
||||||
|
#undef SET_CHECKPOINT_FOOTPRINT
|
||||||
120
storage/tokudb/PerconaFT/ft/cachetable/checkpoint.h
Normal file
120
storage/tokudb/PerconaFT/ft/cachetable/checkpoint.h
Normal file
@@ -0,0 +1,120 @@
|
|||||||
|
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||||
|
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||||
|
#ident "$Id$"
|
||||||
|
/*======
|
||||||
|
This file is part of PerconaFT.
|
||||||
|
|
||||||
|
|
||||||
|
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||||
|
|
||||||
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License, version 2,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
----------------------------------------
|
||||||
|
|
||||||
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU Affero General Public License, version 3,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Affero General Public License
|
||||||
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
======= */
|
||||||
|
|
||||||
|
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
#include "ft/cachetable/cachetable.h"
|
||||||
|
|
||||||
|
//Effect: Change [end checkpoint (n) - begin checkpoint (n+1)] delay to
|
||||||
|
// new_period seconds. 0 means disable.
|
||||||
|
void toku_set_checkpoint_period(CACHETABLE ct, uint32_t new_period);
|
||||||
|
|
||||||
|
uint32_t toku_get_checkpoint_period_unlocked(CACHETABLE ct);
|
||||||
|
|
||||||
|
|
||||||
|
/******
|
||||||
|
*
|
||||||
|
* NOTE: checkpoint_safe_lock is highest level lock
|
||||||
|
* multi_operation_lock is next level lock
|
||||||
|
* ydb_big_lock is next level lock
|
||||||
|
*
|
||||||
|
* Locks must always be taken in this sequence (highest level first).
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
/******
|
||||||
|
* Client code must hold the checkpoint_safe lock during the following operations:
|
||||||
|
* - delete a dictionary via DB->remove
|
||||||
|
* - delete a dictionary via DB_TXN->abort(txn) (where txn created a dictionary)
|
||||||
|
* - rename a dictionary //TODO: Handlerton rename needs to take this
|
||||||
|
* //TODO: Handlerton rename needs to be recoded for transaction recovery
|
||||||
|
*****/
|
||||||
|
|
||||||
|
void toku_checkpoint_safe_client_lock(void);
|
||||||
|
|
||||||
|
void toku_checkpoint_safe_client_unlock(void);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/******
|
||||||
|
* These functions are called from the ydb level.
|
||||||
|
* Client code must hold the multi_operation lock during the following operations:
|
||||||
|
* - insertion into multiple indexes
|
||||||
|
* - replace into (simultaneous delete/insert on a single key)
|
||||||
|
*****/
|
||||||
|
|
||||||
|
void toku_multi_operation_client_lock(void);
|
||||||
|
void toku_low_priority_multi_operation_client_lock(void);
|
||||||
|
|
||||||
|
void toku_multi_operation_client_unlock(void);
|
||||||
|
void toku_low_priority_multi_operation_client_unlock(void);
|
||||||
|
|
||||||
|
|
||||||
|
// Initialize the checkpoint mechanism, must be called before any client operations.
|
||||||
|
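// NOTE: the function takes no arguments; the older remark that function
// pointers to take/release the ydb lock must be passed in does not match
// this signature.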
|
||||||
|
void toku_checkpoint_init(void);
|
||||||
|
|
||||||
|
void toku_checkpoint_destroy(void);
|
||||||
|
|
||||||
|
// Identifies why a checkpoint is being taken; passed as `caller_id` to
// toku_checkpoint(). The numeric values are fixed explicitly.
typedef enum {
    SCHEDULED_CHECKPOINT = 0,  // "normal" checkpoint taken on checkpoint thread
    CLIENT_CHECKPOINT    = 1,  // induced by client, such as FLUSH LOGS or SAVEPOINT
    INDEXER_CHECKPOINT   = 2,
    STARTUP_CHECKPOINT   = 3,
    UPGRADE_CHECKPOINT   = 4,
    RECOVERY_CHECKPOINT  = 5,
    SHUTDOWN_CHECKPOINT  = 6
} checkpoint_caller_t;
|
||||||
|
|
||||||
|
// Take a checkpoint of all currently open dictionaries
|
||||||
|
// Callbacks are called during checkpoint procedure while checkpoint_safe lock is still held.
|
||||||
|
// Callbacks are primarily intended for use in testing.
|
||||||
|
// caller_id identifies why the checkpoint is being taken.
|
||||||
|
int toku_checkpoint(CHECKPOINTER cp, struct tokulogger *logger,
|
||||||
|
void (*callback_f)(void *extra), void *extra,
|
||||||
|
void (*callback2_f)(void *extra2), void *extra2,
|
||||||
|
checkpoint_caller_t caller_id);
|
||||||
|
|
||||||
|
/******
|
||||||
|
* These functions are called from the ydb level.
|
||||||
|
* They return status information and have no side effects.
|
||||||
|
* Some status information may be incorrect because no locks are taken to collect status.
|
||||||
|
* (If checkpoint is in progress, it may overwrite status info while it is being read.)
|
||||||
|
*****/
|
||||||
|
void toku_checkpoint_get_status(CACHETABLE ct, CHECKPOINT_STATUS stat);
|
||||||
136
storage/tokudb/PerconaFT/ft/comparator.h
Normal file
136
storage/tokudb/PerconaFT/ft/comparator.h
Normal file
@@ -0,0 +1,136 @@
|
|||||||
|
/* -*- mode: C; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||||
|
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||||
|
/*======
|
||||||
|
This file is part of PerconaFT.
|
||||||
|
|
||||||
|
|
||||||
|
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||||
|
|
||||||
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License, version 2,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
----------------------------------------
|
||||||
|
|
||||||
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU Affero General Public License, version 3,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Affero General Public License
|
||||||
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
======= */
|
||||||
|
|
||||||
|
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <db.h>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
#include "portability/memory.h"
|
||||||
|
|
||||||
|
#include "util/dbt.h"
|
||||||
|
|
||||||
|
typedef int (*ft_compare_func)(DB *db, const DBT *a, const DBT *b);
|
||||||
|
|
||||||
|
int toku_keycompare(const void *key1, uint32_t key1len, const void *key2, uint32_t key2len);
|
||||||
|
|
||||||
|
int toku_builtin_compare_fun (DB *, const DBT *, const DBT*) __attribute__((__visibility__("default")));
|
||||||
|
|
||||||
|
namespace toku {
|
||||||
|
|
||||||
|
// a comparator object encapsulates the data necessary for
|
||||||
|
// comparing two keys in a fractal tree. it further understands
|
||||||
|
// that points may be positive or negative infinity.
|
||||||
|
|
||||||
|
class comparator {
|
||||||
|
void init(ft_compare_func cmp, DESCRIPTOR desc, uint8_t memcmp_magic) {
|
||||||
|
_cmp = cmp;
|
||||||
|
_fake_db->cmp_descriptor = desc;
|
||||||
|
_memcmp_magic = memcmp_magic;
|
||||||
|
}
|
||||||
|
|
||||||
|
public:
|
||||||
|
// This magic value is reserved to mean that the magic has not been set.
|
||||||
|
static const uint8_t MEMCMP_MAGIC_NONE = 0;
|
||||||
|
|
||||||
|
void create(ft_compare_func cmp, DESCRIPTOR desc, uint8_t memcmp_magic = MEMCMP_MAGIC_NONE) {
|
||||||
|
XCALLOC(_fake_db);
|
||||||
|
init(cmp, desc, memcmp_magic);
|
||||||
|
}
|
||||||
|
|
||||||
|
// inherit the attributes of another comparator, but keep our own
|
||||||
|
// copy of fake_db that is owned separately from the one given.
|
||||||
|
void inherit(const comparator &cmp) {
|
||||||
|
invariant_notnull(_fake_db);
|
||||||
|
invariant_notnull(cmp._cmp);
|
||||||
|
invariant_notnull(cmp._fake_db);
|
||||||
|
init(cmp._cmp, cmp._fake_db->cmp_descriptor, cmp._memcmp_magic);
|
||||||
|
}
|
||||||
|
|
||||||
|
// like inherit, but doesn't require that the this comparator
|
||||||
|
// was already created
|
||||||
|
void create_from(const comparator &cmp) {
|
||||||
|
XCALLOC(_fake_db);
|
||||||
|
inherit(cmp);
|
||||||
|
}
|
||||||
|
|
||||||
|
void destroy() {
|
||||||
|
toku_free(_fake_db);
|
||||||
|
}
|
||||||
|
|
||||||
|
const DESCRIPTOR_S *get_descriptor() const {
|
||||||
|
return _fake_db->cmp_descriptor;
|
||||||
|
}
|
||||||
|
|
||||||
|
ft_compare_func get_compare_func() const {
|
||||||
|
return _cmp;
|
||||||
|
}
|
||||||
|
|
||||||
|
uint8_t get_memcmp_magic() const {
|
||||||
|
return _memcmp_magic;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool valid() const {
|
||||||
|
return _cmp != nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline bool dbt_has_memcmp_magic(const DBT *dbt) const {
|
||||||
|
return *reinterpret_cast<const char *>(dbt->data) == _memcmp_magic;
|
||||||
|
}
|
||||||
|
|
||||||
|
int operator()(const DBT *a, const DBT *b) const {
|
||||||
|
if (__builtin_expect(toku_dbt_is_infinite(a) || toku_dbt_is_infinite(b), 0)) {
|
||||||
|
return toku_dbt_infinite_compare(a, b);
|
||||||
|
} else if (_memcmp_magic != MEMCMP_MAGIC_NONE
|
||||||
|
// If `a' has the memcmp magic..
|
||||||
|
&& dbt_has_memcmp_magic(a)
|
||||||
|
// ..then we expect `b' to also have the memcmp magic
|
||||||
|
&& __builtin_expect(dbt_has_memcmp_magic(b), 1)) {
|
||||||
|
return toku_builtin_compare_fun(nullptr, a, b);
|
||||||
|
} else {
|
||||||
|
// yikes, const sadness here
|
||||||
|
return _cmp(const_cast<DB *>(_fake_db), a, b);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
DB *_fake_db;
|
||||||
|
ft_compare_func _cmp;
|
||||||
|
uint8_t _memcmp_magic;
|
||||||
|
};
|
||||||
|
|
||||||
|
} /* namespace toku */
|
||||||
@@ -1,92 +1,39 @@
|
|||||||
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||||
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||||
|
/*======
|
||||||
|
This file is part of PerconaFT.
|
||||||
|
|
||||||
/*
|
|
||||||
COPYING CONDITIONS NOTICE:
|
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or modify
|
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||||
it under the terms of version 2 of the GNU General Public License as
|
|
||||||
published by the Free Software Foundation, and provided that the
|
|
||||||
following conditions are met:
|
|
||||||
|
|
||||||
* Redistributions of source code must retain this COPYING
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the
|
it under the terms of the GNU General Public License, version 2,
|
||||||
DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the
|
as published by the Free Software Foundation.
|
||||||
PATENT MARKING NOTICE (below), and the PATENT RIGHTS
|
|
||||||
GRANT (below).
|
|
||||||
|
|
||||||
* Redistributions in binary form must reproduce this COPYING
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
PATENT MARKING NOTICE (below), and the PATENT RIGHTS
|
GNU General Public License for more details.
|
||||||
GRANT (below) in the documentation and/or other materials
|
|
||||||
provided with the distribution.
|
|
||||||
|
|
||||||
You should have received a copy of the GNU General Public License
|
You should have received a copy of the GNU General Public License
|
||||||
along with this program; if not, write to the Free Software
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
|
||||||
02110-1301, USA.
|
|
||||||
|
|
||||||
COPYRIGHT NOTICE:
|
----------------------------------------
|
||||||
|
|
||||||
TokuFT, Tokutek Fractal Tree Indexing Library.
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
Copyright (C) 2014 Tokutek, Inc.
|
it under the terms of the GNU Affero General Public License, version 3,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
DISCLAIMER:
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
This program is distributed in the hope that it will be useful, but
|
You should have received a copy of the GNU Affero General Public License
|
||||||
WITHOUT ANY WARRANTY; without even the implied warranty of
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
======= */
|
||||||
General Public License for more details.
|
|
||||||
|
|
||||||
UNIVERSITY PATENT NOTICE:
|
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||||
|
|
||||||
The technology is licensed by the Massachusetts Institute of
|
|
||||||
Technology, Rutgers State University of New Jersey, and the Research
|
|
||||||
Foundation of State University of New York at Stony Brook under
|
|
||||||
United States of America Serial No. 11/760379 and to the patents
|
|
||||||
and/or patent applications resulting from it.
|
|
||||||
|
|
||||||
PATENT MARKING NOTICE:
|
|
||||||
|
|
||||||
This software is covered by US Patent No. 8,185,551.
|
|
||||||
This software is covered by US Patent No. 8,489,638.
|
|
||||||
|
|
||||||
PATENT RIGHTS GRANT:
|
|
||||||
|
|
||||||
"THIS IMPLEMENTATION" means the copyrightable works distributed by
|
|
||||||
Tokutek as part of the Fractal Tree project.
|
|
||||||
|
|
||||||
"PATENT CLAIMS" means the claims of patents that are owned or
|
|
||||||
licensable by Tokutek, both currently or in the future; and that in
|
|
||||||
the absence of this license would be infringed by THIS
|
|
||||||
IMPLEMENTATION or by using or running THIS IMPLEMENTATION.
|
|
||||||
|
|
||||||
"PATENT CHALLENGE" shall mean a challenge to the validity,
|
|
||||||
patentability, enforceability and/or non-infringement of any of the
|
|
||||||
PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS.
|
|
||||||
|
|
||||||
Tokutek hereby grants to you, for the term and geographical scope of
|
|
||||||
the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free,
|
|
||||||
irrevocable (except as stated in this section) patent license to
|
|
||||||
make, have made, use, offer to sell, sell, import, transfer, and
|
|
||||||
otherwise run, modify, and propagate the contents of THIS
|
|
||||||
IMPLEMENTATION, where such license applies only to the PATENT
|
|
||||||
CLAIMS. This grant does not include claims that would be infringed
|
|
||||||
only as a consequence of further modifications of THIS
|
|
||||||
IMPLEMENTATION. If you or your agent or licensee institute or order
|
|
||||||
or agree to the institution of patent litigation against any entity
|
|
||||||
(including a cross-claim or counterclaim in a lawsuit) alleging that
|
|
||||||
THIS IMPLEMENTATION constitutes direct or contributory patent
|
|
||||||
infringement, or inducement of patent infringement, then any rights
|
|
||||||
granted to you under this License shall terminate as of the date
|
|
||||||
such litigation is filed. If you or your agent or exclusive
|
|
||||||
licensee institute or order or agree to the institution of a PATENT
|
|
||||||
CHALLENGE, then Tokutek may terminate any rights granted to you
|
|
||||||
under this License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include <config.h>
|
|
||||||
|
|
||||||
#include "ft/ft-internal.h"
|
#include "ft/ft-internal.h"
|
||||||
|
|
||||||
@@ -96,12 +43,12 @@ PATENT RIGHTS GRANT:
|
|||||||
#include "util/dbt.h"
|
#include "util/dbt.h"
|
||||||
|
|
||||||
int toku_ft_cursor_create(FT_HANDLE ft_handle, FT_CURSOR cursor, TOKUTXN ttxn,
|
int toku_ft_cursor_create(FT_HANDLE ft_handle, FT_CURSOR cursor, TOKUTXN ttxn,
|
||||||
bool is_snapshot_read,
|
enum cursor_read_type read_type,
|
||||||
bool disable_prefetching,
|
bool disable_prefetching,
|
||||||
bool is_temporary) {
|
bool is_temporary) {
|
||||||
if (is_snapshot_read) {
|
if (read_type == C_READ_SNAPSHOT) {
|
||||||
invariant(ttxn != NULL);
|
invariant(ttxn != NULL);
|
||||||
int accepted = toku_txn_reads_txnid(ft_handle->ft->h->root_xid_that_created, ttxn);
|
int accepted = toku_txn_reads_txnid(ft_handle->ft->h->root_xid_that_created, ttxn, false); // last parameter is irrelevant
|
||||||
if (accepted != TOKUDB_ACCEPT) {
|
if (accepted != TOKUDB_ACCEPT) {
|
||||||
invariant(accepted == 0);
|
invariant(accepted == 0);
|
||||||
return TOKUDB_MVCC_DICTIONARY_TOO_NEW;
|
return TOKUDB_MVCC_DICTIONARY_TOO_NEW;
|
||||||
@@ -111,7 +58,7 @@ int toku_ft_cursor_create(FT_HANDLE ft_handle, FT_CURSOR cursor, TOKUTXN ttxn,
|
|||||||
memset(cursor, 0, sizeof(*cursor));
|
memset(cursor, 0, sizeof(*cursor));
|
||||||
cursor->ft_handle = ft_handle;
|
cursor->ft_handle = ft_handle;
|
||||||
cursor->ttxn = ttxn;
|
cursor->ttxn = ttxn;
|
||||||
cursor->is_snapshot_read = is_snapshot_read;
|
cursor->read_type = read_type;
|
||||||
cursor->disable_prefetching = disable_prefetching;
|
cursor->disable_prefetching = disable_prefetching;
|
||||||
cursor->is_temporary = is_temporary;
|
cursor->is_temporary = is_temporary;
|
||||||
return 0;
|
return 0;
|
||||||
@@ -128,7 +75,8 @@ void toku_ft_cursor_destroy(FT_CURSOR cursor) {
|
|||||||
int toku_ft_cursor(FT_HANDLE ft_handle, FT_CURSOR *cursorptr, TOKUTXN ttxn,
|
int toku_ft_cursor(FT_HANDLE ft_handle, FT_CURSOR *cursorptr, TOKUTXN ttxn,
|
||||||
bool is_snapshot_read, bool disable_prefetching) {
|
bool is_snapshot_read, bool disable_prefetching) {
|
||||||
FT_CURSOR XCALLOC(cursor);
|
FT_CURSOR XCALLOC(cursor);
|
||||||
int r = toku_ft_cursor_create(ft_handle, cursor, ttxn, is_snapshot_read, disable_prefetching, false);
|
enum cursor_read_type read_type = is_snapshot_read ? C_READ_SNAPSHOT : C_READ_ANY;
|
||||||
|
int r = toku_ft_cursor_create(ft_handle, cursor, ttxn, read_type, disable_prefetching, false);
|
||||||
if (r == 0) {
|
if (r == 0) {
|
||||||
*cursorptr = cursor;
|
*cursorptr = cursor;
|
||||||
} else {
|
} else {
|
||||||
@@ -323,11 +271,11 @@ int toku_ft_cursor_shortcut(FT_CURSOR cursor, int direction, uint32_t index, bn_
|
|||||||
r = bd->fetch_klpair(index, &le, &foundkeylen, &foundkey);
|
r = bd->fetch_klpair(index, &le, &foundkeylen, &foundkey);
|
||||||
invariant_zero(r);
|
invariant_zero(r);
|
||||||
|
|
||||||
if (toku_ft_cursor_is_leaf_mode(cursor) || !le_val_is_del(le, cursor->is_snapshot_read, cursor->ttxn)) {
|
if (toku_ft_cursor_is_leaf_mode(cursor) || !le_val_is_del(le, cursor->read_type, cursor->ttxn)) {
|
||||||
le_extract_val(
|
le_extract_val(
|
||||||
le,
|
le,
|
||||||
toku_ft_cursor_is_leaf_mode(cursor),
|
toku_ft_cursor_is_leaf_mode(cursor),
|
||||||
cursor->is_snapshot_read,
|
cursor->read_type,
|
||||||
cursor->ttxn,
|
cursor->ttxn,
|
||||||
vallen,
|
vallen,
|
||||||
val
|
val
|
||||||
186
storage/tokudb/PerconaFT/ft/cursor.h
Normal file
186
storage/tokudb/PerconaFT/ft/cursor.h
Normal file
@@ -0,0 +1,186 @@
|
|||||||
|
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||||
|
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||||
|
/*======
|
||||||
|
This file is part of PerconaFT.
|
||||||
|
|
||||||
|
|
||||||
|
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||||
|
|
||||||
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License, version 2,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
----------------------------------------
|
||||||
|
|
||||||
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU Affero General Public License, version 3,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Affero General Public License
|
||||||
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
======= */
|
||||||
|
|
||||||
|
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <db.h>
|
||||||
|
|
||||||
|
#include "ft/ft-internal.h"
|
||||||
|
|
||||||
|
/* an ft cursor is represented as a kv pair in a tree */
|
||||||
|
struct ft_cursor {
|
||||||
|
FT_HANDLE ft_handle;
|
||||||
|
DBT key, val; // The key-value pair that the cursor currently points to
|
||||||
|
DBT range_lock_left_key, range_lock_right_key;
|
||||||
|
bool prefetching;
|
||||||
|
bool left_is_neg_infty, right_is_pos_infty;
|
||||||
|
enum cursor_read_type read_type; // true if query is reading from a snapshot, false otherwise
|
||||||
|
bool is_leaf_mode;
|
||||||
|
bool disable_prefetching;
|
||||||
|
bool is_temporary;
|
||||||
|
int out_of_range_error;
|
||||||
|
int direction;
|
||||||
|
TOKUTXN ttxn;
|
||||||
|
FT_CHECK_INTERRUPT_CALLBACK interrupt_cb;
|
||||||
|
void *interrupt_cb_extra;
|
||||||
|
};
|
||||||
|
typedef struct ft_cursor *FT_CURSOR;
|
||||||
|
|
||||||
|
enum ft_search_direction_e {
    /* search left -> right, finds min xy as defined by the compare function */
    FT_SEARCH_LEFT = 1,
    /* search right -> left, finds max xy as defined by the compare function */
    FT_SEARCH_RIGHT = 2,
};
|
||||||
|
|
||||||
|
struct ft_search;
|
||||||
|
|
||||||
|
/* the search compare function should return 0 for all xy < kv and 1 for all xy >= kv
|
||||||
|
the compare function should be a step function from 0 to 1 for a left to right search
|
||||||
|
and 1 to 0 for a right to left search */
|
||||||
|
|
||||||
|
typedef int (*ft_search_compare_func_t)(const struct ft_search &, const DBT *);
|
||||||
|
|
||||||
|
/* the search object contains the compare function, search direction, and the kv pair that
|
||||||
|
is used in the compare function. the context is the user's private data */
|
||||||
|
|
||||||
|
struct ft_search {
|
||||||
|
ft_search_compare_func_t compare;
|
||||||
|
enum ft_search_direction_e direction;
|
||||||
|
const DBT *k;
|
||||||
|
void *context;
|
||||||
|
|
||||||
|
// To fix #3522, we need to remember the pivots that we have searched unsuccessfully.
|
||||||
|
// For example, when searching right (left), we call search->compare() on the ith pivot key. If search->compare(0 returns
|
||||||
|
// nonzero, then we search the ith subtree. If that subsearch returns DB_NOTFOUND then maybe the key isn't present in the
|
||||||
|
// tree. But maybe we are doing a DB_NEXT (DB_PREV), and everything was deleted. So we remember the pivot, and later we
|
||||||
|
// will only search subtrees which contain keys that are bigger than (less than) the pivot.
|
||||||
|
// The code is a kludge (even before this fix), and interacts strangely with the TOKUDB_FOUND_BUT_REJECTED (which is there
|
||||||
|
// because a failed DB_GET we would keep searching the rest of the tree). We probably should write the various lookup
|
||||||
|
// codes (NEXT, PREV, CURRENT, etc) more directly, and we should probably use a binary search within a node to search the
|
||||||
|
// pivots so that we can support a larger fanout.
|
||||||
|
// These changes (3312+3522) also (probably) introduce an isolation error (#3529).
|
||||||
|
// We must make sure we lock the right range for proper isolation level.
|
||||||
|
// There's probably a bug in which the following could happen.
|
||||||
|
// Thread A: Searches through deleted keys A,B,D,E and finds nothing, so searches the next leaf, releasing the YDB lock.
|
||||||
|
// Thread B: Inserts key C, and acquires the write lock, then commits.
|
||||||
|
// Thread A: Resumes, searching F,G,H and return success. Thread A then read-locks the range A-H, and doesn't notice
|
||||||
|
// the value C inserted by thread B. Thus a failure of serialization.
|
||||||
|
// See #3529.
|
||||||
|
// There also remains a potential thrashing problem. When we get a TOKUDB_TRY_AGAIN, we unpin everything. There's
|
||||||
|
// no guarantee that we will get everything pinned again. We ought to keep nodes pinned when we retry, except that on the
|
||||||
|
// way out with a DB_NOTFOUND we ought to unpin those nodes. See #3528.
|
||||||
|
DBT pivot_bound;
|
||||||
|
const DBT *k_bound;
|
||||||
|
};
|
||||||
|
|
||||||
|
/* initialize the search compare object */
|
||||||
|
static inline ft_search *ft_search_init(ft_search *search,
                                        ft_search_compare_func_t compare,
                                        enum ft_search_direction_e direction,
                                        const DBT *k,
                                        const DBT *k_bound,
                                        void *context) {
    // How to search: step function and direction.
    search->compare = compare;
    search->direction = direction;

    // What to search for, plus the caller's private data.
    search->k = k;
    search->context = context;

    // pivot_bound is owned by the search object (ft_search_finish destroys
    // it), so it is set up here rather than copied from the caller.
    toku_init_dbt(&search->pivot_bound);
    search->k_bound = k_bound;

    // Hand the same object back to the caller.
    return search;
}
|
||||||
|
|
||||||
|
// Tear down a search object set up by ft_search_init: destroys the
// pivot_bound DBT, the only member this function touches.
static inline void ft_search_finish(ft_search *search) {
    DBT *const remembered_pivot = &search->pivot_bound;
    toku_destroy_dbt(remembered_pivot);
}
|
||||||
|
|
||||||
|
|
||||||
|
int toku_ft_cursor_create(FT_HANDLE ft_handle, FT_CURSOR cursor, TOKUTXN txn,
|
||||||
|
enum cursor_read_type read_type,
|
||||||
|
bool disable_prefetching,
|
||||||
|
bool is_temporary);
|
||||||
|
|
||||||
|
void toku_ft_cursor_destroy(FT_CURSOR cursor);
|
||||||
|
|
||||||
|
int toku_ft_lookup(FT_HANDLE ft_h, DBT *k, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) __attribute__ ((warn_unused_result));
|
||||||
|
|
||||||
|
void toku_ft_cursor_set_prefetching(FT_CURSOR cursor);
|
||||||
|
|
||||||
|
bool toku_ft_cursor_prefetching(FT_CURSOR cursor);
|
||||||
|
|
||||||
|
bool toku_ft_cursor_not_set(FT_CURSOR cursor);
|
||||||
|
|
||||||
|
void toku_ft_cursor_set_leaf_mode(FT_CURSOR cursor);
|
||||||
|
|
||||||
|
void toku_ft_cursor_remove_restriction(FT_CURSOR cursor);
|
||||||
|
|
||||||
|
void toku_ft_cursor_set_check_interrupt_cb(FT_CURSOR cursor, FT_CHECK_INTERRUPT_CALLBACK cb, void *extra);
|
||||||
|
|
||||||
|
int toku_ft_cursor_is_leaf_mode(FT_CURSOR cursor);
|
||||||
|
|
||||||
|
void toku_ft_cursor_set_range_lock(FT_CURSOR, const DBT *, const DBT *, bool, bool, int);
|
||||||
|
|
||||||
|
int toku_ft_cursor_first(FT_CURSOR cursor, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) __attribute__ ((warn_unused_result));
|
||||||
|
|
||||||
|
int toku_ft_cursor_last(FT_CURSOR cursor, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) __attribute__ ((warn_unused_result));
|
||||||
|
|
||||||
|
int toku_ft_cursor_next(FT_CURSOR cursor, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) __attribute__ ((warn_unused_result));
|
||||||
|
|
||||||
|
int toku_ft_cursor_prev(FT_CURSOR cursor, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) __attribute__ ((warn_unused_result));
|
||||||
|
|
||||||
|
int toku_ft_cursor_current(FT_CURSOR cursor, int op, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) __attribute__ ((warn_unused_result));
|
||||||
|
|
||||||
|
int toku_ft_cursor_set(FT_CURSOR cursor, DBT *key, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) __attribute__ ((warn_unused_result));
|
||||||
|
|
||||||
|
int toku_ft_cursor_set_range(FT_CURSOR cursor, DBT *key, DBT *key_bound, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) __attribute__ ((warn_unused_result));
|
||||||
|
|
||||||
|
int toku_ft_cursor_set_range_reverse(FT_CURSOR cursor, DBT *key, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) __attribute__ ((warn_unused_result));
|
||||||
|
|
||||||
|
bool toku_ft_cursor_uninitialized(FT_CURSOR cursor) __attribute__ ((warn_unused_result));
|
||||||
|
|
||||||
|
void toku_ft_cursor_peek(FT_CURSOR cursor, const DBT **pkey, const DBT **pval);
|
||||||
|
|
||||||
|
int toku_ft_cursor_check_restricted_range(FT_CURSOR cursor, const void *key, uint32_t keylen);
|
||||||
|
|
||||||
|
int toku_ft_cursor_shortcut(FT_CURSOR cursor, int direction, uint32_t index, bn_data *bd,
|
||||||
|
FT_GET_CALLBACK_FUNCTION getf, void *getf_v,
|
||||||
|
uint32_t *keylen, void **key, uint32_t *vallen, void **val);
|
||||||
|
|
||||||
|
// used by get_key_after_bytes
|
||||||
|
int toku_ft_cursor_compare_one(const ft_search &search, const DBT *x);
|
||||||
|
int toku_ft_cursor_compare_set_range(const ft_search &search, const DBT *x);
|
||||||
|
|
||||||
|
// deprecated, should only be used by tests, and eventually removed
|
||||||
|
int toku_ft_cursor(FT_HANDLE ft_handle, FT_CURSOR *ftcursor_p, TOKUTXN txn, bool, bool) __attribute__ ((warn_unused_result));
|
||||||
|
void toku_ft_cursor_close(FT_CURSOR cursor);
|
||||||
|
int toku_ft_cursor_get(FT_CURSOR cursor, DBT *key, FT_GET_CALLBACK_FUNCTION getf, void *getf_v, int get_flags);
|
||||||
|
int toku_ft_cursor_delete(FT_CURSOR cursor, int flags, TOKUTXN txn);
|
||||||
375
storage/tokudb/PerconaFT/ft/ft-cachetable-wrappers.cc
Normal file
375
storage/tokudb/PerconaFT/ft/ft-cachetable-wrappers.cc
Normal file
@@ -0,0 +1,375 @@
|
|||||||
|
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||||
|
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||||
|
#ident "$Id$"
|
||||||
|
/*======
|
||||||
|
This file is part of PerconaFT.
|
||||||
|
|
||||||
|
|
||||||
|
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||||
|
|
||||||
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License, version 2,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
----------------------------------------
|
||||||
|
|
||||||
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU Affero General Public License, version 3,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Affero General Public License
|
||||||
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
======= */
|
||||||
|
|
||||||
|
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||||
|
|
||||||
|
#include "ft/serialize/block_table.h"
|
||||||
|
#include "ft/ft-cachetable-wrappers.h"
|
||||||
|
#include "ft/ft-flusher.h"
|
||||||
|
#include "ft/ft-internal.h"
|
||||||
|
#include "ft/ft.h"
|
||||||
|
#include "ft/node.h"
|
||||||
|
|
||||||
|
#include <util/context.h>
|
||||||
|
|
||||||
|
static void
|
||||||
|
ftnode_get_key_and_fullhash(
|
||||||
|
BLOCKNUM* cachekey,
|
||||||
|
uint32_t* fullhash,
|
||||||
|
void* extra)
|
||||||
|
{
|
||||||
|
FT ft = (FT) extra;
|
||||||
|
BLOCKNUM blocknum;
|
||||||
|
ft->blocktable.allocate_blocknum(&blocknum, ft);
|
||||||
|
*cachekey = blocknum;
|
||||||
|
*fullhash = toku_cachetable_hash(ft->cf, blocknum);
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
cachetable_put_empty_node_with_dep_nodes(
|
||||||
|
FT ft,
|
||||||
|
uint32_t num_dependent_nodes,
|
||||||
|
FTNODE* dependent_nodes,
|
||||||
|
BLOCKNUM* blocknum, //output
|
||||||
|
uint32_t* fullhash, //output
|
||||||
|
FTNODE* result)
|
||||||
|
{
|
||||||
|
FTNODE XCALLOC(new_node);
|
||||||
|
PAIR dependent_pairs[num_dependent_nodes];
|
||||||
|
enum cachetable_dirty dependent_dirty_bits[num_dependent_nodes];
|
||||||
|
for (uint32_t i = 0; i < num_dependent_nodes; i++) {
|
||||||
|
dependent_pairs[i] = dependent_nodes[i]->ct_pair;
|
||||||
|
dependent_dirty_bits[i] = (enum cachetable_dirty) dependent_nodes[i]->dirty;
|
||||||
|
}
|
||||||
|
|
||||||
|
toku_cachetable_put_with_dep_pairs(
|
||||||
|
ft->cf,
|
||||||
|
ftnode_get_key_and_fullhash,
|
||||||
|
new_node,
|
||||||
|
make_pair_attr(sizeof(FTNODE)),
|
||||||
|
get_write_callbacks_for_node(ft),
|
||||||
|
ft,
|
||||||
|
num_dependent_nodes,
|
||||||
|
dependent_pairs,
|
||||||
|
dependent_dirty_bits,
|
||||||
|
blocknum,
|
||||||
|
fullhash,
|
||||||
|
toku_ftnode_save_ct_pair);
|
||||||
|
*result = new_node;
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
create_new_ftnode_with_dep_nodes(
|
||||||
|
FT ft,
|
||||||
|
FTNODE *result,
|
||||||
|
int height,
|
||||||
|
int n_children,
|
||||||
|
uint32_t num_dependent_nodes,
|
||||||
|
FTNODE* dependent_nodes)
|
||||||
|
{
|
||||||
|
uint32_t fullhash = 0;
|
||||||
|
BLOCKNUM blocknum;
|
||||||
|
|
||||||
|
cachetable_put_empty_node_with_dep_nodes(
|
||||||
|
ft,
|
||||||
|
num_dependent_nodes,
|
||||||
|
dependent_nodes,
|
||||||
|
&blocknum,
|
||||||
|
&fullhash,
|
||||||
|
result);
|
||||||
|
|
||||||
|
assert(ft->h->basementnodesize > 0);
|
||||||
|
if (height == 0) {
|
||||||
|
assert(n_children > 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
toku_initialize_empty_ftnode(
|
||||||
|
*result,
|
||||||
|
blocknum,
|
||||||
|
height,
|
||||||
|
n_children,
|
||||||
|
ft->h->layout_version,
|
||||||
|
ft->h->flags);
|
||||||
|
|
||||||
|
(*result)->fullhash = fullhash;
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
toku_create_new_ftnode (
|
||||||
|
FT_HANDLE t,
|
||||||
|
FTNODE *result,
|
||||||
|
int height,
|
||||||
|
int n_children)
|
||||||
|
{
|
||||||
|
return create_new_ftnode_with_dep_nodes(
|
||||||
|
t->ft,
|
||||||
|
result,
|
||||||
|
height,
|
||||||
|
n_children,
|
||||||
|
0,
|
||||||
|
NULL);
|
||||||
|
}
|
||||||
|
|
||||||
|
//
|
||||||
|
// On success, this function assumes that the caller is trying to pin the node
|
||||||
|
// with a PL_READ lock. If message application is needed,
|
||||||
|
// then a PL_WRITE_CHEAP lock is grabbed
|
||||||
|
//
|
||||||
|
int
|
||||||
|
toku_pin_ftnode_for_query(
|
||||||
|
FT_HANDLE ft_handle,
|
||||||
|
BLOCKNUM blocknum,
|
||||||
|
uint32_t fullhash,
|
||||||
|
UNLOCKERS unlockers,
|
||||||
|
ANCESTORS ancestors,
|
||||||
|
const pivot_bounds &bounds,
|
||||||
|
ftnode_fetch_extra *bfe,
|
||||||
|
bool apply_ancestor_messages, // this bool is probably temporary, for #3972, once we know how range query estimates work, will revisit this
|
||||||
|
FTNODE *node_p,
|
||||||
|
bool* msgs_applied)
|
||||||
|
{
|
||||||
|
void *node_v;
|
||||||
|
*msgs_applied = false;
|
||||||
|
FTNODE node = nullptr;
|
||||||
|
MSN max_msn_in_path = ZERO_MSN;
|
||||||
|
bool needs_ancestors_messages = false;
|
||||||
|
// this function assumes that if you want ancestor messages applied,
|
||||||
|
// you are doing a read for a query. This is so we can make some optimizations
|
||||||
|
// below.
|
||||||
|
if (apply_ancestor_messages) {
|
||||||
|
paranoid_invariant(bfe->type == ftnode_fetch_subset);
|
||||||
|
}
|
||||||
|
|
||||||
|
int r = toku_cachetable_get_and_pin_nonblocking(
|
||||||
|
ft_handle->ft->cf,
|
||||||
|
blocknum,
|
||||||
|
fullhash,
|
||||||
|
&node_v,
|
||||||
|
NULL,
|
||||||
|
get_write_callbacks_for_node(ft_handle->ft),
|
||||||
|
toku_ftnode_fetch_callback,
|
||||||
|
toku_ftnode_pf_req_callback,
|
||||||
|
toku_ftnode_pf_callback,
|
||||||
|
PL_READ,
|
||||||
|
bfe, //read_extraargs
|
||||||
|
unlockers);
|
||||||
|
if (r != 0) {
|
||||||
|
assert(r == TOKUDB_TRY_AGAIN); // Any other error and we should bomb out ASAP.
|
||||||
|
goto exit;
|
||||||
|
}
|
||||||
|
node = static_cast<FTNODE>(node_v);
|
||||||
|
if (apply_ancestor_messages && node->height == 0) {
|
||||||
|
needs_ancestors_messages = toku_ft_leaf_needs_ancestors_messages(
|
||||||
|
ft_handle->ft,
|
||||||
|
node,
|
||||||
|
ancestors,
|
||||||
|
bounds,
|
||||||
|
&max_msn_in_path,
|
||||||
|
bfe->child_to_read
|
||||||
|
);
|
||||||
|
if (needs_ancestors_messages) {
|
||||||
|
toku::context apply_messages_ctx(CTX_MESSAGE_APPLICATION);
|
||||||
|
|
||||||
|
toku_unpin_ftnode_read_only(ft_handle->ft, node);
|
||||||
|
int rr = toku_cachetable_get_and_pin_nonblocking(
|
||||||
|
ft_handle->ft->cf,
|
||||||
|
blocknum,
|
||||||
|
fullhash,
|
||||||
|
&node_v,
|
||||||
|
NULL,
|
||||||
|
get_write_callbacks_for_node(ft_handle->ft),
|
||||||
|
toku_ftnode_fetch_callback,
|
||||||
|
toku_ftnode_pf_req_callback,
|
||||||
|
toku_ftnode_pf_callback,
|
||||||
|
PL_WRITE_CHEAP,
|
||||||
|
bfe, //read_extraargs
|
||||||
|
unlockers);
|
||||||
|
if (rr != 0) {
|
||||||
|
assert(rr == TOKUDB_TRY_AGAIN); // Any other error and we should bomb out ASAP.
|
||||||
|
r = TOKUDB_TRY_AGAIN;
|
||||||
|
goto exit;
|
||||||
|
}
|
||||||
|
node = static_cast<FTNODE>(node_v);
|
||||||
|
toku_apply_ancestors_messages_to_node(
|
||||||
|
ft_handle,
|
||||||
|
node,
|
||||||
|
ancestors,
|
||||||
|
bounds,
|
||||||
|
msgs_applied,
|
||||||
|
bfe->child_to_read
|
||||||
|
);
|
||||||
|
} else {
|
||||||
|
// At this point, we aren't going to run
|
||||||
|
// toku_apply_ancestors_messages_to_node but that doesn't
|
||||||
|
// mean max_msn_applied shouldn't be updated if possible
|
||||||
|
// (this saves the CPU work involved in
|
||||||
|
// toku_ft_leaf_needs_ancestors_messages).
|
||||||
|
//
|
||||||
|
// We still have a read lock, so we have not resolved
|
||||||
|
// checkpointing. If the node is pending and dirty, we
|
||||||
|
// can't modify anything, including max_msn, until we
|
||||||
|
// resolve checkpointing. If we do, the node might get
|
||||||
|
// written out that way as part of a checkpoint with a
|
||||||
|
// root that was already written out with a smaller
|
||||||
|
// max_msn. During recovery, we would then inject a
|
||||||
|
// message based on the root's max_msn, and that message
|
||||||
|
// would get filtered by the leaf because it had too high
|
||||||
|
// a max_msn value. (see #5407)
|
||||||
|
//
|
||||||
|
// So for simplicity we only update the max_msn if the
|
||||||
|
// node is clean. That way, in order for the node to get
|
||||||
|
// written out, it would have to be dirtied. That
|
||||||
|
// requires a write lock, and a write lock requires you to
|
||||||
|
// resolve checkpointing.
|
||||||
|
if (!node->dirty) {
|
||||||
|
toku_ft_bn_update_max_msn(node, max_msn_in_path, bfe->child_to_read);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
*node_p = node;
|
||||||
|
exit:
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
toku_pin_ftnode_with_dep_nodes(
|
||||||
|
FT ft,
|
||||||
|
BLOCKNUM blocknum,
|
||||||
|
uint32_t fullhash,
|
||||||
|
ftnode_fetch_extra *bfe,
|
||||||
|
pair_lock_type lock_type,
|
||||||
|
uint32_t num_dependent_nodes,
|
||||||
|
FTNODE *dependent_nodes,
|
||||||
|
FTNODE *node_p,
|
||||||
|
bool move_messages)
|
||||||
|
{
|
||||||
|
void *node_v;
|
||||||
|
PAIR dependent_pairs[num_dependent_nodes];
|
||||||
|
enum cachetable_dirty dependent_dirty_bits[num_dependent_nodes];
|
||||||
|
for (uint32_t i = 0; i < num_dependent_nodes; i++) {
|
||||||
|
dependent_pairs[i] = dependent_nodes[i]->ct_pair;
|
||||||
|
dependent_dirty_bits[i] = (enum cachetable_dirty) dependent_nodes[i]->dirty;
|
||||||
|
}
|
||||||
|
|
||||||
|
int r = toku_cachetable_get_and_pin_with_dep_pairs(
|
||||||
|
ft->cf,
|
||||||
|
blocknum,
|
||||||
|
fullhash,
|
||||||
|
&node_v,
|
||||||
|
NULL,
|
||||||
|
get_write_callbacks_for_node(ft),
|
||||||
|
toku_ftnode_fetch_callback,
|
||||||
|
toku_ftnode_pf_req_callback,
|
||||||
|
toku_ftnode_pf_callback,
|
||||||
|
lock_type,
|
||||||
|
bfe,
|
||||||
|
num_dependent_nodes,
|
||||||
|
dependent_pairs,
|
||||||
|
dependent_dirty_bits
|
||||||
|
);
|
||||||
|
invariant_zero(r);
|
||||||
|
FTNODE node = (FTNODE) node_v;
|
||||||
|
if (lock_type != PL_READ && node->height > 0 && move_messages) {
|
||||||
|
toku_move_ftnode_messages_to_stale(ft, node);
|
||||||
|
}
|
||||||
|
*node_p = node;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Pins a node that has no dependent nodes: forwards to
// toku_pin_ftnode_with_dep_nodes() with an empty dependent-node list.
void toku_pin_ftnode(FT ft,
                     BLOCKNUM blocknum,
                     uint32_t fullhash,
                     ftnode_fetch_extra *bfe,
                     pair_lock_type lock_type,
                     FTNODE *node_p,
                     bool move_messages) {
    const uint32_t no_dependent_nodes = 0;
    toku_pin_ftnode_with_dep_nodes(
        ft,
        blocknum,
        fullhash,
        bfe,
        lock_type,
        no_dependent_nodes,
        nullptr,
        node_p,
        move_messages);
}
|
||||||
|
|
||||||
|
// Attempts to pin the node through toku_cachetable_maybe_get_and_pin_clean().
// Returns that call's result unchanged. On 0, *nodep is set to the pinned
// node, and a nonleaf node pinned with a non-read lock has its messages
// moved to stale. On nonzero, *nodep is not written.
int toku_maybe_pin_ftnode_clean(FT ft, BLOCKNUM blocknum, uint32_t fullhash, pair_lock_type lock_type, FTNODE *nodep) {
    void *pinned_v;
    const int r = toku_cachetable_maybe_get_and_pin_clean(ft->cf, blocknum, fullhash, lock_type, &pinned_v);
    if (r == 0) {
        CAST_FROM_VOIDP(*nodep, pinned_v);
        const bool is_nonleaf = (*nodep)->height > 0;
        if (is_nonleaf && lock_type != PL_READ) {
            toku_move_ftnode_messages_to_stale(ft, *nodep);
        }
    }
    return r;
}
|
||||||
|
|
||||||
|
// Unpins a node, reporting its current dirty bit and the pair attributes
// computed from the node to the cachetable. The unpin must succeed.
void toku_unpin_ftnode(FT ft, FTNODE node) {
    const enum cachetable_dirty dirty_bit = static_cast<enum cachetable_dirty>(node->dirty);
    int r = toku_cachetable_unpin(ft->cf, node->ct_pair, dirty_bit, make_ftnode_pair_attr(node));
    invariant_zero(r);
}
|
||||||
|
|
||||||
|
void
|
||||||
|
toku_unpin_ftnode_read_only(FT ft, FTNODE node)
|
||||||
|
{
|
||||||
|
int r = toku_cachetable_unpin(
|
||||||
|
ft->cf,
|
||||||
|
node->ct_pair,
|
||||||
|
(enum cachetable_dirty) node->dirty,
|
||||||
|
make_invalid_pair_attr()
|
||||||
|
);
|
||||||
|
assert(r==0);
|
||||||
|
}
|
||||||
|
|
||||||
|
void toku_ftnode_swap_pair_values(FTNODE a, FTNODE b)
// Effect: Swap the blocknum, fullhash, and PAIR for a and b
// Requires: Both nodes are pinned
{
    // Exchange the three identity fields one at a time.
    const BLOCKNUM a_blocknum = a->blocknum;
    a->blocknum = b->blocknum;
    b->blocknum = a_blocknum;

    const uint32_t a_fullhash = a->fullhash;
    a->fullhash = b->fullhash;
    b->fullhash = a_fullhash;

    const PAIR a_pair = a->ct_pair;
    a->ct_pair = b->ct_pair;
    b->ct_pair = a_pair;

    // The nodes now point at each other's PAIR, but the cachetable still
    // has to swap the values those PAIRs hold (ie: the FTNODEs they
    // represent).
    toku_cachetable_swap_pair_values(a->ct_pair, b->ct_pair);
}
|
||||||
141
storage/tokudb/PerconaFT/ft/ft-cachetable-wrappers.h
Normal file
141
storage/tokudb/PerconaFT/ft/ft-cachetable-wrappers.h
Normal file
@@ -0,0 +1,141 @@
|
|||||||
|
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||||
|
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||||
|
#ident "$Id$"
|
||||||
|
/*======
|
||||||
|
This file is part of PerconaFT.
|
||||||
|
|
||||||
|
|
||||||
|
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||||
|
|
||||||
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License, version 2,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
----------------------------------------
|
||||||
|
|
||||||
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU Affero General Public License, version 3,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Affero General Public License
|
||||||
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
======= */
|
||||||
|
|
||||||
|
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "ft/cachetable/cachetable.h"
|
||||||
|
#include "ft/ft-internal.h"
|
||||||
|
#include "ft/node.h"
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Put an empty node (that is, no fields filled) into the cachetable.
|
||||||
|
* In the process, write dependent nodes out for checkpoint if
|
||||||
|
* necessary.
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
cachetable_put_empty_node_with_dep_nodes(
|
||||||
|
FT ft,
|
||||||
|
uint32_t num_dependent_nodes,
|
||||||
|
FTNODE* dependent_nodes,
|
||||||
|
BLOCKNUM* name, //output
|
||||||
|
uint32_t* fullhash, //output
|
||||||
|
FTNODE* result
|
||||||
|
);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a new ftnode with specified height and number of children.
|
||||||
|
* In the process, write dependent nodes out for checkpoint if
|
||||||
|
* necessary.
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
create_new_ftnode_with_dep_nodes(
|
||||||
|
FT ft,
|
||||||
|
FTNODE *result,
|
||||||
|
int height,
|
||||||
|
int n_children,
|
||||||
|
uint32_t num_dependent_nodes,
|
||||||
|
FTNODE* dependent_nodes
|
||||||
|
);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a new ftnode with specified height
|
||||||
|
* and children.
|
||||||
|
* Used for test functions only.
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
toku_create_new_ftnode (
|
||||||
|
FT_HANDLE t,
|
||||||
|
FTNODE *result,
|
||||||
|
int height,
|
||||||
|
int n_children
|
||||||
|
);
|
||||||
|
|
||||||
|
// This function returns a pinned ftnode to the caller.
|
||||||
|
int
|
||||||
|
toku_pin_ftnode_for_query(
|
||||||
|
FT_HANDLE ft_h,
|
||||||
|
BLOCKNUM blocknum,
|
||||||
|
uint32_t fullhash,
|
||||||
|
UNLOCKERS unlockers,
|
||||||
|
ANCESTORS ancestors,
|
||||||
|
const pivot_bounds &bounds,
|
||||||
|
ftnode_fetch_extra *bfe,
|
||||||
|
bool apply_ancestor_messages, // this bool is probably temporary, for #3972, once we know how range query estimates work, will revisit this
|
||||||
|
FTNODE *node_p,
|
||||||
|
bool* msgs_applied
|
||||||
|
);
|
||||||
|
|
||||||
|
// Pins an ftnode without dependent pairs
|
||||||
|
void toku_pin_ftnode(
|
||||||
|
FT ft,
|
||||||
|
BLOCKNUM blocknum,
|
||||||
|
uint32_t fullhash,
|
||||||
|
ftnode_fetch_extra *bfe,
|
||||||
|
pair_lock_type lock_type,
|
||||||
|
FTNODE *node_p,
|
||||||
|
bool move_messages
|
||||||
|
);
|
||||||
|
|
||||||
|
// Pins an ftnode with dependent pairs
|
||||||
|
// Unlike toku_pin_ftnode_for_query, this function blocks until the node is pinned.
|
||||||
|
void toku_pin_ftnode_with_dep_nodes(
|
||||||
|
FT ft,
|
||||||
|
BLOCKNUM blocknum,
|
||||||
|
uint32_t fullhash,
|
||||||
|
ftnode_fetch_extra *bfe,
|
||||||
|
pair_lock_type lock_type,
|
||||||
|
uint32_t num_dependent_nodes,
|
||||||
|
FTNODE *dependent_nodes,
|
||||||
|
FTNODE *node_p,
|
||||||
|
bool move_messages
|
||||||
|
);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This function may return a pinned ftnode to the caller, if pinning is cheap.
|
||||||
|
* If the node is already locked, or is pending a checkpoint, the node is not pinned and -1 is returned.
|
||||||
|
*/
|
||||||
|
int toku_maybe_pin_ftnode_clean(FT ft, BLOCKNUM blocknum, uint32_t fullhash, pair_lock_type lock_type, FTNODE *nodep);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Effect: Unpin an ftnode.
|
||||||
|
*/
|
||||||
|
void toku_unpin_ftnode(FT ft, FTNODE node);
|
||||||
|
void toku_unpin_ftnode_read_only(FT ft, FTNODE node);
|
||||||
|
|
||||||
|
// Effect: Swaps pair values of two pinned nodes
|
||||||
|
void toku_ftnode_swap_pair_values(FTNODE nodea, FTNODE nodeb);
|
||||||
183
storage/tokudb/PerconaFT/ft/ft-flusher-internal.h
Normal file
183
storage/tokudb/PerconaFT/ft/ft-flusher-internal.h
Normal file
@@ -0,0 +1,183 @@
|
|||||||
|
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||||
|
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||||
|
#ident "$Id$"
|
||||||
|
/*======
|
||||||
|
This file is part of PerconaFT.
|
||||||
|
|
||||||
|
|
||||||
|
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||||
|
|
||||||
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License, version 2,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
----------------------------------------
|
||||||
|
|
||||||
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU Affero General Public License, version 3,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Affero General Public License
|
||||||
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
======= */
|
||||||
|
|
||||||
|
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#define flt_flush_before_applying_inbox 1
|
||||||
|
#define flt_flush_before_child_pin 2
|
||||||
|
#define ft_flush_aflter_child_pin 3
|
||||||
|
#define flt_flush_before_split 4
|
||||||
|
#define flt_flush_during_split 5
|
||||||
|
#define flt_flush_before_merge 6
|
||||||
|
#define ft_flush_aflter_merge 7
|
||||||
|
#define ft_flush_aflter_rebalance 8
|
||||||
|
#define flt_flush_before_unpin_remove 9
|
||||||
|
#define flt_flush_before_pin_second_node_for_merge 10
|
||||||
|
|
||||||
|
typedef struct flusher_advice FLUSHER_ADVICE;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Choose a child to flush to. Returns a childnum, or -1 if we should
|
||||||
|
* go no further.
|
||||||
|
*
|
||||||
|
* Flusher threads: pick the heaviest child buffer
|
||||||
|
* Cleaner threads: pick the heaviest child buffer
|
||||||
|
* Cleaner thread merging leaf nodes: follow down to a key
|
||||||
|
* Hot optimize table: follow down to the right of a key
|
||||||
|
*/
|
||||||
|
typedef int (*FA_PICK_CHILD)(FT ft, FTNODE parent, void* extra);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Decide whether to call `toku_ft_flush_some_child` on the child if it is
|
||||||
|
* stable and a nonleaf node.
|
||||||
|
*
|
||||||
|
* Flusher threads: yes if child is gorged
|
||||||
|
* Cleaner threads: yes if child is gorged
|
||||||
|
* Cleaner thread merging leaf nodes: always yes
|
||||||
|
* Hot optimize table: always yes
|
||||||
|
*/
|
||||||
|
typedef bool (*FA_SHOULD_RECURSIVELY_FLUSH)(FTNODE child, void* extra);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Called if the child needs merging. Should do something to get the
|
||||||
|
* child out of a fusible state. Must unpin parent and child.
|
||||||
|
*
|
||||||
|
* Flusher threads: just do the merge
|
||||||
|
* Cleaner threads: if nonleaf, just merge, otherwise start a "cleaner
|
||||||
|
* thread merge"
|
||||||
|
* Cleaner thread merging leaf nodes: just do the merge
|
||||||
|
* Hot optimize table: just do the merge
|
||||||
|
*/
|
||||||
|
typedef void (*FA_MAYBE_MERGE_CHILD)(struct flusher_advice *fa,
|
||||||
|
FT ft,
|
||||||
|
FTNODE parent,
|
||||||
|
int childnum,
|
||||||
|
FTNODE child,
|
||||||
|
void* extra);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Cleaner threads may need to destroy basement nodes which have been
|
||||||
|
* brought more up to date than the height 1 node flushing to them.
|
||||||
|
* This function is used to determine if we need to check for basement
|
||||||
|
* nodes that are too up to date, and then destroy them if we find
|
||||||
|
* them.
|
||||||
|
*
|
||||||
|
* Flusher threads: no
|
||||||
|
* Cleaner threads: yes
|
||||||
|
* Cleaner thread merging leaf nodes: no
|
||||||
|
* Hot optimize table: no
|
||||||
|
*/
|
||||||
|
typedef bool (*FA_SHOULD_DESTROY_BN)(void* extra);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Update `ft_flusher_status` in whatever way necessary. Called once
|
||||||
|
* by `toku_ft_flush_some_child` right before choosing what to do next (split,
|
||||||
|
* merge, recurse), with the number of nodes that were dirtied by this
|
||||||
|
* execution of `toku_ft_flush_some_child`.
|
||||||
|
*/
|
||||||
|
typedef void (*FA_UPDATE_STATUS)(FTNODE child, int dirtied, void* extra);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Choose whether to go to the left or right child after a split. Called
|
||||||
|
* by `ft_split_child`. If -1 is returned, `ft_split_child` defaults to
|
||||||
|
* the old behavior.
|
||||||
|
*/
|
||||||
|
typedef int (*FA_PICK_CHILD_AFTER_SPLIT)(FT ft,
|
||||||
|
FTNODE node,
|
||||||
|
int childnuma,
|
||||||
|
int childnumb,
|
||||||
|
void* extra);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A collection of callbacks used by the flushing machinery to make
|
||||||
|
* various decisions. There are implementations of each of these
|
||||||
|
* functions for flusher threads (flt_*), cleaner threads (ct_*), and hot
|
||||||
|
* optimize table (hot_*).
|
||||||
|
*/
|
||||||
|
struct flusher_advice { // bundle of callbacks plus the opaque argument handed to them
|
||||||
|
FA_PICK_CHILD pick_child; // chooses the child to flush to; returns a childnum, or -1 to go no further
|
||||||
|
FA_SHOULD_RECURSIVELY_FLUSH should_recursively_flush; // decides whether to call toku_ft_flush_some_child on the child
|
||||||
|
FA_MAYBE_MERGE_CHILD maybe_merge_child; // called if the child needs merging; must unpin parent and child
|
||||||
|
FA_SHOULD_DESTROY_BN should_destroy_basement_nodes; // whether to check for, and destroy, basement nodes that are too up to date
|
||||||
|
FA_UPDATE_STATUS update_status; // updates ft_flusher_status with the number of nodes dirtied
|
||||||
|
FA_PICK_CHILD_AFTER_SPLIT pick_child_after_split; // picks the left or right child after a split; -1 selects the old default behavior
|
||||||
|
void* extra; // parameter passed into callbacks
|
||||||
|
};
|
||||||
|
|
||||||
|
void
|
||||||
|
flusher_advice_init(
|
||||||
|
struct flusher_advice *fa,
|
||||||
|
FA_PICK_CHILD pick_child,
|
||||||
|
FA_SHOULD_DESTROY_BN should_destroy_basement_nodes,
|
||||||
|
FA_SHOULD_RECURSIVELY_FLUSH should_recursively_flush,
|
||||||
|
FA_MAYBE_MERGE_CHILD maybe_merge_child,
|
||||||
|
FA_UPDATE_STATUS update_status,
|
||||||
|
FA_PICK_CHILD_AFTER_SPLIT pick_child_after_split,
|
||||||
|
void* extra
|
||||||
|
);
|
||||||
|
|
||||||
|
void toku_ft_flush_some_child(
|
||||||
|
FT ft,
|
||||||
|
FTNODE parent,
|
||||||
|
struct flusher_advice *fa
|
||||||
|
);
|
||||||
|
|
||||||
|
bool
|
||||||
|
always_recursively_flush(FTNODE child, void* extra);
|
||||||
|
|
||||||
|
bool
|
||||||
|
never_recursively_flush(FTNODE UU(child), void* UU(extra));
|
||||||
|
|
||||||
|
bool
|
||||||
|
dont_destroy_basement_nodes(void* extra);
|
||||||
|
|
||||||
|
void
|
||||||
|
default_merge_child(struct flusher_advice *fa,
|
||||||
|
FT ft,
|
||||||
|
FTNODE parent,
|
||||||
|
int childnum,
|
||||||
|
FTNODE child,
|
||||||
|
void* extra);
|
||||||
|
|
||||||
|
int
|
||||||
|
default_pick_child_after_split(FT ft,
|
||||||
|
FTNODE parent,
|
||||||
|
int childnuma,
|
||||||
|
int childnumb,
|
||||||
|
void *extra);
|
||||||
|
|
||||||
@@ -1,95 +1,40 @@
|
|||||||
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||||
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||||
#ident "$Id$"
|
#ident "$Id$"
|
||||||
/*
|
/*======
|
||||||
COPYING CONDITIONS NOTICE:
|
This file is part of PerconaFT.
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or modify
|
|
||||||
it under the terms of version 2 of the GNU General Public License as
|
|
||||||
published by the Free Software Foundation, and provided that the
|
|
||||||
following conditions are met:
|
|
||||||
|
|
||||||
* Redistributions of source code must retain this COPYING
|
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||||
CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the
|
|
||||||
DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the
|
|
||||||
PATENT MARKING NOTICE (below), and the PATENT RIGHTS
|
|
||||||
GRANT (below).
|
|
||||||
|
|
||||||
* Redistributions in binary form must reproduce this COPYING
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the
|
it under the terms of the GNU General Public License, version 2,
|
||||||
DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the
|
as published by the Free Software Foundation.
|
||||||
PATENT MARKING NOTICE (below), and the PATENT RIGHTS
|
|
||||||
GRANT (below) in the documentation and/or other materials
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
provided with the distribution.
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
You should have received a copy of the GNU General Public License
|
You should have received a copy of the GNU General Public License
|
||||||
along with this program; if not, write to the Free Software
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
|
||||||
02110-1301, USA.
|
|
||||||
|
|
||||||
COPYRIGHT NOTICE:
|
----------------------------------------
|
||||||
|
|
||||||
TokuFT, Tokutek Fractal Tree Indexing Library.
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
Copyright (C) 2007-2013 Tokutek, Inc.
|
it under the terms of the GNU Affero General Public License, version 3,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
DISCLAIMER:
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
This program is distributed in the hope that it will be useful, but
|
You should have received a copy of the GNU Affero General Public License
|
||||||
WITHOUT ANY WARRANTY; without even the implied warranty of
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
======= */
|
||||||
General Public License for more details.
|
|
||||||
|
|
||||||
UNIVERSITY PATENT NOTICE:
|
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||||
|
|
||||||
The technology is licensed by the Massachusetts Institute of
|
|
||||||
Technology, Rutgers State University of New Jersey, and the Research
|
|
||||||
Foundation of State University of New York at Stony Brook under
|
|
||||||
United States of America Serial No. 11/760379 and to the patents
|
|
||||||
and/or patent applications resulting from it.
|
|
||||||
|
|
||||||
PATENT MARKING NOTICE:
|
|
||||||
|
|
||||||
This software is covered by US Patent No. 8,185,551.
|
|
||||||
This software is covered by US Patent No. 8,489,638.
|
|
||||||
|
|
||||||
PATENT RIGHTS GRANT:
|
|
||||||
|
|
||||||
"THIS IMPLEMENTATION" means the copyrightable works distributed by
|
|
||||||
Tokutek as part of the Fractal Tree project.
|
|
||||||
|
|
||||||
"PATENT CLAIMS" means the claims of patents that are owned or
|
|
||||||
licensable by Tokutek, both currently or in the future; and that in
|
|
||||||
the absence of this license would be infringed by THIS
|
|
||||||
IMPLEMENTATION or by using or running THIS IMPLEMENTATION.
|
|
||||||
|
|
||||||
"PATENT CHALLENGE" shall mean a challenge to the validity,
|
|
||||||
patentability, enforceability and/or non-infringement of any of the
|
|
||||||
PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS.
|
|
||||||
|
|
||||||
Tokutek hereby grants to you, for the term and geographical scope of
|
|
||||||
the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free,
|
|
||||||
irrevocable (except as stated in this section) patent license to
|
|
||||||
make, have made, use, offer to sell, sell, import, transfer, and
|
|
||||||
otherwise run, modify, and propagate the contents of THIS
|
|
||||||
IMPLEMENTATION, where such license applies only to the PATENT
|
|
||||||
CLAIMS. This grant does not include claims that would be infringed
|
|
||||||
only as a consequence of further modifications of THIS
|
|
||||||
IMPLEMENTATION. If you or your agent or licensee institute or order
|
|
||||||
or agree to the institution of patent litigation against any entity
|
|
||||||
(including a cross-claim or counterclaim in a lawsuit) alleging that
|
|
||||||
THIS IMPLEMENTATION constitutes direct or contributory patent
|
|
||||||
infringement, or inducement of patent infringement, then any rights
|
|
||||||
granted to you under this License shall terminate as of the date
|
|
||||||
such litigation is filed. If you or your agent or exclusive
|
|
||||||
licensee institute or order or agree to the institution of a PATENT
|
|
||||||
CHALLENGE, then Tokutek may terminate any rights granted to you
|
|
||||||
under this License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved."
|
|
||||||
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it."
|
|
||||||
|
|
||||||
#include <config.h>
|
|
||||||
|
|
||||||
#include "ft/ft.h"
|
#include "ft/ft.h"
|
||||||
#include "ft/ft-cachetable-wrappers.h"
|
#include "ft/ft-cachetable-wrappers.h"
|
||||||
@@ -104,60 +49,10 @@ PATENT RIGHTS GRANT:
|
|||||||
#include "util/status.h"
|
#include "util/status.h"
|
||||||
#include "util/context.h"
|
#include "util/context.h"
|
||||||
|
|
||||||
/* Status is intended for display to humans to help understand system behavior.
|
|
||||||
* It does not need to be perfectly thread-safe.
|
|
||||||
*/
|
|
||||||
static FT_FLUSHER_STATUS_S ft_flusher_status;
|
|
||||||
|
|
||||||
#define STATUS_INIT(k,c,t,l,inc) TOKUFT_STATUS_INIT(ft_flusher_status, k, c, t, "ft flusher: " l, inc)
|
|
||||||
|
|
||||||
#define STATUS_VALUE(x) ft_flusher_status.status[x].value.num
|
|
||||||
void toku_ft_flusher_status_init(void) {
|
|
||||||
// Note, this function initializes the keyname, type, and legend fields.
|
|
||||||
// Value fields are initialized to zero by compiler.
|
|
||||||
STATUS_INIT(FT_FLUSHER_CLEANER_TOTAL_NODES, nullptr, UINT64, "total nodes potentially flushed by cleaner thread", TOKU_ENGINE_STATUS);
|
|
||||||
STATUS_INIT(FT_FLUSHER_CLEANER_H1_NODES, nullptr, UINT64, "height-one nodes flushed by cleaner thread", TOKU_ENGINE_STATUS);
|
|
||||||
STATUS_INIT(FT_FLUSHER_CLEANER_HGT1_NODES, nullptr, UINT64, "height-greater-than-one nodes flushed by cleaner thread", TOKU_ENGINE_STATUS);
|
|
||||||
STATUS_INIT(FT_FLUSHER_CLEANER_EMPTY_NODES, nullptr, UINT64, "nodes cleaned which had empty buffers", TOKU_ENGINE_STATUS);
|
|
||||||
STATUS_INIT(FT_FLUSHER_CLEANER_NODES_DIRTIED, nullptr, UINT64, "nodes dirtied by cleaner thread", TOKU_ENGINE_STATUS);
|
|
||||||
STATUS_INIT(FT_FLUSHER_CLEANER_MAX_BUFFER_SIZE, nullptr, UINT64, "max bytes in a buffer flushed by cleaner thread", TOKU_ENGINE_STATUS);
|
|
||||||
STATUS_INIT(FT_FLUSHER_CLEANER_MIN_BUFFER_SIZE, nullptr, UINT64, "min bytes in a buffer flushed by cleaner thread", TOKU_ENGINE_STATUS);
|
|
||||||
STATUS_INIT(FT_FLUSHER_CLEANER_TOTAL_BUFFER_SIZE, nullptr, UINT64, "total bytes in buffers flushed by cleaner thread", TOKU_ENGINE_STATUS);
|
|
||||||
STATUS_INIT(FT_FLUSHER_CLEANER_MAX_BUFFER_WORKDONE, nullptr, UINT64, "max workdone in a buffer flushed by cleaner thread", TOKU_ENGINE_STATUS);
|
|
||||||
STATUS_INIT(FT_FLUSHER_CLEANER_MIN_BUFFER_WORKDONE, nullptr, UINT64, "min workdone in a buffer flushed by cleaner thread", TOKU_ENGINE_STATUS);
|
|
||||||
STATUS_INIT(FT_FLUSHER_CLEANER_TOTAL_BUFFER_WORKDONE, nullptr, UINT64, "total workdone in buffers flushed by cleaner thread", TOKU_ENGINE_STATUS);
|
|
||||||
STATUS_INIT(FT_FLUSHER_CLEANER_NUM_LEAF_MERGES_STARTED, nullptr, UINT64, "times cleaner thread tries to merge a leaf", TOKU_ENGINE_STATUS);
|
|
||||||
STATUS_INIT(FT_FLUSHER_CLEANER_NUM_LEAF_MERGES_RUNNING, nullptr, UINT64, "cleaner thread leaf merges in progress", TOKU_ENGINE_STATUS);
|
|
||||||
STATUS_INIT(FT_FLUSHER_CLEANER_NUM_LEAF_MERGES_COMPLETED, nullptr, UINT64, "cleaner thread leaf merges successful", TOKU_ENGINE_STATUS);
|
|
||||||
STATUS_INIT(FT_FLUSHER_CLEANER_NUM_DIRTIED_FOR_LEAF_MERGE, nullptr, UINT64, "nodes dirtied by cleaner thread leaf merges", TOKU_ENGINE_STATUS);
|
|
||||||
STATUS_INIT(FT_FLUSHER_FLUSH_TOTAL, nullptr, UINT64, "total number of flushes done by flusher threads or cleaner threads", TOKU_ENGINE_STATUS);
|
|
||||||
STATUS_INIT(FT_FLUSHER_FLUSH_IN_MEMORY, nullptr, UINT64, "number of in memory flushes", TOKU_ENGINE_STATUS);
|
|
||||||
STATUS_INIT(FT_FLUSHER_FLUSH_NEEDED_IO, nullptr, UINT64, "number of flushes that read something off disk", TOKU_ENGINE_STATUS);
|
|
||||||
STATUS_INIT(FT_FLUSHER_FLUSH_CASCADES, nullptr, UINT64, "number of flushes that triggered another flush in child", TOKU_ENGINE_STATUS);
|
|
||||||
STATUS_INIT(FT_FLUSHER_FLUSH_CASCADES_1, nullptr, UINT64, "number of flushes that triggered 1 cascading flush", TOKU_ENGINE_STATUS);
|
|
||||||
STATUS_INIT(FT_FLUSHER_FLUSH_CASCADES_2, nullptr, UINT64, "number of flushes that triggered 2 cascading flushes", TOKU_ENGINE_STATUS);
|
|
||||||
STATUS_INIT(FT_FLUSHER_FLUSH_CASCADES_3, nullptr, UINT64, "number of flushes that triggered 3 cascading flushes", TOKU_ENGINE_STATUS);
|
|
||||||
STATUS_INIT(FT_FLUSHER_FLUSH_CASCADES_4, nullptr, UINT64, "number of flushes that triggered 4 cascading flushes", TOKU_ENGINE_STATUS);
|
|
||||||
STATUS_INIT(FT_FLUSHER_FLUSH_CASCADES_5, nullptr, UINT64, "number of flushes that triggered 5 cascading flushes", TOKU_ENGINE_STATUS);
|
|
||||||
STATUS_INIT(FT_FLUSHER_FLUSH_CASCADES_GT_5, nullptr, UINT64, "number of flushes that triggered over 5 cascading flushes", TOKU_ENGINE_STATUS);
|
|
||||||
STATUS_INIT(FT_FLUSHER_SPLIT_LEAF, nullptr, UINT64, "leaf node splits", TOKU_ENGINE_STATUS);
|
|
||||||
STATUS_INIT(FT_FLUSHER_SPLIT_NONLEAF, nullptr, UINT64, "nonleaf node splits", TOKU_ENGINE_STATUS);
|
|
||||||
STATUS_INIT(FT_FLUSHER_MERGE_LEAF, nullptr, UINT64, "leaf node merges", TOKU_ENGINE_STATUS);
|
|
||||||
STATUS_INIT(FT_FLUSHER_MERGE_NONLEAF, nullptr, UINT64, "nonleaf node merges", TOKU_ENGINE_STATUS);
|
|
||||||
STATUS_INIT(FT_FLUSHER_BALANCE_LEAF, nullptr, UINT64, "leaf node balances", TOKU_ENGINE_STATUS);
|
|
||||||
|
|
||||||
STATUS_VALUE(FT_FLUSHER_CLEANER_MIN_BUFFER_SIZE) = UINT64_MAX;
|
|
||||||
STATUS_VALUE(FT_FLUSHER_CLEANER_MIN_BUFFER_WORKDONE) = UINT64_MAX;
|
|
||||||
|
|
||||||
ft_flusher_status.initialized = true;
|
|
||||||
}
|
|
||||||
#undef STATUS_INIT
|
|
||||||
|
|
||||||
void toku_ft_flusher_get_status(FT_FLUSHER_STATUS status) {
|
void toku_ft_flusher_get_status(FT_FLUSHER_STATUS status) {
|
||||||
if (!ft_flusher_status.initialized) {
|
fl_status.init();
|
||||||
toku_ft_flusher_status_init();
|
*status = fl_status;
|
||||||
}
|
|
||||||
*status = ft_flusher_status;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
//
|
//
|
||||||
@@ -204,22 +99,22 @@ find_heaviest_child(FTNODE node)
|
|||||||
|
|
||||||
static void
|
static void
|
||||||
update_flush_status(FTNODE child, int cascades) {
|
update_flush_status(FTNODE child, int cascades) {
|
||||||
STATUS_VALUE(FT_FLUSHER_FLUSH_TOTAL)++;
|
FL_STATUS_VAL(FT_FLUSHER_FLUSH_TOTAL)++;
|
||||||
if (cascades > 0) {
|
if (cascades > 0) {
|
||||||
STATUS_VALUE(FT_FLUSHER_FLUSH_CASCADES)++;
|
FL_STATUS_VAL(FT_FLUSHER_FLUSH_CASCADES)++;
|
||||||
switch (cascades) {
|
switch (cascades) {
|
||||||
case 1:
|
case 1:
|
||||||
STATUS_VALUE(FT_FLUSHER_FLUSH_CASCADES_1)++; break;
|
FL_STATUS_VAL(FT_FLUSHER_FLUSH_CASCADES_1)++; break;
|
||||||
case 2:
|
case 2:
|
||||||
STATUS_VALUE(FT_FLUSHER_FLUSH_CASCADES_2)++; break;
|
FL_STATUS_VAL(FT_FLUSHER_FLUSH_CASCADES_2)++; break;
|
||||||
case 3:
|
case 3:
|
||||||
STATUS_VALUE(FT_FLUSHER_FLUSH_CASCADES_3)++; break;
|
FL_STATUS_VAL(FT_FLUSHER_FLUSH_CASCADES_3)++; break;
|
||||||
case 4:
|
case 4:
|
||||||
STATUS_VALUE(FT_FLUSHER_FLUSH_CASCADES_4)++; break;
|
FL_STATUS_VAL(FT_FLUSHER_FLUSH_CASCADES_4)++; break;
|
||||||
case 5:
|
case 5:
|
||||||
STATUS_VALUE(FT_FLUSHER_FLUSH_CASCADES_5)++; break;
|
FL_STATUS_VAL(FT_FLUSHER_FLUSH_CASCADES_5)++; break;
|
||||||
default:
|
default:
|
||||||
STATUS_VALUE(FT_FLUSHER_FLUSH_CASCADES_GT_5)++; break;
|
FL_STATUS_VAL(FT_FLUSHER_FLUSH_CASCADES_GT_5)++; break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
bool flush_needs_io = false;
|
bool flush_needs_io = false;
|
||||||
@@ -229,9 +124,9 @@ update_flush_status(FTNODE child, int cascades) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (flush_needs_io) {
|
if (flush_needs_io) {
|
||||||
STATUS_VALUE(FT_FLUSHER_FLUSH_NEEDED_IO)++;
|
FL_STATUS_VAL(FT_FLUSHER_FLUSH_NEEDED_IO)++;
|
||||||
} else {
|
} else {
|
||||||
STATUS_VALUE(FT_FLUSHER_FLUSH_IN_MEMORY)++;
|
FL_STATUS_VAL(FT_FLUSHER_FLUSH_IN_MEMORY)++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -419,7 +314,7 @@ ctm_update_status(
|
|||||||
void* UU(extra)
|
void* UU(extra)
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
STATUS_VALUE(FT_FLUSHER_CLEANER_NUM_DIRTIED_FOR_LEAF_MERGE) += dirtied;
|
FL_STATUS_VAL(FT_FLUSHER_CLEANER_NUM_DIRTIED_FOR_LEAF_MERGE) += dirtied;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
@@ -431,7 +326,7 @@ ctm_maybe_merge_child(struct flusher_advice *fa,
|
|||||||
void *extra)
|
void *extra)
|
||||||
{
|
{
|
||||||
if (child->height == 0) {
|
if (child->height == 0) {
|
||||||
(void) toku_sync_fetch_and_add(&STATUS_VALUE(FT_FLUSHER_CLEANER_NUM_LEAF_MERGES_COMPLETED), 1);
|
(void) toku_sync_fetch_and_add(&FL_STATUS_VAL(FT_FLUSHER_CLEANER_NUM_LEAF_MERGES_COMPLETED), 1);
|
||||||
}
|
}
|
||||||
default_merge_child(fa, ft, parent, childnum, child, extra);
|
default_merge_child(fa, ft, parent, childnum, child, extra);
|
||||||
}
|
}
|
||||||
@@ -495,12 +390,12 @@ ct_maybe_merge_child(struct flusher_advice *fa,
|
|||||||
toku_ftnode_assert_fully_in_memory(root_node);
|
toku_ftnode_assert_fully_in_memory(root_node);
|
||||||
}
|
}
|
||||||
|
|
||||||
(void) toku_sync_fetch_and_add(&STATUS_VALUE(FT_FLUSHER_CLEANER_NUM_LEAF_MERGES_STARTED), 1);
|
(void) toku_sync_fetch_and_add(&FL_STATUS_VAL(FT_FLUSHER_CLEANER_NUM_LEAF_MERGES_STARTED), 1);
|
||||||
(void) toku_sync_fetch_and_add(&STATUS_VALUE(FT_FLUSHER_CLEANER_NUM_LEAF_MERGES_RUNNING), 1);
|
(void) toku_sync_fetch_and_add(&FL_STATUS_VAL(FT_FLUSHER_CLEANER_NUM_LEAF_MERGES_RUNNING), 1);
|
||||||
|
|
||||||
toku_ft_flush_some_child(ft, root_node, &new_fa);
|
toku_ft_flush_some_child(ft, root_node, &new_fa);
|
||||||
|
|
||||||
(void) toku_sync_fetch_and_sub(&STATUS_VALUE(FT_FLUSHER_CLEANER_NUM_LEAF_MERGES_RUNNING), 1);
|
(void) toku_sync_fetch_and_sub(&FL_STATUS_VAL(FT_FLUSHER_CLEANER_NUM_LEAF_MERGES_RUNNING), 1);
|
||||||
|
|
||||||
toku_destroy_dbt(&ctme.target_key);
|
toku_destroy_dbt(&ctme.target_key);
|
||||||
}
|
}
|
||||||
@@ -513,7 +408,7 @@ ct_update_status(FTNODE child,
|
|||||||
{
|
{
|
||||||
struct flush_status_update_extra* fste = (struct flush_status_update_extra *) extra;
|
struct flush_status_update_extra* fste = (struct flush_status_update_extra *) extra;
|
||||||
update_flush_status(child, fste->cascades);
|
update_flush_status(child, fste->cascades);
|
||||||
STATUS_VALUE(FT_FLUSHER_CLEANER_NODES_DIRTIED) += dirtied;
|
FL_STATUS_VAL(FT_FLUSHER_CLEANER_NODES_DIRTIED) += dirtied;
|
||||||
// Incrementing this in case `toku_ft_flush_some_child` decides to recurse.
|
// Incrementing this in case `toku_ft_flush_some_child` decides to recurse.
|
||||||
fste->cascades++;
|
fste->cascades++;
|
||||||
}
|
}
|
||||||
@@ -602,7 +497,7 @@ handle_split_of_child(
|
|||||||
// We never set the rightmost blocknum to be the root.
|
// We never set the rightmost blocknum to be the root.
|
||||||
// Instead, we wait for the root to split and let promotion initialize the rightmost
|
// Instead, we wait for the root to split and let promotion initialize the rightmost
|
||||||
// blocknum to be the first non-root leaf node on the right extreme to receive an insert.
|
// blocknum to be the first non-root leaf node on the right extreme to receive an insert.
|
||||||
BLOCKNUM rightmost_blocknum = toku_drd_unsafe_fetch(&ft->rightmost_blocknum);
|
BLOCKNUM rightmost_blocknum = toku_unsafe_fetch(&ft->rightmost_blocknum);
|
||||||
invariant(ft->h->root_blocknum.b != rightmost_blocknum.b);
|
invariant(ft->h->root_blocknum.b != rightmost_blocknum.b);
|
||||||
if (childa->blocknum.b == rightmost_blocknum.b) {
|
if (childa->blocknum.b == rightmost_blocknum.b) {
|
||||||
// The rightmost leaf (a) split into (a) and (b). We want (b) to swap pair values
|
// The rightmost leaf (a) split into (a) and (b). We want (b) to swap pair values
|
||||||
@@ -790,7 +685,7 @@ ftleaf_split(
|
|||||||
{
|
{
|
||||||
|
|
||||||
paranoid_invariant(node->height == 0);
|
paranoid_invariant(node->height == 0);
|
||||||
STATUS_VALUE(FT_FLUSHER_SPLIT_LEAF)++;
|
FL_STATUS_VAL(FT_FLUSHER_SPLIT_LEAF)++;
|
||||||
if (node->n_children) {
|
if (node->n_children) {
|
||||||
// First move all the accumulated stat64info deltas into the first basement.
|
// First move all the accumulated stat64info deltas into the first basement.
|
||||||
// After the split, either both nodes or neither node will be included in the next checkpoint.
|
// After the split, either both nodes or neither node will be included in the next checkpoint.
|
||||||
@@ -967,7 +862,7 @@ ft_nonleaf_split(
|
|||||||
FTNODE* dependent_nodes)
|
FTNODE* dependent_nodes)
|
||||||
{
|
{
|
||||||
//VERIFY_NODE(t,node);
|
//VERIFY_NODE(t,node);
|
||||||
STATUS_VALUE(FT_FLUSHER_SPLIT_NONLEAF)++;
|
FL_STATUS_VAL(FT_FLUSHER_SPLIT_NONLEAF)++;
|
||||||
toku_ftnode_assert_fully_in_memory(node);
|
toku_ftnode_assert_fully_in_memory(node);
|
||||||
int old_n_children = node->n_children;
|
int old_n_children = node->n_children;
|
||||||
int n_children_in_a = old_n_children/2;
|
int n_children_in_a = old_n_children/2;
|
||||||
@@ -1123,7 +1018,7 @@ flush_this_child(
|
|||||||
static void
|
static void
|
||||||
merge_leaf_nodes(FTNODE a, FTNODE b)
|
merge_leaf_nodes(FTNODE a, FTNODE b)
|
||||||
{
|
{
|
||||||
STATUS_VALUE(FT_FLUSHER_MERGE_LEAF)++;
|
FL_STATUS_VAL(FT_FLUSHER_MERGE_LEAF)++;
|
||||||
toku_ftnode_assert_fully_in_memory(a);
|
toku_ftnode_assert_fully_in_memory(a);
|
||||||
toku_ftnode_assert_fully_in_memory(b);
|
toku_ftnode_assert_fully_in_memory(b);
|
||||||
paranoid_invariant(a->height == 0);
|
paranoid_invariant(a->height == 0);
|
||||||
@@ -1199,7 +1094,7 @@ static void balance_leaf_nodes(
|
|||||||
// If b is bigger then move stuff from b to a until b is the smaller.
|
// If b is bigger then move stuff from b to a until b is the smaller.
|
||||||
// If a is bigger then move stuff from a to b until a is the smaller.
|
// If a is bigger then move stuff from a to b until a is the smaller.
|
||||||
{
|
{
|
||||||
STATUS_VALUE(FT_FLUSHER_BALANCE_LEAF)++;
|
FL_STATUS_VAL(FT_FLUSHER_BALANCE_LEAF)++;
|
||||||
// first merge all the data into a
|
// first merge all the data into a
|
||||||
merge_leaf_nodes(a,b);
|
merge_leaf_nodes(a,b);
|
||||||
// now split them
|
// now split them
|
||||||
@@ -1278,7 +1173,7 @@ maybe_merge_pinned_nonleaf_nodes(
|
|||||||
*did_rebalance = false;
|
*did_rebalance = false;
|
||||||
toku_init_dbt(splitk);
|
toku_init_dbt(splitk);
|
||||||
|
|
||||||
STATUS_VALUE(FT_FLUSHER_MERGE_NONLEAF)++;
|
FL_STATUS_VAL(FT_FLUSHER_MERGE_NONLEAF)++;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
@@ -1431,7 +1326,7 @@ ft_merge_child(
|
|||||||
node->pivotkeys.delete_at(childnuma);
|
node->pivotkeys.delete_at(childnuma);
|
||||||
|
|
||||||
// Handle a merge of the rightmost leaf node.
|
// Handle a merge of the rightmost leaf node.
|
||||||
BLOCKNUM rightmost_blocknum = toku_drd_unsafe_fetch(&ft->rightmost_blocknum);
|
BLOCKNUM rightmost_blocknum = toku_unsafe_fetch(&ft->rightmost_blocknum);
|
||||||
if (did_merge && childb->blocknum.b == rightmost_blocknum.b) {
|
if (did_merge && childb->blocknum.b == rightmost_blocknum.b) {
|
||||||
invariant(childb->blocknum.b != ft->h->root_blocknum.b);
|
invariant(childb->blocknum.b != ft->h->root_blocknum.b);
|
||||||
toku_ftnode_swap_pair_values(childa, childb);
|
toku_ftnode_swap_pair_values(childa, childb);
|
||||||
@@ -1730,33 +1625,33 @@ update_cleaner_status(
|
|||||||
FTNODE node,
|
FTNODE node,
|
||||||
int childnum)
|
int childnum)
|
||||||
{
|
{
|
||||||
STATUS_VALUE(FT_FLUSHER_CLEANER_TOTAL_NODES)++;
|
FL_STATUS_VAL(FT_FLUSHER_CLEANER_TOTAL_NODES)++;
|
||||||
if (node->height == 1) {
|
if (node->height == 1) {
|
||||||
STATUS_VALUE(FT_FLUSHER_CLEANER_H1_NODES)++;
|
FL_STATUS_VAL(FT_FLUSHER_CLEANER_H1_NODES)++;
|
||||||
} else {
|
} else {
|
||||||
STATUS_VALUE(FT_FLUSHER_CLEANER_HGT1_NODES)++;
|
FL_STATUS_VAL(FT_FLUSHER_CLEANER_HGT1_NODES)++;
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned int nbytesinbuf = toku_bnc_nbytesinbuf(BNC(node, childnum));
|
unsigned int nbytesinbuf = toku_bnc_nbytesinbuf(BNC(node, childnum));
|
||||||
if (nbytesinbuf == 0) {
|
if (nbytesinbuf == 0) {
|
||||||
STATUS_VALUE(FT_FLUSHER_CLEANER_EMPTY_NODES)++;
|
FL_STATUS_VAL(FT_FLUSHER_CLEANER_EMPTY_NODES)++;
|
||||||
} else {
|
} else {
|
||||||
if (nbytesinbuf > STATUS_VALUE(FT_FLUSHER_CLEANER_MAX_BUFFER_SIZE)) {
|
if (nbytesinbuf > FL_STATUS_VAL(FT_FLUSHER_CLEANER_MAX_BUFFER_SIZE)) {
|
||||||
STATUS_VALUE(FT_FLUSHER_CLEANER_MAX_BUFFER_SIZE) = nbytesinbuf;
|
FL_STATUS_VAL(FT_FLUSHER_CLEANER_MAX_BUFFER_SIZE) = nbytesinbuf;
|
||||||
}
|
}
|
||||||
if (nbytesinbuf < STATUS_VALUE(FT_FLUSHER_CLEANER_MIN_BUFFER_SIZE)) {
|
if (nbytesinbuf < FL_STATUS_VAL(FT_FLUSHER_CLEANER_MIN_BUFFER_SIZE)) {
|
||||||
STATUS_VALUE(FT_FLUSHER_CLEANER_MIN_BUFFER_SIZE) = nbytesinbuf;
|
FL_STATUS_VAL(FT_FLUSHER_CLEANER_MIN_BUFFER_SIZE) = nbytesinbuf;
|
||||||
}
|
}
|
||||||
STATUS_VALUE(FT_FLUSHER_CLEANER_TOTAL_BUFFER_SIZE) += nbytesinbuf;
|
FL_STATUS_VAL(FT_FLUSHER_CLEANER_TOTAL_BUFFER_SIZE) += nbytesinbuf;
|
||||||
|
|
||||||
uint64_t workdone = BP_WORKDONE(node, childnum);
|
uint64_t workdone = BP_WORKDONE(node, childnum);
|
||||||
if (workdone > STATUS_VALUE(FT_FLUSHER_CLEANER_MAX_BUFFER_WORKDONE)) {
|
if (workdone > FL_STATUS_VAL(FT_FLUSHER_CLEANER_MAX_BUFFER_WORKDONE)) {
|
||||||
STATUS_VALUE(FT_FLUSHER_CLEANER_MAX_BUFFER_WORKDONE) = workdone;
|
FL_STATUS_VAL(FT_FLUSHER_CLEANER_MAX_BUFFER_WORKDONE) = workdone;
|
||||||
}
|
}
|
||||||
if (workdone < STATUS_VALUE(FT_FLUSHER_CLEANER_MIN_BUFFER_WORKDONE)) {
|
if (workdone < FL_STATUS_VAL(FT_FLUSHER_CLEANER_MIN_BUFFER_WORKDONE)) {
|
||||||
STATUS_VALUE(FT_FLUSHER_CLEANER_MIN_BUFFER_WORKDONE) = workdone;
|
FL_STATUS_VAL(FT_FLUSHER_CLEANER_MIN_BUFFER_WORKDONE) = workdone;
|
||||||
}
|
}
|
||||||
STATUS_VALUE(FT_FLUSHER_CLEANER_TOTAL_BUFFER_WORKDONE) += workdone;
|
FL_STATUS_VAL(FT_FLUSHER_CLEANER_TOTAL_BUFFER_WORKDONE) += workdone;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -2027,7 +1922,5 @@ void toku_ft_flush_node_on_background_thread(FT ft, FTNODE parent)
|
|||||||
void __attribute__((__constructor__)) toku_ft_flusher_helgrind_ignore(void);
|
void __attribute__((__constructor__)) toku_ft_flusher_helgrind_ignore(void);
|
||||||
void
|
void
|
||||||
toku_ft_flusher_helgrind_ignore(void) {
|
toku_ft_flusher_helgrind_ignore(void) {
|
||||||
TOKU_VALGRIND_HG_DISABLE_CHECKING(&ft_flusher_status, sizeof ft_flusher_status);
|
TOKU_VALGRIND_HG_DISABLE_CHECKING(&fl_status, sizeof fl_status);
|
||||||
}
|
}
|
||||||
|
|
||||||
#undef STATUS_VALUE
|
|
||||||
147
storage/tokudb/PerconaFT/ft/ft-flusher.h
Normal file
147
storage/tokudb/PerconaFT/ft/ft-flusher.h
Normal file
@@ -0,0 +1,147 @@
|
|||||||
|
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||||
|
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||||
|
#ident "$Id$"
|
||||||
|
/*======
|
||||||
|
This file is part of PerconaFT.
|
||||||
|
|
||||||
|
|
||||||
|
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||||
|
|
||||||
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License, version 2,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
----------------------------------------
|
||||||
|
|
||||||
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU Affero General Public License, version 3,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Affero General Public License
|
||||||
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
======= */
|
||||||
|
|
||||||
|
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "ft/ft-internal.h"
|
||||||
|
|
||||||
|
void toku_ft_flusher_get_status(FT_FLUSHER_STATUS);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Only for testing, not for production.
|
||||||
|
*
|
||||||
|
* Set a callback the flusher thread will use to signal various points
|
||||||
|
* during its execution.
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
toku_flusher_thread_set_callback(
|
||||||
|
void (*callback_f)(int, void*),
|
||||||
|
void* extra
|
||||||
|
);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Puts a workitem on the flusher thread queue, scheduling the node to be
|
||||||
|
* flushed by toku_ft_flush_some_child.
|
||||||
|
*/
|
||||||
|
void toku_ft_flush_node_on_background_thread(FT ft, FTNODE parent);
|
||||||
|
|
||||||
|
enum split_mode {
|
||||||
|
SPLIT_EVENLY,
|
||||||
|
SPLIT_LEFT_HEAVY,
|
||||||
|
SPLIT_RIGHT_HEAVY
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
// Given pinned node and pinned child, split child into two
|
||||||
|
// and update node with information about its new child.
|
||||||
|
void toku_ft_split_child(
|
||||||
|
FT ft,
|
||||||
|
FTNODE node,
|
||||||
|
int childnum,
|
||||||
|
FTNODE child,
|
||||||
|
enum split_mode split_mode
|
||||||
|
);
|
||||||
|
|
||||||
|
// Given pinned node, merge childnum with a neighbor and update node with
|
||||||
|
// information about the change
|
||||||
|
void toku_ft_merge_child(
|
||||||
|
FT ft,
|
||||||
|
FTNODE node,
|
||||||
|
int childnum
|
||||||
|
);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Effect: Split a leaf node.
|
||||||
|
* Argument "node" is node to be split.
|
||||||
|
* Upon return:
|
||||||
|
* nodea and nodeb point to new nodes that result from split of "node"
|
||||||
|
* nodea is the left node that results from the split
|
||||||
|
* splitk is the right-most key of nodea
|
||||||
|
*/
|
||||||
|
// TODO: Rename toku_ft_leaf_split
|
||||||
|
void
|
||||||
|
ftleaf_split(
|
||||||
|
FT ft,
|
||||||
|
FTNODE node,
|
||||||
|
FTNODE *nodea,
|
||||||
|
FTNODE *nodeb,
|
||||||
|
DBT *splitk,
|
||||||
|
bool create_new_node,
|
||||||
|
enum split_mode split_mode,
|
||||||
|
uint32_t num_dependent_nodes,
|
||||||
|
FTNODE* dependent_nodes
|
||||||
|
);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Effect: node must be a non-leaf node. It is split into two nodes, and
|
||||||
|
* the fanout is split between them.
|
||||||
|
* Sets splitk->data pointer to a malloc'd value
|
||||||
|
* Sets nodea, and nodeb to the two new nodes.
|
||||||
|
* The caller must replace the old node with the two new nodes.
|
||||||
|
* This function will definitely reduce the number of children for the node,
|
||||||
|
* but it does not guarantee that the resulting nodes are smaller than nodesize.
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
// TODO: Rename toku_ft_nonleaf_split
|
||||||
|
ft_nonleaf_split(
|
||||||
|
FT ft,
|
||||||
|
FTNODE node,
|
||||||
|
FTNODE *nodea,
|
||||||
|
FTNODE *nodeb,
|
||||||
|
DBT *splitk,
|
||||||
|
uint32_t num_dependent_nodes,
|
||||||
|
FTNODE* dependent_nodes
|
||||||
|
);
|
||||||
|
|
||||||
|
/************************************************************************
|
||||||
|
* HOT optimize, should perhaps be factored out to its own header file *
|
||||||
|
************************************************************************
|
||||||
|
*/
|
||||||
|
void toku_ft_hot_get_status(FT_HOT_STATUS);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Takes given FT and pushes all pending messages between left and right to the leaf nodes.
|
||||||
|
* All messages between left and right (inclusive) will be pushed, as will some others
|
||||||
|
* that happen to share buffers with messages near the boundary.
|
||||||
|
* If left is NULL, messages from beginning of FT are pushed. If right is NULL, that means
|
||||||
|
* we go until the end of the FT.
|
||||||
|
*/
|
||||||
|
int
|
||||||
|
toku_ft_hot_optimize(FT_HANDLE ft_h, DBT* left, DBT* right,
|
||||||
|
int (*progress_callback)(void *extra, float progress),
|
||||||
|
void *progress_extra, uint64_t* loops_run);
|
||||||
361
storage/tokudb/PerconaFT/ft/ft-hot-flusher.cc
Normal file
361
storage/tokudb/PerconaFT/ft/ft-hot-flusher.cc
Normal file
@@ -0,0 +1,361 @@
|
|||||||
|
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||||
|
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||||
|
#ident "$Id$"
|
||||||
|
/*======
|
||||||
|
This file is part of PerconaFT.
|
||||||
|
|
||||||
|
|
||||||
|
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||||
|
|
||||||
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License, version 2,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
----------------------------------------
|
||||||
|
|
||||||
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU Affero General Public License, version 3,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Affero General Public License
|
||||||
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
======= */
|
||||||
|
|
||||||
|
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||||
|
|
||||||
|
#include "ft/ft.h"
|
||||||
|
#include "ft/ft-cachetable-wrappers.h"
|
||||||
|
#include "ft/ft-flusher.h"
|
||||||
|
#include "ft/ft-flusher-internal.h"
|
||||||
|
#include "ft/ft-internal.h"
|
||||||
|
#include "ft/node.h"
|
||||||
|
#include "portability/toku_atomic.h"
|
||||||
|
#include "util/context.h"
|
||||||
|
#include "util/status.h"
|
||||||
|
|
||||||
|
// Member Description:
|
||||||
|
// 1. highest_pivot_key - this is the key that corresponds to the
|
||||||
|
// most recently flushed leaf entry.
|
||||||
|
// 2. max_current_key - this is the pivot/key that we inherit as
|
||||||
|
// we descend down the tree. We use this to set the highest_pivot_key.
|
||||||
|
// 3. sub_tree_size - this is the percentage of the entire tree that our
|
||||||
|
// current position (in a sub-tree) encompasses.
|
||||||
|
// 4. percentage_done - this is the percentage of leaf nodes that have
|
||||||
|
// been flushed into.
|
||||||
|
// 5. rightmost_leaf_seen - this is a boolean we use to determine if
|
||||||
|
// we have flushed to every leaf node.
|
||||||
|
struct hot_flusher_extra {
|
||||||
|
DBT highest_pivot_key;
|
||||||
|
DBT max_current_key;
|
||||||
|
float sub_tree_size;
|
||||||
|
float percentage_done;
|
||||||
|
bool rightmost_leaf_seen;
|
||||||
|
};
|
||||||
|
|
||||||
|
void
|
||||||
|
toku_ft_hot_get_status(FT_HOT_STATUS s) {
|
||||||
|
hot_status.init();
|
||||||
|
*s = hot_status;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Copies the max current key to the highest pivot key seen.
|
||||||
|
static void
|
||||||
|
hot_set_highest_key(struct hot_flusher_extra *flusher)
|
||||||
|
{
|
||||||
|
// The max current key will be NULL if we are traversing in the
|
||||||
|
// rightmost subtree of a given parent. As such, we don't want to
|
||||||
|
// allocate memory for this case.
|
||||||
|
toku_destroy_dbt(&flusher->highest_pivot_key);
|
||||||
|
if (flusher->max_current_key.data != NULL) {
|
||||||
|
// Otherwise, let's copy all the contents from one key to the other.
|
||||||
|
toku_clone_dbt(&flusher->highest_pivot_key, flusher->max_current_key);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
hot_set_start_key(struct hot_flusher_extra *flusher, const DBT* start)
|
||||||
|
{
|
||||||
|
toku_destroy_dbt(&flusher->highest_pivot_key);
|
||||||
|
if (start != NULL) {
|
||||||
|
// Otherwise, let's copy all the contents from one key to the other.
|
||||||
|
toku_clone_dbt(&flusher->highest_pivot_key, *start);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
hot_just_pick_child(FT ft,
|
||||||
|
FTNODE parent,
|
||||||
|
struct hot_flusher_extra *flusher)
|
||||||
|
{
|
||||||
|
int childnum = 0;
|
||||||
|
|
||||||
|
// Search through the parent's pivots, see which one is greater than
|
||||||
|
// the highest_pivot_key seen so far.
|
||||||
|
if (flusher->highest_pivot_key.data == NULL)
|
||||||
|
{
|
||||||
|
// Special case of the first child of the root node.
|
||||||
|
// Also known as, NEGATIVE INFINITY....
|
||||||
|
childnum = 0;
|
||||||
|
} else {
|
||||||
|
// Find the pivot boundary.
|
||||||
|
childnum = toku_ftnode_hot_next_child(parent, &flusher->highest_pivot_key, ft->cmp);
|
||||||
|
}
|
||||||
|
|
||||||
|
return childnum;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
hot_update_flusher_keys(FTNODE parent,
|
||||||
|
int childnum,
|
||||||
|
struct hot_flusher_extra *flusher)
|
||||||
|
{
|
||||||
|
// Update maximum current key if the child is NOT the rightmost
|
||||||
|
// child node.
|
||||||
|
if (childnum < (parent->n_children - 1)) {
|
||||||
|
toku_destroy_dbt(&flusher->max_current_key);
|
||||||
|
toku_clone_dbt(&flusher->max_current_key, parent->pivotkeys.get_pivot(childnum));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Picks which child toku_ft_flush_some_child will use for flushing and
|
||||||
|
// recursion.
|
||||||
|
static int
|
||||||
|
hot_pick_child(FT ft,
|
||||||
|
FTNODE parent,
|
||||||
|
void *extra)
|
||||||
|
{
|
||||||
|
struct hot_flusher_extra *flusher = (struct hot_flusher_extra *) extra;
|
||||||
|
int childnum = hot_just_pick_child(ft, parent, flusher);
|
||||||
|
|
||||||
|
// Now we determine the percentage of the tree flushed so far.
|
||||||
|
|
||||||
|
// Whichever subtree we choose to recurse into, it is a fraction
|
||||||
|
// of the current parent.
|
||||||
|
flusher->sub_tree_size /= parent->n_children;
|
||||||
|
|
||||||
|
// Update the percentage complete, using our new sub tree size AND
|
||||||
|
// the number of children we have already flushed.
|
||||||
|
flusher->percentage_done += (flusher->sub_tree_size * childnum);
|
||||||
|
|
||||||
|
hot_update_flusher_keys(parent, childnum, flusher);
|
||||||
|
|
||||||
|
return childnum;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Does nothing for now.
|
||||||
|
static void
|
||||||
|
hot_update_status(FTNODE UU(child),
|
||||||
|
int UU(dirtied),
|
||||||
|
void *UU(extra))
|
||||||
|
{
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// If we've just split a node, HOT needs another chance to decide which
|
||||||
|
// one to flush into. This gives it a chance to do that, and update the
|
||||||
|
// keys it maintains.
|
||||||
|
static int
|
||||||
|
hot_pick_child_after_split(FT ft,
|
||||||
|
FTNODE parent,
|
||||||
|
int childnuma,
|
||||||
|
int childnumb,
|
||||||
|
void *extra)
|
||||||
|
{
|
||||||
|
struct hot_flusher_extra *flusher = (struct hot_flusher_extra *) extra;
|
||||||
|
int childnum = hot_just_pick_child(ft, parent, flusher);
|
||||||
|
assert(childnum == childnuma || childnum == childnumb);
|
||||||
|
hot_update_flusher_keys(parent, childnum, flusher);
|
||||||
|
if (parent->height == 1) {
|
||||||
|
// We don't want to recurse into a leaf node, but if we return
|
||||||
|
// anything valid, ft_split_child will try to go there, so we
|
||||||
|
// return -1 to allow ft_split_child to have its default
|
||||||
|
// behavior, which will be to stop recursing.
|
||||||
|
childnum = -1;
|
||||||
|
}
|
||||||
|
return childnum;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Basic constructor/initializer for the hot flusher struct.
|
||||||
|
static void
|
||||||
|
hot_flusher_init(struct flusher_advice *advice,
|
||||||
|
struct hot_flusher_extra *flusher)
|
||||||
|
{
|
||||||
|
// Initialize the highest pivot key seen to NULL. This represents
|
||||||
|
// NEGATIVE INFINITY and is used to cover the special case of our
|
||||||
|
// first traversal of the tree.
|
||||||
|
toku_init_dbt(&(flusher->highest_pivot_key));
|
||||||
|
toku_init_dbt(&(flusher->max_current_key));
|
||||||
|
flusher->rightmost_leaf_seen = 0;
|
||||||
|
flusher->sub_tree_size = 1.0;
|
||||||
|
flusher->percentage_done = 0.0;
|
||||||
|
flusher_advice_init(advice,
|
||||||
|
hot_pick_child,
|
||||||
|
dont_destroy_basement_nodes,
|
||||||
|
always_recursively_flush,
|
||||||
|
default_merge_child,
|
||||||
|
hot_update_status,
|
||||||
|
hot_pick_child_after_split,
|
||||||
|
flusher
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Erases any DBT keys we have copied from a traversal.
|
||||||
|
static void
|
||||||
|
hot_flusher_destroy(struct hot_flusher_extra *flusher)
|
||||||
|
{
|
||||||
|
toku_destroy_dbt(&flusher->highest_pivot_key);
|
||||||
|
toku_destroy_dbt(&flusher->max_current_key);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Entry point for Hot Optimize Table (HOT). Note, this function is
|
||||||
|
// not recursive. It iterates over root-to-leaf paths.
|
||||||
|
int
|
||||||
|
toku_ft_hot_optimize(FT_HANDLE ft_handle, DBT* left, DBT* right,
|
||||||
|
int (*progress_callback)(void *extra, float progress),
|
||||||
|
void *progress_extra, uint64_t* loops_run)
|
||||||
|
{
|
||||||
|
toku::context flush_ctx(CTX_FLUSH);
|
||||||
|
|
||||||
|
int r = 0;
|
||||||
|
struct hot_flusher_extra flusher;
|
||||||
|
struct flusher_advice advice;
|
||||||
|
|
||||||
|
hot_flusher_init(&advice, &flusher);
|
||||||
|
hot_set_start_key(&flusher, left);
|
||||||
|
|
||||||
|
uint64_t loop_count = 0;
|
||||||
|
MSN msn_at_start_of_hot = ZERO_MSN; // capture msn from root at
|
||||||
|
// start of HOT operation
|
||||||
|
(void) toku_sync_fetch_and_add(&HOT_STATUS_VAL(FT_HOT_NUM_STARTED), 1);
|
||||||
|
|
||||||
|
toku_ft_note_hot_begin(ft_handle);
|
||||||
|
|
||||||
|
// Higher level logic prevents a dictionary from being deleted or
|
||||||
|
// truncated during a hot optimize operation. Doing so would violate
|
||||||
|
// the hot optimize contract.
|
||||||
|
do {
|
||||||
|
FTNODE root;
|
||||||
|
CACHEKEY root_key;
|
||||||
|
uint32_t fullhash;
|
||||||
|
|
||||||
|
{
|
||||||
|
// Get root node (the first parent of each successive HOT
|
||||||
|
// call.)
|
||||||
|
toku_calculate_root_offset_pointer(ft_handle->ft, &root_key, &fullhash);
|
||||||
|
ftnode_fetch_extra bfe;
|
||||||
|
bfe.create_for_full_read(ft_handle->ft);
|
||||||
|
toku_pin_ftnode(ft_handle->ft,
|
||||||
|
(BLOCKNUM) root_key,
|
||||||
|
fullhash,
|
||||||
|
&bfe,
|
||||||
|
PL_WRITE_EXPENSIVE,
|
||||||
|
&root,
|
||||||
|
true);
|
||||||
|
toku_ftnode_assert_fully_in_memory(root);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Prepare HOT diagnostics.
|
||||||
|
if (loop_count == 0) {
|
||||||
|
// The first time through, capture msn from root
|
||||||
|
msn_at_start_of_hot = root->max_msn_applied_to_node_on_disk;
|
||||||
|
}
|
||||||
|
|
||||||
|
loop_count++;
|
||||||
|
|
||||||
|
if (loop_count > HOT_STATUS_VAL(FT_HOT_MAX_ROOT_FLUSH_COUNT)) {
|
||||||
|
HOT_STATUS_VAL(FT_HOT_MAX_ROOT_FLUSH_COUNT) = loop_count;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Initialize the maximum current key. We need to do this for
|
||||||
|
// every traversal.
|
||||||
|
toku_destroy_dbt(&flusher.max_current_key);
|
||||||
|
|
||||||
|
flusher.sub_tree_size = 1.0;
|
||||||
|
flusher.percentage_done = 0.0;
|
||||||
|
|
||||||
|
// This should recurse to the bottom of the tree and then
|
||||||
|
// return.
|
||||||
|
if (root->height > 0) {
|
||||||
|
toku_ft_flush_some_child(ft_handle->ft, root, &advice);
|
||||||
|
} else {
|
||||||
|
// Since there are no children to flush, we should abort
|
||||||
|
// the HOT call.
|
||||||
|
flusher.rightmost_leaf_seen = 1;
|
||||||
|
toku_unpin_ftnode(ft_handle->ft, root);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Set the highest pivot key seen here, since the parent may
|
||||||
|
// be unlocked and NULL'd later in our caller:
|
||||||
|
// toku_ft_flush_some_child().
|
||||||
|
hot_set_highest_key(&flusher);
|
||||||
|
|
||||||
|
// This is where we determine if the traversal is finished or
|
||||||
|
// not.
|
||||||
|
if (flusher.max_current_key.data == NULL) {
|
||||||
|
flusher.rightmost_leaf_seen = 1;
|
||||||
|
}
|
||||||
|
else if (right) {
|
||||||
|
// if we have flushed past the bounds set for us,
|
||||||
|
// set rightmost_leaf_seen so we exit
|
||||||
|
int cmp = ft_handle->ft->cmp(&flusher.max_current_key, right);
|
||||||
|
if (cmp > 0) {
|
||||||
|
flusher.rightmost_leaf_seen = 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Update HOT's progress.
|
||||||
|
if (progress_callback != NULL) {
|
||||||
|
r = progress_callback(progress_extra, flusher.percentage_done);
|
||||||
|
|
||||||
|
// Check if the callback wants us to stop running HOT.
|
||||||
|
if (r != 0) {
|
||||||
|
flusher.rightmost_leaf_seen = 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Loop until the max key has been updated to positive
|
||||||
|
// infinity.
|
||||||
|
} while (!flusher.rightmost_leaf_seen);
|
||||||
|
*loops_run = loop_count;
|
||||||
|
|
||||||
|
// Cleanup.
|
||||||
|
hot_flusher_destroy(&flusher);
|
||||||
|
|
||||||
|
// More diagnostics.
|
||||||
|
{
|
||||||
|
bool success = false;
|
||||||
|
if (r == 0) { success = true; }
|
||||||
|
|
||||||
|
{
|
||||||
|
toku_ft_note_hot_complete(ft_handle, success, msn_at_start_of_hot);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (success) {
|
||||||
|
(void) toku_sync_fetch_and_add(&HOT_STATUS_VAL(FT_HOT_NUM_COMPLETED), 1);
|
||||||
|
} else {
|
||||||
|
(void) toku_sync_fetch_and_add(&HOT_STATUS_VAL(FT_HOT_NUM_ABORTED), 1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
#include <toku_race_tools.h>
|
||||||
|
void __attribute__((__constructor__)) toku_hot_helgrind_ignore(void);
|
||||||
|
void
|
||||||
|
toku_hot_helgrind_ignore(void) {
|
||||||
|
// incremented only while lock is held, but read by engine status asynchronously.
|
||||||
|
TOKU_VALGRIND_HG_DISABLE_CHECKING(&hot_status, sizeof hot_status);
|
||||||
|
}
|
||||||
471
storage/tokudb/PerconaFT/ft/ft-internal.h
Normal file
471
storage/tokudb/PerconaFT/ft/ft-internal.h
Normal file
@@ -0,0 +1,471 @@
|
|||||||
|
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||||
|
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||||
|
#ident "$Id$"
|
||||||
|
/*======
|
||||||
|
This file is part of PerconaFT.
|
||||||
|
|
||||||
|
|
||||||
|
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||||
|
|
||||||
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License, version 2,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
----------------------------------------
|
||||||
|
|
||||||
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU Affero General Public License, version 3,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Affero General Public License
|
||||||
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
======= */
|
||||||
|
|
||||||
|
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "portability/toku_config.h"
|
||||||
|
#include "portability/toku_list.h"
|
||||||
|
#include "portability/toku_race_tools.h"
|
||||||
|
|
||||||
|
#include "ft/cachetable/cachetable.h"
|
||||||
|
#include "ft/comparator.h"
|
||||||
|
#include "ft/ft.h"
|
||||||
|
#include "ft/ft-ops.h"
|
||||||
|
#include "ft/node.h"
|
||||||
|
#include "ft/serialize/block_table.h"
|
||||||
|
#include "ft/txn/rollback.h"
|
||||||
|
#include "ft/ft-status.h"
|
||||||
|
|
||||||
|
// Symbol TOKUDB_REVISION must be supplied by the build; it becomes BUILD_ID.
|
||||||
|
// There is no development-build fallback: compilation stops with #error when it is undefined.
|
||||||
|
#if defined(TOKUDB_REVISION)
|
||||||
|
#define BUILD_ID TOKUDB_REVISION
|
||||||
|
#else
|
||||||
|
#error
|
||||||
|
#endif
|
||||||
|
|
||||||
|
struct ft_search;
|
||||||
|
|
||||||
|
enum { FT_DEFAULT_FANOUT = 16 };
|
||||||
|
enum { FT_DEFAULT_NODE_SIZE = 4 * 1024 * 1024 };
|
||||||
|
enum { FT_DEFAULT_BASEMENT_NODE_SIZE = 128 * 1024 };
|
||||||
|
|
||||||
|
// We optimize for a sequential insert pattern if 100 consecutive injections
|
||||||
|
// happen into the rightmost leaf node due to promotion.
|
||||||
|
enum { FT_SEQINSERT_SCORE_THRESHOLD = 100 };
|
||||||
|
|
||||||
|
uint32_t compute_child_fullhash (CACHEFILE cf, FTNODE node, int childnum);
|
||||||
|
|
||||||
|
enum ft_type {
|
||||||
|
FT_CURRENT = 1,
|
||||||
|
FT_CHECKPOINT_INPROGRESS
|
||||||
|
};
|
||||||
|
|
||||||
|
// The ft_header is not managed by the cachetable. Instead, it hangs off the cachefile as userdata.
|
||||||
|
struct ft_header {
|
||||||
|
enum ft_type type;
|
||||||
|
|
||||||
|
int dirty;
|
||||||
|
|
||||||
|
// Free-running counter incremented once per checkpoint (toggling LSB).
|
||||||
|
// LSB indicates which header location is used on disk so this
|
||||||
|
// counter is effectively a boolean which alternates with each checkpoint.
|
||||||
|
uint64_t checkpoint_count;
|
||||||
|
// LSN of creation of "checkpoint-begin" record in log.
|
||||||
|
LSN checkpoint_lsn;
|
||||||
|
|
||||||
|
// see serialize/ft_layout_version.h. maybe don't need this if we assume
|
||||||
|
// it's always the current version after deserializing
|
||||||
|
const int layout_version;
|
||||||
|
// different (<) from layout_version if upgraded from a previous
|
||||||
|
// version (useful for debugging)
|
||||||
|
const int layout_version_original;
|
||||||
|
// build_id (svn rev number) of software that wrote this node to
|
||||||
|
// disk. (read from disk, overwritten when written to disk, I
|
||||||
|
// think).
|
||||||
|
const uint32_t build_id;
|
||||||
|
// build_id of software that created this tree
|
||||||
|
const uint32_t build_id_original;
|
||||||
|
|
||||||
|
// time this tree was created
|
||||||
|
const uint64_t time_of_creation;
|
||||||
|
// and the root transaction id that created it
|
||||||
|
TXNID root_xid_that_created;
|
||||||
|
// last time this header was serialized to disk (read from disk,
|
||||||
|
// overwritten when written to disk)
|
||||||
|
uint64_t time_of_last_modification;
|
||||||
|
// last time that this tree was verified
|
||||||
|
uint64_t time_of_last_verification;
|
||||||
|
|
||||||
|
// this field is essentially a const
|
||||||
|
BLOCKNUM root_blocknum;
|
||||||
|
|
||||||
|
const unsigned int flags;
|
||||||
|
|
||||||
|
//protected by toku_ft_lock
|
||||||
|
unsigned int nodesize;
|
||||||
|
unsigned int basementnodesize;
|
||||||
|
enum toku_compression_method compression_method;
|
||||||
|
unsigned int fanout;
|
||||||
|
|
||||||
|
// Current Minimum MSN to be used when upgrading pre-MSN FT's.
|
||||||
|
// This is decremented from our current MIN_MSN so as not to clash
|
||||||
|
// with any existing 'normal' MSN's.
|
||||||
|
MSN highest_unused_msn_for_upgrade;
|
||||||
|
// Largest MSN ever injected into the tree. Used to set the MSN for
|
||||||
|
// messages as they get injected.
|
||||||
|
MSN max_msn_in_ft;
|
||||||
|
|
||||||
|
// last time that a hot optimize operation was begun
|
||||||
|
uint64_t time_of_last_optimize_begin;
|
||||||
|
// last time that a hot optimize operation was successfully completed
|
||||||
|
uint64_t time_of_last_optimize_end;
|
||||||
|
// the number of hot optimize operations currently in progress on this tree
|
||||||
|
uint32_t count_of_optimize_in_progress;
|
||||||
|
// the number of hot optimize operations in progress on this tree at the time of the last crash (this field is in-memory only)
|
||||||
|
uint32_t count_of_optimize_in_progress_read_from_disk;
|
||||||
|
// all messages before this msn have been applied to leaf nodes
|
||||||
|
MSN msn_at_start_of_last_completed_optimize;
|
||||||
|
|
||||||
|
STAT64INFO_S on_disk_stats;
|
||||||
|
};
|
||||||
|
typedef struct ft_header *FT_HEADER;
|
||||||
|
|
||||||
|
// ft_header is always the current version.
|
||||||
|
struct ft {
|
||||||
|
FT_HEADER h;
|
||||||
|
FT_HEADER checkpoint_header;
|
||||||
|
|
||||||
|
// These are (mostly) read-only.
|
||||||
|
|
||||||
|
CACHEFILE cf;
|
||||||
|
// unique id for dictionary
|
||||||
|
DICTIONARY_ID dict_id;
|
||||||
|
|
||||||
|
// protected by locktree
|
||||||
|
DESCRIPTOR_S descriptor;
|
||||||
|
|
||||||
|
// protected by locktree and user.
|
||||||
|
// User makes sure this is only changed when no activity on tree
|
||||||
|
DESCRIPTOR_S cmp_descriptor;
|
||||||
|
// contains a pointer to cmp_descriptor (above) - their lifetimes are bound
|
||||||
|
toku::comparator cmp;
|
||||||
|
|
||||||
|
// the update function always utilizes the cmp_descriptor, not the regular one
|
||||||
|
ft_update_func update_fun;
|
||||||
|
|
||||||
|
// These are not read-only:
|
||||||
|
|
||||||
|
// protected by blocktable lock
|
||||||
|
block_table blocktable;
|
||||||
|
|
||||||
|
// protected by atomic builtins
|
||||||
|
STAT64INFO_S in_memory_stats;
|
||||||
|
|
||||||
|
// transient, not serialized to disk. updated when we do write to
|
||||||
|
// disk. tells us whether we can do partial eviction (we can't if
|
||||||
|
// the on-disk layout version is from before basement nodes)
|
||||||
|
int layout_version_read_from_disk;
|
||||||
|
|
||||||
|
// Logically the reference count is zero if live_ft_handles is empty, num_txns is 0, and pinned_by_checkpoint is false.
|
||||||
|
|
||||||
|
// ft_ref_lock protects modifying live_ft_handles, num_txns, and pinned_by_checkpoint.
|
||||||
|
toku_mutex_t ft_ref_lock;
|
||||||
|
struct toku_list live_ft_handles;
|
||||||
|
// Number of transactions that are using this FT. you should only be able
|
||||||
|
// to modify this if you have a valid handle in live_ft_handles
|
||||||
|
uint32_t num_txns;
|
||||||
|
// A checkpoint is running. If true, then keep this header around for checkpoint, like a transaction
|
||||||
|
bool pinned_by_checkpoint;
|
||||||
|
|
||||||
|
// is this ft a blackhole? if so, all messages are dropped.
|
||||||
|
bool blackhole;
|
||||||
|
|
||||||
|
// The blocknum of the rightmost leaf node in the tree. Stays constant through splits
|
||||||
|
// and merges using pair-swapping (like the root node, see toku_ftnode_swap_pair_values())
|
||||||
|
//
|
||||||
|
// This field only transitions from RESERVED_BLOCKNUM_NULL to non-null, never back.
|
||||||
|
// We initialize it when promotion inserts into a non-root leaf node on the right extreme.
|
||||||
|
// We use the blocktable lock to protect the initialize transition, though it's not really
|
||||||
|
// necessary since all threads should be setting it to the same value. We maintain that invariant
|
||||||
|
// on first initialization, see ft_set_or_verify_rightmost_blocknum()
|
||||||
|
BLOCKNUM rightmost_blocknum;
|
||||||
|
|
||||||
|
// sequential access pattern heuristic
|
||||||
|
// - when promotion pushes a message directly into the rightmost leaf, the score goes up.
|
||||||
|
// - if the score is high enough, we optimistically attempt to insert directly into the rightmost leaf
|
||||||
|
// - if our attempt fails because the key was not in range of the rightmost leaf, we reset the score back to 0
|
||||||
|
uint32_t seqinsert_score;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Allocate a DB struct on the stack and only set its comparison
|
||||||
|
// descriptor. We don't bother setting any other fields because
|
||||||
|
// the comparison function doesn't need it, and we would like to
|
||||||
|
// reduce the CPU work done per comparison.
|
||||||
|
#define FAKE_DB(db, desc) struct __toku_db db; do { db.cmp_descriptor = const_cast<DESCRIPTOR>(desc); } while (0)
|
||||||
|
|
||||||
|
struct ft_options {
|
||||||
|
unsigned int nodesize;
|
||||||
|
unsigned int basementnodesize;
|
||||||
|
enum toku_compression_method compression_method;
|
||||||
|
unsigned int fanout;
|
||||||
|
unsigned int flags;
|
||||||
|
uint8_t memcmp_magic;
|
||||||
|
ft_compare_func compare_fun;
|
||||||
|
ft_update_func update_fun;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct ft_handle {
|
||||||
|
// The fractal tree.
|
||||||
|
FT ft;
|
||||||
|
|
||||||
|
on_redirect_callback redirect_callback;
|
||||||
|
void *redirect_callback_extra;
|
||||||
|
struct toku_list live_ft_handle_link;
|
||||||
|
bool did_set_flags;
|
||||||
|
|
||||||
|
struct ft_options options;
|
||||||
|
};
|
||||||
|
|
||||||
|
PAIR_ATTR make_ftnode_pair_attr(FTNODE node);
|
||||||
|
PAIR_ATTR make_invalid_pair_attr(void);
|
||||||
|
|
||||||
|
//
|
||||||
|
// Field in ftnode_fetch_extra that tells the
|
||||||
|
// partial fetch callback what piece of the node
|
||||||
|
// is needed by the ydb
|
||||||
|
//
|
||||||
|
enum ftnode_fetch_type {
|
||||||
|
ftnode_fetch_none = 1, // no partitions needed.
|
||||||
|
ftnode_fetch_subset, // some subset of partitions needed
|
||||||
|
ftnode_fetch_prefetch, // this is part of a prefetch call
|
||||||
|
ftnode_fetch_all, // every partition is needed
|
||||||
|
ftnode_fetch_keymatch, // one child is needed if it holds both keys
|
||||||
|
};
|
||||||
|
|
||||||
|
// Info passed to cachetable fetch callbacks to say which parts of a node
|
||||||
|
// should be fetched (perhaps a subset, perhaps the whole thing, depending
|
||||||
|
// on operation)
|
||||||
|
class ftnode_fetch_extra {
|
||||||
|
public:
|
||||||
|
// Used when the whole node must be in memory, such as for flushes.
|
||||||
|
void create_for_full_read(FT ft);
|
||||||
|
|
||||||
|
// A subset of children are necessary. Used by point queries.
|
||||||
|
void create_for_subset_read(FT ft, ft_search *search, const DBT *left, const DBT *right,
|
||||||
|
bool left_is_neg_infty, bool right_is_pos_infty,
|
||||||
|
bool disable_prefetching, bool read_all_partitions);
|
||||||
|
|
||||||
|
// No partitions are necessary - only pivots and/or subtree estimates.
|
||||||
|
// Currently used for stat64.
|
||||||
|
void create_for_min_read(FT ft);
|
||||||
|
|
||||||
|
// Used to prefetch partitions that fall within the bounds given by the cursor.
|
||||||
|
void create_for_prefetch(FT ft, struct ft_cursor *cursor);
|
||||||
|
|
||||||
|
// Only a portion of the node (within a keyrange) is required.
|
||||||
|
// Used by keysrange when the left and right key are in the same basement node.
|
||||||
|
void create_for_keymatch(FT ft, const DBT *left, const DBT *right,
|
||||||
|
bool disable_prefetching, bool read_all_partitions);
|
||||||
|
|
||||||
|
void destroy(void);
|
||||||
|
|
||||||
|
// return: true if a specific childnum is required to be in memory
|
||||||
|
bool wants_child_available(int childnum) const;
|
||||||
|
|
||||||
|
// return: the childnum of the leftmost child that is required to be in memory
|
||||||
|
int leftmost_child_wanted(FTNODE node) const;
|
||||||
|
|
||||||
|
// return: the childnum of the rightmost child that is required to be in memory
|
||||||
|
int rightmost_child_wanted(FTNODE node) const;
|
||||||
|
|
||||||
|
// needed for reading a node off disk
|
||||||
|
FT ft;
|
||||||
|
|
||||||
|
enum ftnode_fetch_type type;
|
||||||
|
|
||||||
|
// used in the case where type == ftnode_fetch_subset
|
||||||
|
// parameters needed to find out which child needs to be decompressed (so it can be read)
|
||||||
|
ft_search *search;
|
||||||
|
DBT range_lock_left_key, range_lock_right_key;
|
||||||
|
bool left_is_neg_infty, right_is_pos_infty;
|
||||||
|
|
||||||
|
// states if we should try to aggressively fetch basement nodes
|
||||||
|
// that are not specifically needed for current query,
|
||||||
|
// but may be needed for other cursor operations user is doing
|
||||||
|
// For example, if we have not disabled prefetching,
|
||||||
|
// and the user is doing a dictionary wide scan, then
|
||||||
|
// even though a query may only want one basement node,
|
||||||
|
// we fetch all basement nodes in a leaf node.
|
||||||
|
bool disable_prefetching;
|
||||||
|
|
||||||
|
// this value will be set during the fetch_callback call by toku_ftnode_fetch_callback or toku_ftnode_pf_req_callback
|
||||||
|
// these callbacks need to evaluate this anyway, so we cache it here so the search code does not reevaluate it
|
||||||
|
int child_to_read;
|
||||||
|
|
||||||
|
// when we read internal nodes, we want to read all the data off disk in one I/O
|
||||||
|
// then we'll treat it as normal and only decompress the needed partitions etc.
|
||||||
|
bool read_all_partitions;
|
||||||
|
|
||||||
|
// Accounting: How many bytes were read, and how much time did we spend doing I/O?
|
||||||
|
uint64_t bytes_read;
|
||||||
|
tokutime_t io_time;
|
||||||
|
tokutime_t decompress_time;
|
||||||
|
tokutime_t deserialize_time;
|
||||||
|
|
||||||
|
private:
|
||||||
|
void _create_internal(FT ft_);
|
||||||
|
};
|
||||||
|
|
||||||
|
// Only exported for tests.
|
||||||
|
// Cachetable callbacks for ftnodes.
|
||||||
|
void toku_ftnode_clone_callback(void* value_data, void** cloned_value_data, long* clone_size, PAIR_ATTR* new_attr, bool for_checkpoint, void* write_extraargs);
|
||||||
|
void toku_ftnode_checkpoint_complete_callback(void *value_data);
|
||||||
|
void toku_ftnode_flush_callback (CACHEFILE cachefile, int fd, BLOCKNUM blocknum, void *ftnode_v, void** UU(disk_data), void *extraargs, PAIR_ATTR size, PAIR_ATTR* new_size, bool write_me, bool keep_me, bool for_checkpoint, bool is_clone);
|
||||||
|
int toku_ftnode_fetch_callback (CACHEFILE cachefile, PAIR p, int fd, BLOCKNUM blocknum, uint32_t fullhash, void **ftnode_pv, void** UU(disk_data), PAIR_ATTR *sizep, int*dirty, void*extraargs);
|
||||||
|
void toku_ftnode_pe_est_callback(void* ftnode_pv, void* disk_data, long* bytes_freed_estimate, enum partial_eviction_cost *cost, void* write_extraargs);
|
||||||
|
int toku_ftnode_pe_callback(void *ftnode_pv, PAIR_ATTR old_attr, void *extraargs,
|
||||||
|
void (*finalize)(PAIR_ATTR new_attr, void *extra), void *finalize_extra);
|
||||||
|
bool toku_ftnode_pf_req_callback(void* ftnode_pv, void* read_extraargs);
|
||||||
|
int toku_ftnode_pf_callback(void* ftnode_pv, void* UU(disk_data), void* read_extraargs, int fd, PAIR_ATTR* sizep);
|
||||||
|
int toku_ftnode_cleaner_callback( void *ftnode_pv, BLOCKNUM blocknum, uint32_t fullhash, void *extraargs);
|
||||||
|
|
||||||
|
CACHETABLE_WRITE_CALLBACK get_write_callbacks_for_node(FT ft);
|
||||||
|
|
||||||
|
// This is only exported for tests.
|
||||||
|
// append a child node to a parent node
|
||||||
|
void toku_ft_nonleaf_append_child(FTNODE node, FTNODE child, const DBT *pivotkey);
|
||||||
|
|
||||||
|
// This is only exported for tests.
|
||||||
|
// append a message to a nonleaf node child buffer
|
||||||
|
void toku_ft_append_to_child_buffer(const toku::comparator &cmp, FTNODE node, int childnum, enum ft_msg_type type, MSN msn, XIDS xids, bool is_fresh, const DBT *key, const DBT *val);
|
||||||
|
|
||||||
|
STAT64INFO_S toku_get_and_clear_basement_stats(FTNODE leafnode);
|
||||||
|
|
||||||
|
//#define SLOW
|
||||||
|
#ifdef SLOW
|
||||||
|
#define VERIFY_NODE(t,n) (toku_verify_or_set_counts(n), toku_verify_estimates(t,n))
|
||||||
|
#else
|
||||||
|
#define VERIFY_NODE(t,n) ((void)0)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
void toku_verify_or_set_counts(FTNODE);
|
||||||
|
|
||||||
|
// TODO: consider moving this to ft/pivotkeys.cc
|
||||||
|
class pivot_bounds {
|
||||||
|
public:
|
||||||
|
pivot_bounds(const DBT &lbe_dbt, const DBT &ubi_dbt);
|
||||||
|
|
||||||
|
pivot_bounds next_bounds(FTNODE node, int childnum) const;
|
||||||
|
|
||||||
|
const DBT *lbe() const;
|
||||||
|
const DBT *ubi() const;
|
||||||
|
|
||||||
|
static pivot_bounds infinite_bounds();
|
||||||
|
|
||||||
|
private:
|
||||||
|
DBT _prepivotkey(FTNODE node, int childnum, const DBT &lbe_dbt) const;
|
||||||
|
DBT _postpivotkey(FTNODE node, int childnum, const DBT &ubi_dbt) const;
|
||||||
|
|
||||||
|
// if toku_dbt_is_empty() is true for either bound, then it represents
|
||||||
|
// negative or positive infinity (which are exclusive in practice)
|
||||||
|
const DBT _lower_bound_exclusive;
|
||||||
|
const DBT _upper_bound_inclusive;
|
||||||
|
};
|
||||||
|
|
||||||
|
// allocate a block number
|
||||||
|
// allocate and initialize a ftnode
|
||||||
|
// put the ftnode into the cache table
|
||||||
|
void toku_create_new_ftnode(FT_HANDLE ft_handle, FTNODE *result, int height, int n_children);
|
||||||
|
|
||||||
|
/* Stuff for testing */
|
||||||
|
// toku_testsetup_initialize() must be called before any other test_setup_xxx() functions are called.
|
||||||
|
void toku_testsetup_initialize(void);
|
||||||
|
int toku_testsetup_leaf(FT_HANDLE ft_h, BLOCKNUM *blocknum, int n_children, char **keys, int *keylens);
|
||||||
|
int toku_testsetup_nonleaf (FT_HANDLE ft_h, int height, BLOCKNUM *blocknum, int n_children, BLOCKNUM *children, char **keys, int *keylens);
|
||||||
|
int toku_testsetup_root(FT_HANDLE ft_h, BLOCKNUM);
|
||||||
|
int toku_testsetup_get_sersize(FT_HANDLE ft_h, BLOCKNUM); // Return the size on disk.
|
||||||
|
int toku_testsetup_insert_to_leaf (FT_HANDLE ft_h, BLOCKNUM, const char *key, int keylen, const char *val, int vallen);
|
||||||
|
int toku_testsetup_insert_to_nonleaf (FT_HANDLE ft_h, BLOCKNUM, enum ft_msg_type, const char *key, int keylen, const char *val, int vallen);
|
||||||
|
void toku_pin_node_with_min_bfe(FTNODE* node, BLOCKNUM b, FT_HANDLE t);
|
||||||
|
|
||||||
|
void toku_ft_root_put_msg(FT ft, const ft_msg &msg, txn_gc_info *gc_info);
|
||||||
|
|
||||||
|
// TODO: Rename
|
||||||
|
void toku_get_node_for_verify(BLOCKNUM blocknum, FT_HANDLE ft_h, FTNODE* nodep);
|
||||||
|
|
||||||
|
int
|
||||||
|
toku_verify_ftnode (FT_HANDLE ft_h,
|
||||||
|
MSN rootmsn, MSN parentmsn_with_messages, bool messages_exist_above,
|
||||||
|
FTNODE node, int height,
|
||||||
|
const DBT *lesser_pivot, // Everything in the subtree should be > lesser_pivot. (lesser_pivot==NULL if there is no lesser pivot.)
|
||||||
|
const DBT *greatereq_pivot, // Everything in the subtree should be <= greatereq_pivot. (greatereq_pivot==NULL if there is no greater-or-equal pivot.)
|
||||||
|
int (*progress_callback)(void *extra, float progress), void *progress_extra,
|
||||||
|
int recurse, int verbose, int keep_going_on_failure)
|
||||||
|
__attribute__ ((warn_unused_result));
|
||||||
|
|
||||||
|
int toku_db_badformat(void) __attribute__((__warn_unused_result__));
|
||||||
|
|
||||||
|
typedef enum {
|
||||||
|
FT_UPGRADE_FOOTPRINT = 0,
|
||||||
|
FT_UPGRADE_STATUS_NUM_ROWS
|
||||||
|
} ft_upgrade_status_entry;
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
bool initialized;
|
||||||
|
TOKU_ENGINE_STATUS_ROW_S status[FT_UPGRADE_STATUS_NUM_ROWS];
|
||||||
|
} FT_UPGRADE_STATUS_S, *FT_UPGRADE_STATUS;
|
||||||
|
|
||||||
|
void toku_ft_upgrade_get_status(FT_UPGRADE_STATUS);
|
||||||
|
|
||||||
|
void toku_le_get_status(LE_STATUS);
|
||||||
|
|
||||||
|
void toku_ft_status_update_pivot_fetch_reason(ftnode_fetch_extra *bfe);
|
||||||
|
void toku_ft_status_update_flush_reason(FTNODE node, uint64_t uncompressed_bytes_flushed, uint64_t bytes_written, tokutime_t write_time, bool for_checkpoint);
|
||||||
|
void toku_ft_status_update_serialize_times(FTNODE node, tokutime_t serialize_time, tokutime_t compress_time);
|
||||||
|
void toku_ft_status_update_deserialize_times(FTNODE node, tokutime_t deserialize_time, tokutime_t decompress_time);
|
||||||
|
void toku_ft_status_note_msn_discard(void);
|
||||||
|
void toku_ft_status_note_update(bool broadcast);
|
||||||
|
void toku_ft_status_note_msg_bytes_out(size_t buffsize);
|
||||||
|
void toku_ft_status_note_ftnode(int height, bool created); // created = false means destroyed
|
||||||
|
|
||||||
|
void toku_ft_get_status(FT_STATUS);
|
||||||
|
|
||||||
|
void toku_flusher_thread_set_callback(void (*callback_f)(int, void*), void* extra);
|
||||||
|
|
||||||
|
// For upgrade
|
||||||
|
int toku_upgrade_subtree_estimates_to_stat64info(int fd, FT ft) __attribute__((nonnull));
|
||||||
|
int toku_upgrade_msn_from_root_to_header(int fd, FT ft) __attribute__((nonnull));
|
||||||
|
|
||||||
|
// A callback function is invoked with the key, and the data.
|
||||||
|
// The pointers (to the bytevecs) must not be modified. The data must be copied out before the callback function returns.
|
||||||
|
// Note: In the thread-safe version, the ftnode remains locked while the callback function runs. So return soon, and don't call the ft code from the callback function.
|
||||||
|
// If the callback function returns a nonzero value (an error code), then that error code is returned from the get function itself.
|
||||||
|
// The cursor object will have been updated (so that if result==0 the current value is the value being passed)
|
||||||
|
// (If r!=0 then the cursor won't have been updated.)
|
||||||
|
// If r!=0, it's up to the callback function to return that value of r.
|
||||||
|
// A 'key' pointer of NULL means that element is not found (effectively infinity or
|
||||||
|
// -infinity depending on direction)
|
||||||
|
// When lock_only is false, the callback does optional lock tree locking and then processes the key and val.
|
||||||
|
// When lock_only is true, the callback only does optional lock tree locking.
|
||||||
|
typedef int (*FT_GET_CALLBACK_FUNCTION)(uint32_t keylen, const void *key, uint32_t vallen, const void *val, void *extra, bool lock_only);
|
||||||
|
|
||||||
|
typedef bool (*FT_CHECK_INTERRUPT_CALLBACK)(void *extra, uint64_t deleted_rows);
|
||||||
|
|
||||||
|
struct ft_cursor;
|
||||||
|
int toku_ft_search(FT_HANDLE ft_handle, ft_search *search, FT_GET_CALLBACK_FUNCTION getf, void *getf_v, struct ft_cursor *ftcursor, bool can_bulk_fetch);
|
||||||
@@ -1,93 +1,40 @@
|
|||||||
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||||
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||||
#ident "$Id$"
|
#ident "$Id$"
|
||||||
/*
|
/*======
|
||||||
COPYING CONDITIONS NOTICE:
|
This file is part of PerconaFT.
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or modify
|
|
||||||
it under the terms of version 2 of the GNU General Public License as
|
|
||||||
published by the Free Software Foundation, and provided that the
|
|
||||||
following conditions are met:
|
|
||||||
|
|
||||||
* Redistributions of source code must retain this COPYING
|
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||||
CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the
|
|
||||||
DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the
|
|
||||||
PATENT MARKING NOTICE (below), and the PATENT RIGHTS
|
|
||||||
GRANT (below).
|
|
||||||
|
|
||||||
* Redistributions in binary form must reproduce this COPYING
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the
|
it under the terms of the GNU General Public License, version 2,
|
||||||
DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the
|
as published by the Free Software Foundation.
|
||||||
PATENT MARKING NOTICE (below), and the PATENT RIGHTS
|
|
||||||
GRANT (below) in the documentation and/or other materials
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
provided with the distribution.
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
You should have received a copy of the GNU General Public License
|
You should have received a copy of the GNU General Public License
|
||||||
along with this program; if not, write to the Free Software
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
|
||||||
02110-1301, USA.
|
|
||||||
|
|
||||||
COPYRIGHT NOTICE:
|
----------------------------------------
|
||||||
|
|
||||||
TokuFT, Tokutek Fractal Tree Indexing Library.
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
Copyright (C) 2007-2013 Tokutek, Inc.
|
it under the terms of the GNU Affero General Public License, version 3,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
DISCLAIMER:
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
This program is distributed in the hope that it will be useful, but
|
You should have received a copy of the GNU Affero General Public License
|
||||||
WITHOUT ANY WARRANTY; without even the implied warranty of
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
======= */
|
||||||
General Public License for more details.
|
|
||||||
|
|
||||||
UNIVERSITY PATENT NOTICE:
|
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||||
|
|
||||||
The technology is licensed by the Massachusetts Institute of
|
|
||||||
Technology, Rutgers State University of New Jersey, and the Research
|
|
||||||
Foundation of State University of New York at Stony Brook under
|
|
||||||
United States of America Serial No. 11/760379 and to the patents
|
|
||||||
and/or patent applications resulting from it.
|
|
||||||
|
|
||||||
PATENT MARKING NOTICE:
|
|
||||||
|
|
||||||
This software is covered by US Patent No. 8,185,551.
|
|
||||||
This software is covered by US Patent No. 8,489,638.
|
|
||||||
|
|
||||||
PATENT RIGHTS GRANT:
|
|
||||||
|
|
||||||
"THIS IMPLEMENTATION" means the copyrightable works distributed by
|
|
||||||
Tokutek as part of the Fractal Tree project.
|
|
||||||
|
|
||||||
"PATENT CLAIMS" means the claims of patents that are owned or
|
|
||||||
licensable by Tokutek, both currently or in the future; and that in
|
|
||||||
the absence of this license would be infringed by THIS
|
|
||||||
IMPLEMENTATION or by using or running THIS IMPLEMENTATION.
|
|
||||||
|
|
||||||
"PATENT CHALLENGE" shall mean a challenge to the validity,
|
|
||||||
patentability, enforceability and/or non-infringement of any of the
|
|
||||||
PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS.
|
|
||||||
|
|
||||||
Tokutek hereby grants to you, for the term and geographical scope of
|
|
||||||
the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free,
|
|
||||||
irrevocable (except as stated in this section) patent license to
|
|
||||||
make, have made, use, offer to sell, sell, import, transfer, and
|
|
||||||
otherwise run, modify, and propagate the contents of THIS
|
|
||||||
IMPLEMENTATION, where such license applies only to the PATENT
|
|
||||||
CLAIMS. This grant does not include claims that would be infringed
|
|
||||||
only as a consequence of further modifications of THIS
|
|
||||||
IMPLEMENTATION. If you or your agent or licensee institute or order
|
|
||||||
or agree to the institution of patent litigation against any entity
|
|
||||||
(including a cross-claim or counterclaim in a lawsuit) alleging that
|
|
||||||
THIS IMPLEMENTATION constitutes direct or contributory patent
|
|
||||||
infringement, or inducement of patent infringement, then any rights
|
|
||||||
granted to you under this License shall terminate as of the date
|
|
||||||
such litigation is filed. If you or your agent or exclusive
|
|
||||||
licensee institute or order or agree to the institution of a PATENT
|
|
||||||
CHALLENGE, then Tokutek may terminate any rights granted to you
|
|
||||||
under this License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved."
|
|
||||||
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it."
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
||||||
@@ -200,8 +147,6 @@ basement nodes, bulk fetch, and partial fetch:
|
|||||||
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include <config.h>
|
|
||||||
|
|
||||||
#include "ft/cachetable/checkpoint.h"
|
#include "ft/cachetable/checkpoint.h"
|
||||||
#include "ft/cursor.h"
|
#include "ft/cursor.h"
|
||||||
#include "ft/ft.h"
|
#include "ft/ft.h"
|
||||||
@@ -237,205 +182,44 @@ basement nodes, bulk fetch, and partial fetch:
|
|||||||
/* Status is intended for display to humans to help understand system behavior.
|
/* Status is intended for display to humans to help understand system behavior.
|
||||||
* It does not need to be perfectly thread-safe.
|
* It does not need to be perfectly thread-safe.
|
||||||
*/
|
*/
|
||||||
static FT_STATUS_S ft_status;
|
|
||||||
|
|
||||||
#define STATUS_INIT(k,c,t,l,inc) TOKUFT_STATUS_INIT(ft_status, k, c, t, "ft: " l, inc)
|
|
||||||
|
|
||||||
static toku_mutex_t ft_open_close_lock;
|
static toku_mutex_t ft_open_close_lock;
|
||||||
|
|
||||||
static void
|
|
||||||
status_init(void)
|
|
||||||
{
|
|
||||||
// Note, this function initializes the keyname, type, and legend fields.
|
|
||||||
// Value fields are initialized to zero by compiler.
|
|
||||||
STATUS_INIT(FT_UPDATES, DICTIONARY_UPDATES, PARCOUNT, "dictionary updates", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
STATUS_INIT(FT_UPDATES_BROADCAST, DICTIONARY_BROADCAST_UPDATES, PARCOUNT, "dictionary broadcast updates", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
STATUS_INIT(FT_DESCRIPTOR_SET, DESCRIPTOR_SET, PARCOUNT, "descriptor set", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
STATUS_INIT(FT_MSN_DISCARDS, MESSAGES_IGNORED_BY_LEAF_DUE_TO_MSN, PARCOUNT, "messages ignored by leaf due to msn", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
STATUS_INIT(FT_TOTAL_RETRIES, nullptr, PARCOUNT, "total search retries due to TRY_AGAIN", TOKU_ENGINE_STATUS);
|
|
||||||
STATUS_INIT(FT_SEARCH_TRIES_GT_HEIGHT, nullptr, PARCOUNT, "searches requiring more tries than the height of the tree", TOKU_ENGINE_STATUS);
|
|
||||||
STATUS_INIT(FT_SEARCH_TRIES_GT_HEIGHTPLUS3, nullptr, PARCOUNT, "searches requiring more tries than the height of the tree plus three", TOKU_ENGINE_STATUS);
|
|
||||||
STATUS_INIT(FT_CREATE_LEAF, LEAF_NODES_CREATED, PARCOUNT, "leaf nodes created", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
STATUS_INIT(FT_CREATE_NONLEAF, NONLEAF_NODES_CREATED, PARCOUNT, "nonleaf nodes created", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
STATUS_INIT(FT_DESTROY_LEAF, LEAF_NODES_DESTROYED, PARCOUNT, "leaf nodes destroyed", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
STATUS_INIT(FT_DESTROY_NONLEAF, NONLEAF_NODES_DESTROYED, PARCOUNT, "nonleaf nodes destroyed", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
STATUS_INIT(FT_MSG_BYTES_IN, MESSAGES_INJECTED_AT_ROOT_BYTES, PARCOUNT, "bytes of messages injected at root (all trees)", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
STATUS_INIT(FT_MSG_BYTES_OUT, MESSAGES_FLUSHED_FROM_H1_TO_LEAVES_BYTES, PARCOUNT, "bytes of messages flushed from h1 nodes to leaves", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
STATUS_INIT(FT_MSG_BYTES_CURR, MESSAGES_IN_TREES_ESTIMATE_BYTES, PARCOUNT, "bytes of messages currently in trees (estimate)", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
STATUS_INIT(FT_MSG_NUM, MESSAGES_INJECTED_AT_ROOT, PARCOUNT, "messages injected at root", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
STATUS_INIT(FT_MSG_NUM_BROADCAST, BROADCASE_MESSAGES_INJECTED_AT_ROOT, PARCOUNT, "broadcast messages injected at root", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
|
|
||||||
STATUS_INIT(FT_NUM_BASEMENTS_DECOMPRESSED_NORMAL, BASEMENTS_DECOMPRESSED_TARGET_QUERY, PARCOUNT, "basements decompressed as a target of a query", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
STATUS_INIT(FT_NUM_BASEMENTS_DECOMPRESSED_AGGRESSIVE, BASEMENTS_DECOMPRESSED_PRELOCKED_RANGE, PARCOUNT, "basements decompressed for prelocked range", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
STATUS_INIT(FT_NUM_BASEMENTS_DECOMPRESSED_PREFETCH, BASEMENTS_DECOMPRESSED_PREFETCH, PARCOUNT, "basements decompressed for prefetch", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
STATUS_INIT(FT_NUM_BASEMENTS_DECOMPRESSED_WRITE, BASEMENTS_DECOMPRESSED_FOR_WRITE, PARCOUNT, "basements decompressed for write", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
STATUS_INIT(FT_NUM_MSG_BUFFER_DECOMPRESSED_NORMAL, BUFFERS_DECOMPRESSED_TARGET_QUERY, PARCOUNT, "buffers decompressed as a target of a query", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
STATUS_INIT(FT_NUM_MSG_BUFFER_DECOMPRESSED_AGGRESSIVE, BUFFERS_DECOMPRESSED_PRELOCKED_RANGE, PARCOUNT, "buffers decompressed for prelocked range", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
STATUS_INIT(FT_NUM_MSG_BUFFER_DECOMPRESSED_PREFETCH, BUFFERS_DECOMPRESSED_PREFETCH, PARCOUNT, "buffers decompressed for prefetch", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
STATUS_INIT(FT_NUM_MSG_BUFFER_DECOMPRESSED_WRITE, BUFFERS_DECOMPRESSED_FOR_WRITE, PARCOUNT, "buffers decompressed for write", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
|
|
||||||
// Eviction statistics:
|
|
||||||
STATUS_INIT(FT_FULL_EVICTIONS_LEAF, LEAF_NODE_FULL_EVICTIONS, PARCOUNT, "leaf node full evictions", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
STATUS_INIT(FT_FULL_EVICTIONS_LEAF_BYTES, LEAF_NODE_FULL_EVICTIONS_BYTES, PARCOUNT, "leaf node full evictions (bytes)", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
STATUS_INIT(FT_FULL_EVICTIONS_NONLEAF, NONLEAF_NODE_FULL_EVICTIONS, PARCOUNT, "nonleaf node full evictions", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
STATUS_INIT(FT_FULL_EVICTIONS_NONLEAF_BYTES, NONLEAF_NODE_FULL_EVICTIONS_BYTES, PARCOUNT, "nonleaf node full evictions (bytes)", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
STATUS_INIT(FT_PARTIAL_EVICTIONS_LEAF, LEAF_NODE_PARTIAL_EVICTIONS, PARCOUNT, "leaf node partial evictions", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
STATUS_INIT(FT_PARTIAL_EVICTIONS_LEAF_BYTES, LEAF_NODE_PARTIAL_EVICTIONS_BYTES, PARCOUNT, "leaf node partial evictions (bytes)", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
STATUS_INIT(FT_PARTIAL_EVICTIONS_NONLEAF, NONLEAF_NODE_PARTIAL_EVICTIONS, PARCOUNT, "nonleaf node partial evictions", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
STATUS_INIT(FT_PARTIAL_EVICTIONS_NONLEAF_BYTES, NONLEAF_NODE_PARTIAL_EVICTIONS_BYTES, PARCOUNT, "nonleaf node partial evictions (bytes)", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
|
|
||||||
// Disk read statistics:
|
|
||||||
//
|
|
||||||
// Pivots: For queries, prefetching, or writing.
|
|
||||||
STATUS_INIT(FT_NUM_PIVOTS_FETCHED_QUERY, PIVOTS_FETCHED_FOR_QUERY, PARCOUNT, "pivots fetched for query", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
STATUS_INIT(FT_BYTES_PIVOTS_FETCHED_QUERY, PIVOTS_FETCHED_FOR_QUERY_BYTES, PARCOUNT, "pivots fetched for query (bytes)", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
STATUS_INIT(FT_TOKUTIME_PIVOTS_FETCHED_QUERY, PIVOTS_FETCHED_FOR_QUERY_SECONDS, TOKUTIME, "pivots fetched for query (seconds)", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
STATUS_INIT(FT_NUM_PIVOTS_FETCHED_PREFETCH, PIVOTS_FETCHED_FOR_PREFETCH, PARCOUNT, "pivots fetched for prefetch", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
STATUS_INIT(FT_BYTES_PIVOTS_FETCHED_PREFETCH, PIVOTS_FETCHED_FOR_PREFETCH_BYTES, PARCOUNT, "pivots fetched for prefetch (bytes)", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
STATUS_INIT(FT_TOKUTIME_PIVOTS_FETCHED_PREFETCH, PIVOTS_FETCHED_FOR_PREFETCH_SECONDS, TOKUTIME, "pivots fetched for prefetch (seconds)", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
STATUS_INIT(FT_NUM_PIVOTS_FETCHED_WRITE, PIVOTS_FETCHED_FOR_WRITE, PARCOUNT, "pivots fetched for write", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
STATUS_INIT(FT_BYTES_PIVOTS_FETCHED_WRITE, PIVOTS_FETCHED_FOR_WRITE_BYTES, PARCOUNT, "pivots fetched for write (bytes)", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
STATUS_INIT(FT_TOKUTIME_PIVOTS_FETCHED_WRITE, PIVOTS_FETCHED_FOR_WRITE_SECONDS, TOKUTIME, "pivots fetched for write (seconds)", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
// Basements: For queries, aggressive fetching in prelocked range, prefetching, or writing.
|
|
||||||
STATUS_INIT(FT_NUM_BASEMENTS_FETCHED_NORMAL, BASEMENTS_FETCHED_TARGET_QUERY, PARCOUNT, "basements fetched as a target of a query", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
STATUS_INIT(FT_BYTES_BASEMENTS_FETCHED_NORMAL, BASEMENTS_FETCHED_TARGET_QUERY_BYTES, PARCOUNT, "basements fetched as a target of a query (bytes)", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
STATUS_INIT(FT_TOKUTIME_BASEMENTS_FETCHED_NORMAL, BASEMENTS_FETCHED_TARGET_QUERY_SECONDS, TOKUTIME, "basements fetched as a target of a query (seconds)", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
STATUS_INIT(FT_NUM_BASEMENTS_FETCHED_AGGRESSIVE, BASEMENTS_FETCHED_PRELOCKED_RANGE, PARCOUNT, "basements fetched for prelocked range", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
STATUS_INIT(FT_BYTES_BASEMENTS_FETCHED_AGGRESSIVE, BASEMENTS_FETCHED_PRELOCKED_RANGE_BYTES, PARCOUNT, "basements fetched for prelocked range (bytes)", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
STATUS_INIT(FT_TOKUTIME_BASEMENTS_FETCHED_AGGRESSIVE, BASEMENTS_FETCHED_PRELOCKED_RANGE_SECONDS, TOKUTIME, "basements fetched for prelocked range (seconds)", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
STATUS_INIT(FT_NUM_BASEMENTS_FETCHED_PREFETCH, BASEMENTS_FETCHED_PREFETCH, PARCOUNT, "basements fetched for prefetch", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
STATUS_INIT(FT_BYTES_BASEMENTS_FETCHED_PREFETCH, BASEMENTS_FETCHED_PREFETCH_BYTES, PARCOUNT, "basements fetched for prefetch (bytes)", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
STATUS_INIT(FT_TOKUTIME_BASEMENTS_FETCHED_PREFETCH, BASEMENTS_FETCHED_PREFETCH_SECONDS, TOKUTIME, "basements fetched for prefetch (seconds)", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
STATUS_INIT(FT_NUM_BASEMENTS_FETCHED_WRITE, BASEMENTS_FETCHED_FOR_WRITE, PARCOUNT, "basements fetched for write", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
STATUS_INIT(FT_BYTES_BASEMENTS_FETCHED_WRITE, BASEMENTS_FETCHED_FOR_WRITE_BYTES, PARCOUNT, "basements fetched for write (bytes)", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
STATUS_INIT(FT_TOKUTIME_BASEMENTS_FETCHED_WRITE, BASEMENTS_FETCHED_FOR_WRITE_SECONDS, TOKUTIME, "basements fetched for write (seconds)", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
// Buffers: For queries, aggressive fetching in prelocked range, prefetching, or writing.
|
|
||||||
STATUS_INIT(FT_NUM_MSG_BUFFER_FETCHED_NORMAL, BUFFERS_FETCHED_TARGET_QUERY, PARCOUNT, "buffers fetched as a target of a query", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
STATUS_INIT(FT_BYTES_MSG_BUFFER_FETCHED_NORMAL, BUFFERS_FETCHED_TARGET_QUERY_BYTES, PARCOUNT, "buffers fetched as a target of a query (bytes)", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
STATUS_INIT(FT_TOKUTIME_MSG_BUFFER_FETCHED_NORMAL, BUFFERS_FETCHED_TARGET_QUERY_SECONDS, TOKUTIME, "buffers fetched as a target of a query (seconds)", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
STATUS_INIT(FT_NUM_MSG_BUFFER_FETCHED_AGGRESSIVE, BUFFERS_FETCHED_PRELOCKED_RANGE, PARCOUNT, "buffers fetched for prelocked range", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
STATUS_INIT(FT_BYTES_MSG_BUFFER_FETCHED_AGGRESSIVE, BUFFERS_FETCHED_PRELOCKED_RANGE_BYTES, PARCOUNT, "buffers fetched for prelocked range (bytes)", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
STATUS_INIT(FT_TOKUTIME_MSG_BUFFER_FETCHED_AGGRESSIVE, BUFFERS_FETCHED_PRELOCKED_RANGE_SECONDS, TOKUTIME, "buffers fetched for prelocked range (seconds)", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
STATUS_INIT(FT_NUM_MSG_BUFFER_FETCHED_PREFETCH, BUFFERS_FETCHED_PREFETCH, PARCOUNT, "buffers fetched for prefetch", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
STATUS_INIT(FT_BYTES_MSG_BUFFER_FETCHED_PREFETCH, BUFFERS_FETCHED_PREFETCH_BYTES, PARCOUNT, "buffers fetched for prefetch (bytes)", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
STATUS_INIT(FT_TOKUTIME_MSG_BUFFER_FETCHED_PREFETCH, BUFFERS_FETCHED_PREFETCH_SECONDS, TOKUTIME, "buffers fetched for prefetch (seconds)", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
STATUS_INIT(FT_NUM_MSG_BUFFER_FETCHED_WRITE, BUFFERS_FETCHED_FOR_WRITE, PARCOUNT, "buffers fetched for write", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
STATUS_INIT(FT_BYTES_MSG_BUFFER_FETCHED_WRITE, BUFFERS_FETCHED_FOR_WRITE_BYTES, PARCOUNT, "buffers fetched for write (bytes)", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
STATUS_INIT(FT_TOKUTIME_MSG_BUFFER_FETCHED_WRITE, BUFFERS_FETCHED_FOR_WRITE_SECONDS, TOKUTIME, "buffers fetched for write (seconds)", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
|
|
||||||
// Disk write statistics.
|
|
||||||
//
|
|
||||||
// Leaf/Nonleaf: Not for checkpoint
|
|
||||||
STATUS_INIT(FT_DISK_FLUSH_LEAF, LEAF_NODES_FLUSHED_NOT_CHECKPOINT, PARCOUNT, "leaf nodes flushed to disk (not for checkpoint)", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
STATUS_INIT(FT_DISK_FLUSH_LEAF_BYTES, LEAF_NODES_FLUSHED_NOT_CHECKPOINT_BYTES, PARCOUNT, "leaf nodes flushed to disk (not for checkpoint) (bytes)", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
STATUS_INIT(FT_DISK_FLUSH_LEAF_UNCOMPRESSED_BYTES, LEAF_NODES_FLUSHED_NOT_CHECKPOINT_UNCOMPRESSED_BYTES, PARCOUNT, "leaf nodes flushed to disk (not for checkpoint) (uncompressed bytes)", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
STATUS_INIT(FT_DISK_FLUSH_LEAF_TOKUTIME, LEAF_NODES_FLUSHED_NOT_CHECKPOINT_SECONDS, TOKUTIME, "leaf nodes flushed to disk (not for checkpoint) (seconds)", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
STATUS_INIT(FT_DISK_FLUSH_NONLEAF, NONLEAF_NODES_FLUSHED_TO_DISK_NOT_CHECKPOINT, PARCOUNT, "nonleaf nodes flushed to disk (not for checkpoint)", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
STATUS_INIT(FT_DISK_FLUSH_NONLEAF_BYTES, NONLEAF_NODES_FLUSHED_TO_DISK_NOT_CHECKPOINT_BYTES, PARCOUNT, "nonleaf nodes flushed to disk (not for checkpoint) (bytes)", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
STATUS_INIT(FT_DISK_FLUSH_NONLEAF_UNCOMPRESSED_BYTES, NONLEAF_NODES_FLUSHED_TO_DISK_NOT_CHECKPOINT_UNCOMPRESSED_BYTES, PARCOUNT, "nonleaf nodes flushed to disk (not for checkpoint) (uncompressed bytes)", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
STATUS_INIT(FT_DISK_FLUSH_NONLEAF_TOKUTIME, NONLEAF_NODES_FLUSHED_TO_DISK_NOT_CHECKPOINT_SECONDS, TOKUTIME, "nonleaf nodes flushed to disk (not for checkpoint) (seconds)", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
// Leaf/Nonleaf: For checkpoint
|
|
||||||
STATUS_INIT(FT_DISK_FLUSH_LEAF_FOR_CHECKPOINT, LEAF_NODES_FLUSHED_CHECKPOINT, PARCOUNT, "leaf nodes flushed to disk (for checkpoint)", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
STATUS_INIT(FT_DISK_FLUSH_LEAF_BYTES_FOR_CHECKPOINT, LEAF_NODES_FLUSHED_CHECKPOINT_BYTES, PARCOUNT, "leaf nodes flushed to disk (for checkpoint) (bytes)", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
STATUS_INIT(FT_DISK_FLUSH_LEAF_UNCOMPRESSED_BYTES_FOR_CHECKPOINT, LEAF_NODES_FLUSHED_CHECKPOINT_UNCOMPRESSED_BYTES, PARCOUNT, "leaf nodes flushed to disk (for checkpoint) (uncompressed bytes)", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
STATUS_INIT(FT_DISK_FLUSH_LEAF_TOKUTIME_FOR_CHECKPOINT, LEAF_NODES_FLUSHED_CHECKPOINT_SECONDS, TOKUTIME, "leaf nodes flushed to disk (for checkpoint) (seconds)", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
STATUS_INIT(FT_DISK_FLUSH_NONLEAF_FOR_CHECKPOINT, NONLEAF_NODES_FLUSHED_TO_DISK_CHECKPOINT, PARCOUNT, "nonleaf nodes flushed to disk (for checkpoint)", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
STATUS_INIT(FT_DISK_FLUSH_NONLEAF_BYTES_FOR_CHECKPOINT, NONLEAF_NODES_FLUSHED_TO_DISK_CHECKPOINT_BYTES, PARCOUNT, "nonleaf nodes flushed to disk (for checkpoint) (bytes)", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
STATUS_INIT(FT_DISK_FLUSH_NONLEAF_UNCOMPRESSED_BYTES_FOR_CHECKPOINT, NONLEAF_NODES_FLUSHED_TO_DISK_CHECKPOINT_UNCOMPRESSED_BYTES, PARCOUNT, "nonleaf nodes flushed to disk (for checkpoint) (uncompressed bytes)", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
STATUS_INIT(FT_DISK_FLUSH_NONLEAF_TOKUTIME_FOR_CHECKPOINT, NONLEAF_NODES_FLUSHED_TO_DISK_CHECKPOINT_SECONDS, TOKUTIME, "nonleaf nodes flushed to disk (for checkpoint) (seconds)", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
STATUS_INIT(FT_DISK_FLUSH_LEAF_COMPRESSION_RATIO, LEAF_NODE_COMPRESSION_RATIO, DOUBLE, "uncompressed / compressed bytes written (leaf)", TOKU_GLOBAL_STATUS|TOKU_ENGINE_STATUS);
|
|
||||||
STATUS_INIT(FT_DISK_FLUSH_NONLEAF_COMPRESSION_RATIO, NONLEAF_NODE_COMPRESSION_RATIO, DOUBLE, "uncompressed / compressed bytes written (nonleaf)", TOKU_GLOBAL_STATUS|TOKU_ENGINE_STATUS);
|
|
||||||
STATUS_INIT(FT_DISK_FLUSH_OVERALL_COMPRESSION_RATIO, OVERALL_NODE_COMPRESSION_RATIO, DOUBLE, "uncompressed / compressed bytes written (overall)", TOKU_GLOBAL_STATUS|TOKU_ENGINE_STATUS);
|
|
||||||
|
|
||||||
// CPU time statistics for [de]serialization and [de]compression.
|
|
||||||
STATUS_INIT(FT_LEAF_COMPRESS_TOKUTIME, LEAF_COMPRESSION_TO_MEMORY_SECONDS, TOKUTIME, "leaf compression to memory (seconds)", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
STATUS_INIT(FT_LEAF_SERIALIZE_TOKUTIME, LEAF_SERIALIZATION_TO_MEMORY_SECONDS, TOKUTIME, "leaf serialization to memory (seconds)", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
STATUS_INIT(FT_LEAF_DECOMPRESS_TOKUTIME, LEAF_DECOMPRESSION_TO_MEMORY_SECONDS, TOKUTIME, "leaf decompression to memory (seconds)", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
STATUS_INIT(FT_LEAF_DESERIALIZE_TOKUTIME, LEAF_DESERIALIZATION_TO_MEMORY_SECONDS, TOKUTIME, "leaf deserialization to memory (seconds)", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
STATUS_INIT(FT_NONLEAF_COMPRESS_TOKUTIME, NONLEAF_COMPRESSION_TO_MEMORY_SECONDS, TOKUTIME, "nonleaf compression to memory (seconds)", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
STATUS_INIT(FT_NONLEAF_SERIALIZE_TOKUTIME, NONLEAF_SERIALIZATION_TO_MEMORY_SECONDS, TOKUTIME, "nonleaf serialization to memory (seconds)", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
STATUS_INIT(FT_NONLEAF_DECOMPRESS_TOKUTIME, NONLEAF_DECOMPRESSION_TO_MEMORY_SECONDS, TOKUTIME, "nonleaf decompression to memory (seconds)", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
STATUS_INIT(FT_NONLEAF_DESERIALIZE_TOKUTIME, NONLEAF_DESERIALIZATION_TO_MEMORY_SECONDS, TOKUTIME, "nonleaf deserialization to memory (seconds)", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
|
|
||||||
// Promotion statistics.
|
|
||||||
STATUS_INIT(FT_PRO_NUM_ROOT_SPLIT, PROMOTION_ROOTS_SPLIT, PARCOUNT, "promotion: roots split", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
STATUS_INIT(FT_PRO_NUM_ROOT_H0_INJECT, PROMOTION_LEAF_ROOTS_INJECTED_INTO, PARCOUNT, "promotion: leaf roots injected into", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
STATUS_INIT(FT_PRO_NUM_ROOT_H1_INJECT, PROMOTION_H1_ROOTS_INJECTED_INTO, PARCOUNT, "promotion: h1 roots injected into", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
STATUS_INIT(FT_PRO_NUM_INJECT_DEPTH_0, PROMOTION_INJECTIONS_AT_DEPTH_0, PARCOUNT, "promotion: injections at depth 0", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
STATUS_INIT(FT_PRO_NUM_INJECT_DEPTH_1, PROMOTION_INJECTIONS_AT_DEPTH_1, PARCOUNT, "promotion: injections at depth 1", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
STATUS_INIT(FT_PRO_NUM_INJECT_DEPTH_2, PROMOTION_INJECTIONS_AT_DEPTH_2, PARCOUNT, "promotion: injections at depth 2", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
STATUS_INIT(FT_PRO_NUM_INJECT_DEPTH_3, PROMOTION_INJECTIONS_AT_DEPTH_3, PARCOUNT, "promotion: injections at depth 3", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
STATUS_INIT(FT_PRO_NUM_INJECT_DEPTH_GT3, PROMOTION_INJECTIONS_LOWER_THAN_DEPTH_3, PARCOUNT, "promotion: injections lower than depth 3", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
STATUS_INIT(FT_PRO_NUM_STOP_NONEMPTY_BUF, PROMOTION_STOPPED_NONEMPTY_BUFFER, PARCOUNT, "promotion: stopped because of a nonempty buffer", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
STATUS_INIT(FT_PRO_NUM_STOP_H1, PROMOTION_STOPPED_AT_HEIGHT_1, PARCOUNT, "promotion: stopped at height 1", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
STATUS_INIT(FT_PRO_NUM_STOP_LOCK_CHILD, PROMOTION_STOPPED_CHILD_LOCKED_OR_NOT_IN_MEMORY, PARCOUNT, "promotion: stopped because the child was locked or not at all in memory", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
STATUS_INIT(FT_PRO_NUM_STOP_CHILD_INMEM, PROMOTION_STOPPED_CHILD_NOT_FULLY_IN_MEMORY, PARCOUNT, "promotion: stopped because the child was not fully in memory", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
STATUS_INIT(FT_PRO_NUM_DIDNT_WANT_PROMOTE, PROMOTION_STOPPED_AFTER_LOCKING_CHILD, PARCOUNT, "promotion: stopped anyway, after locking the child", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
STATUS_INIT(FT_BASEMENT_DESERIALIZE_FIXED_KEYSIZE, BASEMENT_DESERIALIZATION_FIXED_KEY, PARCOUNT, "basement nodes deserialized with fixed-keysize", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
STATUS_INIT(FT_BASEMENT_DESERIALIZE_VARIABLE_KEYSIZE, BASEMENT_DESERIALIZATION_VARIABLE_KEY, PARCOUNT, "basement nodes deserialized with variable-keysize", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
STATUS_INIT(FT_PRO_RIGHTMOST_LEAF_SHORTCUT_SUCCESS, nullptr, PARCOUNT, "promotion: succeeded in using the rightmost leaf shortcut", TOKU_ENGINE_STATUS);
|
|
||||||
STATUS_INIT(FT_PRO_RIGHTMOST_LEAF_SHORTCUT_FAIL_POS, nullptr, PARCOUNT, "promotion: tried the rightmost leaf shorcut but failed (out-of-bounds)", TOKU_ENGINE_STATUS);
|
|
||||||
STATUS_INIT(FT_PRO_RIGHTMOST_LEAF_SHORTCUT_FAIL_REACTIVE,nullptr, PARCOUNT, "promotion: tried the rightmost leaf shorcut but failed (child reactive)", TOKU_ENGINE_STATUS);
|
|
||||||
|
|
||||||
STATUS_INIT(FT_CURSOR_SKIP_DELETED_LEAF_ENTRY, CURSOR_SKIP_DELETED_LEAF_ENTRY, PARCOUNT, "cursor skipped deleted leaf entries", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
|
|
||||||
ft_status.initialized = true;
|
|
||||||
}
|
|
||||||
static void status_destroy(void) {
|
|
||||||
for (int i = 0; i < FT_STATUS_NUM_ROWS; ++i) {
|
|
||||||
if (ft_status.status[i].type == PARCOUNT) {
|
|
||||||
destroy_partitioned_counter(ft_status.status[i].value.parcount);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#undef STATUS_INIT
|
|
||||||
|
|
||||||
#define STATUS_VAL(x) \
|
|
||||||
(ft_status.status[x].type == PARCOUNT ? \
|
|
||||||
read_partitioned_counter(ft_status.status[x].value.parcount) : \
|
|
||||||
ft_status.status[x].value.num)
|
|
||||||
|
|
||||||
void
|
void
|
||||||
toku_ft_get_status(FT_STATUS s) {
|
toku_ft_get_status(FT_STATUS s) {
|
||||||
|
ft_status.init();
|
||||||
*s = ft_status;
|
*s = ft_status;
|
||||||
|
|
||||||
// Calculate compression ratios for leaf and nonleaf nodes
|
// Calculate compression ratios for leaf and nonleaf nodes
|
||||||
const double compressed_leaf_bytes = STATUS_VAL(FT_DISK_FLUSH_LEAF_BYTES) +
|
const double compressed_leaf_bytes = FT_STATUS_VAL(FT_DISK_FLUSH_LEAF_BYTES) +
|
||||||
STATUS_VAL(FT_DISK_FLUSH_LEAF_BYTES_FOR_CHECKPOINT);
|
FT_STATUS_VAL(FT_DISK_FLUSH_LEAF_BYTES_FOR_CHECKPOINT);
|
||||||
const double uncompressed_leaf_bytes = STATUS_VAL(FT_DISK_FLUSH_LEAF_UNCOMPRESSED_BYTES) +
|
const double uncompressed_leaf_bytes = FT_STATUS_VAL(FT_DISK_FLUSH_LEAF_UNCOMPRESSED_BYTES) +
|
||||||
STATUS_VAL(FT_DISK_FLUSH_LEAF_UNCOMPRESSED_BYTES_FOR_CHECKPOINT);
|
FT_STATUS_VAL(FT_DISK_FLUSH_LEAF_UNCOMPRESSED_BYTES_FOR_CHECKPOINT);
|
||||||
const double compressed_nonleaf_bytes = STATUS_VAL(FT_DISK_FLUSH_NONLEAF_BYTES) +
|
const double compressed_nonleaf_bytes = FT_STATUS_VAL(FT_DISK_FLUSH_NONLEAF_BYTES) +
|
||||||
STATUS_VAL(FT_DISK_FLUSH_NONLEAF_BYTES_FOR_CHECKPOINT);
|
FT_STATUS_VAL(FT_DISK_FLUSH_NONLEAF_BYTES_FOR_CHECKPOINT);
|
||||||
const double uncompressed_nonleaf_bytes = STATUS_VAL(FT_DISK_FLUSH_NONLEAF_UNCOMPRESSED_BYTES) +
|
const double uncompressed_nonleaf_bytes = FT_STATUS_VAL(FT_DISK_FLUSH_NONLEAF_UNCOMPRESSED_BYTES) +
|
||||||
STATUS_VAL(FT_DISK_FLUSH_NONLEAF_UNCOMPRESSED_BYTES_FOR_CHECKPOINT);
|
FT_STATUS_VAL(FT_DISK_FLUSH_NONLEAF_UNCOMPRESSED_BYTES_FOR_CHECKPOINT);
|
||||||
|
|
||||||
if (compressed_leaf_bytes > 0) {
|
if (compressed_leaf_bytes > 0) {
|
||||||
s->status[FT_DISK_FLUSH_LEAF_COMPRESSION_RATIO].value.dnum = uncompressed_leaf_bytes / compressed_leaf_bytes;
|
s->status[FT_STATUS_S::FT_DISK_FLUSH_LEAF_COMPRESSION_RATIO].value.dnum
|
||||||
|
= uncompressed_leaf_bytes / compressed_leaf_bytes;
|
||||||
}
|
}
|
||||||
if (compressed_nonleaf_bytes > 0) {
|
if (compressed_nonleaf_bytes > 0) {
|
||||||
s->status[FT_DISK_FLUSH_NONLEAF_COMPRESSION_RATIO].value.dnum = uncompressed_nonleaf_bytes / compressed_nonleaf_bytes;
|
s->status[FT_STATUS_S::FT_DISK_FLUSH_NONLEAF_COMPRESSION_RATIO].value.dnum
|
||||||
|
= uncompressed_nonleaf_bytes / compressed_nonleaf_bytes;
|
||||||
}
|
}
|
||||||
if (compressed_leaf_bytes > 0 || compressed_nonleaf_bytes > 0) {
|
if (compressed_leaf_bytes > 0 || compressed_nonleaf_bytes > 0) {
|
||||||
s->status[FT_DISK_FLUSH_OVERALL_COMPRESSION_RATIO].value.dnum =
|
s->status[FT_STATUS_S::FT_DISK_FLUSH_OVERALL_COMPRESSION_RATIO].value.dnum
|
||||||
(uncompressed_leaf_bytes + uncompressed_nonleaf_bytes) / (compressed_leaf_bytes + compressed_nonleaf_bytes);
|
= (uncompressed_leaf_bytes + uncompressed_nonleaf_bytes) /
|
||||||
|
(compressed_leaf_bytes + compressed_nonleaf_bytes);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#define STATUS_INC(x, d) \
|
|
||||||
do { \
|
|
||||||
if (ft_status.status[x].type == PARCOUNT) { \
|
|
||||||
increment_partitioned_counter(ft_status.status[x].value.parcount, d); \
|
|
||||||
} else { \
|
|
||||||
toku_sync_fetch_and_add(&ft_status.status[x].value.num, d); \
|
|
||||||
} \
|
|
||||||
} while (0)
|
|
||||||
|
|
||||||
|
|
||||||
void toku_note_deserialized_basement_node(bool fixed_key_size) {
|
void toku_note_deserialized_basement_node(bool fixed_key_size) {
|
||||||
if (fixed_key_size) {
|
if (fixed_key_size) {
|
||||||
STATUS_INC(FT_BASEMENT_DESERIALIZE_FIXED_KEYSIZE, 1);
|
FT_STATUS_INC(FT_BASEMENT_DESERIALIZE_FIXED_KEYSIZE, 1);
|
||||||
} else {
|
} else {
|
||||||
STATUS_INC(FT_BASEMENT_DESERIALIZE_VARIABLE_KEYSIZE, 1);
|
FT_STATUS_INC(FT_BASEMENT_DESERIALIZE_VARIABLE_KEYSIZE, 1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -773,30 +557,30 @@ void toku_ft_status_update_flush_reason(FTNODE node,
|
|||||||
tokutime_t write_time, bool for_checkpoint) {
|
tokutime_t write_time, bool for_checkpoint) {
|
||||||
if (node->height == 0) {
|
if (node->height == 0) {
|
||||||
if (for_checkpoint) {
|
if (for_checkpoint) {
|
||||||
STATUS_INC(FT_DISK_FLUSH_LEAF_FOR_CHECKPOINT, 1);
|
FT_STATUS_INC(FT_DISK_FLUSH_LEAF_FOR_CHECKPOINT, 1);
|
||||||
STATUS_INC(FT_DISK_FLUSH_LEAF_BYTES_FOR_CHECKPOINT, bytes_written);
|
FT_STATUS_INC(FT_DISK_FLUSH_LEAF_BYTES_FOR_CHECKPOINT, bytes_written);
|
||||||
STATUS_INC(FT_DISK_FLUSH_LEAF_UNCOMPRESSED_BYTES_FOR_CHECKPOINT, uncompressed_bytes_flushed);
|
FT_STATUS_INC(FT_DISK_FLUSH_LEAF_UNCOMPRESSED_BYTES_FOR_CHECKPOINT, uncompressed_bytes_flushed);
|
||||||
STATUS_INC(FT_DISK_FLUSH_LEAF_TOKUTIME_FOR_CHECKPOINT, write_time);
|
FT_STATUS_INC(FT_DISK_FLUSH_LEAF_TOKUTIME_FOR_CHECKPOINT, write_time);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
STATUS_INC(FT_DISK_FLUSH_LEAF, 1);
|
FT_STATUS_INC(FT_DISK_FLUSH_LEAF, 1);
|
||||||
STATUS_INC(FT_DISK_FLUSH_LEAF_BYTES, bytes_written);
|
FT_STATUS_INC(FT_DISK_FLUSH_LEAF_BYTES, bytes_written);
|
||||||
STATUS_INC(FT_DISK_FLUSH_LEAF_UNCOMPRESSED_BYTES, uncompressed_bytes_flushed);
|
FT_STATUS_INC(FT_DISK_FLUSH_LEAF_UNCOMPRESSED_BYTES, uncompressed_bytes_flushed);
|
||||||
STATUS_INC(FT_DISK_FLUSH_LEAF_TOKUTIME, write_time);
|
FT_STATUS_INC(FT_DISK_FLUSH_LEAF_TOKUTIME, write_time);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
if (for_checkpoint) {
|
if (for_checkpoint) {
|
||||||
STATUS_INC(FT_DISK_FLUSH_NONLEAF_FOR_CHECKPOINT, 1);
|
FT_STATUS_INC(FT_DISK_FLUSH_NONLEAF_FOR_CHECKPOINT, 1);
|
||||||
STATUS_INC(FT_DISK_FLUSH_NONLEAF_BYTES_FOR_CHECKPOINT, bytes_written);
|
FT_STATUS_INC(FT_DISK_FLUSH_NONLEAF_BYTES_FOR_CHECKPOINT, bytes_written);
|
||||||
STATUS_INC(FT_DISK_FLUSH_NONLEAF_UNCOMPRESSED_BYTES_FOR_CHECKPOINT, uncompressed_bytes_flushed);
|
FT_STATUS_INC(FT_DISK_FLUSH_NONLEAF_UNCOMPRESSED_BYTES_FOR_CHECKPOINT, uncompressed_bytes_flushed);
|
||||||
STATUS_INC(FT_DISK_FLUSH_NONLEAF_TOKUTIME_FOR_CHECKPOINT, write_time);
|
FT_STATUS_INC(FT_DISK_FLUSH_NONLEAF_TOKUTIME_FOR_CHECKPOINT, write_time);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
STATUS_INC(FT_DISK_FLUSH_NONLEAF, 1);
|
FT_STATUS_INC(FT_DISK_FLUSH_NONLEAF, 1);
|
||||||
STATUS_INC(FT_DISK_FLUSH_NONLEAF_BYTES, bytes_written);
|
FT_STATUS_INC(FT_DISK_FLUSH_NONLEAF_BYTES, bytes_written);
|
||||||
STATUS_INC(FT_DISK_FLUSH_NONLEAF_UNCOMPRESSED_BYTES, uncompressed_bytes_flushed);
|
FT_STATUS_INC(FT_DISK_FLUSH_NONLEAF_UNCOMPRESSED_BYTES, uncompressed_bytes_flushed);
|
||||||
STATUS_INC(FT_DISK_FLUSH_NONLEAF_TOKUTIME, write_time);
|
FT_STATUS_INC(FT_DISK_FLUSH_NONLEAF_TOKUTIME, write_time);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -912,11 +696,11 @@ void toku_ftnode_flush_callback(
|
|||||||
if (!is_clone) {
|
if (!is_clone) {
|
||||||
long node_size = ftnode_memory_size(ftnode);
|
long node_size = ftnode_memory_size(ftnode);
|
||||||
if (ftnode->height == 0) {
|
if (ftnode->height == 0) {
|
||||||
STATUS_INC(FT_FULL_EVICTIONS_LEAF, 1);
|
FT_STATUS_INC(FT_FULL_EVICTIONS_LEAF, 1);
|
||||||
STATUS_INC(FT_FULL_EVICTIONS_LEAF_BYTES, node_size);
|
FT_STATUS_INC(FT_FULL_EVICTIONS_LEAF_BYTES, node_size);
|
||||||
} else {
|
} else {
|
||||||
STATUS_INC(FT_FULL_EVICTIONS_NONLEAF, 1);
|
FT_STATUS_INC(FT_FULL_EVICTIONS_NONLEAF, 1);
|
||||||
STATUS_INC(FT_FULL_EVICTIONS_NONLEAF_BYTES, node_size);
|
FT_STATUS_INC(FT_FULL_EVICTIONS_NONLEAF_BYTES, node_size);
|
||||||
}
|
}
|
||||||
toku_free(*disk_data);
|
toku_free(*disk_data);
|
||||||
}
|
}
|
||||||
@@ -941,17 +725,17 @@ void
|
|||||||
toku_ft_status_update_pivot_fetch_reason(ftnode_fetch_extra *bfe)
|
toku_ft_status_update_pivot_fetch_reason(ftnode_fetch_extra *bfe)
|
||||||
{
|
{
|
||||||
if (bfe->type == ftnode_fetch_prefetch) {
|
if (bfe->type == ftnode_fetch_prefetch) {
|
||||||
STATUS_INC(FT_NUM_PIVOTS_FETCHED_PREFETCH, 1);
|
FT_STATUS_INC(FT_NUM_PIVOTS_FETCHED_PREFETCH, 1);
|
||||||
STATUS_INC(FT_BYTES_PIVOTS_FETCHED_PREFETCH, bfe->bytes_read);
|
FT_STATUS_INC(FT_BYTES_PIVOTS_FETCHED_PREFETCH, bfe->bytes_read);
|
||||||
STATUS_INC(FT_TOKUTIME_PIVOTS_FETCHED_PREFETCH, bfe->io_time);
|
FT_STATUS_INC(FT_TOKUTIME_PIVOTS_FETCHED_PREFETCH, bfe->io_time);
|
||||||
} else if (bfe->type == ftnode_fetch_all) {
|
} else if (bfe->type == ftnode_fetch_all) {
|
||||||
STATUS_INC(FT_NUM_PIVOTS_FETCHED_WRITE, 1);
|
FT_STATUS_INC(FT_NUM_PIVOTS_FETCHED_WRITE, 1);
|
||||||
STATUS_INC(FT_BYTES_PIVOTS_FETCHED_WRITE, bfe->bytes_read);
|
FT_STATUS_INC(FT_BYTES_PIVOTS_FETCHED_WRITE, bfe->bytes_read);
|
||||||
STATUS_INC(FT_TOKUTIME_PIVOTS_FETCHED_WRITE, bfe->io_time);
|
FT_STATUS_INC(FT_TOKUTIME_PIVOTS_FETCHED_WRITE, bfe->io_time);
|
||||||
} else if (bfe->type == ftnode_fetch_subset || bfe->type == ftnode_fetch_keymatch) {
|
} else if (bfe->type == ftnode_fetch_subset || bfe->type == ftnode_fetch_keymatch) {
|
||||||
STATUS_INC(FT_NUM_PIVOTS_FETCHED_QUERY, 1);
|
FT_STATUS_INC(FT_NUM_PIVOTS_FETCHED_QUERY, 1);
|
||||||
STATUS_INC(FT_BYTES_PIVOTS_FETCHED_QUERY, bfe->bytes_read);
|
FT_STATUS_INC(FT_BYTES_PIVOTS_FETCHED_QUERY, bfe->bytes_read);
|
||||||
STATUS_INC(FT_TOKUTIME_PIVOTS_FETCHED_QUERY, bfe->io_time);
|
FT_STATUS_INC(FT_TOKUTIME_PIVOTS_FETCHED_QUERY, bfe->io_time);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1187,12 +971,12 @@ exit:
|
|||||||
if (num_partial_evictions > 0) {
|
if (num_partial_evictions > 0) {
|
||||||
if (height == 0) {
|
if (height == 0) {
|
||||||
long delta = old_attr.leaf_size - new_attr.leaf_size;
|
long delta = old_attr.leaf_size - new_attr.leaf_size;
|
||||||
STATUS_INC(FT_PARTIAL_EVICTIONS_LEAF, num_partial_evictions);
|
FT_STATUS_INC(FT_PARTIAL_EVICTIONS_LEAF, num_partial_evictions);
|
||||||
STATUS_INC(FT_PARTIAL_EVICTIONS_LEAF_BYTES, delta);
|
FT_STATUS_INC(FT_PARTIAL_EVICTIONS_LEAF_BYTES, delta);
|
||||||
} else {
|
} else {
|
||||||
long delta = old_attr.nonleaf_size - new_attr.nonleaf_size;
|
long delta = old_attr.nonleaf_size - new_attr.nonleaf_size;
|
||||||
STATUS_INC(FT_PARTIAL_EVICTIONS_NONLEAF, num_partial_evictions);
|
FT_STATUS_INC(FT_PARTIAL_EVICTIONS_NONLEAF, num_partial_evictions);
|
||||||
STATUS_INC(FT_PARTIAL_EVICTIONS_NONLEAF_BYTES, delta);
|
FT_STATUS_INC(FT_PARTIAL_EVICTIONS_NONLEAF_BYTES, delta);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return 0;
|
return 0;
|
||||||
@@ -1204,7 +988,7 @@ exit:
|
|||||||
// We need a function to have something a drd suppression can reference
|
// We need a function to have something a drd suppression can reference
|
||||||
// see src/tests/drd.suppressions (unsafe_touch_clock)
|
// see src/tests/drd.suppressions (unsafe_touch_clock)
|
||||||
static void unsafe_touch_clock(FTNODE node, int i) {
|
static void unsafe_touch_clock(FTNODE node, int i) {
|
||||||
toku_drd_unsafe_set(&node->bp[i].clock_count, static_cast<unsigned char>(1));
|
toku_unsafe_set(&node->bp[i].clock_count, static_cast<unsigned char>(1));
|
||||||
}
|
}
|
||||||
|
|
||||||
// Callback that states if a partial fetch of the node is necessary
|
// Callback that states if a partial fetch of the node is necessary
|
||||||
@@ -1306,70 +1090,70 @@ ft_status_update_partial_fetch_reason(
|
|||||||
if (is_leaf) {
|
if (is_leaf) {
|
||||||
if (bfe->type == ftnode_fetch_prefetch) {
|
if (bfe->type == ftnode_fetch_prefetch) {
|
||||||
if (state == PT_COMPRESSED) {
|
if (state == PT_COMPRESSED) {
|
||||||
STATUS_INC(FT_NUM_BASEMENTS_DECOMPRESSED_PREFETCH, 1);
|
FT_STATUS_INC(FT_NUM_BASEMENTS_DECOMPRESSED_PREFETCH, 1);
|
||||||
} else {
|
} else {
|
||||||
STATUS_INC(FT_NUM_BASEMENTS_FETCHED_PREFETCH, 1);
|
FT_STATUS_INC(FT_NUM_BASEMENTS_FETCHED_PREFETCH, 1);
|
||||||
STATUS_INC(FT_BYTES_BASEMENTS_FETCHED_PREFETCH, bfe->bytes_read);
|
FT_STATUS_INC(FT_BYTES_BASEMENTS_FETCHED_PREFETCH, bfe->bytes_read);
|
||||||
STATUS_INC(FT_TOKUTIME_BASEMENTS_FETCHED_PREFETCH, bfe->io_time);
|
FT_STATUS_INC(FT_TOKUTIME_BASEMENTS_FETCHED_PREFETCH, bfe->io_time);
|
||||||
}
|
}
|
||||||
} else if (bfe->type == ftnode_fetch_all) {
|
} else if (bfe->type == ftnode_fetch_all) {
|
||||||
if (state == PT_COMPRESSED) {
|
if (state == PT_COMPRESSED) {
|
||||||
STATUS_INC(FT_NUM_BASEMENTS_DECOMPRESSED_WRITE, 1);
|
FT_STATUS_INC(FT_NUM_BASEMENTS_DECOMPRESSED_WRITE, 1);
|
||||||
} else {
|
} else {
|
||||||
STATUS_INC(FT_NUM_BASEMENTS_FETCHED_WRITE, 1);
|
FT_STATUS_INC(FT_NUM_BASEMENTS_FETCHED_WRITE, 1);
|
||||||
STATUS_INC(FT_BYTES_BASEMENTS_FETCHED_WRITE, bfe->bytes_read);
|
FT_STATUS_INC(FT_BYTES_BASEMENTS_FETCHED_WRITE, bfe->bytes_read);
|
||||||
STATUS_INC(FT_TOKUTIME_BASEMENTS_FETCHED_WRITE, bfe->io_time);
|
FT_STATUS_INC(FT_TOKUTIME_BASEMENTS_FETCHED_WRITE, bfe->io_time);
|
||||||
}
|
}
|
||||||
} else if (childnum == bfe->child_to_read) {
|
} else if (childnum == bfe->child_to_read) {
|
||||||
if (state == PT_COMPRESSED) {
|
if (state == PT_COMPRESSED) {
|
||||||
STATUS_INC(FT_NUM_BASEMENTS_DECOMPRESSED_NORMAL, 1);
|
FT_STATUS_INC(FT_NUM_BASEMENTS_DECOMPRESSED_NORMAL, 1);
|
||||||
} else {
|
} else {
|
||||||
STATUS_INC(FT_NUM_BASEMENTS_FETCHED_NORMAL, 1);
|
FT_STATUS_INC(FT_NUM_BASEMENTS_FETCHED_NORMAL, 1);
|
||||||
STATUS_INC(FT_BYTES_BASEMENTS_FETCHED_NORMAL, bfe->bytes_read);
|
FT_STATUS_INC(FT_BYTES_BASEMENTS_FETCHED_NORMAL, bfe->bytes_read);
|
||||||
STATUS_INC(FT_TOKUTIME_BASEMENTS_FETCHED_NORMAL, bfe->io_time);
|
FT_STATUS_INC(FT_TOKUTIME_BASEMENTS_FETCHED_NORMAL, bfe->io_time);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
if (state == PT_COMPRESSED) {
|
if (state == PT_COMPRESSED) {
|
||||||
STATUS_INC(FT_NUM_BASEMENTS_DECOMPRESSED_AGGRESSIVE, 1);
|
FT_STATUS_INC(FT_NUM_BASEMENTS_DECOMPRESSED_AGGRESSIVE, 1);
|
||||||
} else {
|
} else {
|
||||||
STATUS_INC(FT_NUM_BASEMENTS_FETCHED_AGGRESSIVE, 1);
|
FT_STATUS_INC(FT_NUM_BASEMENTS_FETCHED_AGGRESSIVE, 1);
|
||||||
STATUS_INC(FT_BYTES_BASEMENTS_FETCHED_AGGRESSIVE, bfe->bytes_read);
|
FT_STATUS_INC(FT_BYTES_BASEMENTS_FETCHED_AGGRESSIVE, bfe->bytes_read);
|
||||||
STATUS_INC(FT_TOKUTIME_BASEMENTS_FETCHED_AGGRESSIVE, bfe->io_time);
|
FT_STATUS_INC(FT_TOKUTIME_BASEMENTS_FETCHED_AGGRESSIVE, bfe->io_time);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
if (bfe->type == ftnode_fetch_prefetch) {
|
if (bfe->type == ftnode_fetch_prefetch) {
|
||||||
if (state == PT_COMPRESSED) {
|
if (state == PT_COMPRESSED) {
|
||||||
STATUS_INC(FT_NUM_MSG_BUFFER_DECOMPRESSED_PREFETCH, 1);
|
FT_STATUS_INC(FT_NUM_MSG_BUFFER_DECOMPRESSED_PREFETCH, 1);
|
||||||
} else {
|
} else {
|
||||||
STATUS_INC(FT_NUM_MSG_BUFFER_FETCHED_PREFETCH, 1);
|
FT_STATUS_INC(FT_NUM_MSG_BUFFER_FETCHED_PREFETCH, 1);
|
||||||
STATUS_INC(FT_BYTES_MSG_BUFFER_FETCHED_PREFETCH, bfe->bytes_read);
|
FT_STATUS_INC(FT_BYTES_MSG_BUFFER_FETCHED_PREFETCH, bfe->bytes_read);
|
||||||
STATUS_INC(FT_TOKUTIME_MSG_BUFFER_FETCHED_PREFETCH, bfe->io_time);
|
FT_STATUS_INC(FT_TOKUTIME_MSG_BUFFER_FETCHED_PREFETCH, bfe->io_time);
|
||||||
}
|
}
|
||||||
} else if (bfe->type == ftnode_fetch_all) {
|
} else if (bfe->type == ftnode_fetch_all) {
|
||||||
if (state == PT_COMPRESSED) {
|
if (state == PT_COMPRESSED) {
|
||||||
STATUS_INC(FT_NUM_MSG_BUFFER_DECOMPRESSED_WRITE, 1);
|
FT_STATUS_INC(FT_NUM_MSG_BUFFER_DECOMPRESSED_WRITE, 1);
|
||||||
} else {
|
} else {
|
||||||
STATUS_INC(FT_NUM_MSG_BUFFER_FETCHED_WRITE, 1);
|
FT_STATUS_INC(FT_NUM_MSG_BUFFER_FETCHED_WRITE, 1);
|
||||||
STATUS_INC(FT_BYTES_MSG_BUFFER_FETCHED_WRITE, bfe->bytes_read);
|
FT_STATUS_INC(FT_BYTES_MSG_BUFFER_FETCHED_WRITE, bfe->bytes_read);
|
||||||
STATUS_INC(FT_TOKUTIME_MSG_BUFFER_FETCHED_WRITE, bfe->io_time);
|
FT_STATUS_INC(FT_TOKUTIME_MSG_BUFFER_FETCHED_WRITE, bfe->io_time);
|
||||||
}
|
}
|
||||||
} else if (childnum == bfe->child_to_read) {
|
} else if (childnum == bfe->child_to_read) {
|
||||||
if (state == PT_COMPRESSED) {
|
if (state == PT_COMPRESSED) {
|
||||||
STATUS_INC(FT_NUM_MSG_BUFFER_DECOMPRESSED_NORMAL, 1);
|
FT_STATUS_INC(FT_NUM_MSG_BUFFER_DECOMPRESSED_NORMAL, 1);
|
||||||
} else {
|
} else {
|
||||||
STATUS_INC(FT_NUM_MSG_BUFFER_FETCHED_NORMAL, 1);
|
FT_STATUS_INC(FT_NUM_MSG_BUFFER_FETCHED_NORMAL, 1);
|
||||||
STATUS_INC(FT_BYTES_MSG_BUFFER_FETCHED_NORMAL, bfe->bytes_read);
|
FT_STATUS_INC(FT_BYTES_MSG_BUFFER_FETCHED_NORMAL, bfe->bytes_read);
|
||||||
STATUS_INC(FT_TOKUTIME_MSG_BUFFER_FETCHED_NORMAL, bfe->io_time);
|
FT_STATUS_INC(FT_TOKUTIME_MSG_BUFFER_FETCHED_NORMAL, bfe->io_time);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
if (state == PT_COMPRESSED) {
|
if (state == PT_COMPRESSED) {
|
||||||
STATUS_INC(FT_NUM_MSG_BUFFER_DECOMPRESSED_AGGRESSIVE, 1);
|
FT_STATUS_INC(FT_NUM_MSG_BUFFER_DECOMPRESSED_AGGRESSIVE, 1);
|
||||||
} else {
|
} else {
|
||||||
STATUS_INC(FT_NUM_MSG_BUFFER_FETCHED_AGGRESSIVE, 1);
|
FT_STATUS_INC(FT_NUM_MSG_BUFFER_FETCHED_AGGRESSIVE, 1);
|
||||||
STATUS_INC(FT_BYTES_MSG_BUFFER_FETCHED_AGGRESSIVE, bfe->bytes_read);
|
FT_STATUS_INC(FT_BYTES_MSG_BUFFER_FETCHED_AGGRESSIVE, bfe->bytes_read);
|
||||||
STATUS_INC(FT_TOKUTIME_MSG_BUFFER_FETCHED_AGGRESSIVE, bfe->io_time);
|
FT_STATUS_INC(FT_TOKUTIME_MSG_BUFFER_FETCHED_AGGRESSIVE, bfe->io_time);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -1377,46 +1161,46 @@ ft_status_update_partial_fetch_reason(
|
|||||||
|
|
||||||
void toku_ft_status_update_serialize_times(FTNODE node, tokutime_t serialize_time, tokutime_t compress_time) {
|
void toku_ft_status_update_serialize_times(FTNODE node, tokutime_t serialize_time, tokutime_t compress_time) {
|
||||||
if (node->height == 0) {
|
if (node->height == 0) {
|
||||||
STATUS_INC(FT_LEAF_SERIALIZE_TOKUTIME, serialize_time);
|
FT_STATUS_INC(FT_LEAF_SERIALIZE_TOKUTIME, serialize_time);
|
||||||
STATUS_INC(FT_LEAF_COMPRESS_TOKUTIME, compress_time);
|
FT_STATUS_INC(FT_LEAF_COMPRESS_TOKUTIME, compress_time);
|
||||||
} else {
|
} else {
|
||||||
STATUS_INC(FT_NONLEAF_SERIALIZE_TOKUTIME, serialize_time);
|
FT_STATUS_INC(FT_NONLEAF_SERIALIZE_TOKUTIME, serialize_time);
|
||||||
STATUS_INC(FT_NONLEAF_COMPRESS_TOKUTIME, compress_time);
|
FT_STATUS_INC(FT_NONLEAF_COMPRESS_TOKUTIME, compress_time);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void toku_ft_status_update_deserialize_times(FTNODE node, tokutime_t deserialize_time, tokutime_t decompress_time) {
|
void toku_ft_status_update_deserialize_times(FTNODE node, tokutime_t deserialize_time, tokutime_t decompress_time) {
|
||||||
if (node->height == 0) {
|
if (node->height == 0) {
|
||||||
STATUS_INC(FT_LEAF_DESERIALIZE_TOKUTIME, deserialize_time);
|
FT_STATUS_INC(FT_LEAF_DESERIALIZE_TOKUTIME, deserialize_time);
|
||||||
STATUS_INC(FT_LEAF_DECOMPRESS_TOKUTIME, decompress_time);
|
FT_STATUS_INC(FT_LEAF_DECOMPRESS_TOKUTIME, decompress_time);
|
||||||
} else {
|
} else {
|
||||||
STATUS_INC(FT_NONLEAF_DESERIALIZE_TOKUTIME, deserialize_time);
|
FT_STATUS_INC(FT_NONLEAF_DESERIALIZE_TOKUTIME, deserialize_time);
|
||||||
STATUS_INC(FT_NONLEAF_DECOMPRESS_TOKUTIME, decompress_time);
|
FT_STATUS_INC(FT_NONLEAF_DECOMPRESS_TOKUTIME, decompress_time);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void toku_ft_status_note_msn_discard(void) {
|
void toku_ft_status_note_msn_discard(void) {
|
||||||
STATUS_INC(FT_MSN_DISCARDS, 1);
|
FT_STATUS_INC(FT_MSN_DISCARDS, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
void toku_ft_status_note_update(bool broadcast) {
|
void toku_ft_status_note_update(bool broadcast) {
|
||||||
if (broadcast) {
|
if (broadcast) {
|
||||||
STATUS_INC(FT_UPDATES_BROADCAST, 1);
|
FT_STATUS_INC(FT_UPDATES_BROADCAST, 1);
|
||||||
} else {
|
} else {
|
||||||
STATUS_INC(FT_UPDATES, 1);
|
FT_STATUS_INC(FT_UPDATES, 1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void toku_ft_status_note_msg_bytes_out(size_t buffsize) {
|
void toku_ft_status_note_msg_bytes_out(size_t buffsize) {
|
||||||
STATUS_INC(FT_MSG_BYTES_OUT, buffsize);
|
FT_STATUS_INC(FT_MSG_BYTES_OUT, buffsize);
|
||||||
STATUS_INC(FT_MSG_BYTES_CURR, -buffsize);
|
FT_STATUS_INC(FT_MSG_BYTES_CURR, -buffsize);
|
||||||
}
|
}
|
||||||
void toku_ft_status_note_ftnode(int height, bool created) {
|
void toku_ft_status_note_ftnode(int height, bool created) {
|
||||||
if (created) {
|
if (created) {
|
||||||
if (height == 0) {
|
if (height == 0) {
|
||||||
STATUS_INC(FT_CREATE_LEAF, 1);
|
FT_STATUS_INC(FT_CREATE_LEAF, 1);
|
||||||
} else {
|
} else {
|
||||||
STATUS_INC(FT_CREATE_NONLEAF, 1);
|
FT_STATUS_INC(FT_CREATE_NONLEAF, 1);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// created = false means destroyed
|
// created = false means destroyed
|
||||||
@@ -1612,11 +1396,11 @@ static void inject_message_in_locked_node(
|
|||||||
// update some status variables
|
// update some status variables
|
||||||
if (node->height != 0) {
|
if (node->height != 0) {
|
||||||
size_t msgsize = msg.total_size();
|
size_t msgsize = msg.total_size();
|
||||||
STATUS_INC(FT_MSG_BYTES_IN, msgsize);
|
FT_STATUS_INC(FT_MSG_BYTES_IN, msgsize);
|
||||||
STATUS_INC(FT_MSG_BYTES_CURR, msgsize);
|
FT_STATUS_INC(FT_MSG_BYTES_CURR, msgsize);
|
||||||
STATUS_INC(FT_MSG_NUM, 1);
|
FT_STATUS_INC(FT_MSG_NUM, 1);
|
||||||
if (ft_msg_type_applies_all(msg.type())) {
|
if (ft_msg_type_applies_all(msg.type())) {
|
||||||
STATUS_INC(FT_MSG_NUM_BROADCAST, 1);
|
FT_STATUS_INC(FT_MSG_NUM_BROADCAST, 1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1624,13 +1408,13 @@ static void inject_message_in_locked_node(
|
|||||||
paranoid_invariant(msg_with_msn.msn().msn == node->max_msn_applied_to_node_on_disk.msn);
|
paranoid_invariant(msg_with_msn.msn().msn == node->max_msn_applied_to_node_on_disk.msn);
|
||||||
|
|
||||||
if (node->blocknum.b == ft->rightmost_blocknum.b) {
|
if (node->blocknum.b == ft->rightmost_blocknum.b) {
|
||||||
if (toku_drd_unsafe_fetch(&ft->seqinsert_score) < FT_SEQINSERT_SCORE_THRESHOLD) {
|
if (toku_unsafe_fetch(&ft->seqinsert_score) < FT_SEQINSERT_SCORE_THRESHOLD) {
|
||||||
// we promoted to the rightmost leaf node and the seqinsert score has not yet saturated.
|
// we promoted to the rightmost leaf node and the seqinsert score has not yet saturated.
|
||||||
toku_sync_fetch_and_add(&ft->seqinsert_score, 1);
|
toku_sync_fetch_and_add(&ft->seqinsert_score, 1);
|
||||||
}
|
}
|
||||||
} else if (toku_drd_unsafe_fetch(&ft->seqinsert_score) != 0) {
|
} else if (toku_unsafe_fetch(&ft->seqinsert_score) != 0) {
|
||||||
// we promoted to something other than the rightmost leaf node and the score should reset
|
// we promoted to something other than the rightmost leaf node and the score should reset
|
||||||
toku_drd_unsafe_set(&ft->seqinsert_score, static_cast<uint32_t>(0));
|
toku_unsafe_set(&ft->seqinsert_score, static_cast<uint32_t>(0));
|
||||||
}
|
}
|
||||||
|
|
||||||
// if we call toku_ft_flush_some_child, then that function unpins the root
|
// if we call toku_ft_flush_some_child, then that function unpins the root
|
||||||
@@ -1792,16 +1576,16 @@ static inline bool should_inject_in_node(seqinsert_loc loc, int height, int dept
|
|||||||
static void ft_verify_or_set_rightmost_blocknum(FT ft, BLOCKNUM b)
|
static void ft_verify_or_set_rightmost_blocknum(FT ft, BLOCKNUM b)
|
||||||
// Given: 'b', the _definitive_ and constant rightmost blocknum of 'ft'
|
// Given: 'b', the _definitive_ and constant rightmost blocknum of 'ft'
|
||||||
{
|
{
|
||||||
if (toku_drd_unsafe_fetch(&ft->rightmost_blocknum.b) == RESERVED_BLOCKNUM_NULL) {
|
if (toku_unsafe_fetch(&ft->rightmost_blocknum.b) == RESERVED_BLOCKNUM_NULL) {
|
||||||
toku_ft_lock(ft);
|
toku_ft_lock(ft);
|
||||||
if (ft->rightmost_blocknum.b == RESERVED_BLOCKNUM_NULL) {
|
if (ft->rightmost_blocknum.b == RESERVED_BLOCKNUM_NULL) {
|
||||||
toku_drd_unsafe_set(&ft->rightmost_blocknum, b);
|
toku_unsafe_set(&ft->rightmost_blocknum, b);
|
||||||
}
|
}
|
||||||
toku_ft_unlock(ft);
|
toku_ft_unlock(ft);
|
||||||
}
|
}
|
||||||
// The rightmost blocknum only transitions from RESERVED_BLOCKNUM_NULL to non-null.
|
// The rightmost blocknum only transitions from RESERVED_BLOCKNUM_NULL to non-null.
|
||||||
// If it's already set, verify that the stored value is consistent with 'b'
|
// If it's already set, verify that the stored value is consistent with 'b'
|
||||||
invariant(toku_drd_unsafe_fetch(&ft->rightmost_blocknum.b) == b.b);
|
invariant(toku_unsafe_fetch(&ft->rightmost_blocknum.b) == b.b);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool toku_bnc_should_promote(FT ft, NONLEAF_CHILDINFO bnc) {
|
bool toku_bnc_should_promote(FT ft, NONLEAF_CHILDINFO bnc) {
|
||||||
@@ -1847,15 +1631,15 @@ static void push_something_in_subtree(
|
|||||||
if (should_inject_in_node(loc, subtree_root->height, depth)) {
|
if (should_inject_in_node(loc, subtree_root->height, depth)) {
|
||||||
switch (depth) {
|
switch (depth) {
|
||||||
case 0:
|
case 0:
|
||||||
STATUS_INC(FT_PRO_NUM_INJECT_DEPTH_0, 1); break;
|
FT_STATUS_INC(FT_PRO_NUM_INJECT_DEPTH_0, 1); break;
|
||||||
case 1:
|
case 1:
|
||||||
STATUS_INC(FT_PRO_NUM_INJECT_DEPTH_1, 1); break;
|
FT_STATUS_INC(FT_PRO_NUM_INJECT_DEPTH_1, 1); break;
|
||||||
case 2:
|
case 2:
|
||||||
STATUS_INC(FT_PRO_NUM_INJECT_DEPTH_2, 1); break;
|
FT_STATUS_INC(FT_PRO_NUM_INJECT_DEPTH_2, 1); break;
|
||||||
case 3:
|
case 3:
|
||||||
STATUS_INC(FT_PRO_NUM_INJECT_DEPTH_3, 1); break;
|
FT_STATUS_INC(FT_PRO_NUM_INJECT_DEPTH_3, 1); break;
|
||||||
default:
|
default:
|
||||||
STATUS_INC(FT_PRO_NUM_INJECT_DEPTH_GT3, 1); break;
|
FT_STATUS_INC(FT_PRO_NUM_INJECT_DEPTH_GT3, 1); break;
|
||||||
}
|
}
|
||||||
// If the target node is a non-root leaf node on the right extreme,
|
// If the target node is a non-root leaf node on the right extreme,
|
||||||
// set the rightmost blocknum. We know there are no messages above us
|
// set the rightmost blocknum. We know there are no messages above us
|
||||||
@@ -1880,7 +1664,7 @@ static void push_something_in_subtree(
|
|||||||
|
|
||||||
if (toku_bnc_n_entries(bnc) > 0) {
|
if (toku_bnc_n_entries(bnc) > 0) {
|
||||||
// The buffer is non-empty, give up on promoting.
|
// The buffer is non-empty, give up on promoting.
|
||||||
STATUS_INC(FT_PRO_NUM_STOP_NONEMPTY_BUF, 1);
|
FT_STATUS_INC(FT_PRO_NUM_STOP_NONEMPTY_BUF, 1);
|
||||||
goto relock_and_push_here;
|
goto relock_and_push_here;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1895,7 +1679,7 @@ static void push_something_in_subtree(
|
|||||||
|
|
||||||
if (next_loc == NEITHER_EXTREME && subtree_root->height <= 1) {
|
if (next_loc == NEITHER_EXTREME && subtree_root->height <= 1) {
|
||||||
// Never promote to leaf nodes except on the edges
|
// Never promote to leaf nodes except on the edges
|
||||||
STATUS_INC(FT_PRO_NUM_STOP_H1, 1);
|
FT_STATUS_INC(FT_PRO_NUM_STOP_H1, 1);
|
||||||
goto relock_and_push_here;
|
goto relock_and_push_here;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1934,7 +1718,7 @@ static void push_something_in_subtree(
|
|||||||
r = toku_maybe_pin_ftnode_clean(ft, child_blocknum, child_fullhash, lock_type, &child);
|
r = toku_maybe_pin_ftnode_clean(ft, child_blocknum, child_fullhash, lock_type, &child);
|
||||||
if (r != 0) {
|
if (r != 0) {
|
||||||
// We couldn't get the child cheaply, so give up on promoting.
|
// We couldn't get the child cheaply, so give up on promoting.
|
||||||
STATUS_INC(FT_PRO_NUM_STOP_LOCK_CHILD, 1);
|
FT_STATUS_INC(FT_PRO_NUM_STOP_LOCK_CHILD, 1);
|
||||||
goto relock_and_push_here;
|
goto relock_and_push_here;
|
||||||
}
|
}
|
||||||
if (toku_ftnode_fully_in_memory(child)) {
|
if (toku_ftnode_fully_in_memory(child)) {
|
||||||
@@ -1945,7 +1729,7 @@ static void push_something_in_subtree(
|
|||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// We got the child, but it's not fully in memory. Give up on promoting.
|
// We got the child, but it's not fully in memory. Give up on promoting.
|
||||||
STATUS_INC(FT_PRO_NUM_STOP_CHILD_INMEM, 1);
|
FT_STATUS_INC(FT_PRO_NUM_STOP_CHILD_INMEM, 1);
|
||||||
goto unlock_child_and_push_here;
|
goto unlock_child_and_push_here;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -1976,7 +1760,7 @@ static void push_something_in_subtree(
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
STATUS_INC(FT_PRO_NUM_DIDNT_WANT_PROMOTE, 1);
|
FT_STATUS_INC(FT_PRO_NUM_DIDNT_WANT_PROMOTE, 1);
|
||||||
unlock_child_and_push_here:
|
unlock_child_and_push_here:
|
||||||
// We locked the child, but we decided not to promote.
|
// We locked the child, but we decided not to promote.
|
||||||
// Unlock the child, and fall through to the next case.
|
// Unlock the child, and fall through to the next case.
|
||||||
@@ -1994,15 +1778,15 @@ static void push_something_in_subtree(
|
|||||||
toku_unpin_ftnode_read_only(ft, subtree_root);
|
toku_unpin_ftnode_read_only(ft, subtree_root);
|
||||||
switch (depth) {
|
switch (depth) {
|
||||||
case 0:
|
case 0:
|
||||||
STATUS_INC(FT_PRO_NUM_INJECT_DEPTH_0, 1); break;
|
FT_STATUS_INC(FT_PRO_NUM_INJECT_DEPTH_0, 1); break;
|
||||||
case 1:
|
case 1:
|
||||||
STATUS_INC(FT_PRO_NUM_INJECT_DEPTH_1, 1); break;
|
FT_STATUS_INC(FT_PRO_NUM_INJECT_DEPTH_1, 1); break;
|
||||||
case 2:
|
case 2:
|
||||||
STATUS_INC(FT_PRO_NUM_INJECT_DEPTH_2, 1); break;
|
FT_STATUS_INC(FT_PRO_NUM_INJECT_DEPTH_2, 1); break;
|
||||||
case 3:
|
case 3:
|
||||||
STATUS_INC(FT_PRO_NUM_INJECT_DEPTH_3, 1); break;
|
FT_STATUS_INC(FT_PRO_NUM_INJECT_DEPTH_3, 1); break;
|
||||||
default:
|
default:
|
||||||
STATUS_INC(FT_PRO_NUM_INJECT_DEPTH_GT3, 1); break;
|
FT_STATUS_INC(FT_PRO_NUM_INJECT_DEPTH_GT3, 1); break;
|
||||||
}
|
}
|
||||||
inject_message_at_this_blocknum(ft, subtree_root_blocknum, subtree_root_fullhash, msg, flow_deltas, gc_info);
|
inject_message_at_this_blocknum(ft, subtree_root_blocknum, subtree_root_fullhash, msg, flow_deltas, gc_info);
|
||||||
}
|
}
|
||||||
@@ -2097,7 +1881,7 @@ void toku_ft_root_put_msg(
|
|||||||
// do the injection.
|
// do the injection.
|
||||||
toku_unpin_ftnode(ft, node);
|
toku_unpin_ftnode(ft, node);
|
||||||
lock_type = PL_READ;
|
lock_type = PL_READ;
|
||||||
STATUS_INC(FT_PRO_NUM_ROOT_SPLIT, 1);
|
FT_STATUS_INC(FT_PRO_NUM_ROOT_SPLIT, 1);
|
||||||
goto change_lock_type;
|
goto change_lock_type;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
@@ -2114,7 +1898,7 @@ void toku_ft_root_put_msg(
|
|||||||
if (node->height == 0 || !ft_msg_type_applies_once(msg.type())) {
|
if (node->height == 0 || !ft_msg_type_applies_once(msg.type())) {
|
||||||
// If the root's a leaf or we're injecting a broadcast, drop the read lock and inject here.
|
// If the root's a leaf or we're injecting a broadcast, drop the read lock and inject here.
|
||||||
toku_unpin_ftnode_read_only(ft, node);
|
toku_unpin_ftnode_read_only(ft, node);
|
||||||
STATUS_INC(FT_PRO_NUM_ROOT_H0_INJECT, 1);
|
FT_STATUS_INC(FT_PRO_NUM_ROOT_H0_INJECT, 1);
|
||||||
inject_message_at_this_blocknum(ft, root_key, fullhash, msg, flow_deltas, gc_info);
|
inject_message_at_this_blocknum(ft, root_key, fullhash, msg, flow_deltas, gc_info);
|
||||||
} else if (node->height > 1) {
|
} else if (node->height > 1) {
|
||||||
// If the root's above height 1, we are definitely eligible for promotion.
|
// If the root's above height 1, we are definitely eligible for promotion.
|
||||||
@@ -2129,7 +1913,7 @@ void toku_ft_root_put_msg(
|
|||||||
} else {
|
} else {
|
||||||
// At height 1 in the middle, don't promote, drop the read lock and inject here.
|
// At height 1 in the middle, don't promote, drop the read lock and inject here.
|
||||||
toku_unpin_ftnode_read_only(ft, node);
|
toku_unpin_ftnode_read_only(ft, node);
|
||||||
STATUS_INC(FT_PRO_NUM_ROOT_H1_INJECT, 1);
|
FT_STATUS_INC(FT_PRO_NUM_ROOT_H1_INJECT, 1);
|
||||||
inject_message_at_this_blocknum(ft, root_key, fullhash, msg, flow_deltas, gc_info);
|
inject_message_at_this_blocknum(ft, root_key, fullhash, msg, flow_deltas, gc_info);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -2285,7 +2069,7 @@ static int ft_maybe_insert_into_rightmost_leaf(FT ft, DBT *key, DBT *val, XIDS m
|
|||||||
|
|
||||||
// Don't do the optimization if our heurstic suggests that
|
// Don't do the optimization if our heurstic suggests that
|
||||||
// insertion pattern is not sequential.
|
// insertion pattern is not sequential.
|
||||||
if (toku_drd_unsafe_fetch(&ft->seqinsert_score) < FT_SEQINSERT_SCORE_THRESHOLD) {
|
if (toku_unsafe_fetch(&ft->seqinsert_score) < FT_SEQINSERT_SCORE_THRESHOLD) {
|
||||||
goto cleanup;
|
goto cleanup;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -2311,7 +2095,7 @@ static int ft_maybe_insert_into_rightmost_leaf(FT ft, DBT *key, DBT *val, XIDS m
|
|||||||
// take care of it. This also ensures that if any of our ancestors are reactive,
|
// take care of it. This also ensures that if any of our ancestors are reactive,
|
||||||
// they'll be taken care of too.
|
// they'll be taken care of too.
|
||||||
if (toku_ftnode_get_leaf_reactivity(rightmost_leaf, ft->h->nodesize) != RE_STABLE) {
|
if (toku_ftnode_get_leaf_reactivity(rightmost_leaf, ft->h->nodesize) != RE_STABLE) {
|
||||||
STATUS_INC(FT_PRO_RIGHTMOST_LEAF_SHORTCUT_FAIL_REACTIVE, 1);
|
FT_STATUS_INC(FT_PRO_RIGHTMOST_LEAF_SHORTCUT_FAIL_REACTIVE, 1);
|
||||||
goto cleanup;
|
goto cleanup;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -2332,7 +2116,7 @@ static int ft_maybe_insert_into_rightmost_leaf(FT ft, DBT *key, DBT *val, XIDS m
|
|||||||
unique ? &nondeleted_key_found : nullptr,
|
unique ? &nondeleted_key_found : nullptr,
|
||||||
&target_childnum);
|
&target_childnum);
|
||||||
if (relative_pos >= 0) {
|
if (relative_pos >= 0) {
|
||||||
STATUS_INC(FT_PRO_RIGHTMOST_LEAF_SHORTCUT_SUCCESS, 1);
|
FT_STATUS_INC(FT_PRO_RIGHTMOST_LEAF_SHORTCUT_SUCCESS, 1);
|
||||||
if (unique && nondeleted_key_found) {
|
if (unique && nondeleted_key_found) {
|
||||||
r = DB_KEYEXIST;
|
r = DB_KEYEXIST;
|
||||||
} else {
|
} else {
|
||||||
@@ -2341,7 +2125,7 @@ static int ft_maybe_insert_into_rightmost_leaf(FT ft, DBT *key, DBT *val, XIDS m
|
|||||||
r = 0;
|
r = 0;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
STATUS_INC(FT_PRO_RIGHTMOST_LEAF_SHORTCUT_FAIL_POS, 1);
|
FT_STATUS_INC(FT_PRO_RIGHTMOST_LEAF_SHORTCUT_FAIL_POS, 1);
|
||||||
r = -1;
|
r = -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -2934,7 +2718,7 @@ void toku_ft_change_descriptor(
|
|||||||
new_d.dbt = *new_descriptor;
|
new_d.dbt = *new_descriptor;
|
||||||
toku_ft_update_descriptor(ft_h->ft, &new_d);
|
toku_ft_update_descriptor(ft_h->ft, &new_d);
|
||||||
// very infrequent operation, worth precise threadsafe count
|
// very infrequent operation, worth precise threadsafe count
|
||||||
STATUS_INC(FT_DESCRIPTOR_SET, 1);
|
FT_STATUS_INC(FT_DESCRIPTOR_SET, 1);
|
||||||
|
|
||||||
if (update_cmp_descriptor) {
|
if (update_cmp_descriptor) {
|
||||||
toku_ft_update_cmp_descriptor(ft_h->ft);
|
toku_ft_update_cmp_descriptor(ft_h->ft);
|
||||||
@@ -3377,7 +3161,7 @@ ok: ;
|
|||||||
|
|
||||||
if (toku_ft_cursor_is_leaf_mode(ftcursor))
|
if (toku_ft_cursor_is_leaf_mode(ftcursor))
|
||||||
goto got_a_good_value; // leaf mode cursors see all leaf entries
|
goto got_a_good_value; // leaf mode cursors see all leaf entries
|
||||||
if (le_val_is_del(le, ftcursor->is_snapshot_read, ftcursor->ttxn)) {
|
if (le_val_is_del(le, ftcursor->read_type, ftcursor->ttxn)) {
|
||||||
// Provisionally deleted stuff is gone.
|
// Provisionally deleted stuff is gone.
|
||||||
// So we need to scan in the direction to see if we can find something.
|
// So we need to scan in the direction to see if we can find something.
|
||||||
// Every 64 deleted leaf entries check if the leaf's key is within the search bounds.
|
// Every 64 deleted leaf entries check if the leaf's key is within the search bounds.
|
||||||
@@ -3386,7 +3170,7 @@ ok: ;
|
|||||||
case FT_SEARCH_LEFT:
|
case FT_SEARCH_LEFT:
|
||||||
idx++;
|
idx++;
|
||||||
if (idx >= bn->data_buffer.num_klpairs() || ((n_deleted % 64) == 0 && !search_continue(search, key, keylen))) {
|
if (idx >= bn->data_buffer.num_klpairs() || ((n_deleted % 64) == 0 && !search_continue(search, key, keylen))) {
|
||||||
STATUS_INC(FT_CURSOR_SKIP_DELETED_LEAF_ENTRY, n_deleted);
|
FT_STATUS_INC(FT_CURSOR_SKIP_DELETED_LEAF_ENTRY, n_deleted);
|
||||||
if (ftcursor->interrupt_cb && ftcursor->interrupt_cb(ftcursor->interrupt_cb_extra, n_deleted)) {
|
if (ftcursor->interrupt_cb && ftcursor->interrupt_cb(ftcursor->interrupt_cb_extra, n_deleted)) {
|
||||||
return TOKUDB_INTERRUPTED;
|
return TOKUDB_INTERRUPTED;
|
||||||
}
|
}
|
||||||
@@ -3395,7 +3179,7 @@ ok: ;
|
|||||||
break;
|
break;
|
||||||
case FT_SEARCH_RIGHT:
|
case FT_SEARCH_RIGHT:
|
||||||
if (idx == 0) {
|
if (idx == 0) {
|
||||||
STATUS_INC(FT_CURSOR_SKIP_DELETED_LEAF_ENTRY, n_deleted);
|
FT_STATUS_INC(FT_CURSOR_SKIP_DELETED_LEAF_ENTRY, n_deleted);
|
||||||
if (ftcursor->interrupt_cb && ftcursor->interrupt_cb(ftcursor->interrupt_cb_extra, n_deleted)) {
|
if (ftcursor->interrupt_cb && ftcursor->interrupt_cb(ftcursor->interrupt_cb_extra, n_deleted)) {
|
||||||
return TOKUDB_INTERRUPTED;
|
return TOKUDB_INTERRUPTED;
|
||||||
}
|
}
|
||||||
@@ -3408,8 +3192,8 @@ ok: ;
|
|||||||
}
|
}
|
||||||
r = bn->data_buffer.fetch_klpair(idx, &le, &keylen, &key);
|
r = bn->data_buffer.fetch_klpair(idx, &le, &keylen, &key);
|
||||||
assert_zero(r); // we just validated the index
|
assert_zero(r); // we just validated the index
|
||||||
if (!le_val_is_del(le, ftcursor->is_snapshot_read, ftcursor->ttxn)) {
|
if (!le_val_is_del(le, ftcursor->read_type, ftcursor->ttxn)) {
|
||||||
STATUS_INC(FT_CURSOR_SKIP_DELETED_LEAF_ENTRY, n_deleted);
|
FT_STATUS_INC(FT_CURSOR_SKIP_DELETED_LEAF_ENTRY, n_deleted);
|
||||||
if (ftcursor->interrupt_cb)
|
if (ftcursor->interrupt_cb)
|
||||||
ftcursor->interrupt_cb(ftcursor->interrupt_cb_extra, n_deleted);
|
ftcursor->interrupt_cb(ftcursor->interrupt_cb_extra, n_deleted);
|
||||||
goto got_a_good_value;
|
goto got_a_good_value;
|
||||||
@@ -3422,7 +3206,7 @@ got_a_good_value:
|
|||||||
void *val;
|
void *val;
|
||||||
|
|
||||||
le_extract_val(le, toku_ft_cursor_is_leaf_mode(ftcursor),
|
le_extract_val(le, toku_ft_cursor_is_leaf_mode(ftcursor),
|
||||||
ftcursor->is_snapshot_read, ftcursor->ttxn,
|
ftcursor->read_type, ftcursor->ttxn,
|
||||||
&vallen, &val);
|
&vallen, &val);
|
||||||
r = toku_ft_cursor_check_restricted_range(ftcursor, key, keylen);
|
r = toku_ft_cursor_check_restricted_range(ftcursor, key, keylen);
|
||||||
if (r == 0) {
|
if (r == 0) {
|
||||||
@@ -3948,12 +3732,12 @@ try_again:
|
|||||||
{ // accounting (to detect and measure thrashing)
|
{ // accounting (to detect and measure thrashing)
|
||||||
uint retrycount = trycount - 1; // how many retries were needed?
|
uint retrycount = trycount - 1; // how many retries were needed?
|
||||||
if (retrycount) {
|
if (retrycount) {
|
||||||
STATUS_INC(FT_TOTAL_RETRIES, retrycount);
|
FT_STATUS_INC(FT_TOTAL_RETRIES, retrycount);
|
||||||
}
|
}
|
||||||
if (retrycount > tree_height) { // if at least one node was read from disk more than once
|
if (retrycount > tree_height) { // if at least one node was read from disk more than once
|
||||||
STATUS_INC(FT_SEARCH_TRIES_GT_HEIGHT, 1);
|
FT_STATUS_INC(FT_SEARCH_TRIES_GT_HEIGHT, 1);
|
||||||
if (retrycount > (tree_height+3))
|
if (retrycount > (tree_height+3))
|
||||||
STATUS_INC(FT_SEARCH_TRIES_GT_HEIGHTPLUS3, 1);
|
FT_STATUS_INC(FT_SEARCH_TRIES_GT_HEIGHTPLUS3, 1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return r;
|
return r;
|
||||||
@@ -4538,9 +4322,8 @@ int toku_ft_layer_init(void) {
|
|||||||
if (r) { goto exit; }
|
if (r) { goto exit; }
|
||||||
|
|
||||||
partitioned_counters_init();
|
partitioned_counters_init();
|
||||||
status_init();
|
toku_status_init();
|
||||||
txn_status_init();
|
toku_context_status_init();
|
||||||
toku_ule_status_init();
|
|
||||||
toku_checkpoint_init();
|
toku_checkpoint_init();
|
||||||
toku_ft_serialize_layer_init();
|
toku_ft_serialize_layer_init();
|
||||||
toku_mutex_init(&ft_open_close_lock, NULL);
|
toku_mutex_init(&ft_open_close_lock, NULL);
|
||||||
@@ -4553,10 +4336,8 @@ void toku_ft_layer_destroy(void) {
|
|||||||
toku_mutex_destroy(&ft_open_close_lock);
|
toku_mutex_destroy(&ft_open_close_lock);
|
||||||
toku_ft_serialize_layer_destroy();
|
toku_ft_serialize_layer_destroy();
|
||||||
toku_checkpoint_destroy();
|
toku_checkpoint_destroy();
|
||||||
status_destroy();
|
|
||||||
txn_status_destroy();
|
|
||||||
toku_ule_status_destroy();
|
|
||||||
toku_context_status_destroy();
|
toku_context_status_destroy();
|
||||||
|
toku_status_destroy();
|
||||||
partitioned_counters_destroy();
|
partitioned_counters_destroy();
|
||||||
toku_scoped_malloc_destroy();
|
toku_scoped_malloc_destroy();
|
||||||
//Portability must be cleaned up last
|
//Portability must be cleaned up last
|
||||||
@@ -4744,5 +4525,3 @@ void
|
|||||||
toku_ft_helgrind_ignore(void) {
|
toku_ft_helgrind_ignore(void) {
|
||||||
TOKU_VALGRIND_HG_DISABLE_CHECKING(&ft_status, sizeof ft_status);
|
TOKU_VALGRIND_HG_DISABLE_CHECKING(&ft_status, sizeof ft_status);
|
||||||
}
|
}
|
||||||
|
|
||||||
#undef STATUS_INC
|
|
||||||
@@ -1,96 +1,43 @@
|
|||||||
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||||
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||||
#ident "$Id$"
|
#ident "$Id$"
|
||||||
/*
|
/*======
|
||||||
COPYING CONDITIONS NOTICE:
|
This file is part of PerconaFT.
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or modify
|
|
||||||
it under the terms of version 2 of the GNU General Public License as
|
|
||||||
published by the Free Software Foundation, and provided that the
|
|
||||||
following conditions are met:
|
|
||||||
|
|
||||||
* Redistributions of source code must retain this COPYING
|
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||||
CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the
|
|
||||||
DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the
|
|
||||||
PATENT MARKING NOTICE (below), and the PATENT RIGHTS
|
|
||||||
GRANT (below).
|
|
||||||
|
|
||||||
* Redistributions in binary form must reproduce this COPYING
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the
|
it under the terms of the GNU General Public License, version 2,
|
||||||
DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the
|
as published by the Free Software Foundation.
|
||||||
PATENT MARKING NOTICE (below), and the PATENT RIGHTS
|
|
||||||
GRANT (below) in the documentation and/or other materials
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
provided with the distribution.
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
You should have received a copy of the GNU General Public License
|
You should have received a copy of the GNU General Public License
|
||||||
along with this program; if not, write to the Free Software
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
|
||||||
02110-1301, USA.
|
|
||||||
|
|
||||||
COPYRIGHT NOTICE:
|
----------------------------------------
|
||||||
|
|
||||||
TokuFT, Tokutek Fractal Tree Indexing Library.
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
Copyright (C) 2007-2013 Tokutek, Inc.
|
it under the terms of the GNU Affero General Public License, version 3,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
DISCLAIMER:
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
This program is distributed in the hope that it will be useful, but
|
You should have received a copy of the GNU Affero General Public License
|
||||||
WITHOUT ANY WARRANTY; without even the implied warranty of
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
======= */
|
||||||
General Public License for more details.
|
|
||||||
|
|
||||||
UNIVERSITY PATENT NOTICE:
|
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||||
|
|
||||||
The technology is licensed by the Massachusetts Institute of
|
|
||||||
Technology, Rutgers State University of New Jersey, and the Research
|
|
||||||
Foundation of State University of New York at Stony Brook under
|
|
||||||
United States of America Serial No. 11/760379 and to the patents
|
|
||||||
and/or patent applications resulting from it.
|
|
||||||
|
|
||||||
PATENT MARKING NOTICE:
|
|
||||||
|
|
||||||
This software is covered by US Patent No. 8,185,551.
|
|
||||||
This software is covered by US Patent No. 8,489,638.
|
|
||||||
|
|
||||||
PATENT RIGHTS GRANT:
|
|
||||||
|
|
||||||
"THIS IMPLEMENTATION" means the copyrightable works distributed by
|
|
||||||
Tokutek as part of the Fractal Tree project.
|
|
||||||
|
|
||||||
"PATENT CLAIMS" means the claims of patents that are owned or
|
|
||||||
licensable by Tokutek, both currently or in the future; and that in
|
|
||||||
the absence of this license would be infringed by THIS
|
|
||||||
IMPLEMENTATION or by using or running THIS IMPLEMENTATION.
|
|
||||||
|
|
||||||
"PATENT CHALLENGE" shall mean a challenge to the validity,
|
|
||||||
patentability, enforceability and/or non-infringement of any of the
|
|
||||||
PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS.
|
|
||||||
|
|
||||||
Tokutek hereby grants to you, for the term and geographical scope of
|
|
||||||
the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free,
|
|
||||||
irrevocable (except as stated in this section) patent license to
|
|
||||||
make, have made, use, offer to sell, sell, import, transfer, and
|
|
||||||
otherwise run, modify, and propagate the contents of THIS
|
|
||||||
IMPLEMENTATION, where such license applies only to the PATENT
|
|
||||||
CLAIMS. This grant does not include claims that would be infringed
|
|
||||||
only as a consequence of further modifications of THIS
|
|
||||||
IMPLEMENTATION. If you or your agent or licensee institute or order
|
|
||||||
or agree to the institution of patent litigation against any entity
|
|
||||||
(including a cross-claim or counterclaim in a lawsuit) alleging that
|
|
||||||
THIS IMPLEMENTATION constitutes direct or contributory patent
|
|
||||||
infringement, or inducement of patent infringement, then any rights
|
|
||||||
granted to you under this License shall terminate as of the date
|
|
||||||
such litigation is filed. If you or your agent or exclusive
|
|
||||||
licensee institute or order or agree to the institution of a PATENT
|
|
||||||
CHALLENGE, then Tokutek may terminate any rights granted to you
|
|
||||||
under this License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved."
|
|
||||||
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it."
|
|
||||||
|
|
||||||
// This must be first to make the 64-bit file mode work right in Linux
|
// This must be first to make the 64-bit file mode work right in Linux
|
||||||
#define _FILE_OFFSET_BITS 64
|
#define _FILE_OFFSET_BITS 64
|
||||||
|
|
||||||
@@ -321,7 +268,7 @@ bool toku_ft_is_empty_fast (FT_HANDLE ft_h) __attribute__ ((warn_unused_result))
|
|||||||
int toku_ft_strerror_r(int error, char *buf, size_t buflen);
|
int toku_ft_strerror_r(int error, char *buf, size_t buflen);
|
||||||
// Effect: Like the XSI-compliant strerror_r, extended to db_strerror().
|
// Effect: Like the XSI-compliant strerror_r, extended to db_strerror().
|
||||||
// If error>=0 then the result is to do strerror_r(error, buf, buflen), that is fill buf with a descriptive error message.
|
// If error>=0 then the result is to do strerror_r(error, buf, buflen), that is fill buf with a descriptive error message.
|
||||||
// If error<0 then return a TokuFT-specific error code. For unknown cases, we return -1 and set errno=EINVAL, even for cases that *should* be known. (Not all DB errors are known by this function which is a bug.)
|
// If error<0 then return a PerconaFT-specific error code. For unknown cases, we return -1 and set errno=EINVAL, even for cases that *should* be known. (Not all DB errors are known by this function which is a bug.)
|
||||||
|
|
||||||
extern bool garbage_collection_debug;
|
extern bool garbage_collection_debug;
|
||||||
|
|
||||||
488
storage/tokudb/PerconaFT/ft/ft-status.cc
Normal file
488
storage/tokudb/PerconaFT/ft/ft-status.cc
Normal file
@@ -0,0 +1,488 @@
|
|||||||
|
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||||
|
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||||
|
#ident "$Id$"
|
||||||
|
/*======
|
||||||
|
This file is part of PerconaFT.
|
||||||
|
|
||||||
|
|
||||||
|
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||||
|
|
||||||
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License, version 2,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
----------------------------------------
|
||||||
|
|
||||||
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU Affero General Public License, version 3,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Affero General Public License
|
||||||
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
======= */
|
||||||
|
|
||||||
|
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||||
|
|
||||||
|
#include "ft/ft.h"
|
||||||
|
#include "ft/ft-status.h"
|
||||||
|
|
||||||
|
#include <toku_race_tools.h>
|
||||||
|
|
||||||
|
LE_STATUS_S le_status;
|
||||||
|
void LE_STATUS_S::init() {
|
||||||
|
if (m_initialized) return;
|
||||||
|
#define LE_STATUS_INIT(k,c,t,l) TOKUFT_STATUS_INIT((*this), k, c, t, "le: " l, TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS)
|
||||||
|
LE_STATUS_INIT(LE_MAX_COMMITTED_XR, LEAF_ENTRY_MAX_COMMITTED_XR, UINT64, "max committed xr");
|
||||||
|
LE_STATUS_INIT(LE_MAX_PROVISIONAL_XR, LEAF_ENTRY_MAX_PROVISIONAL_XR, UINT64, "max provisional xr");
|
||||||
|
LE_STATUS_INIT(LE_EXPANDED, LEAF_ENTRY_EXPANDED, UINT64, "expanded");
|
||||||
|
LE_STATUS_INIT(LE_MAX_MEMSIZE, LEAF_ENTRY_MAX_MEMSIZE, UINT64, "max memsize");
|
||||||
|
LE_STATUS_INIT(LE_APPLY_GC_BYTES_IN, LEAF_ENTRY_APPLY_GC_BYTES_IN, PARCOUNT, "size of leafentries before garbage collection (during message application)");
|
||||||
|
LE_STATUS_INIT(LE_APPLY_GC_BYTES_OUT, LEAF_ENTRY_APPLY_GC_BYTES_OUT, PARCOUNT, "size of leafentries after garbage collection (during message application)");
|
||||||
|
LE_STATUS_INIT(LE_NORMAL_GC_BYTES_IN, LEAF_ENTRY_NORMAL_GC_BYTES_IN, PARCOUNT, "size of leafentries before garbage collection (outside message application)");
|
||||||
|
LE_STATUS_INIT(LE_NORMAL_GC_BYTES_OUT, LEAF_ENTRY_NORMAL_GC_BYTES_OUT, PARCOUNT, "size of leafentries after garbage collection (outside message application)");
|
||||||
|
m_initialized = true;
|
||||||
|
#undef LE_STATUS_INIT
|
||||||
|
}
|
||||||
|
void LE_STATUS_S::destroy() {
|
||||||
|
if (!m_initialized) return;
|
||||||
|
for (int i = 0; i < LE_STATUS_NUM_ROWS; ++i) {
|
||||||
|
if (status[i].type == PARCOUNT) {
|
||||||
|
destroy_partitioned_counter(status[i].value.parcount);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
CHECKPOINT_STATUS_S cp_status;
|
||||||
|
void CHECKPOINT_STATUS_S::init(void) {
|
||||||
|
if (m_initialized) return;
|
||||||
|
#define CP_STATUS_INIT(k,c,t,l) TOKUFT_STATUS_INIT((*this), k, c, t, "checkpoint: " l, TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS)
|
||||||
|
CP_STATUS_INIT(CP_PERIOD, CHECKPOINT_PERIOD, UINT64, "period");
|
||||||
|
CP_STATUS_INIT(CP_FOOTPRINT, CHECKPOINT_FOOTPRINT, UINT64, "footprint");
|
||||||
|
CP_STATUS_INIT(CP_TIME_LAST_CHECKPOINT_BEGIN, CHECKPOINT_LAST_BEGAN, UNIXTIME, "last checkpoint began");
|
||||||
|
CP_STATUS_INIT(CP_TIME_LAST_CHECKPOINT_BEGIN_COMPLETE, CHECKPOINT_LAST_COMPLETE_BEGAN, UNIXTIME, "last complete checkpoint began");
|
||||||
|
CP_STATUS_INIT(CP_TIME_LAST_CHECKPOINT_END, CHECKPOINT_LAST_COMPLETE_ENDED, UNIXTIME, "last complete checkpoint ended");
|
||||||
|
CP_STATUS_INIT(CP_TIME_CHECKPOINT_DURATION, CHECKPOINT_DURATION, UINT64, "time spent during checkpoint (begin and end phases)");
|
||||||
|
CP_STATUS_INIT(CP_TIME_CHECKPOINT_DURATION_LAST, CHECKPOINT_DURATION_LAST, UINT64, "time spent during last checkpoint (begin and end phases)");
|
||||||
|
CP_STATUS_INIT(CP_LAST_LSN, CHECKPOINT_LAST_LSN, UINT64, "last complete checkpoint LSN");
|
||||||
|
CP_STATUS_INIT(CP_CHECKPOINT_COUNT, CHECKPOINT_TAKEN, UINT64, "checkpoints taken ");
|
||||||
|
CP_STATUS_INIT(CP_CHECKPOINT_COUNT_FAIL, CHECKPOINT_FAILED, UINT64, "checkpoints failed");
|
||||||
|
CP_STATUS_INIT(CP_WAITERS_NOW, CHECKPOINT_WAITERS_NOW, UINT64, "waiters now");
|
||||||
|
CP_STATUS_INIT(CP_WAITERS_MAX, CHECKPOINT_WAITERS_MAX, UINT64, "waiters max");
|
||||||
|
CP_STATUS_INIT(CP_CLIENT_WAIT_ON_MO, CHECKPOINT_CLIENT_WAIT_ON_MO, UINT64, "non-checkpoint client wait on mo lock");
|
||||||
|
CP_STATUS_INIT(CP_CLIENT_WAIT_ON_CS, CHECKPOINT_CLIENT_WAIT_ON_CS, UINT64, "non-checkpoint client wait on cs lock");
|
||||||
|
CP_STATUS_INIT(CP_BEGIN_TIME, CHECKPOINT_BEGIN_TIME, UINT64, "checkpoint begin time");
|
||||||
|
CP_STATUS_INIT(CP_LONG_BEGIN_COUNT, CHECKPOINT_LONG_BEGIN_COUNT, UINT64, "long checkpoint begin count");
|
||||||
|
CP_STATUS_INIT(CP_LONG_BEGIN_TIME, CHECKPOINT_LONG_BEGIN_TIME, UINT64, "long checkpoint begin time");
|
||||||
|
CP_STATUS_INIT(CP_END_TIME, CHECKPOINT_END_TIME, UINT64, "checkpoint end time");
|
||||||
|
CP_STATUS_INIT(CP_LONG_END_COUNT, CHECKPOINT_LONG_END_COUNT, UINT64, "long checkpoint end count");
|
||||||
|
CP_STATUS_INIT(CP_LONG_END_TIME, CHECKPOINT_LONG_END_TIME, UINT64, "long checkpoint end time");
|
||||||
|
|
||||||
|
m_initialized = true;
|
||||||
|
#undef CP_STATUS_INIT
|
||||||
|
}
|
||||||
|
void CHECKPOINT_STATUS_S::destroy() {
|
||||||
|
if (!m_initialized) return;
|
||||||
|
for (int i = 0; i < CP_STATUS_NUM_ROWS; ++i) {
|
||||||
|
if (status[i].type == PARCOUNT) {
|
||||||
|
destroy_partitioned_counter(status[i].value.parcount);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
CACHETABLE_STATUS_S ct_status;
|
||||||
|
void CACHETABLE_STATUS_S::init() {
|
||||||
|
if (m_initialized) return;
|
||||||
|
#define CT_STATUS_INIT(k,c,t,l) TOKUFT_STATUS_INIT((*this), k, c, t, "cachetable: " l, TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS)
|
||||||
|
CT_STATUS_INIT(CT_MISS, CACHETABLE_MISS, UINT64, "miss");
|
||||||
|
CT_STATUS_INIT(CT_MISSTIME, CACHETABLE_MISS_TIME, UINT64, "miss time");
|
||||||
|
CT_STATUS_INIT(CT_PREFETCHES, CACHETABLE_PREFETCHES, UINT64, "prefetches");
|
||||||
|
CT_STATUS_INIT(CT_SIZE_CURRENT, CACHETABLE_SIZE_CURRENT, UINT64, "size current");
|
||||||
|
CT_STATUS_INIT(CT_SIZE_LIMIT, CACHETABLE_SIZE_LIMIT, UINT64, "size limit");
|
||||||
|
CT_STATUS_INIT(CT_SIZE_WRITING, CACHETABLE_SIZE_WRITING, UINT64, "size writing");
|
||||||
|
CT_STATUS_INIT(CT_SIZE_NONLEAF, CACHETABLE_SIZE_NONLEAF, UINT64, "size nonleaf");
|
||||||
|
CT_STATUS_INIT(CT_SIZE_LEAF, CACHETABLE_SIZE_LEAF, UINT64, "size leaf");
|
||||||
|
CT_STATUS_INIT(CT_SIZE_ROLLBACK, CACHETABLE_SIZE_ROLLBACK, UINT64, "size rollback");
|
||||||
|
CT_STATUS_INIT(CT_SIZE_CACHEPRESSURE, CACHETABLE_SIZE_CACHEPRESSURE, UINT64, "size cachepressure");
|
||||||
|
CT_STATUS_INIT(CT_SIZE_CLONED, CACHETABLE_SIZE_CLONED, UINT64, "size currently cloned data for checkpoint");
|
||||||
|
CT_STATUS_INIT(CT_EVICTIONS, CACHETABLE_EVICTIONS, UINT64, "evictions");
|
||||||
|
CT_STATUS_INIT(CT_CLEANER_EXECUTIONS, CACHETABLE_CLEANER_EXECUTIONS, UINT64, "cleaner executions");
|
||||||
|
CT_STATUS_INIT(CT_CLEANER_PERIOD, CACHETABLE_CLEANER_PERIOD, UINT64, "cleaner period");
|
||||||
|
CT_STATUS_INIT(CT_CLEANER_ITERATIONS, CACHETABLE_CLEANER_ITERATIONS, UINT64, "cleaner iterations");
|
||||||
|
CT_STATUS_INIT(CT_WAIT_PRESSURE_COUNT, CACHETABLE_WAIT_PRESSURE_COUNT, UINT64, "number of waits on cache pressure");
|
||||||
|
CT_STATUS_INIT(CT_WAIT_PRESSURE_TIME, CACHETABLE_WAIT_PRESSURE_TIME, UINT64, "time waiting on cache pressure");
|
||||||
|
CT_STATUS_INIT(CT_LONG_WAIT_PRESSURE_COUNT, CACHETABLE_LONG_WAIT_PRESSURE_COUNT, UINT64, "number of long waits on cache pressure");
|
||||||
|
CT_STATUS_INIT(CT_LONG_WAIT_PRESSURE_TIME, CACHETABLE_LONG_WAIT_PRESSURE_TIME, UINT64, "long time waiting on cache pressure");
|
||||||
|
|
||||||
|
CT_STATUS_INIT(CT_POOL_CLIENT_NUM_THREADS, CACHETABLE_POOL_CLIENT_NUM_THREADS, UINT64, "number of threads in pool");
|
||||||
|
CT_STATUS_INIT(CT_POOL_CLIENT_NUM_THREADS_ACTIVE, CACHETABLE_POOL_CLIENT_NUM_THREADS_ACTIVE, UINT64, "number of currently active threads in pool");
|
||||||
|
CT_STATUS_INIT(CT_POOL_CLIENT_QUEUE_SIZE, CACHETABLE_POOL_CLIENT_QUEUE_SIZE, UINT64, "number of currently queued work items");
|
||||||
|
CT_STATUS_INIT(CT_POOL_CLIENT_MAX_QUEUE_SIZE, CACHETABLE_POOL_CLIENT_MAX_QUEUE_SIZE, UINT64, "largest number of queued work items");
|
||||||
|
CT_STATUS_INIT(CT_POOL_CLIENT_TOTAL_ITEMS_PROCESSED, CACHETABLE_POOL_CLIENT_TOTAL_ITEMS_PROCESSED, UINT64, "total number of work items processed");
|
||||||
|
CT_STATUS_INIT(CT_POOL_CLIENT_TOTAL_EXECUTION_TIME, CACHETABLE_POOL_CLIENT_TOTAL_EXECUTION_TIME, UINT64, "total execution time of processing work items");
|
||||||
|
CT_STATUS_INIT(CT_POOL_CACHETABLE_NUM_THREADS, CACHETABLE_POOL_CACHETABLE_NUM_THREADS, UINT64, "number of threads in pool");
|
||||||
|
CT_STATUS_INIT(CT_POOL_CACHETABLE_NUM_THREADS_ACTIVE, CACHETABLE_POOL_CACHETABLE_NUM_THREADS_ACTIVE, UINT64, "number of currently active threads in pool");
|
||||||
|
CT_STATUS_INIT(CT_POOL_CACHETABLE_QUEUE_SIZE, CACHETABLE_POOL_CACHETABLE_QUEUE_SIZE, UINT64, "number of currently queued work items");
|
||||||
|
CT_STATUS_INIT(CT_POOL_CACHETABLE_MAX_QUEUE_SIZE, CACHETABLE_POOL_CACHETABLE_MAX_QUEUE_SIZE, UINT64, "largest number of queued work items");
|
||||||
|
CT_STATUS_INIT(CT_POOL_CACHETABLE_TOTAL_ITEMS_PROCESSED, CACHETABLE_POOL_CACHETABLE_TOTAL_ITEMS_PROCESSED, UINT64, "total number of work items processed");
|
||||||
|
CT_STATUS_INIT(CT_POOL_CACHETABLE_TOTAL_EXECUTION_TIME, CACHETABLE_POOL_CACHETABLE_TOTAL_EXECUTION_TIME, UINT64, "total execution time of processing work items");
|
||||||
|
CT_STATUS_INIT(CT_POOL_CHECKPOINT_NUM_THREADS, CACHETABLE_POOL_CHECKPOINT_NUM_THREADS, UINT64, "number of threads in pool");
|
||||||
|
CT_STATUS_INIT(CT_POOL_CHECKPOINT_NUM_THREADS_ACTIVE, CACHETABLE_POOL_CHECKPOINT_NUM_THREADS_ACTIVE, UINT64, "number of currently active threads in pool");
|
||||||
|
CT_STATUS_INIT(CT_POOL_CHECKPOINT_QUEUE_SIZE, CACHETABLE_POOL_CHECKPOINT_QUEUE_SIZE, UINT64, "number of currently queued work items");
|
||||||
|
CT_STATUS_INIT(CT_POOL_CHECKPOINT_MAX_QUEUE_SIZE, CACHETABLE_POOL_CHECKPOINT_MAX_QUEUE_SIZE, UINT64, "largest number of queued work items");
|
||||||
|
CT_STATUS_INIT(CT_POOL_CHECKPOINT_TOTAL_ITEMS_PROCESSED, CACHETABLE_POOL_CHECKPOINT_TOTAL_ITEMS_PROCESSED, UINT64, "total number of work items processed");
|
||||||
|
CT_STATUS_INIT(CT_POOL_CHECKPOINT_TOTAL_EXECUTION_TIME, CACHETABLE_POOL_CHECKPOINT_TOTAL_EXECUTION_TIME, UINT64, "total execution time of processing work items");
|
||||||
|
|
||||||
|
m_initialized = true;
|
||||||
|
#undef CT_STATUS_INIT
|
||||||
|
}
|
||||||
|
void CACHETABLE_STATUS_S::destroy() {
|
||||||
|
if (!m_initialized) return;
|
||||||
|
for (int i = 0; i < CT_STATUS_NUM_ROWS; ++i) {
|
||||||
|
if (status[i].type == PARCOUNT) {
|
||||||
|
destroy_partitioned_counter(status[i].value.parcount);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
LTM_STATUS_S ltm_status;
|
||||||
|
void LTM_STATUS_S::init() {
|
||||||
|
if (m_initialized) return;
|
||||||
|
#define LTM_STATUS_INIT(k,c,t,l) TOKUFT_STATUS_INIT((*this), k, c, t, "locktree: " l, TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS)
|
||||||
|
LTM_STATUS_INIT(LTM_SIZE_CURRENT, LOCKTREE_MEMORY_SIZE, UINT64, "memory size");
|
||||||
|
LTM_STATUS_INIT(LTM_SIZE_LIMIT, LOCKTREE_MEMORY_SIZE_LIMIT, UINT64, "memory size limit");
|
||||||
|
LTM_STATUS_INIT(LTM_ESCALATION_COUNT, LOCKTREE_ESCALATION_NUM, UINT64, "number of times lock escalation ran");
|
||||||
|
LTM_STATUS_INIT(LTM_ESCALATION_TIME, LOCKTREE_ESCALATION_SECONDS, TOKUTIME, "time spent running escalation (seconds)");
|
||||||
|
LTM_STATUS_INIT(LTM_ESCALATION_LATEST_RESULT, LOCKTREE_LATEST_POST_ESCALATION_MEMORY_SIZE, UINT64, "latest post-escalation memory size");
|
||||||
|
LTM_STATUS_INIT(LTM_NUM_LOCKTREES, LOCKTREE_OPEN_CURRENT, UINT64, "number of locktrees open now");
|
||||||
|
LTM_STATUS_INIT(LTM_LOCK_REQUESTS_PENDING, LOCKTREE_PENDING_LOCK_REQUESTS, UINT64, "number of pending lock requests");
|
||||||
|
LTM_STATUS_INIT(LTM_STO_NUM_ELIGIBLE, LOCKTREE_STO_ELIGIBLE_NUM, UINT64, "number of locktrees eligible for the STO");
|
||||||
|
LTM_STATUS_INIT(LTM_STO_END_EARLY_COUNT, LOCKTREE_STO_ENDED_NUM, UINT64, "number of times a locktree ended the STO early");
|
||||||
|
LTM_STATUS_INIT(LTM_STO_END_EARLY_TIME, LOCKTREE_STO_ENDED_SECONDS, TOKUTIME, "time spent ending the STO early (seconds)");
|
||||||
|
LTM_STATUS_INIT(LTM_WAIT_COUNT, LOCKTREE_WAIT_COUNT, UINT64, "number of wait locks");
|
||||||
|
LTM_STATUS_INIT(LTM_WAIT_TIME, LOCKTREE_WAIT_TIME, UINT64, "time waiting for locks");
|
||||||
|
LTM_STATUS_INIT(LTM_LONG_WAIT_COUNT, LOCKTREE_LONG_WAIT_COUNT, UINT64, "number of long wait locks");
|
||||||
|
LTM_STATUS_INIT(LTM_LONG_WAIT_TIME, LOCKTREE_LONG_WAIT_TIME, UINT64, "long time waiting for locks");
|
||||||
|
LTM_STATUS_INIT(LTM_TIMEOUT_COUNT, LOCKTREE_TIMEOUT_COUNT, UINT64, "number of lock timeouts");
|
||||||
|
LTM_STATUS_INIT(LTM_WAIT_ESCALATION_COUNT, LOCKTREE_WAIT_ESCALATION_COUNT, UINT64, "number of waits on lock escalation");
|
||||||
|
LTM_STATUS_INIT(LTM_WAIT_ESCALATION_TIME, LOCKTREE_WAIT_ESCALATION_TIME, UINT64, "time waiting on lock escalation");
|
||||||
|
LTM_STATUS_INIT(LTM_LONG_WAIT_ESCALATION_COUNT, LOCKTREE_LONG_WAIT_ESCALATION_COUNT, UINT64, "number of long waits on lock escalation");
|
||||||
|
LTM_STATUS_INIT(LTM_LONG_WAIT_ESCALATION_TIME, LOCKTREE_LONG_WAIT_ESCALATION_TIME, UINT64, "long time waiting on lock escalation");
|
||||||
|
|
||||||
|
m_initialized = true;
|
||||||
|
#undef LTM_STATUS_INIT
|
||||||
|
}
|
||||||
|
void LTM_STATUS_S::destroy() {
|
||||||
|
if (!m_initialized) return;
|
||||||
|
for (int i = 0; i < LTM_STATUS_NUM_ROWS; ++i) {
|
||||||
|
if (status[i].type == PARCOUNT) {
|
||||||
|
destroy_partitioned_counter(status[i].value.parcount);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
FT_STATUS_S ft_status;
|
||||||
|
void FT_STATUS_S::init() {
|
||||||
|
if (m_initialized) return;
|
||||||
|
#define FT_STATUS_INIT(k,c,t,l) TOKUFT_STATUS_INIT((*this), k, c, t, "ft: " l, TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS)
|
||||||
|
FT_STATUS_INIT(FT_UPDATES, DICTIONARY_UPDATES, PARCOUNT, "dictionary updates");
|
||||||
|
FT_STATUS_INIT(FT_UPDATES_BROADCAST, DICTIONARY_BROADCAST_UPDATES, PARCOUNT, "dictionary broadcast updates");
|
||||||
|
FT_STATUS_INIT(FT_DESCRIPTOR_SET, DESCRIPTOR_SET, PARCOUNT, "descriptor set");
|
||||||
|
FT_STATUS_INIT(FT_MSN_DISCARDS, MESSAGES_IGNORED_BY_LEAF_DUE_TO_MSN, PARCOUNT, "messages ignored by leaf due to msn");
|
||||||
|
FT_STATUS_INIT(FT_TOTAL_RETRIES, TOTAL_SEARCH_RETRIES, PARCOUNT, "total search retries due to TRY_AGAIN");
|
||||||
|
FT_STATUS_INIT(FT_SEARCH_TRIES_GT_HEIGHT, SEARCH_TRIES_GT_HEIGHT, PARCOUNT, "searches requiring more tries than the height of the tree");
|
||||||
|
FT_STATUS_INIT(FT_SEARCH_TRIES_GT_HEIGHTPLUS3, SEARCH_TRIES_GT_HEIGHTPLUS3, PARCOUNT, "searches requiring more tries than the height of the tree plus three");
|
||||||
|
FT_STATUS_INIT(FT_CREATE_LEAF, LEAF_NODES_CREATED, PARCOUNT, "leaf nodes created");
|
||||||
|
FT_STATUS_INIT(FT_CREATE_NONLEAF, NONLEAF_NODES_CREATED, PARCOUNT, "nonleaf nodes created");
|
||||||
|
FT_STATUS_INIT(FT_DESTROY_LEAF, LEAF_NODES_DESTROYED, PARCOUNT, "leaf nodes destroyed");
|
||||||
|
FT_STATUS_INIT(FT_DESTROY_NONLEAF, NONLEAF_NODES_DESTROYED, PARCOUNT, "nonleaf nodes destroyed");
|
||||||
|
FT_STATUS_INIT(FT_MSG_BYTES_IN, MESSAGES_INJECTED_AT_ROOT_BYTES, PARCOUNT, "bytes of messages injected at root (all trees)");
|
||||||
|
FT_STATUS_INIT(FT_MSG_BYTES_OUT, MESSAGES_FLUSHED_FROM_H1_TO_LEAVES_BYTES, PARCOUNT, "bytes of messages flushed from h1 nodes to leaves");
|
||||||
|
FT_STATUS_INIT(FT_MSG_BYTES_CURR, MESSAGES_IN_TREES_ESTIMATE_BYTES, PARCOUNT, "bytes of messages currently in trees (estimate)");
|
||||||
|
FT_STATUS_INIT(FT_MSG_NUM, MESSAGES_INJECTED_AT_ROOT, PARCOUNT, "messages injected at root");
|
||||||
|
FT_STATUS_INIT(FT_MSG_NUM_BROADCAST, BROADCASE_MESSAGES_INJECTED_AT_ROOT, PARCOUNT, "broadcast messages injected at root");
|
||||||
|
|
||||||
|
FT_STATUS_INIT(FT_NUM_BASEMENTS_DECOMPRESSED_NORMAL, BASEMENTS_DECOMPRESSED_TARGET_QUERY, PARCOUNT, "basements decompressed as a target of a query");
|
||||||
|
FT_STATUS_INIT(FT_NUM_BASEMENTS_DECOMPRESSED_AGGRESSIVE, BASEMENTS_DECOMPRESSED_PRELOCKED_RANGE, PARCOUNT, "basements decompressed for prelocked range");
|
||||||
|
FT_STATUS_INIT(FT_NUM_BASEMENTS_DECOMPRESSED_PREFETCH, BASEMENTS_DECOMPRESSED_PREFETCH, PARCOUNT, "basements decompressed for prefetch");
|
||||||
|
FT_STATUS_INIT(FT_NUM_BASEMENTS_DECOMPRESSED_WRITE, BASEMENTS_DECOMPRESSED_FOR_WRITE, PARCOUNT, "basements decompressed for write");
|
||||||
|
FT_STATUS_INIT(FT_NUM_MSG_BUFFER_DECOMPRESSED_NORMAL, BUFFERS_DECOMPRESSED_TARGET_QUERY, PARCOUNT, "buffers decompressed as a target of a query");
|
||||||
|
FT_STATUS_INIT(FT_NUM_MSG_BUFFER_DECOMPRESSED_AGGRESSIVE, BUFFERS_DECOMPRESSED_PRELOCKED_RANGE, PARCOUNT, "buffers decompressed for prelocked range");
|
||||||
|
FT_STATUS_INIT(FT_NUM_MSG_BUFFER_DECOMPRESSED_PREFETCH, BUFFERS_DECOMPRESSED_PREFETCH, PARCOUNT, "buffers decompressed for prefetch");
|
||||||
|
FT_STATUS_INIT(FT_NUM_MSG_BUFFER_DECOMPRESSED_WRITE, BUFFERS_DECOMPRESSED_FOR_WRITE, PARCOUNT, "buffers decompressed for write");
|
||||||
|
|
||||||
|
// Eviction statistics:
|
||||||
|
FT_STATUS_INIT(FT_FULL_EVICTIONS_LEAF, LEAF_NODE_FULL_EVICTIONS, PARCOUNT, "leaf node full evictions");
|
||||||
|
FT_STATUS_INIT(FT_FULL_EVICTIONS_LEAF_BYTES, LEAF_NODE_FULL_EVICTIONS_BYTES, PARCOUNT, "leaf node full evictions (bytes)");
|
||||||
|
FT_STATUS_INIT(FT_FULL_EVICTIONS_NONLEAF, NONLEAF_NODE_FULL_EVICTIONS, PARCOUNT, "nonleaf node full evictions");
|
||||||
|
FT_STATUS_INIT(FT_FULL_EVICTIONS_NONLEAF_BYTES, NONLEAF_NODE_FULL_EVICTIONS_BYTES, PARCOUNT, "nonleaf node full evictions (bytes)");
|
||||||
|
FT_STATUS_INIT(FT_PARTIAL_EVICTIONS_LEAF, LEAF_NODE_PARTIAL_EVICTIONS, PARCOUNT, "leaf node partial evictions");
|
||||||
|
FT_STATUS_INIT(FT_PARTIAL_EVICTIONS_LEAF_BYTES, LEAF_NODE_PARTIAL_EVICTIONS_BYTES, PARCOUNT, "leaf node partial evictions (bytes)");
|
||||||
|
FT_STATUS_INIT(FT_PARTIAL_EVICTIONS_NONLEAF, NONLEAF_NODE_PARTIAL_EVICTIONS, PARCOUNT, "nonleaf node partial evictions");
|
||||||
|
FT_STATUS_INIT(FT_PARTIAL_EVICTIONS_NONLEAF_BYTES, NONLEAF_NODE_PARTIAL_EVICTIONS_BYTES, PARCOUNT, "nonleaf node partial evictions (bytes)");
|
||||||
|
|
||||||
|
// Disk read statistics:
|
||||||
|
//
|
||||||
|
// Pivots: For queries, prefetching, or writing.
|
||||||
|
FT_STATUS_INIT(FT_NUM_PIVOTS_FETCHED_QUERY, PIVOTS_FETCHED_FOR_QUERY, PARCOUNT, "pivots fetched for query");
|
||||||
|
FT_STATUS_INIT(FT_BYTES_PIVOTS_FETCHED_QUERY, PIVOTS_FETCHED_FOR_QUERY_BYTES, PARCOUNT, "pivots fetched for query (bytes)");
|
||||||
|
FT_STATUS_INIT(FT_TOKUTIME_PIVOTS_FETCHED_QUERY, PIVOTS_FETCHED_FOR_QUERY_SECONDS, TOKUTIME, "pivots fetched for query (seconds)");
|
||||||
|
FT_STATUS_INIT(FT_NUM_PIVOTS_FETCHED_PREFETCH, PIVOTS_FETCHED_FOR_PREFETCH, PARCOUNT, "pivots fetched for prefetch");
|
||||||
|
FT_STATUS_INIT(FT_BYTES_PIVOTS_FETCHED_PREFETCH, PIVOTS_FETCHED_FOR_PREFETCH_BYTES, PARCOUNT, "pivots fetched for prefetch (bytes)");
|
||||||
|
FT_STATUS_INIT(FT_TOKUTIME_PIVOTS_FETCHED_PREFETCH, PIVOTS_FETCHED_FOR_PREFETCH_SECONDS, TOKUTIME, "pivots fetched for prefetch (seconds)");
|
||||||
|
FT_STATUS_INIT(FT_NUM_PIVOTS_FETCHED_WRITE, PIVOTS_FETCHED_FOR_WRITE, PARCOUNT, "pivots fetched for write");
|
||||||
|
FT_STATUS_INIT(FT_BYTES_PIVOTS_FETCHED_WRITE, PIVOTS_FETCHED_FOR_WRITE_BYTES, PARCOUNT, "pivots fetched for write (bytes)");
|
||||||
|
FT_STATUS_INIT(FT_TOKUTIME_PIVOTS_FETCHED_WRITE, PIVOTS_FETCHED_FOR_WRITE_SECONDS, TOKUTIME, "pivots fetched for write (seconds)");
|
||||||
|
// Basements: For queries, aggressive fetching in prelocked range, prefetching, or writing.
|
||||||
|
FT_STATUS_INIT(FT_NUM_BASEMENTS_FETCHED_NORMAL, BASEMENTS_FETCHED_TARGET_QUERY, PARCOUNT, "basements fetched as a target of a query");
|
||||||
|
FT_STATUS_INIT(FT_BYTES_BASEMENTS_FETCHED_NORMAL, BASEMENTS_FETCHED_TARGET_QUERY_BYTES, PARCOUNT, "basements fetched as a target of a query (bytes)");
|
||||||
|
FT_STATUS_INIT(FT_TOKUTIME_BASEMENTS_FETCHED_NORMAL, BASEMENTS_FETCHED_TARGET_QUERY_SECONDS, TOKUTIME, "basements fetched as a target of a query (seconds)");
|
||||||
|
FT_STATUS_INIT(FT_NUM_BASEMENTS_FETCHED_AGGRESSIVE, BASEMENTS_FETCHED_PRELOCKED_RANGE, PARCOUNT, "basements fetched for prelocked range");
|
||||||
|
FT_STATUS_INIT(FT_BYTES_BASEMENTS_FETCHED_AGGRESSIVE, BASEMENTS_FETCHED_PRELOCKED_RANGE_BYTES, PARCOUNT, "basements fetched for prelocked range (bytes)");
|
||||||
|
FT_STATUS_INIT(FT_TOKUTIME_BASEMENTS_FETCHED_AGGRESSIVE, BASEMENTS_FETCHED_PRELOCKED_RANGE_SECONDS, TOKUTIME, "basements fetched for prelocked range (seconds)");
|
||||||
|
FT_STATUS_INIT(FT_NUM_BASEMENTS_FETCHED_PREFETCH, BASEMENTS_FETCHED_PREFETCH, PARCOUNT, "basements fetched for prefetch");
|
||||||
|
FT_STATUS_INIT(FT_BYTES_BASEMENTS_FETCHED_PREFETCH, BASEMENTS_FETCHED_PREFETCH_BYTES, PARCOUNT, "basements fetched for prefetch (bytes)");
|
||||||
|
FT_STATUS_INIT(FT_TOKUTIME_BASEMENTS_FETCHED_PREFETCH, BASEMENTS_FETCHED_PREFETCH_SECONDS, TOKUTIME, "basements fetched for prefetch (seconds)");
|
||||||
|
FT_STATUS_INIT(FT_NUM_BASEMENTS_FETCHED_WRITE, BASEMENTS_FETCHED_FOR_WRITE, PARCOUNT, "basements fetched for write");
|
||||||
|
FT_STATUS_INIT(FT_BYTES_BASEMENTS_FETCHED_WRITE, BASEMENTS_FETCHED_FOR_WRITE_BYTES, PARCOUNT, "basements fetched for write (bytes)");
|
||||||
|
FT_STATUS_INIT(FT_TOKUTIME_BASEMENTS_FETCHED_WRITE, BASEMENTS_FETCHED_FOR_WRITE_SECONDS, TOKUTIME, "basements fetched for write (seconds)");
|
||||||
|
// Buffers: For queries, aggressive fetching in prelocked range, prefetching, or writing.
|
||||||
|
FT_STATUS_INIT(FT_NUM_MSG_BUFFER_FETCHED_NORMAL, BUFFERS_FETCHED_TARGET_QUERY, PARCOUNT, "buffers fetched as a target of a query");
|
||||||
|
FT_STATUS_INIT(FT_BYTES_MSG_BUFFER_FETCHED_NORMAL, BUFFERS_FETCHED_TARGET_QUERY_BYTES, PARCOUNT, "buffers fetched as a target of a query (bytes)");
|
||||||
|
FT_STATUS_INIT(FT_TOKUTIME_MSG_BUFFER_FETCHED_NORMAL, BUFFERS_FETCHED_TARGET_QUERY_SECONDS, TOKUTIME, "buffers fetched as a target of a query (seconds)");
|
||||||
|
FT_STATUS_INIT(FT_NUM_MSG_BUFFER_FETCHED_AGGRESSIVE, BUFFERS_FETCHED_PRELOCKED_RANGE, PARCOUNT, "buffers fetched for prelocked range");
|
||||||
|
FT_STATUS_INIT(FT_BYTES_MSG_BUFFER_FETCHED_AGGRESSIVE, BUFFERS_FETCHED_PRELOCKED_RANGE_BYTES, PARCOUNT, "buffers fetched for prelocked range (bytes)");
|
||||||
|
FT_STATUS_INIT(FT_TOKUTIME_MSG_BUFFER_FETCHED_AGGRESSIVE, BUFFERS_FETCHED_PRELOCKED_RANGE_SECONDS, TOKUTIME, "buffers fetched for prelocked range (seconds)");
|
||||||
|
FT_STATUS_INIT(FT_NUM_MSG_BUFFER_FETCHED_PREFETCH, BUFFERS_FETCHED_PREFETCH, PARCOUNT, "buffers fetched for prefetch");
|
||||||
|
FT_STATUS_INIT(FT_BYTES_MSG_BUFFER_FETCHED_PREFETCH, BUFFERS_FETCHED_PREFETCH_BYTES, PARCOUNT, "buffers fetched for prefetch (bytes)");
|
||||||
|
FT_STATUS_INIT(FT_TOKUTIME_MSG_BUFFER_FETCHED_PREFETCH, BUFFERS_FETCHED_PREFETCH_SECONDS, TOKUTIME, "buffers fetched for prefetch (seconds)");
|
||||||
|
FT_STATUS_INIT(FT_NUM_MSG_BUFFER_FETCHED_WRITE, BUFFERS_FETCHED_FOR_WRITE, PARCOUNT, "buffers fetched for write");
|
||||||
|
FT_STATUS_INIT(FT_BYTES_MSG_BUFFER_FETCHED_WRITE, BUFFERS_FETCHED_FOR_WRITE_BYTES, PARCOUNT, "buffers fetched for write (bytes)");
|
||||||
|
FT_STATUS_INIT(FT_TOKUTIME_MSG_BUFFER_FETCHED_WRITE, BUFFERS_FETCHED_FOR_WRITE_SECONDS, TOKUTIME, "buffers fetched for write (seconds)");
|
||||||
|
|
||||||
|
// Disk write statistics.
|
||||||
|
//
|
||||||
|
// Leaf/Nonleaf: Not for checkpoint
|
||||||
|
FT_STATUS_INIT(FT_DISK_FLUSH_LEAF, LEAF_NODES_FLUSHED_NOT_CHECKPOINT, PARCOUNT, "leaf nodes flushed to disk (not for checkpoint)");
|
||||||
|
FT_STATUS_INIT(FT_DISK_FLUSH_LEAF_BYTES, LEAF_NODES_FLUSHED_NOT_CHECKPOINT_BYTES, PARCOUNT, "leaf nodes flushed to disk (not for checkpoint) (bytes)");
|
||||||
|
FT_STATUS_INIT(FT_DISK_FLUSH_LEAF_UNCOMPRESSED_BYTES, LEAF_NODES_FLUSHED_NOT_CHECKPOINT_UNCOMPRESSED_BYTES, PARCOUNT, "leaf nodes flushed to disk (not for checkpoint) (uncompressed bytes)");
|
||||||
|
FT_STATUS_INIT(FT_DISK_FLUSH_LEAF_TOKUTIME, LEAF_NODES_FLUSHED_NOT_CHECKPOINT_SECONDS, TOKUTIME, "leaf nodes flushed to disk (not for checkpoint) (seconds)");
|
||||||
|
FT_STATUS_INIT(FT_DISK_FLUSH_NONLEAF, NONLEAF_NODES_FLUSHED_TO_DISK_NOT_CHECKPOINT, PARCOUNT, "nonleaf nodes flushed to disk (not for checkpoint)");
|
||||||
|
FT_STATUS_INIT(FT_DISK_FLUSH_NONLEAF_BYTES, NONLEAF_NODES_FLUSHED_TO_DISK_NOT_CHECKPOINT_BYTES, PARCOUNT, "nonleaf nodes flushed to disk (not for checkpoint) (bytes)");
|
||||||
|
FT_STATUS_INIT(FT_DISK_FLUSH_NONLEAF_UNCOMPRESSED_BYTES, NONLEAF_NODES_FLUSHED_TO_DISK_NOT_CHECKPOINT_UNCOMPRESSED_BYTES, PARCOUNT, "nonleaf nodes flushed to disk (not for checkpoint) (uncompressed bytes)");
|
||||||
|
FT_STATUS_INIT(FT_DISK_FLUSH_NONLEAF_TOKUTIME, NONLEAF_NODES_FLUSHED_TO_DISK_NOT_CHECKPOINT_SECONDS, TOKUTIME, "nonleaf nodes flushed to disk (not for checkpoint) (seconds)");
|
||||||
|
// Leaf/Nonleaf: For checkpoint
|
||||||
|
FT_STATUS_INIT(FT_DISK_FLUSH_LEAF_FOR_CHECKPOINT, LEAF_NODES_FLUSHED_CHECKPOINT, PARCOUNT, "leaf nodes flushed to disk (for checkpoint)");
|
||||||
|
FT_STATUS_INIT(FT_DISK_FLUSH_LEAF_BYTES_FOR_CHECKPOINT, LEAF_NODES_FLUSHED_CHECKPOINT_BYTES, PARCOUNT, "leaf nodes flushed to disk (for checkpoint) (bytes)");
|
||||||
|
FT_STATUS_INIT(FT_DISK_FLUSH_LEAF_UNCOMPRESSED_BYTES_FOR_CHECKPOINT, LEAF_NODES_FLUSHED_CHECKPOINT_UNCOMPRESSED_BYTES, PARCOUNT, "leaf nodes flushed to disk (for checkpoint) (uncompressed bytes)");
|
||||||
|
FT_STATUS_INIT(FT_DISK_FLUSH_LEAF_TOKUTIME_FOR_CHECKPOINT, LEAF_NODES_FLUSHED_CHECKPOINT_SECONDS, TOKUTIME, "leaf nodes flushed to disk (for checkpoint) (seconds)");
|
||||||
|
FT_STATUS_INIT(FT_DISK_FLUSH_NONLEAF_FOR_CHECKPOINT, NONLEAF_NODES_FLUSHED_TO_DISK_CHECKPOINT, PARCOUNT, "nonleaf nodes flushed to disk (for checkpoint)");
|
||||||
|
FT_STATUS_INIT(FT_DISK_FLUSH_NONLEAF_BYTES_FOR_CHECKPOINT, NONLEAF_NODES_FLUSHED_TO_DISK_CHECKPOINT_BYTES, PARCOUNT, "nonleaf nodes flushed to disk (for checkpoint) (bytes)");
|
||||||
|
FT_STATUS_INIT(FT_DISK_FLUSH_NONLEAF_UNCOMPRESSED_BYTES_FOR_CHECKPOINT, NONLEAF_NODES_FLUSHED_TO_DISK_CHECKPOINT_UNCOMPRESSED_BYTES, PARCOUNT, "nonleaf nodes flushed to disk (for checkpoint) (uncompressed bytes)");
|
||||||
|
FT_STATUS_INIT(FT_DISK_FLUSH_NONLEAF_TOKUTIME_FOR_CHECKPOINT, NONLEAF_NODES_FLUSHED_TO_DISK_CHECKPOINT_SECONDS, TOKUTIME, "nonleaf nodes flushed to disk (for checkpoint) (seconds)");
|
||||||
|
FT_STATUS_INIT(FT_DISK_FLUSH_LEAF_COMPRESSION_RATIO, LEAF_NODE_COMPRESSION_RATIO, DOUBLE, "uncompressed / compressed bytes written (leaf)");
|
||||||
|
FT_STATUS_INIT(FT_DISK_FLUSH_NONLEAF_COMPRESSION_RATIO, NONLEAF_NODE_COMPRESSION_RATIO, DOUBLE, "uncompressed / compressed bytes written (nonleaf)");
|
||||||
|
FT_STATUS_INIT(FT_DISK_FLUSH_OVERALL_COMPRESSION_RATIO, OVERALL_NODE_COMPRESSION_RATIO, DOUBLE, "uncompressed / compressed bytes written (overall)");
|
||||||
|
|
||||||
|
// CPU time statistics for [de]serialization and [de]compression.
|
||||||
|
FT_STATUS_INIT(FT_LEAF_COMPRESS_TOKUTIME, LEAF_COMPRESSION_TO_MEMORY_SECONDS, TOKUTIME, "leaf compression to memory (seconds)");
|
||||||
|
FT_STATUS_INIT(FT_LEAF_SERIALIZE_TOKUTIME, LEAF_SERIALIZATION_TO_MEMORY_SECONDS, TOKUTIME, "leaf serialization to memory (seconds)");
|
||||||
|
FT_STATUS_INIT(FT_LEAF_DECOMPRESS_TOKUTIME, LEAF_DECOMPRESSION_TO_MEMORY_SECONDS, TOKUTIME, "leaf decompression to memory (seconds)");
|
||||||
|
FT_STATUS_INIT(FT_LEAF_DESERIALIZE_TOKUTIME, LEAF_DESERIALIZATION_TO_MEMORY_SECONDS, TOKUTIME, "leaf deserialization to memory (seconds)");
|
||||||
|
FT_STATUS_INIT(FT_NONLEAF_COMPRESS_TOKUTIME, NONLEAF_COMPRESSION_TO_MEMORY_SECONDS, TOKUTIME, "nonleaf compression to memory (seconds)");
|
||||||
|
FT_STATUS_INIT(FT_NONLEAF_SERIALIZE_TOKUTIME, NONLEAF_SERIALIZATION_TO_MEMORY_SECONDS, TOKUTIME, "nonleaf serialization to memory (seconds)");
|
||||||
|
FT_STATUS_INIT(FT_NONLEAF_DECOMPRESS_TOKUTIME, NONLEAF_DECOMPRESSION_TO_MEMORY_SECONDS, TOKUTIME, "nonleaf decompression to memory (seconds)");
|
||||||
|
FT_STATUS_INIT(FT_NONLEAF_DESERIALIZE_TOKUTIME, NONLEAF_DESERIALIZATION_TO_MEMORY_SECONDS, TOKUTIME, "nonleaf deserialization to memory (seconds)");
|
||||||
|
|
||||||
|
// Promotion statistics.
|
||||||
|
FT_STATUS_INIT(FT_PRO_NUM_ROOT_SPLIT, PROMOTION_ROOTS_SPLIT, PARCOUNT, "promotion: roots split");
|
||||||
|
FT_STATUS_INIT(FT_PRO_NUM_ROOT_H0_INJECT, PROMOTION_LEAF_ROOTS_INJECTED_INTO, PARCOUNT, "promotion: leaf roots injected into");
|
||||||
|
FT_STATUS_INIT(FT_PRO_NUM_ROOT_H1_INJECT, PROMOTION_H1_ROOTS_INJECTED_INTO, PARCOUNT, "promotion: h1 roots injected into");
|
||||||
|
FT_STATUS_INIT(FT_PRO_NUM_INJECT_DEPTH_0, PROMOTION_INJECTIONS_AT_DEPTH_0, PARCOUNT, "promotion: injections at depth 0");
|
||||||
|
FT_STATUS_INIT(FT_PRO_NUM_INJECT_DEPTH_1, PROMOTION_INJECTIONS_AT_DEPTH_1, PARCOUNT, "promotion: injections at depth 1");
|
||||||
|
FT_STATUS_INIT(FT_PRO_NUM_INJECT_DEPTH_2, PROMOTION_INJECTIONS_AT_DEPTH_2, PARCOUNT, "promotion: injections at depth 2");
|
||||||
|
FT_STATUS_INIT(FT_PRO_NUM_INJECT_DEPTH_3, PROMOTION_INJECTIONS_AT_DEPTH_3, PARCOUNT, "promotion: injections at depth 3");
|
||||||
|
FT_STATUS_INIT(FT_PRO_NUM_INJECT_DEPTH_GT3, PROMOTION_INJECTIONS_LOWER_THAN_DEPTH_3, PARCOUNT, "promotion: injections lower than depth 3");
|
||||||
|
FT_STATUS_INIT(FT_PRO_NUM_STOP_NONEMPTY_BUF, PROMOTION_STOPPED_NONEMPTY_BUFFER, PARCOUNT, "promotion: stopped because of a nonempty buffer");
|
||||||
|
FT_STATUS_INIT(FT_PRO_NUM_STOP_H1, PROMOTION_STOPPED_AT_HEIGHT_1, PARCOUNT, "promotion: stopped at height 1");
|
||||||
|
FT_STATUS_INIT(FT_PRO_NUM_STOP_LOCK_CHILD, PROMOTION_STOPPED_CHILD_LOCKED_OR_NOT_IN_MEMORY, PARCOUNT, "promotion: stopped because the child was locked or not at all in memory");
|
||||||
|
FT_STATUS_INIT(FT_PRO_NUM_STOP_CHILD_INMEM, PROMOTION_STOPPED_CHILD_NOT_FULLY_IN_MEMORY, PARCOUNT, "promotion: stopped because the child was not fully in memory");
|
||||||
|
FT_STATUS_INIT(FT_PRO_NUM_DIDNT_WANT_PROMOTE, PROMOTION_STOPPED_AFTER_LOCKING_CHILD, PARCOUNT, "promotion: stopped anyway, after locking the child");
|
||||||
|
FT_STATUS_INIT(FT_BASEMENT_DESERIALIZE_FIXED_KEYSIZE, BASEMENT_DESERIALIZATION_FIXED_KEY, PARCOUNT, "basement nodes deserialized with fixed-keysize");
|
||||||
|
FT_STATUS_INIT(FT_BASEMENT_DESERIALIZE_VARIABLE_KEYSIZE, BASEMENT_DESERIALIZATION_VARIABLE_KEY, PARCOUNT, "basement nodes deserialized with variable-keysize");
|
||||||
|
FT_STATUS_INIT(FT_PRO_RIGHTMOST_LEAF_SHORTCUT_SUCCESS, PRO_RIGHTMOST_LEAF_SHORTCUT_SUCCESS, PARCOUNT, "promotion: succeeded in using the rightmost leaf shortcut");
|
||||||
|
FT_STATUS_INIT(FT_PRO_RIGHTMOST_LEAF_SHORTCUT_FAIL_POS, PRO_RIGHTMOST_LEAF_SHORTCUT_FAIL_POS, PARCOUNT, "promotion: tried the rightmost leaf shorcut but failed (out-of-bounds)");
|
||||||
|
FT_STATUS_INIT(FT_PRO_RIGHTMOST_LEAF_SHORTCUT_FAIL_REACTIVE,RIGHTMOST_LEAF_SHORTCUT_FAIL_REACTIVE, PARCOUNT, "promotion: tried the rightmost leaf shorcut but failed (child reactive)");
|
||||||
|
|
||||||
|
FT_STATUS_INIT(FT_CURSOR_SKIP_DELETED_LEAF_ENTRY, CURSOR_SKIP_DELETED_LEAF_ENTRY, PARCOUNT, "cursor skipped deleted leaf entries");
|
||||||
|
|
||||||
|
m_initialized = true;
|
||||||
|
#undef FT_STATUS_INIT
|
||||||
|
}
|
||||||
|
void FT_STATUS_S::destroy() {
|
||||||
|
if (!m_initialized) return;
|
||||||
|
for (int i = 0; i < FT_STATUS_NUM_ROWS; ++i) {
|
||||||
|
if (status[i].type == PARCOUNT) {
|
||||||
|
destroy_partitioned_counter(status[i].value.parcount);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
FT_FLUSHER_STATUS_S fl_status;
|
||||||
|
void FT_FLUSHER_STATUS_S::init() {
|
||||||
|
if (m_initialized) return;
|
||||||
|
#define FL_STATUS_INIT(k,c,t,l) TOKUFT_STATUS_INIT((*this), k, c, t, "ft flusher: " l, TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS)
|
||||||
|
FL_STATUS_INIT(FT_FLUSHER_CLEANER_TOTAL_NODES, FLUSHER_CLEANER_TOTAL_NODES, UINT64, "total nodes potentially flushed by cleaner thread");
|
||||||
|
FL_STATUS_INIT(FT_FLUSHER_CLEANER_H1_NODES, FLUSHER_CLEANER_H1_NODES, UINT64, "height-one nodes flushed by cleaner thread");
|
||||||
|
FL_STATUS_INIT(FT_FLUSHER_CLEANER_HGT1_NODES, FLUSHER_CLEANER_HGT1_NODES, UINT64, "height-greater-than-one nodes flushed by cleaner thread");
|
||||||
|
FL_STATUS_INIT(FT_FLUSHER_CLEANER_EMPTY_NODES, FLUSHER_CLEANER_EMPTY_NODES, UINT64, "nodes cleaned which had empty buffers");
|
||||||
|
FL_STATUS_INIT(FT_FLUSHER_CLEANER_NODES_DIRTIED, FLUSHER_CLEANER_NODES_DIRTIED, UINT64, "nodes dirtied by cleaner thread");
|
||||||
|
FL_STATUS_INIT(FT_FLUSHER_CLEANER_MAX_BUFFER_SIZE, FLUSHER_CLEANER_MAX_BUFFER_SIZE, UINT64, "max bytes in a buffer flushed by cleaner thread");
|
||||||
|
FL_STATUS_INIT(FT_FLUSHER_CLEANER_MIN_BUFFER_SIZE, FLUSHER_CLEANER_MIN_BUFFER_SIZE, UINT64, "min bytes in a buffer flushed by cleaner thread");
|
||||||
|
FL_STATUS_INIT(FT_FLUSHER_CLEANER_TOTAL_BUFFER_SIZE, FLUSHER_CLEANER_TOTAL_BUFFER_SIZE, UINT64, "total bytes in buffers flushed by cleaner thread");
|
||||||
|
FL_STATUS_INIT(FT_FLUSHER_CLEANER_MAX_BUFFER_WORKDONE, FLUSHER_CLEANER_MAX_BUFFER_WORKDONE, UINT64, "max workdone in a buffer flushed by cleaner thread");
|
||||||
|
FL_STATUS_INIT(FT_FLUSHER_CLEANER_MIN_BUFFER_WORKDONE, FLUSHER_CLEANER_MIN_BUFFER_WORKDONE, UINT64, "min workdone in a buffer flushed by cleaner thread");
|
||||||
|
FL_STATUS_INIT(FT_FLUSHER_CLEANER_TOTAL_BUFFER_WORKDONE, FLUSHER_CLEANER_TOTAL_BUFFER_WORKDONE, UINT64, "total workdone in buffers flushed by cleaner thread");
|
||||||
|
FL_STATUS_INIT(FT_FLUSHER_CLEANER_NUM_LEAF_MERGES_STARTED, FLUSHER_CLEANER_NUM_LEAF_MERGES_STARTED, UINT64, "times cleaner thread tries to merge a leaf");
|
||||||
|
FL_STATUS_INIT(FT_FLUSHER_CLEANER_NUM_LEAF_MERGES_RUNNING, FLUSHER_CLEANER_NUM_LEAF_MERGES_RUNNING, UINT64, "cleaner thread leaf merges in progress");
|
||||||
|
FL_STATUS_INIT(FT_FLUSHER_CLEANER_NUM_LEAF_MERGES_COMPLETED, FLUSHER_CLEANER_NUM_LEAF_MERGES_COMPLETED, UINT64, "cleaner thread leaf merges successful");
|
||||||
|
FL_STATUS_INIT(FT_FLUSHER_CLEANER_NUM_DIRTIED_FOR_LEAF_MERGE, FLUSHER_CLEANER_NUM_DIRTIED_FOR_LEAF_MERGE, UINT64, "nodes dirtied by cleaner thread leaf merges");
|
||||||
|
FL_STATUS_INIT(FT_FLUSHER_FLUSH_TOTAL, FLUSHER_FLUSH_TOTAL, UINT64, "total number of flushes done by flusher threads or cleaner threads");
|
||||||
|
FL_STATUS_INIT(FT_FLUSHER_FLUSH_IN_MEMORY, FLUSHER_FLUSH_IN_MEMORY, UINT64, "number of in memory flushes");
|
||||||
|
FL_STATUS_INIT(FT_FLUSHER_FLUSH_NEEDED_IO, FLUSHER_FLUSH_NEEDED_IO, UINT64, "number of flushes that read something off disk");
|
||||||
|
FL_STATUS_INIT(FT_FLUSHER_FLUSH_CASCADES, FLUSHER_FLUSH_CASCADES, UINT64, "number of flushes that triggered another flush in child");
|
||||||
|
FL_STATUS_INIT(FT_FLUSHER_FLUSH_CASCADES_1, FLUSHER_FLUSH_CASCADES_1, UINT64, "number of flushes that triggered 1 cascading flush");
|
||||||
|
FL_STATUS_INIT(FT_FLUSHER_FLUSH_CASCADES_2, FLUSHER_FLUSH_CASCADES_2, UINT64, "number of flushes that triggered 2 cascading flushes");
|
||||||
|
FL_STATUS_INIT(FT_FLUSHER_FLUSH_CASCADES_3, FLUSHER_FLUSH_CASCADES_3, UINT64, "number of flushes that triggered 3 cascading flushes");
|
||||||
|
FL_STATUS_INIT(FT_FLUSHER_FLUSH_CASCADES_4, FLUSHER_FLUSH_CASCADES_4, UINT64, "number of flushes that triggered 4 cascading flushes");
|
||||||
|
FL_STATUS_INIT(FT_FLUSHER_FLUSH_CASCADES_5, FLUSHER_FLUSH_CASCADES_5, UINT64, "number of flushes that triggered 5 cascading flushes");
|
||||||
|
FL_STATUS_INIT(FT_FLUSHER_FLUSH_CASCADES_GT_5, FLUSHER_FLUSH_CASCADES_GT_5, UINT64, "number of flushes that triggered over 5 cascading flushes");
|
||||||
|
FL_STATUS_INIT(FT_FLUSHER_SPLIT_LEAF, FLUSHER_SPLIT_LEAF, UINT64, "leaf node splits");
|
||||||
|
FL_STATUS_INIT(FT_FLUSHER_SPLIT_NONLEAF, FLUSHER_SPLIT_NONLEAF, UINT64, "nonleaf node splits");
|
||||||
|
FL_STATUS_INIT(FT_FLUSHER_MERGE_LEAF, FLUSHER_MERGE_LEAF, UINT64, "leaf node merges");
|
||||||
|
FL_STATUS_INIT(FT_FLUSHER_MERGE_NONLEAF, FLUSHER_MERGE_NONLEAF, UINT64, "nonleaf node merges");
|
||||||
|
FL_STATUS_INIT(FT_FLUSHER_BALANCE_LEAF, FLUSHER_BALANCE_LEAF, UINT64, "leaf node balances");
|
||||||
|
|
||||||
|
FL_STATUS_VAL(FT_FLUSHER_CLEANER_MIN_BUFFER_SIZE) = UINT64_MAX;
|
||||||
|
FL_STATUS_VAL(FT_FLUSHER_CLEANER_MIN_BUFFER_WORKDONE) = UINT64_MAX;
|
||||||
|
|
||||||
|
m_initialized = true;
|
||||||
|
#undef FL_STATUS_INIT
|
||||||
|
}
|
||||||
|
void FT_FLUSHER_STATUS_S::destroy() {
|
||||||
|
if (!m_initialized) return;
|
||||||
|
for (int i = 0; i < FT_FLUSHER_STATUS_NUM_ROWS; ++i) {
|
||||||
|
if (status[i].type == PARCOUNT) {
|
||||||
|
destroy_partitioned_counter(status[i].value.parcount);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
FT_HOT_STATUS_S hot_status;
|
||||||
|
void FT_HOT_STATUS_S::init() {
|
||||||
|
if (m_initialized) return;
|
||||||
|
#define HOT_STATUS_INIT(k,c,t,l) TOKUFT_STATUS_INIT((*this), k, c, t, "hot: " l, TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS)
|
||||||
|
HOT_STATUS_INIT(FT_HOT_NUM_STARTED, HOT_NUM_STARTED, UINT64, "operations ever started");
|
||||||
|
HOT_STATUS_INIT(FT_HOT_NUM_COMPLETED, HOT_NUM_COMPLETED, UINT64, "operations successfully completed");
|
||||||
|
HOT_STATUS_INIT(FT_HOT_NUM_ABORTED, HOT_NUM_ABORTED, UINT64, "operations aborted");
|
||||||
|
HOT_STATUS_INIT(FT_HOT_MAX_ROOT_FLUSH_COUNT, HOT_MAX_ROOT_FLUSH_COUNT, UINT64, "max number of flushes from root ever required to optimize a tree");
|
||||||
|
|
||||||
|
m_initialized = true;
|
||||||
|
#undef HOT_STATUS_INIT
|
||||||
|
}
|
||||||
|
void FT_HOT_STATUS_S::destroy() {
|
||||||
|
if (!m_initialized) return;
|
||||||
|
for (int i = 0; i < FT_HOT_STATUS_NUM_ROWS; ++i) {
|
||||||
|
if (status[i].type == PARCOUNT) {
|
||||||
|
destroy_partitioned_counter(status[i].value.parcount);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
TXN_STATUS_S txn_status;
|
||||||
|
void TXN_STATUS_S::init() {
|
||||||
|
if (m_initialized) return;
|
||||||
|
#define TXN_STATUS_INIT(k,c,t,l) TOKUFT_STATUS_INIT((*this), k, c, t, "txn: " l, TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS)
|
||||||
|
TXN_STATUS_INIT(TXN_BEGIN, TXN_BEGIN, PARCOUNT, "begin");
|
||||||
|
TXN_STATUS_INIT(TXN_READ_BEGIN, TXN_BEGIN_READ_ONLY, PARCOUNT, "begin read only");
|
||||||
|
TXN_STATUS_INIT(TXN_COMMIT, TXN_COMMITS, PARCOUNT, "successful commits");
|
||||||
|
TXN_STATUS_INIT(TXN_ABORT, TXN_ABORTS, PARCOUNT, "aborts");
|
||||||
|
m_initialized = true;
|
||||||
|
#undef TXN_STATUS_INIT
|
||||||
|
}
|
||||||
|
void TXN_STATUS_S::destroy() {
|
||||||
|
if (!m_initialized) return;
|
||||||
|
for (int i = 0; i < TXN_STATUS_NUM_ROWS; ++i) {
|
||||||
|
if (status[i].type == PARCOUNT) {
|
||||||
|
destroy_partitioned_counter(status[i].value.parcount);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
LOGGER_STATUS_S log_status;
|
||||||
|
void LOGGER_STATUS_S::init() {
|
||||||
|
if (m_initialized) return;
|
||||||
|
#define LOG_STATUS_INIT(k,c,t,l) TOKUFT_STATUS_INIT((*this), k, c, t, "logger: " l, TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS)
|
||||||
|
LOG_STATUS_INIT(LOGGER_NEXT_LSN, LOGGER_NEXT_LSN, UINT64, "next LSN");
|
||||||
|
LOG_STATUS_INIT(LOGGER_NUM_WRITES, LOGGER_WRITES, UINT64, "writes");
|
||||||
|
LOG_STATUS_INIT(LOGGER_BYTES_WRITTEN, LOGGER_WRITES_BYTES, UINT64, "writes (bytes)");
|
||||||
|
LOG_STATUS_INIT(LOGGER_UNCOMPRESSED_BYTES_WRITTEN, LOGGER_WRITES_UNCOMPRESSED_BYTES, UINT64, "writes (uncompressed bytes)");
|
||||||
|
LOG_STATUS_INIT(LOGGER_TOKUTIME_WRITES, LOGGER_WRITES_SECONDS, TOKUTIME, "writes (seconds)");
|
||||||
|
LOG_STATUS_INIT(LOGGER_WAIT_BUF_LONG, LOGGER_WAIT_LONG, UINT64, "number of long logger write operations");
|
||||||
|
m_initialized = true;
|
||||||
|
#undef LOG_STATUS_INIT
|
||||||
|
}
|
||||||
|
void LOGGER_STATUS_S::destroy() {
|
||||||
|
if (!m_initialized) return;
|
||||||
|
for (int i = 0; i < LOGGER_STATUS_NUM_ROWS; ++i) {
|
||||||
|
if (status[i].type == PARCOUNT) {
|
||||||
|
destroy_partitioned_counter(status[i].value.parcount);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Initializes the global status tables defined in this file, in a fixed
// order: leaf entry, checkpoint, lock tree manager, fractal tree, flusher,
// hot, transaction, logger.
// NOTE(review): ct_status is not initialized here - presumably handled by
// the cachetable code; confirm.
void toku_status_init() {
    le_status.init();
    cp_status.init();
    ltm_status.init();
    ft_status.init();
    fl_status.init();
    hot_status.init();
    txn_status.init();
    log_status.init();
}
|
||||||
|
// Destroys the global status tables in the reverse of the order used by
// toku_status_init(): logger first, leaf entry last.
void toku_status_destroy() {
    log_status.destroy();
    txn_status.destroy();
    hot_status.destroy();
    fl_status.destroy();
    ft_status.destroy();
    ltm_status.destroy();
    cp_status.destroy();
    le_status.destroy();
}
|
||||||
525
storage/tokudb/PerconaFT/ft/ft-status.h
Normal file
525
storage/tokudb/PerconaFT/ft/ft-status.h
Normal file
@@ -0,0 +1,525 @@
|
|||||||
|
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||||
|
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||||
|
#ident "$Id$"
|
||||||
|
/*======
|
||||||
|
This file is part of PerconaFT.
|
||||||
|
|
||||||
|
|
||||||
|
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||||
|
|
||||||
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License, version 2,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
----------------------------------------
|
||||||
|
|
||||||
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU Affero General Public License, version 3,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Affero General Public License
|
||||||
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
======= */
|
||||||
|
|
||||||
|
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <db.h>
|
||||||
|
|
||||||
|
#include "portability/toku_config.h"
|
||||||
|
#include "portability/toku_list.h"
|
||||||
|
#include "portability/toku_race_tools.h"
|
||||||
|
|
||||||
|
#include "util/status.h"
|
||||||
|
|
||||||
|
//
|
||||||
|
// Leaf Entry statistics
|
||||||
|
//
|
||||||
|
class LE_STATUS_S {
|
||||||
|
public:
|
||||||
|
enum {
|
||||||
|
LE_MAX_COMMITTED_XR = 0,
|
||||||
|
LE_MAX_PROVISIONAL_XR,
|
||||||
|
LE_EXPANDED,
|
||||||
|
LE_MAX_MEMSIZE,
|
||||||
|
LE_APPLY_GC_BYTES_IN,
|
||||||
|
LE_APPLY_GC_BYTES_OUT,
|
||||||
|
LE_NORMAL_GC_BYTES_IN,
|
||||||
|
LE_NORMAL_GC_BYTES_OUT,
|
||||||
|
LE_STATUS_NUM_ROWS
|
||||||
|
};
|
||||||
|
|
||||||
|
void init();
|
||||||
|
void destroy();
|
||||||
|
|
||||||
|
TOKU_ENGINE_STATUS_ROW_S status[LE_STATUS_NUM_ROWS];
|
||||||
|
|
||||||
|
private:
|
||||||
|
bool m_initialized;
|
||||||
|
};
|
||||||
|
typedef LE_STATUS_S *LE_STATUS;
extern LE_STATUS_S le_status;

// Direct, unsynchronized access to a row's numeric value:
// executed too often to be worth making threadsafe
#define LE_STATUS_VAL(x) le_status.status[LE_STATUS_S::x].value.num

// Adds d to row x: plain numeric rows use toku_sync_fetch_and_add on
// value.num, PARCOUNT rows go through their partitioned counter.
#define LE_STATUS_INC(x, d)                                                                    \
    do {                                                                                       \
        if (le_status.status[LE_STATUS_S::x].type != PARCOUNT) {                               \
            toku_sync_fetch_and_add(&le_status.status[LE_STATUS_S::x].value.num, d);           \
        } else {                                                                               \
            increment_partitioned_counter(le_status.status[LE_STATUS_S::x].value.parcount, d); \
        }                                                                                      \
    } while (0)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
//
|
||||||
|
// Checkpoint statistics
|
||||||
|
//
|
||||||
|
class CHECKPOINT_STATUS_S {
|
||||||
|
public:
|
||||||
|
enum {
|
||||||
|
CP_PERIOD,
|
||||||
|
CP_FOOTPRINT,
|
||||||
|
CP_TIME_LAST_CHECKPOINT_BEGIN,
|
||||||
|
CP_TIME_LAST_CHECKPOINT_BEGIN_COMPLETE,
|
||||||
|
CP_TIME_LAST_CHECKPOINT_END,
|
||||||
|
CP_TIME_CHECKPOINT_DURATION,
|
||||||
|
CP_TIME_CHECKPOINT_DURATION_LAST,
|
||||||
|
CP_LAST_LSN,
|
||||||
|
CP_CHECKPOINT_COUNT,
|
||||||
|
CP_CHECKPOINT_COUNT_FAIL,
|
||||||
|
CP_WAITERS_NOW, // how many threads are currently waiting for the checkpoint_safe lock to perform a checkpoint
|
||||||
|
CP_WAITERS_MAX, // max threads ever simultaneously waiting for the checkpoint_safe lock to perform a checkpoint
|
||||||
|
CP_CLIENT_WAIT_ON_MO, // how many times a client thread waited to take the multi_operation lock, not for checkpoint
|
||||||
|
CP_CLIENT_WAIT_ON_CS, // how many times a client thread waited for the checkpoint_safe lock, not for checkpoint
|
||||||
|
CP_BEGIN_TIME,
|
||||||
|
CP_LONG_BEGIN_TIME,
|
||||||
|
CP_LONG_BEGIN_COUNT,
|
||||||
|
CP_END_TIME,
|
||||||
|
CP_LONG_END_TIME,
|
||||||
|
CP_LONG_END_COUNT,
|
||||||
|
CP_STATUS_NUM_ROWS // number of rows in this status array. must be last.
|
||||||
|
};
|
||||||
|
|
||||||
|
void init();
|
||||||
|
void destroy();
|
||||||
|
|
||||||
|
TOKU_ENGINE_STATUS_ROW_S status[CP_STATUS_NUM_ROWS];
|
||||||
|
|
||||||
|
private:
|
||||||
|
bool m_initialized;
|
||||||
|
};
|
||||||
|
typedef CHECKPOINT_STATUS_S* CHECKPOINT_STATUS;
|
||||||
|
extern CHECKPOINT_STATUS_S cp_status;
|
||||||
|
|
||||||
|
#define CP_STATUS_VAL(x) cp_status.status[CHECKPOINT_STATUS_S::x].value.num
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
//
|
||||||
|
// Cachetable statistics
|
||||||
|
//
|
||||||
|
class CACHETABLE_STATUS_S {
|
||||||
|
public:
|
||||||
|
enum {
|
||||||
|
CT_MISS = 0,
|
||||||
|
CT_MISSTIME, // how many usec spent waiting for disk read because of cache miss
|
||||||
|
CT_PREFETCHES, // how many times has a block been prefetched into the cachetable?
|
||||||
|
CT_SIZE_CURRENT, // the sum of the sizes of the nodes represented in the cachetable
|
||||||
|
CT_SIZE_LIMIT, // the limit to the sum of the node sizes
|
||||||
|
CT_SIZE_WRITING, // the sum of the sizes of the nodes being written
|
||||||
|
CT_SIZE_NONLEAF, // number of bytes in cachetable belonging to nonleaf nodes
|
||||||
|
CT_SIZE_LEAF, // number of bytes in cachetable belonging to leaf nodes
|
||||||
|
CT_SIZE_ROLLBACK, // number of bytes in cachetable belonging to rollback nodes
|
||||||
|
CT_SIZE_CACHEPRESSURE, // number of bytes causing cache pressure (sum of buffers and workdone counters)
|
||||||
|
CT_SIZE_CLONED, // number of bytes of cloned data in the system
|
||||||
|
CT_EVICTIONS,
|
||||||
|
CT_CLEANER_EXECUTIONS, // number of times the cleaner thread's loop has executed
|
||||||
|
CT_CLEANER_PERIOD,
|
||||||
|
CT_CLEANER_ITERATIONS, // number of times the cleaner thread runs the cleaner per period
|
||||||
|
CT_WAIT_PRESSURE_COUNT,
|
||||||
|
CT_WAIT_PRESSURE_TIME,
|
||||||
|
CT_LONG_WAIT_PRESSURE_COUNT,
|
||||||
|
CT_LONG_WAIT_PRESSURE_TIME,
|
||||||
|
|
||||||
|
CT_POOL_CLIENT_NUM_THREADS,
|
||||||
|
CT_POOL_CLIENT_NUM_THREADS_ACTIVE,
|
||||||
|
CT_POOL_CLIENT_QUEUE_SIZE,
|
||||||
|
CT_POOL_CLIENT_MAX_QUEUE_SIZE,
|
||||||
|
CT_POOL_CLIENT_TOTAL_ITEMS_PROCESSED,
|
||||||
|
CT_POOL_CLIENT_TOTAL_EXECUTION_TIME,
|
||||||
|
CT_POOL_CACHETABLE_NUM_THREADS,
|
||||||
|
CT_POOL_CACHETABLE_NUM_THREADS_ACTIVE,
|
||||||
|
CT_POOL_CACHETABLE_QUEUE_SIZE,
|
||||||
|
CT_POOL_CACHETABLE_MAX_QUEUE_SIZE,
|
||||||
|
CT_POOL_CACHETABLE_TOTAL_ITEMS_PROCESSED,
|
||||||
|
CT_POOL_CACHETABLE_TOTAL_EXECUTION_TIME,
|
||||||
|
CT_POOL_CHECKPOINT_NUM_THREADS,
|
||||||
|
CT_POOL_CHECKPOINT_NUM_THREADS_ACTIVE,
|
||||||
|
CT_POOL_CHECKPOINT_QUEUE_SIZE,
|
||||||
|
CT_POOL_CHECKPOINT_MAX_QUEUE_SIZE,
|
||||||
|
CT_POOL_CHECKPOINT_TOTAL_ITEMS_PROCESSED,
|
||||||
|
CT_POOL_CHECKPOINT_TOTAL_EXECUTION_TIME,
|
||||||
|
|
||||||
|
CT_STATUS_NUM_ROWS
|
||||||
|
};
|
||||||
|
|
||||||
|
void init();
|
||||||
|
void destroy();
|
||||||
|
|
||||||
|
TOKU_ENGINE_STATUS_ROW_S status[CT_STATUS_NUM_ROWS];
|
||||||
|
|
||||||
|
private:
|
||||||
|
bool m_initialized;
|
||||||
|
};
|
||||||
|
typedef CACHETABLE_STATUS_S* CACHETABLE_STATUS;
|
||||||
|
extern CACHETABLE_STATUS_S ct_status;
|
||||||
|
|
||||||
|
#define CT_STATUS_VAL(x) ct_status.status[CACHETABLE_STATUS_S::x].value.num
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
//
|
||||||
|
// Lock Tree Manager statistics
|
||||||
|
//
|
||||||
|
class LTM_STATUS_S {
|
||||||
|
public:
|
||||||
|
enum {
|
||||||
|
LTM_SIZE_CURRENT = 0,
|
||||||
|
LTM_SIZE_LIMIT,
|
||||||
|
LTM_ESCALATION_COUNT,
|
||||||
|
LTM_ESCALATION_TIME,
|
||||||
|
LTM_ESCALATION_LATEST_RESULT,
|
||||||
|
LTM_NUM_LOCKTREES,
|
||||||
|
LTM_LOCK_REQUESTS_PENDING,
|
||||||
|
LTM_STO_NUM_ELIGIBLE,
|
||||||
|
LTM_STO_END_EARLY_COUNT,
|
||||||
|
LTM_STO_END_EARLY_TIME,
|
||||||
|
LTM_WAIT_COUNT,
|
||||||
|
LTM_WAIT_TIME,
|
||||||
|
LTM_LONG_WAIT_COUNT,
|
||||||
|
LTM_LONG_WAIT_TIME,
|
||||||
|
LTM_TIMEOUT_COUNT,
|
||||||
|
LTM_WAIT_ESCALATION_COUNT,
|
||||||
|
LTM_WAIT_ESCALATION_TIME,
|
||||||
|
LTM_LONG_WAIT_ESCALATION_COUNT,
|
||||||
|
LTM_LONG_WAIT_ESCALATION_TIME,
|
||||||
|
LTM_STATUS_NUM_ROWS // must be last
|
||||||
|
};
|
||||||
|
|
||||||
|
void init(void);
|
||||||
|
void destroy(void);
|
||||||
|
|
||||||
|
TOKU_ENGINE_STATUS_ROW_S status[LTM_STATUS_NUM_ROWS];
|
||||||
|
|
||||||
|
private:
|
||||||
|
bool m_initialized;
|
||||||
|
};
|
||||||
|
typedef LTM_STATUS_S* LTM_STATUS;
|
||||||
|
extern LTM_STATUS_S ltm_status;
|
||||||
|
|
||||||
|
#define LTM_STATUS_VAL(x) ltm_status.status[LTM_STATUS_S::x].value.num
|
||||||
|
|
||||||
|
|
||||||
|
//
|
||||||
|
// Fractal Tree statistics
|
||||||
|
//
|
||||||
|
class FT_STATUS_S {
|
||||||
|
public:
|
||||||
|
enum {
|
||||||
|
FT_UPDATES = 0,
|
||||||
|
FT_UPDATES_BROADCAST,
|
||||||
|
FT_DESCRIPTOR_SET,
|
||||||
|
FT_MSN_DISCARDS, // how many messages were ignored by leaf because of msn
|
||||||
|
FT_TOTAL_RETRIES, // total number of search retries due to TRY_AGAIN
|
||||||
|
FT_SEARCH_TRIES_GT_HEIGHT, // number of searches that required more tries than the height of the tree
|
||||||
|
FT_SEARCH_TRIES_GT_HEIGHTPLUS3, // number of searches that required more tries than the height of the tree plus three
|
||||||
|
FT_DISK_FLUSH_LEAF, // number of leaf nodes flushed to disk, not for checkpoint
|
||||||
|
FT_DISK_FLUSH_LEAF_BYTES, // number of leaf nodes flushed to disk, not for checkpoint
|
||||||
|
FT_DISK_FLUSH_LEAF_UNCOMPRESSED_BYTES, // number of leaf nodes flushed to disk, not for checkpoint
|
||||||
|
FT_DISK_FLUSH_LEAF_TOKUTIME, // number of leaf nodes flushed to disk, not for checkpoint
|
||||||
|
FT_DISK_FLUSH_NONLEAF, // number of nonleaf nodes flushed to disk, not for checkpoint
|
||||||
|
FT_DISK_FLUSH_NONLEAF_BYTES, // number of nonleaf nodes flushed to disk, not for checkpoint
|
||||||
|
FT_DISK_FLUSH_NONLEAF_UNCOMPRESSED_BYTES, // number of nonleaf nodes flushed to disk, not for checkpoint
|
||||||
|
FT_DISK_FLUSH_NONLEAF_TOKUTIME, // number of nonleaf nodes flushed to disk, not for checkpoint
|
||||||
|
FT_DISK_FLUSH_LEAF_FOR_CHECKPOINT, // number of leaf nodes flushed to disk for checkpoint
|
||||||
|
FT_DISK_FLUSH_LEAF_BYTES_FOR_CHECKPOINT, // number of leaf nodes flushed to disk for checkpoint
|
||||||
|
FT_DISK_FLUSH_LEAF_UNCOMPRESSED_BYTES_FOR_CHECKPOINT,// number of leaf nodes flushed to disk for checkpoint
|
||||||
|
FT_DISK_FLUSH_LEAF_TOKUTIME_FOR_CHECKPOINT,// number of leaf nodes flushed to disk for checkpoint
|
||||||
|
FT_DISK_FLUSH_NONLEAF_FOR_CHECKPOINT, // number of nonleaf nodes flushed to disk for checkpoint
|
||||||
|
FT_DISK_FLUSH_NONLEAF_BYTES_FOR_CHECKPOINT,// number of nonleaf nodes flushed to disk for checkpoint
|
||||||
|
FT_DISK_FLUSH_NONLEAF_UNCOMPRESSED_BYTES_FOR_CHECKPOINT,// number of nonleaf nodes flushed to disk for checkpoint
|
||||||
|
FT_DISK_FLUSH_NONLEAF_TOKUTIME_FOR_CHECKPOINT,// number of nonleaf nodes flushed to disk for checkpoint
|
||||||
|
FT_DISK_FLUSH_LEAF_COMPRESSION_RATIO, // effective compression ratio for leaf bytes flushed to disk
|
||||||
|
FT_DISK_FLUSH_NONLEAF_COMPRESSION_RATIO, // effective compression ratio for nonleaf bytes flushed to disk
|
||||||
|
FT_DISK_FLUSH_OVERALL_COMPRESSION_RATIO, // effective compression ratio for all bytes flushed to disk
|
||||||
|
FT_PARTIAL_EVICTIONS_NONLEAF, // number of nonleaf node partial evictions
|
||||||
|
FT_PARTIAL_EVICTIONS_NONLEAF_BYTES, // number of nonleaf node partial evictions
|
||||||
|
FT_PARTIAL_EVICTIONS_LEAF, // number of leaf node partial evictions
|
||||||
|
FT_PARTIAL_EVICTIONS_LEAF_BYTES, // number of leaf node partial evictions
|
||||||
|
FT_FULL_EVICTIONS_LEAF, // number of full cachetable evictions on leaf nodes
|
||||||
|
FT_FULL_EVICTIONS_LEAF_BYTES, // number of full cachetable evictions on leaf nodes (bytes)
|
||||||
|
FT_FULL_EVICTIONS_NONLEAF, // number of full cachetable evictions on nonleaf nodes
|
||||||
|
FT_FULL_EVICTIONS_NONLEAF_BYTES, // number of full cachetable evictions on nonleaf nodes (bytes)
|
||||||
|
FT_CREATE_LEAF, // number of leaf nodes created
|
||||||
|
FT_CREATE_NONLEAF, // number of nonleaf nodes created
|
||||||
|
FT_DESTROY_LEAF, // number of leaf nodes destroyed
|
||||||
|
FT_DESTROY_NONLEAF, // number of nonleaf nodes destroyed
|
||||||
|
FT_MSG_BYTES_IN, // how many bytes of messages injected at root (for all trees)
|
||||||
|
FT_MSG_BYTES_OUT, // how many bytes of messages flushed from h1 nodes to leaves
|
||||||
|
FT_MSG_BYTES_CURR, // how many bytes of messages currently in trees (estimate)
|
||||||
|
FT_MSG_NUM, // how many messages injected at root
|
||||||
|
FT_MSG_NUM_BROADCAST, // how many broadcast messages injected at root
|
||||||
|
FT_NUM_BASEMENTS_DECOMPRESSED_NORMAL, // how many basement nodes were decompressed because they were the target of a query
|
||||||
|
FT_NUM_BASEMENTS_DECOMPRESSED_AGGRESSIVE, // ... because they were between lc and rc
|
||||||
|
FT_NUM_BASEMENTS_DECOMPRESSED_PREFETCH,
|
||||||
|
FT_NUM_BASEMENTS_DECOMPRESSED_WRITE,
|
||||||
|
FT_NUM_MSG_BUFFER_DECOMPRESSED_NORMAL, // how many msg buffers were decompressed because they were the target of a query
|
||||||
|
FT_NUM_MSG_BUFFER_DECOMPRESSED_AGGRESSIVE, // ... because they were between lc and rc
|
||||||
|
FT_NUM_MSG_BUFFER_DECOMPRESSED_PREFETCH,
|
||||||
|
FT_NUM_MSG_BUFFER_DECOMPRESSED_WRITE,
|
||||||
|
FT_NUM_PIVOTS_FETCHED_QUERY, // how many pivots were fetched for a query
|
||||||
|
FT_BYTES_PIVOTS_FETCHED_QUERY, // how many pivots were fetched for a query
|
||||||
|
FT_TOKUTIME_PIVOTS_FETCHED_QUERY, // how many pivots were fetched for a query
|
||||||
|
FT_NUM_PIVOTS_FETCHED_PREFETCH, // ... for a prefetch
|
||||||
|
FT_BYTES_PIVOTS_FETCHED_PREFETCH, // ... for a prefetch
|
||||||
|
FT_TOKUTIME_PIVOTS_FETCHED_PREFETCH, // ... for a prefetch
|
||||||
|
FT_NUM_PIVOTS_FETCHED_WRITE, // ... for a write
|
||||||
|
FT_BYTES_PIVOTS_FETCHED_WRITE, // ... for a write
|
||||||
|
FT_TOKUTIME_PIVOTS_FETCHED_WRITE, // ... for a write
|
||||||
|
FT_NUM_BASEMENTS_FETCHED_NORMAL, // how many basement nodes were fetched because they were the target of a query
|
||||||
|
FT_BYTES_BASEMENTS_FETCHED_NORMAL, // how many basement nodes were fetched because they were the target of a query
|
||||||
|
FT_TOKUTIME_BASEMENTS_FETCHED_NORMAL, // how many basement nodes were fetched because they were the target of a query
|
||||||
|
FT_NUM_BASEMENTS_FETCHED_AGGRESSIVE, // ... because they were between lc and rc
|
||||||
|
FT_BYTES_BASEMENTS_FETCHED_AGGRESSIVE, // ... because they were between lc and rc
|
||||||
|
FT_TOKUTIME_BASEMENTS_FETCHED_AGGRESSIVE, // ... because they were between lc and rc
|
||||||
|
FT_NUM_BASEMENTS_FETCHED_PREFETCH,
|
||||||
|
FT_BYTES_BASEMENTS_FETCHED_PREFETCH,
|
||||||
|
FT_TOKUTIME_BASEMENTS_FETCHED_PREFETCH,
|
||||||
|
FT_NUM_BASEMENTS_FETCHED_WRITE,
|
||||||
|
FT_BYTES_BASEMENTS_FETCHED_WRITE,
|
||||||
|
FT_TOKUTIME_BASEMENTS_FETCHED_WRITE,
|
||||||
|
FT_NUM_MSG_BUFFER_FETCHED_NORMAL, // how many msg buffers were fetched because they were the target of a query
|
||||||
|
FT_BYTES_MSG_BUFFER_FETCHED_NORMAL, // how many msg buffers were fetched because they were the target of a query
|
||||||
|
FT_TOKUTIME_MSG_BUFFER_FETCHED_NORMAL, // how many msg buffers were fetched because they were the target of a query
|
||||||
|
FT_NUM_MSG_BUFFER_FETCHED_AGGRESSIVE, // ... because they were between lc and rc
|
||||||
|
FT_BYTES_MSG_BUFFER_FETCHED_AGGRESSIVE, // ... because they were between lc and rc
|
||||||
|
FT_TOKUTIME_MSG_BUFFER_FETCHED_AGGRESSIVE, // ... because they were between lc and rc
|
||||||
|
FT_NUM_MSG_BUFFER_FETCHED_PREFETCH,
|
||||||
|
FT_BYTES_MSG_BUFFER_FETCHED_PREFETCH,
|
||||||
|
FT_TOKUTIME_MSG_BUFFER_FETCHED_PREFETCH,
|
||||||
|
FT_NUM_MSG_BUFFER_FETCHED_WRITE,
|
||||||
|
FT_BYTES_MSG_BUFFER_FETCHED_WRITE,
|
||||||
|
FT_TOKUTIME_MSG_BUFFER_FETCHED_WRITE,
|
||||||
|
FT_LEAF_COMPRESS_TOKUTIME, // seconds spent compressing leaf leaf nodes to memory
|
||||||
|
FT_LEAF_SERIALIZE_TOKUTIME, // seconds spent serializing leaf node to memory
|
||||||
|
FT_LEAF_DECOMPRESS_TOKUTIME, // seconds spent decompressing leaf nodes to memory
|
||||||
|
FT_LEAF_DESERIALIZE_TOKUTIME, // seconds spent deserializing leaf nodes to memory
|
||||||
|
FT_NONLEAF_COMPRESS_TOKUTIME, // seconds spent compressing nonleaf nodes to memory
|
||||||
|
FT_NONLEAF_SERIALIZE_TOKUTIME, // seconds spent serializing nonleaf nodes to memory
|
||||||
|
FT_NONLEAF_DECOMPRESS_TOKUTIME, // seconds spent decompressing nonleaf nodes to memory
|
||||||
|
FT_NONLEAF_DESERIALIZE_TOKUTIME, // seconds spent deserializing nonleaf nodes to memory
|
||||||
|
FT_PRO_NUM_ROOT_SPLIT,
|
||||||
|
FT_PRO_NUM_ROOT_H0_INJECT,
|
||||||
|
FT_PRO_NUM_ROOT_H1_INJECT,
|
||||||
|
FT_PRO_NUM_INJECT_DEPTH_0,
|
||||||
|
FT_PRO_NUM_INJECT_DEPTH_1,
|
||||||
|
FT_PRO_NUM_INJECT_DEPTH_2,
|
||||||
|
FT_PRO_NUM_INJECT_DEPTH_3,
|
||||||
|
FT_PRO_NUM_INJECT_DEPTH_GT3,
|
||||||
|
FT_PRO_NUM_STOP_NONEMPTY_BUF,
|
||||||
|
FT_PRO_NUM_STOP_H1,
|
||||||
|
FT_PRO_NUM_STOP_LOCK_CHILD,
|
||||||
|
FT_PRO_NUM_STOP_CHILD_INMEM,
|
||||||
|
FT_PRO_NUM_DIDNT_WANT_PROMOTE,
|
||||||
|
FT_BASEMENT_DESERIALIZE_FIXED_KEYSIZE, // how many basement nodes were deserialized with a fixed keysize
|
||||||
|
FT_BASEMENT_DESERIALIZE_VARIABLE_KEYSIZE, // how many basement nodes were deserialized with a variable keysize
|
||||||
|
FT_PRO_RIGHTMOST_LEAF_SHORTCUT_SUCCESS,
|
||||||
|
FT_PRO_RIGHTMOST_LEAF_SHORTCUT_FAIL_POS,
|
||||||
|
FT_PRO_RIGHTMOST_LEAF_SHORTCUT_FAIL_REACTIVE,
|
||||||
|
FT_CURSOR_SKIP_DELETED_LEAF_ENTRY, // how many deleted leaf entries were skipped by a cursor
|
||||||
|
FT_STATUS_NUM_ROWS
|
||||||
|
};
|
||||||
|
|
||||||
|
void init(void);
|
||||||
|
void destroy(void);
|
||||||
|
|
||||||
|
TOKU_ENGINE_STATUS_ROW_S status[FT_STATUS_NUM_ROWS];
|
||||||
|
|
||||||
|
private:
|
||||||
|
bool m_initialized;
|
||||||
|
};
|
||||||
|
// Pointer alias for the FT status table type.
typedef FT_STATUS_S* FT_STATUS;

// The FT status table instance; only declared here, defined in another translation unit.
extern FT_STATUS_S ft_status;

// Read the current value of status row `x` (an FT_STATUS_S enumerator name,
// written without the class qualifier). Rows whose type is PARCOUNT are read
// through read_partitioned_counter(); every other row yields value.num directly.
#define FT_STATUS_VAL(x)                                                                \
    (ft_status.status[FT_STATUS_S::x].type == PARCOUNT                                  \
         ? read_partitioned_counter(ft_status.status[FT_STATUS_S::x].value.parcount)    \
         : ft_status.status[FT_STATUS_S::x].value.num)
|
||||||
|
|
||||||
|
// Add `d` to status row `x` (an FT_STATUS_S enumerator name, unqualified).
// PARCOUNT rows go through increment_partitioned_counter(); all other rows
// are bumped with toku_sync_fetch_and_add() on value.num.
// Wrapped in do { } while (0) so the macro behaves as a single statement.
#define FT_STATUS_INC(x, d)                                                                     \
    do {                                                                                        \
        if (ft_status.status[FT_STATUS_S::x].type == PARCOUNT) {                                \
            increment_partitioned_counter(ft_status.status[FT_STATUS_S::x].value.parcount, d);  \
        } else {                                                                                \
            toku_sync_fetch_and_add(&ft_status.status[FT_STATUS_S::x].value.num, d);            \
        }                                                                                       \
    } while (0)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
//
|
||||||
|
// Flusher statistics
|
||||||
|
//
|
||||||
|
class FT_FLUSHER_STATUS_S {
|
||||||
|
public:
|
||||||
|
enum {
|
||||||
|
FT_FLUSHER_CLEANER_TOTAL_NODES = 0, // total number of nodes whose buffers are potentially flushed by cleaner thread
|
||||||
|
FT_FLUSHER_CLEANER_H1_NODES, // number of nodes of height one whose message buffers are flushed by cleaner thread
|
||||||
|
FT_FLUSHER_CLEANER_HGT1_NODES, // number of nodes of height > 1 whose message buffers are flushed by cleaner thread
|
||||||
|
FT_FLUSHER_CLEANER_EMPTY_NODES, // number of nodes that are selected by cleaner, but whose buffers are empty
|
||||||
|
FT_FLUSHER_CLEANER_NODES_DIRTIED, // number of nodes that are made dirty by the cleaner thread
|
||||||
|
FT_FLUSHER_CLEANER_MAX_BUFFER_SIZE, // max number of bytes in message buffer flushed by cleaner thread
|
||||||
|
FT_FLUSHER_CLEANER_MIN_BUFFER_SIZE,
|
||||||
|
FT_FLUSHER_CLEANER_TOTAL_BUFFER_SIZE,
|
||||||
|
FT_FLUSHER_CLEANER_MAX_BUFFER_WORKDONE, // max workdone value of any message buffer flushed by cleaner thread
|
||||||
|
FT_FLUSHER_CLEANER_MIN_BUFFER_WORKDONE,
|
||||||
|
FT_FLUSHER_CLEANER_TOTAL_BUFFER_WORKDONE,
|
||||||
|
FT_FLUSHER_CLEANER_NUM_LEAF_MERGES_STARTED, // number of times cleaner thread tries to merge a leaf
|
||||||
|
FT_FLUSHER_CLEANER_NUM_LEAF_MERGES_RUNNING, // number of cleaner thread leaf merges in progress
|
||||||
|
FT_FLUSHER_CLEANER_NUM_LEAF_MERGES_COMPLETED, // number of times cleaner thread successfully merges a leaf
|
||||||
|
FT_FLUSHER_CLEANER_NUM_DIRTIED_FOR_LEAF_MERGE, // nodes dirtied by the "flush from root" process to merge a leaf node
|
||||||
|
FT_FLUSHER_FLUSH_TOTAL, // total number of flushes done by flusher threads or cleaner threads
|
||||||
|
FT_FLUSHER_FLUSH_IN_MEMORY, // number of in memory flushes
|
||||||
|
FT_FLUSHER_FLUSH_NEEDED_IO, // number of flushes that had to read a child (or part) off disk
|
||||||
|
FT_FLUSHER_FLUSH_CASCADES, // number of flushes that triggered another flush in the child
|
||||||
|
FT_FLUSHER_FLUSH_CASCADES_1, // number of flushes that triggered 1 cascading flush
|
||||||
|
FT_FLUSHER_FLUSH_CASCADES_2, // number of flushes that triggered 2 cascading flushes
|
||||||
|
FT_FLUSHER_FLUSH_CASCADES_3, // number of flushes that triggered 3 cascading flushes
|
||||||
|
FT_FLUSHER_FLUSH_CASCADES_4, // number of flushes that triggered 4 cascading flushes
|
||||||
|
FT_FLUSHER_FLUSH_CASCADES_5, // number of flushes that triggered 5 cascading flushes
|
||||||
|
FT_FLUSHER_FLUSH_CASCADES_GT_5, // number of flushes that triggered more than 5 cascading flushes
|
||||||
|
FT_FLUSHER_SPLIT_LEAF, // number of leaf nodes split
|
||||||
|
FT_FLUSHER_SPLIT_NONLEAF, // number of nonleaf nodes split
|
||||||
|
FT_FLUSHER_MERGE_LEAF, // number of times leaf nodes are merged
|
||||||
|
FT_FLUSHER_MERGE_NONLEAF, // number of times nonleaf nodes are merged
|
||||||
|
FT_FLUSHER_BALANCE_LEAF, // number of times a leaf node is balanced
|
||||||
|
FT_FLUSHER_STATUS_NUM_ROWS
|
||||||
|
};
|
||||||
|
|
||||||
|
void init(void);
|
||||||
|
void destroy(void);
|
||||||
|
|
||||||
|
TOKU_ENGINE_STATUS_ROW_S status[FT_FLUSHER_STATUS_NUM_ROWS];
|
||||||
|
|
||||||
|
private:
|
||||||
|
bool m_initialized;
|
||||||
|
};
|
||||||
|
// Pointer alias for the flusher status table type.
typedef FT_FLUSHER_STATUS_S* FT_FLUSHER_STATUS;

// The flusher status table instance; only declared here.
extern FT_FLUSHER_STATUS_S fl_status;

// Expands to the value.num field of flusher status row `x`
// (an FT_FLUSHER_STATUS_S enumerator name, unqualified).
#define FL_STATUS_VAL(x) fl_status.status[FT_FLUSHER_STATUS_S::x].value.num
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
//
|
||||||
|
// Hot Flusher
|
||||||
|
//
|
||||||
|
class FT_HOT_STATUS_S {
|
||||||
|
public:
|
||||||
|
enum {
|
||||||
|
FT_HOT_NUM_STARTED = 0, // number of HOT operations that have begun
|
||||||
|
FT_HOT_NUM_COMPLETED, // number of HOT operations that have successfully completed
|
||||||
|
FT_HOT_NUM_ABORTED, // number of HOT operations that have been aborted
|
||||||
|
FT_HOT_MAX_ROOT_FLUSH_COUNT, // max number of flushes from root ever required to optimize a tree
|
||||||
|
FT_HOT_STATUS_NUM_ROWS
|
||||||
|
};
|
||||||
|
|
||||||
|
void init(void);
|
||||||
|
void destroy(void);
|
||||||
|
|
||||||
|
TOKU_ENGINE_STATUS_ROW_S status[FT_HOT_STATUS_NUM_ROWS];
|
||||||
|
|
||||||
|
private:
|
||||||
|
bool m_initialized;
|
||||||
|
};
|
||||||
|
// Pointer alias for the HOT status table type.
typedef FT_HOT_STATUS_S* FT_HOT_STATUS;

// The HOT status table instance; only declared here.
extern FT_HOT_STATUS_S hot_status;

// Expands to the value.num field of HOT status row `x`
// (an FT_HOT_STATUS_S enumerator name, unqualified).
#define HOT_STATUS_VAL(x) hot_status.status[FT_HOT_STATUS_S::x].value.num
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
//
|
||||||
|
// Transaction statistics
|
||||||
|
//
|
||||||
|
class TXN_STATUS_S {
|
||||||
|
public:
|
||||||
|
enum {
|
||||||
|
TXN_BEGIN, // total number of transactions begun (does not include recovered txns)
|
||||||
|
TXN_READ_BEGIN, // total number of read only transactions begun (does not include recovered txns)
|
||||||
|
TXN_COMMIT, // successful commits
|
||||||
|
TXN_ABORT,
|
||||||
|
TXN_STATUS_NUM_ROWS
|
||||||
|
};
|
||||||
|
|
||||||
|
void init(void);
|
||||||
|
void destroy(void);
|
||||||
|
|
||||||
|
TOKU_ENGINE_STATUS_ROW_S status[TXN_STATUS_NUM_ROWS];
|
||||||
|
|
||||||
|
private:
|
||||||
|
bool m_initialized;
|
||||||
|
};
|
||||||
|
// Pointer alias for the transaction status table type.
typedef TXN_STATUS_S* TXN_STATUS;

// The transaction status table instance; only declared here.
extern TXN_STATUS_S txn_status;

// Add `d` to transaction status row `x` (a TXN_STATUS_S enumerator name,
// unqualified). Always goes through the row's partitioned counter; unlike
// FT_STATUS_INC there is no check of the row type first.
#define TXN_STATUS_INC(x, d) increment_partitioned_counter(txn_status.status[TXN_STATUS_S::x].value.parcount, d)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
//
|
||||||
|
// Logger statistics
|
||||||
|
//
|
||||||
|
class LOGGER_STATUS_S {
|
||||||
|
public:
|
||||||
|
enum {
|
||||||
|
LOGGER_NEXT_LSN = 0,
|
||||||
|
LOGGER_NUM_WRITES,
|
||||||
|
LOGGER_BYTES_WRITTEN,
|
||||||
|
LOGGER_UNCOMPRESSED_BYTES_WRITTEN,
|
||||||
|
LOGGER_TOKUTIME_WRITES,
|
||||||
|
LOGGER_WAIT_BUF_LONG,
|
||||||
|
LOGGER_STATUS_NUM_ROWS
|
||||||
|
};
|
||||||
|
|
||||||
|
void init(void);
|
||||||
|
void destroy(void);
|
||||||
|
|
||||||
|
TOKU_ENGINE_STATUS_ROW_S status[LOGGER_STATUS_NUM_ROWS];
|
||||||
|
|
||||||
|
private:
|
||||||
|
bool m_initialized;
|
||||||
|
};
|
||||||
|
// Pointer alias for the logger status table type.
typedef LOGGER_STATUS_S* LOGGER_STATUS;

// The logger status table instance; only declared here.
extern LOGGER_STATUS_S log_status;

// Expands to the value.num field of logger status row `x`
// (a LOGGER_STATUS_S enumerator name, unqualified).
#define LOG_STATUS_VAL(x) log_status.status[LOGGER_STATUS_S::x].value.num

// Global status set-up / tear-down entry points; implemented elsewhere.
// NOTE(review): presumably these drive init()/destroy() on the status tables
// declared in this header; confirm in the implementation.
void toku_status_init(void);
void toku_status_destroy(void);
|
||||||
265
storage/tokudb/PerconaFT/ft/ft-test-helpers.cc
Normal file
265
storage/tokudb/PerconaFT/ft/ft-test-helpers.cc
Normal file
@@ -0,0 +1,265 @@
|
|||||||
|
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||||
|
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||||
|
#ident "$Id$"
|
||||||
|
/*======
|
||||||
|
This file is part of PerconaFT.
|
||||||
|
|
||||||
|
|
||||||
|
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||||
|
|
||||||
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License, version 2,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
----------------------------------------
|
||||||
|
|
||||||
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU Affero General Public License, version 3,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Affero General Public License
|
||||||
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
======= */
|
||||||
|
|
||||||
|
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||||
|
|
||||||
|
#include "ft/ft.h"
|
||||||
|
#include "ft/ft-cachetable-wrappers.h"
|
||||||
|
#include "ft/ft-internal.h"
|
||||||
|
#include "ft/ft-flusher.h"
|
||||||
|
#include "ft/serialize/ft_node-serialize.h"
|
||||||
|
#include "ft/node.h"
|
||||||
|
#include "ft/ule.h"
|
||||||
|
|
||||||
|
// dummymsn needed to simulate msn because messages are injected at a lower level than toku_ft_root_put_msg()
|
||||||
|
#define MIN_DUMMYMSN ((MSN) {(uint64_t)1 << 62})
|
||||||
|
static MSN dummymsn;
|
||||||
|
static int testsetup_initialized = 0;
|
||||||
|
|
||||||
|
|
||||||
|
// Must be called before any other test_setup_xxx() functions are called.
|
||||||
|
void
|
||||||
|
toku_testsetup_initialize(void) {
|
||||||
|
if (testsetup_initialized == 0) {
|
||||||
|
testsetup_initialized = 1;
|
||||||
|
dummymsn = MIN_DUMMYMSN;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Advance the file-level dummy MSN by one and return the new value.
static MSN next_dummymsn(void) {
    dummymsn.msn += 1;
    return dummymsn;
}
|
||||||
|
|
||||||
|
|
||||||
|
// External-linkage flag; nothing in this file reads or writes it.
// NOTE(review): presumably consumed by other translation units - confirm before removing.
bool ignore_if_was_already_open;
|
||||||
|
// Create a new height-0 node with n_children partitions and report its block
// number through *blocknum.
//
// Every partition is marked PT_AVAIL. The first n_children - 1 entries of
// keys[] / keylens[] become the node's pivot keys (they are copied; the
// temporary copies are released before returning). The node is unpinned
// before returning. Always returns 0.
int toku_testsetup_leaf(FT_HANDLE ft_handle, BLOCKNUM *blocknum, int n_children, char **keys, int *keylens) {
    assert(testsetup_initialized);

    FTNODE leaf;
    toku_create_new_ftnode(ft_handle, &leaf, 0, n_children);
    for (int child = 0; child < n_children; child++) {
        BP_STATE(leaf, child) = PT_AVAIL;
    }

    // A node with n children is separated by n - 1 pivots.
    const int n_pivots = n_children - 1;
    DBT *XMALLOC_N(n_pivots, pivot_dbts);
    for (int p = 0; p < n_pivots; p++) {
        toku_memdup_dbt(&pivot_dbts[p], keys[p], keylens[p]);
    }
    leaf->pivotkeys.create_from_dbts(pivot_dbts, n_pivots);

    // The temporary DBT copies are no longer needed once the pivots are built.
    for (int p = 0; p < n_pivots; p++) {
        toku_destroy_dbt(&pivot_dbts[p]);
    }
    toku_free(pivot_dbts);

    *blocknum = leaf->blocknum;
    toku_unpin_ftnode(ft_handle->ft, leaf);
    return 0;
}
|
||||||
|
|
||||||
|
// Don't bother to clean up carefully if something goes wrong. (E.g., it's OK to have malloced stuff that hasn't been freed.)
|
||||||
|
int toku_testsetup_nonleaf (FT_HANDLE ft_handle, int height, BLOCKNUM *blocknum, int n_children, BLOCKNUM *children, char **keys, int *keylens) {
|
||||||
|
FTNODE node;
|
||||||
|
assert(testsetup_initialized);
|
||||||
|
toku_create_new_ftnode(ft_handle, &node, height, n_children);
|
||||||
|
for (int i = 0; i < n_children; i++) {
|
||||||
|
BP_BLOCKNUM(node, i) = children[i];
|
||||||
|
BP_STATE(node,i) = PT_AVAIL;
|
||||||
|
}
|
||||||
|
DBT *XMALLOC_N(n_children - 1, pivotkeys);
|
||||||
|
for (int i = 0; i + 1 < n_children; i++) {
|
||||||
|
toku_memdup_dbt(&pivotkeys[i], keys[i], keylens[i]);
|
||||||
|
}
|
||||||
|
node->pivotkeys.create_from_dbts(pivotkeys, n_children - 1);
|
||||||
|
for (int i = 0; i + 1 < n_children; i++) {
|
||||||
|
toku_destroy_dbt(&pivotkeys[i]);
|
||||||
|
}
|
||||||
|
toku_free(pivotkeys);
|
||||||
|
|
||||||
|
*blocknum = node->blocknum;
|
||||||
|
toku_unpin_ftnode(ft_handle->ft, node);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Overwrite the root block number stored in the tree's header with
// `blocknum`. Requires toku_testsetup_initialize() to have been called.
// Always returns 0.
int toku_testsetup_root(FT_HANDLE ft_handle, BLOCKNUM blocknum)
{
    assert(testsetup_initialized);

    ft_handle->ft->h->root_blocknum = blocknum;

    return 0;
}
|
||||||
|
|
||||||
|
// Return the serialized (on-disk) size of the node stored at block `diskoff`.
//
// The node is pinned through the cachetable with a full-read fetch extra,
// measured with toku_serialize_ftnode_size(), and unpinned again. A failed
// pin trips an assert rather than being reported to the caller.
int toku_testsetup_get_sersize(FT_HANDLE ft_handle, BLOCKNUM diskoff)
{
    assert(testsetup_initialized);

    ftnode_fetch_extra full_read;
    full_read.create_for_full_read(ft_handle->ft);

    void *pinned;
    int rc = toku_cachetable_get_and_pin(ft_handle->ft->cf,
                                         diskoff,
                                         toku_cachetable_hash(ft_handle->ft->cf, diskoff),
                                         &pinned,
                                         NULL,
                                         get_write_callbacks_for_node(ft_handle->ft),
                                         toku_ftnode_fetch_callback,
                                         toku_ftnode_pf_req_callback,
                                         toku_ftnode_pf_callback,
                                         true,
                                         &full_read);
    assert(rc==0);

    FTNODE CAST_FROM_VOIDP(pinned_node, pinned);
    int sersize = toku_serialize_ftnode_size(pinned_node);
    toku_unpin_ftnode(ft_handle->ft, pinned_node);
    return sersize;
}
|
||||||
|
|
||||||
|
// Apply an FT_INSERT message for (key, val) directly to the leaf node stored
// at `blocknum`, bypassing the normal root-injection path.
//
// The message carries a freshly fabricated MSN (next_dummymsn()) and the root
// XIDS. Returns the cachetable error code if the node cannot be pinned and 0
// otherwise. The pinned node must have height 0 (asserted).
int toku_testsetup_insert_to_leaf (FT_HANDLE ft_handle, BLOCKNUM blocknum, const char *key, int keylen, const char *val, int vallen) {
    assert(testsetup_initialized);

    ftnode_fetch_extra full_read;
    full_read.create_for_full_read(ft_handle->ft);

    void *pinned;
    int rc = toku_cachetable_get_and_pin(ft_handle->ft->cf,
                                         blocknum,
                                         toku_cachetable_hash(ft_handle->ft->cf, blocknum),
                                         &pinned,
                                         NULL,
                                         get_write_callbacks_for_node(ft_handle->ft),
                                         toku_ftnode_fetch_callback,
                                         toku_ftnode_pf_req_callback,
                                         toku_ftnode_pf_callback,
                                         true,
                                         &full_read);
    if (rc != 0) {
        return rc;
    }

    FTNODE CAST_FROM_VOIDP(leaf, pinned);
    toku_verify_or_set_counts(leaf);
    assert(leaf->height==0);

    DBT key_dbt, val_dbt;
    ft_msg insert_msg(toku_fill_dbt(&key_dbt, key, keylen), toku_fill_dbt(&val_dbt, val, vallen),
                      FT_INSERT, next_dummymsn(), toku_xids_get_root_xids());

    static size_t zero_flow_deltas[] = { 0, 0 };
    txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, true);
    toku_ftnode_put_msg(ft_handle->ft->cmp,
                        ft_handle->ft->update_fun,
                        leaf,
                        -1,
                        insert_msg,
                        true,
                        &gc_info,
                        zero_flow_deltas,
                        NULL);

    // Re-check the node's counts now that the message has been applied.
    toku_verify_or_set_counts(leaf);

    toku_unpin_ftnode(ft_handle->ft, leaf);
    return 0;
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
testhelper_string_key_cmp(DB *UU(e), const DBT *a, const DBT *b)
|
||||||
|
{
|
||||||
|
char *CAST_FROM_VOIDP(s, a->data), *CAST_FROM_VOIDP(t, b->data);
|
||||||
|
return strcmp(s, t);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void
|
||||||
|
toku_pin_node_with_min_bfe(FTNODE* node, BLOCKNUM b, FT_HANDLE t)
|
||||||
|
{
|
||||||
|
ftnode_fetch_extra bfe;
|
||||||
|
bfe.create_for_min_read(t->ft);
|
||||||
|
toku_pin_ftnode(
|
||||||
|
t->ft,
|
||||||
|
b,
|
||||||
|
toku_cachetable_hash(t->ft->cf, b),
|
||||||
|
&bfe,
|
||||||
|
PL_WRITE_EXPENSIVE,
|
||||||
|
node,
|
||||||
|
true
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Queue a message of type `msgtype` for (key, val) directly into a child
// buffer of the nonleaf node stored at `blocknum`.
//
// The target child is chosen with the tree's own comparator; the buffer
// insert itself uses a temporary strcmp()-based comparator. Returns the
// cachetable error code if the node cannot be pinned and 0 otherwise. The
// pinned node must have height > 0 (asserted).
int toku_testsetup_insert_to_nonleaf (FT_HANDLE ft_handle, BLOCKNUM blocknum, enum ft_msg_type msgtype, const char *key, int keylen, const char *val, int vallen) {
    assert(testsetup_initialized);

    ftnode_fetch_extra full_read;
    full_read.create_for_full_read(ft_handle->ft);

    void *pinned;
    int rc = toku_cachetable_get_and_pin(ft_handle->ft->cf,
                                         blocknum,
                                         toku_cachetable_hash(ft_handle->ft->cf, blocknum),
                                         &pinned,
                                         NULL,
                                         get_write_callbacks_for_node(ft_handle->ft),
                                         toku_ftnode_fetch_callback,
                                         toku_ftnode_pf_req_callback,
                                         toku_ftnode_pf_callback,
                                         true,
                                         &full_read);
    if (rc != 0) {
        return rc;
    }

    FTNODE CAST_FROM_VOIDP(parent, pinned);
    assert(parent->height>0);

    // Work out which child's buffer the key belongs to.
    DBT key_dbt;
    int target_child = toku_ftnode_which_child(parent, toku_fill_dbt(&key_dbt, key, keylen), ft_handle->ft->cmp);

    XIDS root_xids = toku_xids_get_root_xids();
    MSN fake_msn = next_dummymsn();

    // The buffer insert gets its own short-lived string comparator.
    toku::comparator string_cmp;
    string_cmp.create(testhelper_string_key_cmp, nullptr);
    toku_bnc_insert_msg(BNC(parent, target_child), key, keylen, val, vallen, msgtype, fake_msn, root_xids, true, string_cmp);
    string_cmp.destroy();

    // Hack to get the test working. The message was queued straight into a
    // FIFO instead of going through the ft APIs, so the MSN bookkeeping and
    // the dirty flag are patched up by hand.
    parent->max_msn_applied_to_node_on_disk = fake_msn;
    parent->dirty = 1;
    // Also hack max_msn_in_ft
    ft_handle->ft->h->max_msn_in_ft = fake_msn;

    toku_unpin_ftnode(ft_handle->ft, parent);
    return 0;
}
|
||||||
@@ -1,93 +1,40 @@
|
|||||||
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||||
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||||
#ident "$Id$"
|
#ident "$Id$"
|
||||||
/*
|
/*======
|
||||||
COPYING CONDITIONS NOTICE:
|
This file is part of PerconaFT.
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or modify
|
|
||||||
it under the terms of version 2 of the GNU General Public License as
|
|
||||||
published by the Free Software Foundation, and provided that the
|
|
||||||
following conditions are met:
|
|
||||||
|
|
||||||
* Redistributions of source code must retain this COPYING
|
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||||
CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the
|
|
||||||
DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the
|
|
||||||
PATENT MARKING NOTICE (below), and the PATENT RIGHTS
|
|
||||||
GRANT (below).
|
|
||||||
|
|
||||||
* Redistributions in binary form must reproduce this COPYING
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the
|
it under the terms of the GNU General Public License, version 2,
|
||||||
DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the
|
as published by the Free Software Foundation.
|
||||||
PATENT MARKING NOTICE (below), and the PATENT RIGHTS
|
|
||||||
GRANT (below) in the documentation and/or other materials
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
provided with the distribution.
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
You should have received a copy of the GNU General Public License
|
You should have received a copy of the GNU General Public License
|
||||||
along with this program; if not, write to the Free Software
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
|
||||||
02110-1301, USA.
|
|
||||||
|
|
||||||
COPYRIGHT NOTICE:
|
----------------------------------------
|
||||||
|
|
||||||
TokuFT, Tokutek Fractal Tree Indexing Library.
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
Copyright (C) 2007-2013 Tokutek, Inc.
|
it under the terms of the GNU Affero General Public License, version 3,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
DISCLAIMER:
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
This program is distributed in the hope that it will be useful, but
|
You should have received a copy of the GNU Affero General Public License
|
||||||
WITHOUT ANY WARRANTY; without even the implied warranty of
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
======= */
|
||||||
General Public License for more details.
|
|
||||||
|
|
||||||
UNIVERSITY PATENT NOTICE:
|
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||||
|
|
||||||
The technology is licensed by the Massachusetts Institute of
|
|
||||||
Technology, Rutgers State University of New Jersey, and the Research
|
|
||||||
Foundation of State University of New York at Stony Brook under
|
|
||||||
United States of America Serial No. 11/760379 and to the patents
|
|
||||||
and/or patent applications resulting from it.
|
|
||||||
|
|
||||||
PATENT MARKING NOTICE:
|
|
||||||
|
|
||||||
This software is covered by US Patent No. 8,185,551.
|
|
||||||
This software is covered by US Patent No. 8,489,638.
|
|
||||||
|
|
||||||
PATENT RIGHTS GRANT:
|
|
||||||
|
|
||||||
"THIS IMPLEMENTATION" means the copyrightable works distributed by
|
|
||||||
Tokutek as part of the Fractal Tree project.
|
|
||||||
|
|
||||||
"PATENT CLAIMS" means the claims of patents that are owned or
|
|
||||||
licensable by Tokutek, both currently or in the future; and that in
|
|
||||||
the absence of this license would be infringed by THIS
|
|
||||||
IMPLEMENTATION or by using or running THIS IMPLEMENTATION.
|
|
||||||
|
|
||||||
"PATENT CHALLENGE" shall mean a challenge to the validity,
|
|
||||||
patentability, enforceability and/or non-infringement of any of the
|
|
||||||
PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS.
|
|
||||||
|
|
||||||
Tokutek hereby grants to you, for the term and geographical scope of
|
|
||||||
the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free,
|
|
||||||
irrevocable (except as stated in this section) patent license to
|
|
||||||
make, have made, use, offer to sell, sell, import, transfer, and
|
|
||||||
otherwise run, modify, and propagate the contents of THIS
|
|
||||||
IMPLEMENTATION, where such license applies only to the PATENT
|
|
||||||
CLAIMS. This grant does not include claims that would be infringed
|
|
||||||
only as a consequence of further modifications of THIS
|
|
||||||
IMPLEMENTATION. If you or your agent or licensee institute or order
|
|
||||||
or agree to the institution of patent litigation against any entity
|
|
||||||
(including a cross-claim or counterclaim in a lawsuit) alleging that
|
|
||||||
THIS IMPLEMENTATION constitutes direct or contributory patent
|
|
||||||
infringement, or inducement of patent infringement, then any rights
|
|
||||||
granted to you under this License shall terminate as of the date
|
|
||||||
such litigation is filed. If you or your agent or exclusive
|
|
||||||
licensee institute or order or agree to the institution of a PATENT
|
|
||||||
CHALLENGE, then Tokutek may terminate any rights granted to you
|
|
||||||
under this License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved."
|
|
||||||
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it."
|
|
||||||
|
|
||||||
/* Verify an FT. */
|
/* Verify an FT. */
|
||||||
/* Check:
|
/* Check:
|
||||||
@@ -97,8 +44,6 @@ PATENT RIGHTS GRANT:
|
|||||||
* For each nonleaf node: All the messages have keys that are between the associated pivot keys ( left_pivot_key < message <= right_pivot_key)
|
* For each nonleaf node: All the messages have keys that are between the associated pivot keys ( left_pivot_key < message <= right_pivot_key)
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include <config.h>
|
|
||||||
|
|
||||||
#include "ft/serialize/block_table.h"
|
#include "ft/serialize/block_table.h"
|
||||||
#include "ft/ft.h"
|
#include "ft/ft.h"
|
||||||
#include "ft/ft-cachetable-wrappers.h"
|
#include "ft/ft-cachetable-wrappers.h"
|
||||||
@@ -1,95 +1,40 @@
|
|||||||
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||||
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||||
#ident "$Id$"
|
#ident "$Id$"
|
||||||
/*
|
/*======
|
||||||
COPYING CONDITIONS NOTICE:
|
This file is part of PerconaFT.
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or modify
|
|
||||||
it under the terms of version 2 of the GNU General Public License as
|
|
||||||
published by the Free Software Foundation, and provided that the
|
|
||||||
following conditions are met:
|
|
||||||
|
|
||||||
* Redistributions of source code must retain this COPYING
|
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||||
CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the
|
|
||||||
DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the
|
|
||||||
PATENT MARKING NOTICE (below), and the PATENT RIGHTS
|
|
||||||
GRANT (below).
|
|
||||||
|
|
||||||
* Redistributions in binary form must reproduce this COPYING
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the
|
it under the terms of the GNU General Public License, version 2,
|
||||||
DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the
|
as published by the Free Software Foundation.
|
||||||
PATENT MARKING NOTICE (below), and the PATENT RIGHTS
|
|
||||||
GRANT (below) in the documentation and/or other materials
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
provided with the distribution.
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
You should have received a copy of the GNU General Public License
|
You should have received a copy of the GNU General Public License
|
||||||
along with this program; if not, write to the Free Software
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
|
||||||
02110-1301, USA.
|
|
||||||
|
|
||||||
COPYRIGHT NOTICE:
|
----------------------------------------
|
||||||
|
|
||||||
TokuFT, Tokutek Fractal Tree Indexing Library.
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
Copyright (C) 2007-2013 Tokutek, Inc.
|
it under the terms of the GNU Affero General Public License, version 3,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
DISCLAIMER:
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
This program is distributed in the hope that it will be useful, but
|
You should have received a copy of the GNU Affero General Public License
|
||||||
WITHOUT ANY WARRANTY; without even the implied warranty of
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
======= */
|
||||||
General Public License for more details.
|
|
||||||
|
|
||||||
UNIVERSITY PATENT NOTICE:
|
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||||
|
|
||||||
The technology is licensed by the Massachusetts Institute of
|
|
||||||
Technology, Rutgers State University of New Jersey, and the Research
|
|
||||||
Foundation of State University of New York at Stony Brook under
|
|
||||||
United States of America Serial No. 11/760379 and to the patents
|
|
||||||
and/or patent applications resulting from it.
|
|
||||||
|
|
||||||
PATENT MARKING NOTICE:
|
|
||||||
|
|
||||||
This software is covered by US Patent No. 8,185,551.
|
|
||||||
This software is covered by US Patent No. 8,489,638.
|
|
||||||
|
|
||||||
PATENT RIGHTS GRANT:
|
|
||||||
|
|
||||||
"THIS IMPLEMENTATION" means the copyrightable works distributed by
|
|
||||||
Tokutek as part of the Fractal Tree project.
|
|
||||||
|
|
||||||
"PATENT CLAIMS" means the claims of patents that are owned or
|
|
||||||
licensable by Tokutek, both currently or in the future; and that in
|
|
||||||
the absence of this license would be infringed by THIS
|
|
||||||
IMPLEMENTATION or by using or running THIS IMPLEMENTATION.
|
|
||||||
|
|
||||||
"PATENT CHALLENGE" shall mean a challenge to the validity,
|
|
||||||
patentability, enforceability and/or non-infringement of any of the
|
|
||||||
PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS.
|
|
||||||
|
|
||||||
Tokutek hereby grants to you, for the term and geographical scope of
|
|
||||||
the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free,
|
|
||||||
irrevocable (except as stated in this section) patent license to
|
|
||||||
make, have made, use, offer to sell, sell, import, transfer, and
|
|
||||||
otherwise run, modify, and propagate the contents of THIS
|
|
||||||
IMPLEMENTATION, where such license applies only to the PATENT
|
|
||||||
CLAIMS. This grant does not include claims that would be infringed
|
|
||||||
only as a consequence of further modifications of THIS
|
|
||||||
IMPLEMENTATION. If you or your agent or licensee institute or order
|
|
||||||
or agree to the institution of patent litigation against any entity
|
|
||||||
(including a cross-claim or counterclaim in a lawsuit) alleging that
|
|
||||||
THIS IMPLEMENTATION constitutes direct or contributory patent
|
|
||||||
infringement, or inducement of patent infringement, then any rights
|
|
||||||
granted to you under this License shall terminate as of the date
|
|
||||||
such litigation is filed. If you or your agent or exclusive
|
|
||||||
licensee institute or order or agree to the institution of a PATENT
|
|
||||||
CHALLENGE, then Tokutek may terminate any rights granted to you
|
|
||||||
under this License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved."
|
|
||||||
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it."
|
|
||||||
|
|
||||||
#include <config.h>
|
|
||||||
|
|
||||||
#include "ft/serialize/block_table.h"
|
#include "ft/serialize/block_table.h"
|
||||||
#include "ft/ft.h"
|
#include "ft/ft.h"
|
||||||
182
storage/tokudb/PerconaFT/ft/ft.h
Normal file
182
storage/tokudb/PerconaFT/ft/ft.h
Normal file
@@ -0,0 +1,182 @@
|
|||||||
|
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||||
|
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||||
|
#ident "$Id$"
|
||||||
|
/*======
|
||||||
|
This file is part of PerconaFT.
|
||||||
|
|
||||||
|
|
||||||
|
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||||
|
|
||||||
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License, version 2,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
----------------------------------------
|
||||||
|
|
||||||
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU Affero General Public License, version 3,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Affero General Public License
|
||||||
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
======= */
|
||||||
|
|
||||||
|
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <db.h>
|
||||||
|
|
||||||
|
#include "ft/cachetable/cachetable.h"
|
||||||
|
#include "ft/ft-ops.h"
|
||||||
|
#include "ft/logger/log.h"
|
||||||
|
#include "util/dbt.h"
|
||||||
|
|
||||||
|
typedef struct ft *FT;
|
||||||
|
typedef struct ft_options *FT_OPTIONS;
|
||||||
|
|
||||||
|
// unlink a ft from the filesystem with or without a txn.
|
||||||
|
// if with a txn, then the unlink happens on commit.
|
||||||
|
void toku_ft_unlink(FT_HANDLE handle);
|
||||||
|
void toku_ft_unlink_on_commit(FT_HANDLE handle, TOKUTXN txn);
|
||||||
|
|
||||||
|
void toku_ft_init_reflock(FT ft);
|
||||||
|
void toku_ft_destroy_reflock(FT ft);
|
||||||
|
void toku_ft_grab_reflock(FT ft);
|
||||||
|
void toku_ft_release_reflock(FT ft);
|
||||||
|
|
||||||
|
void toku_ft_lock(struct ft *ft);
|
||||||
|
void toku_ft_unlock(struct ft *ft);
|
||||||
|
|
||||||
|
void toku_ft_create(FT *ftp, FT_OPTIONS options, CACHEFILE cf, TOKUTXN txn);
|
||||||
|
void toku_ft_free (FT ft);
|
||||||
|
|
||||||
|
int toku_read_ft_and_store_in_cachefile (FT_HANDLE ft_h, CACHEFILE cf, LSN max_acceptable_lsn, FT *header);
|
||||||
|
void toku_ft_note_ft_handle_open(FT ft, FT_HANDLE live);
|
||||||
|
|
||||||
|
bool toku_ft_needed_unlocked(FT ft);
|
||||||
|
bool toku_ft_has_one_reference_unlocked(FT ft);
|
||||||
|
|
||||||
|
// evict a ft from memory by closing its cachefile. any future work
|
||||||
|
// will have to read in the ft in a new cachefile and new FT object.
|
||||||
|
void toku_ft_evict_from_memory(FT ft, bool oplsn_valid, LSN oplsn);
|
||||||
|
|
||||||
|
FT_HANDLE toku_ft_get_only_existing_ft_handle(FT ft);
|
||||||
|
|
||||||
|
void toku_ft_note_hot_begin(FT_HANDLE ft_h);
|
||||||
|
void toku_ft_note_hot_complete(FT_HANDLE ft_h, bool success, MSN msn_at_start_of_hot);
|
||||||
|
|
||||||
|
void
|
||||||
|
toku_ft_init(
|
||||||
|
FT ft,
|
||||||
|
BLOCKNUM root_blocknum_on_disk,
|
||||||
|
LSN checkpoint_lsn,
|
||||||
|
TXNID root_xid_that_created,
|
||||||
|
uint32_t target_nodesize,
|
||||||
|
uint32_t target_basementnodesize,
|
||||||
|
enum toku_compression_method compression_method,
|
||||||
|
uint32_t fanout
|
||||||
|
);
|
||||||
|
|
||||||
|
int toku_dictionary_redirect_abort(FT old_h, FT new_h, TOKUTXN txn) __attribute__ ((warn_unused_result));
|
||||||
|
int toku_dictionary_redirect (const char *dst_fname_in_env, FT_HANDLE old_ft, TOKUTXN txn);
|
||||||
|
void toku_reset_root_xid_that_created(FT ft, TXNID new_root_xid_that_created);
|
||||||
|
// Reset the root_xid_that_created field to the given value.
|
||||||
|
// This redefines which xid created the dictionary.
|
||||||
|
|
||||||
|
void toku_ft_add_txn_ref(FT ft);
|
||||||
|
void toku_ft_remove_txn_ref(FT ft);
|
||||||
|
|
||||||
|
void toku_calculate_root_offset_pointer (FT ft, CACHEKEY* root_key, uint32_t *roothash);
|
||||||
|
void toku_ft_set_new_root_blocknum(FT ft, CACHEKEY new_root_key);
|
||||||
|
LSN toku_ft_checkpoint_lsn(FT ft) __attribute__ ((warn_unused_result));
|
||||||
|
void toku_ft_stat64 (FT ft, struct ftstat64_s *s);
|
||||||
|
void toku_ft_get_fractal_tree_info64 (FT ft, struct ftinfo64 *s);
|
||||||
|
int toku_ft_iterate_fractal_tree_block_map(FT ft, int (*iter)(uint64_t,int64_t,int64_t,int64_t,int64_t,void*), void *iter_extra);
|
||||||
|
|
||||||
|
// unconditionally set the descriptor for an open FT. can't do this when
|
||||||
|
// any operation has already occurred on the ft.
|
||||||
|
// see toku_ft_change_descriptor(), which is the transactional version
|
||||||
|
// used by the ydb layer. it better describes the client contract.
|
||||||
|
void toku_ft_update_descriptor(FT ft, DESCRIPTOR desc);
|
||||||
|
// use this version if the FT is not fully user-opened with a valid cachefile.
|
||||||
|
// this is a clean hack to get deserialization code to update a descriptor
|
||||||
|
// while the FT and cf are in the process of opening, for upgrade purposes
|
||||||
|
void toku_ft_update_descriptor_with_fd(FT ft, DESCRIPTOR desc, int fd);
|
||||||
|
void toku_ft_update_cmp_descriptor(FT ft);
|
||||||
|
|
||||||
|
// get the descriptor for a ft. safe to read as long as clients honor the
|
||||||
|
// strict contract put forth by toku_ft_update_descriptor/toku_ft_change_descriptor
|
||||||
|
// essentially, there should never be a reader while there is a writer, enforced
|
||||||
|
// by the client, not the FT.
|
||||||
|
DESCRIPTOR toku_ft_get_descriptor(FT_HANDLE ft_handle);
|
||||||
|
DESCRIPTOR toku_ft_get_cmp_descriptor(FT_HANDLE ft_handle);
|
||||||
|
|
||||||
|
// Pair of 64-bit counters (row count, byte count). STAT64INFO is a pointer to one.
// Taken by toku_ft_update_stats() and toku_ft_decrease_stats() as the delta argument.
typedef struct {
|
||||||
|
// fields are signed because delta versions in basements could be negative
|
||||||
|
int64_t numrows;
|
||||||
|
int64_t numbytes;
|
||||||
|
} STAT64INFO_S, *STAT64INFO;
|
||||||
|
static const STAT64INFO_S ZEROSTATS = { .numrows = 0, .numbytes = 0};
|
||||||
|
|
||||||
|
void toku_ft_update_stats(STAT64INFO headerstats, STAT64INFO_S delta);
|
||||||
|
void toku_ft_decrease_stats(STAT64INFO headerstats, STAT64INFO_S delta);
|
||||||
|
|
||||||
|
typedef void (*remove_ft_ref_callback)(FT ft, void *extra);
|
||||||
|
void toku_ft_remove_reference(FT ft,
|
||||||
|
bool oplsn_valid, LSN oplsn,
|
||||||
|
remove_ft_ref_callback remove_ref, void *extra);
|
||||||
|
|
||||||
|
void toku_ft_set_nodesize(FT ft, unsigned int nodesize);
|
||||||
|
void toku_ft_get_nodesize(FT ft, unsigned int *nodesize);
|
||||||
|
void toku_ft_set_basementnodesize(FT ft, unsigned int basementnodesize);
|
||||||
|
void toku_ft_get_basementnodesize(FT ft, unsigned int *basementnodesize);
|
||||||
|
void toku_ft_set_compression_method(FT ft, enum toku_compression_method method);
|
||||||
|
void toku_ft_get_compression_method(FT ft, enum toku_compression_method *methodp);
|
||||||
|
void toku_ft_set_fanout(FT ft, unsigned int fanout);
|
||||||
|
void toku_ft_get_fanout(FT ft, unsigned int *fanout);
|
||||||
|
|
||||||
|
// mark the ft as a blackhole. any message injections will be a no op.
|
||||||
|
void toku_ft_set_blackhole(FT_HANDLE ft_handle);
|
||||||
|
|
||||||
|
// Effect: Calculates the total space and used space for a FT's leaf data.
|
||||||
|
// The difference between the two is MVCC garbage.
|
||||||
|
void toku_ft_get_garbage(FT ft, uint64_t *total_space, uint64_t *used_space);
|
||||||
|
|
||||||
|
// TODO: Should be in portability
|
||||||
|
int get_num_cores(void);
|
||||||
|
|
||||||
|
// TODO: Use the cachetable's worker pool instead of something managed by the FT...
|
||||||
|
struct toku_thread_pool *get_ft_pool(void);
|
||||||
|
|
||||||
|
// TODO: Should be in portability
|
||||||
|
int toku_single_process_lock(const char *lock_dir, const char *which, int *lockfd);
|
||||||
|
int toku_single_process_unlock(int *lockfd);
|
||||||
|
|
||||||
|
void tokuft_update_product_name_strings(void);
|
||||||
|
#define TOKU_MAX_PRODUCT_NAME_LENGTH (256)
|
||||||
|
extern char toku_product_name[TOKU_MAX_PRODUCT_NAME_LENGTH];
|
||||||
|
|
||||||
|
// Fixed-size string buffers derived from toku_product_name. Each buffer is sized as
// sizeof(toku_product_name) plus the size of the literal suffix shown in its declaration.
// NOTE(review): presumably filled in by tokuft_update_product_name_strings() -- confirm.
struct toku_product_name_strings_struct {
|
||||||
|
char db_version[sizeof(toku_product_name) + sizeof("1.2.3 build ") + 256]; // extra 256 bytes of headroom beyond name + version prefix
|
||||||
|
char environmentdictionary[sizeof(toku_product_name) + sizeof(".environment")];
|
||||||
|
char fileopsdirectory[sizeof(toku_product_name) + sizeof(".directory")];
|
||||||
|
char single_process_lock[sizeof(toku_product_name) + sizeof("___lock_dont_delete_me")];
|
||||||
|
char rollback_cachefile[sizeof(toku_product_name) + sizeof(".rollback")];
|
||||||
|
};
|
||||||
|
|
||||||
|
extern struct toku_product_name_strings_struct toku_product_name_strings;
|
||||||
|
extern int tokuft_num_envs;
|
||||||
138
storage/tokudb/PerconaFT/ft/le-cursor.cc
Normal file
138
storage/tokudb/PerconaFT/ft/le-cursor.cc
Normal file
@@ -0,0 +1,138 @@
|
|||||||
|
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||||
|
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||||
|
#ident "$Id$"
|
||||||
|
/*======
|
||||||
|
This file is part of PerconaFT.
|
||||||
|
|
||||||
|
|
||||||
|
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||||
|
|
||||||
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License, version 2,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
----------------------------------------
|
||||||
|
|
||||||
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU Affero General Public License, version 3,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Affero General Public License
|
||||||
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
======= */
|
||||||
|
|
||||||
|
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||||
|
|
||||||
|
#include "ft/ft.h"
|
||||||
|
#include "ft/ft-internal.h"
|
||||||
|
#include "ft/le-cursor.h"
|
||||||
|
#include "ft/cursor.h"
|
||||||
|
|
||||||
|
// A LE_CURSOR is a special purpose FT_CURSOR that:
|
||||||
|
// - enables prefetching
|
||||||
|
// - does not perform snapshot reads. it reads everything, including uncommitted.
|
||||||
|
//
|
||||||
|
// A LE_CURSOR is good for scanning a FT from beginning to end. Useful for hot indexing.
|
||||||
|
|
||||||
|
// State of a leaf-entry cursor: the wrapped FT_CURSOR plus two sentinel flags that
// record whether the cursor currently sits at -infinity or +infinity.
struct le_cursor {
|
||||||
|
FT_CURSOR ft_cursor; // underlying cursor; opened in toku_le_cursor_create, closed in toku_le_cursor_close
|
||||||
|
bool neg_infinity; // true when the le cursor is positioned at -infinity (set when toku_le_cursor_next gets DB_NOTFOUND)
|
||||||
|
bool pos_infinity; // true when the le cursor is positioned at +infinity (initial setting; cleared by toku_le_cursor_next)
|
||||||
|
};
|
||||||
|
|
||||||
|
int
|
||||||
|
toku_le_cursor_create(LE_CURSOR *le_cursor_result, FT_HANDLE ft_handle, TOKUTXN txn) {
|
||||||
|
int result = 0;
|
||||||
|
LE_CURSOR MALLOC(le_cursor);
|
||||||
|
if (le_cursor == NULL) {
|
||||||
|
result = get_error_errno();
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
result = toku_ft_cursor(ft_handle, &le_cursor->ft_cursor, txn, false, false);
|
||||||
|
if (result == 0) {
|
||||||
|
// TODO move the leaf mode to the ft cursor constructor
|
||||||
|
toku_ft_cursor_set_leaf_mode(le_cursor->ft_cursor);
|
||||||
|
le_cursor->neg_infinity = false;
|
||||||
|
le_cursor->pos_infinity = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (result == 0) {
|
||||||
|
*le_cursor_result = le_cursor;
|
||||||
|
} else {
|
||||||
|
toku_free(le_cursor);
|
||||||
|
}
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Close the wrapped FT_CURSOR, then release the LE_CURSOR itself.
void toku_le_cursor_close(LE_CURSOR le_cursor) {
    FT_CURSOR inner = le_cursor->ft_cursor;
    toku_ft_cursor_close(inner);
    toku_free(le_cursor);
}
|
||||||
|
|
||||||
|
// Move to the next leaf entry under the LE_CURSOR
|
||||||
|
// Success: returns zero, calls the getf callback with the getf_v parameter
|
||||||
|
// Failure: returns a non-zero error number
|
||||||
|
int
|
||||||
|
toku_le_cursor_next(LE_CURSOR le_cursor, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) {
|
||||||
|
int result;
|
||||||
|
if (le_cursor->neg_infinity) {
|
||||||
|
result = DB_NOTFOUND;
|
||||||
|
} else {
|
||||||
|
le_cursor->pos_infinity = false;
|
||||||
|
// TODO replace this with a non deprecated function. Which?
|
||||||
|
result = toku_ft_cursor_get(le_cursor->ft_cursor, NULL, getf, getf_v, DB_PREV);
|
||||||
|
if (result == DB_NOTFOUND) {
|
||||||
|
le_cursor->neg_infinity = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool
|
||||||
|
toku_le_cursor_is_key_greater_or_equal(LE_CURSOR le_cursor, const DBT *key) {
|
||||||
|
bool result;
|
||||||
|
if (le_cursor->neg_infinity) {
|
||||||
|
result = true; // all keys are greater than -infinity
|
||||||
|
} else if (le_cursor->pos_infinity) {
|
||||||
|
result = false; // all keys are less than +infinity
|
||||||
|
} else {
|
||||||
|
FT ft = le_cursor->ft_cursor->ft_handle->ft;
|
||||||
|
// get the current position from the cursor and compare it to the given key.
|
||||||
|
int r = ft->cmp(&le_cursor->ft_cursor->key, key);
|
||||||
|
if (r <= 0) {
|
||||||
|
result = true; // key is right of the cursor key
|
||||||
|
} else {
|
||||||
|
result = false; // key is at or left of the cursor key
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
toku_le_cursor_update_estimate(LE_CURSOR le_cursor, DBT* estimate) {
|
||||||
|
// don't handle these edge cases, not worth it.
|
||||||
|
// estimate stays same
|
||||||
|
if (le_cursor->pos_infinity || le_cursor->neg_infinity) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
DBT *cursor_key = &le_cursor->ft_cursor->key;
|
||||||
|
estimate->data = toku_xrealloc(estimate->data, cursor_key->size);
|
||||||
|
memcpy(estimate->data, cursor_key->data, cursor_key->size);
|
||||||
|
estimate->size = cursor_key->size;
|
||||||
|
estimate->flags = DB_DBT_REALLOC;
|
||||||
|
}
|
||||||
75
storage/tokudb/PerconaFT/ft/le-cursor.h
Normal file
75
storage/tokudb/PerconaFT/ft/le-cursor.h
Normal file
@@ -0,0 +1,75 @@
|
|||||||
|
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||||
|
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||||
|
#ident "$Id$"
|
||||||
|
/*======
|
||||||
|
This file is part of PerconaFT.
|
||||||
|
|
||||||
|
|
||||||
|
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||||
|
|
||||||
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License, version 2,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
----------------------------------------
|
||||||
|
|
||||||
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU Affero General Public License, version 3,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Affero General Public License
|
||||||
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
======= */
|
||||||
|
|
||||||
|
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "ft/ft-internal.h"
|
||||||
|
|
||||||
|
// A leaf entry cursor (LE_CURSOR) is a special type of FT_CURSOR that visits all of the leaf entries in a tree
|
||||||
|
// and returns the leaf entry to the caller. It maintains a copy of the key that it was last positioned over to
|
||||||
|
// speed up key comparisons with a given key. For example, the hot indexing could use the _is_key_greater_or_equal
|
||||||
|
// function to determine where a given key sits relative to the LE_CURSOR position.
|
||||||
|
|
||||||
|
// When _next and _is_key_greater_or_equal functions are run on multiple threads, they must be protected by a lock. This
|
||||||
|
// lock is assumed to exist outside of the LE_CURSOR.
|
||||||
|
|
||||||
|
typedef struct le_cursor *LE_CURSOR;
|
||||||
|
|
||||||
|
// Create a leaf cursor for a tree (ft_h) within a transaction (txn)
|
||||||
|
// Success: returns 0, stores the LE_CURSOR in the le_cursor_result
|
||||||
|
// Failure: returns a non-zero error number
|
||||||
|
int toku_le_cursor_create(LE_CURSOR *le_cursor_result, FT_HANDLE ft_h, TOKUTXN txn);
|
||||||
|
|
||||||
|
// Close and free the LE_CURSOR
|
||||||
|
void toku_le_cursor_close(LE_CURSOR le_cursor);
|
||||||
|
|
||||||
|
// Move to the next leaf entry under the LE_CURSOR
|
||||||
|
// Success: returns zero, calls the getf callback with the getf_v parameter
|
||||||
|
// Failure: returns a non-zero error number
|
||||||
|
int toku_le_cursor_next(LE_CURSOR le_cursor, FT_GET_CALLBACK_FUNCTION getf, void *getf_v);
|
||||||
|
|
||||||
|
// Return true if the key is at or to the right of the LE_CURSOR position. that is, current cursor key <= given key
|
||||||
|
// Otherwise returns false when the key is to the left of the LE_CURSOR position. that is, current cursor key > given key
|
||||||
|
// The LE_CURSOR position is initialized to +infinity. Any key comparison with +infinity returns false.
|
||||||
|
// When the cursor runs off the left edge of the tree, the LE_CURSOR position is set to -infinity. Any key comparison with -infinity
|
||||||
|
// returns true.
|
||||||
|
bool toku_le_cursor_is_key_greater_or_equal(LE_CURSOR le_cursor, const DBT *key);
|
||||||
|
|
||||||
|
// extracts position of le_cursor into estimate. Responsibility of caller to handle
|
||||||
|
// thread safety. Caller (the indexer), does so by ensuring indexer lock is held
|
||||||
|
void toku_le_cursor_update_estimate(LE_CURSOR le_cursor, DBT* estimate);
|
||||||
44
storage/tokudb/PerconaFT/ft/leafentry.cc
Normal file
44
storage/tokudb/PerconaFT/ft/leafentry.cc
Normal file
@@ -0,0 +1,44 @@
|
|||||||
|
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||||
|
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||||
|
#ident "$Id$"
|
||||||
|
/*======
|
||||||
|
This file is part of PerconaFT.
|
||||||
|
|
||||||
|
|
||||||
|
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||||
|
|
||||||
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License, version 2,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
----------------------------------------
|
||||||
|
|
||||||
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU Affero General Public License, version 3,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Affero General Public License
|
||||||
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
======= */
|
||||||
|
|
||||||
|
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||||
|
|
||||||
|
#include "serialize/wbuf.h"
|
||||||
|
#include "leafentry.h"
|
||||||
|
|
||||||
|
// Append the leafentry to the write buffer as raw bytes, without updating a checksum.
// The byte count is the leafentry's on-disk size as reported by leafentry_disksize().
void wbuf_nocrc_LEAFENTRY(struct wbuf *w, LEAFENTRY le) {
    const size_t disk_bytes = leafentry_disksize(le);
    wbuf_nocrc_literal_bytes(w, le, disk_bytes);
}
|
||||||
222
storage/tokudb/PerconaFT/ft/leafentry.h
Normal file
222
storage/tokudb/PerconaFT/ft/leafentry.h
Normal file
@@ -0,0 +1,222 @@
|
|||||||
|
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||||
|
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||||
|
#ident "$Id$"
|
||||||
|
/*======
|
||||||
|
This file is part of PerconaFT.
|
||||||
|
|
||||||
|
|
||||||
|
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||||
|
|
||||||
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License, version 2,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
----------------------------------------
|
||||||
|
|
||||||
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU Affero General Public License, version 3,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Affero General Public License
|
||||||
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
======= */
|
||||||
|
|
||||||
|
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <toku_portability.h>
|
||||||
|
|
||||||
|
#include <util/mempool.h>
|
||||||
|
#include <util/omt.h>
|
||||||
|
|
||||||
|
#include "ft/txn/txn_manager.h"
|
||||||
|
#include "ft/serialize/rbuf.h"
|
||||||
|
#include "ft/msg.h"
|
||||||
|
|
||||||
|
/*
|
||||||
|
Memory format of packed leaf entry
|
||||||
|
CONSTANTS:
|
||||||
|
num_uxrs
|
||||||
|
keylen
|
||||||
|
Run-time-constants
|
||||||
|
voffset of val/vallen??? (for le_any_val) This must be small if it is interpreted as voffset = realoffset_of_val - keylen
|
||||||
|
GOOD performance optimization.
|
||||||
|
ALSO good for simplicity (no having to scan packed version)
|
||||||
|
key[]
|
||||||
|
variable length
|
||||||
|
|
||||||
|
|
||||||
|
Memory format of packed dup leaf entry
|
||||||
|
CONSTANTS:
|
||||||
|
num_uxrs
|
||||||
|
keylen
|
||||||
|
vallen
|
||||||
|
Run-time-constants
|
||||||
|
key[]
|
||||||
|
val[]
|
||||||
|
*/
|
||||||
|
|
||||||
|
// Read mode selector; passed as the read_type argument of le_val_is_del() and le_extract_val().
// NOTE(review): per-value meanings below are inferred from the names -- confirm against the readers.
enum cursor_read_type {
|
||||||
|
C_READ_ANY = 0, // presumably: no visibility filtering
|
||||||
|
C_READ_SNAPSHOT = 1, // presumably: snapshot visibility
|
||||||
|
C_READ_COMMITTED = 2 // presumably: committed values only
|
||||||
|
};
|
||||||
|
|
||||||
|
//
|
||||||
|
// enum of possible values for LEAFENTRY->type field
|
||||||
|
// LE_CLEAN means that there is a single committed value in a format that saves disk space
|
||||||
|
// LE_MVCC means that there may be multiple committed values or there are provisional values
|
||||||
|
//
|
||||||
|
enum { LE_CLEAN = 0, LE_MVCC = 1 };
|
||||||
|
|
||||||
|
// This is an on-disk format. static_asserts verify everything is packed and aligned correctly.
|
||||||
|
// Leaf entry header: a one-byte type tag followed by a packed union of the two
// format-specific headers (leafentry_clean, leafentry_mvcc). The variable-length payload
// starts at the zero-length arrays val[] / xrs[]. The static_asserts pin the sizes and
// offsets (clean header 4 bytes, mvcc header 5 bytes), so do not reorder or retype fields.
struct leafentry {
|
||||||
|
struct leafentry_clean {
|
||||||
|
uint32_t vallen;
|
||||||
|
uint8_t val[0]; //actual val
|
||||||
|
}; // For the case where LEAFENTRY->type is LE_CLEAN
|
||||||
|
static_assert(4 == sizeof(leafentry::leafentry_clean), "leafentry_clean size is wrong");
|
||||||
|
static_assert(4 == __builtin_offsetof(leafentry::leafentry_clean, val), "val is in the wrong place");
|
||||||
|
struct __attribute__ ((__packed__)) leafentry_mvcc {
|
||||||
|
uint32_t num_cxrs; // number of committed transaction records
|
||||||
|
uint8_t num_pxrs; // number of provisional transaction records
|
||||||
|
uint8_t xrs[0]; //then TXNIDs of XRs relevant for reads:
|
||||||
|
// if provisional XRs exist, store OUTERMOST TXNID
|
||||||
|
// store committed TXNIDs, from most recently committed to least recently committed (newest first)
|
||||||
|
//then lengths of XRs relevant for reads (length is at most 1<<31, MSB is 1 for insert, 0 for delete):
|
||||||
|
// if provisional XRs exist (num_pxrs>0), store length and insert/delete flag associated with INNERMOST TXNID
|
||||||
|
// store length and insert/delete flag associated with each committed TXNID, in same order as above (newest first)
|
||||||
|
//then data of XRs relevant for reads
|
||||||
|
// if provisional XRs exist (num_pxrs>0), store data associated with INNERMOST provisional TXNID
|
||||||
|
// store data associated with committed TXNIDs (all committed data, newest committed values first)
|
||||||
|
//if provisional XRs still exist (that is, num_pxrs > 1, so INNERMOST provisional TXNID != OUTERMOST provisional TXNID):
|
||||||
|
// for OUTERMOST provisional XR:
|
||||||
|
// 1 byte: store type (insert/delete/placeholder)
|
||||||
|
// 4 bytes: length (if type is INSERT, no length stored if placeholder or delete)
|
||||||
|
// data
|
||||||
|
// for rest of provisional stack (if num_pxrs > 2), from second-outermost to second-innermost (outermost is stored above, innermost is stored separately):
|
||||||
|
// 8 bytes: TXNID
|
||||||
|
// 1 byte: store type (insert/delete/placeholder)
|
||||||
|
// 4 bytes: length (if type is INSERT)
|
||||||
|
// data
|
||||||
|
// for INNERMOST provisional XR:
|
||||||
|
// 8 bytes: TXNID
|
||||||
|
// (innermost data and length with insert/delete flag are stored above, cannot be a placeholder)
|
||||||
|
}; // For the case where LEAFENTRY->type is LE_MVCC
|
||||||
|
static_assert(5 == sizeof(leafentry::leafentry_mvcc), "leafentry_mvcc size is wrong");
|
||||||
|
static_assert(5 == __builtin_offsetof(leafentry::leafentry_mvcc, xrs), "xrs is in the wrong place");
|
||||||
|
|
||||||
|
uint8_t type; // type is LE_CLEAN or LE_MVCC
|
||||||
|
//uint32_t keylen;
|
||||||
|
// packed so the union starts at offset 1, immediately after the type byte
union __attribute__ ((__packed__)) {
|
||||||
|
struct leafentry_clean clean;
|
||||||
|
struct leafentry_mvcc mvcc;
|
||||||
|
} u;
|
||||||
|
};
|
||||||
|
static_assert(6 == sizeof(leafentry), "leafentry size is wrong");
|
||||||
|
static_assert(1 == __builtin_offsetof(leafentry, u), "union is in the wrong place");
|
||||||
|
|
||||||
|
// Byte size of an LE_CLEAN leafentry holding a value of _vallen bytes:
// the type byte, the vallen field, and the value itself.
#define LE_CLEAN_MEMSIZE(_vallen) \
|
||||||
|
(sizeof(((LEAFENTRY)NULL)->type) /* type */ \
|
||||||
|
+sizeof(((LEAFENTRY)NULL)->u.clean.vallen) /* vallen */ \
|
||||||
|
+(_vallen)) /* actual val */
|
||||||
|
|
||||||
|
// Fixed overhead, in bytes, summed by this macro: the type byte, the num_cxrs and
// num_pxrs fields, one TXNID, and two uint32_t length+bit words.
#define LE_MVCC_COMMITTED_HEADER_MEMSIZE \
|
||||||
|
(sizeof(((LEAFENTRY)NULL)->type) /* type */ \
|
||||||
|
+sizeof(((LEAFENTRY)NULL)->u.mvcc.num_cxrs) /* committed */ \
|
||||||
|
+sizeof(((LEAFENTRY)NULL)->u.mvcc.num_pxrs) /* provisional */ \
|
||||||
|
+sizeof(TXNID) /* transaction */ \
|
||||||
|
+sizeof(uint32_t) /* length+bit */ \
|
||||||
|
+sizeof(uint32_t)) /* length+bit */
|
||||||
|
|
||||||
|
// LE_MVCC_COMMITTED_HEADER_MEMSIZE plus _vallen bytes of value data.
#define LE_MVCC_COMMITTED_MEMSIZE(_vallen) \
|
||||||
|
(LE_MVCC_COMMITTED_HEADER_MEMSIZE \
|
||||||
|
+(_vallen)) /* actual val */
|
||||||
|
|
||||||
|
|
||||||
|
typedef struct leafentry *LEAFENTRY;
|
||||||
|
typedef struct leafentry_13 *LEAFENTRY_13;
|
||||||
|
|
||||||
|
//
|
||||||
|
// TODO: consistency among names is very poor.
|
||||||
|
//
|
||||||
|
|
||||||
|
// TODO: rename this helper function for deserialization
|
||||||
|
size_t leafentry_rest_memsize(uint32_t num_puxrs, uint32_t num_cuxrs, uint8_t* start);
|
||||||
|
size_t leafentry_memsize (LEAFENTRY le); // the size of a leafentry in memory.
|
||||||
|
size_t leafentry_disksize (LEAFENTRY le); // this is the same as logsizeof_LEAFENTRY. The size of a leafentry on disk.
|
||||||
|
void wbuf_nocrc_LEAFENTRY(struct wbuf *w, LEAFENTRY le);
|
||||||
|
int print_klpair (FILE *outf, const void* key, uint32_t keylen, LEAFENTRY v); // Print a leafentry out in human-readable form.
|
||||||
|
|
||||||
|
int le_latest_is_del(LEAFENTRY le); // Return true if it is a provisional delete.
|
||||||
|
int le_val_is_del(LEAFENTRY le, enum cursor_read_type read_type, TOKUTXN txn); // Returns true if the value that is to be read is empty
|
||||||
|
bool le_is_clean(LEAFENTRY le); //Return how many xids exist (0 does not count)
|
||||||
|
bool le_has_xids(LEAFENTRY le, XIDS xids); // Return true if the transaction represented by xids is still provisional in this leafentry (le's xid stack is a superset of or equal to xids)
|
||||||
|
void* le_latest_val (LEAFENTRY le); // Return the latest val (return NULL for provisional deletes)
|
||||||
|
uint32_t le_latest_vallen (LEAFENTRY le); // Return the latest vallen. Returns 0 for provisional deletes.
|
||||||
|
void* le_latest_val_and_len (LEAFENTRY le, uint32_t *len);
|
||||||
|
|
||||||
|
uint64_t le_outermost_uncommitted_xid (LEAFENTRY le);
|
||||||
|
|
||||||
|
//Callback contract:
|
||||||
|
// Function checks to see if id is accepted by context.
|
||||||
|
// Returns:
|
||||||
|
// 0: context ignores this entry, id.
|
||||||
|
// TOKUDB_ACCEPT: context accepts id
|
||||||
|
// r|r!=0&&r!=TOKUDB_ACCEPT: Quit early, return r, because something unexpected went wrong (error case)
|
||||||
|
typedef int(*LE_ITERATE_CALLBACK)(TXNID id, TOKUTXN context, bool is_provisional);
|
||||||
|
|
||||||
|
int le_iterate_val(LEAFENTRY le, LE_ITERATE_CALLBACK f, void** valpp, uint32_t *vallenp, TOKUTXN context);
|
||||||
|
|
||||||
|
void le_extract_val(LEAFENTRY le,
|
||||||
|
// should we return the entire leafentry as the val?
|
||||||
|
bool is_leaf_mode, enum cursor_read_type read_type,
|
||||||
|
TOKUTXN ttxn, uint32_t *vallen, void **val);
|
||||||
|
|
||||||
|
size_t
|
||||||
|
leafentry_disksize_13(LEAFENTRY_13 le);
|
||||||
|
|
||||||
|
int
|
||||||
|
toku_le_upgrade_13_14(LEAFENTRY_13 old_leafentry, // NULL if there was no stored data.
|
||||||
|
void** keyp,
|
||||||
|
uint32_t* keylen,
|
||||||
|
size_t *new_leafentry_memorysize,
|
||||||
|
LEAFENTRY *new_leafentry_p);
|
||||||
|
|
||||||
|
class bn_data;
|
||||||
|
|
||||||
|
void
|
||||||
|
toku_le_apply_msg(const ft_msg &msg,
|
||||||
|
LEAFENTRY old_leafentry, // NULL if there was no stored data.
|
||||||
|
bn_data* data_buffer, // bn_data storing leafentry, if NULL, means there is no bn_data
|
||||||
|
uint32_t idx, // index in data_buffer where leafentry is stored (and should be replaced)
|
||||||
|
uint32_t old_keylen,
|
||||||
|
txn_gc_info *gc_info,
|
||||||
|
LEAFENTRY *new_leafentry_p,
|
||||||
|
int64_t * numbytes_delta_p);
|
||||||
|
|
||||||
|
bool toku_le_worth_running_garbage_collection(LEAFENTRY le, txn_gc_info *gc_info);
|
||||||
|
|
||||||
|
void
|
||||||
|
toku_le_garbage_collect(LEAFENTRY old_leaf_entry,
|
||||||
|
bn_data* data_buffer,
|
||||||
|
uint32_t idx,
|
||||||
|
void* keyp,
|
||||||
|
uint32_t keylen,
|
||||||
|
txn_gc_info *gc_info,
|
||||||
|
LEAFENTRY *new_leaf_entry,
|
||||||
|
int64_t * numbytes_delta_p);
|
||||||
146
storage/tokudb/PerconaFT/ft/loader/callbacks.cc
Normal file
146
storage/tokudb/PerconaFT/ft/loader/callbacks.cc
Normal file
@@ -0,0 +1,146 @@
|
|||||||
|
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||||
|
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||||
|
#ident "$Id$"
|
||||||
|
/*======
|
||||||
|
This file is part of PerconaFT.
|
||||||
|
|
||||||
|
|
||||||
|
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||||
|
|
||||||
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License, version 2,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
----------------------------------------
|
||||||
|
|
||||||
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU Affero General Public License, version 3,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Affero General Public License
|
||||||
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
======= */
|
||||||
|
|
||||||
|
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||||
|
|
||||||
|
#include <toku_portability.h>
|
||||||
|
#include <toku_assert.h>
|
||||||
|
#include <toku_pthread.h>
|
||||||
|
#include <errno.h>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
#include "loader/loader-internal.h"
|
||||||
|
#include "util/dbt.h"
|
||||||
|
|
||||||
|
// Acquire the mutex embedded in the loader error state.
static void error_callback_lock(ft_loader_error_callback loader_error) {
    toku_mutex_lock(&loader_error->mutex);
}
|
||||||
|
|
||||||
|
// Release the mutex embedded in the loader error state.
static void error_callback_unlock(ft_loader_error_callback loader_error) {
    toku_mutex_unlock(&loader_error->mutex);
}
|
||||||
|
|
||||||
|
// Put the loader error state into its initial condition: zero the whole
// structure, then initialize the key/val DBTs and the mutex.
void ft_loader_init_error_callback(ft_loader_error_callback loader_error) {
    memset(loader_error, 0, sizeof(*loader_error));

    toku_init_dbt(&loader_error->key);
    toku_init_dbt(&loader_error->val);

    toku_mutex_init(&loader_error->mutex, NULL);
}
|
||||||
|
|
||||||
|
// Tear down the loader error state: destroy the mutex, destroy the key/val
// DBTs, and finally zero the whole structure.
void ft_loader_destroy_error_callback(ft_loader_error_callback loader_error) {
    toku_mutex_destroy(&loader_error->mutex);

    toku_destroy_dbt(&loader_error->key);
    toku_destroy_dbt(&loader_error->val);

    memset(loader_error, 0, sizeof(*loader_error));
}
|
||||||
|
|
||||||
|
// Return the currently recorded error code, read while holding the mutex.
int ft_loader_get_error(ft_loader_error_callback loader_error) {
    error_callback_lock(loader_error);
    int recorded = loader_error->error;
    error_callback_unlock(loader_error);

    return recorded;
}
|
||||||
|
|
||||||
|
// Register the function to be invoked when an error is reported, together
// with the opaque pointer handed back to it.
// Note: these fields are written without taking the mutex.
void ft_loader_set_error_function(ft_loader_error_callback loader_error, ft_loader_error_func error_function, void *error_extra) {
    loader_error->extra = error_extra;
    loader_error->error_callback = error_function;
}
|
||||||
|
|
||||||
|
// Record an error in the loader error state, under the mutex.
// Only the first error is kept:
//   - if an error is already recorded, nothing changes and EEXIST is returned;
//   - otherwise error, db and which_db are stored, key and val are cloned
//     when they are non-null, and 0 is returned.
int ft_loader_set_error(ft_loader_error_callback loader_error, int error, DB *db, int which_db, DBT *key, DBT *val) {
    int r = EEXIST; // result if an earlier error is already recorded

    error_callback_lock(loader_error);
    if (!loader_error->error) {
        // first error wins: remember it along with its context
        loader_error->error = error;
        loader_error->db = db;
        loader_error->which_db = which_db;
        if (key != nullptr) {
            toku_clone_dbt(&loader_error->key, *key);
        }
        if (val != nullptr) {
            toku_clone_dbt(&loader_error->val, *val);
        }
        r = 0;
    }
    error_callback_unlock(loader_error);

    return r;
}
|
||||||
|
|
||||||
|
// Invoke the registered error function if an error has been recorded, a
// function is registered, and it has not been invoked before. The call is
// made while the mutex is held, and did_callback is set first so that the
// function runs at most once.
// Returns the recorded error code (0 when no error has been recorded).
int ft_loader_call_error_function(ft_loader_error_callback loader_error) {
    error_callback_lock(loader_error);

    int r = loader_error->error;
    bool should_fire = r && loader_error->error_callback && !loader_error->did_callback;
    if (should_fire) {
        loader_error->did_callback = true;
        loader_error->error_callback(loader_error->db,
                                     loader_error->which_db,
                                     loader_error->error,
                                     &loader_error->key,
                                     &loader_error->val,
                                     loader_error->extra);
    }

    error_callback_unlock(loader_error);
    return r;
}
|
||||||
|
|
||||||
|
// Record an error and, if it was actually recorded (ft_loader_set_error
// returned 0), run the error function. When the error could not be recorded,
// the result of ft_loader_set_error is returned and the function is not run.
int ft_loader_set_error_and_callback(ft_loader_error_callback loader_error, int error, DB *db, int which_db, DBT *key, DBT *val) {
    int set_result = ft_loader_set_error(loader_error, error, db, which_db, key, val);
    if (set_result != 0) {
        return set_result;
    }
    return ft_loader_call_error_function(loader_error);
}
|
||||||
|
|
||||||
|
// Zero the poll callback state. Always returns 0.
int ft_loader_init_poll_callback(ft_loader_poll_callback p) {
    memset(p, 0, sizeof(*p));
    return 0;
}
|
||||||
|
|
||||||
|
// Zero the poll callback state; nothing else is released here.
void ft_loader_destroy_poll_callback(ft_loader_poll_callback p) {
    memset(p, 0, sizeof(*p));
}
|
||||||
|
|
||||||
|
// Register the poll function and the opaque pointer that is passed to it.
void ft_loader_set_poll_function(ft_loader_poll_callback p, ft_loader_poll_func poll_function, void *poll_extra) {
    p->poll_extra = poll_extra;
    p->poll_function = poll_function;
}
|
||||||
|
|
||||||
|
// Call the registered poll function with the stored extra pointer and the
// given progress value, returning its result.
// Returns 0 when no poll function is registered.
int ft_loader_call_poll_function(ft_loader_poll_callback p, float progress) {
    if (!p->poll_function) {
        return 0;
    }
    return p->poll_function(p->poll_extra, progress);
}
|
||||||
@@ -1,95 +1,40 @@
|
|||||||
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||||
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||||
#ident "$Id$"
|
#ident "$Id$"
|
||||||
/*
|
/*======
|
||||||
COPYING CONDITIONS NOTICE:
|
This file is part of PerconaFT.
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or modify
|
|
||||||
it under the terms of version 2 of the GNU General Public License as
|
|
||||||
published by the Free Software Foundation, and provided that the
|
|
||||||
following conditions are met:
|
|
||||||
|
|
||||||
* Redistributions of source code must retain this COPYING
|
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||||
CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the
|
|
||||||
DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the
|
|
||||||
PATENT MARKING NOTICE (below), and the PATENT RIGHTS
|
|
||||||
GRANT (below).
|
|
||||||
|
|
||||||
* Redistributions in binary form must reproduce this COPYING
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the
|
it under the terms of the GNU General Public License, version 2,
|
||||||
DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the
|
as published by the Free Software Foundation.
|
||||||
PATENT MARKING NOTICE (below), and the PATENT RIGHTS
|
|
||||||
GRANT (below) in the documentation and/or other materials
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
provided with the distribution.
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
You should have received a copy of the GNU General Public License
|
You should have received a copy of the GNU General Public License
|
||||||
along with this program; if not, write to the Free Software
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
|
||||||
02110-1301, USA.
|
|
||||||
|
|
||||||
COPYRIGHT NOTICE:
|
----------------------------------------
|
||||||
|
|
||||||
TokuFT, Tokutek Fractal Tree Indexing Library.
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
Copyright (C) 2007-2013 Tokutek, Inc.
|
it under the terms of the GNU Affero General Public License, version 3,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
DISCLAIMER:
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
This program is distributed in the hope that it will be useful, but
|
You should have received a copy of the GNU Affero General Public License
|
||||||
WITHOUT ANY WARRANTY; without even the implied warranty of
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
======= */
|
||||||
General Public License for more details.
|
|
||||||
|
|
||||||
UNIVERSITY PATENT NOTICE:
|
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||||
|
|
||||||
The technology is licensed by the Massachusetts Institute of
|
|
||||||
Technology, Rutgers State University of New Jersey, and the Research
|
|
||||||
Foundation of State University of New York at Stony Brook under
|
|
||||||
United States of America Serial No. 11/760379 and to the patents
|
|
||||||
and/or patent applications resulting from it.
|
|
||||||
|
|
||||||
PATENT MARKING NOTICE:
|
|
||||||
|
|
||||||
This software is covered by US Patent No. 8,185,551.
|
|
||||||
This software is covered by US Patent No. 8,489,638.
|
|
||||||
|
|
||||||
PATENT RIGHTS GRANT:
|
|
||||||
|
|
||||||
"THIS IMPLEMENTATION" means the copyrightable works distributed by
|
|
||||||
Tokutek as part of the Fractal Tree project.
|
|
||||||
|
|
||||||
"PATENT CLAIMS" means the claims of patents that are owned or
|
|
||||||
licensable by Tokutek, both currently or in the future; and that in
|
|
||||||
the absence of this license would be infringed by THIS
|
|
||||||
IMPLEMENTATION or by using or running THIS IMPLEMENTATION.
|
|
||||||
|
|
||||||
"PATENT CHALLENGE" shall mean a challenge to the validity,
|
|
||||||
patentability, enforceability and/or non-infringement of any of the
|
|
||||||
PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS.
|
|
||||||
|
|
||||||
Tokutek hereby grants to you, for the term and geographical scope of
|
|
||||||
the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free,
|
|
||||||
irrevocable (except as stated in this section) patent license to
|
|
||||||
make, have made, use, offer to sell, sell, import, transfer, and
|
|
||||||
otherwise run, modify, and propagate the contents of THIS
|
|
||||||
IMPLEMENTATION, where such license applies only to the PATENT
|
|
||||||
CLAIMS. This grant does not include claims that would be infringed
|
|
||||||
only as a consequence of further modifications of THIS
|
|
||||||
IMPLEMENTATION. If you or your agent or licensee institute or order
|
|
||||||
or agree to the institution of patent litigation against any entity
|
|
||||||
(including a cross-claim or counterclaim in a lawsuit) alleging that
|
|
||||||
THIS IMPLEMENTATION constitutes direct or contributory patent
|
|
||||||
infringement, or inducement of patent infringement, then any rights
|
|
||||||
granted to you under this License shall terminate as of the date
|
|
||||||
such litigation is filed. If you or your agent or exclusive
|
|
||||||
licensee institute or order or agree to the institution of a PATENT
|
|
||||||
CHALLENGE, then Tokutek may terminate any rights granted to you
|
|
||||||
under this License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ident "Copyright (c) 2010-2013 Tokutek Inc. All rights reserved."
|
|
||||||
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it."
|
|
||||||
|
|
||||||
#include <config.h>
|
|
||||||
|
|
||||||
#include <errno.h>
|
#include <errno.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
58
storage/tokudb/PerconaFT/ft/loader/dbufio.h
Normal file
58
storage/tokudb/PerconaFT/ft/loader/dbufio.h
Normal file
@@ -0,0 +1,58 @@
|
|||||||
|
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||||
|
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||||
|
#ident "$Id$"
|
||||||
|
/*======
|
||||||
|
This file is part of PerconaFT.
|
||||||
|
|
||||||
|
|
||||||
|
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||||
|
|
||||||
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License, version 2,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
----------------------------------------
|
||||||
|
|
||||||
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU Affero General Public License, version 3,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Affero General Public License
|
||||||
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
======= */
|
||||||
|
|
||||||
|
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <toku_portability.h>
|
||||||
|
#include <toku_pthread.h>
|
||||||
|
|
||||||
|
/* Maintain a set of files for reading, with double buffering for the reads. */
|
||||||
|
|
||||||
|
/* A DBUFIO_FILESET is a set of files. The files are indexed from 0 to N-1, where N is specified when the set is created (and the files are also provided when the set is created). */
|
||||||
|
/* An implementation would typically use a separate thread or asynchronous I/O to fetch ahead data for each file. The system will typically fill two buffers of size M for each file. One buffer is being read out of using dbufio_fileset_read(), and the other buffer is either empty (waiting on the asynchronous I/O to start), being filled in by the asynchronous I/O mechanism, or is waiting for the caller to read data from it. */
|
||||||
|
typedef struct dbufio_fileset *DBUFIO_FILESET;
|
||||||
|
|
||||||
|
int create_dbufio_fileset (DBUFIO_FILESET *bfsp, int N, int fds[/*N*/], size_t bufsize, bool compressed);
|
||||||
|
|
||||||
|
int destroy_dbufio_fileset(DBUFIO_FILESET);
|
||||||
|
|
||||||
|
int dbufio_fileset_read (DBUFIO_FILESET bfs, int filenum, void *buf_v, size_t count, size_t *n_read);
|
||||||
|
|
||||||
|
int panic_dbufio_fileset(DBUFIO_FILESET, int error);
|
||||||
|
|
||||||
|
void dbufio_print(DBUFIO_FILESET);
|
||||||
@@ -1,95 +1,43 @@
|
|||||||
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||||
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||||
#ident "$Id$"
|
#ident "$Id$"
|
||||||
/*
|
/*======
|
||||||
COPYING CONDITIONS NOTICE:
|
This file is part of PerconaFT.
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or modify
|
|
||||||
it under the terms of version 2 of the GNU General Public License as
|
|
||||||
published by the Free Software Foundation, and provided that the
|
|
||||||
following conditions are met:
|
|
||||||
|
|
||||||
* Redistributions of source code must retain this COPYING
|
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||||
CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the
|
|
||||||
DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the
|
|
||||||
PATENT MARKING NOTICE (below), and the PATENT RIGHTS
|
|
||||||
GRANT (below).
|
|
||||||
|
|
||||||
* Redistributions in binary form must reproduce this COPYING
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the
|
it under the terms of the GNU General Public License, version 2,
|
||||||
DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the
|
as published by the Free Software Foundation.
|
||||||
PATENT MARKING NOTICE (below), and the PATENT RIGHTS
|
|
||||||
GRANT (below) in the documentation and/or other materials
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
provided with the distribution.
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
You should have received a copy of the GNU General Public License
|
You should have received a copy of the GNU General Public License
|
||||||
along with this program; if not, write to the Free Software
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
|
||||||
02110-1301, USA.
|
|
||||||
|
|
||||||
COPYRIGHT NOTICE:
|
----------------------------------------
|
||||||
|
|
||||||
TokuFT, Tokutek Fractal Tree Indexing Library.
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
Copyright (C) 2007-2013 Tokutek, Inc.
|
it under the terms of the GNU Affero General Public License, version 3,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
DISCLAIMER:
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
This program is distributed in the hope that it will be useful, but
|
You should have received a copy of the GNU Affero General Public License
|
||||||
WITHOUT ANY WARRANTY; without even the implied warranty of
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
======= */
|
||||||
General Public License for more details.
|
|
||||||
|
|
||||||
UNIVERSITY PATENT NOTICE:
|
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||||
|
|
||||||
The technology is licensed by the Massachusetts Institute of
|
|
||||||
Technology, Rutgers State University of New Jersey, and the Research
|
|
||||||
Foundation of State University of New York at Stony Brook under
|
|
||||||
United States of America Serial No. 11/760379 and to the patents
|
|
||||||
and/or patent applications resulting from it.
|
|
||||||
|
|
||||||
PATENT MARKING NOTICE:
|
|
||||||
|
|
||||||
This software is covered by US Patent No. 8,185,551.
|
|
||||||
This software is covered by US Patent No. 8,489,638.
|
|
||||||
|
|
||||||
PATENT RIGHTS GRANT:
|
|
||||||
|
|
||||||
"THIS IMPLEMENTATION" means the copyrightable works distributed by
|
|
||||||
Tokutek as part of the Fractal Tree project.
|
|
||||||
|
|
||||||
"PATENT CLAIMS" means the claims of patents that are owned or
|
|
||||||
licensable by Tokutek, both currently or in the future; and that in
|
|
||||||
the absence of this license would be infringed by THIS
|
|
||||||
IMPLEMENTATION or by using or running THIS IMPLEMENTATION.
|
|
||||||
|
|
||||||
"PATENT CHALLENGE" shall mean a challenge to the validity,
|
|
||||||
patentability, enforceability and/or non-infringement of any of the
|
|
||||||
PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS.
|
|
||||||
|
|
||||||
Tokutek hereby grants to you, for the term and geographical scope of
|
|
||||||
the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free,
|
|
||||||
irrevocable (except as stated in this section) patent license to
|
|
||||||
make, have made, use, offer to sell, sell, import, transfer, and
|
|
||||||
otherwise run, modify, and propagate the contents of THIS
|
|
||||||
IMPLEMENTATION, where such license applies only to the PATENT
|
|
||||||
CLAIMS. This grant does not include claims that would be infringed
|
|
||||||
only as a consequence of further modifications of THIS
|
|
||||||
IMPLEMENTATION. If you or your agent or licensee institute or order
|
|
||||||
or agree to the institution of patent litigation against any entity
|
|
||||||
(including a cross-claim or counterclaim in a lawsuit) alleging that
|
|
||||||
THIS IMPLEMENTATION constitutes direct or contributory patent
|
|
||||||
infringement, or inducement of patent infringement, then any rights
|
|
||||||
granted to you under this License shall terminate as of the date
|
|
||||||
such litigation is filed. If you or your agent or exclusive
|
|
||||||
licensee institute or order or agree to the institution of a PATENT
|
|
||||||
CHALLENGE, then Tokutek may terminate any rights granted to you
|
|
||||||
under this License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#ident "Copyright (c) 2010-2013 Tokutek Inc. All rights reserved."
|
|
||||||
|
|
||||||
#include <db.h>
|
#include <db.h>
|
||||||
|
|
||||||
#include "portability/toku_pthread.h"
|
#include "portability/toku_pthread.h"
|
||||||
3287
storage/tokudb/PerconaFT/ft/loader/loader.cc
Normal file
3287
storage/tokudb/PerconaFT/ft/loader/loader.cc
Normal file
File diff suppressed because it is too large
Load Diff
84
storage/tokudb/PerconaFT/ft/loader/loader.h
Normal file
84
storage/tokudb/PerconaFT/ft/loader/loader.h
Normal file
@@ -0,0 +1,84 @@
|
|||||||
|
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||||
|
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||||
|
#ident "$Id$"
|
||||||
|
/*======
|
||||||
|
This file is part of PerconaFT.
|
||||||
|
|
||||||
|
|
||||||
|
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||||
|
|
||||||
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License, version 2,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
----------------------------------------
|
||||||
|
|
||||||
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU Affero General Public License, version 3,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Affero General Public License
|
||||||
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
======= */
|
||||||
|
|
||||||
|
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "ft/txn/txn.h"
|
||||||
|
#include "ft/cachetable/cachetable.h"
|
||||||
|
#include "ft/comparator.h"
|
||||||
|
#include "ft/ft-ops.h"
|
||||||
|
|
||||||
|
// The loader callbacks are C functions and need to be defined as such
|
||||||
|
|
||||||
|
typedef void (*ft_loader_error_func)(DB *, int which_db, int err, DBT *key, DBT *val, void *extra);
|
||||||
|
|
||||||
|
typedef int (*ft_loader_poll_func)(void *extra, float progress);
|
||||||
|
|
||||||
|
typedef struct ft_loader_s *FTLOADER;
|
||||||
|
|
||||||
|
int toku_ft_loader_open (FTLOADER *bl,
|
||||||
|
CACHETABLE cachetable,
|
||||||
|
generate_row_for_put_func g,
|
||||||
|
DB *src_db,
|
||||||
|
int N,
|
||||||
|
FT_HANDLE ft_hs[/*N*/], DB* dbs[/*N*/],
|
||||||
|
const char * new_fnames_in_env[/*N*/],
|
||||||
|
ft_compare_func bt_compare_functions[/*N*/],
|
||||||
|
const char *temp_file_template,
|
||||||
|
LSN load_lsn,
|
||||||
|
TOKUTXN txn,
|
||||||
|
bool reserve_memory,
|
||||||
|
uint64_t reserve_memory_size,
|
||||||
|
bool compress_intermediates,
|
||||||
|
bool allow_puts);
|
||||||
|
|
||||||
|
int toku_ft_loader_put (FTLOADER bl, DBT *key, DBT *val);
|
||||||
|
|
||||||
|
int toku_ft_loader_close (FTLOADER bl,
|
||||||
|
ft_loader_error_func error_callback, void *error_callback_extra,
|
||||||
|
ft_loader_poll_func poll_callback, void *poll_callback_extra);
|
||||||
|
|
||||||
|
int toku_ft_loader_abort(FTLOADER bl,
|
||||||
|
bool is_error);
|
||||||
|
|
||||||
|
// For test purposes only
|
||||||
|
void toku_ft_loader_set_size_factor (uint32_t factor);
|
||||||
|
|
||||||
|
void ft_loader_set_os_fwrite (size_t (*fwrite_fun)(const void*,size_t,size_t,FILE*));
|
||||||
|
|
||||||
|
size_t ft_loader_leafentry_size(size_t key_size, size_t val_size, TXNID xid);
|
||||||
180
storage/tokudb/PerconaFT/ft/loader/pqueue.cc
Normal file
180
storage/tokudb/PerconaFT/ft/loader/pqueue.cc
Normal file
@@ -0,0 +1,180 @@
|
|||||||
|
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||||
|
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||||
|
#ident "$Id$"
|
||||||
|
/*======
|
||||||
|
This file is part of PerconaFT.
|
||||||
|
|
||||||
|
|
||||||
|
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||||
|
|
||||||
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License, version 2,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
----------------------------------------
|
||||||
|
|
||||||
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU Affero General Public License, version 3,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Affero General Public License
|
||||||
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
======= */
|
||||||
|
|
||||||
|
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||||
|
|
||||||
|
#include <toku_portability.h>
|
||||||
|
#include "toku_os.h"
|
||||||
|
#include "ft-internal.h"
|
||||||
|
#include "loader/loader-internal.h"
|
||||||
|
#include "loader/pqueue.h"
|
||||||
|
|
||||||
|
#define pqueue_left(i) ((i) << 1)
|
||||||
|
#define pqueue_right(i) (((i) << 1) + 1)
|
||||||
|
#define pqueue_parent(i) ((i) >> 1)
|
||||||
|
|
||||||
|
// Allocate and initialize a priority queue with room for n elements.
// On success stores the new queue in *result and returns 0.
// If either allocation fails, returns get_error_errno(), frees anything
// already allocated, and leaves *result untouched.
int pqueue_init(pqueue_t **result, size_t n, int which_db, DB *db, ft_compare_func compare, struct error_callback_s *err_callback)
{
    pqueue_t *MALLOC(q);
    if (q == NULL) {
        return get_error_errno();
    }

    /* Element 0 of the array isn't used, so n elements need n+1 slots. */
    MALLOC_N(n + 1, q->d);
    if (q->d == NULL) {
        int r = get_error_errno();
        toku_free(q);
        return r;
    }

    /* size starts at 1 because slot 0 is reserved (see above). */
    q->size = 1;
    q->step = (n + 1);
    q->avail = q->step;

    q->db = db;
    q->which_db = which_db;
    q->compare = compare;
    q->error_callback = err_callback;
    q->dup_error = 0;

    *result = q;
    return 0;
}
|
||||||
|
|
||||||
|
// Release a queue created by pqueue_init(): the slot array first, then the
// queue header.  The nodes referenced by the slots are not freed here.
//
// A NULL queue is accepted and ignored, matching pqueue_insert() and
// pqueue_pop(), which also tolerate a NULL queue.
void pqueue_free(pqueue_t *q)
{
    if (q == NULL) {
        return;
    }
    toku_free(q->d);
    toku_free(q);
}
|
||||||
|
|
||||||
|
|
||||||
|
// Return the number of nodes currently stored in the queue.
size_t pqueue_size(pqueue_t *q)
{
    /* q->size also counts slot 0, which exists but is never used. */
    const size_t slots_in_use = q->size;
    return slots_in_use - 1;
}
|
||||||
|
|
||||||
|
// Compare next_key against curr_key with the queue's comparison function.
//
// Returns 1 when next_key >= curr_key and 0 when next_key < curr_key.
// Equal keys are additionally recorded as a duplicate: q->dup_error is set
// and, if an error callback is installed, DB_KEYEXIST is reported through it
// together with next_key/next_val.
static int pqueue_compare(pqueue_t *q, DBT *next_key, DBT *next_val, DBT *curr_key)
{
    const int cmp = q->compare(q->db, next_key, curr_key);
    if (cmp != 0) {
        return cmp > 0;
    }

    // duplicate key : next_key == curr_key
    q->dup_error = 1;
    if (q->error_callback) {
        ft_loader_set_error_and_callback(q->error_callback, DB_KEYEXIST, q->db, q->which_db, next_key, next_val);
    }
    return 1;
}
|
||||||
|
|
||||||
|
// Move the node in slot i towards the root (slot 1) until its parent no
// longer compares >= to it, shifting the displaced parents down one level.
static void pqueue_bubble_up(pqueue_t *q, size_t i)
{
    pqueue_node_t *const moving_node = q->d[i];
    DBT *const moving_key = moving_node->key;

    // Slot 1 is the root; slot 0 is never touched.
    while (i > 1) {
        const size_t parent = pqueue_parent(i);
        if (!pqueue_compare(q, q->d[parent]->key, q->d[parent]->val, moving_key)) {
            break;
        }
        q->d[i] = q->d[parent];
        i = parent;
    }

    q->d[i] = moving_node;
}
|
||||||
|
|
||||||
|
|
||||||
|
// Select which child of slot i should be considered when percolating down.
//
// Returns 0 when slot i has no children.  With a single child, that child is
// returned.  With two children, the right one is returned when the left
// child's key compares >= to the right child's key, otherwise the left one.
static size_t pqueue_maxchild(pqueue_t *q, size_t i)
{
    const size_t left = pqueue_left(i);
    if (left >= q->size) {
        return 0;
    }

    const size_t right = left + 1;
    if (right >= q->size) {
        return left;
    }

    if (pqueue_compare(q, q->d[left]->key, q->d[left]->val, q->d[right]->key)) {
        return right; /* use right child instead of left */
    }
    return left;
}
|
||||||
|
|
||||||
|
|
||||||
|
// Move the node in slot i away from the root while it compares >= to the
// child selected by pqueue_maxchild(), pulling those children up one level.
static void pqueue_percolate_down(pqueue_t *q, size_t i)
{
    pqueue_node_t *const moving_node = q->d[i];
    DBT *const moving_key = moving_node->key;
    DBT *const moving_val = moving_node->val;

    for (;;) {
        const size_t child = pqueue_maxchild(q, i);
        if (child == 0) {
            break;  // slot i has no children
        }
        if (!pqueue_compare(q, moving_key, moving_val, q->d[child]->key)) {
            break;  // moving node sorts before the selected child
        }
        q->d[i] = q->d[child];
        i = child;
    }

    q->d[i] = moving_node;
}
|
||||||
|
|
||||||
|
|
||||||
|
// Insert node d into the queue.
//
// Returns 1 if q is NULL or the queue is already full, DB_KEYEXIST if the
// queue's duplicate-key flag is set after the insertion (the flag is never
// cleared by this function), and 0 otherwise.
int pqueue_insert(pqueue_t *q, pqueue_node_t *d)
{
    if (q == NULL) {
        return 1;
    }
    if (q->size >= q->avail) {
        return 1;
    }

    /* append the item in the first free slot, then restore heap order */
    const size_t slot = q->size;
    q->size = slot + 1;
    q->d[slot] = d;
    pqueue_bubble_up(q, slot);

    return q->dup_error ? DB_KEYEXIST : 0;
}
|
||||||
|
|
||||||
|
// Remove the node at the head of the queue and store it in *d.
//
// If q is NULL or the queue is empty, *d is set to NULL and 0 is returned.
// Otherwise returns DB_KEYEXIST when the queue's duplicate-key flag is set
// after the removal, and 0 when it is not.
int pqueue_pop(pqueue_t *q, pqueue_node_t **d)
{
    // size == 1 means only the unused slot 0 exists, i.e. the queue is empty.
    const bool nothing_to_pop = (!q || q->size == 1);
    if (nothing_to_pop) {
        *d = NULL;
        return 0;
    }

    *d = q->d[1];

    // Move the last node into the root slot and sift it back into place.
    q->size--;
    q->d[1] = q->d[q->size];
    pqueue_percolate_down(q, 1);

    if (q->dup_error) {
        return DB_KEYEXIST;
    }
    return 0;
}
|
||||||
68
storage/tokudb/PerconaFT/ft/loader/pqueue.h
Normal file
68
storage/tokudb/PerconaFT/ft/loader/pqueue.h
Normal file
@@ -0,0 +1,68 @@
|
|||||||
|
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||||
|
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||||
|
#ident "$Id$"
|
||||||
|
/*======
|
||||||
|
This file is part of PerconaFT.
|
||||||
|
|
||||||
|
|
||||||
|
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||||
|
|
||||||
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License, version 2,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
----------------------------------------
|
||||||
|
|
||||||
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU Affero General Public License, version 3,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Affero General Public License
|
||||||
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
======= */
|
||||||
|
|
||||||
|
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
typedef struct ft_pqueue_node_t
|
||||||
|
{
|
||||||
|
DBT *key;
|
||||||
|
DBT *val;
|
||||||
|
int i;
|
||||||
|
} pqueue_node_t;
|
||||||
|
|
||||||
|
typedef struct ft_pqueue_t
|
||||||
|
{
|
||||||
|
size_t size;
|
||||||
|
size_t avail;
|
||||||
|
size_t step;
|
||||||
|
|
||||||
|
int which_db;
|
||||||
|
DB *db; // needed for compare function
|
||||||
|
ft_compare_func compare;
|
||||||
|
pqueue_node_t **d;
|
||||||
|
int dup_error;
|
||||||
|
|
||||||
|
struct error_callback_s *error_callback;
|
||||||
|
|
||||||
|
} pqueue_t;
|
||||||
|
|
||||||
|
int pqueue_init(pqueue_t **result, size_t n, int which_db, DB *db, ft_compare_func compare, struct error_callback_s *err_callback);
|
||||||
|
void pqueue_free(pqueue_t *q);
|
||||||
|
size_t pqueue_size(pqueue_t *q);
|
||||||
|
int pqueue_insert(pqueue_t *q, pqueue_node_t *d);
|
||||||
|
int pqueue_pop(pqueue_t *q, pqueue_node_t **d);
|
||||||
225
storage/tokudb/PerconaFT/ft/logger/log-internal.h
Normal file
225
storage/tokudb/PerconaFT/ft/logger/log-internal.h
Normal file
@@ -0,0 +1,225 @@
|
|||||||
|
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||||
|
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||||
|
#ident "$Id$"
|
||||||
|
/*======
|
||||||
|
This file is part of PerconaFT.
|
||||||
|
|
||||||
|
|
||||||
|
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||||
|
|
||||||
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License, version 2,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
----------------------------------------
|
||||||
|
|
||||||
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU Affero General Public License, version 3,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Affero General Public License
|
||||||
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
======= */
|
||||||
|
|
||||||
|
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <sys/types.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <dirent.h>
|
||||||
|
|
||||||
|
#include "portability/toku_list.h"
|
||||||
|
#include "portability/toku_pthread.h"
|
||||||
|
#include "ft/ft-internal.h"
|
||||||
|
#include "ft/logger/log.h"
|
||||||
|
#include "ft/logger/logfilemgr.h"
|
||||||
|
#include "ft/txn/txn.h"
|
||||||
|
#include "ft/txn/txn_manager.h"
|
||||||
|
#include "ft/txn/rollback_log_node_cache.h"
|
||||||
|
|
||||||
|
#include "util/memarena.h"
|
||||||
|
#include "util/omt.h"
|
||||||
|
|
||||||
|
using namespace toku;
|
||||||
|
// Locking for the logger
|
||||||
|
// For most purposes we use the big ydb lock.
|
||||||
|
// To log: grab the buf lock
|
||||||
|
// If the buf would overflow, then grab the file lock, swap file&buf, release buf lock, write the file, write the entry, release the file lock
|
||||||
|
// else append to buf & release lock
|
||||||
|
|
||||||
|
#define LOGGER_MIN_BUF_SIZE (1<<24)
|
||||||
|
|
||||||
|
// TODO: Remove mylock, it has no value
|
||||||
|
struct mylock {
|
||||||
|
toku_mutex_t lock;
|
||||||
|
};
|
||||||
|
|
||||||
|
static inline void ml_init(struct mylock *l) {
|
||||||
|
toku_mutex_init(&l->lock, 0);
|
||||||
|
}
|
||||||
|
static inline void ml_lock(struct mylock *l) {
|
||||||
|
toku_mutex_lock(&l->lock);
|
||||||
|
}
|
||||||
|
static inline void ml_unlock(struct mylock *l) {
|
||||||
|
toku_mutex_unlock(&l->lock);
|
||||||
|
}
|
||||||
|
static inline void ml_destroy(struct mylock *l) {
|
||||||
|
toku_mutex_destroy(&l->lock);
|
||||||
|
}
|
||||||
|
|
||||||
|
struct logbuf {
|
||||||
|
int n_in_buf;
|
||||||
|
int buf_size;
|
||||||
|
char *buf;
|
||||||
|
LSN max_lsn_in_buf;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct tokulogger {
|
||||||
|
struct mylock input_lock;
|
||||||
|
|
||||||
|
toku_mutex_t output_condition_lock; // if you need both this lock and input_lock, acquire the output_lock first, then input_lock. More typical is to get the output_is_available condition to be false, and then acquire the input_lock.
|
||||||
|
toku_cond_t output_condition; //
|
||||||
|
bool output_is_available; // this is part of the predicate for the output condition. It's true if no thread is modifying the output (either doing an fsync or otherwise fiddling with the output).
|
||||||
|
|
||||||
|
bool is_open;
|
||||||
|
bool write_log_files;
|
||||||
|
bool trim_log_files; // for test purposes
|
||||||
|
char *directory; // file system directory
|
||||||
|
DIR *dir; // descriptor for directory
|
||||||
|
int fd;
|
||||||
|
CACHETABLE ct;
|
||||||
|
int lg_max; // The size of the single file in the log. Default is 100MB.
|
||||||
|
|
||||||
|
// To access these, you must have the input lock
|
||||||
|
LSN lsn; // the next available lsn
|
||||||
|
struct logbuf inbuf; // data being accumulated for the write
|
||||||
|
|
||||||
|
// To access these, you must have the output condition lock.
|
||||||
|
LSN written_lsn; // the last lsn written
|
||||||
|
LSN fsynced_lsn; // What is the LSN of the highest fsynced log entry (accessed only while holding the output lock, and updated only when the output lock and output permission are held)
|
||||||
|
LSN last_completed_checkpoint_lsn; // What is the LSN of the most recent completed checkpoint.
|
||||||
|
long long next_log_file_number;
|
||||||
|
struct logbuf outbuf; // data being written to the file
|
||||||
|
int n_in_file; // The amount of data in the current file
|
||||||
|
|
||||||
|
// To access the logfilemgr you must have the output condition lock.
|
||||||
|
TOKULOGFILEMGR logfilemgr;
|
||||||
|
|
||||||
|
uint32_t write_block_size; // How big should the blocks be written to various logs?
|
||||||
|
|
||||||
|
uint64_t num_writes_to_disk; // how many times did we write to disk?
|
||||||
|
uint64_t bytes_written_to_disk; // how many bytes have been written to disk?
|
||||||
|
tokutime_t time_spent_writing_to_disk; // how much tokutime did we spend writing to disk?
|
||||||
|
uint64_t num_wait_buf_long; // how many times we waited >= 100ms for the in buf
|
||||||
|
|
||||||
|
CACHEFILE rollback_cachefile;
|
||||||
|
rollback_log_node_cache rollback_cache;
|
||||||
|
TXN_MANAGER txn_manager;
|
||||||
|
};
|
||||||
|
|
||||||
|
int toku_logger_find_next_unused_log_file(const char *directory, long long *result);
|
||||||
|
int toku_logger_find_logfiles (const char *directory, char ***resultp, int *n_logfiles);
|
||||||
|
void toku_logger_free_logfiles (char **logfiles, int n_logfiles);
|
||||||
|
|
||||||
|
static inline int
|
||||||
|
txn_has_current_rollback_log(TOKUTXN txn) {
|
||||||
|
return txn->roll_info.current_rollback.b != ROLLBACK_NONE.b;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline int
|
||||||
|
txn_has_spilled_rollback_logs(TOKUTXN txn) {
|
||||||
|
return txn->roll_info.spilled_rollback_tail.b != ROLLBACK_NONE.b;
|
||||||
|
}
|
||||||
|
|
||||||
|
struct txninfo {
|
||||||
|
uint64_t rollentry_raw_count; // the total count of every byte in the transaction and all its children.
|
||||||
|
uint32_t num_fts;
|
||||||
|
FT *open_fts;
|
||||||
|
bool force_fsync_on_commit; //This transaction NEEDS an fsync once (if) it commits. (commit means root txn)
|
||||||
|
uint64_t num_rollback_nodes;
|
||||||
|
uint64_t num_rollentries;
|
||||||
|
BLOCKNUM spilled_rollback_head;
|
||||||
|
BLOCKNUM spilled_rollback_tail;
|
||||||
|
BLOCKNUM current_rollback;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Each toku_logsizeof_<type>() returns the number of bytes a value of that
// type occupies in the log.  For these fixed-size types the value itself is
// irrelevant, so the argument is unused.
//
// The uint8_t and bool variants keep their historical uint32_t parameter: it
// accepts every possible argument without loss, so narrowing it would gain
// nothing.

// A uint8_t is logged as 1 byte.
static inline int toku_logsizeof_uint8_t (uint32_t v __attribute__((__unused__))) {
    return 1;
}

// A uint32_t is logged as 4 bytes.
static inline int toku_logsizeof_uint32_t (uint32_t v __attribute__((__unused__))) {
    return 4;
}

// A uint64_t is logged as 8 bytes.  The parameter is a full uint64_t so that
// 64-bit arguments are not implicitly narrowed at the call site.
static inline int toku_logsizeof_uint64_t (uint64_t v __attribute__((__unused__))) {
    return 8;
}

// A bool is logged as 1 byte.
static inline int toku_logsizeof_bool (uint32_t v __attribute__((__unused__))) {
    return 1;
}
|
||||||
|
|
||||||
|
static inline int toku_logsizeof_FILENUM (FILENUM v __attribute__((__unused__))) {
|
||||||
|
return 4;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline int toku_logsizeof_DISKOFF (DISKOFF v __attribute__((__unused__))) {
|
||||||
|
return 8;
|
||||||
|
}
|
||||||
|
static inline int toku_logsizeof_BLOCKNUM (BLOCKNUM v __attribute__((__unused__))) {
|
||||||
|
return 8;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline int toku_logsizeof_LSN (LSN lsn __attribute__((__unused__))) {
|
||||||
|
return 8;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline int toku_logsizeof_TXNID (TXNID txnid __attribute__((__unused__))) {
|
||||||
|
return 8;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline int toku_logsizeof_TXNID_PAIR (TXNID_PAIR txnid __attribute__((__unused__))) {
|
||||||
|
return 16;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Logged size of an XID: the gtrid and bqual bytes plus a fixed 6-byte
// header.  Both lengths are asserted to lie in [0, 64].
static inline int toku_logsizeof_XIDP (XIDP xid) {
    assert(0<=xid->gtrid_length && xid->gtrid_length<=64);
    assert(0<=xid->bqual_length && xid->bqual_length<=64);

    const int fixed_bytes = 4   // formatID
                          + 1   // gtrid_length
                          + 1;  // bqual_length
    return xid->gtrid_length + xid->bqual_length + fixed_bytes;
}
|
||||||
|
|
||||||
|
// Logged size of a FILENUMS array: 4 bytes plus one FILENUM-sized entry per
// element.
static inline int toku_logsizeof_FILENUMS (FILENUMS fs) {
    // fs could have .num==0 and then we cannot dereference, so size a dummy
    // FILENUM instead of an element of the array.
    static const FILENUM f = {0};
    const int bytes_per_filenum = toku_logsizeof_FILENUM(f);
    return 4 + fs.num * bytes_per_filenum;
}
|
||||||
|
|
||||||
|
// Logged size of a BYTESTRING: 4 bytes plus the payload bytes.
static inline int toku_logsizeof_BYTESTRING (BYTESTRING bs)
{
    return bs.len + 4;
}
|
||||||
|
|
||||||
|
// Copy the bytes of a non-empty BYTESTRING into a freshly allocated,
// NUL-terminated C string and return it.  The buffer comes from
// toku_xmalloc(); the caller owns it (presumably released with toku_free --
// confirm against callers).
static inline char *fixup_fname(BYTESTRING *f) {
    assert(f->len>0);
    // Do the "+1" in size_t: computed in the type of f->len it could wrap to
    // 0 for the largest length, making the memcpy below overrun the buffer.
    const size_t len = f->len;
    char *fname = (char*)toku_xmalloc(len + 1);
    memcpy(fname, f->data, len);
    fname[len] = 0;
    return fname;
}
|
||||||
69
storage/tokudb/PerconaFT/ft/logger/log.h
Normal file
69
storage/tokudb/PerconaFT/ft/logger/log.h
Normal file
@@ -0,0 +1,69 @@
|
|||||||
|
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||||
|
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||||
|
#ident "$Id$"
|
||||||
|
/*======
|
||||||
|
This file is part of PerconaFT.
|
||||||
|
|
||||||
|
|
||||||
|
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||||
|
|
||||||
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License, version 2,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
----------------------------------------
|
||||||
|
|
||||||
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU Affero General Public License, version 3,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Affero General Public License
|
||||||
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
======= */
|
||||||
|
|
||||||
|
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <db.h>
|
||||||
|
#include <errno.h>
|
||||||
|
|
||||||
|
#include "portability/memory.h"
|
||||||
|
#include "portability/toku_portability.h"
|
||||||
|
|
||||||
|
#include "ft/logger/recover.h"
|
||||||
|
#include "ft/txn/rollback.h"
|
||||||
|
#include "ft/txn/txn.h"
|
||||||
|
#include "util/bytestring.h"
|
||||||
|
|
||||||
|
struct roll_entry;
|
||||||
|
|
||||||
|
static inline void toku_free_TXNID(TXNID txnid __attribute__((__unused__))) {}
|
||||||
|
static inline void toku_free_TXNID_PAIR(TXNID_PAIR txnid __attribute__((__unused__))) {}
|
||||||
|
|
||||||
|
static inline void toku_free_LSN(LSN lsn __attribute__((__unused__))) {}
|
||||||
|
static inline void toku_free_uint64_t(uint64_t u __attribute__((__unused__))) {}
|
||||||
|
static inline void toku_free_uint32_t(uint32_t u __attribute__((__unused__))) {}
|
||||||
|
static inline void toku_free_uint8_t(uint8_t u __attribute__((__unused__))) {}
|
||||||
|
static inline void toku_free_FILENUM(FILENUM u __attribute__((__unused__))) {}
|
||||||
|
static inline void toku_free_BLOCKNUM(BLOCKNUM u __attribute__((__unused__))) {}
|
||||||
|
static inline void toku_free_bool(bool u __attribute__((__unused__))) {}
|
||||||
|
static inline void toku_free_XIDP(XIDP xidp) { toku_free(xidp); }
|
||||||
|
static inline void toku_free_BYTESTRING(BYTESTRING val) { toku_free(val.data); }
|
||||||
|
static inline void toku_free_FILENUMS(FILENUMS val) { toku_free(val.filenums); }
|
||||||
|
|
||||||
|
int toku_maybe_upgrade_log (const char *env_dir, const char *log_dir, LSN * lsn_of_clean_shutdown, bool * upgrade_in_progress);
|
||||||
|
uint64_t toku_log_upgrade_get_footprint(void);
|
||||||
293
storage/tokudb/PerconaFT/ft/logger/log_upgrade.cc
Normal file
293
storage/tokudb/PerconaFT/ft/logger/log_upgrade.cc
Normal file
@@ -0,0 +1,293 @@
|
|||||||
|
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||||
|
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||||
|
#ident "$Id$"
|
||||||
|
/*======
|
||||||
|
This file is part of PerconaFT.
|
||||||
|
|
||||||
|
|
||||||
|
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||||
|
|
||||||
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License, version 2,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
----------------------------------------
|
||||||
|
|
||||||
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU Affero General Public License, version 3,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Affero General Public License
|
||||||
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
======= */
|
||||||
|
|
||||||
|
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||||
|
|
||||||
|
#include <ft/log_header.h>
|
||||||
|
|
||||||
|
#include "log-internal.h"
|
||||||
|
#include "logger/logcursor.h"
|
||||||
|
#include "cachetable/checkpoint.h"
|
||||||
|
|
||||||
|
static uint64_t footprint = 0;  // for debug and accountability

// Return the footprint accumulated by the most recent upgrade attempt.
uint64_t
toku_log_upgrade_get_footprint(void) {
    return footprint;
}

// Footprint concept here is that each function increments a different decimal digit.
// The cumulative total shows the path taken for the upgrade.
// Each function must have a single return for this to work.
//
// FOOTPRINTSETUP(increment) declares the per-function state and must appear once
//                           at the top of the function.
// FOOTPRINT(x)              records step x of the current function; the argument is
//                           parenthesized so that expressions such as FOOTPRINT(a+b)
//                           are scaled as a whole.
// FOOTPRINTCAPTURE          adds the function's footprint to the global total.
#define FOOTPRINT(x) function_footprint=((x)*footprint_increment)
#define FOOTPRINTSETUP(increment) uint64_t function_footprint = 0; uint64_t footprint_increment=(increment);
#define FOOTPRINTCAPTURE footprint+=function_footprint;
|
||||||
|
|
||||||
|
|
||||||
|
// return 0 if clean shutdown, TOKUDB_UPGRADE_FAILURE if not clean shutdown
|
||||||
|
static int
|
||||||
|
verify_clean_shutdown_of_log_version_current(const char *log_dir, LSN * last_lsn, TXNID *last_xid) {
|
||||||
|
int rval = TOKUDB_UPGRADE_FAILURE;
|
||||||
|
TOKULOGCURSOR cursor = NULL;
|
||||||
|
int r;
|
||||||
|
FOOTPRINTSETUP(100);
|
||||||
|
|
||||||
|
FOOTPRINT(1);
|
||||||
|
|
||||||
|
r = toku_logcursor_create(&cursor, log_dir);
|
||||||
|
assert(r == 0);
|
||||||
|
struct log_entry *le = NULL;
|
||||||
|
r = toku_logcursor_last(cursor, &le);
|
||||||
|
if (r == 0) {
|
||||||
|
FOOTPRINT(2);
|
||||||
|
if (le->cmd==LT_shutdown) {
|
||||||
|
LSN lsn = le->u.shutdown.lsn;
|
||||||
|
if (last_lsn) {
|
||||||
|
*last_lsn = lsn;
|
||||||
|
}
|
||||||
|
if (last_xid) {
|
||||||
|
*last_xid = le->u.shutdown.last_xid;
|
||||||
|
}
|
||||||
|
rval = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
r = toku_logcursor_destroy(&cursor);
|
||||||
|
assert(r == 0);
|
||||||
|
FOOTPRINTCAPTURE;
|
||||||
|
return rval;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// return 0 if clean shutdown, TOKUDB_UPGRADE_FAILURE if not clean shutdown
|
||||||
|
static int
|
||||||
|
verify_clean_shutdown_of_log_version_old(const char *log_dir, LSN * last_lsn, TXNID *last_xid, uint32_t version) {
|
||||||
|
int rval = TOKUDB_UPGRADE_FAILURE;
|
||||||
|
int r;
|
||||||
|
FOOTPRINTSETUP(10);
|
||||||
|
|
||||||
|
FOOTPRINT(1);
|
||||||
|
|
||||||
|
int n_logfiles;
|
||||||
|
char **logfiles;
|
||||||
|
r = toku_logger_find_logfiles(log_dir, &logfiles, &n_logfiles);
|
||||||
|
if (r!=0) return r;
|
||||||
|
|
||||||
|
char *basename;
|
||||||
|
TOKULOGCURSOR cursor;
|
||||||
|
struct log_entry *entry;
|
||||||
|
// Only look at newest log
|
||||||
|
// basename points to first char after last / in file pathname
|
||||||
|
basename = strrchr(logfiles[n_logfiles-1], '/') + 1;
|
||||||
|
uint32_t version_name;
|
||||||
|
long long index = -1;
|
||||||
|
r = sscanf(basename, "log%lld.tokulog%u", &index, &version_name);
|
||||||
|
assert(r==2); // found index and version
|
||||||
|
invariant(version_name == version);
|
||||||
|
assert(version>=TOKU_LOG_MIN_SUPPORTED_VERSION);
|
||||||
|
assert(version< TOKU_LOG_VERSION); //Must be old
|
||||||
|
// find last LSN
|
||||||
|
r = toku_logcursor_create_for_file(&cursor, log_dir, basename);
|
||||||
|
if (r != 0) {
|
||||||
|
goto cleanup_no_logcursor;
|
||||||
|
}
|
||||||
|
r = toku_logcursor_last(cursor, &entry);
|
||||||
|
if (r != 0) {
|
||||||
|
goto cleanup;
|
||||||
|
}
|
||||||
|
FOOTPRINT(2);
|
||||||
|
//TODO: Remove this special case once FT_LAYOUT_VERSION_19 (and older) are not supported.
|
||||||
|
if (version <= FT_LAYOUT_VERSION_19) {
|
||||||
|
if (entry->cmd==LT_shutdown_up_to_19) {
|
||||||
|
LSN lsn = entry->u.shutdown_up_to_19.lsn;
|
||||||
|
if (last_lsn) {
|
||||||
|
*last_lsn = lsn;
|
||||||
|
}
|
||||||
|
if (last_xid) {
|
||||||
|
// Use lsn as last_xid.
|
||||||
|
*last_xid = lsn.lsn;
|
||||||
|
}
|
||||||
|
rval = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (entry->cmd==LT_shutdown) {
|
||||||
|
LSN lsn = entry->u.shutdown.lsn;
|
||||||
|
if (last_lsn) {
|
||||||
|
*last_lsn = lsn;
|
||||||
|
}
|
||||||
|
if (last_xid) {
|
||||||
|
*last_xid = entry->u.shutdown.last_xid;
|
||||||
|
}
|
||||||
|
rval = 0;
|
||||||
|
}
|
||||||
|
cleanup:
|
||||||
|
r = toku_logcursor_destroy(&cursor);
|
||||||
|
assert(r == 0);
|
||||||
|
cleanup_no_logcursor:
|
||||||
|
toku_logger_free_logfiles(logfiles, n_logfiles);
|
||||||
|
FOOTPRINTCAPTURE;
|
||||||
|
return rval;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static int
|
||||||
|
verify_clean_shutdown_of_log_version(const char *log_dir, uint32_t version, LSN *last_lsn, TXNID *last_xid) {
|
||||||
|
// return 0 if clean shutdown, TOKUDB_UPGRADE_FAILURE if not clean shutdown
|
||||||
|
int r = 0;
|
||||||
|
FOOTPRINTSETUP(1000);
|
||||||
|
|
||||||
|
if (version < TOKU_LOG_VERSION) {
|
||||||
|
FOOTPRINT(1);
|
||||||
|
r = verify_clean_shutdown_of_log_version_old(log_dir, last_lsn, last_xid, version);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
FOOTPRINT(2);
|
||||||
|
assert(version == TOKU_LOG_VERSION);
|
||||||
|
r = verify_clean_shutdown_of_log_version_current(log_dir, last_lsn, last_xid);
|
||||||
|
}
|
||||||
|
FOOTPRINTCAPTURE;
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Actually create a log file of the current version, making the environment be of the current version.
|
||||||
|
// TODO: can't fail
|
||||||
|
static int
|
||||||
|
upgrade_log(const char *env_dir, const char *log_dir, LSN last_lsn, TXNID last_xid) { // the real deal
|
||||||
|
int r;
|
||||||
|
FOOTPRINTSETUP(10000);
|
||||||
|
|
||||||
|
LSN initial_lsn = last_lsn;
|
||||||
|
initial_lsn.lsn++;
|
||||||
|
CACHETABLE ct;
|
||||||
|
TOKULOGGER logger;
|
||||||
|
|
||||||
|
FOOTPRINT(1);
|
||||||
|
|
||||||
|
{ //Create temporary environment
|
||||||
|
toku_cachetable_create(&ct, 1<<25, initial_lsn, NULL);
|
||||||
|
toku_cachetable_set_env_dir(ct, env_dir);
|
||||||
|
r = toku_logger_create(&logger);
|
||||||
|
assert(r == 0);
|
||||||
|
toku_logger_set_cachetable(logger, ct);
|
||||||
|
r = toku_logger_open_with_last_xid(log_dir, logger, last_xid);
|
||||||
|
assert(r==0);
|
||||||
|
}
|
||||||
|
{ //Checkpoint
|
||||||
|
CHECKPOINTER cp = toku_cachetable_get_checkpointer(ct);
|
||||||
|
r = toku_checkpoint(cp, logger, NULL, NULL, NULL, NULL, UPGRADE_CHECKPOINT); //fsyncs log dir
|
||||||
|
assert(r == 0);
|
||||||
|
}
|
||||||
|
{ //Close cachetable and logger
|
||||||
|
toku_logger_shutdown(logger);
|
||||||
|
toku_cachetable_close(&ct);
|
||||||
|
r = toku_logger_close(&logger);
|
||||||
|
assert(r==0);
|
||||||
|
}
|
||||||
|
{
|
||||||
|
r = verify_clean_shutdown_of_log_version(log_dir, TOKU_LOG_VERSION, NULL, NULL);
|
||||||
|
assert(r==0);
|
||||||
|
}
|
||||||
|
FOOTPRINTCAPTURE;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// If log on disk is old (environment is old) and clean shutdown, then create log of current version,
|
||||||
|
// which will make the environment of the current version (and delete the old logs).
|
||||||
|
int
|
||||||
|
toku_maybe_upgrade_log(const char *env_dir, const char *log_dir, LSN * lsn_of_clean_shutdown, bool * upgrade_in_progress) {
|
||||||
|
int r;
|
||||||
|
int lockfd = -1;
|
||||||
|
FOOTPRINTSETUP(100000);
|
||||||
|
|
||||||
|
footprint = 0;
|
||||||
|
*upgrade_in_progress = false; // set true only if all criteria are met and we're actually doing an upgrade
|
||||||
|
|
||||||
|
FOOTPRINT(1);
|
||||||
|
r = toku_recover_lock(log_dir, &lockfd);
|
||||||
|
if (r != 0) {
|
||||||
|
goto cleanup_no_lock;
|
||||||
|
}
|
||||||
|
FOOTPRINT(2);
|
||||||
|
assert(log_dir);
|
||||||
|
assert(env_dir);
|
||||||
|
|
||||||
|
uint32_t version_of_logs_on_disk;
|
||||||
|
bool found_any_logs;
|
||||||
|
r = toku_get_version_of_logs_on_disk(log_dir, &found_any_logs, &version_of_logs_on_disk);
|
||||||
|
if (r != 0) {
|
||||||
|
goto cleanup;
|
||||||
|
}
|
||||||
|
FOOTPRINT(3);
|
||||||
|
if (!found_any_logs)
|
||||||
|
r = 0; //No logs means no logs to upgrade.
|
||||||
|
else if (version_of_logs_on_disk > TOKU_LOG_VERSION)
|
||||||
|
r = TOKUDB_DICTIONARY_TOO_NEW;
|
||||||
|
else if (version_of_logs_on_disk < TOKU_LOG_MIN_SUPPORTED_VERSION)
|
||||||
|
r = TOKUDB_DICTIONARY_TOO_OLD;
|
||||||
|
else if (version_of_logs_on_disk == TOKU_LOG_VERSION)
|
||||||
|
r = 0; //Logs are up to date
|
||||||
|
else {
|
||||||
|
FOOTPRINT(4);
|
||||||
|
LSN last_lsn = ZERO_LSN;
|
||||||
|
TXNID last_xid = TXNID_NONE;
|
||||||
|
r = verify_clean_shutdown_of_log_version(log_dir, version_of_logs_on_disk, &last_lsn, &last_xid);
|
||||||
|
if (r != 0) {
|
||||||
|
if (TOKU_LOG_VERSION_25 <= version_of_logs_on_disk && version_of_logs_on_disk <= TOKU_LOG_VERSION_27
|
||||||
|
&& TOKU_LOG_VERSION_28 == TOKU_LOG_VERSION) {
|
||||||
|
r = 0; // can do recovery on dirty shutdown
|
||||||
|
} else {
|
||||||
|
fprintf(stderr, "Cannot upgrade PerconaFT version %d database.", version_of_logs_on_disk);
|
||||||
|
fprintf(stderr, " Previous improper shutdown detected.\n");
|
||||||
|
}
|
||||||
|
goto cleanup;
|
||||||
|
}
|
||||||
|
FOOTPRINT(5);
|
||||||
|
*lsn_of_clean_shutdown = last_lsn;
|
||||||
|
*upgrade_in_progress = true;
|
||||||
|
r = upgrade_log(env_dir, log_dir, last_lsn, last_xid);
|
||||||
|
}
|
||||||
|
cleanup:
|
||||||
|
{
|
||||||
|
//Clean up
|
||||||
|
int rc;
|
||||||
|
rc = toku_recover_unlock(lockfd);
|
||||||
|
if (r==0) r = rc;
|
||||||
|
}
|
||||||
|
cleanup_no_lock:
|
||||||
|
FOOTPRINTCAPTURE;
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
496
storage/tokudb/PerconaFT/ft/logger/logcursor.cc
Normal file
496
storage/tokudb/PerconaFT/ft/logger/logcursor.cc
Normal file
@@ -0,0 +1,496 @@
|
|||||||
|
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||||
|
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||||
|
#ident "$Id$"
|
||||||
|
/*======
|
||||||
|
This file is part of PerconaFT.
|
||||||
|
|
||||||
|
|
||||||
|
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||||
|
|
||||||
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License, version 2,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
----------------------------------------
|
||||||
|
|
||||||
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU Affero General Public License, version 3,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Affero General Public License
|
||||||
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
======= */
|
||||||
|
|
||||||
|
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||||
|
|
||||||
|
#include "log-internal.h"
|
||||||
|
#include "logger/logcursor.h"
|
||||||
|
#include <limits.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
|
||||||
|
enum lc_direction { LC_FORWARD, LC_BACKWARD, LC_FIRST, LC_LAST };
|
||||||
|
|
||||||
|
struct toku_logcursor {
|
||||||
|
char *logdir; // absolute directory name
|
||||||
|
char **logfiles;
|
||||||
|
int n_logfiles;
|
||||||
|
int cur_logfiles_index;
|
||||||
|
FILE *cur_fp;
|
||||||
|
size_t buffer_size;
|
||||||
|
void *buffer;
|
||||||
|
bool is_open;
|
||||||
|
struct log_entry entry;
|
||||||
|
bool entry_valid;
|
||||||
|
LSN cur_lsn;
|
||||||
|
enum lc_direction last_direction;
|
||||||
|
};
|
||||||
|
|
||||||
|
#define LC_LSN_ERROR (DB_RUNRECOVERY)
|
||||||
|
|
||||||
|
// Debugging aid: print the cursor's bookkeeping fields to stdout.
//  lc - cursor to dump; must not be NULL (its fields are dereferenced).
void toku_logcursor_print(TOKULOGCURSOR lc) {
    // %p is only defined for void * arguments, so every pointer is cast.
    printf("lc = %p\n", (void *) lc);
    printf(" logdir = %s\n", lc->logdir);
    printf(" logfiles = %p\n", (void *) lc->logfiles);
    for (int lf = 0; lf < lc->n_logfiles; lf++) {
        printf(" logfile[%d] = %p (%s)\n", lf, (void *) lc->logfiles[lf], lc->logfiles[lf]);
    }
    printf(" n_logfiles = %d\n", lc->n_logfiles);
    printf(" cur_logfiles_index = %d\n", lc->cur_logfiles_index);
    printf(" cur_fp = %p\n", (void *) lc->cur_fp);
    printf(" cur_lsn = %" PRIu64 "\n", lc->cur_lsn.lsn);
    printf(" last_direction = %d\n", (int) lc->last_direction);
}
|
||||||
|
|
||||||
|
// Close the current log file stream if one is open.
// Always returns 0; a failing fclose trips the assert.
static int lc_close_cur_logfile(TOKULOGCURSOR lc) {
    if (!lc->is_open) {
        return 0;
    }
    int rc = fclose(lc->cur_fp);
    assert(rc == 0);
    lc->is_open = false;
    return 0;
}
|
||||||
|
|
||||||
|
// Return the size in bytes of the file called name.
// The stat call is asserted to succeed.
static toku_off_t lc_file_len(const char *name) {
    toku_struct_stat st;
    int rc = toku_stat(name, &st);
    assert(rc == 0);
    return st.st_size;
}
|
||||||
|
|
||||||
|
// Read the whole file and discard the contents.  The intent is to pull the
// file into the file system cache so that later accesses (in particular
// backward scans) are fast.  Best effort: a file that cannot be opened, or a
// read that fails, simply ends the warm-up.
//  fname       - file to read
//  buffer      - scratch area the data is read into
//  buffer_size - size of buffer in bytes
static void lc_catfile(const char *fname, void *buffer, size_t buffer_size) {
    int fd = open(fname, O_RDONLY);
    if (fd < 0) {
        return;
    }
    ssize_t nread;
    do {
        nread = read(fd, buffer, buffer_size);
    } while ((int) nread > 0);
    close(fd);
}
|
||||||
|
|
||||||
|
// Open logfiles[index] for reading and leave the stream positioned just past
// the log header.  The cursor must not already have a file open.
//  lc    - cursor whose cur_fp / is_open are updated
//  index - position in lc->logfiles of the file to open
// Returns 0 on success, DB_NOTFOUND when index is out of range or the file
// cannot be opened, DB_BADFORMAT when the header is unreadable or carries an
// unsupported version.  On every failure the stream is closed again, so no
// FILE is leaked while is_open stays false.
static int lc_open_logfile(TOKULOGCURSOR lc, int index) {
    assert(!lc->is_open);
    if (index < 0 || index >= lc->n_logfiles) {
        return DB_NOTFOUND;
    }
    const char *fname = lc->logfiles[index];

    // warm the file system cache before the buffered stream is opened
    lc_catfile(fname, lc->buffer, lc->buffer_size);
    lc->cur_fp = fopen(fname, "rb");
    if (lc->cur_fp == NULL) {
        return DB_NOTFOUND;
    }
    int r = setvbuf(lc->cur_fp, (char *) lc->buffer, _IOFBF, lc->buffer_size);
    assert(r == 0);

    // position fp past header, ignore 0 length file (t:2384)
    if (lc_file_len(fname) >= 12) {
        unsigned int version = 0;
        r = toku_read_logmagic(lc->cur_fp, &version);
        if (r != 0 ||
            version < TOKU_LOG_MIN_SUPPORTED_VERSION ||
            version > TOKU_LOG_VERSION) {
            // is_open is still false, so nobody else would close this stream
            fclose(lc->cur_fp);
            lc->cur_fp = NULL;
            return DB_BADFORMAT;
        }
    }

    // mark as open
    lc->is_open = true;
    return r;
}
|
||||||
|
|
||||||
|
// Check that the entry just read continues the LSN sequence in direction dir
// and, if so, remember its LSN as the cursor's current LSN.
//  dir - LC_FORWARD expects cur_lsn + 1, LC_BACKWARD expects cur_lsn - 1;
//        any other value accepts the entry unconditionally.
// Returns 0 when the entry is accepted, LC_LSN_ERROR otherwise.
static int lc_check_lsn(TOKULOGCURSOR lc, int dir) {
    const LSN found = toku_log_entry_get_lsn(&(lc->entry));

    bool out_of_sequence = false;
    if (dir == LC_FORWARD) {
        out_of_sequence = (found.lsn != lc->cur_lsn.lsn + 1);
    } else if (dir == LC_BACKWARD) {
        out_of_sequence = (found.lsn != lc->cur_lsn.lsn - 1);
    }

    if (out_of_sequence) {
        if (tokuft_recovery_trace)
            printf("DB_RUNRECOVERY: %s:%d r=%d\n", __FUNCTION__, __LINE__, 0);
        return LC_LSN_ERROR;
    }

    lc->cur_lsn.lsn = found.lsn;
    return 0;
}
|
||||||
|
|
||||||
|
// toku_logcursor_create()
|
||||||
|
// - returns a pointer to a logcursor
|
||||||
|
|
||||||
|
// Allocate a cursor for log_dir and initialise every field except the list
// of log files, which is left empty for the caller to fill in.
//  lc      - receives the new cursor
//  log_dir - log directory; a relative name is resolved against the current
//            working directory
// Always returns 0.
static int lc_create(TOKULOGCURSOR *lc, const char *log_dir) {
    TOKULOGCURSOR cursor = (TOKULOGCURSOR) toku_xmalloc(sizeof(struct toku_logcursor));

    // cursor->logdir must be an absolute path
    if (toku_os_is_absolute_name(log_dir)) {
        cursor->logdir = (char *) toku_xmalloc(strlen(log_dir) + 1);
        sprintf(cursor->logdir, "%s", log_dir);
    } else {
        char cwdbuf[PATH_MAX];
        char *cwd = getcwd(cwdbuf, PATH_MAX);
        assert(cwd);
        cursor->logdir = (char *) toku_xmalloc(strlen(cwd) + strlen(log_dir) + 2);
        sprintf(cursor->logdir, "%s/%s", cwd, log_dir);
    }

    // no log files known yet, nothing open, nothing read
    cursor->logfiles = NULL;
    cursor->n_logfiles = 0;
    cursor->cur_logfiles_index = 0;
    cursor->cur_fp = NULL;
    cursor->is_open = false;
    cursor->entry_valid = false;
    cursor->cur_lsn.lsn = 0;
    cursor->last_direction = LC_FIRST;

    // use a 1MB stream buffer (setvbuf); it does not matter if the allocation fails
    cursor->buffer_size = 1 << 20;
    cursor->buffer = toku_malloc(cursor->buffer_size);

    *lc = cursor;
    return 0;
}
|
||||||
|
|
||||||
|
static int lc_fix_bad_logfile(TOKULOGCURSOR lc);
|
||||||
|
|
||||||
|
// Create a cursor over all log files found in log_dir.
//  lc - receives the cursor on success; left untouched on failure
// Returns 0 on success, otherwise the error from cursor creation or from the
// log file search (the partially built cursor is destroyed first).
int toku_logcursor_create(TOKULOGCURSOR *lc, const char *log_dir) {
    TOKULOGCURSOR cursor;
    int r = lc_create(&cursor, log_dir);
    if (r != 0) {
        return r;
    }

    r = toku_logger_find_logfiles(cursor->logdir, &cursor->logfiles, &cursor->n_logfiles);
    if (r == 0) {
        *lc = cursor;
    } else {
        toku_logcursor_destroy(&cursor);
    }
    return r;
}
|
||||||
|
|
||||||
|
// Create a cursor whose file list holds exactly one entry:
// "<absolute log dir>/<log_file>".
//  lc       - receives the cursor
//  log_dir  - directory containing the log file
//  log_file - name of the log file relative to log_dir
// Returns 0 on success or the error from lc_create.
int toku_logcursor_create_for_file(TOKULOGCURSOR *lc, const char *log_dir, const char *log_file) {
    int r = lc_create(lc, log_dir);
    if (r != 0) {
        return r;
    }
    TOKULOGCURSOR cursor = *lc;

    // build the full name of the single log file
    int path_len = strlen(cursor->logdir) + strlen(log_file) + 3;
    char *XMALLOC_N(path_len, path);
    sprintf(path, "%s/%s", cursor->logdir, log_file);

    // a one-element file list owned by the cursor
    char **XMALLOC(file_list);
    file_list[0] = path;
    cursor->logfiles = file_list;
    cursor->n_logfiles = 1;

    *lc = cursor;
    return 0;
}
|
||||||
|
|
||||||
|
// Release everything owned by the cursor: the current entry's resources, the
// open stream, the file list, the directory name, the stream buffer and the
// cursor itself.  *lc is set to NULL.  A NULL *lc is accepted and ignored.
// Returns the result of closing the current log file (0 when *lc is NULL).
int toku_logcursor_destroy(TOKULOGCURSOR *lc) {
    TOKULOGCURSOR cursor = *lc;
    if (!cursor) {
        return 0;
    }

    if (cursor->entry_valid) {
        toku_log_free_log_entry_resources(&(cursor->entry));
        cursor->entry_valid = false;
    }
    int r = lc_close_cur_logfile(cursor);
    toku_logger_free_logfiles(cursor->logfiles, cursor->n_logfiles);
    if (cursor->logdir) toku_free(cursor->logdir);
    if (cursor->buffer) toku_free(cursor->buffer);
    toku_free(cursor);
    *lc = NULL;
    return r;
}
|
||||||
|
|
||||||
|
// Read the next entry in forward direction into lc->entry, moving on to the
// following log file each time the current one reports EOF.
// Returns 0 on success, DB_NOTFOUND when the last log file is exhausted, or
// the error from closing/opening/reading a file.  A read error is reported on
// stderr after the entry's resources have been released.
static int lc_log_read(TOKULOGCURSOR lc)
{
    int r = toku_log_fread(lc->cur_fp, &(lc->entry));
    while (r == EOF) {
        // move to next file
        r = lc_close_cur_logfile(lc);
        if (r != 0) {
            return r;
        }
        if (lc->cur_logfiles_index == lc->n_logfiles - 1) {
            return DB_NOTFOUND;
        }
        lc->cur_logfiles_index++;
        r = lc_open_logfile(lc, lc->cur_logfiles_index);
        if (r != 0) {
            return r;
        }
        r = toku_log_fread(lc->cur_fp, &(lc->entry));
    }
    if (r == 0) {
        return 0;
    }

    toku_log_free_log_entry_resources(&(lc->entry));
    time_t tnow = time(NULL);
    if (r == DB_BADFORMAT) {
        fprintf(stderr, "%.24s PerconaFT bad log format in %s\n", ctime(&tnow), lc->logfiles[lc->cur_logfiles_index]);
    } else {
        fprintf(stderr, "%.24s PerconaFT unexpected log format error '%s' in %s\n", ctime(&tnow), strerror(r), lc->logfiles[lc->cur_logfiles_index]);
    }
    return r;
}
|
||||||
|
|
||||||
|
// Read the previous entry (backward direction) into lc->entry, stepping to
// the end of the preceding log file each time the backward read returns -1.
// Returns 0 on success, DB_NOTFOUND when the first log file is exhausted, or
// the error from closing/opening/reading a file.  A read error is reported on
// stderr after the entry's resources have been released.
static int lc_log_read_backward(TOKULOGCURSOR lc)
{
    int r = toku_log_fread_backward(lc->cur_fp, &(lc->entry));
    while (-1 == r) { // if within header length of top of file
        // move to previous file
        r = lc_close_cur_logfile(lc);
        if (r != 0)
            return r;
        if (lc->cur_logfiles_index == 0)
            return DB_NOTFOUND;
        lc->cur_logfiles_index--;
        r = lc_open_logfile(lc, lc->cur_logfiles_index);
        if (r != 0)
            return r;
        // seek to end
        r = fseek(lc->cur_fp, 0, SEEK_END);
        assert(0 == r);
        r = toku_log_fread_backward(lc->cur_fp, &(lc->entry));
    }
    if (r != 0) {
        toku_log_free_log_entry_resources(&(lc->entry));
        time_t tnow = time(NULL);
        if (r == DB_BADFORMAT) {
            fprintf(stderr, "%.24s PerconaFT bad log format in %s\n", ctime(&tnow), lc->logfiles[lc->cur_logfiles_index]);
        }
        else {
            // message text matches the forward reader ("uUnexpected" was a typo)
            fprintf(stderr, "%.24s PerconaFT unexpected log format error '%s' in %s\n", ctime(&tnow), strerror(r), lc->logfiles[lc->cur_logfiles_index]);
        }
    }
    return r;
}
|
||||||
|
|
||||||
|
// Advance the cursor by one entry.  When no entry is currently held, this is
// the same as toku_logcursor_first().
//  le - on success, set to point at the cursor's internal entry
// Returns 0 on success, otherwise the error from reading or LSN checking.
int toku_logcursor_next(TOKULOGCURSOR lc, struct log_entry **le) {
    if (!lc->entry_valid) {
        return toku_logcursor_first(lc, le);
    }

    toku_log_free_log_entry_resources(&(lc->entry));
    lc->entry_valid = false;
    if (lc->last_direction == LC_BACKWARD) {
        // The previous move was backward: read and discard one entry going
        // forward before the real read (presumably the entry just returned;
        // confirm against toku_log_fread_backward).
        struct log_entry junk;
        int rc = toku_log_fread(lc->cur_fp, &junk);
        assert(rc == 0);
        toku_log_free_log_entry_resources(&junk);
    }

    // read the entry
    int r = lc_log_read(lc);
    if (r != 0) {
        return r;
    }
    r = lc_check_lsn(lc, LC_FORWARD);
    if (r != 0) {
        return r;
    }
    lc->last_direction = LC_FORWARD;
    lc->entry_valid = true;
    *le = &(lc->entry);
    return r;
}
|
||||||
|
|
||||||
|
// Move the cursor back by one entry.  When no entry is currently held, this
// is the same as toku_logcursor_last().
//  le - on success, set to point at the cursor's internal entry
// Returns 0 on success, otherwise the error from reading or LSN checking.
int toku_logcursor_prev(TOKULOGCURSOR lc, struct log_entry **le) {
    if (!lc->entry_valid) {
        return toku_logcursor_last(lc, le);
    }

    toku_log_free_log_entry_resources(&(lc->entry));
    lc->entry_valid = false;
    if (lc->last_direction == LC_FORWARD) {
        // The previous move was forward: read and discard one entry going
        // backward before the real read (presumably the entry just returned;
        // confirm against toku_log_fread).
        struct log_entry junk;
        int rc = toku_log_fread_backward(lc->cur_fp, &junk);
        assert(rc == 0);
        toku_log_free_log_entry_resources(&junk);
    }

    // read the entry
    int r = lc_log_read_backward(lc);
    if (r != 0) {
        return r;
    }
    r = lc_check_lsn(lc, LC_BACKWARD);
    if (r != 0) {
        return r;
    }
    lc->last_direction = LC_BACKWARD;
    lc->entry_valid = true;
    *le = &(lc->entry);
    return r;
}
|
||||||
|
|
||||||
|
// Read one entry starting from the first log file.  If the first log file is
// already the open one, reading continues from its current stream position.
//  le - on success, set to point at the cursor's internal entry
// Returns 0 on success, otherwise the error from opening or reading.
int toku_logcursor_first(TOKULOGCURSOR lc, struct log_entry **le) {
    if (lc->entry_valid) {
        toku_log_free_log_entry_resources(&(lc->entry));
        lc->entry_valid = false;
    }

    // close any but the first log file
    if (lc->cur_logfiles_index != 0) {
        lc_close_cur_logfile(lc);
    }

    // open first log file if needed
    if (!lc->is_open) {
        int open_rc = lc_open_logfile(lc, 0);
        if (open_rc != 0) {
            return open_rc;
        }
        lc->cur_logfiles_index = 0;
    }

    // read the entry
    int r = lc_log_read(lc);
    if (r != 0) {
        return r;
    }

    r = lc_check_lsn(lc, LC_FIRST);
    if (r != 0) {
        return r;
    }
    lc->last_direction = LC_FIRST;
    lc->entry_valid = true;
    *le = &(lc->entry);
    return r;
}
|
||||||
|
|
||||||
|
//get last entry in the logfile specified by logcursor
|
||||||
|
int toku_logcursor_last(TOKULOGCURSOR lc, struct log_entry **le) {
|
||||||
|
int r=0;
|
||||||
|
if ( lc->entry_valid ) {
|
||||||
|
toku_log_free_log_entry_resources(&(lc->entry));
|
||||||
|
lc->entry_valid = false;
|
||||||
|
}
|
||||||
|
// close any but last log file
|
||||||
|
if ( lc->cur_logfiles_index != lc->n_logfiles-1 ) {
|
||||||
|
lc_close_cur_logfile(lc);
|
||||||
|
}
|
||||||
|
// open last log file if needed
|
||||||
|
if ( !lc->is_open ) {
|
||||||
|
r = lc_open_logfile(lc, lc->n_logfiles-1);
|
||||||
|
if (r!=0)
|
||||||
|
return r;
|
||||||
|
lc->cur_logfiles_index = lc->n_logfiles-1;
|
||||||
|
}
|
||||||
|
while (1) {
|
||||||
|
// seek to end
|
||||||
|
r = fseek(lc->cur_fp, 0, SEEK_END); assert(r==0);
|
||||||
|
// read backward
|
||||||
|
r = toku_log_fread_backward(lc->cur_fp, &(lc->entry));
|
||||||
|
if (r==0) // got a good entry
|
||||||
|
break;
|
||||||
|
if (r>0) {
|
||||||
|
toku_log_free_log_entry_resources(&(lc->entry));
|
||||||
|
// got an error,
|
||||||
|
// probably a corrupted last log entry due to a crash
|
||||||
|
// try scanning forward from the beginning to find the last good entry
|
||||||
|
time_t tnow = time(NULL);
|
||||||
|
fprintf(stderr, "%.24s PerconaFT recovery repairing log\n", ctime(&tnow));
|
||||||
|
r = lc_fix_bad_logfile(lc);
|
||||||
|
if ( r != 0 ) {
|
||||||
|
fprintf(stderr, "%.24s PerconaFT recovery repair unsuccessful\n", ctime(&tnow));
|
||||||
|
return DB_BADFORMAT;
|
||||||
|
}
|
||||||
|
// try reading again
|
||||||
|
r = toku_log_fread_backward(lc->cur_fp, &(lc->entry));
|
||||||
|
if (r==0) // got a good entry
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
// move to previous file
|
||||||
|
r = lc_close_cur_logfile(lc);
|
||||||
|
if (r!=0)
|
||||||
|
return r;
|
||||||
|
if ( lc->cur_logfiles_index == 0 )
|
||||||
|
return DB_NOTFOUND;
|
||||||
|
lc->cur_logfiles_index--;
|
||||||
|
r = lc_open_logfile(lc, lc->cur_logfiles_index);
|
||||||
|
if (r!=0)
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
r = lc_check_lsn(lc, LC_LAST);
|
||||||
|
if (r!=0)
|
||||||
|
return r;
|
||||||
|
lc->last_direction = LC_LAST;
|
||||||
|
lc->entry_valid = true;
|
||||||
|
*le = &(lc->entry);
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
// return 0 if log exists, ENOENT if no log
|
||||||
|
int
|
||||||
|
toku_logcursor_log_exists(const TOKULOGCURSOR lc) {
|
||||||
|
int r;
|
||||||
|
|
||||||
|
if (lc->n_logfiles)
|
||||||
|
r = 0;
|
||||||
|
else
|
||||||
|
r = ENOENT;
|
||||||
|
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
// fix a logfile with a bad last entry
|
||||||
|
// - return with fp pointing to end-of-file so that toku_logcursor_last can be retried
|
||||||
|
static int lc_fix_bad_logfile(TOKULOGCURSOR lc) {
|
||||||
|
struct log_entry le;
|
||||||
|
unsigned int version=0;
|
||||||
|
int r = 0;
|
||||||
|
|
||||||
|
r = fseek(lc->cur_fp, 0, SEEK_SET);
|
||||||
|
if ( r!=0 )
|
||||||
|
return r;
|
||||||
|
r = toku_read_logmagic(lc->cur_fp, &version);
|
||||||
|
if ( r!=0 )
|
||||||
|
return r;
|
||||||
|
if (version != TOKU_LOG_VERSION)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
toku_off_t last_good_pos;
|
||||||
|
last_good_pos = ftello(lc->cur_fp);
|
||||||
|
while (1) {
|
||||||
|
// initialize le
|
||||||
|
// - reading incomplete entries can result in fields that cannot be freed
|
||||||
|
memset(&le, 0, sizeof(le));
|
||||||
|
r = toku_log_fread(lc->cur_fp, &le);
|
||||||
|
toku_log_free_log_entry_resources(&le);
|
||||||
|
if ( r!=0 )
|
||||||
|
break;
|
||||||
|
last_good_pos = ftello(lc->cur_fp);
|
||||||
|
}
|
||||||
|
// now have position of last good entry
|
||||||
|
// 1) close the file
|
||||||
|
// 2) truncate the file to remove the error
|
||||||
|
// 3) reopen the file
|
||||||
|
// 4) set the pos to last
|
||||||
|
r = lc_close_cur_logfile(lc);
|
||||||
|
if ( r!=0 )
|
||||||
|
return r;
|
||||||
|
r = truncate(lc->logfiles[lc->n_logfiles - 1], last_good_pos);
|
||||||
|
if ( r!=0 )
|
||||||
|
return r;
|
||||||
|
r = lc_open_logfile(lc, lc->n_logfiles-1);
|
||||||
|
if ( r!=0 )
|
||||||
|
return r;
|
||||||
|
r = fseek(lc->cur_fp, 0, SEEK_END);
|
||||||
|
if ( r!=0 )
|
||||||
|
return r;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
74
storage/tokudb/PerconaFT/ft/logger/logcursor.h
Normal file
74
storage/tokudb/PerconaFT/ft/logger/logcursor.h
Normal file
@@ -0,0 +1,74 @@
|
|||||||
|
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||||
|
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||||
|
#ident "$Id$"
|
||||||
|
/*======
|
||||||
|
This file is part of PerconaFT.
|
||||||
|
|
||||||
|
|
||||||
|
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||||
|
|
||||||
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License, version 2,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
----------------------------------------
|
||||||
|
|
||||||
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU Affero General Public License, version 3,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Affero General Public License
|
||||||
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
======= */
|
||||||
|
|
||||||
|
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <ft/log_header.h>
|
||||||
|
|
||||||
|
struct toku_logcursor;
|
||||||
|
typedef struct toku_logcursor *TOKULOGCURSOR;
|
||||||
|
|
||||||
|
// All routines return 0 on success
|
||||||
|
|
||||||
|
// toku_logcursor_create()
|
||||||
|
// - creates a logcursor (lc)
|
||||||
|
// - following toku_logcursor_create()
|
||||||
|
// if toku_logcursor_next() is called, it returns the first entry in the log
|
||||||
|
// if toku_logcursor_prev() is called, it returns the last entry in the log
|
||||||
|
int toku_logcursor_create(TOKULOGCURSOR *lc, const char *log_dir);
|
||||||
|
// toku_logcursor_create_for_file()
|
||||||
|
// - creates a logcursor (lc) that only knows about the file log_file
|
||||||
|
int toku_logcursor_create_for_file(TOKULOGCURSOR *lc, const char *log_dir, const char *log_file);
|
||||||
|
// toku_logcursor_destroy()
|
||||||
|
// - frees all resources associated with the logcursor, including the log_entry
|
||||||
|
// associated with the latest cursor action
|
||||||
|
int toku_logcursor_destroy(TOKULOGCURSOR *lc);
|
||||||
|
|
||||||
|
// toku_logcursor_[next,prev,first,last] take care of malloc'ing and free'ing log_entrys.
|
||||||
|
// - routines NULL out the **le pointers on entry, then set the **le pointers to
|
||||||
|
// the malloc'ed entries when successful,
|
||||||
|
int toku_logcursor_next(TOKULOGCURSOR lc, struct log_entry **le);
|
||||||
|
int toku_logcursor_prev(TOKULOGCURSOR lc, struct log_entry **le);
|
||||||
|
|
||||||
|
int toku_logcursor_first(const TOKULOGCURSOR lc, struct log_entry **le);
|
||||||
|
int toku_logcursor_last(const TOKULOGCURSOR lc, struct log_entry **le);
|
||||||
|
|
||||||
|
// return 0 if log exists, ENOENT if no log
|
||||||
|
int toku_logcursor_log_exists(const TOKULOGCURSOR lc);
|
||||||
|
|
||||||
|
void toku_logcursor_print(TOKULOGCURSOR lc);
|
||||||
204
storage/tokudb/PerconaFT/ft/logger/logfilemgr.cc
Normal file
204
storage/tokudb/PerconaFT/ft/logger/logfilemgr.cc
Normal file
@@ -0,0 +1,204 @@
|
|||||||
|
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||||
|
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||||
|
#ident "$Id$"
|
||||||
|
/*======
|
||||||
|
This file is part of PerconaFT.
|
||||||
|
|
||||||
|
|
||||||
|
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||||
|
|
||||||
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License, version 2,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
----------------------------------------
|
||||||
|
|
||||||
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU Affero General Public License, version 3,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Affero General Public License
|
||||||
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
======= */
|
||||||
|
|
||||||
|
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||||
|
|
||||||
|
#include "logger/log-internal.h"
|
||||||
|
#include "logger/logcursor.h"
|
||||||
|
#include "logger/logfilemgr.h"
|
||||||
|
|
||||||
|
// for now, implemented as a singly-linked list
|
||||||
|
// first = oldest (delete from beginning)
|
||||||
|
// last = newest (add to end)
|
||||||
|
|
||||||
|
struct lfm_entry {
|
||||||
|
TOKULOGFILEINFO lf_info;
|
||||||
|
struct lfm_entry *next;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct toku_logfilemgr {
|
||||||
|
struct lfm_entry *first;
|
||||||
|
struct lfm_entry *last;
|
||||||
|
int n_entries;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Allocate an empty log file manager and store it in *lfm.
// Always returns 0.
int toku_logfilemgr_create(TOKULOGFILEMGR *lfm) {
    TOKULOGFILEMGR XMALLOC(mgr);
    mgr->n_entries = 0;
    mgr->first = NULL;
    mgr->last = NULL;
    *lfm = mgr;
    return 0;
}
|
||||||
|
|
||||||
|
// Free every list entry and the manager itself, then set *lfm to NULL.
// A NULL *lfm is tolerated.  Always returns 0.
int toku_logfilemgr_destroy(TOKULOGFILEMGR *lfm) {
    TOKULOGFILEMGR mgr = *lfm;
    if (mgr == NULL) {
        return 0;
    }
    while (mgr->n_entries > 0) {
        toku_logfilemgr_delete_oldest_logfile_info(mgr);
    }
    toku_free(mgr);
    *lfm = NULL;
    return 0;
}
|
||||||
|
|
||||||
|
// Populate lfm with one entry per log file found in log_dir, in the order
// toku_logger_find_logfiles returns them.
//  lfm                        - manager to fill; must not be NULL
//  log_dir                    - directory to scan for log files
//  last_xid_if_clean_shutdown - receives the last_xid of the final processed
//                               log entry when it is a shutdown entry,
//                               otherwise TXNID_NONE; must not be NULL
// Returns 0 on success, or the error from finding the log files or from
// creating a cursor.  On that error path nothing allocated here is leaked.
int toku_logfilemgr_init(TOKULOGFILEMGR lfm, const char *log_dir, TXNID *last_xid_if_clean_shutdown) {
    invariant_notnull(lfm);
    invariant_notnull(last_xid_if_clean_shutdown);

    int r;
    int n_logfiles;
    char **logfiles;
    r = toku_logger_find_logfiles(log_dir, &logfiles, &n_logfiles);
    if (r != 0)
        return r;

    LSN tmp_lsn = {0};
    TXNID last_xid = TXNID_NONE;
    for (int i = 0; i < n_logfiles; i++) {
        // fname is the filename of the i-th logfile; a name without any '/'
        // is used as-is instead of doing arithmetic on a NULL pointer
        const char *slash = strrchr(logfiles[i], '/');
        const char *fname = slash ? slash + 1 : logfiles[i];

        // find the index
        long long index = -1;
        int version;
        r = sscanf(fname, "log%lld.tokulog%d", &index, &version);
        assert(r == 2); // found index and version
        assert(version >= TOKU_LOG_MIN_SUPPORTED_VERSION);
        assert(version <= TOKU_LOG_VERSION);

        // find last LSN in logfile
        TOKULOGCURSOR cursor;
        r = toku_logcursor_create_for_file(&cursor, log_dir, fname);
        if (r != 0) {
            // release the file list before bailing out
            toku_logger_free_logfiles(logfiles, n_logfiles);
            return r;
        }

        // allocated only after the last early return so it cannot leak
        TOKULOGFILEINFO XMALLOC(lf_info);
        lf_info->index = index;
        lf_info->version = version;

        struct log_entry *entry;
        r = toku_logcursor_last(cursor, &entry); // set "entry" to last log entry in logfile
        if (r == 0) {
            lf_info->maxlsn = toku_log_entry_get_lsn(entry);

            invariant(lf_info->maxlsn.lsn >= tmp_lsn.lsn);
            tmp_lsn = lf_info->maxlsn;
            if (entry->cmd == LT_shutdown) {
                last_xid = entry->u.shutdown.last_xid;
            } else {
                last_xid = TXNID_NONE;
            }
        }
        else {
            lf_info->maxlsn = tmp_lsn; // handle empty logfile (no LSN in file) case
        }

        // add to logfilemgr
        toku_logfilemgr_add_logfile_info(lfm, lf_info);
        toku_logcursor_destroy(&cursor);
    }
    toku_logger_free_logfiles(logfiles, n_logfiles);
    *last_xid_if_clean_shutdown = last_xid;
    return 0;
}
|
||||||
|
|
||||||
|
// Return the number of log files currently tracked by lfm (must not be NULL).
int toku_logfilemgr_num_logfiles(TOKULOGFILEMGR lfm) {
    assert(lfm);
    const int count = lfm->n_entries;
    return count;
}
|
||||||
|
|
||||||
|
// Append lf_info to the end (newest position) of the list.  The stored
// lf_info is later freed by toku_logfilemgr_delete_oldest_logfile_info.
// Always returns 0.
int toku_logfilemgr_add_logfile_info(TOKULOGFILEMGR lfm, TOKULOGFILEINFO lf_info) {
    assert(lfm);
    struct lfm_entry *XMALLOC(node);
    node->lf_info = lf_info;
    node->next = NULL;

    if (lfm->n_entries == 0) {
        // first node: it is both the oldest and the newest
        lfm->first = node;
    } else {
        lfm->last->next = node;
    }
    lfm->last = node;
    lfm->n_entries++;
    return 0;
}
|
||||||
|
|
||||||
|
// Return the info of the oldest tracked log file.
// The list must not be empty: lfm->first is dereferenced without a check.
TOKULOGFILEINFO toku_logfilemgr_get_oldest_logfile_info(TOKULOGFILEMGR lfm) {
    assert(lfm);
    struct lfm_entry *oldest = lfm->first;
    return oldest->lf_info;
}
|
||||||
|
|
||||||
|
// Remove the oldest entry from the list and free both the node and its
// lf_info.  Does nothing when the list is empty.
void toku_logfilemgr_delete_oldest_logfile_info(TOKULOGFILEMGR lfm) {
    assert(lfm);
    if (lfm->n_entries <= 0) {
        return;
    }

    struct lfm_entry *oldest = lfm->first;
    lfm->first = oldest->next;
    toku_free(oldest->lf_info);
    toku_free(oldest);

    lfm->n_entries--;
    if (lfm->n_entries == 0) {
        // list is empty again: clear both ends
        lfm->first = NULL;
        lfm->last = NULL;
    }
}
|
||||||
|
|
||||||
|
// Return the maxlsn recorded for the newest log file, or an LSN of 0 when no
// log file is tracked.
LSN toku_logfilemgr_get_last_lsn(TOKULOGFILEMGR lfm) {
    assert(lfm);
    if (lfm->n_entries != 0) {
        return lfm->last->lf_info->maxlsn;
    }
    LSN zero;
    zero.lsn = 0;
    return zero;
}
|
||||||
|
|
||||||
|
// Record lsn as the maxlsn of the newest log file.  The list must not be
// empty (asserted).
void toku_logfilemgr_update_last_lsn(TOKULOGFILEMGR lfm, LSN lsn) {
    assert(lfm);
    struct lfm_entry *newest = lfm->last;
    assert(newest!=NULL);
    newest->lf_info->maxlsn = lsn;
}
|
||||||
|
|
||||||
|
// Debugging aid: print the manager address, the entry count and, for each
// entry from oldest to newest, its index and maxlsn to stdout.
void toku_logfilemgr_print(TOKULOGFILEMGR lfm) {
    assert(lfm);
    // %p is only defined for void * arguments, hence the cast
    printf("toku_logfilemgr_print [%p] : %d entries \n", (void *) lfm, lfm->n_entries);
    struct lfm_entry *entry = lfm->first;
    for (int i = 0; i < lfm->n_entries; i++) {
        printf(" entry %d : index = %" PRId64 ", maxlsn = %" PRIu64 "\n", i, entry->lf_info->index, entry->lf_info->maxlsn.lsn);
        entry = entry->next;
    }
}
|
||||||
65
storage/tokudb/PerconaFT/ft/logger/logfilemgr.h
Normal file
65
storage/tokudb/PerconaFT/ft/logger/logfilemgr.h
Normal file
@@ -0,0 +1,65 @@
|
|||||||
|
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||||
|
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||||
|
#ident "$Id$"
|
||||||
|
/*======
|
||||||
|
This file is part of PerconaFT.
|
||||||
|
|
||||||
|
|
||||||
|
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||||
|
|
||||||
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License, version 2,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
----------------------------------------
|
||||||
|
|
||||||
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU Affero General Public License, version 3,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Affero General Public License
|
||||||
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
======= */
|
||||||
|
|
||||||
|
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <ft/log_header.h>
|
||||||
|
|
||||||
|
// this is the basic information we need to keep per logfile
|
||||||
|
struct toku_logfile_info {
|
||||||
|
int64_t index;
|
||||||
|
LSN maxlsn;
|
||||||
|
uint32_t version;
|
||||||
|
};
|
||||||
|
typedef struct toku_logfile_info *TOKULOGFILEINFO;
|
||||||
|
|
||||||
|
struct toku_logfilemgr;
|
||||||
|
typedef struct toku_logfilemgr *TOKULOGFILEMGR;
|
||||||
|
|
||||||
|
int toku_logfilemgr_create(TOKULOGFILEMGR *lfm);
|
||||||
|
int toku_logfilemgr_destroy(TOKULOGFILEMGR *lfm);
|
||||||
|
|
||||||
|
int toku_logfilemgr_init(TOKULOGFILEMGR lfm, const char *log_dir, TXNID *last_xid_if_clean_shutdown);
|
||||||
|
int toku_logfilemgr_num_logfiles(TOKULOGFILEMGR lfm);
|
||||||
|
int toku_logfilemgr_add_logfile_info(TOKULOGFILEMGR lfm, TOKULOGFILEINFO lf_info);
|
||||||
|
TOKULOGFILEINFO toku_logfilemgr_get_oldest_logfile_info(TOKULOGFILEMGR lfm);
|
||||||
|
void toku_logfilemgr_delete_oldest_logfile_info(TOKULOGFILEMGR lfm);
|
||||||
|
LSN toku_logfilemgr_get_last_lsn(TOKULOGFILEMGR lfm);
|
||||||
|
void toku_logfilemgr_update_last_lsn(TOKULOGFILEMGR lfm, LSN lsn);
|
||||||
|
|
||||||
|
void toku_logfilemgr_print(TOKULOGFILEMGR lfm);
|
||||||
@@ -1,93 +1,40 @@
|
|||||||
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||||
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||||
#ident "$Id$"
|
#ident "$Id$"
|
||||||
/*
|
/*======
|
||||||
COPYING CONDITIONS NOTICE:
|
This file is part of PerconaFT.
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or modify
|
|
||||||
it under the terms of version 2 of the GNU General Public License as
|
|
||||||
published by the Free Software Foundation, and provided that the
|
|
||||||
following conditions are met:
|
|
||||||
|
|
||||||
* Redistributions of source code must retain this COPYING
|
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||||
CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the
|
|
||||||
DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the
|
|
||||||
PATENT MARKING NOTICE (below), and the PATENT RIGHTS
|
|
||||||
GRANT (below).
|
|
||||||
|
|
||||||
* Redistributions in binary form must reproduce this COPYING
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the
|
it under the terms of the GNU General Public License, version 2,
|
||||||
DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the
|
as published by the Free Software Foundation.
|
||||||
PATENT MARKING NOTICE (below), and the PATENT RIGHTS
|
|
||||||
GRANT (below) in the documentation and/or other materials
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
provided with the distribution.
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
You should have received a copy of the GNU General Public License
|
You should have received a copy of the GNU General Public License
|
||||||
along with this program; if not, write to the Free Software
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
|
||||||
02110-1301, USA.
|
|
||||||
|
|
||||||
COPYRIGHT NOTICE:
|
----------------------------------------
|
||||||
|
|
||||||
TokuFT, Tokutek Fractal Tree Indexing Library.
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
Copyright (C) 2007-2013 Tokutek, Inc.
|
it under the terms of the GNU Affero General Public License, version 3,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
DISCLAIMER:
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
This program is distributed in the hope that it will be useful, but
|
You should have received a copy of the GNU Affero General Public License
|
||||||
WITHOUT ANY WARRANTY; without even the implied warranty of
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
======= */
|
||||||
General Public License for more details.
|
|
||||||
|
|
||||||
UNIVERSITY PATENT NOTICE:
|
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||||
|
|
||||||
The technology is licensed by the Massachusetts Institute of
|
|
||||||
Technology, Rutgers State University of New Jersey, and the Research
|
|
||||||
Foundation of State University of New York at Stony Brook under
|
|
||||||
United States of America Serial No. 11/760379 and to the patents
|
|
||||||
and/or patent applications resulting from it.
|
|
||||||
|
|
||||||
PATENT MARKING NOTICE:
|
|
||||||
|
|
||||||
This software is covered by US Patent No. 8,185,551.
|
|
||||||
This software is covered by US Patent No. 8,489,638.
|
|
||||||
|
|
||||||
PATENT RIGHTS GRANT:
|
|
||||||
|
|
||||||
"THIS IMPLEMENTATION" means the copyrightable works distributed by
|
|
||||||
Tokutek as part of the Fractal Tree project.
|
|
||||||
|
|
||||||
"PATENT CLAIMS" means the claims of patents that are owned or
|
|
||||||
licensable by Tokutek, both currently or in the future; and that in
|
|
||||||
the absence of this license would be infringed by THIS
|
|
||||||
IMPLEMENTATION or by using or running THIS IMPLEMENTATION.
|
|
||||||
|
|
||||||
"PATENT CHALLENGE" shall mean a challenge to the validity,
|
|
||||||
patentability, enforceability and/or non-infringement of any of the
|
|
||||||
PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS.
|
|
||||||
|
|
||||||
Tokutek hereby grants to you, for the term and geographical scope of
|
|
||||||
the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free,
|
|
||||||
irrevocable (except as stated in this section) patent license to
|
|
||||||
make, have made, use, offer to sell, sell, import, transfer, and
|
|
||||||
otherwise run, modify, and propagate the contents of THIS
|
|
||||||
IMPLEMENTATION, where such license applies only to the PATENT
|
|
||||||
CLAIMS. This grant does not include claims that would be infringed
|
|
||||||
only as a consequence of further modifications of THIS
|
|
||||||
IMPLEMENTATION. If you or your agent or licensee institute or order
|
|
||||||
or agree to the institution of patent litigation against any entity
|
|
||||||
(including a cross-claim or counterclaim in a lawsuit) alleging that
|
|
||||||
THIS IMPLEMENTATION constitutes direct or contributory patent
|
|
||||||
infringement, or inducement of patent infringement, then any rights
|
|
||||||
granted to you under this License shall terminate as of the date
|
|
||||||
such litigation is filed. If you or your agent or exclusive
|
|
||||||
licensee institute or order or agree to the institution of a PATENT
|
|
||||||
CHALLENGE, then Tokutek may terminate any rights granted to you
|
|
||||||
under this License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved."
|
|
||||||
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it."
|
|
||||||
|
|
||||||
/* This file defines the logformat in an executable fashion.
|
/* This file defines the logformat in an executable fashion.
|
||||||
* This code is used to generate
|
* This code is used to generate
|
||||||
@@ -850,9 +797,8 @@ int main (int argc, const char *const argv[]) {
|
|||||||
fprintf2(cf, hf, "/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */\n");
|
fprintf2(cf, hf, "/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */\n");
|
||||||
fprintf2(cf, hf, "// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:\n");
|
fprintf2(cf, hf, "// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:\n");
|
||||||
fprintf(hf, "#pragma once\n");
|
fprintf(hf, "#pragma once\n");
|
||||||
fprintf2(cf, hf, "/* Do not edit this file. This code generated by logformat.c. Copyright (c) 2007-2013 Tokutek Inc. */\n");
|
fprintf2(cf, hf, "/* Do not edit this file. This code generated by logformat.c. Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. */\n");
|
||||||
fprintf2(cf, hf, "#ident \"Copyright (c) 2007-2013 Tokutek Inc. All rights reserved.\"\n");
|
fprintf2(cf, hf, "#ident \"Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.\"\n");
|
||||||
fprintf2(cf, pf, "#include <config.h>\n");
|
|
||||||
fprintf2(cf, pf, "#include <stdint.h>\n");
|
fprintf2(cf, pf, "#include <stdint.h>\n");
|
||||||
fprintf2(cf, pf, "#include <sys/time.h>\n");
|
fprintf2(cf, pf, "#include <sys/time.h>\n");
|
||||||
fprintf2(cf, pf, "#include <ft/logger/log-internal.h>\n");
|
fprintf2(cf, pf, "#include <ft/logger/log-internal.h>\n");
|
||||||
@@ -1,95 +1,40 @@
|
|||||||
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||||
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||||
#ident "$Id$"
|
#ident "$Id$"
|
||||||
/*
|
/*======
|
||||||
COPYING CONDITIONS NOTICE:
|
This file is part of PerconaFT.
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or modify
|
|
||||||
it under the terms of version 2 of the GNU General Public License as
|
|
||||||
published by the Free Software Foundation, and provided that the
|
|
||||||
following conditions are met:
|
|
||||||
|
|
||||||
* Redistributions of source code must retain this COPYING
|
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||||
CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the
|
|
||||||
DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the
|
|
||||||
PATENT MARKING NOTICE (below), and the PATENT RIGHTS
|
|
||||||
GRANT (below).
|
|
||||||
|
|
||||||
* Redistributions in binary form must reproduce this COPYING
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the
|
it under the terms of the GNU General Public License, version 2,
|
||||||
DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the
|
as published by the Free Software Foundation.
|
||||||
PATENT MARKING NOTICE (below), and the PATENT RIGHTS
|
|
||||||
GRANT (below) in the documentation and/or other materials
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
provided with the distribution.
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
You should have received a copy of the GNU General Public License
|
You should have received a copy of the GNU General Public License
|
||||||
along with this program; if not, write to the Free Software
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
|
||||||
02110-1301, USA.
|
|
||||||
|
|
||||||
COPYRIGHT NOTICE:
|
----------------------------------------
|
||||||
|
|
||||||
TokuFT, Tokutek Fractal Tree Indexing Library.
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
Copyright (C) 2007-2013 Tokutek, Inc.
|
it under the terms of the GNU Affero General Public License, version 3,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
DISCLAIMER:
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
This program is distributed in the hope that it will be useful, but
|
You should have received a copy of the GNU Affero General Public License
|
||||||
WITHOUT ANY WARRANTY; without even the implied warranty of
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
======= */
|
||||||
General Public License for more details.
|
|
||||||
|
|
||||||
UNIVERSITY PATENT NOTICE:
|
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||||
|
|
||||||
The technology is licensed by the Massachusetts Institute of
|
|
||||||
Technology, Rutgers State University of New Jersey, and the Research
|
|
||||||
Foundation of State University of New York at Stony Brook under
|
|
||||||
United States of America Serial No. 11/760379 and to the patents
|
|
||||||
and/or patent applications resulting from it.
|
|
||||||
|
|
||||||
PATENT MARKING NOTICE:
|
|
||||||
|
|
||||||
This software is covered by US Patent No. 8,185,551.
|
|
||||||
This software is covered by US Patent No. 8,489,638.
|
|
||||||
|
|
||||||
PATENT RIGHTS GRANT:
|
|
||||||
|
|
||||||
"THIS IMPLEMENTATION" means the copyrightable works distributed by
|
|
||||||
Tokutek as part of the Fractal Tree project.
|
|
||||||
|
|
||||||
"PATENT CLAIMS" means the claims of patents that are owned or
|
|
||||||
licensable by Tokutek, both currently or in the future; and that in
|
|
||||||
the absence of this license would be infringed by THIS
|
|
||||||
IMPLEMENTATION or by using or running THIS IMPLEMENTATION.
|
|
||||||
|
|
||||||
"PATENT CHALLENGE" shall mean a challenge to the validity,
|
|
||||||
patentability, enforceability and/or non-infringement of any of the
|
|
||||||
PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS.
|
|
||||||
|
|
||||||
Tokutek hereby grants to you, for the term and geographical scope of
|
|
||||||
the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free,
|
|
||||||
irrevocable (except as stated in this section) patent license to
|
|
||||||
make, have made, use, offer to sell, sell, import, transfer, and
|
|
||||||
otherwise run, modify, and propagate the contents of THIS
|
|
||||||
IMPLEMENTATION, where such license applies only to the PATENT
|
|
||||||
CLAIMS. This grant does not include claims that would be infringed
|
|
||||||
only as a consequence of further modifications of THIS
|
|
||||||
IMPLEMENTATION. If you or your agent or licensee institute or order
|
|
||||||
or agree to the institution of patent litigation against any entity
|
|
||||||
(including a cross-claim or counterclaim in a lawsuit) alleging that
|
|
||||||
THIS IMPLEMENTATION constitutes direct or contributory patent
|
|
||||||
infringement, or inducement of patent infringement, then any rights
|
|
||||||
granted to you under this License shall terminate as of the date
|
|
||||||
such litigation is filed. If you or your agent or exclusive
|
|
||||||
licensee institute or order or agree to the institution of a PATENT
|
|
||||||
CHALLENGE, then Tokutek may terminate any rights granted to you
|
|
||||||
under this License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved."
|
|
||||||
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it."
|
|
||||||
|
|
||||||
#include <config.h>
|
|
||||||
|
|
||||||
#include <memory.h>
|
#include <memory.h>
|
||||||
#include <ctype.h>
|
#include <ctype.h>
|
||||||
@@ -1395,46 +1340,19 @@ void toku_logger_note_checkpoint(TOKULOGGER logger, LSN lsn) {
|
|||||||
logger->last_completed_checkpoint_lsn = lsn;
|
logger->last_completed_checkpoint_lsn = lsn;
|
||||||
}
|
}
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////////////
|
|
||||||
// Engine status
|
|
||||||
//
|
|
||||||
// Status is intended for display to humans to help understand system behavior.
|
|
||||||
// It does not need to be perfectly thread-safe.
|
|
||||||
|
|
||||||
static LOGGER_STATUS_S logger_status;
|
|
||||||
|
|
||||||
#define STATUS_INIT(k,c,t,l,inc) TOKUFT_STATUS_INIT(logger_status, k, c, t, "logger: " l, inc)
|
|
||||||
|
|
||||||
static void
|
|
||||||
status_init(void) {
|
|
||||||
// Note, this function initializes the keyname, type, and legend fields.
|
|
||||||
// Value fields are initialized to zero by compiler.
|
|
||||||
STATUS_INIT(LOGGER_NEXT_LSN, nullptr, UINT64, "next LSN", TOKU_ENGINE_STATUS);
|
|
||||||
STATUS_INIT(LOGGER_NUM_WRITES, LOGGER_WRITES, UINT64, "writes", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
STATUS_INIT(LOGGER_BYTES_WRITTEN, LOGGER_WRITES_BYTES, UINT64, "writes (bytes)", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
STATUS_INIT(LOGGER_UNCOMPRESSED_BYTES_WRITTEN, LOGGER_WRITES_UNCOMPRESSED_BYTES, UINT64, "writes (uncompressed bytes)", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
STATUS_INIT(LOGGER_TOKUTIME_WRITES, LOGGER_WRITES_SECONDS, TOKUTIME, "writes (seconds)", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
STATUS_INIT(LOGGER_WAIT_BUF_LONG, LOGGER_WAIT_LONG, UINT64, "number of long logger write operations", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
|
||||||
logger_status.initialized = true;
|
|
||||||
}
|
|
||||||
#undef STATUS_INIT
|
|
||||||
|
|
||||||
#define STATUS_VALUE(x) logger_status.status[x].value.num
|
|
||||||
|
|
||||||
void
|
void
|
||||||
toku_logger_get_status(TOKULOGGER logger, LOGGER_STATUS statp) {
|
toku_logger_get_status(TOKULOGGER logger, LOGGER_STATUS statp) {
|
||||||
if (!logger_status.initialized)
|
log_status.init();
|
||||||
status_init();
|
|
||||||
if (logger) {
|
if (logger) {
|
||||||
STATUS_VALUE(LOGGER_NEXT_LSN) = logger->lsn.lsn;
|
LOG_STATUS_VAL(LOGGER_NEXT_LSN) = logger->lsn.lsn;
|
||||||
STATUS_VALUE(LOGGER_NUM_WRITES) = logger->num_writes_to_disk;
|
LOG_STATUS_VAL(LOGGER_NUM_WRITES) = logger->num_writes_to_disk;
|
||||||
STATUS_VALUE(LOGGER_BYTES_WRITTEN) = logger->bytes_written_to_disk;
|
LOG_STATUS_VAL(LOGGER_BYTES_WRITTEN) = logger->bytes_written_to_disk;
|
||||||
// No compression on logfiles so the uncompressed size is just number of bytes written
|
// No compression on logfiles so the uncompressed size is just number of bytes written
|
||||||
STATUS_VALUE(LOGGER_UNCOMPRESSED_BYTES_WRITTEN) = logger->bytes_written_to_disk;
|
LOG_STATUS_VAL(LOGGER_UNCOMPRESSED_BYTES_WRITTEN) = logger->bytes_written_to_disk;
|
||||||
STATUS_VALUE(LOGGER_TOKUTIME_WRITES) = logger->time_spent_writing_to_disk;
|
LOG_STATUS_VAL(LOGGER_TOKUTIME_WRITES) = logger->time_spent_writing_to_disk;
|
||||||
STATUS_VALUE(LOGGER_WAIT_BUF_LONG) = logger->num_wait_buf_long;
|
LOG_STATUS_VAL(LOGGER_WAIT_BUF_LONG) = logger->num_wait_buf_long;
|
||||||
}
|
}
|
||||||
*statp = logger_status;
|
*statp = log_status;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -1483,5 +1401,3 @@ toku_get_version_of_logs_on_disk(const char *log_dir, bool *found_any_logs, uint
|
|||||||
TXN_MANAGER toku_logger_get_txn_manager(TOKULOGGER logger) {
|
TXN_MANAGER toku_logger_get_txn_manager(TOKULOGGER logger) {
|
||||||
return logger->txn_manager;
|
return logger->txn_manager;
|
||||||
}
|
}
|
||||||
|
|
||||||
#undef STATUS_VALUE
|
|
||||||
273
storage/tokudb/PerconaFT/ft/logger/logger.h
Normal file
273
storage/tokudb/PerconaFT/ft/logger/logger.h
Normal file
@@ -0,0 +1,273 @@
|
|||||||
|
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||||
|
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||||
|
#ident "$Id$"
|
||||||
|
/*======
|
||||||
|
This file is part of PerconaFT.
|
||||||
|
|
||||||
|
|
||||||
|
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||||
|
|
||||||
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License, version 2,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
----------------------------------------
|
||||||
|
|
||||||
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU Affero General Public License, version 3,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Affero General Public License
|
||||||
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
======= */
|
||||||
|
|
||||||
|
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "ft/serialize/block_table.h"
|
||||||
|
#include "ft/serialize/ft_layout_version.h"
|
||||||
|
#include "ft/txn/txn.h"
|
||||||
|
|
||||||
|
typedef struct tokulogger *TOKULOGGER;
|
||||||
|
|
||||||
|
enum {
|
||||||
|
TOKU_LOG_VERSION_1 = 1,
|
||||||
|
TOKU_LOG_VERSION_2 = 2,
|
||||||
|
//After 2 we linked the log version to the FT_LAYOUT VERSION.
|
||||||
|
//So it went from 2 to 13 (3-12 do not exist)
|
||||||
|
TOKU_LOG_VERSION_24 = 24,
|
||||||
|
TOKU_LOG_VERSION_25 = 25, // change rollinclude rollback log entry
|
||||||
|
TOKU_LOG_VERSION_26 = 26, // no change from 25
|
||||||
|
TOKU_LOG_VERSION_27 = 27, // no change from 26
|
||||||
|
TOKU_LOG_VERSION_28 = 28, // no change from 27
|
||||||
|
TOKU_LOG_VERSION = FT_LAYOUT_VERSION,
|
||||||
|
TOKU_LOG_MIN_SUPPORTED_VERSION = FT_LAYOUT_MIN_SUPPORTED_VERSION,
|
||||||
|
};
|
||||||
|
|
||||||
|
int toku_logger_create (TOKULOGGER *resultp);
|
||||||
|
int toku_logger_open (const char *directory, TOKULOGGER logger);
|
||||||
|
int toku_logger_open_with_last_xid(const char *directory, TOKULOGGER logger, TXNID last_xid);
|
||||||
|
void toku_logger_shutdown(TOKULOGGER logger);
|
||||||
|
int toku_logger_close(TOKULOGGER *loggerp);
|
||||||
|
void toku_logger_initialize_rollback_cache(TOKULOGGER logger, struct ft *ft);
|
||||||
|
int toku_logger_open_rollback(TOKULOGGER logger, struct cachetable *ct, bool create);
|
||||||
|
void toku_logger_close_rollback(TOKULOGGER logger);
|
||||||
|
void toku_logger_close_rollback_check_empty(TOKULOGGER logger, bool clean_shutdown);
|
||||||
|
bool toku_logger_rollback_is_open (TOKULOGGER); // return true iff the rollback is open.
|
||||||
|
|
||||||
|
void toku_logger_fsync (TOKULOGGER logger);
|
||||||
|
void toku_logger_fsync_if_lsn_not_fsynced(TOKULOGGER logger, LSN lsn);
|
||||||
|
int toku_logger_is_open(TOKULOGGER logger);
|
||||||
|
void toku_logger_set_cachetable (TOKULOGGER logger, struct cachetable *ct);
|
||||||
|
int toku_logger_set_lg_max(TOKULOGGER logger, uint32_t lg_max);
|
||||||
|
int toku_logger_get_lg_max(TOKULOGGER logger, uint32_t *lg_maxp);
|
||||||
|
int toku_logger_set_lg_bsize(TOKULOGGER logger, uint32_t bsize);
|
||||||
|
|
||||||
|
void toku_logger_write_log_files (TOKULOGGER logger, bool write_log_files);
|
||||||
|
void toku_logger_trim_log_files(TOKULOGGER logger, bool trim_log_files);
|
||||||
|
bool toku_logger_txns_exist(TOKULOGGER logger);
|
||||||
|
|
||||||
|
// Restart the logger. This function is used by recovery to really start
|
||||||
|
// logging.
|
||||||
|
// Effects: Flush the current log buffer, reset the logger's lastlsn, and
|
||||||
|
// open a new log file.
|
||||||
|
// Returns: 0 if success
|
||||||
|
int toku_logger_restart(TOKULOGGER logger, LSN lastlsn);
|
||||||
|
|
||||||
|
// Maybe trim the log entries from the log that are older than the given LSN
|
||||||
|
// Effect: find all of the log files whose largest LSN is smaller than the
|
||||||
|
// given LSN and delete them.
|
||||||
|
void toku_logger_maybe_trim_log(TOKULOGGER logger, LSN oldest_open_lsn);
|
||||||
|
|
||||||
|
// At the ft layer, a FILENUM uniquely identifies an open file.
|
||||||
|
struct FILENUM {
|
||||||
|
uint32_t fileid;
|
||||||
|
};
|
||||||
|
static const FILENUM FILENUM_NONE = { .fileid = UINT32_MAX };
|
||||||
|
|
||||||
|
struct FILENUMS {
|
||||||
|
uint32_t num;
|
||||||
|
FILENUM *filenums;
|
||||||
|
};
|
||||||
|
|
||||||
|
void toku_logger_log_fcreate(TOKUTXN txn, const char *fname, FILENUM filenum, uint32_t mode, uint32_t flags, uint32_t nodesize, uint32_t basementnodesize, enum toku_compression_method compression_method);
|
||||||
|
void toku_logger_log_fdelete(TOKUTXN txn, FILENUM filenum);
|
||||||
|
void toku_logger_log_fopen(TOKUTXN txn, const char * fname, FILENUM filenum, uint32_t treeflags);
|
||||||
|
|
||||||
|
// the log generation code requires a typedef if we want to pass by pointer
|
||||||
|
typedef TOKU_XA_XID *XIDP;
|
||||||
|
|
||||||
|
int toku_fread_uint8_t (FILE *f, uint8_t *v, struct x1764 *mm, uint32_t *len);
|
||||||
|
int toku_fread_uint32_t_nocrclen (FILE *f, uint32_t *v);
|
||||||
|
int toku_fread_uint32_t (FILE *f, uint32_t *v, struct x1764 *checksum, uint32_t *len);
|
||||||
|
int toku_fread_uint64_t (FILE *f, uint64_t *v, struct x1764 *checksum, uint32_t *len);
|
||||||
|
int toku_fread_bool (FILE *f, bool *v, struct x1764 *checksum, uint32_t *len);
|
||||||
|
int toku_fread_LSN (FILE *f, LSN *lsn, struct x1764 *checksum, uint32_t *len);
|
||||||
|
int toku_fread_BLOCKNUM (FILE *f, BLOCKNUM *lsn, struct x1764 *checksum, uint32_t *len);
|
||||||
|
int toku_fread_FILENUM (FILE *f, FILENUM *filenum, struct x1764 *checksum, uint32_t *len);
|
||||||
|
int toku_fread_TXNID (FILE *f, TXNID *txnid, struct x1764 *checksum, uint32_t *len);
|
||||||
|
int toku_fread_TXNID_PAIR (FILE *f, TXNID_PAIR *txnid, struct x1764 *checksum, uint32_t *len);
|
||||||
|
int toku_fread_XIDP (FILE *f, XIDP *xidp, struct x1764 *checksum, uint32_t *len);
|
||||||
|
int toku_fread_BYTESTRING (FILE *f, BYTESTRING *bs, struct x1764 *checksum, uint32_t *len);
|
||||||
|
int toku_fread_FILENUMS (FILE *f, FILENUMS *fs, struct x1764 *checksum, uint32_t *len);
|
||||||
|
|
||||||
|
int toku_logprint_LSN (FILE *outf, FILE *inf, const char *fieldname, struct x1764 *checksum, uint32_t *len, const char *format __attribute__((__unused__)));
|
||||||
|
int toku_logprint_TXNID (FILE *outf, FILE *inf, const char *fieldname, struct x1764 *checksum, uint32_t *len, const char *format __attribute__((__unused__)));
|
||||||
|
int toku_logprint_TXNID_PAIR (FILE *outf, FILE *inf, const char *fieldname, struct x1764 *checksum, uint32_t *len, const char *format __attribute__((__unused__)));
|
||||||
|
int toku_logprint_XIDP (FILE *outf, FILE *inf, const char *fieldname, struct x1764 *checksum, uint32_t *len, const char *format __attribute__((__unused__)));
|
||||||
|
int toku_logprint_uint8_t (FILE *outf, FILE *inf, const char *fieldname, struct x1764 *checksum, uint32_t *len, const char *format);
|
||||||
|
int toku_logprint_uint32_t (FILE *outf, FILE *inf, const char *fieldname, struct x1764 *checksum, uint32_t *len, const char *format);
|
||||||
|
int toku_logprint_BLOCKNUM (FILE *outf, FILE *inf, const char *fieldname, struct x1764 *checksum, uint32_t *len, const char *format);
|
||||||
|
int toku_logprint_uint64_t (FILE *outf, FILE *inf, const char *fieldname, struct x1764 *checksum, uint32_t *len, const char *format);
|
||||||
|
int toku_logprint_bool (FILE *outf, FILE *inf, const char *fieldname, struct x1764 *checksum, uint32_t *len, const char *format __attribute__((__unused__)));
|
||||||
|
void toku_print_BYTESTRING (FILE *outf, uint32_t len, char *data);
|
||||||
|
int toku_logprint_BYTESTRING (FILE *outf, FILE *inf, const char *fieldname, struct x1764 *checksum, uint32_t *len, const char *format __attribute__((__unused__)));
|
||||||
|
int toku_logprint_FILENUM (FILE *outf, FILE *inf, const char *fieldname, struct x1764 *checksum, uint32_t *len, const char *format);
|
||||||
|
int toku_logprint_FILENUMS (FILE *outf, FILE *inf, const char *fieldname, struct x1764 *checksum, uint32_t *len, const char *format);
|
||||||
|
int toku_read_and_print_logmagic (FILE *f, uint32_t *versionp);
|
||||||
|
int toku_read_logmagic (FILE *f, uint32_t *versionp);
|
||||||
|
|
||||||
|
TXNID_PAIR toku_txn_get_txnid (TOKUTXN txn);
|
||||||
|
LSN toku_logger_last_lsn(TOKULOGGER logger);
|
||||||
|
TOKULOGGER toku_txn_logger (TOKUTXN txn);
|
||||||
|
|
||||||
|
void toku_txnid2txn (TOKULOGGER logger, TXNID_PAIR txnid, TOKUTXN *result);
|
||||||
|
|
||||||
|
int toku_logger_log_archive (TOKULOGGER logger, char ***logs_p, int flags);
|
||||||
|
|
||||||
|
TOKUTXN toku_logger_txn_parent (TOKUTXN txn);
|
||||||
|
void toku_logger_note_checkpoint(TOKULOGGER logger, LSN lsn);
|
||||||
|
|
||||||
|
void toku_logger_make_space_in_inbuf (TOKULOGGER logger, int n_bytes_needed);
|
||||||
|
|
||||||
|
int toku_logger_write_inbuf (TOKULOGGER logger);
|
||||||
|
// Effect: Write the buffered data (from the inbuf) to a file. No fsync, however.
|
||||||
|
// As a side effect, the inbuf will be made empty.
|
||||||
|
// Return 0 on success, otherwise return an error number.
|
||||||
|
// Requires: The inbuf lock is currently held, and the outbuf lock is not held.
|
||||||
|
// Upon return, the inbuf lock will be held, and the outbuf lock is not held.
|
||||||
|
// However, no side effects should have been made to the logger. The lock was acquired simply to determine that the buffer will overflow if we try to put something into it.
|
||||||
|
// The inbuf lock will be released, so the operations before and after this function call will not be atomic.
|
||||||
|
// Rationale: When the buffer becomes nearly full, call this function so that more can be put in.
|
||||||
|
// Implementation note: Since the output lock is acquired first, we must release the input lock, and then grab both in the right order.
|
||||||
|
|
||||||
|
void toku_logger_maybe_fsync (TOKULOGGER logger, LSN lsn, int do_fsync, bool holds_input_lock);
|
||||||
|
// Effect: If fsync is nonzero, then make sure that the log is flushed and synced at least up to lsn.
|
||||||
|
// Entry: Holds input lock iff 'holds_input_lock'.
|
||||||
|
// Exit: Holds no locks.
|
||||||
|
|
||||||
|
// Discussion: How does the logger work:
|
||||||
|
// The logger has two buffers: an inbuf and an outbuf.
|
||||||
|
// There are two locks, called the inlock, and the outlock. To write, both locks must be held, and the outlock is acquired first.
|
||||||
|
// Roughly speaking, the inbuf is used to accumulate logged data, and the outbuf is used to write to disk.
|
||||||
|
// When something is to be logged we do the following:
|
||||||
|
// acquire the inlock.
|
||||||
|
// Make sure there is space in the inbuf for the logentry. (We know the size of the logentry in advance):
|
||||||
|
// if the inbuf doesn't have enough space then
|
||||||
|
// release the inlock
|
||||||
|
// acquire the outlock
|
||||||
|
// acquire the inlock
|
||||||
|
// it's possible that some other thread made space.
|
||||||
|
// if there still isn't space
|
||||||
|
// swap the inbuf and the outbuf
|
||||||
|
// release the inlock
|
||||||
|
// write the outbuf
|
||||||
|
// acquire the inlock
|
||||||
|
// release the outlock
|
||||||
|
// if the inbuf is still too small, then increase the size of the inbuf
|
||||||
|
// Increment the LSN and fill the inbuf.
|
||||||
|
// If fsync is required then
|
||||||
|
// release the inlock
|
||||||
|
// acquire the outlock
|
||||||
|
// acquire the inlock
|
||||||
|
// if the LSN has already been flushed and fsynced, then we are done (some other thread did the flush):
|
||||||
|
// release the locks
|
||||||
|
// if the LSN has been flushed but not fsynced up to the LSN:
|
||||||
|
// release the inlock
|
||||||
|
// fsync
|
||||||
|
// release the outlock
|
||||||
|
// otherwise:
|
||||||
|
// swap the outbuf and the inbuf
|
||||||
|
// release the inlock
|
||||||
|
// write the outbuf
|
||||||
|
// fsync
|
||||||
|
// release the outlock
|
||||||
|
|
||||||
|
void toku_logger_get_status(TOKULOGGER logger, LOGGER_STATUS s);
|
||||||
|
|
||||||
|
int toku_get_version_of_logs_on_disk(const char *log_dir, bool *found_any_logs, uint32_t *version_found);
|
||||||
|
|
||||||
|
struct txn_manager *toku_logger_get_txn_manager(TOKULOGGER logger);
|
||||||
|
|
||||||
|
// For serialize / deserialize
|
||||||
|
|
||||||
|
#include "ft/serialize/wbuf.h"
|
||||||
|
|
||||||
|
// Serialize a FILENUM into wb by writing its fileid field with
// wbuf_nocrc_uint.
// NOTE(review): the "nocrc" writers presumably skip checksum accumulation;
// confirm against ft/serialize/wbuf.h.
static inline void wbuf_nocrc_FILENUM(struct wbuf *wb, FILENUM fileid)
{
    wbuf_nocrc_uint(wb, fileid.fileid);
}
|
||||||
|
|
||||||
|
// Serialize a FILENUM into wb by writing its fileid field with wbuf_uint.
static inline void wbuf_FILENUM(struct wbuf *wb, FILENUM fileid)
{
    wbuf_uint(wb, fileid.fileid);
}
|
||||||
|
|
||||||
|
static inline void wbuf_nocrc_FILENUMS(struct wbuf *wb, FILENUMS v) {
|
||||||
|
wbuf_nocrc_uint(wb, v.num);
|
||||||
|
for (uint32_t i = 0; i < v.num; i++) {
|
||||||
|
wbuf_nocrc_FILENUM(wb, v.filenums[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void wbuf_FILENUMS(struct wbuf *wb, FILENUMS v) {
|
||||||
|
wbuf_uint(wb, v.num);
|
||||||
|
for (uint32_t i = 0; i < v.num; i++) {
|
||||||
|
wbuf_FILENUM(wb, v.filenums[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Serialize the XA transaction id *xid into w. Field order on the wire:
//   1. formatID      (written with wbuf_nocrc_uint32_t)
//   2. gtrid_length  (written with wbuf_nocrc_uint8_t)
//   3. bqual_length  (written with wbuf_nocrc_uint8_t)
//   4. the first gtrid_length + bqual_length bytes of xid->data
static inline void wbuf_nocrc_XIDP(struct wbuf *w, TOKU_XA_XID *xid)
{
    wbuf_nocrc_uint32_t(w, xid->formatID);
    wbuf_nocrc_uint8_t(w, xid->gtrid_length);
    wbuf_nocrc_uint8_t(w, xid->bqual_length);
    wbuf_nocrc_literal_bytes(w,
                             xid->data,
                             xid->gtrid_length + xid->bqual_length);
}
|
||||||
|
|
||||||
|
#include "ft/serialize/rbuf.h"
|
||||||
|
|
||||||
|
// Deserialize a FILENUM from rb: the next value returned by rbuf_int is
// stored into filenum->fileid.
static inline void rbuf_FILENUM(struct rbuf *rb, FILENUM *filenum)
{
    filenum->fileid = rbuf_int(rb);
}
|
||||||
|
// Memarena-flavoured FILENUM reader. The arena argument is unused
// (a FILENUM needs no extra allocation); the work is delegated to
// rbuf_FILENUM.
static inline void rbuf_ma_FILENUM(struct rbuf *rb, memarena *UU(ma), FILENUM *filenum)
{
    rbuf_FILENUM(rb, filenum);
}
|
||||||
|
|
||||||
|
static inline void rbuf_FILENUMS(struct rbuf *rb, FILENUMS *filenums) {
|
||||||
|
filenums->num = rbuf_int(rb);
|
||||||
|
XMALLOC_N(filenums->num, filenums->filenums);
|
||||||
|
for (uint32_t i = 0; i < filenums->num; i++) {
|
||||||
|
rbuf_FILENUM(rb, &(filenums->filenums[i]));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void rbuf_ma_FILENUMS(struct rbuf *rb, memarena *ma, FILENUMS *filenums) {
|
||||||
|
rbuf_ma_uint32_t(rb, ma, &(filenums->num));
|
||||||
|
filenums->filenums = (FILENUM *) ma->malloc_from_arena(filenums->num * sizeof(FILENUM));
|
||||||
|
assert(filenums->filenums != NULL);
|
||||||
|
for (uint32_t i = 0; i < filenums->num; i++) {
|
||||||
|
rbuf_ma_FILENUM(rb, ma, &(filenums->filenums[i]));
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -1,96 +1,40 @@
|
|||||||
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||||
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||||
#ident "$Id$"
|
#ident "$Id$"
|
||||||
/*
|
/*======
|
||||||
COPYING CONDITIONS NOTICE:
|
This file is part of PerconaFT.
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or modify
|
|
||||||
it under the terms of version 2 of the GNU General Public License as
|
|
||||||
published by the Free Software Foundation, and provided that the
|
|
||||||
following conditions are met:
|
|
||||||
|
|
||||||
* Redistributions of source code must retain this COPYING
|
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||||
CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the
|
|
||||||
DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the
|
|
||||||
PATENT MARKING NOTICE (below), and the PATENT RIGHTS
|
|
||||||
GRANT (below).
|
|
||||||
|
|
||||||
* Redistributions in binary form must reproduce this COPYING
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the
|
it under the terms of the GNU General Public License, version 2,
|
||||||
DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the
|
as published by the Free Software Foundation.
|
||||||
PATENT MARKING NOTICE (below), and the PATENT RIGHTS
|
|
||||||
GRANT (below) in the documentation and/or other materials
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
provided with the distribution.
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
You should have received a copy of the GNU General Public License
|
You should have received a copy of the GNU General Public License
|
||||||
along with this program; if not, write to the Free Software
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
|
||||||
02110-1301, USA.
|
|
||||||
|
|
||||||
COPYRIGHT NOTICE:
|
----------------------------------------
|
||||||
|
|
||||||
TokuFT, Tokutek Fractal Tree Indexing Library.
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
Copyright (C) 2007-2013 Tokutek, Inc.
|
it under the terms of the GNU Affero General Public License, version 3,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
DISCLAIMER:
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
This program is distributed in the hope that it will be useful, but
|
You should have received a copy of the GNU Affero General Public License
|
||||||
WITHOUT ANY WARRANTY; without even the implied warranty of
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
======= */
|
||||||
General Public License for more details.
|
|
||||||
|
|
||||||
UNIVERSITY PATENT NOTICE:
|
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||||
|
|
||||||
The technology is licensed by the Massachusetts Institute of
|
|
||||||
Technology, Rutgers State University of New Jersey, and the Research
|
|
||||||
Foundation of State University of New York at Stony Brook under
|
|
||||||
United States of America Serial No. 11/760379 and to the patents
|
|
||||||
and/or patent applications resulting from it.
|
|
||||||
|
|
||||||
PATENT MARKING NOTICE:
|
|
||||||
|
|
||||||
This software is covered by US Patent No. 8,185,551.
|
|
||||||
This software is covered by US Patent No. 8,489,638.
|
|
||||||
|
|
||||||
PATENT RIGHTS GRANT:
|
|
||||||
|
|
||||||
"THIS IMPLEMENTATION" means the copyrightable works distributed by
|
|
||||||
Tokutek as part of the Fractal Tree project.
|
|
||||||
|
|
||||||
"PATENT CLAIMS" means the claims of patents that are owned or
|
|
||||||
licensable by Tokutek, both currently or in the future; and that in
|
|
||||||
the absence of this license would be infringed by THIS
|
|
||||||
IMPLEMENTATION or by using or running THIS IMPLEMENTATION.
|
|
||||||
|
|
||||||
"PATENT CHALLENGE" shall mean a challenge to the validity,
|
|
||||||
patentability, enforceability and/or non-infringement of any of the
|
|
||||||
PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS.
|
|
||||||
|
|
||||||
Tokutek hereby grants to you, for the term and geographical scope of
|
|
||||||
the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free,
|
|
||||||
irrevocable (except as stated in this section) patent license to
|
|
||||||
make, have made, use, offer to sell, sell, import, transfer, and
|
|
||||||
otherwise run, modify, and propagate the contents of THIS
|
|
||||||
IMPLEMENTATION, where such license applies only to the PATENT
|
|
||||||
CLAIMS. This grant does not include claims that would be infringed
|
|
||||||
only as a consequence of further modifications of THIS
|
|
||||||
IMPLEMENTATION. If you or your agent or licensee institute or order
|
|
||||||
or agree to the institution of patent litigation against any entity
|
|
||||||
(including a cross-claim or counterclaim in a lawsuit) alleging that
|
|
||||||
THIS IMPLEMENTATION constitutes direct or contributory patent
|
|
||||||
infringement, or inducement of patent infringement, then any rights
|
|
||||||
granted to you under this License shall terminate as of the date
|
|
||||||
such litigation is filed. If you or your agent or exclusive
|
|
||||||
licensee institute or order or agree to the institution of a PATENT
|
|
||||||
CHALLENGE, then Tokutek may terminate any rights granted to you
|
|
||||||
under this License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved."
|
|
||||||
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it."
|
|
||||||
|
|
||||||
|
|
||||||
#include <config.h>
|
|
||||||
|
|
||||||
#include "ft/cachetable/cachetable.h"
|
#include "ft/cachetable/cachetable.h"
|
||||||
#include "ft/cachetable/checkpoint.h"
|
#include "ft/cachetable/checkpoint.h"
|
||||||
@@ -424,7 +368,7 @@ static int toku_recover_begin_checkpoint (struct logtype_begin_checkpoint *l, RE
|
|||||||
r = 0; // ignore it (log only has a begin checkpoint)
|
r = 0; // ignore it (log only has a begin checkpoint)
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
fprintf(stderr, "TokuFT recovery %s: %d Unknown checkpoint state %d\n", __FILE__, __LINE__, (int)renv->ss.ss);
|
fprintf(stderr, "PerconaFT recovery %s: %d Unknown checkpoint state %d\n", __FILE__, __LINE__, (int)renv->ss.ss);
|
||||||
abort();
|
abort();
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@@ -434,7 +378,7 @@ static int toku_recover_begin_checkpoint (struct logtype_begin_checkpoint *l, RE
|
|||||||
static int toku_recover_backward_begin_checkpoint (struct logtype_begin_checkpoint *l, RECOVER_ENV renv) {
|
static int toku_recover_backward_begin_checkpoint (struct logtype_begin_checkpoint *l, RECOVER_ENV renv) {
|
||||||
int r;
|
int r;
|
||||||
time_t tnow = time(NULL);
|
time_t tnow = time(NULL);
|
||||||
fprintf(stderr, "%.24s TokuFT recovery bw_begin_checkpoint at %" PRIu64 " timestamp %" PRIu64 " (%s)\n", ctime(&tnow), l->lsn.lsn, l->timestamp, recover_state(renv));
|
fprintf(stderr, "%.24s PerconaFT recovery bw_begin_checkpoint at %" PRIu64 " timestamp %" PRIu64 " (%s)\n", ctime(&tnow), l->lsn.lsn, l->timestamp, recover_state(renv));
|
||||||
switch (renv->ss.ss) {
|
switch (renv->ss.ss) {
|
||||||
case BACKWARD_NEWER_CHECKPOINT_END:
|
case BACKWARD_NEWER_CHECKPOINT_END:
|
||||||
// incomplete checkpoint, nothing to do
|
// incomplete checkpoint, nothing to do
|
||||||
@@ -446,13 +390,13 @@ static int toku_recover_backward_begin_checkpoint (struct logtype_begin_checkpoi
|
|||||||
renv->ss.checkpoint_begin_timestamp = l->timestamp;
|
renv->ss.checkpoint_begin_timestamp = l->timestamp;
|
||||||
renv->goforward = true;
|
renv->goforward = true;
|
||||||
tnow = time(NULL);
|
tnow = time(NULL);
|
||||||
fprintf(stderr, "%.24s TokuFT recovery turning around at begin checkpoint %" PRIu64 " time %" PRIu64 "\n",
|
fprintf(stderr, "%.24s PerconaFT recovery turning around at begin checkpoint %" PRIu64 " time %" PRIu64 "\n",
|
||||||
ctime(&tnow), l->lsn.lsn,
|
ctime(&tnow), l->lsn.lsn,
|
||||||
renv->ss.checkpoint_end_timestamp - renv->ss.checkpoint_begin_timestamp);
|
renv->ss.checkpoint_end_timestamp - renv->ss.checkpoint_begin_timestamp);
|
||||||
r = 0;
|
r = 0;
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
fprintf(stderr, "TokuFT recovery %s: %d Unknown checkpoint state %d\n", __FILE__, __LINE__, (int)renv->ss.ss);
|
fprintf(stderr, "PerconaFT recovery %s: %d Unknown checkpoint state %d\n", __FILE__, __LINE__, (int)renv->ss.ss);
|
||||||
abort();
|
abort();
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@@ -482,7 +426,7 @@ static int toku_recover_end_checkpoint (struct logtype_end_checkpoint *l, RECOVE
|
|||||||
|
|
||||||
static int toku_recover_backward_end_checkpoint (struct logtype_end_checkpoint *l, RECOVER_ENV renv) {
|
static int toku_recover_backward_end_checkpoint (struct logtype_end_checkpoint *l, RECOVER_ENV renv) {
|
||||||
time_t tnow = time(NULL);
|
time_t tnow = time(NULL);
|
||||||
fprintf(stderr, "%.24s TokuFT recovery bw_end_checkpoint at %" PRIu64 " timestamp %" PRIu64 " xid %" PRIu64 " (%s)\n", ctime(&tnow), l->lsn.lsn, l->timestamp, l->lsn_begin_checkpoint.lsn, recover_state(renv));
|
fprintf(stderr, "%.24s PerconaFT recovery bw_end_checkpoint at %" PRIu64 " timestamp %" PRIu64 " xid %" PRIu64 " (%s)\n", ctime(&tnow), l->lsn.lsn, l->timestamp, l->lsn_begin_checkpoint.lsn, recover_state(renv));
|
||||||
switch (renv->ss.ss) {
|
switch (renv->ss.ss) {
|
||||||
case BACKWARD_NEWER_CHECKPOINT_END:
|
case BACKWARD_NEWER_CHECKPOINT_END:
|
||||||
renv->ss.ss = BACKWARD_BETWEEN_CHECKPOINT_BEGIN_END;
|
renv->ss.ss = BACKWARD_BETWEEN_CHECKPOINT_BEGIN_END;
|
||||||
@@ -491,12 +435,12 @@ static int toku_recover_backward_end_checkpoint (struct logtype_end_checkpoint *
|
|||||||
renv->ss.checkpoint_end_timestamp = l->timestamp;
|
renv->ss.checkpoint_end_timestamp = l->timestamp;
|
||||||
return 0;
|
return 0;
|
||||||
case BACKWARD_BETWEEN_CHECKPOINT_BEGIN_END:
|
case BACKWARD_BETWEEN_CHECKPOINT_BEGIN_END:
|
||||||
fprintf(stderr, "TokuFT recovery %s:%d Should not see two end_checkpoint log entries without an intervening begin_checkpoint\n", __FILE__, __LINE__);
|
fprintf(stderr, "PerconaFT recovery %s:%d Should not see two end_checkpoint log entries without an intervening begin_checkpoint\n", __FILE__, __LINE__);
|
||||||
abort();
|
abort();
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
fprintf(stderr, "TokuFT recovery %s: %d Unknown checkpoint state %d\n", __FILE__, __LINE__, (int)renv->ss.ss);
|
fprintf(stderr, "PerconaFT recovery %s: %d Unknown checkpoint state %d\n", __FILE__, __LINE__, (int)renv->ss.ss);
|
||||||
abort();
|
abort();
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -765,7 +709,7 @@ static void toku_recover_txn_progress(TOKU_TXN_PROGRESS txn_progress, void *extr
|
|||||||
time_t tnow = time(NULL);
|
time_t tnow = time(NULL);
|
||||||
if (tnow - txn_progress_extra->tlast >= tokuft_recovery_progress_time) {
|
if (tnow - txn_progress_extra->tlast >= tokuft_recovery_progress_time) {
|
||||||
txn_progress_extra->tlast = tnow;
|
txn_progress_extra->tlast = tnow;
|
||||||
fprintf(stderr, "%.24s TokuFT ", ctime(&tnow));
|
fprintf(stderr, "%.24s PerconaFT ", ctime(&tnow));
|
||||||
if (txn_progress_extra->lsn.lsn != 0)
|
if (txn_progress_extra->lsn.lsn != 0)
|
||||||
fprintf(stderr, "lsn %" PRIu64 " ", txn_progress_extra->lsn.lsn);
|
fprintf(stderr, "lsn %" PRIu64 " ", txn_progress_extra->lsn.lsn);
|
||||||
fprintf(stderr, "%s xid %" PRIu64 ":%" PRIu64 " ",
|
fprintf(stderr, "%s xid %" PRIu64 ":%" PRIu64 " ",
|
||||||
@@ -785,7 +729,7 @@ static int toku_recover_xcommit (struct logtype_xcommit *l, RECOVER_ENV renv) {
|
|||||||
assert(txn!=NULL);
|
assert(txn!=NULL);
|
||||||
|
|
||||||
// commit the transaction
|
// commit the transaction
|
||||||
toku_txn_progress_extra extra = { time(NULL), l->lsn, "commit", l->xid };
|
toku_txn_progress_extra extra = { time(NULL), l->lsn, "commit", l->xid, 0 };
|
||||||
int r = toku_txn_commit_with_lsn(txn, true, l->lsn, toku_recover_txn_progress, &extra);
|
int r = toku_txn_commit_with_lsn(txn, true, l->lsn, toku_recover_txn_progress, &extra);
|
||||||
assert(r == 0);
|
assert(r == 0);
|
||||||
|
|
||||||
@@ -828,7 +772,7 @@ static int toku_recover_xabort (struct logtype_xabort *l, RECOVER_ENV renv) {
|
|||||||
assert(txn!=NULL);
|
assert(txn!=NULL);
|
||||||
|
|
||||||
// abort the transaction
|
// abort the transaction
|
||||||
toku_txn_progress_extra extra = { time(NULL), l->lsn, "abort", l->xid };
|
toku_txn_progress_extra extra = { time(NULL), l->lsn, "abort", l->xid, 0 };
|
||||||
r = toku_txn_abort_with_lsn(txn, l->lsn, toku_recover_txn_progress, &extra);
|
r = toku_txn_abort_with_lsn(txn, l->lsn, toku_recover_txn_progress, &extra);
|
||||||
assert(r == 0);
|
assert(r == 0);
|
||||||
|
|
||||||
@@ -864,7 +808,7 @@ static int toku_recover_fcreate (struct logtype_fcreate *l, RECOVER_ENV renv) {
|
|||||||
if (r != 0) {
|
if (r != 0) {
|
||||||
int er = get_error_errno();
|
int er = get_error_errno();
|
||||||
if (er != ENOENT) {
|
if (er != ENOENT) {
|
||||||
fprintf(stderr, "TokuFT recovery %s:%d unlink %s %d\n", __FUNCTION__, __LINE__, iname, er);
|
fprintf(stderr, "PerconaFT recovery %s:%d unlink %s %d\n", __FUNCTION__, __LINE__, iname, er);
|
||||||
toku_free(iname);
|
toku_free(iname);
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
@@ -1363,7 +1307,7 @@ static void recover_abort_live_txn(TOKUTXN txn) {
|
|||||||
// sanity check that the recursive call successfully NULLs out txn->child
|
// sanity check that the recursive call successfully NULLs out txn->child
|
||||||
invariant(txn->child == NULL);
|
invariant(txn->child == NULL);
|
||||||
// abort the transaction
|
// abort the transaction
|
||||||
toku_txn_progress_extra extra = { time(NULL), ZERO_LSN, "abort live", txn->txnid };
|
toku_txn_progress_extra extra = { time(NULL), ZERO_LSN, "abort live", txn->txnid, 0 };
|
||||||
int r = toku_txn_abort_txn(txn, toku_recover_txn_progress, &extra);
|
int r = toku_txn_abort_txn(txn, toku_recover_txn_progress, &extra);
|
||||||
assert(r == 0);
|
assert(r == 0);
|
||||||
|
|
||||||
@@ -1416,7 +1360,7 @@ static int do_recovery(RECOVER_ENV renv, const char *env_dir, const char *log_di
|
|||||||
struct log_entry *le = NULL;
|
struct log_entry *le = NULL;
|
||||||
|
|
||||||
time_t tnow = time(NULL);
|
time_t tnow = time(NULL);
|
||||||
fprintf(stderr, "%.24s TokuFT recovery starting in env %s\n", ctime(&tnow), env_dir);
|
fprintf(stderr, "%.24s PerconaFT recovery starting in env %s\n", ctime(&tnow), env_dir);
|
||||||
|
|
||||||
char org_wd[1000];
|
char org_wd[1000];
|
||||||
{
|
{
|
||||||
@@ -1452,10 +1396,10 @@ static int do_recovery(RECOVER_ENV renv, const char *env_dir, const char *log_di
|
|||||||
toku_struct_stat buf;
|
toku_struct_stat buf;
|
||||||
if (toku_stat(env_dir, &buf)!=0) {
|
if (toku_stat(env_dir, &buf)!=0) {
|
||||||
rr = get_error_errno();
|
rr = get_error_errno();
|
||||||
fprintf(stderr, "%.24s TokuFT recovery error: directory does not exist: %s\n", ctime(&tnow), env_dir);
|
fprintf(stderr, "%.24s PerconaFT recovery error: directory does not exist: %s\n", ctime(&tnow), env_dir);
|
||||||
goto errorexit;
|
goto errorexit;
|
||||||
} else if (!S_ISDIR(buf.st_mode)) {
|
} else if (!S_ISDIR(buf.st_mode)) {
|
||||||
fprintf(stderr, "%.24s TokuFT recovery error: this file is supposed to be a directory, but is not: %s\n", ctime(&tnow), env_dir);
|
fprintf(stderr, "%.24s PerconaFT recovery error: this file is supposed to be a directory, but is not: %s\n", ctime(&tnow), env_dir);
|
||||||
rr = ENOTDIR; goto errorexit;
|
rr = ENOTDIR; goto errorexit;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -1464,7 +1408,7 @@ static int do_recovery(RECOVER_ENV renv, const char *env_dir, const char *log_di
|
|||||||
tnow = time(NULL);
|
tnow = time(NULL);
|
||||||
time_t tlast;
|
time_t tlast;
|
||||||
tlast = tnow;
|
tlast = tnow;
|
||||||
fprintf(stderr, "%.24s TokuFT recovery scanning backward from %" PRIu64 "\n", ctime(&tnow), lastlsn.lsn);
|
fprintf(stderr, "%.24s PerconaFT recovery scanning backward from %" PRIu64 "\n", ctime(&tnow), lastlsn.lsn);
|
||||||
for (unsigned i=0; 1; i++) {
|
for (unsigned i=0; 1; i++) {
|
||||||
|
|
||||||
// get the previous log entry (first time gets the last one)
|
// get the previous log entry (first time gets the last one)
|
||||||
@@ -1484,7 +1428,7 @@ static int do_recovery(RECOVER_ENV renv, const char *env_dir, const char *log_di
|
|||||||
tnow = time(NULL);
|
tnow = time(NULL);
|
||||||
if (tnow - tlast >= tokuft_recovery_progress_time) {
|
if (tnow - tlast >= tokuft_recovery_progress_time) {
|
||||||
thislsn = toku_log_entry_get_lsn(le);
|
thislsn = toku_log_entry_get_lsn(le);
|
||||||
fprintf(stderr, "%.24s TokuFT recovery scanning backward from %" PRIu64 " at %" PRIu64 " (%s)\n",
|
fprintf(stderr, "%.24s PerconaFT recovery scanning backward from %" PRIu64 " at %" PRIu64 " (%s)\n",
|
||||||
ctime(&tnow), lastlsn.lsn, thislsn.lsn, recover_state(renv));
|
ctime(&tnow), lastlsn.lsn, thislsn.lsn, recover_state(renv));
|
||||||
tlast = tnow;
|
tlast = tnow;
|
||||||
}
|
}
|
||||||
@@ -1514,7 +1458,7 @@ static int do_recovery(RECOVER_ENV renv, const char *env_dir, const char *log_di
|
|||||||
assert(le);
|
assert(le);
|
||||||
thislsn = toku_log_entry_get_lsn(le);
|
thislsn = toku_log_entry_get_lsn(le);
|
||||||
tnow = time(NULL);
|
tnow = time(NULL);
|
||||||
fprintf(stderr, "%.24s TokuFT recovery starts scanning forward to %" PRIu64 " from %" PRIu64 " left %" PRIu64 " (%s)\n",
|
fprintf(stderr, "%.24s PerconaFT recovery starts scanning forward to %" PRIu64 " from %" PRIu64 " left %" PRIu64 " (%s)\n",
|
||||||
ctime(&tnow), lastlsn.lsn, thislsn.lsn, lastlsn.lsn - thislsn.lsn, recover_state(renv));
|
ctime(&tnow), lastlsn.lsn, thislsn.lsn, lastlsn.lsn - thislsn.lsn, recover_state(renv));
|
||||||
|
|
||||||
for (unsigned i=0; 1; i++) {
|
for (unsigned i=0; 1; i++) {
|
||||||
@@ -1524,7 +1468,7 @@ static int do_recovery(RECOVER_ENV renv, const char *env_dir, const char *log_di
|
|||||||
tnow = time(NULL);
|
tnow = time(NULL);
|
||||||
if (tnow - tlast >= tokuft_recovery_progress_time) {
|
if (tnow - tlast >= tokuft_recovery_progress_time) {
|
||||||
thislsn = toku_log_entry_get_lsn(le);
|
thislsn = toku_log_entry_get_lsn(le);
|
||||||
fprintf(stderr, "%.24s TokuFT recovery scanning forward to %" PRIu64 " at %" PRIu64 " left %" PRIu64 " (%s)\n",
|
fprintf(stderr, "%.24s PerconaFT recovery scanning forward to %" PRIu64 " at %" PRIu64 " left %" PRIu64 " (%s)\n",
|
||||||
ctime(&tnow), lastlsn.lsn, thislsn.lsn, lastlsn.lsn - thislsn.lsn, recover_state(renv));
|
ctime(&tnow), lastlsn.lsn, thislsn.lsn, lastlsn.lsn - thislsn.lsn, recover_state(renv));
|
||||||
tlast = tnow;
|
tlast = tnow;
|
||||||
}
|
}
|
||||||
@@ -1574,7 +1518,7 @@ static int do_recovery(RECOVER_ENV renv, const char *env_dir, const char *log_di
|
|||||||
uint32_t n = recover_get_num_live_txns(renv);
|
uint32_t n = recover_get_num_live_txns(renv);
|
||||||
if (n > 0) {
|
if (n > 0) {
|
||||||
tnow = time(NULL);
|
tnow = time(NULL);
|
||||||
fprintf(stderr, "%.24s TokuFT recovery has %" PRIu32 " live transaction%s\n", ctime(&tnow), n, n > 1 ? "s" : "");
|
fprintf(stderr, "%.24s PerconaFT recovery has %" PRIu32 " live transaction%s\n", ctime(&tnow), n, n > 1 ? "s" : "");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
recover_abort_all_live_txns(renv);
|
recover_abort_all_live_txns(renv);
|
||||||
@@ -1582,7 +1526,7 @@ static int do_recovery(RECOVER_ENV renv, const char *env_dir, const char *log_di
|
|||||||
uint32_t n = recover_get_num_live_txns(renv);
|
uint32_t n = recover_get_num_live_txns(renv);
|
||||||
if (n > 0) {
|
if (n > 0) {
|
||||||
tnow = time(NULL);
|
tnow = time(NULL);
|
||||||
fprintf(stderr, "%.24s TokuFT recovery has %" PRIu32 " prepared transaction%s\n", ctime(&tnow), n, n > 1 ? "s" : "");
|
fprintf(stderr, "%.24s PerconaFT recovery has %" PRIu32 " prepared transaction%s\n", ctime(&tnow), n, n > 1 ? "s" : "");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1591,7 +1535,7 @@ static int do_recovery(RECOVER_ENV renv, const char *env_dir, const char *log_di
|
|||||||
n = file_map_get_num_dictionaries(&renv->fmap);
|
n = file_map_get_num_dictionaries(&renv->fmap);
|
||||||
if (n > 0) {
|
if (n > 0) {
|
||||||
tnow = time(NULL);
|
tnow = time(NULL);
|
||||||
fprintf(stderr, "%.24s TokuFT recovery closing %" PRIu32 " dictionar%s\n", ctime(&tnow), n, n > 1 ? "ies" : "y");
|
fprintf(stderr, "%.24s PerconaFT recovery closing %" PRIu32 " dictionar%s\n", ctime(&tnow), n, n > 1 ? "ies" : "y");
|
||||||
}
|
}
|
||||||
file_map_close_dictionaries(&renv->fmap, lastlsn);
|
file_map_close_dictionaries(&renv->fmap, lastlsn);
|
||||||
|
|
||||||
@@ -1603,17 +1547,17 @@ static int do_recovery(RECOVER_ENV renv, const char *env_dir, const char *log_di
|
|||||||
|
|
||||||
// checkpoint
|
// checkpoint
|
||||||
tnow = time(NULL);
|
tnow = time(NULL);
|
||||||
fprintf(stderr, "%.24s TokuFT recovery making a checkpoint\n", ctime(&tnow));
|
fprintf(stderr, "%.24s PerconaFT recovery making a checkpoint\n", ctime(&tnow));
|
||||||
r = toku_checkpoint(renv->cp, renv->logger, NULL, NULL, NULL, NULL, RECOVERY_CHECKPOINT);
|
r = toku_checkpoint(renv->cp, renv->logger, NULL, NULL, NULL, NULL, RECOVERY_CHECKPOINT);
|
||||||
assert(r == 0);
|
assert(r == 0);
|
||||||
tnow = time(NULL);
|
tnow = time(NULL);
|
||||||
fprintf(stderr, "%.24s TokuFT recovery done\n", ctime(&tnow));
|
fprintf(stderr, "%.24s PerconaFT recovery done\n", ctime(&tnow));
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
errorexit:
|
errorexit:
|
||||||
tnow = time(NULL);
|
tnow = time(NULL);
|
||||||
fprintf(stderr, "%.24s TokuFT recovery failed %d\n", ctime(&tnow), rr);
|
fprintf(stderr, "%.24s PerconaFT recovery failed %d\n", ctime(&tnow), rr);
|
||||||
|
|
||||||
if (logcursor) {
|
if (logcursor) {
|
||||||
r = toku_logcursor_destroy(&logcursor);
|
r = toku_logcursor_destroy(&logcursor);
|
||||||
85
storage/tokudb/PerconaFT/ft/logger/recover.h
Normal file
85
storage/tokudb/PerconaFT/ft/logger/recover.h
Normal file
@@ -0,0 +1,85 @@
|
|||||||
|
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||||
|
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||||
|
#ident "$Id$"
|
||||||
|
/*======
|
||||||
|
This file is part of PerconaFT.
|
||||||
|
|
||||||
|
|
||||||
|
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||||
|
|
||||||
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License, version 2,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
----------------------------------------
|
||||||
|
|
||||||
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU Affero General Public License, version 3,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Affero General Public License
|
||||||
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
======= */
|
||||||
|
|
||||||
|
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <db.h>
|
||||||
|
#include <errno.h>
|
||||||
|
|
||||||
|
#include "portability/memory.h"
|
||||||
|
#include "portability/toku_portability.h"
|
||||||
|
|
||||||
|
#include "ft/comparator.h"
|
||||||
|
#include "ft/ft-ops.h"
|
||||||
|
#include "util/x1764.h"
|
||||||
|
|
||||||
|
typedef void (*prepared_txn_callback_t)(DB_ENV *env, struct tokutxn *txn);
|
||||||
|
typedef void (*keep_cachetable_callback_t)(DB_ENV *env, struct cachetable *ct);
|
||||||
|
|
||||||
|
// Run tokuft recovery from the log
|
||||||
|
// Returns 0 if success
|
||||||
|
int tokuft_recover(DB_ENV *env,
|
||||||
|
prepared_txn_callback_t prepared_txn_callback,
|
||||||
|
keep_cachetable_callback_t keep_cachetable_callback,
|
||||||
|
struct tokulogger *logger,
|
||||||
|
const char *env_dir,
|
||||||
|
const char *log_dir,
|
||||||
|
ft_compare_func bt_compare,
|
||||||
|
ft_update_func update_function,
|
||||||
|
generate_row_for_put_func generate_row_for_put,
|
||||||
|
generate_row_for_del_func generate_row_for_del,
|
||||||
|
size_t cachetable_size);
|
||||||
|
|
||||||
|
// Effect: Check the tokuft logs to determine whether or not we need to run recovery.
|
||||||
|
// If the log is empty or if there is a clean shutdown at the end of the log, then we
|
||||||
|
// don't need to run recovery.
|
||||||
|
// Returns: true if we need recovery, otherwise false.
|
||||||
|
int tokuft_needs_recovery(const char *logdir, bool ignore_empty_log);
|
||||||
|
|
||||||
|
// Return 0 if recovery log exists, ENOENT if log is missing
|
||||||
|
int tokuft_recover_log_exists(const char * log_dir);
|
||||||
|
|
||||||
|
// For test only - set callbacks for recovery testing
|
||||||
|
void toku_recover_set_callback (void (*)(void*), void*);
|
||||||
|
void toku_recover_set_callback2 (void (*)(void*), void*);
|
||||||
|
|
||||||
|
extern int tokuft_recovery_trace;
|
||||||
|
|
||||||
|
int toku_recover_lock (const char *lock_dir, int *lockfd);
|
||||||
|
|
||||||
|
int toku_recover_unlock(int lockfd);
|
||||||
119
storage/tokudb/PerconaFT/ft/msg.cc
Normal file
119
storage/tokudb/PerconaFT/ft/msg.cc
Normal file
@@ -0,0 +1,119 @@
|
|||||||
|
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||||
|
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||||
|
#ident "$Id$"
|
||||||
|
/*======
|
||||||
|
This file is part of PerconaFT.
|
||||||
|
|
||||||
|
|
||||||
|
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||||
|
|
||||||
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License, version 2,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
----------------------------------------
|
||||||
|
|
||||||
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU Affero General Public License, version 3,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Affero General Public License
|
||||||
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
======= */
|
||||||
|
|
||||||
|
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||||
|
|
||||||
|
#include "portability/toku_portability.h"
|
||||||
|
|
||||||
|
#include "ft/msg.h"
|
||||||
|
#include "ft/txn/xids.h"
|
||||||
|
#include "util/dbt.h"
|
||||||
|
|
||||||
|
// Builds a message from a key, a value, a message type, an MSN and a set of
// XIDS.  The DBT structs are copied by value into the message; a null `key`
// or `val` pointer is replaced by the result of toku_empty_dbt().
ft_msg::ft_msg(const DBT *key, const DBT *val, enum ft_msg_type t, MSN m, XIDS x)
    : _key(key ? *key : toku_empty_dbt()),
      _val(val ? *val : toku_empty_dbt()),
      _type(t),
      _msn(m),
      _xids(x) {}
|
||||||
|
|
||||||
|
// Reads one message from `rb` and returns it.
// Fields are consumed in this order: type byte, freshness byte, MSN, XIDS,
// key bytes, value bytes.
//   x        - out: receives the XIDS created by toku_xids_create_from_buffer();
//              the returned message is constructed with *x
//   is_fresh - out: receives the freshness byte
ft_msg ft_msg::deserialize_from_rbuf(struct rbuf *rb, XIDS *x, bool *is_fresh) {
    const enum ft_msg_type msg_type = (enum ft_msg_type) rbuf_char(rb);
    *is_fresh = rbuf_char(rb);
    const MSN msg_msn = rbuf_MSN(rb);
    toku_xids_create_from_buffer(rb, x);

    const void *key_bytes;
    uint32_t key_len;
    rbuf_bytes(rb, &key_bytes, &key_len);

    const void *val_bytes;
    uint32_t val_len;
    rbuf_bytes(rb, &val_bytes, &val_len);

    DBT key_dbt, val_dbt;
    const DBT *key_arg = toku_fill_dbt(&key_dbt, key_bytes, key_len);
    const DBT *val_arg = toku_fill_dbt(&val_dbt, val_bytes, val_len);
    return ft_msg(key_arg, val_arg, msg_type, msg_msn, *x);
}
|
||||||
|
|
||||||
|
// Reads one message in the older layout from `rb` and returns it.
// This layout carries neither a freshness byte nor an MSN, so the caller
// supplies the MSN `m` to assign.  Fields are consumed in this order:
// type byte, XIDS, key bytes, value bytes.
//   x - out: receives the XIDS created by toku_xids_create_from_buffer();
//       the returned message is constructed with *x
ft_msg ft_msg::deserialize_from_rbuf_v13(struct rbuf *rb, MSN m, XIDS *x) {
    const enum ft_msg_type msg_type = (enum ft_msg_type) rbuf_char(rb);
    toku_xids_create_from_buffer(rb, x);

    const void *key_bytes;
    uint32_t key_len;
    rbuf_bytes(rb, &key_bytes, &key_len);

    const void *val_bytes;
    uint32_t val_len;
    rbuf_bytes(rb, &val_bytes, &val_len);

    DBT key_dbt, val_dbt;
    const DBT *key_arg = toku_fill_dbt(&key_dbt, key_bytes, key_len);
    const DBT *val_arg = toku_fill_dbt(&val_dbt, val_bytes, val_len);
    return ft_msg(key_arg, val_arg, msg_type, m, *x);
}
|
||||||
|
|
||||||
|
// Returns a pointer to the key DBT stored in this message.
const DBT *ft_msg::kdbt() const {
    const DBT *key = &_key;
    return key;
}
|
||||||
|
|
||||||
|
// Returns a pointer to the value DBT stored in this message.
const DBT *ft_msg::vdbt() const {
    const DBT *val = &_val;
    return val;
}
|
||||||
|
|
||||||
|
enum ft_msg_type ft_msg::type() const {
|
||||||
|
return _type;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns the message sequence number given at construction.
MSN ft_msg::msn() const {
    return this->_msn;
}
|
||||||
|
|
||||||
|
// Returns the XIDS handle given at construction.
XIDS ft_msg::xids() const {
    return this->_xids;
}
|
||||||
|
|
||||||
|
size_t ft_msg::total_size() const {
|
||||||
|
// Must store two 4-byte lengths
|
||||||
|
static const size_t key_val_overhead = 8;
|
||||||
|
|
||||||
|
// 1 byte type, 1 byte freshness, then 8 byte MSN
|
||||||
|
static const size_t msg_overhead = 2 + sizeof(MSN);
|
||||||
|
|
||||||
|
static const size_t total_overhead = key_val_overhead + msg_overhead;
|
||||||
|
|
||||||
|
const size_t keyval_size = _key.size + _val.size;
|
||||||
|
const size_t xids_size = toku_xids_get_serialize_size(xids());
|
||||||
|
return total_overhead + keyval_size + xids_size;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Writes this message to `wb`: type byte, freshness byte, MSN, XIDS,
// key bytes, value bytes.  This is the same field order that
// ft_msg::deserialize_from_rbuf() reads.
void ft_msg::serialize_to_wbuf(struct wbuf *wb, bool is_fresh) const {
    const unsigned char type_byte = (unsigned char) _type;
    const unsigned char fresh_byte = (unsigned char) is_fresh;

    // fixed header
    wbuf_nocrc_char(wb, type_byte);
    wbuf_nocrc_char(wb, fresh_byte);
    wbuf_MSN(wb, _msn);

    // transaction ids, then the key and value payloads
    wbuf_nocrc_xids(wb, _xids);
    wbuf_nocrc_bytes(wb, _key.data, _key.size);
    wbuf_nocrc_bytes(wb, _val.data, _val.size);
}
|
||||||
|
|
||||||
191
storage/tokudb/PerconaFT/ft/msg.h
Normal file
191
storage/tokudb/PerconaFT/ft/msg.h
Normal file
@@ -0,0 +1,191 @@
|
|||||||
|
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||||
|
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||||
|
#ident "$Id$"
|
||||||
|
/*======
|
||||||
|
This file is part of PerconaFT.
|
||||||
|
|
||||||
|
|
||||||
|
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||||
|
|
||||||
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License, version 2,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
----------------------------------------
|
||||||
|
|
||||||
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU Affero General Public License, version 3,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Affero General Public License
|
||||||
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
======= */
|
||||||
|
|
||||||
|
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||||
|
|
||||||
|
/* The purpose of this file is to provide access to the ft_msg,
|
||||||
|
* which is the ephemeral version of the messages that lives in
|
||||||
|
* a message buffer.
|
||||||
|
*/
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <db.h>
|
||||||
|
|
||||||
|
#include "portability/toku_assert.h"
|
||||||
|
#include "portability/toku_stdint.h"
|
||||||
|
|
||||||
|
#include "ft/txn/xids.h"
|
||||||
|
|
||||||
|
// Message Sequence Number (MSN)
|
||||||
|
// struct wrapper around the 64-bit sequence number
typedef struct __toku_msn { uint64_t msn; } MSN;
|
||||||
|
|
||||||
|
// dummy used for message construction, to be filled in when msg is applied to tree
|
||||||
|
static const MSN ZERO_MSN = { .msn = 0 };
|
||||||
|
|
||||||
|
// first 2^62 values reserved for messages created before Dr. No (for upgrade)
|
||||||
|
static const MSN MIN_MSN = { .msn = 1ULL << 62 };
|
||||||
|
// largest value the 64-bit msn field can hold
static const MSN MAX_MSN = { .msn = UINT64_MAX };
|
||||||
|
|
||||||
|
/* tree command types */
|
||||||
|
enum ft_msg_type {
|
||||||
|
FT_NONE = 0,
|
||||||
|
FT_INSERT = 1,
|
||||||
|
FT_DELETE_ANY = 2, // Delete any matching key. This used to be called FT_DELETE.
|
||||||
|
//FT_DELETE_BOTH = 3,
|
||||||
|
FT_ABORT_ANY = 4, // Abort any commands on any matching key.
|
||||||
|
//FT_ABORT_BOTH = 5, // Abort commands that match both the key and the value
|
||||||
|
FT_COMMIT_ANY = 6,
|
||||||
|
//FT_COMMIT_BOTH = 7,
|
||||||
|
FT_COMMIT_BROADCAST_ALL = 8, // Broadcast to all leafentries, (commit all transactions).
|
||||||
|
FT_COMMIT_BROADCAST_TXN = 9, // Broadcast to all leafentries, (commit specific transaction).
|
||||||
|
FT_ABORT_BROADCAST_TXN = 10, // Broadcast to all leafentries, (abort specific transaction).
|
||||||
|
FT_INSERT_NO_OVERWRITE = 11,
|
||||||
|
FT_OPTIMIZE = 12, // Broadcast
|
||||||
|
FT_OPTIMIZE_FOR_UPGRADE = 13, // same as FT_OPTIMIZE, but record version number in leafnode
|
||||||
|
FT_UPDATE = 14,
|
||||||
|
FT_UPDATE_BROADCAST_ALL = 15
|
||||||
|
};
|
||||||
|
|
||||||
|
static inline bool
|
||||||
|
ft_msg_type_applies_once(enum ft_msg_type type)
|
||||||
|
{
|
||||||
|
bool ret_val;
|
||||||
|
switch (type) {
|
||||||
|
case FT_INSERT_NO_OVERWRITE:
|
||||||
|
case FT_INSERT:
|
||||||
|
case FT_DELETE_ANY:
|
||||||
|
case FT_ABORT_ANY:
|
||||||
|
case FT_COMMIT_ANY:
|
||||||
|
case FT_UPDATE:
|
||||||
|
ret_val = true;
|
||||||
|
break;
|
||||||
|
case FT_COMMIT_BROADCAST_ALL:
|
||||||
|
case FT_COMMIT_BROADCAST_TXN:
|
||||||
|
case FT_ABORT_BROADCAST_TXN:
|
||||||
|
case FT_OPTIMIZE:
|
||||||
|
case FT_OPTIMIZE_FOR_UPGRADE:
|
||||||
|
case FT_UPDATE_BROADCAST_ALL:
|
||||||
|
case FT_NONE:
|
||||||
|
ret_val = false;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
assert(false);
|
||||||
|
}
|
||||||
|
return ret_val;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline bool
|
||||||
|
ft_msg_type_applies_all(enum ft_msg_type type)
|
||||||
|
{
|
||||||
|
bool ret_val;
|
||||||
|
switch (type) {
|
||||||
|
case FT_NONE:
|
||||||
|
case FT_INSERT_NO_OVERWRITE:
|
||||||
|
case FT_INSERT:
|
||||||
|
case FT_DELETE_ANY:
|
||||||
|
case FT_ABORT_ANY:
|
||||||
|
case FT_COMMIT_ANY:
|
||||||
|
case FT_UPDATE:
|
||||||
|
ret_val = false;
|
||||||
|
break;
|
||||||
|
case FT_COMMIT_BROADCAST_ALL:
|
||||||
|
case FT_COMMIT_BROADCAST_TXN:
|
||||||
|
case FT_ABORT_BROADCAST_TXN:
|
||||||
|
case FT_OPTIMIZE:
|
||||||
|
case FT_OPTIMIZE_FOR_UPGRADE:
|
||||||
|
case FT_UPDATE_BROADCAST_ALL:
|
||||||
|
ret_val = true;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
assert(false);
|
||||||
|
}
|
||||||
|
return ret_val;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline bool
|
||||||
|
ft_msg_type_does_nothing(enum ft_msg_type type)
|
||||||
|
{
|
||||||
|
return (type == FT_NONE);
|
||||||
|
}
|
||||||
|
|
||||||
|
class ft_msg {
|
||||||
|
public:
|
||||||
|
ft_msg(const DBT *key, const DBT *val, enum ft_msg_type t, MSN m, XIDS x);
|
||||||
|
|
||||||
|
enum ft_msg_type type() const;
|
||||||
|
|
||||||
|
MSN msn() const;
|
||||||
|
|
||||||
|
XIDS xids() const;
|
||||||
|
|
||||||
|
const DBT *kdbt() const;
|
||||||
|
|
||||||
|
const DBT *vdbt() const;
|
||||||
|
|
||||||
|
size_t total_size() const;
|
||||||
|
|
||||||
|
void serialize_to_wbuf(struct wbuf *wb, bool is_fresh) const;
|
||||||
|
|
||||||
|
// deserialization goes through a static factory function so the ft msg
|
||||||
|
// API stays completely const and there's no default constructor
|
||||||
|
static ft_msg deserialize_from_rbuf(struct rbuf *rb, XIDS *xids, bool *is_fresh);
|
||||||
|
|
||||||
|
// Version 13/14 messages did not have an msn - so `m' is the MSN
|
||||||
|
// that will be assigned to the message that gets deserialized.
|
||||||
|
static ft_msg deserialize_from_rbuf_v13(struct rbuf *rb, MSN m, XIDS *xids);
|
||||||
|
|
||||||
|
private:
|
||||||
|
const DBT _key;
|
||||||
|
const DBT _val;
|
||||||
|
enum ft_msg_type _type;
|
||||||
|
MSN _msn;
|
||||||
|
XIDS _xids;
|
||||||
|
};
|
||||||
|
|
||||||
|
// For serialize / deserialize
|
||||||
|
|
||||||
|
#include "ft/serialize/wbuf.h"
|
||||||
|
|
||||||
|
// Writes the 64-bit value of `msn` to `wb` via wbuf_ulonglong().
static inline void wbuf_MSN(struct wbuf *wb, MSN msn) {
    const uint64_t raw = msn.msn;
    wbuf_ulonglong(wb, raw);
}
|
||||||
|
|
||||||
|
#include "ft/serialize/rbuf.h"
|
||||||
|
|
||||||
|
// Reads one value from `rb` via rbuf_ulonglong() and returns it as an MSN.
static inline MSN rbuf_MSN(struct rbuf *rb) {
    MSN result;
    result.msn = rbuf_ulonglong(rb);
    return result;
}
|
||||||
292
storage/tokudb/PerconaFT/ft/msg_buffer.cc
Normal file
292
storage/tokudb/PerconaFT/ft/msg_buffer.cc
Normal file
@@ -0,0 +1,292 @@
|
|||||||
|
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||||
|
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||||
|
/*======
|
||||||
|
This file is part of PerconaFT.
|
||||||
|
|
||||||
|
|
||||||
|
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||||
|
|
||||||
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License, version 2,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
----------------------------------------
|
||||||
|
|
||||||
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU Affero General Public License, version 3,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Affero General Public License
|
||||||
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
======= */
|
||||||
|
|
||||||
|
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||||
|
|
||||||
|
#include "ft/msg_buffer.h"
|
||||||
|
#include "util/dbt.h"
|
||||||
|
|
||||||
|
// Puts the buffer into the empty state: no backing memory, zero entries,
// and all size counters at zero.
void message_buffer::create() {
    _memory = nullptr;
    _memory_size = 0;
    _memory_used = 0;
    _memory_usable = 0;
    _num_entries = 0;
}
|
||||||
|
|
||||||
|
// Makes this buffer a copy of *src.  A fresh block of src->_memory_size
// bytes is allocated and the whole block is copied; the entry count and the
// used-byte count are taken from src.  Any memory this buffer held before
// the call is not freed here.
void message_buffer::clone(message_buffer *src) {
    // allocate and copy the backing storage
    _memory_size = src->_memory_size;
    XMALLOC_N(_memory_size, _memory);
    memcpy(_memory, src->_memory, _memory_size);
    _memory_usable = toku_malloc_usable_size(_memory);

    // bookkeeping
    _memory_used = src->_memory_used;
    _num_entries = src->_num_entries;
}
|
||||||
|
|
||||||
|
void message_buffer::destroy() {
|
||||||
|
if (_memory != nullptr) {
|
||||||
|
toku_free(_memory);
|
||||||
|
_memory_usable = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void message_buffer::deserialize_from_rbuf(struct rbuf *rb,
|
||||||
|
int32_t **fresh_offsets, int32_t *nfresh,
|
||||||
|
int32_t **stale_offsets, int32_t *nstale,
|
||||||
|
int32_t **broadcast_offsets, int32_t *nbroadcast) {
|
||||||
|
// read the number of messages in this buffer
|
||||||
|
int n_in_this_buffer = rbuf_int(rb);
|
||||||
|
if (fresh_offsets != nullptr) {
|
||||||
|
XMALLOC_N(n_in_this_buffer, *fresh_offsets);
|
||||||
|
}
|
||||||
|
if (stale_offsets != nullptr) {
|
||||||
|
XMALLOC_N(n_in_this_buffer, *stale_offsets);
|
||||||
|
}
|
||||||
|
if (broadcast_offsets != nullptr) {
|
||||||
|
XMALLOC_N(n_in_this_buffer, *broadcast_offsets);
|
||||||
|
}
|
||||||
|
|
||||||
|
_resize(rb->size + 64); // rb->size is a good hint for how big the buffer will be
|
||||||
|
|
||||||
|
// deserialize each message individually, noting whether it was fresh
|
||||||
|
// and putting its buffer offset in the appropriate offsets array
|
||||||
|
for (int i = 0; i < n_in_this_buffer; i++) {
|
||||||
|
XIDS xids;
|
||||||
|
bool is_fresh;
|
||||||
|
const ft_msg msg = ft_msg::deserialize_from_rbuf(rb, &xids, &is_fresh);
|
||||||
|
|
||||||
|
int32_t *dest;
|
||||||
|
if (ft_msg_type_applies_once(msg.type())) {
|
||||||
|
if (is_fresh) {
|
||||||
|
dest = fresh_offsets ? *fresh_offsets + (*nfresh)++ : nullptr;
|
||||||
|
} else {
|
||||||
|
dest = stale_offsets ? *stale_offsets + (*nstale)++ : nullptr;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
invariant(ft_msg_type_applies_all(msg.type()) || ft_msg_type_does_nothing(msg.type()));
|
||||||
|
dest = broadcast_offsets ? *broadcast_offsets + (*nbroadcast)++ : nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
enqueue(msg, is_fresh, dest);
|
||||||
|
toku_xids_destroy(&xids);
|
||||||
|
}
|
||||||
|
|
||||||
|
invariant(_num_entries == n_in_this_buffer);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Reads a message count followed by that many old-layout messages (no MSN,
// no freshness byte) from `rb` and enqueues each one as fresh.
// *highest_unused_msn_for_upgrade is atomically decreased by the message
// count, and the messages receive consecutive MSNs starting just above the
// decreased value.  The MSN given to the last message is returned.
MSN message_buffer::deserialize_from_rbuf_v13(struct rbuf *rb,
                                              MSN *highest_unused_msn_for_upgrade,
                                              int32_t **fresh_offsets, int32_t *nfresh,
                                              int32_t **broadcast_offsets, int32_t *nbroadcast) {
    // the buffer starts with the number of messages it holds
    const int msg_count = rbuf_int(rb);

    if (fresh_offsets != nullptr) {
        XMALLOC_N(msg_count, *fresh_offsets);
    }
    if (broadcast_offsets != nullptr) {
        XMALLOC_N(msg_count, *broadcast_offsets);
    }

    // Atomically reserve msg_count MSNs by decrementing the header's count.
    MSN assigned_msn;
    assigned_msn.msn = toku_sync_sub_and_fetch(&highest_unused_msn_for_upgrade->msn, msg_count);

    for (int i = 0; i < msg_count; i++) {
        XIDS xids;
        // this version had no stale messages, so every message is fresh
        const bool is_fresh = true;

        // advance first: the last message ends up with the highest MSN
        assigned_msn.msn++;
        const ft_msg msg = ft_msg::deserialize_from_rbuf_v13(rb, assigned_msn, &xids);

        int32_t **offsets;
        int32_t *count;
        if (ft_msg_type_applies_once(msg.type())) {
            offsets = fresh_offsets;
            count = nfresh;
        } else {
            invariant(ft_msg_type_applies_all(msg.type()) || ft_msg_type_does_nothing(msg.type()));
            offsets = broadcast_offsets;
            count = nbroadcast;
        }

        // the counter is only touched when the matching array was requested
        int32_t *dest = nullptr;
        if (offsets != nullptr) {
            dest = *offsets + (*count)++;
        }

        enqueue(msg, is_fresh, dest);
        toku_xids_destroy(&xids);
    }

    return assigned_msn;
}
|
||||||
|
|
||||||
|
// Reallocates the backing memory to `new_size` bytes, then refreshes the
// recorded size and the cached usable size.
void message_buffer::_resize(size_t new_size) {
    XREALLOC_N(new_size, _memory);
    _memory_usable = toku_malloc_usable_size(_memory);
    _memory_size = new_size;
}
|
||||||
|
|
||||||
|
// Returns the smallest value of the form 4096 * 2^k (k >= 0) that is >= n.
// Every n <= 4096, including zero and negative values, yields 4096.
// Asserts if the result would not fit in an int.
static int next_power_of_two (int n) {
    // Largest value that can still be doubled without overflowing int.
    // ~0u >> 2 is UINT_MAX / 4, which equals INT_MAX / 2 on the usual
    // targets where UINT_MAX == 2 * INT_MAX + 1.
    const int max_doublable = (int) (~0u >> 2);
    int r = 4096;
    while (r < n) {
        // Check before doubling: signed overflow is undefined behavior, so
        // inspecting the sign of the result afterwards is not reliable.
        assert(r <= max_doublable);
        r *= 2;
    }
    return r;
}
|
||||||
|
|
||||||
|
struct message_buffer::buffer_entry *message_buffer::get_buffer_entry(int32_t offset) const {
|
||||||
|
return (struct buffer_entry *) (_memory + offset);
|
||||||
|
}
|
||||||
|
|
||||||
|
void message_buffer::enqueue(const ft_msg &msg, bool is_fresh, int32_t *offset) {
|
||||||
|
int need_space_here = msg_memsize_in_buffer(msg);
|
||||||
|
int need_space_total = _memory_used + need_space_here;
|
||||||
|
if (_memory == nullptr || need_space_total > _memory_size) {
|
||||||
|
// resize the buffer to the next power of 2 greater than the needed space
|
||||||
|
int next_2 = next_power_of_two(need_space_total);
|
||||||
|
_resize(next_2);
|
||||||
|
}
|
||||||
|
uint32_t keylen = msg.kdbt()->size;
|
||||||
|
uint32_t datalen = msg.vdbt()->size;
|
||||||
|
struct buffer_entry *entry = get_buffer_entry(_memory_used);
|
||||||
|
entry->type = (unsigned char) msg.type();
|
||||||
|
entry->msn = msg.msn();
|
||||||
|
toku_xids_cpy(&entry->xids_s, msg.xids());
|
||||||
|
entry->is_fresh = is_fresh;
|
||||||
|
unsigned char *e_key = toku_xids_get_end_of_array(&entry->xids_s);
|
||||||
|
entry->keylen = keylen;
|
||||||
|
memcpy(e_key, msg.kdbt()->data, keylen);
|
||||||
|
entry->vallen = datalen;
|
||||||
|
memcpy(e_key + keylen, msg.vdbt()->data, datalen);
|
||||||
|
if (offset) {
|
||||||
|
*offset = _memory_used;
|
||||||
|
}
|
||||||
|
_num_entries++;
|
||||||
|
_memory_used += need_space_here;
|
||||||
|
}
|
||||||
|
|
||||||
|
void message_buffer::set_freshness(int32_t offset, bool is_fresh) {
|
||||||
|
struct buffer_entry *entry = get_buffer_entry(offset);
|
||||||
|
entry->is_fresh = is_fresh;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool message_buffer::get_freshness(int32_t offset) const {
|
||||||
|
struct buffer_entry *entry = get_buffer_entry(offset);
|
||||||
|
return entry->is_fresh;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Builds an ft_msg for the entry stored at `offset`.
// `keydbt` and `valdbt` are filled to point at the key and value bytes
// inside the buffer; the xids handle points at the entry's embedded xids.
ft_msg message_buffer::get_message(int32_t offset, DBT *keydbt, DBT *valdbt) const {
    struct buffer_entry *entry = get_buffer_entry(offset);
    const XIDS xids = (XIDS) &entry->xids_s;

    // key bytes start where the xids array ends; value bytes follow the key
    const void *key_bytes = toku_xids_get_end_of_array(xids);
    const void *val_bytes = (uint8_t *) key_bytes + entry->keylen;

    const DBT *key_arg = toku_fill_dbt(keydbt, key_bytes, entry->keylen);
    const DBT *val_arg = toku_fill_dbt(valdbt, val_bytes, entry->vallen);
    return ft_msg(key_arg, val_arg, (enum ft_msg_type) entry->type, entry->msn, xids);
}
|
||||||
|
|
||||||
|
void message_buffer::get_message_key_msn(int32_t offset, DBT *key, MSN *msn) const {
|
||||||
|
struct buffer_entry *entry = get_buffer_entry(offset);
|
||||||
|
if (key != nullptr) {
|
||||||
|
toku_fill_dbt(key, toku_xids_get_end_of_array((XIDS) &entry->xids_s), entry->keylen);
|
||||||
|
}
|
||||||
|
if (msn != nullptr) {
|
||||||
|
*msn = entry->msn;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int message_buffer::num_entries() const {
|
||||||
|
return _num_entries;
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t message_buffer::buffer_size_in_use() const {
|
||||||
|
return _memory_used;
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t message_buffer::memory_size_in_use() const {
|
||||||
|
return sizeof(*this) + _memory_used;
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t message_buffer::memory_footprint() const {
|
||||||
|
#ifdef TOKU_DEBUG_PARANOID
|
||||||
|
// Enable this code if you want to verify that the new way of computing
|
||||||
|
// the memory footprint is the same as the old.
|
||||||
|
// It slows the code down by perhaps 10%.
|
||||||
|
assert(_memory_usable == toku_malloc_usable_size(_memory));
|
||||||
|
size_t fp = toku_memory_footprint(_memory, _memory_used);
|
||||||
|
size_t fpg = toku_memory_footprint_given_usable_size(_memory_used, _memory_usable);
|
||||||
|
if (fp != fpg) printf("ptr=%p mu=%ld fp=%ld fpg=%ld\n", _memory, _memory_usable, fp, fpg);
|
||||||
|
assert(fp == fpg);
|
||||||
|
#endif // TOKU_DEBUG_PARANOID
|
||||||
|
return sizeof(*this) + toku_memory_footprint_given_usable_size(_memory_used, _memory_usable);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns true when both buffers have the same number of used bytes and
// those bytes compare equal.
bool message_buffer::equals(message_buffer *other) const {
    if (_memory_used != other->_memory_used) {
        return false;
    }
    return memcmp(_memory, other->_memory, _memory_used) == 0;
}
|
||||||
|
|
||||||
|
void message_buffer::serialize_to_wbuf(struct wbuf *wb) const {
|
||||||
|
wbuf_nocrc_int(wb, _num_entries);
|
||||||
|
struct msg_serialize_fn {
|
||||||
|
struct wbuf *wb;
|
||||||
|
msg_serialize_fn(struct wbuf *w) : wb(w) { }
|
||||||
|
int operator()(const ft_msg &msg, bool is_fresh) {
|
||||||
|
msg.serialize_to_wbuf(wb, is_fresh);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
} serialize_fn(wb);
|
||||||
|
iterate(serialize_fn);
|
||||||
|
}
|
||||||
|
//void static stats(struct wbuf *wb) const {
|
||||||
|
// wbuf_nocrc_int(wb, _num_entries);
|
||||||
|
// struct msg_serialize_fn {
|
||||||
|
// struct wbuf *wb;
|
||||||
|
// msg_serialize_fn(struct wbuf *w) : wb(w) { }
|
||||||
|
// int operator()(const ft_msg &msg, bool is_fresh) {
|
||||||
|
// msg.serialize_to_wbuf(wb, is_fresh);
|
||||||
|
// return 0;
|
||||||
|
// }
|
||||||
|
// } serialize_fn(wb);
|
||||||
|
// iterate(serialize_fn);
|
||||||
|
//}
|
||||||
|
size_t message_buffer::msg_memsize_in_buffer(const ft_msg &msg) {
|
||||||
|
const uint32_t keylen = msg.kdbt()->size;
|
||||||
|
const uint32_t datalen = msg.vdbt()->size;
|
||||||
|
const size_t xidslen = toku_xids_get_size(msg.xids());
|
||||||
|
return sizeof(struct buffer_entry) + keylen + datalen + xidslen - sizeof(XIDS_S);
|
||||||
|
}
|
||||||
131
storage/tokudb/PerconaFT/ft/msg_buffer.h
Normal file
131
storage/tokudb/PerconaFT/ft/msg_buffer.h
Normal file
@@ -0,0 +1,131 @@
|
|||||||
|
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||||
|
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||||
|
/*======
|
||||||
|
This file is part of PerconaFT.
|
||||||
|
|
||||||
|
|
||||||
|
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||||
|
|
||||||
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License, version 2,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
----------------------------------------
|
||||||
|
|
||||||
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU Affero General Public License, version 3,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Affero General Public License
|
||||||
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
======= */
|
||||||
|
|
||||||
|
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "ft/msg.h"
|
||||||
|
#include "ft/txn/xids.h"
|
||||||
|
#include "util/dbt.h"
|
||||||
|
|
||||||
|
class message_buffer {
|
||||||
|
public:
|
||||||
|
void create();
|
||||||
|
|
||||||
|
void clone(message_buffer *dst);
|
||||||
|
|
||||||
|
void destroy();
|
||||||
|
|
||||||
|
// effect: deserializes a message buffer from the given rbuf
|
||||||
|
// returns: *fresh_offsets (etc) malloc'd to be num_entries large and
|
||||||
|
// populated with *nfresh (etc) offsets in the message buffer
|
||||||
|
// requires: if fresh_offsets (etc) != nullptr, then nfresh != nullptr
|
||||||
|
void deserialize_from_rbuf(struct rbuf *rb,
|
||||||
|
int32_t **fresh_offsets, int32_t *nfresh,
|
||||||
|
int32_t **stale_offsets, int32_t *nstale,
|
||||||
|
int32_t **broadcast_offsets, int32_t *nbroadcast);
|
||||||
|
|
||||||
|
// effect: deserializes a message buffer whose messages are at version 13/14
|
||||||
|
// returns: similar to deserialize_from_rbuf(), excpet there are no stale messages
|
||||||
|
// and each message is assigned a sequential value from *highest_unused_msn_for_upgrade,
|
||||||
|
// which is modified as needed using toku_sync_fech_and_sub()
|
||||||
|
// returns: the highest MSN assigned to any message in this buffer
|
||||||
|
// requires: similar to deserialize_from_rbuf(), and highest_unused_msn_for_upgrade != nullptr
|
||||||
|
MSN deserialize_from_rbuf_v13(struct rbuf *rb,
|
||||||
|
MSN *highest_unused_msn_for_upgrade,
|
||||||
|
int32_t **fresh_offsets, int32_t *nfresh,
|
||||||
|
int32_t **broadcast_offsets, int32_t *nbroadcast);
|
||||||
|
|
||||||
|
void enqueue(const ft_msg &msg, bool is_fresh, int32_t *offset);
|
||||||
|
|
||||||
|
void set_freshness(int32_t offset, bool is_fresh);
|
||||||
|
|
||||||
|
bool get_freshness(int32_t offset) const;
|
||||||
|
|
||||||
|
ft_msg get_message(int32_t offset, DBT *keydbt, DBT *valdbt) const;
|
||||||
|
|
||||||
|
void get_message_key_msn(int32_t offset, DBT *key, MSN *msn) const;
|
||||||
|
|
||||||
|
int num_entries() const;
|
||||||
|
|
||||||
|
size_t buffer_size_in_use() const;
|
||||||
|
|
||||||
|
size_t memory_size_in_use() const;
|
||||||
|
|
||||||
|
size_t memory_footprint() const;
|
||||||
|
|
||||||
|
template <typename F>
|
||||||
|
int iterate(F &fn) const {
|
||||||
|
for (int32_t offset = 0; offset < _memory_used; ) {
|
||||||
|
DBT k, v;
|
||||||
|
const ft_msg msg = get_message(offset, &k, &v);
|
||||||
|
bool is_fresh = get_freshness(offset);
|
||||||
|
int r = fn(msg, is_fresh);
|
||||||
|
if (r != 0) {
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
offset += msg_memsize_in_buffer(msg);
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool equals(message_buffer *other) const;
|
||||||
|
|
||||||
|
void serialize_to_wbuf(struct wbuf *wb) const;
|
||||||
|
|
||||||
|
static size_t msg_memsize_in_buffer(const ft_msg &msg);
|
||||||
|
|
||||||
|
private:
|
||||||
|
void _resize(size_t new_size);
|
||||||
|
|
||||||
|
// If this isn't packged, the compiler aligns the xids array and we waste a lot of space
|
||||||
|
struct __attribute__((__packed__)) buffer_entry {
|
||||||
|
unsigned int keylen;
|
||||||
|
unsigned int vallen;
|
||||||
|
unsigned char type;
|
||||||
|
bool is_fresh;
|
||||||
|
MSN msn;
|
||||||
|
XIDS_S xids_s;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct buffer_entry *get_buffer_entry(int32_t offset) const;
|
||||||
|
|
||||||
|
int _num_entries;
|
||||||
|
char *_memory; // An array of bytes into which buffer entries are embedded.
|
||||||
|
int _memory_size; // How big is _memory
|
||||||
|
int _memory_used; // How many bytes are in use?
|
||||||
|
size_t _memory_usable; // a cached result of toku_malloc_usable_size(_memory).
|
||||||
|
};
|
||||||
@@ -1,95 +1,39 @@
|
|||||||
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||||
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||||
|
/*======
|
||||||
|
This file is part of PerconaFT.
|
||||||
|
|
||||||
/*
|
|
||||||
COPYING CONDITIONS NOTICE:
|
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or modify
|
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||||
it under the terms of version 2 of the GNU General Public License as
|
|
||||||
published by the Free Software Foundation, and provided that the
|
|
||||||
following conditions are met:
|
|
||||||
|
|
||||||
* Redistributions of source code must retain this COPYING
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the
|
it under the terms of the GNU General Public License, version 2,
|
||||||
DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the
|
as published by the Free Software Foundation.
|
||||||
PATENT MARKING NOTICE (below), and the PATENT RIGHTS
|
|
||||||
GRANT (below).
|
|
||||||
|
|
||||||
* Redistributions in binary form must reproduce this COPYING
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
PATENT MARKING NOTICE (below), and the PATENT RIGHTS
|
GNU General Public License for more details.
|
||||||
GRANT (below) in the documentation and/or other materials
|
|
||||||
provided with the distribution.
|
|
||||||
|
|
||||||
You should have received a copy of the GNU General Public License
|
You should have received a copy of the GNU General Public License
|
||||||
along with this program; if not, write to the Free Software
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
|
||||||
02110-1301, USA.
|
|
||||||
|
|
||||||
COPYRIGHT NOTICE:
|
----------------------------------------
|
||||||
|
|
||||||
TokuFT, Tokutek Fractal Tree Indexing Library.
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
Copyright (C) 2007-2013 Tokutek, Inc.
|
it under the terms of the GNU Affero General Public License, version 3,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
DISCLAIMER:
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
This program is distributed in the hope that it will be useful, but
|
You should have received a copy of the GNU Affero General Public License
|
||||||
WITHOUT ANY WARRANTY; without even the implied warranty of
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
======= */
|
||||||
General Public License for more details.
|
|
||||||
|
|
||||||
UNIVERSITY PATENT NOTICE:
|
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||||
|
|
||||||
The technology is licensed by the Massachusetts Institute of
|
|
||||||
Technology, Rutgers State University of New Jersey, and the Research
|
|
||||||
Foundation of State University of New York at Stony Brook under
|
|
||||||
United States of America Serial No. 11/760379 and to the patents
|
|
||||||
and/or patent applications resulting from it.
|
|
||||||
|
|
||||||
PATENT MARKING NOTICE:
|
|
||||||
|
|
||||||
This software is covered by US Patent No. 8,185,551.
|
|
||||||
This software is covered by US Patent No. 8,489,638.
|
|
||||||
|
|
||||||
PATENT RIGHTS GRANT:
|
|
||||||
|
|
||||||
"THIS IMPLEMENTATION" means the copyrightable works distributed by
|
|
||||||
Tokutek as part of the Fractal Tree project.
|
|
||||||
|
|
||||||
"PATENT CLAIMS" means the claims of patents that are owned or
|
|
||||||
licensable by Tokutek, both currently or in the future; and that in
|
|
||||||
the absence of this license would be infringed by THIS
|
|
||||||
IMPLEMENTATION or by using or running THIS IMPLEMENTATION.
|
|
||||||
|
|
||||||
"PATENT CHALLENGE" shall mean a challenge to the validity,
|
|
||||||
patentability, enforceability and/or non-infringement of any of the
|
|
||||||
PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS.
|
|
||||||
|
|
||||||
Tokutek hereby grants to you, for the term and geographical scope of
|
|
||||||
the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free,
|
|
||||||
irrevocable (except as stated in this section) patent license to
|
|
||||||
make, have made, use, offer to sell, sell, import, transfer, and
|
|
||||||
otherwise run, modify, and propagate the contents of THIS
|
|
||||||
IMPLEMENTATION, where such license applies only to the PATENT
|
|
||||||
CLAIMS. This grant does not include claims that would be infringed
|
|
||||||
only as a consequence of further modifications of THIS
|
|
||||||
IMPLEMENTATION. If you or your agent or licensee institute or order
|
|
||||||
or agree to the institution of patent litigation against any entity
|
|
||||||
(including a cross-claim or counterclaim in a lawsuit) alleging that
|
|
||||||
THIS IMPLEMENTATION constitutes direct or contributory patent
|
|
||||||
infringement, or inducement of patent infringement, then any rights
|
|
||||||
granted to you under this License shall terminate as of the date
|
|
||||||
such litigation is filed. If you or your agent or exclusive
|
|
||||||
licensee institute or order or agree to the institution of a PATENT
|
|
||||||
CHALLENGE, then Tokutek may terminate any rights granted to you
|
|
||||||
under this License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved."
|
|
||||||
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it."
|
|
||||||
|
|
||||||
#include <config.h>
|
|
||||||
|
|
||||||
#include "ft/ft.h"
|
#include "ft/ft.h"
|
||||||
#include "ft/ft-internal.h"
|
#include "ft/ft-internal.h"
|
||||||
@@ -1,90 +1,39 @@
|
|||||||
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||||
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||||
|
/*======
|
||||||
|
This file is part of PerconaFT.
|
||||||
|
|
||||||
/*
|
|
||||||
COPYING CONDITIONS NOTICE:
|
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or modify
|
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||||
it under the terms of version 2 of the GNU General Public License as
|
|
||||||
published by the Free Software Foundation, and provided that the
|
|
||||||
following conditions are met:
|
|
||||||
|
|
||||||
* Redistributions of source code must retain this COPYING
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the
|
it under the terms of the GNU General Public License, version 2,
|
||||||
DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the
|
as published by the Free Software Foundation.
|
||||||
PATENT MARKING NOTICE (below), and the PATENT RIGHTS
|
|
||||||
GRANT (below).
|
|
||||||
|
|
||||||
* Redistributions in binary form must reproduce this COPYING
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
PATENT MARKING NOTICE (below), and the PATENT RIGHTS
|
GNU General Public License for more details.
|
||||||
GRANT (below) in the documentation and/or other materials
|
|
||||||
provided with the distribution.
|
|
||||||
|
|
||||||
You should have received a copy of the GNU General Public License
|
You should have received a copy of the GNU General Public License
|
||||||
along with this program; if not, write to the Free Software
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
|
||||||
02110-1301, USA.
|
|
||||||
|
|
||||||
COPYRIGHT NOTICE:
|
----------------------------------------
|
||||||
|
|
||||||
TokuFT, Tokutek Fractal Tree Indexing Library.
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
Copyright (C) 2007-2013 Tokutek, Inc.
|
it under the terms of the GNU Affero General Public License, version 3,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
DISCLAIMER:
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
This program is distributed in the hope that it will be useful, but
|
You should have received a copy of the GNU Affero General Public License
|
||||||
WITHOUT ANY WARRANTY; without even the implied warranty of
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
======= */
|
||||||
General Public License for more details.
|
|
||||||
|
|
||||||
UNIVERSITY PATENT NOTICE:
|
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||||
|
|
||||||
The technology is licensed by the Massachusetts Institute of
|
|
||||||
Technology, Rutgers State University of New Jersey, and the Research
|
|
||||||
Foundation of State University of New York at Stony Brook under
|
|
||||||
United States of America Serial No. 11/760379 and to the patents
|
|
||||||
and/or patent applications resulting from it.
|
|
||||||
|
|
||||||
PATENT MARKING NOTICE:
|
|
||||||
|
|
||||||
This software is covered by US Patent No. 8,185,551.
|
|
||||||
This software is covered by US Patent No. 8,489,638.
|
|
||||||
|
|
||||||
PATENT RIGHTS GRANT:
|
|
||||||
|
|
||||||
"THIS IMPLEMENTATION" means the copyrightable works distributed by
|
|
||||||
Tokutek as part of the Fractal Tree project.
|
|
||||||
|
|
||||||
"PATENT CLAIMS" means the claims of patents that are owned or
|
|
||||||
licensable by Tokutek, both currently or in the future; and that in
|
|
||||||
the absence of this license would be infringed by THIS
|
|
||||||
IMPLEMENTATION or by using or running THIS IMPLEMENTATION.
|
|
||||||
|
|
||||||
"PATENT CHALLENGE" shall mean a challenge to the validity,
|
|
||||||
patentability, enforceability and/or non-infringement of any of the
|
|
||||||
PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS.
|
|
||||||
|
|
||||||
Tokutek hereby grants to you, for the term and geographical scope of
|
|
||||||
the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free,
|
|
||||||
irrevocable (except as stated in this section) patent license to
|
|
||||||
make, have made, use, offer to sell, sell, import, transfer, and
|
|
||||||
otherwise run, modify, and propagate the contents of THIS
|
|
||||||
IMPLEMENTATION, where such license applies only to the PATENT
|
|
||||||
CLAIMS. This grant does not include claims that would be infringed
|
|
||||||
only as a consequence of further modifications of THIS
|
|
||||||
IMPLEMENTATION. If you or your agent or licensee institute or order
|
|
||||||
or agree to the institution of patent litigation against any entity
|
|
||||||
(including a cross-claim or counterclaim in a lawsuit) alleging that
|
|
||||||
THIS IMPLEMENTATION constitutes direct or contributory patent
|
|
||||||
infringement, or inducement of patent infringement, then any rights
|
|
||||||
granted to you under this License shall terminate as of the date
|
|
||||||
such litigation is filed. If you or your agent or exclusive
|
|
||||||
licensee institute or order or agree to the institution of a PATENT
|
|
||||||
CHALLENGE, then Tokutek may terminate any rights granted to you
|
|
||||||
under this License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
@@ -1,95 +1,39 @@
|
|||||||
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||||
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||||
|
/*======
|
||||||
|
This file is part of PerconaFT.
|
||||||
|
|
||||||
/*
|
|
||||||
COPYING CONDITIONS NOTICE:
|
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or modify
|
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||||
it under the terms of version 2 of the GNU General Public License as
|
|
||||||
published by the Free Software Foundation, and provided that the
|
|
||||||
following conditions are met:
|
|
||||||
|
|
||||||
* Redistributions of source code must retain this COPYING
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the
|
it under the terms of the GNU General Public License, version 2,
|
||||||
DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the
|
as published by the Free Software Foundation.
|
||||||
PATENT MARKING NOTICE (below), and the PATENT RIGHTS
|
|
||||||
GRANT (below).
|
|
||||||
|
|
||||||
* Redistributions in binary form must reproduce this COPYING
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
PATENT MARKING NOTICE (below), and the PATENT RIGHTS
|
GNU General Public License for more details.
|
||||||
GRANT (below) in the documentation and/or other materials
|
|
||||||
provided with the distribution.
|
|
||||||
|
|
||||||
You should have received a copy of the GNU General Public License
|
You should have received a copy of the GNU General Public License
|
||||||
along with this program; if not, write to the Free Software
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
|
||||||
02110-1301, USA.
|
|
||||||
|
|
||||||
COPYRIGHT NOTICE:
|
----------------------------------------
|
||||||
|
|
||||||
TokuFT, Tokutek Fractal Tree Indexing Library.
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
Copyright (C) 2007-2013 Tokutek, Inc.
|
it under the terms of the GNU Affero General Public License, version 3,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
DISCLAIMER:
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
This program is distributed in the hope that it will be useful, but
|
You should have received a copy of the GNU Affero General Public License
|
||||||
WITHOUT ANY WARRANTY; without even the implied warranty of
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
======= */
|
||||||
General Public License for more details.
|
|
||||||
|
|
||||||
UNIVERSITY PATENT NOTICE:
|
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||||
|
|
||||||
The technology is licensed by the Massachusetts Institute of
|
|
||||||
Technology, Rutgers State University of New Jersey, and the Research
|
|
||||||
Foundation of State University of New York at Stony Brook under
|
|
||||||
United States of America Serial No. 11/760379 and to the patents
|
|
||||||
and/or patent applications resulting from it.
|
|
||||||
|
|
||||||
PATENT MARKING NOTICE:
|
|
||||||
|
|
||||||
This software is covered by US Patent No. 8,185,551.
|
|
||||||
This software is covered by US Patent No. 8,489,638.
|
|
||||||
|
|
||||||
PATENT RIGHTS GRANT:
|
|
||||||
|
|
||||||
"THIS IMPLEMENTATION" means the copyrightable works distributed by
|
|
||||||
Tokutek as part of the Fractal Tree project.
|
|
||||||
|
|
||||||
"PATENT CLAIMS" means the claims of patents that are owned or
|
|
||||||
licensable by Tokutek, both currently or in the future; and that in
|
|
||||||
the absence of this license would be infringed by THIS
|
|
||||||
IMPLEMENTATION or by using or running THIS IMPLEMENTATION.
|
|
||||||
|
|
||||||
"PATENT CHALLENGE" shall mean a challenge to the validity,
|
|
||||||
patentability, enforceability and/or non-infringement of any of the
|
|
||||||
PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS.
|
|
||||||
|
|
||||||
Tokutek hereby grants to you, for the term and geographical scope of
|
|
||||||
the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free,
|
|
||||||
irrevocable (except as stated in this section) patent license to
|
|
||||||
make, have made, use, offer to sell, sell, import, transfer, and
|
|
||||||
otherwise run, modify, and propagate the contents of THIS
|
|
||||||
IMPLEMENTATION, where such license applies only to the PATENT
|
|
||||||
CLAIMS. This grant does not include claims that would be infringed
|
|
||||||
only as a consequence of further modifications of THIS
|
|
||||||
IMPLEMENTATION. If you or your agent or licensee institute or order
|
|
||||||
or agree to the institution of patent litigation against any entity
|
|
||||||
(including a cross-claim or counterclaim in a lawsuit) alleging that
|
|
||||||
THIS IMPLEMENTATION constitutes direct or contributory patent
|
|
||||||
infringement, or inducement of patent infringement, then any rights
|
|
||||||
granted to you under this License shall terminate as of the date
|
|
||||||
such litigation is filed. If you or your agent or exclusive
|
|
||||||
licensee institute or order or agree to the institution of a PATENT
|
|
||||||
CHALLENGE, then Tokutek may terminate any rights granted to you
|
|
||||||
under this License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved."
|
|
||||||
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it."
|
|
||||||
|
|
||||||
#include <config.h>
|
|
||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|
||||||
@@ -1,93 +1,40 @@
|
|||||||
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||||
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||||
/*
|
#ident "$Id$"
|
||||||
COPYING CONDITIONS NOTICE:
|
/*======
|
||||||
|
This file is part of PerconaFT.
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or modify
|
|
||||||
it under the terms of version 2 of the GNU General Public License as
|
|
||||||
published by the Free Software Foundation, and provided that the
|
|
||||||
following conditions are met:
|
|
||||||
|
|
||||||
* Redistributions of source code must retain this COPYING
|
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||||
CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the
|
|
||||||
DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the
|
|
||||||
PATENT MARKING NOTICE (below), and the PATENT RIGHTS
|
|
||||||
GRANT (below).
|
|
||||||
|
|
||||||
* Redistributions in binary form must reproduce this COPYING
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the
|
it under the terms of the GNU General Public License, version 2,
|
||||||
DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the
|
as published by the Free Software Foundation.
|
||||||
PATENT MARKING NOTICE (below), and the PATENT RIGHTS
|
|
||||||
GRANT (below) in the documentation and/or other materials
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
provided with the distribution.
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
You should have received a copy of the GNU General Public License
|
You should have received a copy of the GNU General Public License
|
||||||
along with this program; if not, write to the Free Software
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
|
||||||
02110-1301, USA.
|
|
||||||
|
|
||||||
COPYRIGHT NOTICE:
|
----------------------------------------
|
||||||
|
|
||||||
TokuFT, Tokutek Fractal Tree Indexing Library.
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
Copyright (C) 2007-2013 Tokutek, Inc.
|
it under the terms of the GNU Affero General Public License, version 3,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
DISCLAIMER:
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
This program is distributed in the hope that it will be useful, but
|
You should have received a copy of the GNU Affero General Public License
|
||||||
WITHOUT ANY WARRANTY; without even the implied warranty of
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
======= */
|
||||||
General Public License for more details.
|
|
||||||
|
|
||||||
UNIVERSITY PATENT NOTICE:
|
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||||
|
|
||||||
The technology is licensed by the Massachusetts Institute of
|
|
||||||
Technology, Rutgers State University of New Jersey, and the Research
|
|
||||||
Foundation of State University of New York at Stony Brook under
|
|
||||||
United States of America Serial No. 11/760379 and to the patents
|
|
||||||
and/or patent applications resulting from it.
|
|
||||||
|
|
||||||
PATENT MARKING NOTICE:
|
|
||||||
|
|
||||||
This software is covered by US Patent No. 8,185,551.
|
|
||||||
This software is covered by US Patent No. 8,489,638.
|
|
||||||
|
|
||||||
PATENT RIGHTS GRANT:
|
|
||||||
|
|
||||||
"THIS IMPLEMENTATION" means the copyrightable works distributed by
|
|
||||||
Tokutek as part of the Fractal Tree project.
|
|
||||||
|
|
||||||
"PATENT CLAIMS" means the claims of patents that are owned or
|
|
||||||
licensable by Tokutek, both currently or in the future; and that in
|
|
||||||
the absence of this license would be infringed by THIS
|
|
||||||
IMPLEMENTATION or by using or running THIS IMPLEMENTATION.
|
|
||||||
|
|
||||||
"PATENT CHALLENGE" shall mean a challenge to the validity,
|
|
||||||
patentability, enforceability and/or non-infringement of any of the
|
|
||||||
PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS.
|
|
||||||
|
|
||||||
Tokutek hereby grants to you, for the term and geographical scope of
|
|
||||||
the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free,
|
|
||||||
irrevocable (except as stated in this section) patent license to
|
|
||||||
make, have made, use, offer to sell, sell, import, transfer, and
|
|
||||||
otherwise run, modify, and propagate the contents of THIS
|
|
||||||
IMPLEMENTATION, where such license applies only to the PATENT
|
|
||||||
CLAIMS. This grant does not include claims that would be infringed
|
|
||||||
only as a consequence of further modifications of THIS
|
|
||||||
IMPLEMENTATION. If you or your agent or licensee institute or order
|
|
||||||
or agree to the institution of patent litigation against any entity
|
|
||||||
(including a cross-claim or counterclaim in a lawsuit) alleging that
|
|
||||||
THIS IMPLEMENTATION constitutes direct or contributory patent
|
|
||||||
infringement, or inducement of patent infringement, then any rights
|
|
||||||
granted to you under this License shall terminate as of the date
|
|
||||||
such litigation is filed. If you or your agent or exclusive
|
|
||||||
licensee institute or order or agree to the institution of a PATENT
|
|
||||||
CHALLENGE, then Tokutek may terminate any rights granted to you
|
|
||||||
under this License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ident "Copyright (c) 2009-2013 Tokutek Inc. All rights reserved."
|
|
||||||
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it."
|
|
||||||
#ident "$Id$"
|
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
|
|
||||||
214
storage/tokudb/PerconaFT/ft/serialize/block_allocator.h
Normal file
214
storage/tokudb/PerconaFT/ft/serialize/block_allocator.h
Normal file
@@ -0,0 +1,214 @@
|
|||||||
|
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||||
|
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||||
|
#ident "$Id$"
|
||||||
|
/*======
|
||||||
|
This file is part of PerconaFT.
|
||||||
|
|
||||||
|
|
||||||
|
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||||
|
|
||||||
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License, version 2,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
----------------------------------------
|
||||||
|
|
||||||
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU Affero General Public License, version 3,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Affero General Public License
|
||||||
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
======= */
|
||||||
|
|
||||||
|
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <db.h>
|
||||||
|
|
||||||
|
#include "portability/toku_pthread.h"
|
||||||
|
#include "portability/toku_stdint.h"
|
||||||
|
#include "portability/toku_stdlib.h"
|
||||||
|
|
||||||
|
// Block allocator.
|
||||||
|
//
|
||||||
|
// A block allocator manages the allocation of variable-sized blocks.
|
||||||
|
// The translation of block numbers to addresses is handled elsewhere.
|
||||||
|
// The allocation of block numbers is handled elsewhere.
|
||||||
|
//
|
||||||
|
// When creating a block allocator we also specify a certain-sized
|
||||||
|
// block at the beginning that is preallocated (and cannot be allocated or freed)
|
||||||
|
//
|
||||||
|
// We can allocate blocks of a particular size at a particular location.
|
||||||
|
// We can allocate blocks of a particular size at a location chosen by the allocator.
|
||||||
|
// We can free blocks.
|
||||||
|
// We can determine the size of a block.
|
||||||
|
|
||||||
|
class block_allocator {
|
||||||
|
public:
|
||||||
|
static const size_t BLOCK_ALLOCATOR_ALIGNMENT = 4096;
|
||||||
|
|
||||||
|
// How much must be reserved at the beginning for the block?
|
||||||
|
// The actual header is 8+4+4+8+8_4+8+ the length of the db names + 1 pointer for each root.
|
||||||
|
// So 4096 should be enough.
|
||||||
|
static const size_t BLOCK_ALLOCATOR_HEADER_RESERVE = 4096;
|
||||||
|
|
||||||
|
static_assert(BLOCK_ALLOCATOR_HEADER_RESERVE % BLOCK_ALLOCATOR_ALIGNMENT == 0,
|
||||||
|
"block allocator header must have proper alignment");
|
||||||
|
|
||||||
|
static const size_t BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE = BLOCK_ALLOCATOR_HEADER_RESERVE * 2;
|
||||||
|
|
||||||
|
// Placement policies accepted by set_strategy().
// The values are consecutive and start at 1.
enum allocation_strategy {
    BA_STRATEGY_FIRST_FIT  = 1,
    BA_STRATEGY_BEST_FIT   = 2,
    BA_STRATEGY_PADDED_FIT = 3,
    BA_STRATEGY_HEAT_ZONE  = 4
};
|
||||||
|
|
||||||
|
// An (offset, size) pair describing one block.
// Both comparison operators look at the offset only; size never takes part.
struct blockpair {
    uint64_t offset;
    uint64_t size;

    // Build a pair from a starting offset `o` and a size `s`.
    blockpair(uint64_t o, uint64_t s) {
        offset = o;
        size = s;
    }

    // Nonzero when this pair starts before `rhs`.
    int operator<(const blockpair &rhs) const {
        return rhs.offset > offset;
    }

    // Nonzero when this pair starts before the bare offset `o`.
    int operator<(const uint64_t &o) const {
        return o > offset;
    }
};
|
||||||
|
|
||||||
|
// Effect: Create a block allocator, in which the first RESERVE_AT_BEGINNING bytes are not put into a block.
|
||||||
|
// The default allocation strategy is first fit (BA_STRATEGY_FIRST_FIT)
|
||||||
|
// All blocks start on a multiple of ALIGNMENT.
|
||||||
|
// Aborts if we run out of memory.
|
||||||
|
// Parameters
|
||||||
|
// reserve_at_beginning (IN) Size of reserved block at beginning. This size does not have to be aligned.
|
||||||
|
// alignment (IN) Block alignment.
|
||||||
|
void create(uint64_t reserve_at_beginning, uint64_t alignment);
|
||||||
|
|
||||||
|
// Effect: Create a block allocator, in which the first RESERVE_AT_BEGINNING bytes are not put into a block.
|
||||||
|
// The default allocation strategy is first fit (BA_STRATEGY_FIRST_FIT)
|
||||||
|
// The allocator is initialized to contain `n_blocks' of blockpairs, taken from `pairs'
|
||||||
|
// All blocks start on a multiple of ALIGNMENT.
|
||||||
|
// Aborts if we run out of memory.
|
||||||
|
// Parameters
|
||||||
|
// pairs, unowned array of pairs to copy
|
||||||
|
// n_blocks, Size of pairs array
|
||||||
|
// reserve_at_beginning (IN) Size of reserved block at beginning. This size does not have to be aligned.
|
||||||
|
// alignment (IN) Block alignment.
|
||||||
|
void create_from_blockpairs(uint64_t reserve_at_beginning, uint64_t alignment,
|
||||||
|
struct blockpair *pairs, uint64_t n_blocks);
|
||||||
|
|
||||||
|
// Effect: Destroy this block allocator
|
||||||
|
void destroy();
|
||||||
|
|
||||||
|
// Effect: Set the allocation strategy that the allocator should use
|
||||||
|
// Requires: No other threads are operating on this block allocator
|
||||||
|
void set_strategy(enum allocation_strategy strategy);
|
||||||
|
|
||||||
|
// Effect: Allocate a block of the specified size at an address chosen by the allocator.
|
||||||
|
// Aborts if anything goes wrong.
|
||||||
|
// The block address will be a multiple of the alignment.
|
||||||
|
// Parameters:
|
||||||
|
// size (IN): The size of the block. (The size does not have to be aligned.)
|
||||||
|
// offset (OUT): The location of the block.
|
||||||
|
// heat (IN): A higher heat means we should be prepared to free this block soon (perhaps in the next checkpoint)
|
||||||
|
// Heat values are lexicographically ordered (like integers), but their specific values are arbitrary
|
||||||
|
void alloc_block(uint64_t size, uint64_t heat, uint64_t *offset);
|
||||||
|
|
||||||
|
// Effect: Free the block at offset.
|
||||||
|
// Requires: There must be a block currently allocated at that offset.
|
||||||
|
// Parameters:
|
||||||
|
// offset (IN): The offset of the block.
|
||||||
|
void free_block(uint64_t offset);
|
||||||
|
|
||||||
|
// Effect: Return the size of the block that starts at offset.
|
||||||
|
// Requires: There must be a block currently allocated at that offset.
|
||||||
|
// Parameters:
|
||||||
|
// offset (IN): The offset of the block.
|
||||||
|
uint64_t block_size(uint64_t offset);
|
||||||
|
|
||||||
|
// Effect: Check to see if the block allocator is OK. This may take a long time.
|
||||||
|
// Usage Hints: Probably only use this for unit tests.
|
||||||
|
// TODO: Private?
|
||||||
|
void validate() const;
|
||||||
|
|
||||||
|
// Effect: Return the unallocated block address of "infinite" size.
|
||||||
|
// That is, return the smallest address that is above all the allocated blocks.
|
||||||
|
uint64_t allocated_limit() const;
|
||||||
|
|
||||||
|
// Effect: Consider the blocks in sorted order. The reserved block at the beginning is number 0. The next one is number 1 and so forth.
|
||||||
|
// Return the offset and size of the block with that number.
|
||||||
|
// Return 0 if there is a block that big, return nonzero if b is too big.
|
||||||
|
// Rationale: This is probably useful only for tests.
|
||||||
|
int get_nth_block_in_layout_order(uint64_t b, uint64_t *offset, uint64_t *size);
|
||||||
|
|
||||||
|
// Effect: Fill in report to indicate how the file is used.
|
||||||
|
// Requires:
|
||||||
|
// report->file_size_bytes is filled in
|
||||||
|
// report->data_bytes is filled in
|
||||||
|
// report->checkpoint_bytes_additional is filled in
|
||||||
|
void get_unused_statistics(TOKU_DB_FRAGMENTATION report);
|
||||||
|
|
||||||
|
// Effect: Fill in report->data_bytes with the number of bytes in use
|
||||||
|
// Fill in report->data_blocks with the number of blockpairs in use
|
||||||
|
// Fill in unused statistics using this->get_unused_statistics()
|
||||||
|
// Requires:
|
||||||
|
// report->file_size is ignored on return
|
||||||
|
// report->checkpoint_bytes_additional is ignored on return
|
||||||
|
void get_statistics(TOKU_DB_FRAGMENTATION report);
|
||||||
|
|
||||||
|
// Block allocator tracing.
|
||||||
|
// - Enabled by setting TOKU_BA_TRACE_PATH to the file that the trace file
|
||||||
|
// should be written to.
|
||||||
|
// - Trace may be replayed by ba_trace_replay tool in tools/ directory
|
||||||
|
// eg: "cat mytracefile | ba_trace_replay"
|
||||||
|
static void maybe_initialize_trace();
|
||||||
|
static void maybe_close_trace();
|
||||||
|
|
||||||
|
private:
|
||||||
|
void _create_internal(uint64_t reserve_at_beginning, uint64_t alignment);
|
||||||
|
void grow_blocks_array_by(uint64_t n_to_add);
|
||||||
|
void grow_blocks_array();
|
||||||
|
int64_t find_block(uint64_t offset);
|
||||||
|
struct blockpair *choose_block_to_alloc_after(size_t size, uint64_t heat);
|
||||||
|
|
||||||
|
// Tracing
|
||||||
|
toku_mutex_t _trace_lock;
|
||||||
|
void _trace_create(void);
|
||||||
|
void _trace_create_from_blockpairs(void);
|
||||||
|
void _trace_destroy(void);
|
||||||
|
void _trace_alloc(uint64_t size, uint64_t heat, uint64_t offset);
|
||||||
|
void _trace_free(uint64_t offset);
|
||||||
|
|
||||||
|
// How much to reserve at the beginning
|
||||||
|
uint64_t _reserve_at_beginning;
|
||||||
|
// Block alignment
|
||||||
|
uint64_t _alignment;
|
||||||
|
// How many blocks
|
||||||
|
uint64_t _n_blocks;
|
||||||
|
// How big is the blocks_array. Must be >= n_blocks.
|
||||||
|
uint64_t _blocks_array_size;
|
||||||
|
// These blocks are sorted by address.
|
||||||
|
struct blockpair *_blocks_array;
|
||||||
|
// Including the reserve_at_beginning
|
||||||
|
uint64_t _n_bytes_in_use;
|
||||||
|
// The allocation strategy we are using
|
||||||
|
enum allocation_strategy _strategy;
|
||||||
|
};
|
||||||
@@ -0,0 +1,224 @@
|
|||||||
|
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||||
|
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||||
|
#ident "$Id$"
|
||||||
|
/*======
|
||||||
|
This file is part of PerconaFT.
|
||||||
|
|
||||||
|
|
||||||
|
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||||
|
|
||||||
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License, version 2,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
----------------------------------------
|
||||||
|
|
||||||
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU Affero General Public License, version 3,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Affero General Public License
|
||||||
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
======= */
|
||||||
|
|
||||||
|
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||||
|
|
||||||
|
#include <algorithm>
|
||||||
|
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
#include "portability/toku_assert.h"
|
||||||
|
|
||||||
|
#include "ft/serialize/block_allocator_strategy.h"
|
||||||
|
|
||||||
|
// Round `value' up to the nearest multiple of `ba_alignment'.
// A value that is already a multiple is returned unchanged.
// ba_alignment is used as a divisor, so it must be nonzero.
static uint64_t _align(uint64_t value, uint64_t ba_alignment) {
    const uint64_t n_units = (value + ba_alignment - 1) / ba_alignment;
    return n_units * ba_alignment;
}
|
||||||
|
|
||||||
|
static uint64_t _roundup_to_power_of_two(uint64_t value) {
|
||||||
|
uint64_t r = 4096;
|
||||||
|
while (r < value) {
|
||||||
|
r *= 2;
|
||||||
|
invariant(r > 0);
|
||||||
|
}
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
// First fit block allocation
|
||||||
|
static struct block_allocator::blockpair *
|
||||||
|
_first_fit(struct block_allocator::blockpair *blocks_array,
|
||||||
|
uint64_t n_blocks, uint64_t size, uint64_t alignment,
|
||||||
|
uint64_t max_padding) {
|
||||||
|
if (n_blocks == 1) {
|
||||||
|
// won't enter loop, can't underflow the direction < 0 case
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
struct block_allocator::blockpair *bp = &blocks_array[0];
|
||||||
|
for (uint64_t n_spaces_to_check = n_blocks - 1; n_spaces_to_check > 0;
|
||||||
|
n_spaces_to_check--, bp++) {
|
||||||
|
// Consider the space after bp
|
||||||
|
uint64_t padded_alignment = max_padding != 0 ? _align(max_padding, alignment) : alignment;
|
||||||
|
uint64_t possible_offset = _align(bp->offset + bp->size, padded_alignment);
|
||||||
|
if (possible_offset + size <= bp[1].offset) { // bp[1] is always valid since bp < &blocks_array[n_blocks-1]
|
||||||
|
invariant(bp - blocks_array < (int64_t) n_blocks);
|
||||||
|
return bp;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
static struct block_allocator::blockpair *
|
||||||
|
_first_fit_bw(struct block_allocator::blockpair *blocks_array,
|
||||||
|
uint64_t n_blocks, uint64_t size, uint64_t alignment,
|
||||||
|
uint64_t max_padding, struct block_allocator::blockpair *blocks_array_limit) {
|
||||||
|
if (n_blocks == 1) {
|
||||||
|
// won't enter loop, can't underflow the direction < 0 case
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
struct block_allocator::blockpair *bp = &blocks_array[-1];
|
||||||
|
for (uint64_t n_spaces_to_check = n_blocks - 1; n_spaces_to_check > 0;
|
||||||
|
n_spaces_to_check--, bp--) {
|
||||||
|
// Consider the space after bp
|
||||||
|
uint64_t padded_alignment = max_padding != 0 ? _align(max_padding, alignment) : alignment;
|
||||||
|
uint64_t possible_offset = _align(bp->offset + bp->size, padded_alignment);
|
||||||
|
if (&bp[1] < blocks_array_limit && possible_offset + size <= bp[1].offset) {
|
||||||
|
invariant(blocks_array - bp < (int64_t) n_blocks);
|
||||||
|
return bp;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
struct block_allocator::blockpair *
|
||||||
|
block_allocator_strategy::first_fit(struct block_allocator::blockpair *blocks_array,
|
||||||
|
uint64_t n_blocks, uint64_t size, uint64_t alignment) {
|
||||||
|
return _first_fit(blocks_array, n_blocks, size, alignment, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Best fit block allocation
|
||||||
|
struct block_allocator::blockpair *
|
||||||
|
block_allocator_strategy::best_fit(struct block_allocator::blockpair *blocks_array,
|
||||||
|
uint64_t n_blocks, uint64_t size, uint64_t alignment) {
|
||||||
|
struct block_allocator::blockpair *best_bp = nullptr;
|
||||||
|
uint64_t best_hole_size = 0;
|
||||||
|
for (uint64_t blocknum = 0; blocknum + 1 < n_blocks; blocknum++) {
|
||||||
|
// Consider the space after blocknum
|
||||||
|
struct block_allocator::blockpair *bp = &blocks_array[blocknum];
|
||||||
|
uint64_t possible_offset = _align(bp->offset + bp->size, alignment);
|
||||||
|
uint64_t possible_end_offset = possible_offset + size;
|
||||||
|
if (possible_end_offset <= bp[1].offset) {
|
||||||
|
// It fits here. Is it the best fit?
|
||||||
|
uint64_t hole_size = bp[1].offset - possible_end_offset;
|
||||||
|
if (best_bp == nullptr || hole_size < best_hole_size) {
|
||||||
|
best_hole_size = hole_size;
|
||||||
|
best_bp = bp;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return best_bp;
|
||||||
|
}
|
||||||
|
|
||||||
|
static uint64_t padded_fit_alignment = 4096;
|
||||||
|
|
||||||
|
// TODO: These compiler specific directives should be abstracted in a portability header
|
||||||
|
// portability/toku_compiler.h?
|
||||||
|
__attribute__((__constructor__))
|
||||||
|
static void determine_padded_fit_alignment_from_env(void) {
|
||||||
|
// TODO: Should be in portability as 'toku_os_getenv()?'
|
||||||
|
const char *s = getenv("TOKU_BA_PADDED_FIT_ALIGNMENT");
|
||||||
|
if (s != nullptr && strlen(s) > 0) {
|
||||||
|
const int64_t alignment = strtoll(s, nullptr, 10);
|
||||||
|
if (alignment <= 0) {
|
||||||
|
fprintf(stderr, "tokuft: error: block allocator padded fit alignment found in environment (%s), "
|
||||||
|
"but it's out of range (should be an integer > 0). defaulting to %" PRIu64 "\n",
|
||||||
|
s, padded_fit_alignment);
|
||||||
|
} else {
|
||||||
|
padded_fit_alignment = _roundup_to_power_of_two(alignment);
|
||||||
|
fprintf(stderr, "tokuft: setting block allocator padded fit alignment to %" PRIu64 "\n",
|
||||||
|
padded_fit_alignment);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// First fit into a block that is oversized by up to max_padding.
|
||||||
|
// The hope is that if we purposefully waste a bit of space at allocation
|
||||||
|
// time we'll be more likely to reuse this block later.
|
||||||
|
struct block_allocator::blockpair *
|
||||||
|
block_allocator_strategy::padded_fit(struct block_allocator::blockpair *blocks_array,
|
||||||
|
uint64_t n_blocks, uint64_t size, uint64_t alignment) {
|
||||||
|
return _first_fit(blocks_array, n_blocks, size, alignment, padded_fit_alignment);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fraction of the highest allocated offset at which the hot zone begins.
// Can be overridden through the TOKU_BA_HOT_ZONE_THRESHOLD environment
// variable, given as a percentage from 1 through 99.
static double hot_zone_threshold = 0.85;

// TODO: These compiler specific directives should be abstracted in a portability header
//       portability/toku_compiler.h?
// Marked as a constructor so that the environment is consulted automatically.
// An unset or empty variable leaves the threshold untouched. A value in
// [1, 99] is stored as a fraction (value / 100); anything else is reported
// on stderr and the threshold is reset to the default of 0.85.
__attribute__((__constructor__))
static void determine_hot_zone_threshold_from_env(void) {
    // TODO: Should be in portability as 'toku_os_getenv()?'
    const char *s = getenv("TOKU_BA_HOT_ZONE_THRESHOLD");
    if (s != nullptr && strlen(s) > 0) {
        const double hot_zone = strtod(s, nullptr);
        if (hot_zone < 1 || hot_zone > 99) {
            fprintf(stderr, "tokuft: error: block allocator hot zone threshold found in environment (%s), "
                            "but it's out of range (should be an integer 1 through 99). defaulting to 85\n", s);
            // Must be a floating point constant: the integer expression
            // 85 / 100 evaluates to 0 and would make the threshold 0.0.
            hot_zone_threshold = 0.85;
        } else {
            fprintf(stderr, "tokuft: setting block allocator hot zone threshold to %s\n", s);
            hot_zone_threshold = hot_zone / 100;
        }
    }
}
|
||||||
|
|
||||||
|
struct block_allocator::blockpair *
|
||||||
|
block_allocator_strategy::heat_zone(struct block_allocator::blockpair *blocks_array,
|
||||||
|
uint64_t n_blocks, uint64_t size, uint64_t alignment,
|
||||||
|
uint64_t heat) {
|
||||||
|
if (heat > 0) {
|
||||||
|
struct block_allocator::blockpair *bp, *boundary_bp;
|
||||||
|
|
||||||
|
// Hot allocation. Find the beginning of the hot zone.
|
||||||
|
boundary_bp = &blocks_array[n_blocks - 1];
|
||||||
|
uint64_t highest_offset = _align(boundary_bp->offset + boundary_bp->size, alignment);
|
||||||
|
uint64_t hot_zone_offset = static_cast<uint64_t>(hot_zone_threshold * highest_offset);
|
||||||
|
|
||||||
|
boundary_bp = std::lower_bound(blocks_array, blocks_array + n_blocks, hot_zone_offset);
|
||||||
|
uint64_t blocks_in_zone = (blocks_array + n_blocks) - boundary_bp;
|
||||||
|
uint64_t blocks_outside_zone = boundary_bp - blocks_array;
|
||||||
|
invariant(blocks_in_zone + blocks_outside_zone == n_blocks);
|
||||||
|
|
||||||
|
if (blocks_in_zone > 0) {
|
||||||
|
// Find the first fit in the hot zone, going forward.
|
||||||
|
bp = _first_fit(boundary_bp, blocks_in_zone, size, alignment, 0);
|
||||||
|
if (bp != nullptr) {
|
||||||
|
return bp;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (blocks_outside_zone > 0) {
|
||||||
|
// Find the first fit in the cold zone, going backwards.
|
||||||
|
bp = _first_fit_bw(boundary_bp, blocks_outside_zone, size, alignment, 0, &blocks_array[n_blocks]);
|
||||||
|
if (bp != nullptr) {
|
||||||
|
return bp;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Cold allocations are simply first-fit from the beginning.
|
||||||
|
return _first_fit(blocks_array, n_blocks, size, alignment, 0);
|
||||||
|
}
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
@@ -0,0 +1,65 @@
|
|||||||
|
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||||
|
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||||
|
#ident "$Id$"
|
||||||
|
/*======
|
||||||
|
This file is part of PerconaFT.
|
||||||
|
|
||||||
|
|
||||||
|
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||||
|
|
||||||
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License, version 2,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
----------------------------------------
|
||||||
|
|
||||||
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU Affero General Public License, version 3,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Affero General Public License
|
||||||
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
======= */
|
||||||
|
|
||||||
|
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <db.h>
|
||||||
|
|
||||||
|
#include "ft/serialize/block_allocator.h"
|
||||||
|
|
||||||
|
// Block allocation strategy implementations
|
||||||
|
|
||||||
|
class block_allocator_strategy {
|
||||||
|
public:
|
||||||
|
static struct block_allocator::blockpair *
|
||||||
|
first_fit(struct block_allocator::blockpair *blocks_array,
|
||||||
|
uint64_t n_blocks, uint64_t size, uint64_t alignment);
|
||||||
|
|
||||||
|
static struct block_allocator::blockpair *
|
||||||
|
best_fit(struct block_allocator::blockpair *blocks_array,
|
||||||
|
uint64_t n_blocks, uint64_t size, uint64_t alignment);
|
||||||
|
|
||||||
|
static struct block_allocator::blockpair *
|
||||||
|
padded_fit(struct block_allocator::blockpair *blocks_array,
|
||||||
|
uint64_t n_blocks, uint64_t size, uint64_t alignment);
|
||||||
|
|
||||||
|
static struct block_allocator::blockpair *
|
||||||
|
heat_zone(struct block_allocator::blockpair *blocks_array,
|
||||||
|
uint64_t n_blocks, uint64_t size, uint64_t alignment,
|
||||||
|
uint64_t heat);
|
||||||
|
};
|
||||||
@@ -1,95 +1,40 @@
|
|||||||
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||||
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||||
#ident "$Id$"
|
#ident "$Id$"
|
||||||
/*
|
/*======
|
||||||
COPYING CONDITIONS NOTICE:
|
This file is part of PerconaFT.
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or modify
|
|
||||||
it under the terms of version 2 of the GNU General Public License as
|
|
||||||
published by the Free Software Foundation, and provided that the
|
|
||||||
following conditions are met:
|
|
||||||
|
|
||||||
* Redistributions of source code must retain this COPYING
|
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||||
CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the
|
|
||||||
DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the
|
|
||||||
PATENT MARKING NOTICE (below), and the PATENT RIGHTS
|
|
||||||
GRANT (below).
|
|
||||||
|
|
||||||
* Redistributions in binary form must reproduce this COPYING
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the
|
it under the terms of the GNU General Public License, version 2,
|
||||||
DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the
|
as published by the Free Software Foundation.
|
||||||
PATENT MARKING NOTICE (below), and the PATENT RIGHTS
|
|
||||||
GRANT (below) in the documentation and/or other materials
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
provided with the distribution.
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
You should have received a copy of the GNU General Public License
|
You should have received a copy of the GNU General Public License
|
||||||
along with this program; if not, write to the Free Software
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
|
||||||
02110-1301, USA.
|
|
||||||
|
|
||||||
COPYRIGHT NOTICE:
|
----------------------------------------
|
||||||
|
|
||||||
TokuFT, Tokutek Fractal Tree Indexing Library.
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
Copyright (C) 2007-2013 Tokutek, Inc.
|
it under the terms of the GNU Affero General Public License, version 3,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
DISCLAIMER:
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
This program is distributed in the hope that it will be useful, but
|
You should have received a copy of the GNU Affero General Public License
|
||||||
WITHOUT ANY WARRANTY; without even the implied warranty of
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
======= */
|
||||||
General Public License for more details.
|
|
||||||
|
|
||||||
UNIVERSITY PATENT NOTICE:
|
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||||
|
|
||||||
The technology is licensed by the Massachusetts Institute of
|
|
||||||
Technology, Rutgers State University of New Jersey, and the Research
|
|
||||||
Foundation of State University of New York at Stony Brook under
|
|
||||||
United States of America Serial No. 11/760379 and to the patents
|
|
||||||
and/or patent applications resulting from it.
|
|
||||||
|
|
||||||
PATENT MARKING NOTICE:
|
|
||||||
|
|
||||||
This software is covered by US Patent No. 8,185,551.
|
|
||||||
This software is covered by US Patent No. 8,489,638.
|
|
||||||
|
|
||||||
PATENT RIGHTS GRANT:
|
|
||||||
|
|
||||||
"THIS IMPLEMENTATION" means the copyrightable works distributed by
|
|
||||||
Tokutek as part of the Fractal Tree project.
|
|
||||||
|
|
||||||
"PATENT CLAIMS" means the claims of patents that are owned or
|
|
||||||
licensable by Tokutek, both currently or in the future; and that in
|
|
||||||
the absence of this license would be infringed by THIS
|
|
||||||
IMPLEMENTATION or by using or running THIS IMPLEMENTATION.
|
|
||||||
|
|
||||||
"PATENT CHALLENGE" shall mean a challenge to the validity,
|
|
||||||
patentability, enforceability and/or non-infringement of any of the
|
|
||||||
PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS.
|
|
||||||
|
|
||||||
Tokutek hereby grants to you, for the term and geographical scope of
|
|
||||||
the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free,
|
|
||||||
irrevocable (except as stated in this section) patent license to
|
|
||||||
make, have made, use, offer to sell, sell, import, transfer, and
|
|
||||||
otherwise run, modify, and propagate the contents of THIS
|
|
||||||
IMPLEMENTATION, where such license applies only to the PATENT
|
|
||||||
CLAIMS. This grant does not include claims that would be infringed
|
|
||||||
only as a consequence of further modifications of THIS
|
|
||||||
IMPLEMENTATION. If you or your agent or licensee institute or order
|
|
||||||
or agree to the institution of patent litigation against any entity
|
|
||||||
(including a cross-claim or counterclaim in a lawsuit) alleging that
|
|
||||||
THIS IMPLEMENTATION constitutes direct or contributory patent
|
|
||||||
infringement, or inducement of patent infringement, then any rights
|
|
||||||
granted to you under this License shall terminate as of the date
|
|
||||||
such litigation is filed. If you or your agent or exclusive
|
|
||||||
licensee institute or order or agree to the institution of a PATENT
|
|
||||||
CHALLENGE, then Tokutek may terminate any rights granted to you
|
|
||||||
under this License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved."
|
|
||||||
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it."
|
|
||||||
|
|
||||||
#include <config.h>
|
|
||||||
|
|
||||||
#include "portability/memory.h"
|
#include "portability/memory.h"
|
||||||
#include "portability/toku_assert.h"
|
#include "portability/toku_assert.h"
|
||||||
285
storage/tokudb/PerconaFT/ft/serialize/block_table.h
Normal file
285
storage/tokudb/PerconaFT/ft/serialize/block_table.h
Normal file
@@ -0,0 +1,285 @@
|
|||||||
|
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||||
|
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||||
|
#ident "$Id$"
|
||||||
|
/*======
|
||||||
|
This file is part of PerconaFT.
|
||||||
|
|
||||||
|
|
||||||
|
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||||
|
|
||||||
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License, version 2,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
----------------------------------------
|
||||||
|
|
||||||
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU Affero General Public License, version 3,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Affero General Public License
|
||||||
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
======= */
|
||||||
|
|
||||||
|
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <db.h>
|
||||||
|
|
||||||
|
#include "portability/toku_stdint.h"
|
||||||
|
#include "portability/toku_pthread.h"
|
||||||
|
|
||||||
|
#include "ft/serialize/block_allocator.h"
|
||||||
|
#include "util/nb_mutex.h"
|
||||||
|
|
||||||
|
struct ft;
|
||||||
|
|
||||||
|
// A block number, carried in a single-member struct.
typedef struct blocknum_s {
    int64_t b;
} BLOCKNUM;

// Offset in a disk. -1 is the 'null' pointer.
typedef int64_t DISKOFF;

// Unmovable reserved first, then reallocable.
// We reserve one blocknum for the translation table itself.
enum {
    RESERVED_BLOCKNUM_NULL = 0,
    RESERVED_BLOCKNUM_TRANSLATION = 1,
    RESERVED_BLOCKNUM_DESCRIPTOR = 2,
    // one past the last reserved value, i.e. the number of reserved blocknums
    RESERVED_BLOCKNUMS
};

// Signature of the per-block callback accepted by block_table::iterate().
typedef int (*BLOCKTABLE_CALLBACK)(BLOCKNUM b, int64_t size, int64_t address, void *extra);

// Wrap a raw integer in a BLOCKNUM.
static inline BLOCKNUM make_blocknum(int64_t b) {
    BLOCKNUM wrapped;
    wrapped.b = b;
    return wrapped;
}

static const BLOCKNUM ROLLBACK_NONE = { .b = 0 };
|
||||||
|
|
||||||
|
/**
|
||||||
|
* There are three copies of the translation table (btt) in the block table:
|
||||||
|
*
|
||||||
|
* checkpointed Is initialized by deserializing from disk,
|
||||||
|
* and is the only version ever read from disk.
|
||||||
|
* When read from disk it is copied to current.
|
||||||
|
* It is immutable. It can be replaced by an inprogress btt.
|
||||||
|
*
|
||||||
|
* inprogress Is only filled by copying from current,
|
||||||
|
* and is the only version ever serialized to disk.
|
||||||
|
* (It is serialized to disk on checkpoint and clean shutdown.)
|
||||||
|
* At end of checkpoint it replaces 'checkpointed'.
|
||||||
|
* During a checkpoint, any 'pending' dirty writes will update
|
||||||
|
* inprogress.
|
||||||
|
*
|
||||||
|
* current Is initialized by copying from checkpointed,
|
||||||
|
* is the only version ever modified while the database is in use,
|
||||||
|
* and is the only version ever copied to inprogress.
|
||||||
|
* It is never stored on disk.
|
||||||
|
*/
|
||||||
|
class block_table {
|
||||||
|
public:
|
||||||
|
enum translation_type {
|
||||||
|
TRANSLATION_NONE = 0,
|
||||||
|
TRANSLATION_CURRENT,
|
||||||
|
TRANSLATION_INPROGRESS,
|
||||||
|
TRANSLATION_CHECKPOINTED,
|
||||||
|
TRANSLATION_DEBUG
|
||||||
|
};
|
||||||
|
|
||||||
|
void create();
|
||||||
|
|
||||||
|
int create_from_buffer(int fd, DISKOFF location_on_disk, DISKOFF size_on_disk, unsigned char *translation_buffer);
|
||||||
|
|
||||||
|
void destroy();
|
||||||
|
|
||||||
|
// Checkpointing
|
||||||
|
void note_start_checkpoint_unlocked();
|
||||||
|
void note_end_checkpoint(int fd);
|
||||||
|
void note_skipped_checkpoint();
|
||||||
|
void maybe_truncate_file_on_open(int fd);
|
||||||
|
|
||||||
|
// Blocknums
|
||||||
|
void allocate_blocknum(BLOCKNUM *res, struct ft *ft);
|
||||||
|
void realloc_on_disk(BLOCKNUM b, DISKOFF size, DISKOFF *offset, struct ft *ft, int fd, bool for_checkpoint, uint64_t heat);
|
||||||
|
void free_blocknum(BLOCKNUM *b, struct ft *ft, bool for_checkpoint);
|
||||||
|
void translate_blocknum_to_offset_size(BLOCKNUM b, DISKOFF *offset, DISKOFF *size);
|
||||||
|
void free_unused_blocknums(BLOCKNUM root);
|
||||||
|
void realloc_descriptor_on_disk(DISKOFF size, DISKOFF *offset, struct ft *ft, int fd);
|
||||||
|
void get_descriptor_offset_size(DISKOFF *offset, DISKOFF *size);
|
||||||
|
|
||||||
|
// External verification
|
||||||
|
void verify_blocknum_allocated(BLOCKNUM b);
|
||||||
|
void verify_no_data_blocks_except_root(BLOCKNUM root);
|
||||||
|
void verify_no_free_blocknums();
|
||||||
|
|
||||||
|
// Serialization
|
||||||
|
void serialize_translation_to_wbuf(int fd, struct wbuf *w, int64_t *address, int64_t *size);
|
||||||
|
|
||||||
|
// DEBUG ONLY (ftdump included), tests included
|
||||||
|
void blocknum_dump_translation(BLOCKNUM b);
|
||||||
|
void dump_translation_table_pretty(FILE *f);
|
||||||
|
void dump_translation_table(FILE *f);
|
||||||
|
void block_free(uint64_t offset);
|
||||||
|
|
||||||
|
int iterate(enum translation_type type, BLOCKTABLE_CALLBACK f, void *extra, bool data_only, bool used_only);
|
||||||
|
void internal_fragmentation(int64_t *total_sizep, int64_t *used_sizep);
|
||||||
|
|
||||||
|
// Requires: blocktable lock is held.
|
||||||
|
// Requires: report->file_size_bytes is already filled in.
|
||||||
|
void get_fragmentation_unlocked(TOKU_DB_FRAGMENTATION report);
|
||||||
|
|
||||||
|
int64_t get_blocks_in_use_unlocked();
|
||||||
|
|
||||||
|
void get_info64(struct ftinfo64 *);
|
||||||
|
|
||||||
|
int iterate_translation_tables(uint64_t, int (*)(uint64_t, int64_t, int64_t, int64_t, int64_t, void *), void *);
|
||||||
|
|
||||||
|
private:
|
||||||
|
struct block_translation_pair {
|
||||||
|
// If in the freelist, use next_free_blocknum, otherwise diskoff.
|
||||||
|
union {
|
||||||
|
DISKOFF diskoff;
|
||||||
|
BLOCKNUM next_free_blocknum;
|
||||||
|
} u;
|
||||||
|
|
||||||
|
// Set to 0xFFFFFFFFFFFFFFFF for free
|
||||||
|
DISKOFF size;
|
||||||
|
};
|
||||||
|
|
||||||
|
// This is the BTT (block translation table)
|
||||||
|
// When the translation (btt) is stored on disk:
|
||||||
|
// In Header:
|
||||||
|
// size_on_disk
|
||||||
|
// location_on_disk
|
||||||
|
// In block translation table (in order):
|
||||||
|
// smallest_never_used_blocknum
|
||||||
|
// blocknum_freelist_head
|
||||||
|
// array
|
||||||
|
// a checksum
|
||||||
|
struct translation {
|
||||||
|
enum translation_type type;
|
||||||
|
|
||||||
|
// Number of elements in array (block_translation). always >= smallest_never_used_blocknum
|
||||||
|
int64_t length_of_array;
|
||||||
|
BLOCKNUM smallest_never_used_blocknum;
|
||||||
|
|
||||||
|
// Next (previously used) unused blocknum (free list)
|
||||||
|
BLOCKNUM blocknum_freelist_head;
|
||||||
|
struct block_translation_pair *block_translation;
|
||||||
|
|
||||||
|
// size_on_disk is stored in block_translation[RESERVED_BLOCKNUM_TRANSLATION].size
|
||||||
|
// location_on_disk is stored in block_translation[RESERVED_BLOCKNUM_TRANSLATION].u.diskoff
|
||||||
|
};
|
||||||
|
|
||||||
|
void _create_internal();
|
||||||
|
int _translation_deserialize_from_buffer(struct translation *t, // destination into which to deserialize
|
||||||
|
DISKOFF location_on_disk, // location of translation_buffer
|
||||||
|
uint64_t size_on_disk,
|
||||||
|
unsigned char * translation_buffer); // buffer with serialized translation
|
||||||
|
|
||||||
|
void _copy_translation(struct translation *dst, struct translation *src, enum translation_type newtype);
|
||||||
|
void _maybe_optimize_translation(struct translation *t);
|
||||||
|
void _maybe_expand_translation(struct translation *t);
|
||||||
|
bool _translation_prevents_freeing(struct translation *t, BLOCKNUM b, struct block_translation_pair *old_pair);
|
||||||
|
void _free_blocknum_in_translation(struct translation *t, BLOCKNUM b);
|
||||||
|
int64_t _calculate_size_on_disk(struct translation *t);
|
||||||
|
bool _pair_is_unallocated(struct block_translation_pair *pair);
|
||||||
|
void _alloc_inprogress_translation_on_disk_unlocked();
|
||||||
|
void _dump_translation_internal(FILE *f, struct translation *t);
|
||||||
|
|
||||||
|
// Blocknum management
|
||||||
|
void _allocate_blocknum_unlocked(BLOCKNUM *res, struct ft *ft);
|
||||||
|
void _free_blocknum_unlocked(BLOCKNUM *bp, struct ft *ft, bool for_checkpoint);
|
||||||
|
void _realloc_descriptor_on_disk_unlocked(DISKOFF size, DISKOFF *offset, struct ft *ft);
|
||||||
|
void _realloc_on_disk_internal(BLOCKNUM b, DISKOFF size, DISKOFF *offset, struct ft *ft, bool for_checkpoint, uint64_t heat);
|
||||||
|
void _translate_blocknum_to_offset_size_unlocked(BLOCKNUM b, DISKOFF *offset, DISKOFF *size);
|
||||||
|
|
||||||
|
// File management
|
||||||
|
void _maybe_truncate_file(int fd, uint64_t size_needed_before);
|
||||||
|
void _ensure_safe_write_unlocked(int fd, DISKOFF block_size, DISKOFF block_offset);
|
||||||
|
|
||||||
|
// Verification
|
||||||
|
bool _is_valid_blocknum(struct translation *t, BLOCKNUM b);
|
||||||
|
void _verify_valid_blocknum(struct translation *t, BLOCKNUM b);
|
||||||
|
bool _is_valid_freeable_blocknum(struct translation *t, BLOCKNUM b);
|
||||||
|
void _verify_valid_freeable_blocknum(struct translation *t, BLOCKNUM b);
|
||||||
|
bool _no_data_blocks_except_root(BLOCKNUM root);
|
||||||
|
bool _blocknum_allocated(BLOCKNUM b);
|
||||||
|
|
||||||
|
// Locking
|
||||||
|
//
|
||||||
|
// TODO: Move the lock to the FT
|
||||||
|
void _mutex_lock();
|
||||||
|
void _mutex_unlock();
|
||||||
|
|
||||||
|
// The current translation is the one used by client threads.
|
||||||
|
// It is not represented on disk.
|
||||||
|
struct translation _current;
|
||||||
|
|
||||||
|
// The translation used by the checkpoint currently in progress.
|
||||||
|
// If the checkpoint thread allocates a block, it must also update the current translation.
|
||||||
|
struct translation _inprogress;
|
||||||
|
|
||||||
|
// The translation for the data that shall remain inviolate on disk until the next checkpoint finishes,
|
||||||
|
// after which any blocks used only in this translation can be freed.
|
||||||
|
struct translation _checkpointed;
|
||||||
|
|
||||||
|
// The in-memory data structure for block allocation.
|
||||||
|
// There is no on-disk data structure for block allocation.
|
||||||
|
// Note: This is *allocation* not *translation* - the block allocator is unaware of which
|
||||||
|
// blocks are used for which translation, but simply allocates and deallocates blocks.
|
||||||
|
block_allocator _bt_block_allocator;
|
||||||
|
toku_mutex_t _mutex;
|
||||||
|
struct nb_mutex _safe_file_size_lock;
|
||||||
|
bool _checkpoint_skipped;
|
||||||
|
uint64_t _safe_file_size;
|
||||||
|
|
||||||
|
// Because the lock is in a weird place right now
|
||||||
|
friend void toku_ft_lock(struct ft *ft);
|
||||||
|
friend void toku_ft_unlock(struct ft *ft);
|
||||||
|
};
|
||||||
|
|
||||||
|
// For serialize / deserialize
|
||||||
|
|
||||||
|
#include "ft/serialize/wbuf.h"
|
||||||
|
|
||||||
|
// Write blocknum `b` into `w` by passing its raw `b` field to wbuf_ulonglong().
static inline void wbuf_BLOCKNUM(struct wbuf *w, BLOCKNUM b)
{
    wbuf_ulonglong(w, b.b);
}
|
||||||
|
|
||||||
|
// Same as wbuf_BLOCKNUM, but routed through wbuf_nocrc_ulonglong().
static inline void wbuf_nocrc_BLOCKNUM(struct wbuf *w, BLOCKNUM b)
{
    wbuf_nocrc_ulonglong(w, b.b);
}
|
||||||
|
|
||||||
|
// Write disk offset `off` into `wb` as an unsigned 64-bit value.
static inline void wbuf_DISKOFF(struct wbuf *wb, DISKOFF off)
{
    wbuf_ulonglong(wb, static_cast<uint64_t>(off));
}
|
||||||
|
|
||||||
|
#include "ft/serialize/rbuf.h"
|
||||||
|
|
||||||
|
// Read the next value from `rb` with rbuf_ulonglong() and return it as a DISKOFF.
static inline DISKOFF rbuf_DISKOFF(struct rbuf *rb)
{
    DISKOFF off = rbuf_ulonglong(rb);
    return off;
}
|
||||||
|
|
||||||
|
// Read the next value from `rb` with rbuf_longlong() and wrap it in a BLOCKNUM.
static inline BLOCKNUM rbuf_blocknum(struct rbuf *rb)
{
    return make_blocknum(rbuf_longlong(rb));
}
|
||||||
|
|
||||||
|
static inline void rbuf_ma_BLOCKNUM(struct rbuf *rb, memarena *UU(ma), BLOCKNUM *blocknum) {
|
||||||
|
*blocknum = rbuf_blocknum(rb);
|
||||||
|
}
|
||||||
257
storage/tokudb/PerconaFT/ft/serialize/compress.cc
Normal file
257
storage/tokudb/PerconaFT/ft/serialize/compress.cc
Normal file
@@ -0,0 +1,257 @@
|
|||||||
|
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||||
|
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||||
|
#ident "$Id$"
|
||||||
|
/*======
|
||||||
|
This file is part of PerconaFT.
|
||||||
|
|
||||||
|
|
||||||
|
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||||
|
|
||||||
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License, version 2,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
----------------------------------------
|
||||||
|
|
||||||
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU Affero General Public License, version 3,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Affero General Public License
|
||||||
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
======= */
|
||||||
|
|
||||||
|
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||||
|
|
||||||
|
#include <toku_portability.h>
|
||||||
|
#include <util/scoped_malloc.h>
|
||||||
|
|
||||||
|
#include <zlib.h>
|
||||||
|
#include <lzma.h>
|
||||||
|
#include <snappy.h>
|
||||||
|
|
||||||
|
#include "compress.h"
|
||||||
|
#include "memory.h"
|
||||||
|
#include "quicklz.h"
|
||||||
|
#include "toku_assert.h"
|
||||||
|
|
||||||
|
static inline enum toku_compression_method
|
||||||
|
normalize_compression_method(enum toku_compression_method method)
|
||||||
|
// Effect: resolve "friendly" names like "fast" and "small" into their real values.
|
||||||
|
{
|
||||||
|
switch (method) {
|
||||||
|
case TOKU_DEFAULT_COMPRESSION_METHOD:
|
||||||
|
case TOKU_FAST_COMPRESSION_METHOD:
|
||||||
|
return TOKU_QUICKLZ_METHOD;
|
||||||
|
case TOKU_SMALL_COMPRESSION_METHOD:
|
||||||
|
return TOKU_LZMA_METHOD;
|
||||||
|
default:
|
||||||
|
return method; // everything else is fine
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t toku_compress_bound (enum toku_compression_method a, size_t size)
|
||||||
|
// See compress.h for the specification of this function.
|
||||||
|
{
|
||||||
|
a = normalize_compression_method(a);
|
||||||
|
switch (a) {
|
||||||
|
case TOKU_NO_COMPRESSION:
|
||||||
|
return size + 1;
|
||||||
|
case TOKU_LZMA_METHOD:
|
||||||
|
return 1+lzma_stream_buffer_bound(size); // We need one extra for the rfc1950-style header byte (bits -03 are TOKU_LZMA_METHOD (1), bits 4-7 are the compression level)
|
||||||
|
case TOKU_QUICKLZ_METHOD:
|
||||||
|
return size+400 + 1; // quicklz manual says 400 bytes is enough. We need one more byte for the rfc1950-style header byte. bits 0-3 are 9, bits 4-7 are the QLZ_COMPRESSION_LEVEL.
|
||||||
|
case TOKU_ZLIB_METHOD:
|
||||||
|
return compressBound (size);
|
||||||
|
case TOKU_ZLIB_WITHOUT_CHECKSUM_METHOD:
|
||||||
|
return 2+deflateBound(nullptr, size); // We need one extra for the rfc1950-style header byte, and one extra to store windowBits (a bit over cautious about future upgrades maybe).
|
||||||
|
case TOKU_SNAPPY_METHOD:
|
||||||
|
return (1 + snappy::MaxCompressedLength(size));
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
// fall through for bad enum (thus compiler can warn us if we didn't use all the enums
|
||||||
|
assert(0); return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
void toku_compress (enum toku_compression_method a,
|
||||||
|
// the following types and naming conventions come from zlib.h
|
||||||
|
Bytef *dest, uLongf *destLen,
|
||||||
|
const Bytef *source, uLong sourceLen)
|
||||||
|
// See compress.h for the specification of this function.
|
||||||
|
{
|
||||||
|
static const int zlib_compression_level = 5;
|
||||||
|
static const int zlib_without_checksum_windowbits = -15;
|
||||||
|
|
||||||
|
a = normalize_compression_method(a);
|
||||||
|
assert(sourceLen < (1LL << 32));
|
||||||
|
switch (a) {
|
||||||
|
case TOKU_NO_COMPRESSION:
|
||||||
|
dest[0] = TOKU_NO_COMPRESSION;
|
||||||
|
memcpy(dest + 1, source, sourceLen);
|
||||||
|
*destLen = sourceLen + 1;
|
||||||
|
return;
|
||||||
|
case TOKU_ZLIB_METHOD: {
|
||||||
|
int r = compress2(dest, destLen, source, sourceLen, zlib_compression_level);
|
||||||
|
assert(r == Z_OK);
|
||||||
|
assert((dest[0]&0xF) == TOKU_ZLIB_METHOD);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
case TOKU_QUICKLZ_METHOD: {
|
||||||
|
if (sourceLen==0) {
|
||||||
|
// quicklz requires at least one byte, so we handle this ourselves
|
||||||
|
assert(1 <= *destLen);
|
||||||
|
*destLen = 1;
|
||||||
|
} else {
|
||||||
|
toku::scoped_calloc qsc_buf(sizeof(qlz_state_compress));
|
||||||
|
qlz_state_compress *qsc = reinterpret_cast<qlz_state_compress *>(qsc_buf.get());
|
||||||
|
size_t actual_destlen = qlz_compress(source, (char*)(dest+1), sourceLen, qsc);
|
||||||
|
assert(actual_destlen + 1 <= *destLen);
|
||||||
|
// add one for the rfc1950-style header byte.
|
||||||
|
*destLen = actual_destlen + 1;
|
||||||
|
}
|
||||||
|
// Fill in that first byte
|
||||||
|
dest[0] = TOKU_QUICKLZ_METHOD + (QLZ_COMPRESSION_LEVEL << 4);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
case TOKU_LZMA_METHOD: {
|
||||||
|
const int lzma_compression_level = 2;
|
||||||
|
if (sourceLen==0) {
|
||||||
|
// lzma version 4.999 requires at least one byte, so we'll do it ourselves.
|
||||||
|
assert(1<=*destLen);
|
||||||
|
*destLen = 1;
|
||||||
|
} else {
|
||||||
|
size_t out_pos = 1;
|
||||||
|
lzma_ret r = lzma_easy_buffer_encode(lzma_compression_level,
|
||||||
|
LZMA_CHECK_NONE, NULL,
|
||||||
|
source, sourceLen,
|
||||||
|
dest, &out_pos, *destLen);
|
||||||
|
assert(out_pos < *destLen);
|
||||||
|
if (r != LZMA_OK) {
|
||||||
|
fprintf(stderr, "lzma_easy_buffer_encode() returned %d\n", (int) r);
|
||||||
|
}
|
||||||
|
assert(r==LZMA_OK);
|
||||||
|
*destLen = out_pos;
|
||||||
|
}
|
||||||
|
dest[0] = TOKU_LZMA_METHOD + (lzma_compression_level << 4);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
case TOKU_ZLIB_WITHOUT_CHECKSUM_METHOD: {
|
||||||
|
z_stream strm;
|
||||||
|
strm.zalloc = Z_NULL;
|
||||||
|
strm.zfree = Z_NULL;
|
||||||
|
strm.opaque = Z_NULL;
|
||||||
|
strm.next_in = const_cast<Bytef *>(source);
|
||||||
|
strm.avail_in = sourceLen;
|
||||||
|
int r = deflateInit2(&strm, zlib_compression_level, Z_DEFLATED,
|
||||||
|
zlib_without_checksum_windowbits, 8, Z_DEFAULT_STRATEGY);
|
||||||
|
lazy_assert(r == Z_OK);
|
||||||
|
strm.next_out = dest + 2;
|
||||||
|
strm.avail_out = *destLen - 2;
|
||||||
|
r = deflate(&strm, Z_FINISH);
|
||||||
|
lazy_assert(r == Z_STREAM_END);
|
||||||
|
r = deflateEnd(&strm);
|
||||||
|
lazy_assert(r == Z_OK);
|
||||||
|
*destLen = strm.total_out + 2;
|
||||||
|
dest[0] = TOKU_ZLIB_WITHOUT_CHECKSUM_METHOD + (zlib_compression_level << 4);
|
||||||
|
dest[1] = zlib_without_checksum_windowbits;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
case TOKU_SNAPPY_METHOD: {
|
||||||
|
snappy::RawCompress((char*)source, sourceLen, (char*)dest + 1, destLen);
|
||||||
|
*destLen += 1;
|
||||||
|
dest[0] = TOKU_SNAPPY_METHOD;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
// default fall through to error.
|
||||||
|
assert(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
void toku_decompress (Bytef       *dest,   uLongf destLen,
                      const Bytef *source, uLongf sourceLen)
// See compress.h for the specification of this function.
// Dispatches on the method stored in the low nibble of source[0] and expands
// the payload into dest. destLen must equal the exact decompressed size; every
// branch asserts that, so corrupt or mismatched input aborts instead of
// silently over- or under-filling dest.
{
    assert(sourceLen>=1); // need at least one byte for the RFC header.
    switch (source[0] & 0xF) {
    case TOKU_NO_COMPRESSION:
        // Payload is stored verbatim after the header byte.
        assert(destLen == sourceLen - 1);
        memcpy(dest, source + 1, sourceLen - 1);
        return;
    case TOKU_ZLIB_METHOD: {
        uLongf actual_destlen = destLen;
        int r = uncompress(dest, &actual_destlen, source, sourceLen);
        assert(r == Z_OK);
        assert(actual_destlen == destLen);
        return;
    }
    case TOKU_QUICKLZ_METHOD:
        if (sourceLen>1) {
            toku::scoped_calloc state_buf(sizeof(qlz_state_decompress));
            qlz_state_decompress *qsd = reinterpret_cast<qlz_state_decompress *>(state_buf.get());
            uLongf actual_destlen = qlz_decompress((char*)source+1, dest, qsd);
            assert(actual_destlen == destLen);
        } else {
            // length 1 means there is no data, so do nothing.
            assert(destLen==0);
        }
        return;
    case TOKU_LZMA_METHOD: {
        if (sourceLen>1) {
            uint64_t memlimit = UINT64_MAX;
            size_t out_pos = 0;
            size_t in_pos = 1; // skip the header byte
            lzma_ret r = lzma_stream_buffer_decode(&memlimit,  // memlimit, use UINT64_MAX to disable this check
                                                   0,          // flags
                                                   NULL,       // allocator
                                                   source, &in_pos, sourceLen,
                                                   dest, &out_pos, destLen);
            assert(r==LZMA_OK);
            assert(out_pos == destLen);
        } else {
            // length 1 means there is no data, so do nothing.
            assert(destLen==0);
        }
        return;
    }
    case TOKU_ZLIB_WITHOUT_CHECKSUM_METHOD: {
        // Layout: header byte, windowBits byte, raw deflate stream.
        lazy_assert(sourceLen >= 2);
        z_stream strm;
        strm.next_in = const_cast<Bytef *>(source + 2);
        strm.avail_in = sourceLen - 2;
        strm.zalloc = Z_NULL;
        strm.zfree = Z_NULL;
        strm.opaque = Z_NULL;
        // toku_compress() stores a negative windowBits (-15) in a single
        // byte. Plain char is unsigned on some platforms, where the byte
        // would read back as 241 and inflateInit2() would reject it, so
        // decode it explicitly as a signed byte.
        const int windowBits = static_cast<signed char>(source[1]);
        int r = inflateInit2(&strm, windowBits);
        lazy_assert(r == Z_OK);
        strm.next_out = dest;
        strm.avail_out = destLen;
        r = inflate(&strm, Z_FINISH);
        lazy_assert(r == Z_STREAM_END);
        // Same exact-size contract the other branches enforce.
        lazy_assert(strm.total_out == destLen);
        r = inflateEnd(&strm);
        lazy_assert(r == Z_OK);
        return;
    }
    case TOKU_SNAPPY_METHOD: {
        const char *compressed = reinterpret_cast<const char *>(source) + 1;
        const size_t compressed_len = sourceLen - 1;
        // RawUncompress() is not told how large dest is, so check the length
        // recorded in the stream against destLen before writing anything.
        size_t uncompressed_len = 0;
        bool r = snappy::GetUncompressedLength(compressed, compressed_len, &uncompressed_len);
        assert(r);
        assert(uncompressed_len == destLen);
        r = snappy::RawUncompress(compressed, compressed_len, reinterpret_cast<char *>(dest));
        assert(r);
        return;
    }
    }
    // default fall through to error.
    assert(0);
}
|
||||||
78
storage/tokudb/PerconaFT/ft/serialize/compress.h
Normal file
78
storage/tokudb/PerconaFT/ft/serialize/compress.h
Normal file
@@ -0,0 +1,78 @@
|
|||||||
|
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||||
|
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||||
|
#ident "$Id$"
|
||||||
|
/*======
|
||||||
|
This file is part of PerconaFT.
|
||||||
|
|
||||||
|
|
||||||
|
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||||
|
|
||||||
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License, version 2,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
----------------------------------------
|
||||||
|
|
||||||
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU Affero General Public License, version 3,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Affero General Public License
|
||||||
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
======= */
|
||||||
|
|
||||||
|
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <zlib.h>
|
||||||
|
#include <db.h>
|
||||||
|
|
||||||
|
// The following provides an abstraction of quicklz and zlib.
|
||||||
|
// We offer several compression methods (ZLIB, ZLIB without checksum, QUICKLZ, LZMA, and SNAPPY), as well as a "no compression" option. These options are declared in make_tdb.c.
|
||||||
|
// The resulting byte string includes enough information for us to decompress it. That is, we can tell whether it's z-compressed or qz-compressed or xz-compressed.
|
||||||
|
|
||||||
|
size_t toku_compress_bound (enum toku_compression_method a, size_t size);
|
||||||
|
// Effect: Return the number of bytes needed to compress a buffer of size SIZE using compression method A.
|
||||||
|
// Typically, the result is a little bit larger than SIZE, since some data cannot be compressed.
|
||||||
|
// Usage note: It may help to know roughly how much space is involved.
|
||||||
|
// zlib's bound is something like size + (size>>12) + (size>>14) + (size>>25) + 13.
|
||||||
|
// quicklz's bound is something like size+400.
|
||||||
|
|
||||||
|
void toku_compress (enum toku_compression_method a,
|
||||||
|
// the following types and naming conventions come from zlib.h
|
||||||
|
Bytef *dest, uLongf *destLen,
|
||||||
|
const Bytef *source, uLong sourceLen);
|
||||||
|
// Effect: Using compression method A, compress SOURCE into DEST. The number of bytes to compress is passed in SOURCELEN.
|
||||||
|
// On input: *destLen is the size of the buffer.
|
||||||
|
// On output: *destLen is the size of the actual compressed data.
|
||||||
|
// Usage note: sourceLen may be zero (unlike for quicklz, which requires sourceLen>0).
|
||||||
|
// Requires: The buffer must be big enough to hold the compressed data. (That is *destLen >= compressBound(a, sourceLen))
|
||||||
|
// Requires: sourceLen < 2^32.
|
||||||
|
// Usage note: Although we *try* to assert if the DESTLEN isn't big enough, it's possible that it's too late by then (in the case of quicklz which offers
|
||||||
|
// no way to avoid a buffer overrun.) So we require that DESTLEN is big enough.
|
||||||
|
// Rationale: zlib's argument order is DEST then SOURCE with the size of the buffer passed in *destLen, and the size of the result returned in *destLen.
|
||||||
|
// quicklz's argument order is SOURCE then DEST with the size returned (and it has no way to verify that an overrun didn't happen).
|
||||||
|
// We use zlib's calling conventions partly because it is safer, and partly because it is more established.
|
||||||
|
// We also use zlib's ugly camel case convention for destLen and sourceLen.
|
||||||
|
// Unlike zlib, we return no error codes. Instead, we require that the data be OK and the size of the buffers is OK, and assert if there's a problem.
|
||||||
|
|
||||||
|
void toku_decompress (Bytef *dest, uLongf destLen,
|
||||||
|
const Bytef *source, uLongf sourceLen);
|
||||||
|
// Effect: Decompress source (length sourceLen) into dest (length destLen)
|
||||||
|
// This function can decompress data compressed with any of the methods supported by toku_compress() (which puts an appropriate header on so we know which it is).
|
||||||
|
// Requires: destLen is equal to the actual decompressed size of the data.
|
||||||
|
// Requires: The source must have been properly compressed.
|
||||||
186
storage/tokudb/PerconaFT/ft/serialize/ft-node-deserialize.cc
Normal file
186
storage/tokudb/PerconaFT/ft/serialize/ft-node-deserialize.cc
Normal file
@@ -0,0 +1,186 @@
|
|||||||
|
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||||
|
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||||
|
#ident "$Id$"
|
||||||
|
/*======
|
||||||
|
This file is part of PerconaFT.
|
||||||
|
|
||||||
|
|
||||||
|
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||||
|
|
||||||
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License, version 2,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
----------------------------------------
|
||||||
|
|
||||||
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU Affero General Public License, version 3,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Affero General Public License
|
||||||
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
======= */
|
||||||
|
|
||||||
|
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||||
|
|
||||||
|
#include "ft/node.h"
|
||||||
|
#include "ft/ft-internal.h"
|
||||||
|
#include "ft/serialize/ft_node-serialize.h"
|
||||||
|
|
||||||
|
/*
|
||||||
|
* ft-node-deserialize.c -
|
||||||
|
* This file contains functions used by deserialization
|
||||||
|
* code paths in and out of the engine. The functions can,
|
||||||
|
* essentially, be broken up into two types. Some of these
|
||||||
|
* functions return error codes based on expected values inside
|
||||||
|
* the fractal tree node, others merely read the specific
|
||||||
|
* quantities of bytes out of the buffer. It is expected
|
||||||
|
* that these will be called in the correct order by users
|
||||||
|
* of these functions/this API.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
// Sets initial values for the given fractal tree node to be
|
||||||
|
// deserialized
|
||||||
|
void
|
||||||
|
initialize_ftnode(FTNODE node, BLOCKNUM blocknum)
|
||||||
|
{
|
||||||
|
node->fullhash = 0xDEADBEEF; // <CER> Is this 'spoof' ok?
|
||||||
|
node->blocknum = blocknum;
|
||||||
|
node->dirty = 0;
|
||||||
|
node->bp = NULL;
|
||||||
|
// <CER> Can we use this initialization as a correctness assert in
|
||||||
|
// a later function?
|
||||||
|
node->layout_version_read_from_disk = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/************************
|
||||||
|
* TODO: In other deserialization code, we check the rb size member. We
|
||||||
|
* verify that it is greater than or equal to 24. Ignoring this magic
|
||||||
|
* number for a moment, should we put this check in its own function? *
|
||||||
|
*************************/
|
||||||
|
|
||||||
|
|
||||||
|
// Read and check the 'magic' bytes on disk. Returns an error if
|
||||||
|
// the magic does not match.
|
||||||
|
int
|
||||||
|
read_and_check_magic(struct rbuf *rb)
|
||||||
|
{
|
||||||
|
int r = 0;
|
||||||
|
const void *magic;
|
||||||
|
rbuf_literal_bytes(rb, &magic, 8);
|
||||||
|
if (memcmp(magic, "tokuleaf", 8)!=0 &&
|
||||||
|
memcmp(magic, "tokunode", 8)!=0) {
|
||||||
|
r = DB_BADFORMAT; // TODO: Return more meaningful error.
|
||||||
|
}
|
||||||
|
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Read the version number from the given buffer
|
||||||
|
// and returns an error if the version is too old.
|
||||||
|
int
|
||||||
|
read_and_check_version(FTNODE node, struct rbuf *rb)
|
||||||
|
{
|
||||||
|
int r = 0;
|
||||||
|
int version = rbuf_int(rb);
|
||||||
|
node->layout_version_read_from_disk = version;
|
||||||
|
if (version < FT_LAYOUT_MIN_SUPPORTED_VERSION) {
|
||||||
|
r = 1; // TODO: Better error reporting.
|
||||||
|
}
|
||||||
|
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Reads the basic version, build, and child info from
|
||||||
|
// the given buffer.
|
||||||
|
void
|
||||||
|
read_node_info(FTNODE node, struct rbuf *rb, int version)
|
||||||
|
{
|
||||||
|
node->layout_version = version;
|
||||||
|
node->layout_version_original = rbuf_int(rb);
|
||||||
|
node->build_id = rbuf_int(rb);
|
||||||
|
node->n_children = rbuf_int(rb);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Allocates the partitions based on the given node's nubmer
|
||||||
|
// of children. It then reads, out of the given buffer,
|
||||||
|
// the start and size of each child partition.
|
||||||
|
// TODO: Should these be two seperate functions?
|
||||||
|
void
|
||||||
|
allocate_and_read_partition_offsets(FTNODE node, struct rbuf *rb, FTNODE_DISK_DATA *ndd)
|
||||||
|
{
|
||||||
|
XMALLOC_N(node->n_children, node->bp);
|
||||||
|
// TODO: Fix this to use xmalloc_n
|
||||||
|
XMALLOC_N(node->n_children, *ndd);
|
||||||
|
// Read the partition locations.
|
||||||
|
for (int i = 0; i < node->n_children; i++) {
|
||||||
|
BP_START(*ndd, i) = rbuf_int(rb);
|
||||||
|
BP_SIZE (*ndd, i) = rbuf_int(rb);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Compares checksum of stored (in the given buffer) checksum
|
||||||
|
// and the checksum of the buffer itself. If these are NOT
|
||||||
|
// equal, this function returns an appropriate error code.
|
||||||
|
int
|
||||||
|
check_node_info_checksum(struct rbuf *rb)
|
||||||
|
{
|
||||||
|
int r = 0;
|
||||||
|
// Verify checksum of header stored.
|
||||||
|
uint32_t checksum = toku_x1764_memory(rb->buf, rb->ndone);
|
||||||
|
uint32_t stored_checksum = rbuf_int(rb);
|
||||||
|
|
||||||
|
if (stored_checksum != checksum) {
|
||||||
|
// TODO: dump_bad_block(rb->buf, rb->size);
|
||||||
|
r = TOKUDB_BAD_CHECKSUM;
|
||||||
|
}
|
||||||
|
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Reads node info from older (13 and 14) fractal tree nodes
|
||||||
|
// out of the given buffer.
|
||||||
|
void
|
||||||
|
read_legacy_node_info(FTNODE node, struct rbuf *rb, int version)
|
||||||
|
{
|
||||||
|
(void)rbuf_int(rb); // 1. nodesize
|
||||||
|
node->flags = rbuf_int(rb); // 2. flags
|
||||||
|
node->height = rbuf_int(rb); // 3. height
|
||||||
|
|
||||||
|
// If the version is less than 14, there are two extra ints here.
|
||||||
|
// we would need to ignore them if they are there.
|
||||||
|
if (version == FT_LAYOUT_VERSION_13) {
|
||||||
|
(void) rbuf_int(rb); // 4. rand4
|
||||||
|
(void) rbuf_int(rb); // 5. local
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Assuming the given buffer is in the correct position,
|
||||||
|
// this checks to see if the stored checksum matches the
|
||||||
|
// checksum of the entire buffer.
|
||||||
|
int
|
||||||
|
check_legacy_end_checksum(struct rbuf *rb)
|
||||||
|
{
|
||||||
|
int r = 0;
|
||||||
|
uint32_t expected_xsum = rbuf_int(rb);
|
||||||
|
uint32_t actual_xsum = toku_x1764_memory(rb->buf, rb->size - 4);
|
||||||
|
if (expected_xsum != actual_xsum) {
|
||||||
|
r = TOKUDB_BAD_CHECKSUM;
|
||||||
|
}
|
||||||
|
|
||||||
|
return r;
|
||||||
|
}
|
||||||
@@ -1,95 +1,40 @@
|
|||||||
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||||
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||||
#ident "$Id$"
|
#ident "$Id$"
|
||||||
/*
|
/*======
|
||||||
COPYING CONDITIONS NOTICE:
|
This file is part of PerconaFT.
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or modify
|
|
||||||
it under the terms of version 2 of the GNU General Public License as
|
|
||||||
published by the Free Software Foundation, and provided that the
|
|
||||||
following conditions are met:
|
|
||||||
|
|
||||||
* Redistributions of source code must retain this COPYING
|
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||||
CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the
|
|
||||||
DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the
|
|
||||||
PATENT MARKING NOTICE (below), and the PATENT RIGHTS
|
|
||||||
GRANT (below).
|
|
||||||
|
|
||||||
* Redistributions in binary form must reproduce this COPYING
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the
|
it under the terms of the GNU General Public License, version 2,
|
||||||
DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the
|
as published by the Free Software Foundation.
|
||||||
PATENT MARKING NOTICE (below), and the PATENT RIGHTS
|
|
||||||
GRANT (below) in the documentation and/or other materials
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
provided with the distribution.
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
You should have received a copy of the GNU General Public License
|
You should have received a copy of the GNU General Public License
|
||||||
along with this program; if not, write to the Free Software
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
|
||||||
02110-1301, USA.
|
|
||||||
|
|
||||||
COPYRIGHT NOTICE:
|
----------------------------------------
|
||||||
|
|
||||||
TokuFT, Tokutek Fractal Tree Indexing Library.
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
Copyright (C) 2007-2013 Tokutek, Inc.
|
it under the terms of the GNU Affero General Public License, version 3,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
DISCLAIMER:
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
This program is distributed in the hope that it will be useful, but
|
You should have received a copy of the GNU Affero General Public License
|
||||||
WITHOUT ANY WARRANTY; without even the implied warranty of
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
======= */
|
||||||
General Public License for more details.
|
|
||||||
|
|
||||||
UNIVERSITY PATENT NOTICE:
|
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||||
|
|
||||||
The technology is licensed by the Massachusetts Institute of
|
|
||||||
Technology, Rutgers State University of New Jersey, and the Research
|
|
||||||
Foundation of State University of New York at Stony Brook under
|
|
||||||
United States of America Serial No. 11/760379 and to the patents
|
|
||||||
and/or patent applications resulting from it.
|
|
||||||
|
|
||||||
PATENT MARKING NOTICE:
|
|
||||||
|
|
||||||
This software is covered by US Patent No. 8,185,551.
|
|
||||||
This software is covered by US Patent No. 8,489,638.
|
|
||||||
|
|
||||||
PATENT RIGHTS GRANT:
|
|
||||||
|
|
||||||
"THIS IMPLEMENTATION" means the copyrightable works distributed by
|
|
||||||
Tokutek as part of the Fractal Tree project.
|
|
||||||
|
|
||||||
"PATENT CLAIMS" means the claims of patents that are owned or
|
|
||||||
licensable by Tokutek, both currently or in the future; and that in
|
|
||||||
the absence of this license would be infringed by THIS
|
|
||||||
IMPLEMENTATION or by using or running THIS IMPLEMENTATION.
|
|
||||||
|
|
||||||
"PATENT CHALLENGE" shall mean a challenge to the validity,
|
|
||||||
patentability, enforceability and/or non-infringement of any of the
|
|
||||||
PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS.
|
|
||||||
|
|
||||||
Tokutek hereby grants to you, for the term and geographical scope of
|
|
||||||
the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free,
|
|
||||||
irrevocable (except as stated in this section) patent license to
|
|
||||||
make, have made, use, offer to sell, sell, import, transfer, and
|
|
||||||
otherwise run, modify, and propagate the contents of THIS
|
|
||||||
IMPLEMENTATION, where such license applies only to the PATENT
|
|
||||||
CLAIMS. This grant does not include claims that would be infringed
|
|
||||||
only as a consequence of further modifications of THIS
|
|
||||||
IMPLEMENTATION. If you or your agent or licensee institute or order
|
|
||||||
or agree to the institution of patent litigation against any entity
|
|
||||||
(including a cross-claim or counterclaim in a lawsuit) alleging that
|
|
||||||
THIS IMPLEMENTATION constitutes direct or contributory patent
|
|
||||||
infringement, or inducement of patent infringement, then any rights
|
|
||||||
granted to you under this License shall terminate as of the date
|
|
||||||
such litigation is filed. If you or your agent or exclusive
|
|
||||||
licensee institute or order or agree to the institution of a PATENT
|
|
||||||
CHALLENGE, then Tokutek may terminate any rights granted to you
|
|
||||||
under this License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved."
|
|
||||||
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it."
|
|
||||||
|
|
||||||
#include <config.h>
|
|
||||||
|
|
||||||
#include "ft/ft.h"
|
#include "ft/ft.h"
|
||||||
#include "ft/ft-internal.h"
|
#include "ft/ft-internal.h"
|
||||||
@@ -372,6 +317,12 @@ int deserialize_ft_versioned(int fd, struct rbuf *rb, FT *ftp, uint32_t version)
|
|||||||
max_msn_in_ft = rbuf_MSN(rb);
|
max_msn_in_ft = rbuf_MSN(rb);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
unsigned fanout;
|
||||||
|
fanout = FT_DEFAULT_FANOUT;
|
||||||
|
if (ft->layout_version_read_from_disk >= FT_LAYOUT_VERSION_28) {
|
||||||
|
fanout = rbuf_int(rb);
|
||||||
|
}
|
||||||
|
|
||||||
(void) rbuf_int(rb); //Read in checksum and ignore (already verified).
|
(void) rbuf_int(rb); //Read in checksum and ignore (already verified).
|
||||||
if (rb->ndone != rb->size) {
|
if (rb->ndone != rb->size) {
|
||||||
fprintf(stderr, "Header size did not match contents.\n");
|
fprintf(stderr, "Header size did not match contents.\n");
|
||||||
@@ -398,7 +349,7 @@ int deserialize_ft_versioned(int fd, struct rbuf *rb, FT *ftp, uint32_t version)
|
|||||||
.nodesize = nodesize,
|
.nodesize = nodesize,
|
||||||
.basementnodesize = basementnodesize,
|
.basementnodesize = basementnodesize,
|
||||||
.compression_method = compression_method,
|
.compression_method = compression_method,
|
||||||
.fanout = FT_DEFAULT_FANOUT, // fanout is not serialized, must be set at startup
|
.fanout = fanout,
|
||||||
.highest_unused_msn_for_upgrade = highest_unused_msn_for_upgrade,
|
.highest_unused_msn_for_upgrade = highest_unused_msn_for_upgrade,
|
||||||
.max_msn_in_ft = max_msn_in_ft,
|
.max_msn_in_ft = max_msn_in_ft,
|
||||||
.time_of_last_optimize_begin = time_of_last_optimize_begin,
|
.time_of_last_optimize_begin = time_of_last_optimize_begin,
|
||||||
@@ -457,6 +408,8 @@ serialize_ft_min_size (uint32_t version) {
|
|||||||
size_t size = 0;
|
size_t size = 0;
|
||||||
|
|
||||||
switch(version) {
|
switch(version) {
|
||||||
|
case FT_LAYOUT_VERSION_28:
|
||||||
|
size += sizeof(uint32_t); // fanout in ft
|
||||||
case FT_LAYOUT_VERSION_27:
|
case FT_LAYOUT_VERSION_27:
|
||||||
case FT_LAYOUT_VERSION_26:
|
case FT_LAYOUT_VERSION_26:
|
||||||
case FT_LAYOUT_VERSION_25:
|
case FT_LAYOUT_VERSION_25:
|
||||||
@@ -800,6 +753,7 @@ void toku_serialize_ft_to_wbuf (
|
|||||||
wbuf_char(wbuf, (unsigned char) h->compression_method);
|
wbuf_char(wbuf, (unsigned char) h->compression_method);
|
||||||
wbuf_MSN(wbuf, h->highest_unused_msn_for_upgrade);
|
wbuf_MSN(wbuf, h->highest_unused_msn_for_upgrade);
|
||||||
wbuf_MSN(wbuf, h->max_msn_in_ft);
|
wbuf_MSN(wbuf, h->max_msn_in_ft);
|
||||||
|
wbuf_int(wbuf, h->fanout);
|
||||||
uint32_t checksum = toku_x1764_finish(&wbuf->checksum);
|
uint32_t checksum = toku_x1764_finish(&wbuf->checksum);
|
||||||
wbuf_int(wbuf, checksum);
|
wbuf_int(wbuf, checksum);
|
||||||
lazy_assert(wbuf->ndone == wbuf->size);
|
lazy_assert(wbuf->ndone == wbuf->size);
|
||||||
62
storage/tokudb/PerconaFT/ft/serialize/ft-serialize.h
Normal file
62
storage/tokudb/PerconaFT/ft/serialize/ft-serialize.h
Normal file
@@ -0,0 +1,62 @@
|
|||||||
|
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||||
|
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||||
|
#ident "$Id$"
|
||||||
|
/*======
|
||||||
|
This file is part of PerconaFT.
|
||||||
|
|
||||||
|
|
||||||
|
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||||
|
|
||||||
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License, version 2,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
----------------------------------------
|
||||||
|
|
||||||
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU Affero General Public License, version 3,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Affero General Public License
|
||||||
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
======= */
|
||||||
|
|
||||||
|
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "ft/ft.h"
|
||||||
|
#include "ft/serialize/block_table.h"
|
||||||
|
|
||||||
|
size_t toku_serialize_ft_size(struct ft_header *h);
|
||||||
|
void toku_serialize_ft_to(int fd, struct ft_header *h, block_table *bt, CACHEFILE cf);
|
||||||
|
void toku_serialize_ft_to_wbuf(struct wbuf *wbuf, struct ft_header *h, DISKOFF translation_location_on_disk, DISKOFF translation_size_on_disk);
|
||||||
|
void toku_serialize_descriptor_contents_to_fd(int fd, DESCRIPTOR desc, DISKOFF offset);
|
||||||
|
void toku_serialize_descriptor_contents_to_wbuf(struct wbuf *wb, DESCRIPTOR desc);
|
||||||
|
|
||||||
|
int toku_deserialize_ft_from(int fd, LSN max_acceptable_lsn, FT *ft);
|
||||||
|
|
||||||
|
// TODO rename
|
||||||
|
int deserialize_ft_from_fd_into_rbuf(int fd,
|
||||||
|
toku_off_t offset_of_header,
|
||||||
|
struct rbuf *rb,
|
||||||
|
uint64_t *checkpoint_count,
|
||||||
|
LSN *checkpoint_lsn,
|
||||||
|
uint32_t *version_p);
|
||||||
|
|
||||||
|
// used by verify
|
||||||
|
// TODO rename
|
||||||
|
int deserialize_ft_versioned(int fd, struct rbuf *rb, FT *ft, uint32_t version);
|
||||||
79
storage/tokudb/PerconaFT/ft/serialize/ft_layout_version.h
Normal file
79
storage/tokudb/PerconaFT/ft/serialize/ft_layout_version.h
Normal file
@@ -0,0 +1,79 @@
|
|||||||
|
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||||
|
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||||
|
#ident "$Id$"
|
||||||
|
/*======
|
||||||
|
This file is part of PerconaFT.
|
||||||
|
|
||||||
|
|
||||||
|
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||||
|
|
||||||
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License, version 2,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
----------------------------------------
|
||||||
|
|
||||||
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU Affero General Public License, version 3,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Affero General Public License
|
||||||
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
======= */
|
||||||
|
|
||||||
|
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
//Must be defined before other recursive headers could include logger/recover.h
|
||||||
|
enum ft_layout_version_e {
|
||||||
|
FT_LAYOUT_VERSION_5 = 5,
|
||||||
|
FT_LAYOUT_VERSION_6 = 6, // Diff from 5 to 6: Add leafentry_estimate
|
||||||
|
FT_LAYOUT_VERSION_7 = 7, // Diff from 6 to 7: Add exact-bit to leafentry_estimate #818, add magic to header #22, add per-subdatabase flags #333
|
||||||
|
FT_LAYOUT_VERSION_8 = 8, // Diff from 7 to 8: Use murmur instead of crc32. We are going to make a simplification and stop supporting version 7 and before. Current As of Beta 1.0.6
|
||||||
|
FT_LAYOUT_VERSION_9 = 9, // Diff from 8 to 9: Variable-sized blocks and compression.
|
||||||
|
FT_LAYOUT_VERSION_10 = 10, // Diff from 9 to 10: Variable number of compressed sub-blocks per block, disk byte order == intel byte order, Subtree estimates instead of just leafentry estimates, translation table, dictionary descriptors, checksum in header, subdb support removed from ft layer
|
||||||
|
FT_LAYOUT_VERSION_11 = 11, // Diff from 10 to 11: Nested transaction leafentries (completely redesigned). FT_CMDs on disk now support XIDS (multiple txnids) instead of exactly one.
|
||||||
|
FT_LAYOUT_VERSION_12 = 12, // Diff from 11 to 12: Added FT_CMD 'FT_INSERT_NO_OVERWRITE', compressed block format, num old blocks
|
||||||
|
FT_LAYOUT_VERSION_13 = 13, // Diff from 12 to 13: Fixed loader pivot bug, added build_id to every node, timestamps to ft
|
||||||
|
FT_LAYOUT_VERSION_14 = 14, // Diff from 13 to 14: Added MVCC; deprecated TOKU_DB_VALCMP_BUILTIN(_13); Remove fingerprints; Support QUICKLZ; add end-to-end checksum on uncompressed data.
|
||||||
|
FT_LAYOUT_VERSION_15 = 15, // Diff from 14 to 15: basement nodes, last verification time
|
||||||
|
FT_LAYOUT_VERSION_16 = 16, // Dr. No: No subtree estimates, partition layout information represented more transparently.
|
||||||
|
// ALERT ALERT ALERT: version 16 never released to customers, internal and beta use only
|
||||||
|
FT_LAYOUT_VERSION_17 = 17, // Dr. No: Add STAT64INFO_S to ft header
|
||||||
|
FT_LAYOUT_VERSION_18 = 18, // Dr. No: Add HOT info to ft header
|
||||||
|
FT_LAYOUT_VERSION_19 = 19, // Doofenshmirtz: Add compression method, highest_unused_msn_for_upgrade
|
||||||
|
FT_LAYOUT_VERSION_20 = 20, // Deadshot: Add compression method to log_fcreate,
|
||||||
|
// mgr_last_xid after begin checkpoint,
|
||||||
|
// last_xid to shutdown
|
||||||
|
FT_LAYOUT_VERSION_21 = 21, // Ming: Add max_msn_in_ft to header,
|
||||||
|
// Removed log suppression logentry
|
||||||
|
FT_LAYOUT_VERSION_22 = 22, // Ming: Add oldest known referenced xid to each ftnode, for better garbage collection
|
||||||
|
FT_LAYOUT_VERSION_23 = 23, // Ming: Fix upgrade path #5902
|
||||||
|
FT_LAYOUT_VERSION_24 = 24, // Riddler: change logentries that log transactions to store TXNID_PAIRs instead of TXNIDs
|
||||||
|
FT_LAYOUT_VERSION_25 = 25, // SecretSquirrel: ROLLBACK_LOG_NODES (on disk and in memory) now just use blocknum (instead of blocknum + hash) to point to other log nodes. same for xstillopen log entry
|
||||||
|
FT_LAYOUT_VERSION_26 = 26, // Hojo: basements store key/vals separately on disk for fixed klpair length BNs
|
||||||
|
FT_LAYOUT_VERSION_27 = 27, // serialize message trees with nonleaf buffers to avoid key, msn sort on deserialize
|
||||||
|
FT_LAYOUT_VERSION_28 = 28, // Add fanout to ft_header
|
||||||
|
FT_NEXT_VERSION, // the version after the current version
|
||||||
|
FT_LAYOUT_VERSION = FT_NEXT_VERSION-1, // A hack so I don't have to change this line.
|
||||||
|
FT_LAYOUT_MIN_SUPPORTED_VERSION = FT_LAYOUT_VERSION_13, // Minimum version supported
|
||||||
|
|
||||||
|
// Define these symbolically so the knowledge of exactly which layout version got rid of fingerprints isn't spread all over the code.
|
||||||
|
FT_LAST_LAYOUT_VERSION_WITH_FINGERPRINT = FT_LAYOUT_VERSION_13,
|
||||||
|
FT_FIRST_LAYOUT_VERSION_WITH_END_TO_END_CHECKSUM = FT_LAYOUT_VERSION_14,
|
||||||
|
FT_FIRST_LAYOUT_VERSION_WITH_BASEMENT_NODES = FT_LAYOUT_VERSION_15,
|
||||||
|
};
|
||||||
@@ -1,95 +1,40 @@
|
|||||||
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||||
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||||
#ident "$Id$"
|
#ident "$Id$"
|
||||||
/*
|
/*======
|
||||||
COPYING CONDITIONS NOTICE:
|
This file is part of PerconaFT.
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or modify
|
|
||||||
it under the terms of version 2 of the GNU General Public License as
|
|
||||||
published by the Free Software Foundation, and provided that the
|
|
||||||
following conditions are met:
|
|
||||||
|
|
||||||
* Redistributions of source code must retain this COPYING
|
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||||
CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the
|
|
||||||
DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the
|
|
||||||
PATENT MARKING NOTICE (below), and the PATENT RIGHTS
|
|
||||||
GRANT (below).
|
|
||||||
|
|
||||||
* Redistributions in binary form must reproduce this COPYING
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the
|
it under the terms of the GNU General Public License, version 2,
|
||||||
DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the
|
as published by the Free Software Foundation.
|
||||||
PATENT MARKING NOTICE (below), and the PATENT RIGHTS
|
|
||||||
GRANT (below) in the documentation and/or other materials
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
provided with the distribution.
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
You should have received a copy of the GNU General Public License
|
You should have received a copy of the GNU General Public License
|
||||||
along with this program; if not, write to the Free Software
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
|
||||||
02110-1301, USA.
|
|
||||||
|
|
||||||
COPYRIGHT NOTICE:
|
----------------------------------------
|
||||||
|
|
||||||
TokuFT, Tokutek Fractal Tree Indexing Library.
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
Copyright (C) 2007-2013 Tokutek, Inc.
|
it under the terms of the GNU Affero General Public License, version 3,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
DISCLAIMER:
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
This program is distributed in the hope that it will be useful, but
|
You should have received a copy of the GNU Affero General Public License
|
||||||
WITHOUT ANY WARRANTY; without even the implied warranty of
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
======= */
|
||||||
General Public License for more details.
|
|
||||||
|
|
||||||
UNIVERSITY PATENT NOTICE:
|
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||||
|
|
||||||
The technology is licensed by the Massachusetts Institute of
|
|
||||||
Technology, Rutgers State University of New Jersey, and the Research
|
|
||||||
Foundation of State University of New York at Stony Brook under
|
|
||||||
United States of America Serial No. 11/760379 and to the patents
|
|
||||||
and/or patent applications resulting from it.
|
|
||||||
|
|
||||||
PATENT MARKING NOTICE:
|
|
||||||
|
|
||||||
This software is covered by US Patent No. 8,185,551.
|
|
||||||
This software is covered by US Patent No. 8,489,638.
|
|
||||||
|
|
||||||
PATENT RIGHTS GRANT:
|
|
||||||
|
|
||||||
"THIS IMPLEMENTATION" means the copyrightable works distributed by
|
|
||||||
Tokutek as part of the Fractal Tree project.
|
|
||||||
|
|
||||||
"PATENT CLAIMS" means the claims of patents that are owned or
|
|
||||||
licensable by Tokutek, both currently or in the future; and that in
|
|
||||||
the absence of this license would be infringed by THIS
|
|
||||||
IMPLEMENTATION or by using or running THIS IMPLEMENTATION.
|
|
||||||
|
|
||||||
"PATENT CHALLENGE" shall mean a challenge to the validity,
|
|
||||||
patentability, enforceability and/or non-infringement of any of the
|
|
||||||
PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS.
|
|
||||||
|
|
||||||
Tokutek hereby grants to you, for the term and geographical scope of
|
|
||||||
the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free,
|
|
||||||
irrevocable (except as stated in this section) patent license to
|
|
||||||
make, have made, use, offer to sell, sell, import, transfer, and
|
|
||||||
otherwise run, modify, and propagate the contents of THIS
|
|
||||||
IMPLEMENTATION, where such license applies only to the PATENT
|
|
||||||
CLAIMS. This grant does not include claims that would be infringed
|
|
||||||
only as a consequence of further modifications of THIS
|
|
||||||
IMPLEMENTATION. If you or your agent or licensee institute or order
|
|
||||||
or agree to the institution of patent litigation against any entity
|
|
||||||
(including a cross-claim or counterclaim in a lawsuit) alleging that
|
|
||||||
THIS IMPLEMENTATION constitutes direct or contributory patent
|
|
||||||
infringement, or inducement of patent infringement, then any rights
|
|
||||||
granted to you under this License shall terminate as of the date
|
|
||||||
such litigation is filed. If you or your agent or exclusive
|
|
||||||
licensee institute or order or agree to the institution of a PATENT
|
|
||||||
CHALLENGE, then Tokutek may terminate any rights granted to you
|
|
||||||
under this License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved."
|
|
||||||
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it."
|
|
||||||
|
|
||||||
#include <config.h>
|
|
||||||
|
|
||||||
#include "portability/toku_atomic.h"
|
#include "portability/toku_atomic.h"
|
||||||
|
|
||||||
@@ -147,7 +92,7 @@ struct toku_thread_pool *get_ft_pool(void) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void toku_serialize_set_parallel(bool in_parallel) {
|
void toku_serialize_set_parallel(bool in_parallel) {
|
||||||
toku_drd_unsafe_set(&toku_serialize_in_parallel, in_parallel);
|
toku_unsafe_set(&toku_serialize_in_parallel, in_parallel);
|
||||||
}
|
}
|
||||||
|
|
||||||
void toku_ft_serialize_layer_init(void) {
|
void toku_ft_serialize_layer_init(void) {
|
||||||
@@ -423,7 +368,7 @@ compress_ftnode_sub_block(struct sub_block *sb, enum toku_compression_method met
|
|||||||
|
|
||||||
//
|
//
|
||||||
// This probably seems a bit complicated. Here is what is going on.
|
// This probably seems a bit complicated. Here is what is going on.
|
||||||
// In TokuFT 5.0, sub_blocks were compressed and the compressed data
|
// In PerconaFT 5.0, sub_blocks were compressed and the compressed data
|
||||||
// was checksummed. The checksum did NOT include the size of the compressed data
|
// was checksummed. The checksum did NOT include the size of the compressed data
|
||||||
// and the size of the uncompressed data. The fields of sub_block only reference the
|
// and the size of the uncompressed data. The fields of sub_block only reference the
|
||||||
// compressed data, and it is the responsibility of the user of the sub_block
|
// compressed data, and it is the responsibility of the user of the sub_block
|
||||||
@@ -456,7 +401,7 @@ compress_ftnode_sub_block(struct sub_block *sb, enum toku_compression_method met
|
|||||||
// two integers at the beginning, the size and uncompressed size, and then the compressed
|
// two integers at the beginning, the size and uncompressed size, and then the compressed
|
||||||
// data. sb->xsum contains the checksum of this entire thing.
|
// data. sb->xsum contains the checksum of this entire thing.
|
||||||
//
|
//
|
||||||
// In TokuFT 5.0, sb->compressed_ptr only contained the compressed data, sb->xsum
|
// In PerconaFT 5.0, sb->compressed_ptr only contained the compressed data, sb->xsum
|
||||||
// checksummed only the compressed data, and the checksumming of the sizes were not
|
// checksummed only the compressed data, and the checksumming of the sizes were not
|
||||||
// done here.
|
// done here.
|
||||||
//
|
//
|
||||||
@@ -643,7 +588,7 @@ serialize_and_compress_in_parallel(FTNODE node,
|
|||||||
struct serialize_compress_work work[npartitions];
|
struct serialize_compress_work work[npartitions];
|
||||||
workset_lock(&ws);
|
workset_lock(&ws);
|
||||||
for (int i = 0; i < npartitions; i++) {
|
for (int i = 0; i < npartitions; i++) {
|
||||||
work[i] = (struct serialize_compress_work) { .base = {{NULL}},
|
work[i] = (struct serialize_compress_work) { .base = {{NULL, NULL}},
|
||||||
.node = node,
|
.node = node,
|
||||||
.i = i,
|
.i = i,
|
||||||
.compression_method = compression_method,
|
.compression_method = compression_method,
|
||||||
@@ -854,7 +799,7 @@ toku_serialize_ftnode_to (int fd, BLOCKNUM blocknum, FTNODE node, FTNODE_DISK_DA
|
|||||||
ft->h->basementnodesize,
|
ft->h->basementnodesize,
|
||||||
ft->h->compression_method,
|
ft->h->compression_method,
|
||||||
do_rebalancing,
|
do_rebalancing,
|
||||||
toku_drd_unsafe_fetch(&toku_serialize_in_parallel),
|
toku_unsafe_fetch(&toku_serialize_in_parallel),
|
||||||
&n_to_write,
|
&n_to_write,
|
||||||
&n_uncompressed_bytes,
|
&n_uncompressed_bytes,
|
||||||
&compressed_buf
|
&compressed_buf
|
||||||
92
storage/tokudb/PerconaFT/ft/serialize/ft_node-serialize.h
Normal file
92
storage/tokudb/PerconaFT/ft/serialize/ft_node-serialize.h
Normal file
@@ -0,0 +1,92 @@
|
|||||||
|
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||||
|
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||||
|
#ident "$Id$"
|
||||||
|
/*======
|
||||||
|
This file is part of PerconaFT.
|
||||||
|
|
||||||
|
|
||||||
|
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||||
|
|
||||||
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License, version 2,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
----------------------------------------
|
||||||
|
|
||||||
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU Affero General Public License, version 3,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Affero General Public License
|
||||||
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
======= */
|
||||||
|
|
||||||
|
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "ft/ft.h"
|
||||||
|
#include "ft/node.h"
|
||||||
|
#include "ft/serialize/sub_block.h"
|
||||||
|
#include "ft/serialize/rbuf.h"
|
||||||
|
#include "ft/serialize/wbuf.h"
|
||||||
|
#include "ft/serialize/block_table.h"
|
||||||
|
|
||||||
|
unsigned int toku_serialize_ftnode_size(FTNODE node);
|
||||||
|
int toku_serialize_ftnode_to_memory(FTNODE node, FTNODE_DISK_DATA *ndd,
|
||||||
|
unsigned int basementnodesize,
|
||||||
|
enum toku_compression_method compression_method,
|
||||||
|
bool do_rebalancing, bool in_parallel,
|
||||||
|
size_t *n_bytes_to_write, size_t *n_uncompressed_bytes,
|
||||||
|
char **bytes_to_write);
|
||||||
|
int toku_serialize_ftnode_to(int fd, BLOCKNUM, FTNODE node, FTNODE_DISK_DATA *ndd, bool do_rebalancing, FT ft, bool for_checkpoint);
|
||||||
|
int toku_serialize_rollback_log_to(int fd, ROLLBACK_LOG_NODE log, SERIALIZED_ROLLBACK_LOG_NODE serialized_log, bool is_serialized,
|
||||||
|
FT ft, bool for_checkpoint);
|
||||||
|
void toku_serialize_rollback_log_to_memory_uncompressed(ROLLBACK_LOG_NODE log, SERIALIZED_ROLLBACK_LOG_NODE serialized);
|
||||||
|
|
||||||
|
int toku_deserialize_rollback_log_from(int fd, BLOCKNUM blocknum, ROLLBACK_LOG_NODE *logp, FT ft);
|
||||||
|
int toku_deserialize_bp_from_disk(FTNODE node, FTNODE_DISK_DATA ndd, int childnum, int fd, ftnode_fetch_extra *bfe);
|
||||||
|
int toku_deserialize_bp_from_compressed(FTNODE node, int childnum, ftnode_fetch_extra *bfe);
|
||||||
|
int toku_deserialize_ftnode_from(int fd, BLOCKNUM off, uint32_t fullhash, FTNODE *node, FTNODE_DISK_DATA *ndd, ftnode_fetch_extra *bfe);
|
||||||
|
|
||||||
|
void toku_serialize_set_parallel(bool);
|
||||||
|
|
||||||
|
// used by nonleaf node partial eviction
|
||||||
|
void toku_create_compressed_partition_from_available(FTNODE node, int childnum,
|
||||||
|
enum toku_compression_method compression_method, SUB_BLOCK sb);
|
||||||
|
|
||||||
|
// <CER> For verifying old, non-upgraded nodes (versions 13 and 14).
|
||||||
|
int decompress_from_raw_block_into_rbuf(uint8_t *raw_block, size_t raw_block_size, struct rbuf *rb, BLOCKNUM blocknum);
|
||||||
|
|
||||||
|
// used by verify
|
||||||
|
int deserialize_ft_versioned(int fd, struct rbuf *rb, FT *ft, uint32_t version);
|
||||||
|
void read_block_from_fd_into_rbuf(int fd, BLOCKNUM blocknum, FT ft, struct rbuf *rb);
|
||||||
|
int read_compressed_sub_block(struct rbuf *rb, struct sub_block *sb);
|
||||||
|
int verify_ftnode_sub_block(struct sub_block *sb);
|
||||||
|
void just_decompress_sub_block(struct sub_block *sb);
|
||||||
|
|
||||||
|
// used by ft-node-deserialize.cc
|
||||||
|
void initialize_ftnode(FTNODE node, BLOCKNUM blocknum);
|
||||||
|
int read_and_check_magic(struct rbuf *rb);
|
||||||
|
int read_and_check_version(FTNODE node, struct rbuf *rb);
|
||||||
|
void read_node_info(FTNODE node, struct rbuf *rb, int version);
|
||||||
|
void allocate_and_read_partition_offsets(FTNODE node, struct rbuf *rb, FTNODE_DISK_DATA *ndd);
|
||||||
|
int check_node_info_checksum(struct rbuf *rb);
|
||||||
|
void read_legacy_node_info(FTNODE node, struct rbuf *rb, int version);
|
||||||
|
int check_legacy_end_checksum(struct rbuf *rb);
|
||||||
|
|
||||||
|
// exported so the loader can dump bad blocks
|
||||||
|
void dump_bad_block(unsigned char *vp, uint64_t size);
|
||||||
@@ -1,93 +1,41 @@
|
|||||||
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||||
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||||
#ident "$Id$"
|
#ident "$Id$"
|
||||||
/*
|
/*======
|
||||||
COPYING CONDITIONS NOTICE:
|
This file is part of PerconaFT.
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or modify
|
|
||||||
it under the terms of version 2 of the GNU General Public License as
|
|
||||||
published by the Free Software Foundation, and provided that the
|
|
||||||
following conditions are met:
|
|
||||||
|
|
||||||
* Redistributions of source code must retain this COPYING
|
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||||
CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the
|
|
||||||
DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the
|
|
||||||
PATENT MARKING NOTICE (below), and the PATENT RIGHTS
|
|
||||||
GRANT (below).
|
|
||||||
|
|
||||||
* Redistributions in binary form must reproduce this COPYING
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the
|
it under the terms of the GNU General Public License, version 2,
|
||||||
DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the
|
as published by the Free Software Foundation.
|
||||||
PATENT MARKING NOTICE (below), and the PATENT RIGHTS
|
|
||||||
GRANT (below) in the documentation and/or other materials
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
provided with the distribution.
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
You should have received a copy of the GNU General Public License
|
You should have received a copy of the GNU General Public License
|
||||||
along with this program; if not, write to the Free Software
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
|
||||||
02110-1301, USA.
|
|
||||||
|
|
||||||
COPYRIGHT NOTICE:
|
----------------------------------------
|
||||||
|
|
||||||
TokuFT, Tokutek Fractal Tree Indexing Library.
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
Copyright (C) 2007-2013 Tokutek, Inc.
|
it under the terms of the GNU Affero General Public License, version 3,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
DISCLAIMER:
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
This program is distributed in the hope that it will be useful, but
|
You should have received a copy of the GNU Affero General Public License
|
||||||
WITHOUT ANY WARRANTY; without even the implied warranty of
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
======= */
|
||||||
General Public License for more details.
|
|
||||||
|
|
||||||
UNIVERSITY PATENT NOTICE:
|
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||||
|
|
||||||
The technology is licensed by the Massachusetts Institute of
|
|
||||||
Technology, Rutgers State University of New Jersey, and the Research
|
|
||||||
Foundation of State University of New York at Stony Brook under
|
|
||||||
United States of America Serial No. 11/760379 and to the patents
|
|
||||||
and/or patent applications resulting from it.
|
|
||||||
|
|
||||||
PATENT MARKING NOTICE:
|
|
||||||
|
|
||||||
This software is covered by US Patent No. 8,185,551.
|
|
||||||
This software is covered by US Patent No. 8,489,638.
|
|
||||||
|
|
||||||
PATENT RIGHTS GRANT:
|
|
||||||
|
|
||||||
"THIS IMPLEMENTATION" means the copyrightable works distributed by
|
|
||||||
Tokutek as part of the Fractal Tree project.
|
|
||||||
|
|
||||||
"PATENT CLAIMS" means the claims of patents that are owned or
|
|
||||||
licensable by Tokutek, both currently or in the future; and that in
|
|
||||||
the absence of this license would be infringed by THIS
|
|
||||||
IMPLEMENTATION or by using or running THIS IMPLEMENTATION.
|
|
||||||
|
|
||||||
"PATENT CHALLENGE" shall mean a challenge to the validity,
|
|
||||||
patentability, enforceability and/or non-infringement of any of the
|
|
||||||
PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS.
|
|
||||||
|
|
||||||
Tokutek hereby grants to you, for the term and geographical scope of
|
|
||||||
the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free,
|
|
||||||
irrevocable (except as stated in this section) patent license to
|
|
||||||
make, have made, use, offer to sell, sell, import, transfer, and
|
|
||||||
otherwise run, modify, and propagate the contents of THIS
|
|
||||||
IMPLEMENTATION, where such license applies only to the PATENT
|
|
||||||
CLAIMS. This grant does not include claims that would be infringed
|
|
||||||
only as a consequence of further modifications of THIS
|
|
||||||
IMPLEMENTATION. If you or your agent or licensee institute or order
|
|
||||||
or agree to the institution of patent litigation against any entity
|
|
||||||
(including a cross-claim or counterclaim in a lawsuit) alleging that
|
|
||||||
THIS IMPLEMENTATION constitutes direct or contributory patent
|
|
||||||
infringement, or inducement of patent infringement, then any rights
|
|
||||||
granted to you under this License shall terminate as of the date
|
|
||||||
such litigation is filed. If you or your agent or exclusive
|
|
||||||
licensee institute or order or agree to the institution of a PATENT
|
|
||||||
CHALLENGE, then Tokutek may terminate any rights granted to you
|
|
||||||
under this License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved."
|
|
||||||
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it."
|
|
||||||
// Fast data compression library
|
// Fast data compression library
|
||||||
// Copyright (C) 2006-2011 Lasse Mikkel Reinhold
|
// Copyright (C) 2006-2011 Lasse Mikkel Reinhold
|
||||||
// lar@quicklz.com
|
// lar@quicklz.com
|
||||||
177
storage/tokudb/PerconaFT/ft/serialize/quicklz.h
Normal file
177
storage/tokudb/PerconaFT/ft/serialize/quicklz.h
Normal file
@@ -0,0 +1,177 @@
|
|||||||
|
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||||
|
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||||
|
#ident "$Id$"
|
||||||
|
/*======
|
||||||
|
This file is part of PerconaFT.
|
||||||
|
|
||||||
|
|
||||||
|
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||||
|
|
||||||
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License, version 2,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
----------------------------------------
|
||||||
|
|
||||||
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU Affero General Public License, version 3,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Affero General Public License
|
||||||
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
======= */
|
||||||
|
|
||||||
|
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
// Fast data compression library
|
||||||
|
// Copyright (C) 2006-2011 Lasse Mikkel Reinhold
|
||||||
|
// lar@quicklz.com
|
||||||
|
//
|
||||||
|
// QuickLZ can be used for free under the GPL 1, 2 or 3 license (where anything
|
||||||
|
// released into public must be open source) or under a commercial license if such
|
||||||
|
// has been acquired (see http://www.quicklz.com/order.html). The commercial license
|
||||||
|
// does not cover derived or ported versions created by third parties under GPL.
|
||||||
|
|
||||||
|
// You can edit following user settings. Data must be decompressed with the same
|
||||||
|
// setting of QLZ_COMPRESSION_LEVEL and QLZ_STREAMING_BUFFER as it was compressed
|
||||||
|
// (see manual). If QLZ_STREAMING_BUFFER > 0, scratch buffers must be initially
|
||||||
|
// zeroed out (see manual). First #ifndef makes it possible to define settings from
|
||||||
|
// the outside like the compiler command line.
|
||||||
|
|
||||||
|
// 1.5.0 final
|
||||||
|
|
||||||
|
// User-tunable settings. Defaults are supplied only when QLZ_COMPRESSION_LEVEL
// has not been defined from the outside (e.g. the compiler command line).
#ifndef QLZ_COMPRESSION_LEVEL
// Accepted levels are 1, 2 and 3 (checked below); 3 is the default here.
#define QLZ_COMPRESSION_LEVEL 3

// 0 disables the streaming buffer. Other values suggested upstream: 100000, 1000000.
#define QLZ_STREAMING_BUFFER 0

// Left undefined by default:
//#define QLZ_MEMORY_SAFE
#endif

// Library version: 1.5.0
#define QLZ_VERSION_MAJOR 1
#define QLZ_VERSION_MINOR 5
#define QLZ_VERSION_REVISION 0

// Using size_t, memset() and memcpy()
#include <string.h>

// Reject any compression level other than 1, 2 or 3 at compile time.
#if QLZ_COMPRESSION_LEVEL != 1 && QLZ_COMPRESSION_LEVEL != 2 && QLZ_COMPRESSION_LEVEL != 3
#error QLZ_COMPRESSION_LEVEL must be 1, 2 or 3
#endif

typedef unsigned int ui32;
typedef unsigned short int ui16;

// Per-level table geometry.
// Decrease QLZ_POINTERS for level 3 to increase compression speed. Do not touch any other values!
#if QLZ_COMPRESSION_LEVEL == 1
#define QLZ_POINTERS 1
#define QLZ_HASH_VALUES 4096
#elif QLZ_COMPRESSION_LEVEL == 2
#define QLZ_POINTERS 4
#define QLZ_HASH_VALUES 2048
#elif QLZ_COMPRESSION_LEVEL == 3
#define QLZ_POINTERS 16
#define QLZ_HASH_VALUES 4096
#endif

// Detect if pointer size is 64-bit. It's not fatal if some 64-bit target is not detected because this is only for adding an optional 64-bit optimization.
#if defined _LP64 || defined __LP64__ || defined __64BIT__ || _ADDR64 || defined _WIN64 || defined __arch64__ || __WORDSIZE == 64 || (defined __sparc && defined __sparcv9) || defined __x86_64 || defined __amd64 || defined __x86_64__ || defined _M_X64 || defined _M_IA64 || defined __ia64 || defined __IA64__
#define QLZ_PTR_64
#endif

// Hash entry used while compressing. Level 1 keeps a cache word plus one
// offset (stored as an unsigned int on 64-bit targets when streaming is off);
// levels 2 and 3 keep QLZ_POINTERS candidate pointers.
typedef struct
{
#if QLZ_COMPRESSION_LEVEL == 1
    ui32 cache;
#if defined QLZ_PTR_64 && QLZ_STREAMING_BUFFER == 0
    unsigned int offset;
#else
    const unsigned char *offset;
#endif
#else
    const unsigned char *offset[QLZ_POINTERS];
#endif

} qlz_hash_compress;

// Hash entry used while decompressing.
typedef struct
{
#if QLZ_COMPRESSION_LEVEL == 1
    const unsigned char *offset;
#else
    const unsigned char *offset[QLZ_POINTERS];
#endif
} qlz_hash_decompress;


// Compressor state (scratch memory handed to qlz_compress).
typedef struct
{
#if QLZ_STREAMING_BUFFER > 0
    unsigned char stream_buffer[QLZ_STREAMING_BUFFER];
#endif
    size_t stream_counter;
    qlz_hash_compress hash[QLZ_HASH_VALUES];
    unsigned char hash_counter[QLZ_HASH_VALUES];
} qlz_state_compress;


// Decompressor state (scratch memory handed to qlz_decompress).
// Levels 1 and 2 carry the hash table and its counters; level 3 carries only
// the optional stream buffer and the stream counter. Member order matches the
// per-level definitions this replaces, so the layout is unchanged.
typedef struct
{
#if QLZ_STREAMING_BUFFER > 0
    unsigned char stream_buffer[QLZ_STREAMING_BUFFER];
#endif
#if QLZ_COMPRESSION_LEVEL <= 2
    qlz_hash_decompress hash[QLZ_HASH_VALUES];
    unsigned char hash_counter[QLZ_HASH_VALUES];
#endif
    size_t stream_counter;
} qlz_state_decompress;


#if defined (__cplusplus)
extern "C" {
#endif

// Public functions of QuickLZ
size_t qlz_size_decompressed(const char *source);
size_t qlz_size_compressed(const char *source);
size_t qlz_compress(const void *source, char *destination, size_t size, qlz_state_compress *state);
size_t qlz_decompress(const char *source, void *destination, qlz_state_decompress *state);
int qlz_get_setting(int setting);

#if defined (__cplusplus)
}
#endif
|
||||||
156
storage/tokudb/PerconaFT/ft/serialize/rbuf.h
Normal file
156
storage/tokudb/PerconaFT/ft/serialize/rbuf.h
Normal file
@@ -0,0 +1,156 @@
|
|||||||
|
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||||
|
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||||
|
#ident "$Id$"
|
||||||
|
/*======
|
||||||
|
This file is part of PerconaFT.
|
||||||
|
|
||||||
|
|
||||||
|
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||||
|
|
||||||
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License, version 2,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
----------------------------------------
|
||||||
|
|
||||||
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU Affero General Public License, version 3,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Affero General Public License
|
||||||
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
======= */
|
||||||
|
|
||||||
|
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
#include "portability/memory.h"
|
||||||
|
#include "portability/toku_assert.h"
|
||||||
|
#include "portability/toku_htonl.h"
|
||||||
|
#include "portability/toku_portability.h"
|
||||||
|
#include "util/memarena.h"
|
||||||
|
|
||||||
|
// Read cursor over a caller-supplied byte buffer.
struct rbuf {
    unsigned char *buf;   // start of the bytes being read
    unsigned int size;    // total number of bytes available in buf
    unsigned int ndone;   // number of bytes consumed so far (the read offset)
};

// An rbuf with no buffer attached and nothing consumed.
#define RBUF_INITIALIZER ((struct rbuf){.buf = NULL, .size = 0, .ndone = 0})
|
||||||
|
|
||||||
|
static inline void rbuf_init(struct rbuf *r, unsigned char *buf, unsigned int size) {
|
||||||
|
r->buf = buf;
|
||||||
|
r->size = size;
|
||||||
|
r->ndone = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline unsigned int rbuf_get_roffset(struct rbuf *r) {
|
||||||
|
return r->ndone;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline unsigned char rbuf_char (struct rbuf *r) {
|
||||||
|
assert(r->ndone<r->size);
|
||||||
|
return r->buf[r->ndone++];
|
||||||
|
}
|
||||||
|
|
||||||
|
// Read one byte into *num. The memarena argument is accepted but unused.
static inline void rbuf_ma_uint8_t (struct rbuf *r, memarena *ma __attribute__((__unused__)), uint8_t *num) {
    const unsigned char byte = rbuf_char(r);
    *num = byte;
}
|
||||||
|
|
||||||
|
// Read one byte and store it as a bool: any nonzero byte becomes true.
// The memarena argument is accepted but unused.
static inline void rbuf_ma_bool (struct rbuf *r, memarena *ma __attribute__((__unused__)), bool *b) {
    *b = rbuf_char(r) != 0;
}
|
||||||
|
|
||||||
|
//Read an int that MUST be in network order regardless of disk order
|
||||||
|
static unsigned int rbuf_network_int (struct rbuf *r) __attribute__((__unused__));
|
||||||
|
static unsigned int rbuf_network_int (struct rbuf *r) {
|
||||||
|
assert(r->ndone+4 <= r->size);
|
||||||
|
uint32_t result = toku_ntohl(*(uint32_t*)(r->buf+r->ndone)); // This only works on machines where unaligned loads are OK.
|
||||||
|
r->ndone+=4;
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
static unsigned int rbuf_int (struct rbuf *r) {
|
||||||
|
#if 1
|
||||||
|
assert(r->ndone+4 <= r->size);
|
||||||
|
uint32_t result = toku_dtoh32(*(uint32_t*)(r->buf+r->ndone)); // This only works on machines where unaligned loads are OK.
|
||||||
|
r->ndone+=4;
|
||||||
|
return result;
|
||||||
|
#else
|
||||||
|
unsigned char c0 = rbuf_char(r);
|
||||||
|
unsigned char c1 = rbuf_char(r);
|
||||||
|
unsigned char c2 = rbuf_char(r);
|
||||||
|
unsigned char c3 = rbuf_char(r);
|
||||||
|
return ((c0<<24)|
|
||||||
|
(c1<<16)|
|
||||||
|
(c2<<8)|
|
||||||
|
(c3<<0));
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void rbuf_literal_bytes (struct rbuf *r, const void **bytes, unsigned int n_bytes) {
|
||||||
|
*bytes = &r->buf[r->ndone];
|
||||||
|
r->ndone+=n_bytes;
|
||||||
|
assert(r->ndone<=r->size);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Read a length with rbuf_int, then return a pointer into the middle of the
 * buffer covering that many bytes. The length is stored in *n_bytes. */
static inline void rbuf_bytes (struct rbuf *r, const void **bytes, unsigned int *n_bytes)
{
    const unsigned int length = rbuf_int(r);
    *n_bytes = length;
    rbuf_literal_bytes(r, bytes, length);
}
|
||||||
|
|
||||||
|
// Read two consecutive 32-bit values with rbuf_int and combine them into one
// 64-bit value; the first value read forms the upper 32 bits.
static inline unsigned long long rbuf_ulonglong (struct rbuf *r) {
    unsigned long long value = rbuf_int(r);
    value <<= 32;
    value |= (unsigned long long) rbuf_int(r);
    return value;
}
|
||||||
|
|
||||||
|
// Same as rbuf_ulonglong, with the result converted to a signed 64-bit value.
static inline signed long long rbuf_longlong (struct rbuf *r) {
    const unsigned long long raw = rbuf_ulonglong(r);
    return (signed long long) raw;
}
|
||||||
|
|
||||||
|
// Read a 32-bit value with rbuf_int into *num. The memarena argument is unused.
static inline void rbuf_ma_uint32_t (struct rbuf *r, memarena *ma __attribute__((__unused__)), uint32_t *num) {
    const unsigned int value = rbuf_int(r);
    *num = value;
}
|
||||||
|
|
||||||
|
// Read a 64-bit value with rbuf_ulonglong into *num. The memarena argument is unused.
static inline void rbuf_ma_uint64_t (struct rbuf *r, memarena *ma __attribute__((__unused__)), uint64_t *num) {
    const unsigned long long value = rbuf_ulonglong(r);
    *num = value;
}
|
||||||
|
|
||||||
|
// Don't try to use the same space, malloc it
|
||||||
|
static inline void rbuf_BYTESTRING (struct rbuf *r, BYTESTRING *bs) {
|
||||||
|
bs->len = rbuf_int(r);
|
||||||
|
uint32_t newndone = r->ndone + bs->len;
|
||||||
|
assert(newndone <= r->size);
|
||||||
|
bs->data = (char *) toku_memdup(&r->buf[r->ndone], (size_t)bs->len);
|
||||||
|
assert(bs->data);
|
||||||
|
r->ndone = newndone;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Read a length-prefixed byte string into bs, copying the payload into memory
// obtained from ma->malloc_from_arena.
// Asserts that the payload fits in the unread part of the buffer and that the
// arena returned a non-null pointer.
static inline void rbuf_ma_BYTESTRING (struct rbuf *r, memarena *ma, BYTESTRING *bs) {
    bs->len = rbuf_int(r);
    // Compare against the remaining byte count: computing "ndone + len" first
    // can wrap around in 32 bits and let an oversized length reach memcpy.
    assert(r->ndone <= r->size && bs->len <= r->size - r->ndone);
    bs->data = (char *) ma->malloc_from_arena(bs->len);
    assert(bs->data);
    memcpy(bs->data, &r->buf[r->ndone], bs->len);
    r->ndone += bs->len;
}
|
||||||
389
storage/tokudb/PerconaFT/ft/serialize/sub_block.cc
Normal file
389
storage/tokudb/PerconaFT/ft/serialize/sub_block.cc
Normal file
@@ -0,0 +1,389 @@
|
|||||||
|
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||||
|
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||||
|
#ident "$Id$"
|
||||||
|
/*======
|
||||||
|
This file is part of PerconaFT.
|
||||||
|
|
||||||
|
|
||||||
|
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||||
|
|
||||||
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License, version 2,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
----------------------------------------
|
||||||
|
|
||||||
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU Affero General Public License, version 3,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Affero General Public License
|
||||||
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
======= */
|
||||||
|
|
||||||
|
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||||
|
|
||||||
|
#include <errno.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <zlib.h>
|
||||||
|
|
||||||
|
#include "portability/memory.h"
|
||||||
|
#include "portability/toku_assert.h"
|
||||||
|
#include "portability/toku_portability.h"
|
||||||
|
|
||||||
|
#include "ft/serialize/compress.h"
|
||||||
|
#include "ft/serialize/sub_block.h"
|
||||||
|
#include "ft/serialize/quicklz.h"
|
||||||
|
#include "util/threadpool.h"
|
||||||
|
#include "util/x1764.h"
|
||||||
|
|
||||||
|
// Create a new sub block and return it after running sub_block_init on it.
// NOTE(review): XMALLOC is a project macro; it presumably declares-and-allocates
// `sb` here -- confirm against portability/memory.h.
SUB_BLOCK sub_block_creat(void) {
    SUB_BLOCK XMALLOC(sb);

    sub_block_init(sb);

    return sb;
}
|
||||||
|
// Reset every field of sub_block: both data pointers become null and all
// sizes and the checksum become zero.
void sub_block_init(SUB_BLOCK sub_block) {
    // pointers
    sub_block->uncompressed_ptr = NULL;
    sub_block->compressed_ptr = NULL;

    // sizes
    sub_block->uncompressed_size = 0;
    sub_block->compressed_size = 0;
    sub_block->compressed_size_bound = 0;

    // checksum
    sub_block->xsum = 0;
}
|
||||||
|
|
||||||
|
// get the size of the compression header
|
||||||
|
size_t
|
||||||
|
sub_block_header_size(int n_sub_blocks) {
|
||||||
|
return sizeof (uint32_t) + n_sub_blocks * sizeof (struct stored_sub_block);
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
set_compressed_size_bound(struct sub_block *se, enum toku_compression_method method) {
|
||||||
|
se->compressed_size_bound = toku_compress_bound(method, se->uncompressed_size);
|
||||||
|
}
|
||||||
|
|
||||||
|
// get the sum of the sub block compressed sizes
|
||||||
|
size_t
|
||||||
|
get_sum_compressed_size_bound(int n_sub_blocks, struct sub_block sub_block[], enum toku_compression_method method) {
|
||||||
|
size_t compressed_size_bound = 0;
|
||||||
|
for (int i = 0; i < n_sub_blocks; i++) {
|
||||||
|
sub_block[i].compressed_size_bound = toku_compress_bound(method, sub_block[i].uncompressed_size);
|
||||||
|
compressed_size_bound += sub_block[i].compressed_size_bound;
|
||||||
|
}
|
||||||
|
return compressed_size_bound;
|
||||||
|
}
|
||||||
|
|
||||||
|
// get the sum of the sub block uncompressed sizes
|
||||||
|
size_t
|
||||||
|
get_sum_uncompressed_size(int n_sub_blocks, struct sub_block sub_block[]) {
|
||||||
|
size_t uncompressed_size = 0;
|
||||||
|
for (int i = 0; i < n_sub_blocks; i++)
|
||||||
|
uncompressed_size += sub_block[i].uncompressed_size;
|
||||||
|
return uncompressed_size;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Round a up to the nearest multiple of b (a is returned unchanged when it
// already is a multiple).
static inline int
alignup32(int a, int b) {
    const int units = (a + b - 1) / b;   // number of b-sized units needed to cover a
    return units * b;
}
|
||||||
|
|
||||||
|
// Choose n_sub_blocks and sub_block_size such that the product is >= total_size and the sub_block_size is at
|
||||||
|
// least >= the target_sub_block_size.
|
||||||
|
int
|
||||||
|
choose_sub_block_size(int total_size, int n_sub_blocks_limit, int *sub_block_size_ret, int *n_sub_blocks_ret) {
|
||||||
|
if (total_size < 0 || n_sub_blocks_limit < 1)
|
||||||
|
return EINVAL;
|
||||||
|
|
||||||
|
const int alignment = 32;
|
||||||
|
|
||||||
|
int n_sub_blocks, sub_block_size;
|
||||||
|
n_sub_blocks = total_size / target_sub_block_size;
|
||||||
|
if (n_sub_blocks <= 1) {
|
||||||
|
if (total_size > 0 && n_sub_blocks_limit > 0)
|
||||||
|
n_sub_blocks = 1;
|
||||||
|
sub_block_size = total_size;
|
||||||
|
} else {
|
||||||
|
if (n_sub_blocks > n_sub_blocks_limit) // limit the number of sub-blocks
|
||||||
|
n_sub_blocks = n_sub_blocks_limit;
|
||||||
|
sub_block_size = alignup32(total_size / n_sub_blocks, alignment);
|
||||||
|
while (sub_block_size * n_sub_blocks < total_size) // round up the sub-block size until big enough
|
||||||
|
sub_block_size += alignment;
|
||||||
|
}
|
||||||
|
|
||||||
|
*sub_block_size_ret = sub_block_size;
|
||||||
|
*n_sub_blocks_ret = n_sub_blocks;
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Choose the right size of basement nodes. For now, just align up to
|
||||||
|
// 256k blocks and hope it compresses well enough.
|
||||||
|
int
|
||||||
|
choose_basement_node_size(int total_size, int *sub_block_size_ret, int *n_sub_blocks_ret) {
|
||||||
|
if (total_size < 0)
|
||||||
|
return EINVAL;
|
||||||
|
|
||||||
|
*n_sub_blocks_ret = (total_size + max_basement_node_uncompressed_size - 1) / max_basement_node_uncompressed_size;
|
||||||
|
*sub_block_size_ret = max_basement_node_uncompressed_size;
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
set_all_sub_block_sizes(int total_size, int sub_block_size, int n_sub_blocks, struct sub_block sub_block[]) {
|
||||||
|
int size_left = total_size;
|
||||||
|
int i;
|
||||||
|
for (i = 0; i < n_sub_blocks-1; i++) {
|
||||||
|
sub_block[i].uncompressed_size = sub_block_size;
|
||||||
|
size_left -= sub_block_size;
|
||||||
|
}
|
||||||
|
if (i == 0 || size_left > 0)
|
||||||
|
sub_block[i].uncompressed_size = size_left;
|
||||||
|
}
|
||||||
|
|
||||||
|
// find the index of the first sub block that contains offset
|
||||||
|
// Returns the sub block index, else returns -1
|
||||||
|
int
|
||||||
|
get_sub_block_index(int n_sub_blocks, struct sub_block sub_block[], size_t offset) {
|
||||||
|
size_t start_offset = 0;
|
||||||
|
for (int i = 0; i < n_sub_blocks; i++) {
|
||||||
|
size_t size = sub_block[i].uncompressed_size;
|
||||||
|
if (offset < start_offset + size)
|
||||||
|
return i;
|
||||||
|
start_offset += size;
|
||||||
|
}
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
#include "workset.h"
|
||||||
|
|
||||||
|
void
|
||||||
|
compress_work_init(struct compress_work *w, enum toku_compression_method method, struct sub_block *sub_block) {
|
||||||
|
w->method = method;
|
||||||
|
w->sub_block = sub_block;
|
||||||
|
}
|
||||||
|
|
||||||
|
//
|
||||||
|
// takes the uncompressed contents of sub_block
|
||||||
|
// and compresses them into sb_compressed_ptr
|
||||||
|
// cs_bound is the compressed size bound
|
||||||
|
// Returns the size of the compressed data
|
||||||
|
//
|
||||||
|
uint32_t
|
||||||
|
compress_nocrc_sub_block(
|
||||||
|
struct sub_block *sub_block,
|
||||||
|
void* sb_compressed_ptr,
|
||||||
|
uint32_t cs_bound,
|
||||||
|
enum toku_compression_method method
|
||||||
|
)
|
||||||
|
{
|
||||||
|
// compress it
|
||||||
|
Bytef *uncompressed_ptr = (Bytef *) sub_block->uncompressed_ptr;
|
||||||
|
Bytef *compressed_ptr = (Bytef *) sb_compressed_ptr;
|
||||||
|
uLongf uncompressed_len = sub_block->uncompressed_size;
|
||||||
|
uLongf real_compressed_len = cs_bound;
|
||||||
|
toku_compress(method,
|
||||||
|
compressed_ptr, &real_compressed_len,
|
||||||
|
uncompressed_ptr, uncompressed_len);
|
||||||
|
return real_compressed_len;
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
compress_sub_block(struct sub_block *sub_block, enum toku_compression_method method) {
|
||||||
|
sub_block->compressed_size = compress_nocrc_sub_block(
|
||||||
|
sub_block,
|
||||||
|
sub_block->compressed_ptr,
|
||||||
|
sub_block->compressed_size_bound,
|
||||||
|
method
|
||||||
|
);
|
||||||
|
// checksum it
|
||||||
|
sub_block->xsum = toku_x1764_memory(sub_block->compressed_ptr, sub_block->compressed_size);
|
||||||
|
}
|
||||||
|
|
||||||
|
void *
|
||||||
|
compress_worker(void *arg) {
|
||||||
|
struct workset *ws = (struct workset *) arg;
|
||||||
|
while (1) {
|
||||||
|
struct compress_work *w = (struct compress_work *) workset_get(ws);
|
||||||
|
if (w == NULL)
|
||||||
|
break;
|
||||||
|
compress_sub_block(w->sub_block, w->method);
|
||||||
|
}
|
||||||
|
workset_release_ref(ws);
|
||||||
|
return arg;
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t
|
||||||
|
compress_all_sub_blocks(int n_sub_blocks, struct sub_block sub_block[], char *uncompressed_ptr, char *compressed_ptr, int num_cores, struct toku_thread_pool *pool, enum toku_compression_method method) {
|
||||||
|
char *compressed_base_ptr = compressed_ptr;
|
||||||
|
size_t compressed_len;
|
||||||
|
|
||||||
|
// This is a complex way to write a parallel loop. Cilk would be better.
|
||||||
|
|
||||||
|
if (n_sub_blocks == 1) {
|
||||||
|
// single sub-block
|
||||||
|
sub_block[0].uncompressed_ptr = uncompressed_ptr;
|
||||||
|
sub_block[0].compressed_ptr = compressed_ptr;
|
||||||
|
compress_sub_block(&sub_block[0], method);
|
||||||
|
compressed_len = sub_block[0].compressed_size;
|
||||||
|
} else {
|
||||||
|
// multiple sub-blocks
|
||||||
|
int T = num_cores; // T = min(num_cores, n_sub_blocks) - 1
|
||||||
|
if (T > n_sub_blocks)
|
||||||
|
T = n_sub_blocks;
|
||||||
|
if (T > 0)
|
||||||
|
T = T - 1; // threads in addition to the running thread
|
||||||
|
|
||||||
|
struct workset ws;
|
||||||
|
ZERO_STRUCT(ws);
|
||||||
|
workset_init(&ws);
|
||||||
|
|
||||||
|
struct compress_work work[n_sub_blocks];
|
||||||
|
workset_lock(&ws);
|
||||||
|
for (int i = 0; i < n_sub_blocks; i++) {
|
||||||
|
sub_block[i].uncompressed_ptr = uncompressed_ptr;
|
||||||
|
sub_block[i].compressed_ptr = compressed_ptr;
|
||||||
|
compress_work_init(&work[i], method, &sub_block[i]);
|
||||||
|
workset_put_locked(&ws, &work[i].base);
|
||||||
|
uncompressed_ptr += sub_block[i].uncompressed_size;
|
||||||
|
compressed_ptr += sub_block[i].compressed_size_bound;
|
||||||
|
}
|
||||||
|
workset_unlock(&ws);
|
||||||
|
|
||||||
|
// compress the sub-blocks
|
||||||
|
if (0) printf("%s:%d T=%d N=%d\n", __FUNCTION__, __LINE__, T, n_sub_blocks);
|
||||||
|
toku_thread_pool_run(pool, 0, &T, compress_worker, &ws);
|
||||||
|
workset_add_ref(&ws, T);
|
||||||
|
compress_worker(&ws);
|
||||||
|
|
||||||
|
// wait for all of the work to complete
|
||||||
|
workset_join(&ws);
|
||||||
|
workset_destroy(&ws);
|
||||||
|
|
||||||
|
// squeeze out the holes not used by the compress bound
|
||||||
|
compressed_ptr = compressed_base_ptr + sub_block[0].compressed_size;
|
||||||
|
for (int i = 1; i < n_sub_blocks; i++) {
|
||||||
|
memmove(compressed_ptr, sub_block[i].compressed_ptr, sub_block[i].compressed_size);
|
||||||
|
compressed_ptr += sub_block[i].compressed_size;
|
||||||
|
}
|
||||||
|
|
||||||
|
compressed_len = compressed_ptr - compressed_base_ptr;
|
||||||
|
}
|
||||||
|
return compressed_len;
|
||||||
|
}
|
||||||
|
|
||||||
|
// initialize the decompression work
|
||||||
|
void
|
||||||
|
decompress_work_init(struct decompress_work *dw,
|
||||||
|
void *compress_ptr, uint32_t compress_size,
|
||||||
|
void *uncompress_ptr, uint32_t uncompress_size,
|
||||||
|
uint32_t xsum) {
|
||||||
|
dw->compress_ptr = compress_ptr;
|
||||||
|
dw->compress_size = compress_size;
|
||||||
|
dw->uncompress_ptr = uncompress_ptr;
|
||||||
|
dw->uncompress_size = uncompress_size;
|
||||||
|
dw->xsum = xsum;
|
||||||
|
dw->error = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// When nonzero, decompress_sub_block prints a message to stderr on a checksum mismatch.
int verbose_decompress_sub_block = 1;
|
||||||
|
|
||||||
|
// decompress one block
|
||||||
|
int
|
||||||
|
decompress_sub_block(void *compress_ptr, uint32_t compress_size, void *uncompress_ptr, uint32_t uncompress_size, uint32_t expected_xsum) {
|
||||||
|
int result = 0;
|
||||||
|
|
||||||
|
// verify checksum
|
||||||
|
uint32_t xsum = toku_x1764_memory(compress_ptr, compress_size);
|
||||||
|
if (xsum != expected_xsum) {
|
||||||
|
if (verbose_decompress_sub_block) fprintf(stderr, "%s:%d xsum %u expected %u\n", __FUNCTION__, __LINE__, xsum, expected_xsum);
|
||||||
|
result = EINVAL;
|
||||||
|
} else {
|
||||||
|
// decompress
|
||||||
|
toku_decompress((Bytef *) uncompress_ptr, uncompress_size, (Bytef *) compress_ptr, compress_size);
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
// decompress blocks until there is no more work to do
|
||||||
|
void *
|
||||||
|
decompress_worker(void *arg) {
|
||||||
|
struct workset *ws = (struct workset *) arg;
|
||||||
|
while (1) {
|
||||||
|
struct decompress_work *dw = (struct decompress_work *) workset_get(ws);
|
||||||
|
if (dw == NULL)
|
||||||
|
break;
|
||||||
|
dw->error = decompress_sub_block(dw->compress_ptr, dw->compress_size, dw->uncompress_ptr, dw->uncompress_size, dw->xsum);
|
||||||
|
}
|
||||||
|
workset_release_ref(ws);
|
||||||
|
return arg;
|
||||||
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
decompress_all_sub_blocks(int n_sub_blocks, struct sub_block sub_block[], unsigned char *compressed_data, unsigned char *uncompressed_data, int num_cores, struct toku_thread_pool *pool) {
|
||||||
|
int r;
|
||||||
|
|
||||||
|
if (n_sub_blocks == 1) {
|
||||||
|
r = decompress_sub_block(compressed_data, sub_block[0].compressed_size, uncompressed_data, sub_block[0].uncompressed_size, sub_block[0].xsum);
|
||||||
|
} else {
|
||||||
|
// compute the number of additional threads needed for decompressing this node
|
||||||
|
int T = num_cores; // T = min(#cores, #blocks) - 1
|
||||||
|
if (T > n_sub_blocks)
|
||||||
|
T = n_sub_blocks;
|
||||||
|
if (T > 0)
|
||||||
|
T = T - 1; // threads in addition to the running thread
|
||||||
|
|
||||||
|
// init the decompression work set
|
||||||
|
struct workset ws;
|
||||||
|
ZERO_STRUCT(ws);
|
||||||
|
workset_init(&ws);
|
||||||
|
|
||||||
|
// initialize the decompression work and add to the work set
|
||||||
|
struct decompress_work decompress_work[n_sub_blocks];
|
||||||
|
workset_lock(&ws);
|
||||||
|
for (int i = 0; i < n_sub_blocks; i++) {
|
||||||
|
decompress_work_init(&decompress_work[i], compressed_data, sub_block[i].compressed_size, uncompressed_data, sub_block[i].uncompressed_size, sub_block[i].xsum);
|
||||||
|
workset_put_locked(&ws, &decompress_work[i].base);
|
||||||
|
|
||||||
|
uncompressed_data += sub_block[i].uncompressed_size;
|
||||||
|
compressed_data += sub_block[i].compressed_size;
|
||||||
|
}
|
||||||
|
workset_unlock(&ws);
|
||||||
|
|
||||||
|
// decompress the sub-blocks
|
||||||
|
if (0) printf("%s:%d Cores=%d Blocks=%d T=%d\n", __FUNCTION__, __LINE__, num_cores, n_sub_blocks, T);
|
||||||
|
toku_thread_pool_run(pool, 0, &T, decompress_worker, &ws);
|
||||||
|
workset_add_ref(&ws, T);
|
||||||
|
decompress_worker(&ws);
|
||||||
|
|
||||||
|
// cleanup
|
||||||
|
workset_join(&ws);
|
||||||
|
workset_destroy(&ws);
|
||||||
|
|
||||||
|
r = 0;
|
||||||
|
for (int i = 0; i < n_sub_blocks; i++) {
|
||||||
|
r = decompress_work[i].error;
|
||||||
|
if (r != 0)
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return r;
|
||||||
|
}
|
||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user