
Merge branch 'merge-tokudb-5.6' into 10.0-tokudb-merge

5.6.28-76.1
This commit is contained in:
Sergei Golubchik
2016-06-10 20:48:59 +02:00
217 changed files with 16692 additions and 6173 deletions

View File

@@ -1,4 +1,4 @@
SET(TOKUDB_VERSION 5.6.26-74.0)
SET(TOKUDB_VERSION 5.6.28-76.1)
# PerconaFT only supports x86-64 and cmake-2.8.9+
IF(CMAKE_VERSION VERSION_LESS "2.8.9")
MESSAGE(STATUS "CMake 2.8.9 or higher is required by TokuDB")
@@ -45,10 +45,25 @@ MARK_AS_ADVANCED(gcc_ar)
MARK_AS_ADVANCED(gcc_ranlib)
############################################
## adds a compiler flag if the compiler supports it
include(CheckCCompilerFlag)
include(CheckCXXCompilerFlag)
# pick language dialect
check_cxx_compiler_flag(-std=c++11 HAVE_STDCXX11)
if (HAVE_STDCXX11)
set(CMAKE_CXX_FLAGS "-std=c++11 ${CMAKE_CXX_FLAGS}")
else ()
message(FATAL_ERROR "${CMAKE_CXX_COMPILER} doesn't support -std=c++11, you need one that does.")
endif ()
SET(BUILD_TESTING OFF CACHE BOOL "")
SET(USE_VALGRIND OFF CACHE BOOL "")
SET(TOKU_DEBUG_PARANOID OFF CACHE BOOL "")
# Enable TokuDB's TOKUDB_DEBUG in debug builds
SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -DTOKUDB_DEBUG")
IF(NOT DEFINED TOKUDB_VERSION)
IF(DEFINED ENV{TOKUDB_VERSION})
SET(TOKUDB_VERSION $ENV{TOKUDB_VERSION})
@@ -71,10 +86,6 @@ IF(DEFINED TOKUDB_CHECK_JEMALLOC)
ADD_DEFINITIONS("-DTOKUDB_CHECK_JEMALLOC=${TOKUDB_CHECK_JEMALLOC}")
ENDIF()
## adds a compiler flag if the compiler supports it
include(CheckCCompilerFlag)
include(CheckCXXCompilerFlag)
macro(set_cflags_if_supported)
foreach(flag ${ARGN})
string(REGEX REPLACE "-" "_" temp_flag ${flag})
@@ -128,7 +139,12 @@ INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}/${TOKU_FT_DIR_NAME}/buildheader)
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}/${TOKU_FT_DIR_NAME}/portability)
SET(TOKUDB_PLUGIN_DYNAMIC "ha_tokudb")
SET(TOKUDB_SOURCES ha_tokudb.cc)
SET(TOKUDB_SOURCES
ha_tokudb.cc
tokudb_background.cc
tokudb_information_schema.cc
tokudb_sysvars.cc
tokudb_thread.cc)
MYSQL_ADD_PLUGIN(tokudb ${TOKUDB_SOURCES} STORAGE_ENGINE MODULE_ONLY
LINK_LIBRARIES tokufractaltree_static tokuportability_static ${ZLIB_LIBRARY} stdc++)
SET(CMAKE_MODULE_LINKER_FLAGS_RELEASE "${CMAKE_MODULE_LINKER_FLAGS_RELEASE} -flto -fuse-linker-plugin")

View File

@@ -1,3 +1,6 @@
if (CMAKE_PROJECT_NAME STREQUAL TokuDB)
cmake_minimum_required(VERSION 2.8.8 FATAL_ERROR)
endif()
set(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake_modules")
project(TokuDB)

View File

@@ -113,11 +113,11 @@ All source code and test contributions must be provided under a [BSD 2-Clause][b
License
-------
PerconaFT is available under the GPL version 2, and AGPL version 3, with slight modifications.
PerconaFT is available under the GPL version 2, and AGPL version 3.
See [COPYING.AGPLv3][agpllicense],
[COPYING.GPLv2][gpllicense], and
[PATENTS][patents].
[agpllicense]: http://github.com/Perona/PerconaFT/blob/master/COPYING.AGPLv3
[gpllicense]: http://github.com/Perona/PerconaFT/blob/master/COPYING.GPLv2
[patents]: http://github.com/Perona/PerconaFT/blob/master/PATENTS
[agpllicense]: http://github.com/Percona/PerconaFT/blob/master/COPYING.AGPLv3
[gpllicense]: http://github.com/Percona/PerconaFT/blob/master/COPYING.GPLv2
[patents]: http://github.com/Percona/PerconaFT/blob/master/PATENTS

View File

@@ -510,8 +510,9 @@ static void print_db_struct (void) {
"int (*update_broadcast)(DB *, DB_TXN*, const DBT *extra, uint32_t flags)",
"int (*get_fractal_tree_info64)(DB*,uint64_t*,uint64_t*,uint64_t*,uint64_t*)",
"int (*iterate_fractal_tree_block_map)(DB*,int(*)(uint64_t,int64_t,int64_t,int64_t,int64_t,void*),void*)",
"const char *(*get_dname)(DB *db)",
"int (*get_last_key)(DB *db, YDB_CALLBACK_FUNCTION func, void* extra)",
"const char *(*get_dname)(DB *db)",
"int (*get_last_key)(DB *db, YDB_CALLBACK_FUNCTION func, void* extra)",
"int (*recount_rows)(DB* db, int (*progress_callback)(uint64_t count, uint64_t deleted, void* progress_extra), void* progress_extra)",
NULL};
sort_and_dump_fields("db", true, extra);
}

View File

@@ -48,7 +48,8 @@ MACRO(TOKU_MERGE_STATIC_LIBS TARGET OUTPUT_NAME LIBS_TO_MERGE)
ENDIF()
ENDFOREACH()
IF(OSLIBS)
#LIST(REMOVE_DUPLICATES OSLIBS)
# REMOVE_DUPLICATES destroys the order of the libs so disabled
# LIST(REMOVE_DUPLICATES OSLIBS)
TARGET_LINK_LIBRARIES(${TARGET} LINK_PUBLIC ${OSLIBS})
ENDIF()

View File

@@ -24,12 +24,12 @@ endif ()
## add TOKU_PTHREAD_DEBUG for debug builds
if (CMAKE_VERSION VERSION_LESS 3.0)
set_property(DIRECTORY APPEND PROPERTY COMPILE_DEFINITIONS_DEBUG TOKU_PTHREAD_DEBUG=1)
set_property(DIRECTORY APPEND PROPERTY COMPILE_DEFINITIONS_DRD TOKU_PTHREAD_DEBUG=1)
set_property(DIRECTORY APPEND PROPERTY COMPILE_DEFINITIONS_DEBUG TOKU_PTHREAD_DEBUG=1 TOKU_DEBUG_TXN_SYNC=1)
set_property(DIRECTORY APPEND PROPERTY COMPILE_DEFINITIONS_DRD TOKU_PTHREAD_DEBUG=1 TOKU_DEBUG_TXN_SYNC=1)
set_property(DIRECTORY APPEND PROPERTY COMPILE_DEFINITIONS_DRD _FORTIFY_SOURCE=2)
else ()
set_property(DIRECTORY APPEND PROPERTY COMPILE_DEFINITIONS
$<$<OR:$<CONFIG:DEBUG>,$<CONFIG:DRD>>:TOKU_PTHREAD_DEBUG=1>
$<$<OR:$<CONFIG:DEBUG>,$<CONFIG:DRD>>:TOKU_PTHREAD_DEBUG=1 TOKU_DEBUG_TXN_SYNC=1>
$<$<CONFIG:DRD>:_FORTIFY_SOURCE=2>
)
endif ()

View File

@@ -36,6 +36,7 @@ set(FT_SOURCES
ft-flusher
ft-hot-flusher
ft-ops
ft-recount-rows
ft-status
ft-test-helpers
ft-verify

View File

@@ -1572,6 +1572,7 @@ void toku_bnc_flush_to_child(FT ft, NONLEAF_CHILDINFO bnc, FTNODE child, TXNID p
txn_gc_info *gc_info;
STAT64INFO_S stats_delta;
int64_t logical_rows_delta = 0;
size_t remaining_memsize = bnc->msg_buffer.buffer_size_in_use();
flush_msg_fn(FT t, FTNODE n, NONLEAF_CHILDINFO nl, txn_gc_info *g) :
@@ -1599,8 +1600,8 @@ void toku_bnc_flush_to_child(FT ft, NONLEAF_CHILDINFO bnc, FTNODE child, TXNID p
is_fresh,
gc_info,
flow_deltas,
&stats_delta
);
&stats_delta,
&logical_rows_delta);
remaining_memsize -= memsize_in_buffer;
return 0;
}
@@ -1613,6 +1614,7 @@ void toku_bnc_flush_to_child(FT ft, NONLEAF_CHILDINFO bnc, FTNODE child, TXNID p
if (flush_fn.stats_delta.numbytes || flush_fn.stats_delta.numrows) {
toku_ft_update_stats(&ft->in_memory_stats, flush_fn.stats_delta);
}
toku_ft_adjust_logical_row_count(ft, flush_fn.logical_rows_delta);
if (do_garbage_collection) {
size_t buffsize = bnc->msg_buffer.buffer_size_in_use();
// may be misleading if there's a broadcast message in there

View File

@@ -143,6 +143,10 @@ struct ft_header {
MSN msn_at_start_of_last_completed_optimize;
STAT64INFO_S on_disk_stats;
// This represents the balance of inserts - deletes and should be
// closer to a logical representation of the number of records in an index
uint64_t on_disk_logical_rows;
};
typedef struct ft_header *FT_HEADER;
@@ -176,6 +180,7 @@ struct ft {
// protected by atomic builtins
STAT64INFO_S in_memory_stats;
uint64_t in_memory_logical_rows;
// transient, not serialized to disk. updated when we do write to
// disk. tells us whether we can do partial eviction (we can't if
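The pair of counters introduced here drives the rest of the patch: on_disk_stats/in_memory_stats track physical leaf entries, while on_disk_logical_rows/in_memory_logical_rows track the balance of inserts minus deletes, adjusted at message-injection time at the top of the tree. A minimal standalone model (toy names, not part of this patch) of how the two can legitimately disagree while a delete message is still buffered in a nonleaf node:

// Toy model of the two row counters kept in struct ft.
#include <cstdint>
#include <cstdio>

struct toy_ft {
    int64_t physical_leaf_entries = 0;  // like in_memory_stats.numrows
    int64_t logical_rows = 0;           // like in_memory_logical_rows
};

int main() {
    toy_ft ft;
    // An insert flushed all the way down to a leaf moves both counters.
    ft.physical_leaf_entries += 1;
    ft.logical_rows += 1;
    // A delete buffered in a nonleaf node is already counted logically,
    // but the leaf entry still exists until the message is flushed down.
    ft.logical_rows -= 1;
    std::printf("physical=%lld logical=%lld\n",
                (long long)ft.physical_leaf_entries,
                (long long)ft.logical_rows);  // physical=1 logical=0
    return 0;
}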

View File

@@ -1371,7 +1371,8 @@ static void inject_message_in_locked_node(
ft_msg msg_with_msn(msg.kdbt(), msg.vdbt(), msg.type(), msg_msn, msg.xids());
paranoid_invariant(msg_with_msn.msn().msn > node->max_msn_applied_to_node_on_disk.msn);
STAT64INFO_S stats_delta = {0,0};
STAT64INFO_S stats_delta = { 0,0 };
int64_t logical_rows_delta = 0;
toku_ftnode_put_msg(
ft->cmp,
ft->update_fun,
@@ -1381,11 +1382,12 @@ static void inject_message_in_locked_node(
true,
gc_info,
flow_deltas,
&stats_delta
);
&stats_delta,
&logical_rows_delta);
if (stats_delta.numbytes || stats_delta.numrows) {
toku_ft_update_stats(&ft->in_memory_stats, stats_delta);
}
toku_ft_adjust_logical_row_count(ft, logical_rows_delta);
//
// assumption is that toku_ftnode_put_msg will
// mark the node as dirty.
@@ -2169,6 +2171,7 @@ int toku_ft_insert_unique(FT_HANDLE ft_h, DBT *key, DBT *val, TOKUTXN txn, bool
if (r == 0) {
ft_txn_log_insert(ft_h->ft, key, val, txn, do_logging, FT_INSERT);
toku_ft_adjust_logical_row_count(ft_h->ft, 1);
}
return r;
}
@@ -2344,6 +2347,7 @@ void toku_ft_maybe_insert (FT_HANDLE ft_h, DBT *key, DBT *val, TOKUTXN txn, bool
if (r != 0) {
toku_ft_send_insert(ft_h, key, val, message_xids, type, &gc_info);
}
toku_ft_adjust_logical_row_count(ft_h->ft, 1);
}
}
@@ -2513,6 +2517,7 @@ void toku_ft_maybe_delete(FT_HANDLE ft_h, DBT *key, TOKUTXN txn, bool oplsn_vali
oldest_referenced_xid_estimate,
txn != nullptr ? !txn->for_recovery : false);
toku_ft_send_delete(ft_h, key, message_xids, &gc_info);
toku_ft_adjust_logical_row_count(ft_h->ft, -1);
}
}

View File

@@ -207,6 +207,15 @@ extern int toku_ft_debug_mode;
int toku_verify_ft (FT_HANDLE ft_h) __attribute__ ((warn_unused_result));
int toku_verify_ft_with_progress (FT_HANDLE ft_h, int (*progress_callback)(void *extra, float progress), void *extra, int verbose, int keep_going) __attribute__ ((warn_unused_result));
int toku_ft_recount_rows(
FT_HANDLE ft,
int (*progress_callback)(
uint64_t count,
uint64_t deleted,
void* progress_extra),
void* progress_extra);
DICTIONARY_ID toku_ft_get_dictionary_id(FT_HANDLE);
enum ft_flags {

View File

@@ -0,0 +1,115 @@
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
#ident "$Id$"
/*======
This file is part of PerconaFT.
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
PerconaFT is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License, version 2,
as published by the Free Software Foundation.
PerconaFT is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
----------------------------------------
PerconaFT is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License, version 3,
as published by the Free Software Foundation.
PerconaFT is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
======= */
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
#include "ft/serialize/block_table.h"
#include "ft/ft.h"
#include "ft/ft-internal.h"
#include "ft/cursor.h"
struct recount_rows_extra_t {
int (*_progress_callback)(
uint64_t count,
uint64_t deleted,
void* progress_extra);
void* _progress_extra;
uint64_t _keys;
bool _cancelled;
};
static int recount_rows_found(
uint32_t UU(keylen),
const void* key,
uint32_t UU(vallen),
const void* UU(val),
void* extra,
bool UU(lock_only)) {
recount_rows_extra_t* rre = (recount_rows_extra_t*)extra;
if (FT_LIKELY(key != nullptr)) {
rre->_keys++;
}
return rre->_cancelled
= rre->_progress_callback(rre->_keys, 0, rre->_progress_extra);
}
static bool recount_rows_interrupt(void* extra, uint64_t deleted_rows) {
recount_rows_extra_t* rre = (recount_rows_extra_t*)extra;
return rre->_cancelled =
rre->_progress_callback(rre->_keys, deleted_rows, rre->_progress_extra);
}
int toku_ft_recount_rows(
FT_HANDLE ft,
int (*progress_callback)(
uint64_t count,
uint64_t deleted,
void* progress_extra),
void* progress_extra) {
int ret = 0;
recount_rows_extra_t rre = {
progress_callback,
progress_extra,
0,
false
};
ft_cursor c;
ret = toku_ft_cursor_create(ft, &c, nullptr, C_READ_ANY, false, false);
if (ret) return ret;
toku_ft_cursor_set_check_interrupt_cb(
&c,
recount_rows_interrupt,
&rre);
ret = toku_ft_cursor_first(&c, recount_rows_found, &rre);
while (FT_LIKELY(ret == 0)) {
ret = toku_ft_cursor_next(&c, recount_rows_found, &rre);
}
toku_ft_cursor_destroy(&c);
if (rre._cancelled == false) {
// update ft count
toku_unsafe_set(&ft->ft->in_memory_logical_rows, rre._keys);
ret = 0;
}
return ret;
}
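The contract of the progress callback above: its return value is assigned to _cancelled, so any nonzero return aborts the scan, and the new count is published to in_memory_logical_rows (via toku_unsafe_set) only if the cursor walk completed. A standalone sketch of that contract (recount_model stands in for the real cursor scan; it is not the PerconaFT API):

#include <cstdint>
#include <cstdio>

typedef int (*progress_cb)(uint64_t count, uint64_t deleted, void* extra);

// Returns nonzero to cancel once 'limit' keys have been counted.
static int stop_after_limit(uint64_t count, uint64_t deleted, void* extra) {
    (void)deleted;
    return count >= *(const uint64_t*)extra ? 1 : 0;
}

// Stands in for the cursor scan in toku_ft_recount_rows: visit every key,
// report progress, and publish the result only if never cancelled.
static int recount_model(uint64_t nrows_in_tree, progress_cb cb, void* extra,
                         uint64_t* out_logical_rows) {
    uint64_t keys = 0;
    bool cancelled = false;
    while (keys < nrows_in_tree && !cancelled) {
        keys++;                               // like recount_rows_found
        cancelled = cb(keys, 0, extra) != 0;  // nonzero return cancels
    }
    if (!cancelled) {
        *out_logical_rows = keys;  // like toku_unsafe_set on in_memory_logical_rows
        return 0;
    }
    return -1;  // hypothetical status for a cancelled recount
}

int main() {
    uint64_t limit = 10, rows = 0;
    // Cancelled: the callback fires after 10 of 500 keys.
    int r = recount_model(500, stop_after_limit, &limit, &rows);
    std::printf("r=%d rows=%llu\n", r, (unsigned long long)rows);  // r=-1 rows=0
    limit = 1000;  // now the scan completes and the count is published
    r = recount_model(500, stop_after_limit, &limit, &rows);
    std::printf("r=%d rows=%llu\n", r, (unsigned long long)rows);  // r=0 rows=500
    return 0;
}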

View File

@@ -128,24 +128,24 @@ void CACHETABLE_STATUS_S::init() {
CT_STATUS_INIT(CT_LONG_WAIT_PRESSURE_COUNT, CACHETABLE_LONG_WAIT_PRESSURE_COUNT, UINT64, "number of long waits on cache pressure");
CT_STATUS_INIT(CT_LONG_WAIT_PRESSURE_TIME, CACHETABLE_LONG_WAIT_PRESSURE_TIME, UINT64, "long time waiting on cache pressure");
CT_STATUS_INIT(CT_POOL_CLIENT_NUM_THREADS, CACHETABLE_POOL_CLIENT_NUM_THREADS, UINT64, "number of threads in pool");
CT_STATUS_INIT(CT_POOL_CLIENT_NUM_THREADS_ACTIVE, CACHETABLE_POOL_CLIENT_NUM_THREADS_ACTIVE, UINT64, "number of currently active threads in pool");
CT_STATUS_INIT(CT_POOL_CLIENT_QUEUE_SIZE, CACHETABLE_POOL_CLIENT_QUEUE_SIZE, UINT64, "number of currently queued work items");
CT_STATUS_INIT(CT_POOL_CLIENT_MAX_QUEUE_SIZE, CACHETABLE_POOL_CLIENT_MAX_QUEUE_SIZE, UINT64, "largest number of queued work items");
CT_STATUS_INIT(CT_POOL_CLIENT_TOTAL_ITEMS_PROCESSED, CACHETABLE_POOL_CLIENT_TOTAL_ITEMS_PROCESSED, UINT64, "total number of work items processed");
CT_STATUS_INIT(CT_POOL_CLIENT_TOTAL_EXECUTION_TIME, CACHETABLE_POOL_CLIENT_TOTAL_EXECUTION_TIME, UINT64, "total execution time of processing work items");
CT_STATUS_INIT(CT_POOL_CACHETABLE_NUM_THREADS, CACHETABLE_POOL_CACHETABLE_NUM_THREADS, UINT64, "number of threads in pool");
CT_STATUS_INIT(CT_POOL_CACHETABLE_NUM_THREADS_ACTIVE, CACHETABLE_POOL_CACHETABLE_NUM_THREADS_ACTIVE, UINT64, "number of currently active threads in pool");
CT_STATUS_INIT(CT_POOL_CACHETABLE_QUEUE_SIZE, CACHETABLE_POOL_CACHETABLE_QUEUE_SIZE, UINT64, "number of currently queued work items");
CT_STATUS_INIT(CT_POOL_CACHETABLE_MAX_QUEUE_SIZE, CACHETABLE_POOL_CACHETABLE_MAX_QUEUE_SIZE, UINT64, "largest number of queued work items");
CT_STATUS_INIT(CT_POOL_CACHETABLE_TOTAL_ITEMS_PROCESSED, CACHETABLE_POOL_CACHETABLE_TOTAL_ITEMS_PROCESSED, UINT64, "total number of work items processed");
CT_STATUS_INIT(CT_POOL_CACHETABLE_TOTAL_EXECUTION_TIME, CACHETABLE_POOL_CACHETABLE_TOTAL_EXECUTION_TIME, UINT64, "total execution time of processing work items");
CT_STATUS_INIT(CT_POOL_CHECKPOINT_NUM_THREADS, CACHETABLE_POOL_CHECKPOINT_NUM_THREADS, UINT64, "number of threads in pool");
CT_STATUS_INIT(CT_POOL_CHECKPOINT_NUM_THREADS_ACTIVE, CACHETABLE_POOL_CHECKPOINT_NUM_THREADS_ACTIVE, UINT64, "number of currently active threads in pool");
CT_STATUS_INIT(CT_POOL_CHECKPOINT_QUEUE_SIZE, CACHETABLE_POOL_CHECKPOINT_QUEUE_SIZE, UINT64, "number of currently queued work items");
CT_STATUS_INIT(CT_POOL_CHECKPOINT_MAX_QUEUE_SIZE, CACHETABLE_POOL_CHECKPOINT_MAX_QUEUE_SIZE, UINT64, "largest number of queued work items");
CT_STATUS_INIT(CT_POOL_CHECKPOINT_TOTAL_ITEMS_PROCESSED, CACHETABLE_POOL_CHECKPOINT_TOTAL_ITEMS_PROCESSED, UINT64, "total number of work items processed");
CT_STATUS_INIT(CT_POOL_CHECKPOINT_TOTAL_EXECUTION_TIME, CACHETABLE_POOL_CHECKPOINT_TOTAL_EXECUTION_TIME, UINT64, "total execution time of processing work items");
CT_STATUS_INIT(CT_POOL_CLIENT_NUM_THREADS, CACHETABLE_POOL_CLIENT_NUM_THREADS, UINT64, "client pool: number of threads in pool");
CT_STATUS_INIT(CT_POOL_CLIENT_NUM_THREADS_ACTIVE, CACHETABLE_POOL_CLIENT_NUM_THREADS_ACTIVE, UINT64, "client pool: number of currently active threads in pool");
CT_STATUS_INIT(CT_POOL_CLIENT_QUEUE_SIZE, CACHETABLE_POOL_CLIENT_QUEUE_SIZE, UINT64, "client pool: number of currently queued work items");
CT_STATUS_INIT(CT_POOL_CLIENT_MAX_QUEUE_SIZE, CACHETABLE_POOL_CLIENT_MAX_QUEUE_SIZE, UINT64, "client pool: largest number of queued work items");
CT_STATUS_INIT(CT_POOL_CLIENT_TOTAL_ITEMS_PROCESSED, CACHETABLE_POOL_CLIENT_TOTAL_ITEMS_PROCESSED, UINT64, "client pool: total number of work items processed");
CT_STATUS_INIT(CT_POOL_CLIENT_TOTAL_EXECUTION_TIME, CACHETABLE_POOL_CLIENT_TOTAL_EXECUTION_TIME, UINT64, "client pool: total execution time of processing work items");
CT_STATUS_INIT(CT_POOL_CACHETABLE_NUM_THREADS, CACHETABLE_POOL_CACHETABLE_NUM_THREADS, UINT64, "cachetable pool: number of threads in pool");
CT_STATUS_INIT(CT_POOL_CACHETABLE_NUM_THREADS_ACTIVE, CACHETABLE_POOL_CACHETABLE_NUM_THREADS_ACTIVE, UINT64, "cachetable pool: number of currently active threads in pool");
CT_STATUS_INIT(CT_POOL_CACHETABLE_QUEUE_SIZE, CACHETABLE_POOL_CACHETABLE_QUEUE_SIZE, UINT64, "cachetable pool: number of currently queued work items");
CT_STATUS_INIT(CT_POOL_CACHETABLE_MAX_QUEUE_SIZE, CACHETABLE_POOL_CACHETABLE_MAX_QUEUE_SIZE, UINT64, "cachetable pool: largest number of queued work items");
CT_STATUS_INIT(CT_POOL_CACHETABLE_TOTAL_ITEMS_PROCESSED, CACHETABLE_POOL_CACHETABLE_TOTAL_ITEMS_PROCESSED, UINT64, "cachetable pool: total number of work items processed");
CT_STATUS_INIT(CT_POOL_CACHETABLE_TOTAL_EXECUTION_TIME, CACHETABLE_POOL_CACHETABLE_TOTAL_EXECUTION_TIME, UINT64, "cachetable pool: total execution time of processing work items");
CT_STATUS_INIT(CT_POOL_CHECKPOINT_NUM_THREADS, CACHETABLE_POOL_CHECKPOINT_NUM_THREADS, UINT64, "checkpoint pool: number of threads in pool");
CT_STATUS_INIT(CT_POOL_CHECKPOINT_NUM_THREADS_ACTIVE, CACHETABLE_POOL_CHECKPOINT_NUM_THREADS_ACTIVE, UINT64, "checkpoint pool: number of currently active threads in pool");
CT_STATUS_INIT(CT_POOL_CHECKPOINT_QUEUE_SIZE, CACHETABLE_POOL_CHECKPOINT_QUEUE_SIZE, UINT64, "checkpoint pool: number of currently queued work items");
CT_STATUS_INIT(CT_POOL_CHECKPOINT_MAX_QUEUE_SIZE, CACHETABLE_POOL_CHECKPOINT_MAX_QUEUE_SIZE, UINT64, "checkpoint pool: largest number of queued work items");
CT_STATUS_INIT(CT_POOL_CHECKPOINT_TOTAL_ITEMS_PROCESSED, CACHETABLE_POOL_CHECKPOINT_TOTAL_ITEMS_PROCESSED, UINT64, "checkpoint pool: total number of work items processed");
CT_STATUS_INIT(CT_POOL_CHECKPOINT_TOTAL_EXECUTION_TIME, CACHETABLE_POOL_CHECKPOINT_TOTAL_EXECUTION_TIME, UINT64, "checkpoint pool: total execution time of processing work items");
m_initialized = true;
#undef CT_STATUS_INIT

View File

@@ -172,21 +172,26 @@ int toku_testsetup_insert_to_leaf (FT_HANDLE ft_handle, BLOCKNUM blocknum, const
assert(node->height==0);
DBT kdbt, vdbt;
ft_msg msg(toku_fill_dbt(&kdbt, key, keylen), toku_fill_dbt(&vdbt, val, vallen),
FT_INSERT, next_dummymsn(), toku_xids_get_root_xids());
ft_msg msg(
toku_fill_dbt(&kdbt, key, keylen),
toku_fill_dbt(&vdbt, val, vallen),
FT_INSERT,
next_dummymsn(),
toku_xids_get_root_xids());
static size_t zero_flow_deltas[] = { 0, 0 };
txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, true);
toku_ftnode_put_msg(ft_handle->ft->cmp,
ft_handle->ft->update_fun,
node,
-1,
msg,
true,
&gc_info,
zero_flow_deltas,
NULL
);
toku_ftnode_put_msg(
ft_handle->ft->cmp,
ft_handle->ft->update_fun,
node,
-1,
msg,
true,
&gc_info,
zero_flow_deltas,
NULL,
NULL);
toku_verify_or_set_counts(node);

View File

@@ -198,6 +198,8 @@ static void ft_checkpoint (CACHEFILE cf, int fd, void *header_v) {
ch->time_of_last_modification = now;
ch->checkpoint_count++;
ft_hack_highest_unused_msn_for_upgrade_for_checkpoint(ft);
ch->on_disk_logical_rows =
ft->h->on_disk_logical_rows = ft->in_memory_logical_rows;
// write translation and header to disk (or at least to OS internal buffer)
toku_serialize_ft_to(fd, ch, &ft->blocktable, ft->cf);
@@ -383,7 +385,8 @@ ft_header_create(FT_OPTIONS options, BLOCKNUM root_blocknum, TXNID root_xid_that
.count_of_optimize_in_progress = 0,
.count_of_optimize_in_progress_read_from_disk = 0,
.msn_at_start_of_last_completed_optimize = ZERO_MSN,
.on_disk_stats = ZEROSTATS
.on_disk_stats = ZEROSTATS,
.on_disk_logical_rows = 0
};
return (FT_HEADER) toku_xmemdup(&h, sizeof h);
}
@@ -802,7 +805,14 @@ toku_ft_stat64 (FT ft, struct ftstat64_s *s) {
s->fsize = toku_cachefile_size(ft->cf);
// just use the in memory stats from the header
// prevent appearance of negative numbers for numrows, numbytes
int64_t n = ft->in_memory_stats.numrows;
// if the logical count was never properly re-counted on an upgrade,
// return the existing physical count instead.
int64_t n;
if (ft->in_memory_logical_rows == (uint64_t)-1) {
n = ft->in_memory_stats.numrows;
} else {
n = ft->in_memory_logical_rows;
}
if (n < 0) {
n = 0;
}
@@ -871,20 +881,38 @@ DESCRIPTOR toku_ft_get_cmp_descriptor(FT_HANDLE ft_handle) {
return &ft_handle->ft->cmp_descriptor;
}
void
toku_ft_update_stats(STAT64INFO headerstats, STAT64INFO_S delta) {
void toku_ft_update_stats(STAT64INFO headerstats, STAT64INFO_S delta) {
(void) toku_sync_fetch_and_add(&(headerstats->numrows), delta.numrows);
(void) toku_sync_fetch_and_add(&(headerstats->numbytes), delta.numbytes);
}
void
toku_ft_decrease_stats(STAT64INFO headerstats, STAT64INFO_S delta) {
void toku_ft_decrease_stats(STAT64INFO headerstats, STAT64INFO_S delta) {
(void) toku_sync_fetch_and_sub(&(headerstats->numrows), delta.numrows);
(void) toku_sync_fetch_and_sub(&(headerstats->numbytes), delta.numbytes);
}
void
toku_ft_remove_reference(FT ft, bool oplsn_valid, LSN oplsn, remove_ft_ref_callback remove_ref, void *extra) {
void toku_ft_adjust_logical_row_count(FT ft, int64_t delta) {
// In order to make sure that the correct count is returned from
// toku_ft_stat64, the ft->(in_memory|on_disk)_logical_rows _MUST_NOT_ be
// modified from anywhere else from here with the exceptions of
// serializing in a header, initializing a new header and analyzing
// an index for a logical_row count.
// The gist is that on an index upgrade, all logical_rows values
// in the ft header are set to -1 until an analyze can reset it to an
// accurate value. Until then, the physical count from in_memory_stats
// must be returned in toku_ft_stat64.
if (delta != 0 && ft->in_memory_logical_rows != (uint64_t)-1) {
toku_sync_fetch_and_add(&(ft->in_memory_logical_rows), delta);
}
}
void toku_ft_remove_reference(
FT ft,
bool oplsn_valid,
LSN oplsn,
remove_ft_ref_callback remove_ref,
void *extra) {
toku_ft_grab_reflock(ft);
if (toku_ft_has_one_reference_unlocked(ft)) {
toku_ft_release_reflock(ft);
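The sentinel logic spelled out in the comment above: after an upgrade, in_memory_logical_rows holds (uint64_t)-1 ("unknown"), toku_ft_adjust_logical_row_count ignores deltas until an analyze/recount resets it, and toku_ft_stat64 falls back to the physical count in the meantime. A standalone model under those assumptions (toy names; the real code uses toku_sync_fetch_and_add rather than std::atomic):

#include <atomic>
#include <cstdint>
#include <cstdio>

static const uint64_t LOGICAL_ROWS_UNKNOWN = (uint64_t)-1;

struct toy_ft {
    std::atomic<uint64_t> logical_rows{LOGICAL_ROWS_UNKNOWN};
    int64_t physical_rows = 0;  // stands in for in_memory_stats.numrows
};

static void adjust_logical_row_count(toy_ft* ft, int64_t delta) {
    // Mirrors the guard in the patch: do nothing while the sentinel is set.
    if (delta != 0 && ft->logical_rows.load() != LOGICAL_ROWS_UNKNOWN)
        ft->logical_rows.fetch_add((uint64_t)delta);  // wraps like fetch_and_add
}

static int64_t stat64_numrows(const toy_ft* ft) {
    uint64_t n = ft->logical_rows.load();
    if (n == LOGICAL_ROWS_UNKNOWN) return ft->physical_rows;  // fall back
    return (int64_t)n < 0 ? 0 : (int64_t)n;  // clamp, as toku_ft_stat64 does
}

int main() {
    toy_ft ft;
    ft.physical_rows = 42;
    adjust_logical_row_count(&ft, +5);  // ignored: count still unknown
    std::printf("%lld\n", (long long)stat64_numrows(&ft));  // 42 (physical)
    ft.logical_rows.store(42);          // e.g. after a recount/analyze
    adjust_logical_row_count(&ft, -2);
    std::printf("%lld\n", (long long)stat64_numrows(&ft));  // 40 (logical)
    return 0;
}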

View File

@@ -127,13 +127,17 @@ DESCRIPTOR toku_ft_get_cmp_descriptor(FT_HANDLE ft_handle);
typedef struct {
// delta versions in basements could be negative
// These represent the physical leaf entries and do not account
// for pending deletes or other in-flight messages that have not been
// applied to a leaf entry.
int64_t numrows;
int64_t numbytes;
} STAT64INFO_S, *STAT64INFO;
static const STAT64INFO_S ZEROSTATS = { .numrows = 0, .numbytes = 0};
static const STAT64INFO_S ZEROSTATS = { .numrows = 0, .numbytes = 0 };
void toku_ft_update_stats(STAT64INFO headerstats, STAT64INFO_S delta);
void toku_ft_decrease_stats(STAT64INFO headerstats, STAT64INFO_S delta);
void toku_ft_adjust_logical_row_count(FT ft, int64_t delta);
typedef void (*remove_ft_ref_callback)(FT ft, void *extra);
void toku_ft_remove_reference(FT ft,

View File

@@ -180,43 +180,57 @@ uint64_t le_outermost_uncommitted_xid (LEAFENTRY le);
// r|r!=0&&r!=TOKUDB_ACCEPT: Quit early, return r, because something unexpected went wrong (error case)
typedef int(*LE_ITERATE_CALLBACK)(TXNID id, TOKUTXN context, bool is_provisional);
int le_iterate_val(LEAFENTRY le, LE_ITERATE_CALLBACK f, void** valpp, uint32_t *vallenp, TOKUTXN context);
int le_iterate_val(
LEAFENTRY le,
LE_ITERATE_CALLBACK f,
void** valpp,
uint32_t* vallenp,
TOKUTXN context);
void le_extract_val(LEAFENTRY le,
// should we return the entire leafentry as the val?
bool is_leaf_mode, enum cursor_read_type read_type,
TOKUTXN ttxn, uint32_t *vallen, void **val);
void le_extract_val(
LEAFENTRY le,
// should we return the entire leafentry as the val?
bool is_leaf_mode,
enum cursor_read_type read_type,
TOKUTXN ttxn,
uint32_t* vallen,
void** val);
size_t
leafentry_disksize_13(LEAFENTRY_13 le);
size_t leafentry_disksize_13(LEAFENTRY_13 le);
int
toku_le_upgrade_13_14(LEAFENTRY_13 old_leafentry, // NULL if there was no stored data.
void** keyp,
uint32_t* keylen,
size_t *new_leafentry_memorysize,
LEAFENTRY *new_leafentry_p);
int toku_le_upgrade_13_14(
// NULL if there was no stored data.
LEAFENTRY_13 old_leafentry,
void** keyp,
uint32_t* keylen,
size_t* new_leafentry_memorysize,
LEAFENTRY *new_leafentry_p);
class bn_data;
void
toku_le_apply_msg(const ft_msg &msg,
LEAFENTRY old_leafentry, // NULL if there was no stored data.
bn_data* data_buffer, // bn_data storing leafentry, if NULL, means there is no bn_data
uint32_t idx, // index in data_buffer where leafentry is stored (and should be replaced
uint32_t old_keylen,
txn_gc_info *gc_info,
LEAFENTRY *new_leafentry_p,
int64_t * numbytes_delta_p);
int64_t toku_le_apply_msg(
const ft_msg &msg,
// NULL if there was no stored data.
LEAFENTRY old_leafentry,
// bn_data storing leafentry, if NULL, means there is no bn_data
bn_data* data_buffer,
// index in data_buffer where leafentry is stored (and should be replaced
uint32_t idx,
uint32_t old_keylen,
txn_gc_info* gc_info,
LEAFENTRY *new_leafentry_p,
int64_t* numbytes_delta_p);
bool toku_le_worth_running_garbage_collection(LEAFENTRY le, txn_gc_info *gc_info);
bool toku_le_worth_running_garbage_collection(
LEAFENTRY le,
txn_gc_info* gc_info);
void
toku_le_garbage_collect(LEAFENTRY old_leaf_entry,
bn_data* data_buffer,
uint32_t idx,
void* keyp,
uint32_t keylen,
txn_gc_info *gc_info,
LEAFENTRY *new_leaf_entry,
int64_t * numbytes_delta_p);
void toku_le_garbage_collect(
LEAFENTRY old_leaf_entry,
bn_data* data_buffer,
uint32_t idx,
void* keyp,
uint32_t keylen,
txn_gc_info* gc_info,
LEAFENTRY* new_leaf_entry,
int64_t* numbytes_delta_p);
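Note the signature change buried in this hunk: toku_le_apply_msg now returns int64_t instead of void. Per the comment later in ft-ops.cc ("an insert changed to an update or a delete changed to a noop"), a plausible reading is that this return value compensates the optimistic injection-time accounting. A standalone sketch of that arithmetic (toy names; my reading, not the patch's code):

#include <cstdint>
#include <cstdio>

enum toy_msg_type { TOY_INSERT, TOY_DELETE };

// Injection-time accounting (toku_ft_maybe_insert / toku_ft_maybe_delete):
// +1 for every insert, -1 for every delete, before the leaf is ever seen.
static int64_t injection_delta(toy_msg_type type) {
    return type == TOY_INSERT ? +1 : -1;
}

// Leaf-time correction, which the new int64_t return would model: an insert
// that lands on an existing key was really an update (-1 to compensate),
// and a delete of a missing key was a no-op (+1 to compensate).
static int64_t leaf_compensation(toy_msg_type type, bool key_existed) {
    if (type == TOY_INSERT && key_existed) return -1;
    if (type == TOY_DELETE && !key_existed) return +1;
    return 0;
}

int main() {
    struct { toy_msg_type t; bool existed; } ops[] = {
        {TOY_INSERT, false},  // new row:      net +1
        {TOY_INSERT, true},   // overwrite:    net  0
        {TOY_DELETE, true},   // real delete:  net -1
        {TOY_DELETE, false},  // no-op delete: net  0
    };
    for (auto& op : ops) {
        int64_t net = injection_delta(op.t) + leaf_compensation(op.t, op.existed);
        std::printf("net logical delta = %lld\n", (long long)net);
    }
    return 0;
}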

View File

@@ -2312,11 +2312,42 @@ static struct leaf_buf *start_leaf (struct dbout *out, const DESCRIPTOR UU(desc)
return lbuf;
}
static void finish_leafnode (struct dbout *out, struct leaf_buf *lbuf, int progress_allocation, FTLOADER bl, uint32_t target_basementnodesize, enum toku_compression_method target_compression_method);
static int write_nonleaves (FTLOADER bl, FIDX pivots_fidx, struct dbout *out, struct subtrees_info *sts, const DESCRIPTOR descriptor, uint32_t target_nodesize, uint32_t target_basementnodesize, enum toku_compression_method target_compression_method);
static void add_pair_to_leafnode (struct leaf_buf *lbuf, unsigned char *key, int keylen, unsigned char *val, int vallen, int this_leafentry_size, STAT64INFO stats_to_update);
static int write_translation_table (struct dbout *out, long long *off_of_translation_p);
static int write_header (struct dbout *out, long long translation_location_on_disk, long long translation_size_on_disk);
static void finish_leafnode(
struct dbout* out,
struct leaf_buf* lbuf,
int progress_allocation,
FTLOADER bl,
uint32_t target_basementnodesize,
enum toku_compression_method target_compression_method);
static int write_nonleaves(
FTLOADER bl,
FIDX pivots_fidx,
struct dbout* out,
struct subtrees_info* sts,
const DESCRIPTOR descriptor,
uint32_t target_nodesize,
uint32_t target_basementnodesize,
enum toku_compression_method target_compression_method);
static void add_pair_to_leafnode(
struct leaf_buf* lbuf,
unsigned char* key,
int keylen,
unsigned char* val,
int vallen,
int this_leafentry_size,
STAT64INFO stats_to_update,
int64_t* logical_rows_delta);
static int write_translation_table(
struct dbout* out,
long long* off_of_translation_p);
static int write_header(
struct dbout* out,
long long translation_location_on_disk,
long long translation_size_on_disk);
static void drain_writer_q(QUEUE q) {
void *item;
@@ -2448,6 +2479,12 @@ static int toku_loader_write_ft_from_q (FTLOADER bl,
DBT maxkey = make_dbt(0, 0); // keep track of the max key of the current node
STAT64INFO_S deltas = ZEROSTATS;
// This is just a placeholder and not used in the loader, the real/accurate
// stats will come out of 'deltas' because this loader is not pushing
// messages down into the top of a fractal tree where the logical row count
// is done, it is directly creating leaf entries so it must also take on
// performing the logical row counting on its own
int64_t logical_rows_delta = 0;
while (result == 0) {
void *item;
{
@@ -2506,7 +2543,15 @@ static int toku_loader_write_ft_from_q (FTLOADER bl,
lbuf = start_leaf(&out, descriptor, lblock, le_xid, target_nodesize);
}
add_pair_to_leafnode(lbuf, (unsigned char *) key.data, key.size, (unsigned char *) val.data, val.size, this_leafentry_size, &deltas);
add_pair_to_leafnode(
lbuf,
(unsigned char*)key.data,
key.size,
(unsigned char*)val.data,
val.size,
this_leafentry_size,
&deltas,
&logical_rows_delta);
n_rows_remaining--;
update_maxkey(&maxkey, &key); // set the new maxkey to the current key
@@ -2526,6 +2571,13 @@ static int toku_loader_write_ft_from_q (FTLOADER bl,
toku_ft_update_stats(&ft.in_memory_stats, deltas);
}
// As noted above, the loader directly creates a tree structure without
// going through the higher level ft API and thus bypasses the logical row
// counting performed at that level. So, we must manually update the logical
// row count with the info we have from the physical delta that comes out of
// add_pair_to_leafnode.
toku_ft_adjust_logical_row_count(&ft, deltas.numrows);
cleanup_maxkey(&maxkey);
if (lbuf) {
@@ -2878,7 +2930,16 @@ int toku_ft_loader_get_error(FTLOADER bl, int *error) {
return 0;
}
static void add_pair_to_leafnode (struct leaf_buf *lbuf, unsigned char *key, int keylen, unsigned char *val, int vallen, int this_leafentry_size, STAT64INFO stats_to_update) {
static void add_pair_to_leafnode(
struct leaf_buf* lbuf,
unsigned char* key,
int keylen,
unsigned char* val,
int vallen,
int this_leafentry_size,
STAT64INFO stats_to_update,
int64_t* logical_rows_delta) {
lbuf->nkeys++;
lbuf->ndata++;
lbuf->dsize += keylen + vallen;
@@ -2890,11 +2951,25 @@ static void add_pair_to_leafnode (struct leaf_buf *lbuf, unsigned char *key, int
FTNODE leafnode = lbuf->node;
uint32_t idx = BLB_DATA(leafnode, 0)->num_klpairs();
DBT kdbt, vdbt;
ft_msg msg(toku_fill_dbt(&kdbt, key, keylen), toku_fill_dbt(&vdbt, val, vallen), FT_INSERT, ZERO_MSN, lbuf->xids);
ft_msg msg(
toku_fill_dbt(&kdbt, key, keylen),
toku_fill_dbt(&vdbt, val, vallen),
FT_INSERT,
ZERO_MSN,
lbuf->xids);
uint64_t workdone = 0;
// there's no mvcc garbage in a bulk-loaded FT, so there's no need to pass useful gc info
txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, true);
toku_ft_bn_apply_msg_once(BLB(leafnode,0), msg, idx, keylen, NULL, &gc_info, &workdone, stats_to_update);
toku_ft_bn_apply_msg_once(
BLB(leafnode, 0),
msg,
idx,
keylen,
NULL,
&gc_info,
&workdone,
stats_to_update,
logical_rows_delta);
}
static int write_literal(struct dbout *out, void*data, size_t len) {
@@ -2905,7 +2980,14 @@ static int write_literal(struct dbout *out, void*data, size_t len) {
return result;
}
static void finish_leafnode (struct dbout *out, struct leaf_buf *lbuf, int progress_allocation, FTLOADER bl, uint32_t target_basementnodesize, enum toku_compression_method target_compression_method) {
static void finish_leafnode(
struct dbout* out,
struct leaf_buf* lbuf,
int progress_allocation,
FTLOADER bl,
uint32_t target_basementnodesize,
enum toku_compression_method target_compression_method) {
int result = 0;
// serialize leaf to buffer
@@ -2913,7 +2995,16 @@ static void finish_leafnode (struct dbout *out, struct leaf_buf *lbuf, int progr
size_t uncompressed_serialized_leaf_size = 0;
char *serialized_leaf = NULL;
FTNODE_DISK_DATA ndd = NULL;
result = toku_serialize_ftnode_to_memory(lbuf->node, &ndd, target_basementnodesize, target_compression_method, true, true, &serialized_leaf_size, &uncompressed_serialized_leaf_size, &serialized_leaf);
result = toku_serialize_ftnode_to_memory(
lbuf->node,
&ndd,
target_basementnodesize,
target_compression_method,
true,
true,
&serialized_leaf_size,
&uncompressed_serialized_leaf_size,
&serialized_leaf);
// write it out
if (result == 0) {
@@ -2979,8 +3070,11 @@ static int write_translation_table (struct dbout *out, long long *off_of_transla
return result;
}
static int
write_header (struct dbout *out, long long translation_location_on_disk, long long translation_size_on_disk) {
static int write_header(
struct dbout* out,
long long translation_location_on_disk,
long long translation_size_on_disk) {
int result = 0;
size_t size = toku_serialize_ft_size(out->ft->h);
size_t alloced_size = roundup_to_multiple(512, size);
@@ -2991,6 +3085,7 @@ write_header (struct dbout *out, long long translation_location_on_disk, long lo
} else {
wbuf_init(&wbuf, buf, size);
out->ft->h->on_disk_stats = out->ft->in_memory_stats;
out->ft->h->on_disk_logical_rows = out->ft->in_memory_logical_rows;
toku_serialize_ft_to_wbuf(&wbuf, out->ft->h, translation_location_on_disk, translation_size_on_disk);
for (size_t i=size; i<alloced_size; i++) buf[i]=0; // initialize all those unused spots to zero
if (wbuf.ndone != size)
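As the two loader comments above explain, the bulk loader writes leaf entries directly instead of injecting messages at the root, so it must do its own logical row accounting: accumulate physical deltas per pair, then publish them once with toku_ft_adjust_logical_row_count(&ft, deltas.numrows), which is valid because a freshly built tree has no buffered deletes. A standalone sketch of that pattern (illustrative names, not the loader's API):

#include <cstdint>
#include <cstdio>
#include <utility>
#include <vector>

struct stat64 { int64_t numrows = 0; int64_t numbytes = 0; };

static void add_pair(stat64* deltas, const std::pair<int, int>& kv) {
    deltas->numrows += 1;  // one leaf entry per bulk-loaded pair
    deltas->numbytes += sizeof kv.first + sizeof kv.second;
}

int main() {
    stat64 deltas;
    int64_t logical_rows = 0;  // like ft.in_memory_logical_rows
    std::vector<std::pair<int, int>> rows = {{1, 10}, {2, 20}, {3, 30}};
    for (const auto& kv : rows) add_pair(&deltas, kv);
    // A freshly loaded tree has no buffered messages, so physical == logical:
    logical_rows += deltas.numrows;  // one adjustment, after the build
    std::printf("rows=%lld bytes=%lld\n",
                (long long)logical_rows, (long long)deltas.numbytes);
    return 0;
}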

View File

@@ -265,8 +265,9 @@ toku_maybe_upgrade_log(const char *env_dir, const char *log_dir, LSN * lsn_of_cl
TXNID last_xid = TXNID_NONE;
r = verify_clean_shutdown_of_log_version(log_dir, version_of_logs_on_disk, &last_lsn, &last_xid);
if (r != 0) {
if (TOKU_LOG_VERSION_25 <= version_of_logs_on_disk && version_of_logs_on_disk <= TOKU_LOG_VERSION_27
&& TOKU_LOG_VERSION_28 == TOKU_LOG_VERSION) {
if (TOKU_LOG_VERSION_25 <= version_of_logs_on_disk &&
version_of_logs_on_disk <= TOKU_LOG_VERSION_27 &&
TOKU_LOG_VERSION_29 == TOKU_LOG_VERSION) {
r = 0; // can do recovery on dirty shutdown
} else {
fprintf(stderr, "Cannot upgrade PerconaFT version %d database.", version_of_logs_on_disk);

View File

@@ -54,6 +54,7 @@ enum {
TOKU_LOG_VERSION_26 = 26, // no change from 25
TOKU_LOG_VERSION_27 = 27, // no change from 26
TOKU_LOG_VERSION_28 = 28, // no change from 27
TOKU_LOG_VERSION_29 = 29, // no change from 28
TOKU_LOG_VERSION = FT_LAYOUT_VERSION,
TOKU_LOG_MIN_SUPPORTED_VERSION = FT_LAYOUT_MIN_SUPPORTED_VERSION,
};

View File

@@ -206,12 +206,20 @@ int msg_buffer_offset_msn_cmp(message_buffer &msg_buffer, const int32_t &ao, con
}
/**
* Given a message buffer and an offset, apply the message with toku_ft_bn_apply_msg, or discard it,
* Given a message buffer and an offset, apply the message with
* toku_ft_bn_apply_msg, or discard it,
* based on its MSN and the MSN of the basement node.
*/
static void
do_bn_apply_msg(FT_HANDLE ft_handle, BASEMENTNODE bn, message_buffer *msg_buffer, int32_t offset,
txn_gc_info *gc_info, uint64_t *workdone, STAT64INFO stats_to_update) {
static void do_bn_apply_msg(
FT_HANDLE ft_handle,
BASEMENTNODE bn,
message_buffer* msg_buffer,
int32_t offset,
txn_gc_info* gc_info,
uint64_t* workdone,
STAT64INFO stats_to_update,
int64_t* logical_rows_delta) {
DBT k, v;
ft_msg msg = msg_buffer->get_message(offset, &k, &v);
@@ -227,16 +235,17 @@ do_bn_apply_msg(FT_HANDLE ft_handle, BASEMENTNODE bn, message_buffer *msg_buffer
msg,
gc_info,
workdone,
stats_to_update
);
stats_to_update,
logical_rows_delta);
} else {
toku_ft_status_note_msn_discard();
}
// We must always mark message as stale since it has been marked
// (using omt::iterate_and_mark_range)
// It is possible to call do_bn_apply_msg even when it won't apply the message because
// the node containing it could have been evicted and brought back in.
// It is possible to call do_bn_apply_msg even when it won't apply the
// message because the node containing it could have been evicted and
// brought back in.
msg_buffer->set_freshness(offset, false);
}
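The MSN discipline that do_bn_apply_msg enforces: a message is applied only when its MSN exceeds the basement node's max applied MSN, but its buffer slot is always marked stale afterwards, because an evicted and reloaded node can present the same message twice. A standalone sketch of that filter (toy types; in the real code the caller updates max_msn_applied before applying):

#include <cstdint>
#include <cstdio>
#include <vector>

struct toy_msg { uint64_t msn; bool fresh; };
struct toy_basement { uint64_t max_msn_applied = 0; uint64_t rows_applied = 0; };

static void apply_or_discard(toy_basement* bn, toy_msg* msg) {
    if (msg->msn > bn->max_msn_applied) {
        bn->rows_applied++;             // stands in for toku_ft_bn_apply_msg
        bn->max_msn_applied = msg->msn;
    }                                   // else: counted as an MSN discard
    msg->fresh = false;                 // always marked stale, applied or not
}

int main() {
    toy_basement bn;
    // Duplicate MSN 2 models a message re-seen after eviction and reload.
    std::vector<toy_msg> buffer = {{1, true}, {2, true}, {2, true}, {3, true}};
    for (auto& m : buffer) apply_or_discard(&bn, &m);
    std::printf("applied=%llu max_msn=%llu\n",
                (unsigned long long)bn.rows_applied,
                (unsigned long long)bn.max_msn_applied);  // applied=3 max_msn=3
    return 0;
}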
@@ -248,12 +257,29 @@ struct iterate_do_bn_apply_msg_extra {
txn_gc_info *gc_info;
uint64_t *workdone;
STAT64INFO stats_to_update;
int64_t *logical_rows_delta;
};
int iterate_do_bn_apply_msg(const int32_t &offset, const uint32_t UU(idx), struct iterate_do_bn_apply_msg_extra *const e) __attribute__((nonnull(3)));
int iterate_do_bn_apply_msg(const int32_t &offset, const uint32_t UU(idx), struct iterate_do_bn_apply_msg_extra *const e)
int iterate_do_bn_apply_msg(
const int32_t &offset,
const uint32_t UU(idx),
struct iterate_do_bn_apply_msg_extra* const e)
__attribute__((nonnull(3)));
int iterate_do_bn_apply_msg(
const int32_t &offset,
const uint32_t UU(idx),
struct iterate_do_bn_apply_msg_extra* const e)
{
do_bn_apply_msg(e->t, e->bn, &e->bnc->msg_buffer, offset, e->gc_info, e->workdone, e->stats_to_update);
do_bn_apply_msg(
e->t,
e->bn,
&e->bnc->msg_buffer,
offset,
e->gc_info,
e->workdone,
e->stats_to_update,
e->logical_rows_delta);
return 0;
}
@@ -354,17 +380,15 @@ find_bounds_within_message_tree(
* or plus infinity respectively if they are NULL. Do not mark the node
* as dirty (preserve previous state of 'dirty' bit).
*/
static void
bnc_apply_messages_to_basement_node(
static void bnc_apply_messages_to_basement_node(
FT_HANDLE t, // used for comparison function
BASEMENTNODE bn, // where to apply messages
FTNODE ancestor, // the ancestor node where we can find messages to apply
int childnum, // which child buffer of ancestor contains messages we want
const pivot_bounds &bounds, // contains pivot key bounds of this basement node
txn_gc_info *gc_info,
bool* msgs_applied
)
{
txn_gc_info* gc_info,
bool* msgs_applied) {
int r;
NONLEAF_CHILDINFO bnc = BNC(ancestor, childnum);
@@ -372,16 +396,29 @@ bnc_apply_messages_to_basement_node(
// apply messages from this buffer
STAT64INFO_S stats_delta = {0,0};
uint64_t workdone_this_ancestor = 0;
int64_t logical_rows_delta = 0;
uint32_t stale_lbi, stale_ube;
if (!bn->stale_ancestor_messages_applied) {
find_bounds_within_message_tree(t->ft->cmp, bnc->stale_message_tree, &bnc->msg_buffer, bounds, &stale_lbi, &stale_ube);
find_bounds_within_message_tree(
t->ft->cmp,
bnc->stale_message_tree,
&bnc->msg_buffer,
bounds,
&stale_lbi,
&stale_ube);
} else {
stale_lbi = 0;
stale_ube = 0;
}
uint32_t fresh_lbi, fresh_ube;
find_bounds_within_message_tree(t->ft->cmp, bnc->fresh_message_tree, &bnc->msg_buffer, bounds, &fresh_lbi, &fresh_ube);
find_bounds_within_message_tree(
t->ft->cmp,
bnc->fresh_message_tree,
&bnc->msg_buffer,
bounds,
&fresh_lbi,
&fresh_ube);
// We now know where all the messages we must apply are, so one of the
// following 4 cases will do the application, depending on which of
@@ -395,7 +432,9 @@ bnc_apply_messages_to_basement_node(
// We have messages in multiple trees, so we grab all
// the relevant messages' offsets and sort them by MSN, then apply
// them in MSN order.
const int buffer_size = ((stale_ube - stale_lbi) + (fresh_ube - fresh_lbi) + bnc->broadcast_list.size());
const int buffer_size = ((stale_ube - stale_lbi) +
(fresh_ube - fresh_lbi) +
bnc->broadcast_list.size());
toku::scoped_malloc offsets_buf(buffer_size * sizeof(int32_t));
int32_t *offsets = reinterpret_cast<int32_t *>(offsets_buf.get());
struct store_msg_buffer_offset_extra sfo_extra = { .offsets = offsets, .i = 0 };
@@ -419,11 +458,27 @@ bnc_apply_messages_to_basement_node(
// Apply the messages in MSN order.
for (int i = 0; i < buffer_size; ++i) {
*msgs_applied = true;
do_bn_apply_msg(t, bn, &bnc->msg_buffer, offsets[i], gc_info, &workdone_this_ancestor, &stats_delta);
do_bn_apply_msg(
t,
bn,
&bnc->msg_buffer,
offsets[i],
gc_info,
&workdone_this_ancestor,
&stats_delta,
&logical_rows_delta);
}
} else if (stale_lbi == stale_ube) {
// No stale messages to apply, we just apply fresh messages, and mark them to be moved to stale later.
struct iterate_do_bn_apply_msg_extra iter_extra = { .t = t, .bn = bn, .bnc = bnc, .gc_info = gc_info, .workdone = &workdone_this_ancestor, .stats_to_update = &stats_delta };
struct iterate_do_bn_apply_msg_extra iter_extra = {
.t = t,
.bn = bn,
.bnc = bnc,
.gc_info = gc_info,
.workdone = &workdone_this_ancestor,
.stats_to_update = &stats_delta,
.logical_rows_delta = &logical_rows_delta
};
if (fresh_ube - fresh_lbi > 0) *msgs_applied = true;
r = bnc->fresh_message_tree.iterate_and_mark_range<struct iterate_do_bn_apply_msg_extra, iterate_do_bn_apply_msg>(fresh_lbi, fresh_ube, &iter_extra);
assert_zero(r);
@@ -432,7 +487,15 @@ bnc_apply_messages_to_basement_node(
// No fresh messages to apply, we just apply stale messages.
if (stale_ube - stale_lbi > 0) *msgs_applied = true;
struct iterate_do_bn_apply_msg_extra iter_extra = { .t = t, .bn = bn, .bnc = bnc, .gc_info = gc_info, .workdone = &workdone_this_ancestor, .stats_to_update = &stats_delta };
struct iterate_do_bn_apply_msg_extra iter_extra = {
.t = t,
.bn = bn,
.bnc = bnc,
.gc_info = gc_info,
.workdone = &workdone_this_ancestor,
.stats_to_update = &stats_delta,
.logical_rows_delta = &logical_rows_delta
};
r = bnc->stale_message_tree.iterate_on_range<struct iterate_do_bn_apply_msg_extra, iterate_do_bn_apply_msg>(stale_lbi, stale_ube, &iter_extra);
assert_zero(r);
@@ -446,6 +509,7 @@ bnc_apply_messages_to_basement_node(
if (stats_delta.numbytes || stats_delta.numrows) {
toku_ft_update_stats(&t->ft->in_memory_stats, stats_delta);
}
toku_ft_adjust_logical_row_count(t->ft, logical_rows_delta);
}
static void
@@ -1073,7 +1137,8 @@ toku_ft_bn_apply_msg_once (
LEAFENTRY le,
txn_gc_info *gc_info,
uint64_t *workdone,
STAT64INFO stats_to_update
STAT64INFO stats_to_update,
int64_t *logical_rows_delta
)
// Effect: Apply msg to leafentry (msn is ignored)
// Calculate work done by message on leafentry and add it to caller's workdone counter.
@@ -1082,26 +1147,34 @@ toku_ft_bn_apply_msg_once (
{
size_t newsize=0, oldsize=0, workdone_this_le=0;
LEAFENTRY new_le=0;
int64_t numbytes_delta = 0; // how many bytes of user data (not including overhead) were added or deleted from this row
int64_t numrows_delta = 0; // will be +1 or -1 or 0 (if row was added or deleted or not)
// how many bytes of user data (not including overhead) were added or
// deleted from this row
int64_t numbytes_delta = 0;
// will be +1 or -1 or 0 (if row was added or deleted or not)
int64_t numrows_delta = 0;
// will be +1, -1 or 0 if a message that was accounted for logically has
// changed in meaning such as an insert changed to an update or a delete
// changed to a noop
int64_t logical_rows_delta_le = 0;
uint32_t key_storage_size = msg.kdbt()->size + sizeof(uint32_t);
if (le) {
oldsize = leafentry_memsize(le) + key_storage_size;
}
// toku_le_apply_msg() may call bn_data::mempool_malloc_and_update_dmt() to allocate more space.
// That means le is guaranteed to not cause a sigsegv but it may point to a mempool that is
// no longer in use. We'll have to release the old mempool later.
toku_le_apply_msg(
msg,
// toku_le_apply_msg() may call bn_data::mempool_malloc_and_update_dmt()
// to allocate more space. That means le is guaranteed to not cause a
// sigsegv but it may point to a mempool that is no longer in use.
// We'll have to release the old mempool later.
logical_rows_delta_le = toku_le_apply_msg(
msg,
le,
&bn->data_buffer,
idx,
le_keylen,
gc_info,
&new_le,
&numbytes_delta
);
gc_info,
&new_le,
&numbytes_delta);
// at this point, we cannot trust cmd->u.id.key to be valid.
// The dmt may have realloced its mempool and freed the one containing key.
@@ -1121,37 +1194,42 @@ toku_ft_bn_apply_msg_once (
numrows_delta = 1;
}
}
if (workdone) { // test programs may call with NULL
if (FT_LIKELY(workdone != NULL)) { // test programs may call with NULL
*workdone += workdone_this_le;
}
if (FT_LIKELY(logical_rows_delta != NULL)) {
*logical_rows_delta += logical_rows_delta_le;
}
// now update stat64 statistics
bn->stat64_delta.numrows += numrows_delta;
bn->stat64_delta.numbytes += numbytes_delta;
// the only reason stats_to_update may be null is for tests
if (stats_to_update) {
if (FT_LIKELY(stats_to_update != NULL)) {
stats_to_update->numrows += numrows_delta;
stats_to_update->numbytes += numbytes_delta;
}
}
static const uint32_t setval_tag = 0xee0ccb99; // this was gotten by doing "cat /dev/random|head -c4|od -x" to get a random number. We want to make sure that the user actually passes us the setval_extra_s that we passed in.
struct setval_extra_s {
uint32_t tag;
bool did_set_val;
int setval_r; // any error code that setval_fun wants to return goes here.
// any error code that setval_fun wants to return goes here.
int setval_r;
// need arguments for toku_ft_bn_apply_msg_once
BASEMENTNODE bn;
MSN msn; // captured from original message, not currently used
// captured from original message, not currently used
MSN msn;
XIDS xids;
const DBT *key;
const DBT* key;
uint32_t idx;
uint32_t le_keylen;
LEAFENTRY le;
txn_gc_info *gc_info;
uint64_t * workdone; // set by toku_ft_bn_apply_msg_once()
txn_gc_info* gc_info;
uint64_t* workdone; // set by toku_ft_bn_apply_msg_once()
STAT64INFO stats_to_update;
int64_t* logical_rows_delta;
};
/*
@@ -1170,29 +1248,45 @@ static void setval_fun (const DBT *new_val, void *svextra_v) {
// can't leave scope until toku_ft_bn_apply_msg_once if
// this is a delete
DBT val;
ft_msg msg(svextra->key,
new_val ? new_val : toku_init_dbt(&val),
new_val ? FT_INSERT : FT_DELETE_ANY,
svextra->msn, svextra->xids);
toku_ft_bn_apply_msg_once(svextra->bn, msg,
svextra->idx, svextra->le_keylen, svextra->le,
svextra->gc_info,
svextra->workdone, svextra->stats_to_update);
ft_msg msg(
svextra->key,
new_val ? new_val : toku_init_dbt(&val),
new_val ? FT_INSERT : FT_DELETE_ANY,
svextra->msn,
svextra->xids);
toku_ft_bn_apply_msg_once(
svextra->bn,
msg,
svextra->idx,
svextra->le_keylen,
svextra->le,
svextra->gc_info,
svextra->workdone,
svextra->stats_to_update,
svextra->logical_rows_delta);
svextra->setval_r = 0;
}
}
// We are already past the msn filter (in toku_ft_bn_apply_msg(), which calls do_update()),
// so capturing the msn in the setval_extra_s is not strictly required. The alternative
// would be to put a dummy msn in the messages created by setval_fun(), but preserving
// the original msn seems cleaner and it preserves accountability at a lower layer.
static int do_update(ft_update_func update_fun, const DESCRIPTOR_S *desc, BASEMENTNODE bn, const ft_msg &msg, uint32_t idx,
LEAFENTRY le,
void* keydata,
uint32_t keylen,
txn_gc_info *gc_info,
uint64_t * workdone,
STAT64INFO stats_to_update) {
// We are already past the msn filter (in toku_ft_bn_apply_msg(), which calls
// do_update()), so capturing the msn in the setval_extra_s is not strictly
// required. The alternative would be to put a dummy msn in the messages
// created by setval_fun(), but preserving the original msn seems cleaner and
// it preserves accountability at a lower layer.
static int do_update(
ft_update_func update_fun,
const DESCRIPTOR_S* desc,
BASEMENTNODE bn,
const ft_msg &msg,
uint32_t idx,
LEAFENTRY le,
void* keydata,
uint32_t keylen,
txn_gc_info* gc_info,
uint64_t* workdone,
STAT64INFO stats_to_update,
int64_t* logical_rows_delta) {
LEAFENTRY le_for_update;
DBT key;
const DBT *keyp;
@@ -1232,39 +1326,52 @@ static int do_update(ft_update_func update_fun, const DESCRIPTOR_S *desc, BASEME
}
le_for_update = le;
struct setval_extra_s setval_extra = {setval_tag, false, 0, bn, msg.msn(), msg.xids(),
keyp, idx, keylen, le_for_update, gc_info,
workdone, stats_to_update};
// call handlerton's ft->update_fun(), which passes setval_extra to setval_fun()
struct setval_extra_s setval_extra = {
setval_tag,
false,
0,
bn,
msg.msn(),
msg.xids(),
keyp,
idx,
keylen,
le_for_update,
gc_info,
workdone,
stats_to_update,
logical_rows_delta
};
// call handlerton's ft->update_fun(), which passes setval_extra
// to setval_fun()
FAKE_DB(db, desc);
int r = update_fun(
&db,
keyp,
vdbtp,
update_function_extra,
setval_fun, &setval_extra
);
setval_fun,
&setval_extra);
if (r == 0) { r = setval_extra.setval_r; }
return r;
}
// Should be renamed as something like "apply_msg_to_basement()."
void
toku_ft_bn_apply_msg (
const toku::comparator &cmp,
void toku_ft_bn_apply_msg(
const toku::comparator& cmp,
ft_update_func update_fun,
BASEMENTNODE bn,
const ft_msg &msg,
txn_gc_info *gc_info,
uint64_t *workdone,
STAT64INFO stats_to_update
)
const ft_msg& msg,
txn_gc_info* gc_info,
uint64_t* workdone,
STAT64INFO stats_to_update,
int64_t* logical_rows_delta) {
// Effect:
// Put a msg into a leaf.
// Calculate work done by message on leafnode and add it to caller's workdone counter.
// Calculate work done by message on leafnode and add it to caller's
// workdone counter.
// The leaf could end up "too big" or "too small". The caller must fix that up.
{
LEAFENTRY storeddata;
void* key = NULL;
uint32_t keylen = 0;
@@ -1303,7 +1410,16 @@ toku_ft_bn_apply_msg (
} else {
assert_zero(r);
}
toku_ft_bn_apply_msg_once(bn, msg, idx, keylen, storeddata, gc_info, workdone, stats_to_update);
toku_ft_bn_apply_msg_once(
bn,
msg,
idx,
keylen,
storeddata,
gc_info,
workdone,
stats_to_update,
logical_rows_delta);
// if the insertion point is within a window of the right edge of
// the leaf then it is sequential
@@ -1331,12 +1447,19 @@ toku_ft_bn_apply_msg (
&storeddata,
&key,
&keylen,
&idx
);
&idx);
if (r == DB_NOTFOUND) break;
assert_zero(r);
toku_ft_bn_apply_msg_once(bn, msg, idx, keylen, storeddata, gc_info, workdone, stats_to_update);
toku_ft_bn_apply_msg_once(
bn,
msg,
idx,
keylen,
storeddata,
gc_info,
workdone,
stats_to_update,
logical_rows_delta);
break;
}
case FT_OPTIMIZE_FOR_UPGRADE:
@@ -1352,13 +1475,27 @@ toku_ft_bn_apply_msg (
assert_zero(r);
int deleted = 0;
if (!le_is_clean(storeddata)) { //If already clean, nothing to do.
// message application code needs a key in order to determine how much
// work was done by this message. since this is a broadcast message,
// we have to create a new message whose key is the current le's key.
// message application code needs a key in order to determine
// how much work was done by this message. since this is a
// broadcast message, we have to create a new message whose
// key is the current le's key.
DBT curr_keydbt;
ft_msg curr_msg(toku_fill_dbt(&curr_keydbt, curr_keyp, curr_keylen),
msg.vdbt(), msg.type(), msg.msn(), msg.xids());
toku_ft_bn_apply_msg_once(bn, curr_msg, idx, curr_keylen, storeddata, gc_info, workdone, stats_to_update);
ft_msg curr_msg(
toku_fill_dbt(&curr_keydbt, curr_keyp, curr_keylen),
msg.vdbt(),
msg.type(),
msg.msn(),
msg.xids());
toku_ft_bn_apply_msg_once(
bn,
curr_msg,
idx,
curr_keylen,
storeddata,
gc_info,
workdone,
stats_to_update,
logical_rows_delta);
// at this point, we cannot trust msg.kdbt to be valid.
uint32_t new_dmt_size = bn->data_buffer.num_klpairs();
if (new_dmt_size != num_klpairs) {
@@ -1386,13 +1523,27 @@ toku_ft_bn_apply_msg (
assert_zero(r);
int deleted = 0;
if (le_has_xids(storeddata, msg.xids())) {
// message application code needs a key in order to determine how much
// work was done by this message. since this is a broadcast message,
// we have to create a new message whose key is the current le's key.
// message application code needs a key in order to determine
// how much work was done by this message. since this is a
// broadcast message, we have to create a new message whose key
// is the current le's key.
DBT curr_keydbt;
ft_msg curr_msg(toku_fill_dbt(&curr_keydbt, curr_keyp, curr_keylen),
msg.vdbt(), msg.type(), msg.msn(), msg.xids());
toku_ft_bn_apply_msg_once(bn, curr_msg, idx, curr_keylen, storeddata, gc_info, workdone, stats_to_update);
ft_msg curr_msg(
toku_fill_dbt(&curr_keydbt, curr_keyp, curr_keylen),
msg.vdbt(),
msg.type(),
msg.msn(),
msg.xids());
toku_ft_bn_apply_msg_once(
bn,
curr_msg,
idx,
curr_keylen,
storeddata,
gc_info,
workdone,
stats_to_update,
logical_rows_delta);
uint32_t new_dmt_size = bn->data_buffer.num_klpairs();
if (new_dmt_size != num_klpairs) {
paranoid_invariant(new_dmt_size + 1 == num_klpairs);
@@ -1424,9 +1575,33 @@ toku_ft_bn_apply_msg (
key = msg.kdbt()->data;
keylen = msg.kdbt()->size;
}
r = do_update(update_fun, cmp.get_descriptor(), bn, msg, idx, NULL, NULL, 0, gc_info, workdone, stats_to_update);
r = do_update(
update_fun,
cmp.get_descriptor(),
bn,
msg,
idx,
NULL,
NULL,
0,
gc_info,
workdone,
stats_to_update,
logical_rows_delta);
} else if (r==0) {
r = do_update(update_fun, cmp.get_descriptor(), bn, msg, idx, storeddata, key, keylen, gc_info, workdone, stats_to_update);
r = do_update(
update_fun,
cmp.get_descriptor(),
bn,
msg,
idx,
storeddata,
key,
keylen,
gc_info,
workdone,
stats_to_update,
logical_rows_delta);
} // otherwise, a worse error, just return it
break;
}
@@ -1434,6 +1609,12 @@ toku_ft_bn_apply_msg (
// apply to all leafentries.
uint32_t idx = 0;
uint32_t num_leafentries_before;
// This is used to avoid having the logical row count changed on apply
// of this message since it will return a negative number of the number
// of leaf entries visited and cause the ft header value to go to 0;
// This message will not change the number of rows, so just use the
// bogus value.
int64_t temp_logical_rows_delta = 0;
while (idx < (num_leafentries_before = bn->data_buffer.num_klpairs())) {
void* curr_key = nullptr;
uint32_t curr_keylen = 0;
@@ -1449,7 +1630,19 @@ toku_ft_bn_apply_msg (
// This is broken below. Have a compilation error checked
// in as a reminder
r = do_update(update_fun, cmp.get_descriptor(), bn, msg, idx, storeddata, curr_key, curr_keylen, gc_info, workdone, stats_to_update);
r = do_update(
update_fun,
cmp.get_descriptor(),
bn,
msg,
idx,
storeddata,
curr_key,
curr_keylen,
gc_info,
workdone,
stats_to_update,
&temp_logical_rows_delta);
assert_zero(r);
if (num_leafentries_before == bn->data_buffer.num_klpairs()) {
@@ -1810,24 +2003,22 @@ void toku_ftnode_leaf_run_gc(FT ft, FTNODE node) {
}
}
void
toku_ftnode_put_msg (
void toku_ftnode_put_msg(
const toku::comparator &cmp,
ft_update_func update_fun,
FTNODE node,
int target_childnum,
const ft_msg &msg,
bool is_fresh,
txn_gc_info *gc_info,
txn_gc_info* gc_info,
size_t flow_deltas[],
STAT64INFO stats_to_update
)
STAT64INFO stats_to_update,
int64_t* logical_rows_delta) {
// Effect: Push message into the subtree rooted at NODE.
// If NODE is a leaf, then
// put message into leaf, applying it to the leafentries
// If NODE is a nonleaf, then push the message into the message buffer(s) of the relevant child(ren).
// The node may become overfull. That's not our problem.
{
toku_ftnode_assert_fully_in_memory(node);
//
// see comments in toku_ft_leaf_apply_msg
@@ -1836,26 +2027,40 @@ toku_ftnode_put_msg (
// and instead defer to these functions
//
if (node->height==0) {
toku_ft_leaf_apply_msg(cmp, update_fun, node, target_childnum, msg, gc_info, nullptr, stats_to_update);
toku_ft_leaf_apply_msg(
cmp,
update_fun,
node,
target_childnum, msg,
gc_info,
nullptr,
stats_to_update,
logical_rows_delta);
} else {
ft_nonleaf_put_msg(cmp, node, target_childnum, msg, is_fresh, flow_deltas);
ft_nonleaf_put_msg(
cmp,
node,
target_childnum,
msg,
is_fresh,
flow_deltas);
}
}
// Effect: applies the message to the leaf if the appropriate basement node is in memory.
// This function is called during message injection and/or flushing, so the entire
// node MUST be in memory.
// Effect: applies the message to the leaf if the appropriate basement node is
// in memory. This function is called during message injection and/or
// flushing, so the entire node MUST be in memory.
void toku_ft_leaf_apply_msg(
const toku::comparator &cmp,
const toku::comparator& cmp,
ft_update_func update_fun,
FTNODE node,
int target_childnum, // which child to inject to, or -1 if unknown
const ft_msg &msg,
txn_gc_info *gc_info,
uint64_t *workdone,
STAT64INFO stats_to_update
)
{
const ft_msg& msg,
txn_gc_info* gc_info,
uint64_t* workdone,
STAT64INFO stats_to_update,
int64_t* logical_rows_delta) {
VERIFY_NODE(t, node);
toku_ftnode_assert_fully_in_memory(node);
@@ -1891,34 +2096,36 @@ void toku_ft_leaf_apply_msg(
BASEMENTNODE bn = BLB(node, childnum);
if (msg.msn().msn > bn->max_msn_applied.msn) {
bn->max_msn_applied = msg.msn();
toku_ft_bn_apply_msg(cmp,
update_fun,
bn,
msg,
gc_info,
workdone,
stats_to_update);
toku_ft_bn_apply_msg(
cmp,
update_fun,
bn,
msg,
gc_info,
workdone,
stats_to_update,
logical_rows_delta);
} else {
toku_ft_status_note_msn_discard();
}
}
else if (ft_msg_type_applies_all(msg.type())) {
} else if (ft_msg_type_applies_all(msg.type())) {
for (int childnum=0; childnum<node->n_children; childnum++) {
if (msg.msn().msn > BLB(node, childnum)->max_msn_applied.msn) {
BLB(node, childnum)->max_msn_applied = msg.msn();
toku_ft_bn_apply_msg(cmp,
update_fun,
BLB(node, childnum),
msg,
gc_info,
workdone,
stats_to_update);
toku_ft_bn_apply_msg(
cmp,
update_fun,
BLB(node, childnum),
msg,
gc_info,
workdone,
stats_to_update,
logical_rows_delta);
} else {
toku_ft_status_note_msn_discard();
}
}
}
else if (!ft_msg_type_does_nothing(msg.type())) {
} else if (!ft_msg_type_does_nothing(msg.type())) {
invariant(ft_msg_type_does_nothing(msg.type()));
}
VERIFY_NODE(t, node);

View File

@@ -382,25 +382,54 @@ enum reactivity toku_ftnode_get_leaf_reactivity(FTNODE node, uint32_t nodesize);
* If k is equal to some pivot, then we return the next (to the right)
* childnum.
*/
int toku_ftnode_hot_next_child(FTNODE node, const DBT *k, const toku::comparator &cmp);
int toku_ftnode_hot_next_child(
FTNODE node,
const DBT* k,
const toku::comparator &cmp);
void toku_ftnode_put_msg(const toku::comparator &cmp, ft_update_func update_fun,
FTNODE node, int target_childnum,
const ft_msg &msg, bool is_fresh, txn_gc_info *gc_info,
size_t flow_deltas[], STAT64INFO stats_to_update);
void toku_ftnode_put_msg(
const toku::comparator& cmp,
ft_update_func update_fun,
FTNODE node,
int target_childnum,
const ft_msg& msg,
bool is_fresh,
txn_gc_info* gc_info,
size_t flow_deltas[],
STAT64INFO stats_to_update,
int64_t* logical_rows_delta);
void toku_ft_bn_apply_msg_once(BASEMENTNODE bn, const ft_msg &msg, uint32_t idx,
uint32_t le_keylen, LEAFENTRY le, txn_gc_info *gc_info,
uint64_t *workdonep, STAT64INFO stats_to_update);
void toku_ft_bn_apply_msg_once(
BASEMENTNODE bn,
const ft_msg& msg,
uint32_t idx,
uint32_t le_keylen,
LEAFENTRY le,
txn_gc_info* gc_info,
uint64_t* workdonep,
STAT64INFO stats_to_update,
int64_t* logical_rows_delta);
void toku_ft_bn_apply_msg(const toku::comparator &cmp, ft_update_func update_fun,
BASEMENTNODE bn, const ft_msg &msg, txn_gc_info *gc_info,
uint64_t *workdone, STAT64INFO stats_to_update);
void toku_ft_bn_apply_msg(
const toku::comparator& cmp,
ft_update_func update_fun,
BASEMENTNODE bn,
const ft_msg& msg,
txn_gc_info* gc_info,
uint64_t* workdone,
STAT64INFO stats_to_update,
int64_t* logical_rows_delta);
void toku_ft_leaf_apply_msg(const toku::comparator &cmp, ft_update_func update_fun,
FTNODE node, int target_childnum,
const ft_msg &msg, txn_gc_info *gc_info,
uint64_t *workdone, STAT64INFO stats_to_update);
void toku_ft_leaf_apply_msg(
const toku::comparator& cmp,
ft_update_func update_fun,
FTNODE node,
int target_childnum,
const ft_msg& msg,
txn_gc_info* gc_info,
uint64_t* workdone,
STAT64INFO stats_to_update,
int64_t* logical_rows_delta);
//
// Message management for orthopush

View File

@@ -323,6 +323,13 @@ int deserialize_ft_versioned(int fd, struct rbuf *rb, FT *ftp, uint32_t version)
fanout = rbuf_int(rb);
}
uint64_t on_disk_logical_rows;
on_disk_logical_rows = (uint64_t)-1;
if (ft->layout_version_read_from_disk >= FT_LAYOUT_VERSION_29) {
on_disk_logical_rows = rbuf_ulonglong(rb);
}
ft->in_memory_logical_rows = on_disk_logical_rows;
(void) rbuf_int(rb); //Read in checksum and ignore (already verified).
if (rb->ndone != rb->size) {
fprintf(stderr, "Header size did not match contents.\n");
@@ -357,7 +364,8 @@ int deserialize_ft_versioned(int fd, struct rbuf *rb, FT *ftp, uint32_t version)
.count_of_optimize_in_progress = count_of_optimize_in_progress,
.count_of_optimize_in_progress_read_from_disk = count_of_optimize_in_progress,
.msn_at_start_of_last_completed_optimize = msn_at_start_of_last_completed_optimize,
.on_disk_stats = on_disk_stats
.on_disk_stats = on_disk_stats,
.on_disk_logical_rows = on_disk_logical_rows
};
XMEMDUP(ft->h, &h);
}
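The read side above shows the usual pattern for a field that exists only in newer layouts: initialize to a sentinel, then read it only when the on-disk version is new enough. A compilable sketch of the same pattern, with a toy reader standing in for PerconaFT's rbuf:

#include <cstdint>
#include <cstring>

static const int FT_LAYOUT_VERSION_29 = 29;

// Toy byte reader standing in for PerconaFT's struct rbuf.
struct reader {
    const uint8_t* p;
    uint64_t read_u64() {
        uint64_t v;
        std::memcpy(&v, p, sizeof v);
        p += sizeof v;
        return v;
    }
};

static uint64_t read_logical_rows(reader& rb, int version_on_disk) {
    // Older files do not carry the field; use the same "unknown" sentinel
    // as the diff, (uint64_t)-1, which callers can detect.
    uint64_t on_disk_logical_rows = (uint64_t)-1;
    if (version_on_disk >= FT_LAYOUT_VERSION_29) {
        on_disk_logical_rows = rb.read_u64();
    }
    return on_disk_logical_rows;
}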
@@ -408,6 +416,8 @@ serialize_ft_min_size (uint32_t version) {
size_t size = 0;
switch(version) {
case FT_LAYOUT_VERSION_29:
size += sizeof(uint64_t); // logrows in ft
case FT_LAYOUT_VERSION_28:
size += sizeof(uint32_t); // fanout in ft
case FT_LAYOUT_VERSION_27:
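serialize_ft_min_size relies on deliberate switch fallthrough: each case adds only the bytes its layout version introduced, and falling through accumulates every older version's fields. A self-contained sketch of the idiom (the sizes here are illustrative, not the real header layout):

#include <cstddef>
#include <cstdint>

static size_t min_header_size(int version) {
    size_t size = 0;
    switch (version) {
        case 29:
            size += sizeof(uint64_t);  // logical row count, new in v29
            // fall through: a v29 header also has every older field
        case 28:
            size += sizeof(uint32_t);  // fanout, new in v28
            // fall through
        default:
            size += 64;                // stand-in for all pre-v28 fields
    }
    return size;
}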
@@ -754,6 +764,7 @@ void toku_serialize_ft_to_wbuf (
wbuf_MSN(wbuf, h->highest_unused_msn_for_upgrade);
wbuf_MSN(wbuf, h->max_msn_in_ft);
wbuf_int(wbuf, h->fanout);
wbuf_ulonglong(wbuf, h->on_disk_logical_rows);
uint32_t checksum = toku_x1764_finish(&wbuf->checksum);
wbuf_int(wbuf, checksum);
lazy_assert(wbuf->ndone == wbuf->size);

View File

@@ -68,6 +68,7 @@ enum ft_layout_version_e {
FT_LAYOUT_VERSION_26 = 26, // Hojo: basements store key/vals separately on disk for fixed klpair length BNs
FT_LAYOUT_VERSION_27 = 27, // serialize message trees with nonleaf buffers to avoid key, msn sort on deserialize
FT_LAYOUT_VERSION_28 = 28, // Add fanout to ft_header
FT_LAYOUT_VERSION_29 = 29, // Add logrows to ft_header
FT_NEXT_VERSION, // the version after the current version
FT_LAYOUT_VERSION = FT_NEXT_VERSION-1, // A hack so I don't have to change this line.
FT_LAYOUT_MIN_SUPPORTED_VERSION = FT_LAYOUT_VERSION_13, // Minimum version supported

View File

@@ -74,7 +74,16 @@ append_leaf(FTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen)
// apply an insert to the leaf node
txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, false);
ft_msg msg(&thekey, &theval, FT_INSERT, msn, toku_xids_get_root_xids());
toku_ft_bn_apply_msg_once(BLB(leafnode,0), msg, idx, keylen, NULL, &gc_info, NULL, NULL);
toku_ft_bn_apply_msg_once(
BLB(leafnode, 0),
msg,
idx,
keylen,
NULL,
&gc_info,
NULL,
NULL,
NULL);
leafnode->max_msn_applied_to_node_on_disk = msn;

View File

@@ -82,46 +82,82 @@ append_leaf(FT_HANDLE ft, FTNODE leafnode, void *key, uint32_t keylen, void *val
ft_msg msg(&thekey, &theval, FT_INSERT, msn, toku_xids_get_root_xids());
txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, false);
toku_ft_leaf_apply_msg(ft->ft->cmp, ft->ft->update_fun, leafnode, -1, msg, &gc_info, nullptr, nullptr);
toku_ft_leaf_apply_msg(
ft->ft->cmp,
ft->ft->update_fun,
leafnode,
-1,
msg,
&gc_info,
nullptr,
nullptr,
nullptr);
{
int r = toku_ft_lookup(ft, &thekey, lookup_checkf, &pair);
assert(r==0);
assert(pair.call_count==1);
int r = toku_ft_lookup(ft, &thekey, lookup_checkf, &pair);
assert(r==0);
assert(pair.call_count==1);
}
ft_msg badmsg(&thekey, &badval, FT_INSERT, msn, toku_xids_get_root_xids());
toku_ft_leaf_apply_msg(ft->ft->cmp, ft->ft->update_fun, leafnode, -1, badmsg, &gc_info, nullptr, nullptr);
toku_ft_leaf_apply_msg(
ft->ft->cmp,
ft->ft->update_fun,
leafnode,
-1,
badmsg,
&gc_info,
nullptr,
nullptr,
nullptr);
// message should be rejected for duplicate msn, row should still have original val
{
int r = toku_ft_lookup(ft, &thekey, lookup_checkf, &pair);
assert(r==0);
assert(pair.call_count==2);
int r = toku_ft_lookup(ft, &thekey, lookup_checkf, &pair);
assert(r==0);
assert(pair.call_count==2);
}
// now verify that message with proper msn gets through
msn = next_dummymsn();
ft->ft->h->max_msn_in_ft = msn;
ft_msg msg2(&thekey, &val2, FT_INSERT, msn, toku_xids_get_root_xids());
toku_ft_leaf_apply_msg(ft->ft->cmp, ft->ft->update_fun, leafnode, -1, msg2, &gc_info, nullptr, nullptr);
toku_ft_leaf_apply_msg(
ft->ft->cmp,
ft->ft->update_fun,
leafnode,
-1,
msg2,
&gc_info,
nullptr,
nullptr,
nullptr);
// message should be accepted, val should have new value
{
int r = toku_ft_lookup(ft, &thekey, lookup_checkf, &pair2);
assert(r==0);
assert(pair2.call_count==1);
int r = toku_ft_lookup(ft, &thekey, lookup_checkf, &pair2);
assert(r==0);
assert(pair2.call_count==1);
}
// now verify that message with lesser (older) msn is rejected
msn.msn = msn.msn - 10;
ft_msg msg3(&thekey, &badval, FT_INSERT, msn, toku_xids_get_root_xids());
toku_ft_leaf_apply_msg(ft->ft->cmp, ft->ft->update_fun, leafnode, -1, msg3, &gc_info, nullptr, nullptr);
toku_ft_leaf_apply_msg(
ft->ft->cmp,
ft->ft->update_fun,
leafnode,
-1,
msg3,
&gc_info,
nullptr,
nullptr,
nullptr);
// message should be rejected, val should still have value in pair2
{
int r = toku_ft_lookup(ft, &thekey, lookup_checkf, &pair2);
assert(r==0);
assert(pair2.call_count==2);
int r = toku_ft_lookup(ft, &thekey, lookup_checkf, &pair2);
assert(r==0);
assert(pair2.call_count==2);
}
// don't forget to dirty the node

View File

@@ -137,8 +137,24 @@ insert_random_message_to_bn(
*keyp = toku_xmemdup(keydbt->data, keydbt->size);
ft_msg msg(keydbt, valdbt, FT_INSERT, msn, xids);
int64_t numbytes;
toku_le_apply_msg(msg, NULL, NULL, 0, keydbt->size, &non_mvcc_gc_info, save, &numbytes);
toku_ft_bn_apply_msg(t->ft->cmp, t->ft->update_fun, blb, msg, &non_mvcc_gc_info, NULL, NULL);
toku_le_apply_msg(
msg,
NULL,
NULL,
0,
keydbt->size,
&non_mvcc_gc_info,
save,
&numbytes);
toku_ft_bn_apply_msg(
t->ft->cmp,
t->ft->update_fun,
blb,
msg,
&non_mvcc_gc_info,
NULL,
NULL,
NULL);
if (msn.msn > blb->max_msn_applied.msn) {
blb->max_msn_applied = msn;
}
@@ -182,12 +198,36 @@ insert_same_message_to_bns(
*keyp = toku_xmemdup(keydbt->data, keydbt->size);
ft_msg msg(keydbt, valdbt, FT_INSERT, msn, xids);
int64_t numbytes;
toku_le_apply_msg(msg, NULL, NULL, 0, keydbt->size, &non_mvcc_gc_info, save, &numbytes);
toku_ft_bn_apply_msg(t->ft->cmp, t->ft->update_fun, blb1, msg, &non_mvcc_gc_info, NULL, NULL);
toku_le_apply_msg(
msg,
NULL,
NULL,
0,
keydbt->size,
&non_mvcc_gc_info,
save,
&numbytes);
toku_ft_bn_apply_msg(
t->ft->cmp,
t->ft->update_fun,
blb1,
msg,
&non_mvcc_gc_info,
NULL,
NULL,
NULL);
if (msn.msn > blb1->max_msn_applied.msn) {
blb1->max_msn_applied = msn;
}
toku_ft_bn_apply_msg(t->ft->cmp, t->ft->update_fun, blb2, msg, &non_mvcc_gc_info, NULL, NULL);
toku_ft_bn_apply_msg(
t->ft->cmp,
t->ft->update_fun,
blb2,
msg,
&non_mvcc_gc_info,
NULL,
NULL,
NULL);
if (msn.msn > blb2->max_msn_applied.msn) {
blb2->max_msn_applied = msn;
}
@@ -619,7 +659,16 @@ flush_to_leaf(FT_HANDLE t, bool make_leaf_up_to_date, bool use_flush) {
if (make_leaf_up_to_date) {
for (i = 0; i < num_parent_messages; ++i) {
if (!parent_messages_is_fresh[i]) {
toku_ft_leaf_apply_msg(t->ft->cmp, t->ft->update_fun, child, -1, *parent_messages[i], &non_mvcc_gc_info, NULL, NULL);
toku_ft_leaf_apply_msg(
t->ft->cmp,
t->ft->update_fun,
child,
-1,
*parent_messages[i],
&non_mvcc_gc_info,
NULL,
NULL,
NULL);
}
}
for (i = 0; i < 8; ++i) {
@@ -842,7 +891,16 @@ flush_to_leaf_with_keyrange(FT_HANDLE t, bool make_leaf_up_to_date) {
for (i = 0; i < num_parent_messages; ++i) {
if (dummy_cmp(parent_messages[i]->kdbt(), &childkeys[7]) <= 0 &&
!parent_messages_is_fresh[i]) {
toku_ft_leaf_apply_msg(t->ft->cmp, t->ft->update_fun, child, -1, *parent_messages[i], &non_mvcc_gc_info, NULL, NULL);
toku_ft_leaf_apply_msg(
t->ft->cmp,
t->ft->update_fun,
child,
-1,
*parent_messages[i],
&non_mvcc_gc_info,
NULL,
NULL,
NULL);
}
}
for (i = 0; i < 8; ++i) {
@@ -1045,8 +1103,26 @@ compare_apply_and_flush(FT_HANDLE t, bool make_leaf_up_to_date) {
if (make_leaf_up_to_date) {
for (i = 0; i < num_parent_messages; ++i) {
if (!parent_messages_is_fresh[i]) {
toku_ft_leaf_apply_msg(t->ft->cmp, t->ft->update_fun, child1, -1, *parent_messages[i], &non_mvcc_gc_info, NULL, NULL);
toku_ft_leaf_apply_msg(t->ft->cmp, t->ft->update_fun, child2, -1, *parent_messages[i], &non_mvcc_gc_info, NULL, NULL);
toku_ft_leaf_apply_msg(
t->ft->cmp,
t->ft->update_fun,
child1,
-1,
*parent_messages[i],
&non_mvcc_gc_info,
NULL,
NULL,
NULL);
toku_ft_leaf_apply_msg(
t->ft->cmp,
t->ft->update_fun,
child2,
-1,
*parent_messages[i],
&non_mvcc_gc_info,
NULL,
NULL,
NULL);
}
}
for (i = 0; i < 8; ++i) {

View File

@@ -78,7 +78,16 @@ append_leaf(FTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen)
// apply an insert to the leaf node
ft_msg msg(&thekey, &theval, FT_INSERT, msn, toku_xids_get_root_xids());
txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, false);
toku_ft_bn_apply_msg_once(BLB(leafnode, 0), msg, idx, keylen, NULL, &gc_info, NULL, NULL);
toku_ft_bn_apply_msg_once(
BLB(leafnode, 0),
msg,
idx,
keylen,
NULL,
&gc_info,
NULL,
NULL,
NULL);
// Create bad tree (don't do following):
// leafnode->max_msn_applied_to_node = msn;

View File

@@ -65,7 +65,16 @@ append_leaf(FTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen)
MSN msn = next_dummymsn();
ft_msg msg(&thekey, &theval, FT_INSERT, msn, toku_xids_get_root_xids());
txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, false);
toku_ft_bn_apply_msg_once(BLB(leafnode, 0), msg, idx, keylen, NULL, &gc_info, NULL, NULL);
toku_ft_bn_apply_msg_once(
BLB(leafnode, 0),
msg,
idx,
keylen,
NULL,
&gc_info,
NULL,
NULL,
NULL);
// don't forget to dirty the node
leafnode->dirty = 1;

View File

@@ -66,7 +66,16 @@ append_leaf(FTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen)
MSN msn = next_dummymsn();
ft_msg msg(&thekey, &theval, FT_INSERT, msn, toku_xids_get_root_xids());
txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, false);
toku_ft_bn_apply_msg_once(BLB(leafnode, 0), msg, idx, keylen, NULL, &gc_info, NULL, NULL);
toku_ft_bn_apply_msg_once(
BLB(leafnode, 0),
msg,
idx,
keylen,
NULL,
&gc_info,
NULL,
NULL,
NULL);
// don't forget to dirty the node
leafnode->dirty = 1;

View File

@@ -65,7 +65,16 @@ append_leaf(FTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen)
MSN msn = next_dummymsn();
ft_msg msg(&thekey, &theval, FT_INSERT, msn, toku_xids_get_root_xids());
txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, false);
toku_ft_bn_apply_msg_once(BLB(leafnode, 0), msg, idx, keylen, NULL, &gc_info, NULL, NULL);
toku_ft_bn_apply_msg_once(
BLB(leafnode, 0),
msg,
idx,
keylen,
NULL,
&gc_info,
NULL,
NULL,
NULL);
// don't forget to dirty the node
leafnode->dirty = 1;

View File

@@ -66,7 +66,16 @@ append_leaf(FTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen)
MSN msn = next_dummymsn();
ft_msg msg(&thekey, &theval, FT_INSERT, msn, toku_xids_get_root_xids());
txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, false);
toku_ft_bn_apply_msg_once(BLB(leafnode,0), msg, idx, keylen, NULL, &gc_info, NULL, NULL);
toku_ft_bn_apply_msg_once(
BLB(leafnode, 0),
msg,
idx,
keylen,
NULL,
&gc_info,
NULL,
NULL,
NULL);
// don't forget to dirty the node
leafnode->dirty = 1;

View File

@@ -68,7 +68,16 @@ append_leaf(FTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen)
MSN msn = next_dummymsn();
ft_msg msg(&thekey, &theval, FT_INSERT, msn, toku_xids_get_root_xids());
txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, false);
toku_ft_bn_apply_msg_once(BLB(leafnode, 0), msg, idx, keylen, NULL, &gc_info, NULL, NULL);
toku_ft_bn_apply_msg_once(
BLB(leafnode, 0),
msg,
idx,
keylen,
NULL,
&gc_info,
NULL,
NULL,
NULL);
// don't forget to dirty the node
leafnode->dirty = 1;

View File

@@ -65,7 +65,16 @@ append_leaf(FTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen)
MSN msn = next_dummymsn();
ft_msg msg(&thekey, &theval, FT_INSERT, msn, toku_xids_get_root_xids());
txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, false);
toku_ft_bn_apply_msg_once(BLB(leafnode, 0), msg, idx, keylen, NULL, &gc_info, NULL, NULL);
toku_ft_bn_apply_msg_once(
BLB(leafnode, 0),
msg,
idx,
keylen,
NULL,
&gc_info,
NULL,
NULL,
NULL);
// don't forget to dirty the node
leafnode->dirty = 1;

View File

@@ -186,6 +186,7 @@ int toku_rollback_commit(TOKUTXN txn, LSN lsn) {
// Append the list to the front of the parent.
if (child_log->oldest_logentry) {
// There are some entries, so link them in.
parent_log->dirty = true;
child_log->oldest_logentry->prev = parent_log->newest_logentry;
if (!parent_log->oldest_logentry) {
parent_log->oldest_logentry = child_log->oldest_logentry;

View File

@@ -248,11 +248,24 @@ static txn_child_manager tcm;
.xa_xid = {0, 0, 0, ""},
.progress_poll_fun = NULL,
.progress_poll_fun_extra = NULL,
.txn_lock = ZERO_MUTEX_INITIALIZER,
// You cannot initialize txn_lock with TOKU_MUTEX_INITIALIZER, because we
// will initialize it in the code below, and it cannot already
// be initialized at that point. Also, in general, you don't
// get to use PTHREAD_MUTEX_INITIALIZER (which is what is inside
// TOKU_MUTEX_INITIALIZER) except in static variables, and this
// is initializing an auto variable.
//
// And we cannot simply avoid initializing these fields
// because, although it avoids -Wmissing-field-initializers
// errors under gcc, it gets other errors about non-trivial
// designated initializers not being supported.
.txn_lock = ZERO_MUTEX_INITIALIZER, // Not TOKU_MUTEX_INITIALIZER
.open_fts = open_fts,
.roll_info = roll_info,
.state_lock = ZERO_MUTEX_INITIALIZER,
.state_cond = TOKU_COND_INITIALIZER,
.state_lock = ZERO_MUTEX_INITIALIZER, // Not TOKU_MUTEX_INITIALIZER
.state_cond = ZERO_COND_INITIALIZER, // Not TOKU_COND_INITIALIZER
.state = TOKUTXN_LIVE,
.num_pin = 0,
.client_id = 0,

View File

@@ -45,7 +45,15 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
#include "ft/txn/txn_manager.h"
#include "ft/txn/rollback.h"
#include "util/omt.h"
//this is only for testing
static void (* test_txn_sync_callback) (uint64_t, void *) = NULL;
static void * test_txn_sync_callback_extra = NULL;
void set_test_txn_sync_callback(void (*cb) (uint64_t, void *), void *extra) {
test_txn_sync_callback = cb;
test_txn_sync_callback_extra = extra;
}
bool garbage_collection_debug = false;
static bool txn_records_snapshot(TXN_SNAPSHOT_TYPE snapshot_type, struct tokutxn *parent) {
@@ -525,14 +533,19 @@ void toku_txn_manager_handle_snapshot_create_for_child_txn(
XMALLOC(txn->live_root_txn_list);
txn_manager_lock(txn_manager);
txn_manager_create_snapshot_unlocked(txn_manager, txn);
txn_manager_unlock(txn_manager);
}
else {
inherit_snapshot_from_parent(txn);
}
if (copies_snapshot) {
setup_live_root_txn_list(&txn_manager->live_root_ids, txn->live_root_txn_list);
}
toku_debug_txn_sync(pthread_self());
if (copies_snapshot) {
if(!records_snapshot)
txn_manager_lock(txn_manager);
setup_live_root_txn_list(&txn_manager->live_root_ids, txn->live_root_txn_list);
txn_manager_unlock(txn_manager);
}
}
void toku_txn_manager_handle_snapshot_destroy_for_child_txn(

View File

@@ -43,6 +43,15 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
#include "ft/txn/txn.h"
void set_test_txn_sync_callback(void (*) (uint64_t, void*), void*);
#define toku_test_txn_sync_callback(a) ((test_txn_sync_callback)? test_txn_sync_callback( a,test_txn_sync_callback_extra) : (void) 0)
#if TOKU_DEBUG_TXN_SYNC
#define toku_debug_txn_sync(a) toku_test_txn_sync_callback(a)
#else
#define toku_debug_txn_sync(a) ((void) 0)
#endif
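With TOKU_DEBUG_TXN_SYNC defined, toku_debug_txn_sync expands to a call through the installable test hook; otherwise it compiles to nothing, so production builds pay no cost. A self-contained sketch of the plumbing and how a test might drive it (the on_txn_sync callback is illustrative):

#include <cstdint>
#include <cstdio>

// Simplified copies of the hook plumbing added in this diff.
static void (*test_txn_sync_callback)(uint64_t, void*) = nullptr;
static void* test_txn_sync_callback_extra = nullptr;

static void set_test_txn_sync_callback(void (*cb)(uint64_t, void*),
                                       void* extra) {
    test_txn_sync_callback = cb;
    test_txn_sync_callback_extra = extra;
}

#define toku_debug_txn_sync(a)                                         \
    ((test_txn_sync_callback)                                          \
         ? test_txn_sync_callback((a), test_txn_sync_callback_extra)  \
         : (void)0)

static void on_txn_sync(uint64_t tid, void*) {
    printf("txn manager hit sync point on thread %llu\n",
           (unsigned long long)tid);
}

int main() {
    set_test_txn_sync_callback(on_txn_sync, nullptr);
    toku_debug_txn_sync(42);  // real code passes pthread_self()
    return 0;
}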
typedef struct txn_manager *TXN_MANAGER;
struct referenced_xid_tuple {

File diff suppressed because it is too large

View File

@@ -72,15 +72,18 @@ typedef struct toku_mutex_aligned {
toku_mutex_t aligned_mutex __attribute__((__aligned__(64)));
} toku_mutex_aligned_t;
// Different OSes implement mutexes as different amounts of nested structs.
// C++ will fill out all missing values with zeroes if you provide at least one zero, but it needs the right amount of nesting.
#if defined(__FreeBSD__)
# define ZERO_MUTEX_INITIALIZER {0}
#elif defined(__APPLE__)
# define ZERO_MUTEX_INITIALIZER {{0}}
#else // __linux__, at least
# define ZERO_MUTEX_INITIALIZER {{{0}}}
#endif
// Initializing with {} will fill in a struct with all zeros.
// But you may also need a pragma to suppress the warnings, as follows
//
// #pragma GCC diagnostic push
// #pragma GCC diagnostic ignored "-Wmissing-field-initializers"
// toku_mutex_t foo = ZERO_MUTEX_INITIALIZER;
// #pragma GCC diagnostic pop
//
// In general it will be a lot of busy work to make this codebase compile
// cleanly with -Wmissing-field-initializers
# define ZERO_MUTEX_INITIALIZER {}
#if TOKU_PTHREAD_DEBUG
# define TOKU_MUTEX_INITIALIZER { .pmutex = PTHREAD_MUTEX_INITIALIZER, .owner = 0, .locked = false, .valid = true }
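A compilable illustration of the advice above, assuming a trimmed stand-in for toku_mutex_t: empty braces value-initialize the whole struct no matter how deeply the platform nests its pthread types, and the pragma pair silences -Wmissing-field-initializers on compilers that warn about it:

#include <pthread.h>

struct my_mutex {           // trimmed stand-in for toku_mutex_t
    pthread_mutex_t pmutex;
};

#define ZERO_MY_MUTEX_INITIALIZER {}  // plain C before C23 needs at least {0}

#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wmissing-field-initializers"
static my_mutex foo = ZERO_MY_MUTEX_INITIALIZER;
#pragma GCC diagnostic pop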
@@ -223,15 +226,9 @@ typedef struct toku_cond {
pthread_cond_t pcond;
} toku_cond_t;
// Different OSes implement mutexes as different amounts of nested structs.
// C++ will fill out all missing values with zeroes if you provide at least one zero, but it needs the right amount of nesting.
#if defined(__FreeBSD__)
# define ZERO_COND_INITIALIZER {0}
#elif defined(__APPLE__)
# define ZERO_COND_INITIALIZER {{0}}
#else // __linux__, at least
# define ZERO_COND_INITIALIZER {{{0}}}
#endif
// Same considerations as for ZERO_MUTEX_INITIALIZER apply
#define ZERO_COND_INITIALIZER {}
#define TOKU_COND_INITIALIZER {PTHREAD_COND_INITIALIZER}
static inline void

View File

@@ -108,3 +108,13 @@ static inline uint64_t toku_current_time_microsec(void) {
gettimeofday(&t, NULL);
return t.tv_sec * (1UL * 1000 * 1000) + t.tv_usec;
}
// sleep for the given number of microseconds
static inline void toku_sleep_microsec(uint64_t us) {
struct timeval t;
t.tv_sec = us / 1000000;
t.tv_usec = us % 1000000;
select(0, NULL, NULL, NULL, &t);
}
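select() with no file descriptors is a classic portable way to block with sub-second resolution. A minimal usage sketch of the helper above (the include path is assumed):

#include <cstdio>
#include "toku_time.h"  // the portability header shown above (path assumed)

int main() {
    printf("sleeping ~250ms\n");
    toku_sleep_microsec(250 * 1000);
    printf("done\n");
    return 0;
}

Unlike usleep, which POSIX allows to reject intervals of a second or more, the select form handles arbitrary durations via tv_sec.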

View File

@@ -82,6 +82,7 @@
toku_test_db_redirect_dictionary;
toku_test_get_latest_lsn;
toku_test_get_checkpointing_user_data_status;
toku_set_test_txn_sync_callback;
toku_indexer_set_test_only_flags;
toku_increase_last_xid;

View File

@@ -53,7 +53,7 @@ if(BUILD_TESTING OR BUILD_SRC_TESTS)
target_link_libraries(test-5138.tdb ${LIBTOKUDB}_static z ${LIBTOKUPORTABILITY}_static ${CMAKE_THREAD_LIBS_INIT} ${EXTRA_SYSTEM_LIBS})
add_space_separated_property(TARGET test-5138.tdb COMPILE_FLAGS -fvisibility=hidden)
add_ydb_test(test-5138.tdb)
add_ydb_test(rollback-inconsistency.tdb)
foreach(bin ${tdb_bins})
get_filename_component(base ${bin} NAME_WE)

View File

@@ -0,0 +1,161 @@
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
#ident "$Id$"
/*======
This file is part of PerconaFT.
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
PerconaFT is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License, version 2,
as published by the Free Software Foundation.
PerconaFT is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
----------------------------------------
PerconaFT is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License, version 3,
as published by the Free Software Foundation.
PerconaFT is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
======= */
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
#include "test.h"
// insert enough rows with a child txn and then force an eviction to verify
// that the rollback log node is in a valid state.
// the test fails without the fix (and of course passes with it).
// It basically simulates George's test script.
static void
populate_table(int start, int end, DB_TXN * parent, DB_ENV * env, DB * db) {
DB_TXN *txn = NULL;
int r = env->txn_begin(env, parent, &txn, 0); assert_zero(r);
for (int i = start; i < end; i++) {
int k = htonl(i);
char kk[4];
char str[220];
memset(kk, 0, sizeof kk);
memcpy(kk, &k, sizeof k);
memset(str,'a', sizeof str-1);
DBT key = { .data = kk, .size = sizeof kk };
DBT val = { .data = str, .size = 220 };
r = db->put(db, txn, &key, &val, 0);
assert_zero(r);
}
r = txn->commit(txn, 0);
assert_zero(r);
}
static void
populate_and_test(DB_ENV *env, DB *db) {
int r;
DB_TXN *parent = NULL;
r = env->txn_begin(env, NULL, &parent, 0); assert_zero(r);
populate_table(0, 128, parent, env, db);
//we know the eviction is going to happen here and the log node of the parent
//txn is going to be evicted due to the extremely low cache size.
populate_table(128, 256, parent, env, db);
//eviction again due to memory pressure. 256 rows is the point at which that
//rollback log spills out. The spilled node will be written back but will not
//be dirtied by including rollback nodes from the child txn (in which case the
//bug would be bypassed).
populate_table(256, 512, parent, env, db);
r = parent->abort(parent); assert_zero(r);
//try to look up a key in the lost range
int k = htonl(200);
char kk[4];
memset(kk, 0, sizeof kk);
memcpy(kk, &k, sizeof k);
DBT key = { .data = kk, .size = sizeof kk };
DBT val;
r = db->get(db, NULL, &key, &val, 0);
assert(r==DB_NOTFOUND);
}
static void
run_test(void) {
int r;
DB_ENV *env = NULL;
r = db_env_create(&env, 0);
assert_zero(r);
env->set_errfile(env, stderr);
//set the cachetable size to 64k
uint32_t cachesize = 64*1024;
r = env->set_cachesize(env, 0, cachesize, 1);
assert_zero(r);
//set the log write block size to 4k so the rollback log nodes spill in step with the node size
r = env->set_lg_bsize(env, 4096);
assert_zero(r);
r = env->open(env, TOKU_TEST_FILENAME, DB_INIT_MPOOL|DB_CREATE|DB_THREAD |DB_INIT_LOCK|DB_INIT_LOG|DB_INIT_TXN|DB_PRIVATE, S_IRWXU+S_IRWXG+S_IRWXO);
assert_zero(r);
DB *db = NULL;
r = db_create(&db, env, 0);
assert_zero(r);
r = db->set_pagesize(db, 4096);
assert_zero(r);
r = db->set_readpagesize(db, 1024);
assert_zero(r);
r = db->open(db, NULL, "test.tdb", NULL, DB_BTREE, DB_AUTO_COMMIT+DB_CREATE, S_IRWXU+S_IRWXG+S_IRWXO);
assert_zero(r);
populate_and_test(env, db);
r = db->close(db, 0); assert_zero(r);
r = env->close(env, 0); assert_zero(r);
}
int
test_main(int argc, char * const argv[]) {
int r;
// parse_args(argc, argv);
for (int i = 1; i < argc; i++) {
char * const arg = argv[i];
if (strcmp(arg, "-v") == 0) {
verbose++;
continue;
}
if (strcmp(arg, "-q") == 0) {
verbose = 0;
continue;
}
}
toku_os_recursive_delete(TOKU_TEST_FILENAME);
r = toku_os_mkdir(TOKU_TEST_FILENAME, S_IRWXU+S_IRWXG+S_IRWXO); assert_zero(r);
run_test();
return 0;
}

View File

@@ -166,7 +166,7 @@ run_test (void) {
DB_BTREE_STAT64 s;
r = db->stat64(db, NULL, &s); CKERR(r);
assert(s.bt_nkeys == 1 && s.bt_dsize == sizeof key + sizeof val);
assert(s.bt_nkeys == 0);
r = db->close(db, 0); CKERR(r);
@@ -176,7 +176,7 @@ run_test (void) {
r = txn->commit(txn, 0); CKERR(r);
r = db->stat64(db, NULL, &s); CKERR(r);
assert(s.bt_nkeys == 1 && s.bt_dsize == sizeof key + sizeof val);
assert(s.bt_nkeys == 0);
}
// verify update callback overwrites the row

View File

@@ -0,0 +1,528 @@
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
#ident "$Id$"
/*======
This file is part of PerconaFT.
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
PerconaFT is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License, version 2,
as published by the Free Software Foundation.
PerconaFT is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
----------------------------------------
PerconaFT is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License, version 3,
as published by the Free Software Foundation.
PerconaFT is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
======= */
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
#include "test.h"
#include <stdio.h>
#include <sys/stat.h>
#include <db.h>
// Tests that the logical row counts are correct and not subject to variance
// due to normal insert/delete messages within the tree with the few exceptions
// of 1) rollback messages not yet applied; 2) insert messages turned to
// updates on apply; and 3) missing leafentries on delete messages on apply.
static DB_TXN* const null_txn = 0;
static const uint64_t num_records = 4*1024;
#define CHECK_NUM_ROWS(_expected, _stats) assert(_stats.bt_ndata == _expected)
static DB* create_db(const char* fname, DB_ENV* env) {
int r;
DB* db;
r = db_create(&db, env, 0);
assert(r == 0);
db->set_errfile(db, stderr);
r = db->set_pagesize(db, 8192);
assert(r == 0);
r = db->set_readpagesize(db, 1024);
assert(r == 0);
r = db->set_fanout(db, 4);
assert(r == 0);
r = db->set_compression_method(db, TOKU_NO_COMPRESSION);
assert(r == 0);
r = db->open(db, null_txn, fname, "main", DB_BTREE, DB_CREATE,
0666);
assert(r == 0);
return db;
}
static void add_records(DB* db, DB_TXN* txn, uint64_t start_id, uint64_t num) {
int r;
for (uint64_t i = 0, j=start_id; i < num; i++,j++) {
char key[100], val[256];
DBT k,v;
snprintf(key, 100, "%08lu", j);
snprintf(val, 256, "%*s", 200, key);
r =
db->put(
db,
txn,
dbt_init(&k, key, 1+strlen(key)),
dbt_init(&v, val, 1+strlen(val)),
0);
assert(r == 0);
}
}
static void delete_records(
DB* db,
DB_TXN* txn,
uint64_t start_id,
uint64_t num) {
int r;
for (uint64_t i = 0, j=start_id; i < num; i++,j++) {
char key[100];
DBT k;
snprintf(key, 100, "%08lu", j);
r =
db->del(
db,
txn,
dbt_init(&k, key, 1+strlen(key)),
0);
assert(r == 0);
}
}
static void full_optimize(DB* db) {
int r;
uint64_t loops_run = 0;
r = db->optimize(db);
assert(r == 0);
r = db->hot_optimize(db, NULL, NULL, NULL, NULL, &loops_run);
assert(r == 0);
}
static void test_insert_commit(DB_ENV* env) {
int r;
DB* db;
DB_TXN* txn;
DB_BTREE_STAT64 stats;
db = create_db(__FUNCTION__, env);
r = env->txn_begin(env, null_txn, &txn, 0);
assert(r == 0);
add_records(db, txn, 0, num_records);
r = db->stat64(db, null_txn, &stats);
assert(r == 0);
CHECK_NUM_ROWS(num_records, stats);
if (verbose)
printf("%s : before commit %lu rows\n", __FUNCTION__, stats.bt_ndata);
r = txn->commit(txn, 0);
assert(r == 0);
r = db->stat64(db, null_txn, &stats);
assert(r == 0);
CHECK_NUM_ROWS(num_records, stats);
if (verbose)
printf("%s : after commit %lu rows\n", __FUNCTION__, stats.bt_ndata);
db->close(db, 0);
}
static void test_insert_delete_commit(DB_ENV* env) {
int r;
DB* db;
DB_TXN* txn;
DB_BTREE_STAT64 stats;
db = create_db(__FUNCTION__, env);
r = env->txn_begin(env, null_txn, &txn, 0);
assert(r == 0);
add_records(db, txn, 0, num_records);
r = db->stat64(db, null_txn, &stats);
assert(r == 0);
CHECK_NUM_ROWS(num_records, stats);
if (verbose)
printf("%s : before delete %lu rows\n", __FUNCTION__, stats.bt_ndata);
delete_records(db, txn, 0, num_records);
r = db->stat64(db, null_txn, &stats);
assert(r == 0);
CHECK_NUM_ROWS(0, stats);
if (verbose)
printf("%s : after delete %lu rows\n", __FUNCTION__, stats.bt_ndata);
r = txn->commit(txn, 0);
assert(r == 0);
r = db->stat64(db, null_txn, &stats);
assert(r == 0);
CHECK_NUM_ROWS(0, stats);
if (verbose)
printf("%s : after commit %lu rows\n", __FUNCTION__, stats.bt_ndata);
db->close(db, 0);
}
static void test_insert_commit_delete_commit(DB_ENV* env) {
int r;
DB* db;
DB_TXN* txn;
DB_BTREE_STAT64 stats;
db = create_db(__FUNCTION__, env);
r = env->txn_begin(env, null_txn, &txn, 0);
assert(r == 0);
add_records(db, txn, 0, num_records);
r = db->stat64(db, null_txn, &stats);
assert(r == 0);
CHECK_NUM_ROWS(num_records, stats);
if (verbose)
printf(
"%s : before insert commit %lu rows\n",
__FUNCTION__,
stats.bt_ndata);
r = txn->commit(txn, 0);
assert(r == 0);
r = db->stat64(db, null_txn, &stats);
assert(r == 0);
CHECK_NUM_ROWS(num_records, stats);
if (verbose)
printf(
"%s : after insert commit %lu rows\n",
__FUNCTION__,
stats.bt_ndata);
r = env->txn_begin(env, null_txn, &txn, 0);
assert(r == 0);
delete_records(db, txn, 0, num_records);
r = db->stat64(db, null_txn, &stats);
assert(r == 0);
CHECK_NUM_ROWS(0, stats);
if (verbose)
printf("%s : after delete %lu rows\n", __FUNCTION__, stats.bt_ndata);
r = txn->commit(txn, 0);
assert(r == 0);
r = db->stat64(db, null_txn, &stats);
assert(r == 0);
CHECK_NUM_ROWS(0, stats);
if (verbose)
printf(
"%s : after delete commit %lu rows\n",
__FUNCTION__,
stats.bt_ndata);
db->close(db, 0);
}
static void test_insert_rollback(DB_ENV* env) {
int r;
DB* db;
DB_TXN* txn;
DB_BTREE_STAT64 stats;
db = create_db(__FUNCTION__, env);
r = env->txn_begin(env, null_txn, &txn, 0);
assert(r == 0);
add_records(db, txn, 0, num_records);
r = db->stat64(db, null_txn, &stats);
assert(r == 0);
CHECK_NUM_ROWS(num_records, stats);
if (verbose)
printf("%s : before rollback %lu rows\n", __FUNCTION__, stats.bt_ndata);
r = txn->abort(txn);
assert(r == 0);
r = db->stat64(db, null_txn, &stats);
assert(r == 0);
// CANNOT TEST stats HERE AS THEY ARE SOMEWHAT NON-DETERMINISTIC UNTIL
// optimize + hot_optimize HAVE BEEN RUN DUE TO THE FACT THAT ROLLBACK
// MESSAGES ARE "IN-FLIGHT" IN THE TREE AND MUST BE APPLIED IN ORDER TO
// CORRECT THE RUNNING LOGICAL COUNT
if (verbose)
printf("%s : after rollback %lu rows\n", __FUNCTION__, stats.bt_ndata);
full_optimize(db);
r = db->stat64(db, null_txn, &stats);
assert(r == 0);
CHECK_NUM_ROWS(0, stats);
if (verbose)
printf(
"%s : after rollback optimize %lu rows\n",
__FUNCTION__,
stats.bt_ndata);
db->close(db, 0);
}
static void test_insert_delete_rollback(DB_ENV* env) {
int r;
DB* db;
DB_TXN* txn;
DB_BTREE_STAT64 stats;
db = create_db(__FUNCTION__, env);
r = env->txn_begin(env, null_txn, &txn, 0);
assert(r == 0);
add_records(db, txn, 0, num_records);
r = db->stat64(db, null_txn, &stats);
assert(r == 0);
CHECK_NUM_ROWS(num_records, stats);
if (verbose)
printf("%s : before delete %lu rows\n", __FUNCTION__, stats.bt_ndata);
delete_records(db, txn, 0, num_records);
r = db->stat64(db, null_txn, &stats);
assert(r == 0);
CHECK_NUM_ROWS(0, stats);
if (verbose)
printf("%s : after delete %lu rows\n", __FUNCTION__, stats.bt_ndata);
r = txn->abort(txn);
assert(r == 0);
r = db->stat64(db, null_txn, &stats);
assert(r == 0);
CHECK_NUM_ROWS(0, stats);
if (verbose)
printf("%s : after commit %lu rows\n", __FUNCTION__, stats.bt_ndata);
db->close(db, 0);
}
static void test_insert_commit_delete_rollback(DB_ENV* env) {
int r;
DB* db;
DB_TXN* txn;
DB_BTREE_STAT64 stats;
db = create_db(__FUNCTION__, env);
r = env->txn_begin(env, null_txn, &txn, 0);
assert(r == 0);
add_records(db, txn, 0, num_records);
r = db->stat64(db, null_txn, &stats);
assert(r == 0);
CHECK_NUM_ROWS(num_records, stats);
if (verbose)
printf(
"%s : before insert commit %lu rows\n",
__FUNCTION__,
stats.bt_ndata);
r = txn->commit(txn, 0);
assert(r == 0);
r = db->stat64(db, null_txn, &stats);
assert(r == 0);
CHECK_NUM_ROWS(num_records, stats);
if (verbose)
printf(
"%s : after insert commit %lu rows\n",
__FUNCTION__,
stats.bt_ndata);
r = env->txn_begin(env, null_txn, &txn, 0);
assert(r == 0);
delete_records(db, txn, 0, num_records);
r = db->stat64(db, null_txn, &stats);
assert(r == 0);
CHECK_NUM_ROWS(0, stats);
if (verbose)
printf("%s : after delete %lu rows\n", __FUNCTION__, stats.bt_ndata);
r = txn->abort(txn);
assert(r == 0);
r = db->stat64(db, null_txn, &stats);
assert(r == 0);
// CANNOT TEST stats HERE AS THEY ARE SOMEWHAT NON-DETERMINISTIC UNTIL
// optimize + hot_optimize HAVE BEEN RUN DUE TO THE FACT THAT ROLLBACK
// MESSAGES ARE "IN-FLIGHT" IN THE TREE AND MUST BE APPLIED IN ORDER TO
// CORRECT THE RUNNING LOGICAL COUNT
if (verbose)
printf(
"%s : after delete rollback %lu rows\n",
__FUNCTION__,
stats.bt_ndata);
full_optimize(db);
r = db->stat64(db, null_txn, &stats);
assert(r == 0);
CHECK_NUM_ROWS(num_records, stats);
if (verbose)
printf(
"%s : after delete rollback optimize %lu rows\n",
__FUNCTION__,
stats.bt_ndata);
db->close(db, 0);
}
static inline uint64_t time_in_microsec() {
struct timeval t;
gettimeofday(&t, NULL);
return t.tv_sec * (1UL * 1000 * 1000) + t.tv_usec;
}
static int test_recount_insert_commit_progress(
uint64_t count,
uint64_t deleted,
void*) {
if (verbose)
printf(
"%s : count[%lu] deleted[%lu]\n",
__FUNCTION__,
count,
deleted);
return 0;
}
static int test_recount_cancel_progress(uint64_t, uint64_t, void*) {
return 1;
}
static void test_recount_insert_commit(DB_ENV* env) {
int r;
DB* db;
DB_TXN* txn;
DB_BTREE_STAT64 stats;
db = create_db(__FUNCTION__, env);
r = env->txn_begin(env, null_txn, &txn, 0);
assert(r == 0);
add_records(db, txn, 0, num_records);
r = db->stat64(db, null_txn, &stats);
assert(r == 0);
CHECK_NUM_ROWS(num_records, stats);
if (verbose)
printf(
"%s : before commit %lu rows\n",
__FUNCTION__,
stats.bt_ndata);
r = txn->commit(txn, 0);
assert(r == 0);
r = db->stat64(db, null_txn, &stats);
assert(r == 0);
CHECK_NUM_ROWS(num_records, stats);
if (verbose)
printf("%s : after commit %lu rows\n", __FUNCTION__, stats.bt_ndata);
// test that recount counted the correct # of rows
r = db->recount_rows(db, test_recount_insert_commit_progress, NULL);
assert(r == 0);
CHECK_NUM_ROWS(num_records, stats);
// test that a nonzero return from the recount callback cancels the recount
r = db->recount_rows(db, test_recount_cancel_progress, NULL);
assert(r == 1);
CHECK_NUM_ROWS(num_records, stats);
db->close(db, 0);
}
int test_main(int UU(argc), char UU(*const argv[])) {
int r;
DB_ENV* env;
toku_os_recursive_delete(TOKU_TEST_FILENAME);
toku_os_mkdir(TOKU_TEST_FILENAME, S_IRWXU + S_IRWXG + S_IRWXO);
r = db_env_create(&env, 0);
assert(r == 0);
r =
env->open(
env,
TOKU_TEST_FILENAME,
DB_INIT_MPOOL + DB_INIT_LOG + DB_INIT_TXN + DB_PRIVATE + DB_CREATE,
S_IRWXU + S_IRWXG + S_IRWXO);
assert(r == 0);
test_insert_commit(env);
test_insert_delete_commit(env);
test_insert_commit_delete_commit(env);
test_insert_rollback(env);
test_insert_delete_rollback(env);
test_insert_commit_delete_rollback(env);
test_recount_insert_commit(env);
r = env->close(env, 0);
assert(r == 0);
return 0;
}

View File

@@ -0,0 +1,217 @@
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
#ident "$Id$"
/*======
This file is part of PerconaFT.
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
PerconaFT is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License, version 2,
as published by the Free Software Foundation.
PerconaFT is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
----------------------------------------
PerconaFT is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License, version 3,
as published by the Free Software Foundation.
PerconaFT is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
======= */
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
//In response to the read-committed crash bug seen in sysbench, this test checks
//the atomicity of the txn manager when handling the child txn snapshot.
//The test is expected to fail without the read-committed fix.
#include "test.h"
#include "toku_pthread.h"
#include "ydb.h"
struct test_sync {
int state;
toku_mutex_t lock;
toku_cond_t cv;
};
static void test_sync_init(struct test_sync *UU(sync)) {
#if TOKU_DEBUG_TXN_SYNC
sync->state = 0;
toku_mutex_init(&sync->lock, NULL);
toku_cond_init(&sync->cv, NULL);
#endif
}
static void test_sync_destroy(struct test_sync *UU(sync)) {
#if TOKU_DEBUG_TXN_SYNC
toku_mutex_destroy(&sync->lock);
toku_cond_destroy(&sync->cv);
#endif
}
static void test_sync_sleep(struct test_sync *UU(sync), int UU(new_state)) {
#if TOKU_DEBUG_TXN_SYNC
toku_mutex_lock(&sync->lock);
while (sync->state != new_state) {
toku_cond_wait(&sync->cv, &sync->lock);
}
toku_mutex_unlock(&sync->lock);
#endif
}
static void test_sync_next_state(struct test_sync *UU(sync)) {
#if TOKU_DEBUG_TXN_SYNC
toku_mutex_lock(&sync->lock);
sync->state++;
toku_cond_broadcast(&sync->cv);
toku_mutex_unlock(&sync->lock);
#endif
}
struct start_txn_arg {
DB_ENV *env;
DB *db;
DB_TXN * parent;
};
static struct test_sync sync_s;
static void test_callback(uint64_t self_tid, void * extra) {
pthread_t **p = (pthread_t **) extra;
pthread_t tid_1 = *p[0];
pthread_t tid_2 = *p[1];
assert(self_tid == tid_2);
printf("%s: the thread[%" PRIu64 "] is going to wait...\n", __func__, tid_1);
test_sync_next_state(&sync_s);
sleep(3);
//test_sync_sleep(&sync_s,3);
//using the test_sync_sleep/test_sync_next_state pair would synchronize the
//threads more precisely; however, after the fix it can deadlock, so a plain
//sleep is used for this proof-of-concept test.
printf("%s: the thread[%" PRIu64 "] is resuming...\n", __func__, tid_1);
return;
}
static void * start_txn2(void * extra) {
struct start_txn_arg * args = (struct start_txn_arg *) extra;
DB_ENV * env = args -> env;
DB * db = args->db;
DB_TXN * parent = args->parent;
test_sync_sleep(&sync_s, 1);
printf("start %s [thread %" PRIu64 "]\n", __func__, pthread_self());
DB_TXN *txn;
int r = env->txn_begin(env, parent, &txn, DB_READ_COMMITTED);
assert(r == 0);
//do some random things...
DBT key, data;
dbt_init(&key, "hello", 6);
dbt_init(&data, "world", 6);
db->put(db, txn, &key, &data, 0);
db->get(db, txn, &key, &data, 0);
r = txn->commit(txn, 0);
assert(r == 0);
printf("%s done[thread %" PRIu64 "]\n", __func__, pthread_self());
return extra;
}
static void * start_txn1(void * extra) {
struct start_txn_arg * args = (struct start_txn_arg *) extra;
DB_ENV * env = args -> env;
DB * db = args->db;
printf("start %s: [thread %" PRIu64 "]\n", __func__, pthread_self());
DB_TXN *txn;
int r = env->txn_begin(env, NULL, &txn, DB_READ_COMMITTED);
assert(r == 0);
printf("%s: txn began by [thread %" PRIu64 "], will wait\n", __func__, pthread_self());
test_sync_next_state(&sync_s);
test_sync_sleep(&sync_s,2);
printf("%s: [thread %" PRIu64 "] resumed\n", __func__, pthread_self());
//do some random things...
DBT key, data;
dbt_init(&key, "hello", 6);
dbt_init(&data, "world", 6);
db->put(db, txn, &key, &data, 0);
db->get(db, txn, &key, &data, 0);
r = txn->commit(txn, 0);
assert(r == 0);
printf("%s: done[thread %" PRIu64 "]\n", __func__, pthread_self());
//test_sync_next_state(&sync_s);
return extra;
}
int test_main (int UU(argc), char * const UU(argv[])) {
int r;
toku_os_recursive_delete(TOKU_TEST_FILENAME);
r = toku_os_mkdir(TOKU_TEST_FILENAME, S_IRWXU+S_IRWXG+S_IRWXO);
assert(r == 0);
DB_ENV *env;
r = db_env_create(&env, 0);
assert(r == 0);
r = env->open(env, TOKU_TEST_FILENAME, DB_INIT_MPOOL|DB_CREATE|DB_THREAD |DB_INIT_LOCK|DB_INIT_LOG|DB_INIT_TXN|DB_PRIVATE, S_IRWXU+S_IRWXG+S_IRWXO);
assert(r == 0);
DB *db = NULL;
r = db_create(&db, env, 0);
assert(r == 0);
r = db->open(db, NULL, "testit", NULL, DB_BTREE, DB_AUTO_COMMIT+DB_CREATE, S_IRWXU+S_IRWXG+S_IRWXO);
assert(r == 0);
DB_TXN * parent = NULL;
r = env->txn_begin(env, 0, &parent, DB_READ_COMMITTED);
assert(r == 0);
ZERO_STRUCT(sync_s);
test_sync_init(&sync_s);
pthread_t tid_1 = 0;
pthread_t tid_2 = 0;
pthread_t* callback_extra[2] = {&tid_1, &tid_2};
toku_set_test_txn_sync_callback(test_callback, callback_extra);
struct start_txn_arg args = {env, db, parent};
r = pthread_create(&tid_1, NULL, start_txn1, &args);
assert(r==0);
r= pthread_create(&tid_2, NULL, start_txn2, &args);
assert(r==0);
void * ret;
r = pthread_join(tid_1, &ret);
assert(r == 0);
r = pthread_join(tid_2, &ret);
assert(r == 0);
r = parent->commit(parent, 0);
assert(r ==0);
test_sync_destroy(&sync_s);
r = db->close(db, 0);
assert(r == 0);
r = env->close(env, 0);
assert(r == 0);
return 0;
}

View File

@@ -3148,6 +3148,10 @@ toku_test_get_latest_lsn(DB_ENV *env) {
return rval.lsn;
}
void toku_set_test_txn_sync_callback(void (* cb) (uint64_t, void *), void * extra) {
set_test_txn_sync_callback(cb, extra);
}
int
toku_test_get_checkpointing_user_data_status (void) {
return toku_cachetable_get_checkpointing_user_data_status();

View File

@@ -58,3 +58,6 @@ extern "C" uint64_t toku_test_get_latest_lsn(DB_ENV *env) __attribute__((__visib
// test-only function
extern "C" int toku_test_get_checkpointing_user_data_status(void) __attribute__((__visibility__("default")));
// test-only function
extern "C" void toku_set_test_txn_sync_callback(void (* ) (uint64_t, void *), void * extra) __attribute__((__visibility__("default")));

View File

@@ -1015,6 +1015,25 @@ toku_db_verify_with_progress(DB *db, int (*progress_callback)(void *extra, float
return r;
}
static int
toku_db_recount_rows(DB* db, int (*progress_callback)(uint64_t count,
uint64_t deleted,
void* progress_extra),
void* progress_extra) {
HANDLE_PANICKED_DB(db);
int r = 0;
r =
toku_ft_recount_rows(
db->i->ft_handle,
progress_callback,
progress_extra);
return r;
}
int toku_setup_db_internal (DB **dbp, DB_ENV *env, uint32_t flags, FT_HANDLE ft_handle, bool is_open) {
if (flags || env == NULL)
return EINVAL;
@@ -1098,6 +1117,7 @@ toku_db_create(DB ** db, DB_ENV * env, uint32_t flags) {
USDB(dbt_pos_infty);
USDB(dbt_neg_infty);
USDB(get_fragmentation);
USDB(recount_rows);
#undef USDB
result->get_indexer = db_get_indexer;
result->del = autotxn_db_del;

File diff suppressed because it is too large

View File

@@ -23,11 +23,12 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
#if !defined(HA_TOKUDB_H)
#define HA_TOKUDB_H
#ifndef _HA_TOKUDB_H
#define _HA_TOKUDB_H
#include <db.h>
#include "hatoku_hton.h"
#include "hatoku_cmp.h"
#include "tokudb_background.h"
#define HA_TOKU_ORIG_VERSION 4
#define HA_TOKU_VERSION 4
@@ -45,84 +46,359 @@ typedef struct loader_context {
} *LOADER_CONTEXT;
//
// This object stores table information that is to be shared
// This class stores table information that is to be shared
// among all ha_tokudb objects.
// There is one instance per table, shared among threads.
// There is one instance per table, shared among handlers.
// Some of the variables here are the DB* pointers to indexes,
// and auto increment information.
//
// When the last user releases its reference on the share,
// it closes all of its database handles and releases all info.
// The share instance stays around though, so some data can be transiently
// kept across open-close-open-close cycles. This data will be explicitly
// noted below.
//
class TOKUDB_SHARE {
public:
void init(void);
void destroy(void);
enum share_state_t {
CLOSED,
OPENED,
ERROR
};
// one-time startup initialization
static void static_init();
// one-time shutdown destruction
static void static_destroy();
// returns a locked, properly reference-counted share
// callers must check to ensure the share is in the correct state for their
// use and unlock the share.
// if create_new is set, a new "CLOSED" share will be created if one
// doesn't exist; otherwise NULL is returned if no existing share is found.
static TOKUDB_SHARE* get_share(
const char* table_name,
TABLE_SHARE* table_share,
THR_LOCK_DATA* data,
bool create_new);
// removes a share entirely from the pool; call when renaming/deleting a table
// caller must hold ddl_mutex on this share and the share MUST have
// exactly 0 _use_count
static void drop_share(TOKUDB_SHARE* share);
void* operator new(size_t sz);
void operator delete(void* p);
TOKUDB_SHARE();
// increases the ref count and waits for any currently executing state
// transition to complete
// returns current state and leaves share locked
// callers must check to ensure the share is in the correct state for their use
// and unlock the share.
share_state_t addref();
// decreases the ref count and potentially closes the share
// caller must not have ownership of mutex, will lock and release
int release();
// returns the current use count
// no locking requirements
inline int use_count() const;
// locks the share
inline void lock() const;
// unlocks the share
inline void unlock() const;
// returns the current state of the share
// no locking requirements
inline share_state_t state() const;
// sets the state of the share
// caller must hold mutex on this share
inline void set_state(share_state_t state);
// returns the full MySQL table name of the table ex:
// ./database/table
// no locking requirements
inline const char* full_table_name() const;
// returns the strlen of the full table name
// no locking requirements
inline uint full_table_name_length() const;
// returns the parsed database name this table resides in
// no locking requirements
inline const char* database_name() const;
// returns the strlen of the database name
// no locking requirements
inline uint database_name_length() const;
// returns the parsed table name of this table
// no locking requirements
inline const char* table_name() const;
// returns the strlen of the table name
// no locking requirements
inline uint table_name_length() const;
// sets the estimated number of rows in the table
// should be called only during share initialization and info call
// caller must hold mutex on this share unless specified by 'locked'
inline void set_row_count(uint64_t rows, bool locked);
// updates tracked row count and ongoing table change delta tracking
// called from any ha_tokudb operation that inserts/modifies/deletes rows
// may spawn background analysis if enabled, allowed and threshold hit
// caller must not have ownership of mutex, will lock and release
void update_row_count(
THD* thd,
uint64_t added,
uint64_t deleted,
uint64_t updated);
// returns the current row count estimate
// no locking requirements
inline ha_rows row_count() const;
// initializes cardinality statistics, takes ownership of incoming buffer
// caller must hold mutex on this share
inline void init_cardinality_counts(
uint32_t rec_per_keys,
uint64_t* rec_per_key);
// update the cardinality statistics. number of records must match
// caller must hold mutex on this share
inline void update_cardinality_counts(
uint32_t rec_per_keys,
const uint64_t* rec_per_key);
// disallow any auto analysis from taking place
// caller must hold mutex on this share
inline void disallow_auto_analysis();
// allow any auto analysis to take place
// pass in true for 'reset_deltas' to reset delta counting to 0
// caller must hold mutex on this share
inline void allow_auto_analysis(bool reset_deltas);
// cancels all background jobs for this share
// no locking requirements
inline void cancel_background_jobs() const;
// copies cardinality statistics into TABLE counter set
// caller must not have ownership of mutex, will lock and release
void set_cardinality_counts_in_table(TABLE* table);
// performs table analysis on underlying indices and produces estimated
// cardinality statistics.
// on success updates cardinality counts in status database and this share
// MUST pass a valid THD to access session variables.
// MAY pass txn. If txn is passed, assumes an explicit user-scheduled
// ANALYZE and not an auto ANALYZE resulting from delta threshold
// uses session variables:
// tokudb_analyze_in_background, tokudb_analyze_throttle,
// tokudb_analyze_time, and tokudb_analyze_delete_fraction
// caller must hold mutex on this share
int analyze_standard(THD* thd, DB_TXN* txn);
// performs table scan and updates the internal FT logical row count value
// on success also updates share row count estimate.
// MUST pass a valid THD to access session variables.
// MAY pass txn. If txn is passed, assumes an explicit user-scheduled recount.
// uses session variables:
// tokudb_analyze_in_background, and tokudb_analyze_throttle
// caller must not have ownership of mutex, will lock and release
int analyze_recount_rows(THD* thd, DB_TXN* txn);
public:
char *table_name;
uint table_name_length, use_count;
pthread_mutex_t mutex;
THR_LOCK lock;
//*********************************
// Destroyed and recreated on open-close-open
ulonglong auto_ident;
ulonglong last_auto_increment, auto_inc_create_value;
//
// estimate on number of rows in table
//
ha_rows rows;
//
// estimate on number of rows added in the process of a locked tables
// this is so we can better estimate row count during a lock table
//
ha_rows rows_from_locked_table;
DB *status_block;
//
DB* status_block;
// DB that is indexed on the primary key
//
DB *file;
//
DB* file;
// array of all DB's that make up table, includes DB that
// is indexed on the primary key, add 1 in case primary
// key is hidden
//
DB *key_file[MAX_KEY +1];
rw_lock_t key_file_lock;
DB* key_file[MAX_KEY + 1];
uint status, version, capabilities;
uint ref_length;
//
// whether table has an auto increment column
//
bool has_auto_inc;
//
// index of auto increment column in table->field, if auto_inc exists
//
uint ai_field_index;
//
// whether the primary key has a string
//
bool pk_has_string;
KEY_AND_COL_INFO kc_info;
//
// key info copied from TABLE_SHARE, used by background jobs that have no
// access to a handler instance
uint _keys;
uint _max_key_parts;
struct key_descriptor_t {
uint _parts;
bool _is_unique;
char* _name;
};
key_descriptor_t* _key_descriptors;
// we want the following optimization for bulk loads: if the table is empty,
// attempt to grab a table lock. The emptiness check can be expensive,
// so we try it once per table. After that, we keep this variable around
// to tell us not to try it again.
//
bool try_table_lock;
// to tell us not to try it again.
bool try_table_lock;
bool has_unique_keys;
bool replace_into_fast;
rw_lock_t num_DBs_lock;
tokudb::thread::rwlock_t _num_DBs_lock;
uint32_t num_DBs;
pthread_cond_t m_openclose_cond;
enum { CLOSED, OPENING, OPENED, CLOSING, ERROR } m_state;
int m_error;
int m_initialize_count;
private:
static HASH _open_tables;
static tokudb::thread::mutex_t _open_tables_mutex;
uint n_rec_per_key;
uint64_t *rec_per_key;
static uchar* hash_get_key(
TOKUDB_SHARE* share,
size_t* length,
TOKUDB_UNUSED(my_bool not_used));
static void hash_free_element(TOKUDB_SHARE* share);
//*********************************
// Spans open-close-open
mutable tokudb::thread::mutex_t _mutex;
mutable tokudb::thread::mutex_t _ddl_mutex;
uint _use_count;
share_state_t _state;
ulonglong _row_delta_activity;
bool _allow_auto_analysis;
String _full_table_name;
String _database_name;
String _table_name;
//*********************************
// Destroyed and recreated on open-close-open
THR_LOCK _thr_lock;
// estimate on number of rows in table
ha_rows _rows;
// cardinality counts
uint32_t _rec_per_keys;
uint64_t* _rec_per_key;
bool _card_changed;
void init(const char* table_name);
void destroy();
};
inline int TOKUDB_SHARE::use_count() const {
return _use_count;
}
inline void TOKUDB_SHARE::lock() const {
_mutex.lock();
}
inline void TOKUDB_SHARE::unlock() const {
_mutex.unlock();
}
inline TOKUDB_SHARE::share_state_t TOKUDB_SHARE::state() const {
return _state;
}
inline void TOKUDB_SHARE::set_state(TOKUDB_SHARE::share_state_t state) {
assert_debug(_mutex.is_owned_by_me());
_state = state;
}
inline const char* TOKUDB_SHARE::full_table_name() const {
return _full_table_name.ptr();
}
inline uint TOKUDB_SHARE::full_table_name_length() const {
return _full_table_name.length();
}
inline const char* TOKUDB_SHARE::database_name() const {
return _database_name.ptr();
}
inline uint TOKUDB_SHARE::database_name_length() const {
return _database_name.length();
}
inline const char* TOKUDB_SHARE::table_name() const {
return _table_name.ptr();
}
inline uint TOKUDB_SHARE::table_name_length() const {
return _table_name.length();
}
inline void TOKUDB_SHARE::set_row_count(uint64_t rows, bool locked) {
if (!locked) {
lock();
} else {
assert_debug(_mutex.is_owned_by_me());
}
if (_rows && rows == 0)
_row_delta_activity = 0;
_rows = rows;
if (!locked) {
unlock();
}
}
inline ha_rows TOKUDB_SHARE::row_count() const {
return _rows;
}
inline void TOKUDB_SHARE::init_cardinality_counts(
uint32_t rec_per_keys,
uint64_t* rec_per_key) {
assert_debug(_mutex.is_owned_by_me());
// cannot change the number of keys live
assert_always(_rec_per_key == NULL && _rec_per_keys == 0);
_rec_per_keys = rec_per_keys;
_rec_per_key = rec_per_key;
_card_changed = true;
}
inline void TOKUDB_SHARE::update_cardinality_counts(
uint32_t rec_per_keys,
const uint64_t* rec_per_key) {
assert_debug(_mutex.is_owned_by_me());
// cannot change the number of keys live
assert_always(rec_per_keys == _rec_per_keys);
assert_always(rec_per_key != NULL);
memcpy(_rec_per_key, rec_per_key, _rec_per_keys * sizeof(uint64_t));
_card_changed = true;
}
inline void TOKUDB_SHARE::disallow_auto_analysis() {
assert_debug(_mutex.is_owned_by_me());
_allow_auto_analysis = false;
}
inline void TOKUDB_SHARE::allow_auto_analysis(bool reset_deltas) {
assert_debug(_mutex.is_owned_by_me());
_allow_auto_analysis = true;
if (reset_deltas)
_row_delta_activity = 0;
}
inline void TOKUDB_SHARE::cancel_background_jobs() const {
tokudb::background::_job_manager->cancel_job(full_table_name());
}
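The comments above define a fairly intricate contract: one reference-counted share per table, handed out locked, surviving open-close-open cycles, and closed only on the last release. A self-contained model of that contract (this mirrors the documented behavior, not the real TOKUDB_SHARE implementation):

#include <map>
#include <mutex>
#include <string>

class share_model {
public:
    enum state_t { CLOSED, OPENED };

    // Returns a locked, reference-counted share, creating a CLOSED one on
    // first use; callers must check the state and unlock, per the contract.
    static share_model* get_share(const std::string& name) {
        std::lock_guard<std::mutex> g(_pool_mutex);
        share_model*& s = _pool[name];
        if (s == nullptr)
            s = new share_model();
        s->_mutex.lock();
        ++s->_use_count;
        return s;
    }

    // Drops a reference; the last release closes resources but keeps the
    // object cached so some state survives open-close-open cycles.
    void release() {
        std::lock_guard<std::mutex> g(_mutex);
        if (--_use_count == 0)
            _state = CLOSED;  // the real code closes the DB handles here
    }

    state_t state() const { return _state; }
    void set_state(state_t s) { _state = s; }  // hold the lock to call
    void unlock() { _mutex.unlock(); }

private:
    static std::mutex _pool_mutex;
    static std::map<std::string, share_model*> _pool;
    std::mutex _mutex;
    int _use_count = 0;
    state_t _state = CLOSED;
};

std::mutex share_model::_pool_mutex;
std::map<std::string, share_model*> share_model::_pool;

A handler would call get_share(), inspect state() under the returned lock, open the underlying DBs and set_state(OPENED) if CLOSED, then unlock() and eventually release().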
typedef struct st_filter_key_part_info {
uint offset;
@@ -278,6 +554,7 @@ private:
//
ulonglong added_rows;
ulonglong deleted_rows;
ulonglong updated_rows;
uint last_dup_key;
@@ -438,7 +715,7 @@ public:
// Returns a bit mask of capabilities of storage engine. Capabilities
// defined in sql/handler.h
//
ulonglong table_flags(void) const;
ulonglong table_flags() const;
ulong index_flags(uint inx, uint part, bool all_parts) const;
@@ -482,7 +759,7 @@ public:
double index_only_read_time(uint keynr, double records);
int open(const char *name, int mode, uint test_if_locked);
int close(void);
int close();
void update_create_info(HA_CREATE_INFO* create_info);
int create(const char *name, TABLE * form, HA_CREATE_INFO * create_info);
int delete_table(const char *name);
@@ -528,7 +805,7 @@ public:
void position(const uchar * record);
int info(uint);
int extra(enum ha_extra_function operation);
int reset(void);
int reset();
int external_lock(THD * thd, int lock_type);
int start_stmt(THD * thd, thr_lock_type lock_type);
@@ -540,12 +817,17 @@ public:
int get_status(DB_TXN* trans);
void init_hidden_prim_key_info(DB_TXN *txn);
inline void get_auto_primary_key(uchar * to) {
tokudb_pthread_mutex_lock(&share->mutex);
share->lock();
share->auto_ident++;
hpk_num_to_char(to, share->auto_ident);
tokudb_pthread_mutex_unlock(&share->mutex);
share->unlock();
}
virtual void get_auto_increment(ulonglong offset, ulonglong increment, ulonglong nb_desired_values, ulonglong * first_value, ulonglong * nb_reserved_values);
virtual void get_auto_increment(
ulonglong offset,
ulonglong increment,
ulonglong nb_desired_values,
ulonglong* first_value,
ulonglong* nb_reserved_values);
bool is_optimize_blocking();
bool is_auto_inc_singleton();
void print_error(int error, myf errflag);
@@ -555,9 +837,6 @@ public:
bool primary_key_is_clustered() {
return true;
}
bool supports_clustered_keys() {
return true;
}
int cmp_ref(const uchar * ref1, const uchar * ref2);
bool check_if_incompatible_data(HA_CREATE_INFO * info, uint table_changes);
@@ -752,25 +1031,5 @@ private:
bool in_rpl_update_rows;
};
#if TOKU_INCLUDE_OPTION_STRUCTS
struct ha_table_option_struct {
uint row_format;
};
struct ha_index_option_struct {
bool clustering;
};
static inline bool key_is_clustering(const KEY *key) {
return (key->flags & HA_CLUSTERING) || (key->option_struct && key->option_struct->clustering);
}
#else
static inline bool key_is_clustering(const KEY *key) {
return key->flags & HA_CLUSTERING;
}
#endif
#endif
#endif // _HA_TOKUDB_H

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@@ -26,8 +26,18 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
#if !defined(TOKUDB_ALTER_COMMON)
#define TOKUDB_ALTER_COMMON
static bool tables_have_same_keys(TABLE* table, TABLE* altered_table, bool print_error, bool check_field_index) __attribute__((unused));
static bool tables_have_same_keys(TABLE* table, TABLE* altered_table, bool print_error, bool check_field_index) {
TOKUDB_UNUSED(static bool tables_have_same_keys(
TABLE* table,
TABLE* altered_table,
bool print_error,
bool check_field_index));
static bool tables_have_same_keys(
TABLE* table,
TABLE* altered_table,
bool print_error,
bool check_field_index) {
bool retval;
if (table->s->keys != altered_table->s->keys) {
if (print_error) {
@@ -39,10 +49,9 @@ static bool tables_have_same_keys(TABLE* table, TABLE* altered_table, bool print
if (table->s->primary_key != altered_table->s->primary_key) {
if (print_error) {
sql_print_error(
"Tables have different primary keys, %d %d",
"Tables have different primary keys, %d %d",
table->s->primary_key,
altered_table->s->primary_key
);
altered_table->s->primary_key);
}
retval = false;
goto cleanup;
@@ -53,33 +62,32 @@ static bool tables_have_same_keys(TABLE* table, TABLE* altered_table, bool print
if (strcmp(curr_orig_key->name, curr_altered_key->name)) {
if (print_error) {
sql_print_error(
"key %d has different name, %s %s",
i,
"key %d has different name, %s %s",
i,
curr_orig_key->name,
curr_altered_key->name
);
curr_altered_key->name);
}
retval = false;
goto cleanup;
}
if (key_is_clustering(curr_orig_key) != key_is_clustering(curr_altered_key)) {
if (key_is_clustering(curr_orig_key) !=
key_is_clustering(curr_altered_key)) {
if (print_error) {
sql_print_error(
"keys disagree on if they are clustering, %d, %d",
get_key_parts(curr_orig_key),
get_key_parts(curr_altered_key)
);
get_key_parts(curr_altered_key));
}
retval = false;
goto cleanup;
}
if (((curr_orig_key->flags & HA_NOSAME) == 0) != ((curr_altered_key->flags & HA_NOSAME) == 0)) {
if (((curr_orig_key->flags & HA_NOSAME) == 0) !=
((curr_altered_key->flags & HA_NOSAME) == 0)) {
if (print_error) {
sql_print_error(
"keys disagree on if they are unique, %d, %d",
get_key_parts(curr_orig_key),
get_key_parts(curr_altered_key)
);
get_key_parts(curr_altered_key));
}
retval = false;
goto cleanup;
@@ -89,8 +97,7 @@ static bool tables_have_same_keys(TABLE* table, TABLE* altered_table, bool print
sql_print_error(
"keys have different number of parts, %d, %d",
get_key_parts(curr_orig_key),
get_key_parts(curr_altered_key)
);
get_key_parts(curr_altered_key));
}
retval = false;
goto cleanup;
@@ -106,10 +113,9 @@ static bool tables_have_same_keys(TABLE* table, TABLE* altered_table, bool print
if (curr_orig_part->length != curr_altered_part->length) {
if (print_error) {
sql_print_error(
"Key %s has different length at index %d",
curr_orig_key->name,
j
);
"Key %s has different length at index %d",
curr_orig_key->name,
j);
}
retval = false;
goto cleanup;
@@ -123,10 +129,9 @@ static bool tables_have_same_keys(TABLE* table, TABLE* altered_table, bool print
if (!are_fields_same) {
if (print_error) {
sql_print_error(
"Key %s has different field at index %d",
curr_orig_key->name,
j
);
"Key %s has different field at index %d",
curr_orig_key->name,
j);
}
retval = false;
goto cleanup;
@@ -143,7 +148,8 @@ cleanup:
// to evaluate whether a field is NULL or not. This value is a power of 2, from
// 2^0 to 2^7. We return the position of the bit within the byte, which is
// lg null_bit
static inline uint32_t get_null_bit_position(uint32_t null_bit) __attribute__((unused));
TOKUDB_UNUSED(static inline uint32_t get_null_bit_position(
uint32_t null_bit));
static inline uint32_t get_null_bit_position(uint32_t null_bit) {
uint32_t retval = 0;
switch(null_bit) {
@@ -170,23 +176,28 @@ static inline uint32_t get_null_bit_position(uint32_t null_bit) {
break;
case (128):
retval = 7;
break;
break;
default:
assert(false);
assert_unreachable();
}
return retval;
}
// returns the index of the null bit of field.
static inline uint32_t get_overall_null_bit_position(TABLE* table, Field* field) __attribute__((unused));
static inline uint32_t get_overall_null_bit_position(TABLE* table, Field* field) {
TOKUDB_UNUSED(static inline uint32_t get_overall_null_bit_position(
TABLE* table,
Field* field));
static inline uint32_t get_overall_null_bit_position(
TABLE* table,
Field* field) {
uint32_t offset = get_null_offset(table, field);
uint32_t null_bit = field->null_bit;
return offset*8 + get_null_bit_position(null_bit);
}
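A quick worked example of the two helpers above: a field whose null byte sits at offset 2 in the record with null_bit == 16 (2^4) has overall null-bit position 2*8 + 4 == 20. A standalone sketch of the same arithmetic (illustrative only):

#include <cassert>
#include <cstdint>

// same mapping as get_null_bit_position(): lg of a power of two
static uint32_t lg_pow2(uint32_t v) {
    uint32_t pos = 0;
    while (v >>= 1)
        pos++;
    return pos;
}

int main() {
    assert(lg_pow2(16) == 4);           // null_bit 16 -> bit 4
    assert(2 * 8 + lg_pow2(16) == 20);  // null offset 2 -> overall pos 20
    return 0;
}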
// not static since 51 uses this and 56 does not
static bool are_null_bits_in_order(TABLE* table) __attribute__((unused));
TOKUDB_UNUSED(static bool are_null_bits_in_order(TABLE* table));
static bool are_null_bits_in_order(TABLE* table) {
uint32_t curr_null_pos = 0;
bool first = true;
@@ -195,10 +206,8 @@ static bool are_null_bits_in_order(TABLE* table) {
Field* curr_field = table->field[i];
bool nullable = (curr_field->null_bit != 0);
if (nullable) {
uint32_t pos = get_overall_null_bit_position(
table,
curr_field
);
uint32_t pos =
get_overall_null_bit_position(table, curr_field);
if (!first && pos != curr_null_pos+1){
retval = false;
break;
@@ -210,34 +219,38 @@ static bool are_null_bits_in_order(TABLE* table) {
return retval;
}
static uint32_t get_first_null_bit_pos(TABLE* table) __attribute__((unused));
TOKUDB_UNUSED(static uint32_t get_first_null_bit_pos(TABLE* table));
static uint32_t get_first_null_bit_pos(TABLE* table) {
uint32_t table_pos = 0;
for (uint i = 0; i < table->s->fields; i++) {
Field* curr_field = table->field[i];
bool nullable = (curr_field->null_bit != 0);
if (nullable) {
table_pos = get_overall_null_bit_position(
table,
curr_field
);
table_pos =
get_overall_null_bit_position(table, curr_field);
break;
}
}
return table_pos;
}
static bool is_column_default_null(TABLE* src_table, uint32_t field_index) __attribute__((unused));
static bool is_column_default_null(TABLE* src_table, uint32_t field_index) {
TOKUDB_UNUSED(static bool is_column_default_null(
TABLE* src_table,
uint32_t field_index));
static bool is_column_default_null(
TABLE* src_table,
uint32_t field_index) {
Field* curr_field = src_table->field[field_index];
bool is_null_default = false;
bool nullable = curr_field->null_bit != 0;
if (nullable) {
uint32_t null_bit_position = get_overall_null_bit_position(src_table, curr_field);
is_null_default = is_overall_null_position_set(
src_table->s->default_values,
null_bit_position
);
uint32_t null_bit_position =
get_overall_null_bit_position(src_table, curr_field);
is_null_default =
is_overall_null_position_set(
src_table->s->default_values,
null_bit_position);
}
return is_null_default;
}
@@ -248,9 +261,8 @@ static uint32_t fill_static_row_mutator(
TABLE* altered_table,
KEY_AND_COL_INFO* orig_kc_info,
KEY_AND_COL_INFO* altered_kc_info,
uint32_t keynr
)
{
uint32_t keynr) {
//
// start packing extra
//
@@ -258,25 +270,28 @@ static uint32_t fill_static_row_mutator(
// says what the operation is
pos[0] = UP_COL_ADD_OR_DROP;
pos++;
//
// null byte information
//
memcpy(pos, &orig_table->s->null_bytes, sizeof(orig_table->s->null_bytes));
pos += sizeof(orig_table->s->null_bytes);
memcpy(pos, &altered_table->s->null_bytes, sizeof(orig_table->s->null_bytes));
memcpy(
pos,
&altered_table->s->null_bytes,
sizeof(orig_table->s->null_bytes));
pos += sizeof(altered_table->s->null_bytes);
//
// num_offset_bytes
//
assert(orig_kc_info->num_offset_bytes <= 2);
assert_always(orig_kc_info->num_offset_bytes <= 2);
pos[0] = orig_kc_info->num_offset_bytes;
pos++;
assert(altered_kc_info->num_offset_bytes <= 2);
assert_always(altered_kc_info->num_offset_bytes <= 2);
pos[0] = altered_kc_info->num_offset_bytes;
pos++;
//
// size of fixed fields
//
@@ -286,7 +301,7 @@ static uint32_t fill_static_row_mutator(
fixed_field_size = altered_kc_info->mcp_info[keynr].fixed_field_size;
memcpy(pos, &fixed_field_size, sizeof(fixed_field_size));
pos += sizeof(fixed_field_size);
//
// length of offsets
//
@@ -304,7 +319,7 @@ static uint32_t fill_static_row_mutator(
memcpy(pos, &altered_start_null_pos, sizeof(altered_start_null_pos));
pos += sizeof(altered_start_null_pos);
assert((pos-buf) == STATIC_ROW_MUTATOR_SIZE);
assert_always((pos-buf) == STATIC_ROW_MUTATOR_SIZE);
return pos - buf;
}
@@ -316,9 +331,8 @@ static uint32_t fill_dynamic_row_mutator(
KEY_AND_COL_INFO* src_kc_info,
uint32_t keynr,
bool is_add,
bool* out_has_blobs
)
{
bool* out_has_blobs) {
uchar* pos = buf;
bool has_blobs = false;
uint32_t cols = num_columns;
@@ -327,7 +341,7 @@ static uint32_t fill_dynamic_row_mutator(
for (uint32_t i = 0; i < num_columns; i++) {
uint32_t curr_index = columns[i];
Field* curr_field = src_table->field[curr_index];
pos[0] = is_add ? COL_ADD : COL_DROP;
pos++;
//
@@ -338,22 +352,22 @@ static uint32_t fill_dynamic_row_mutator(
if (!nullable) {
pos[0] = 0;
pos++;
}
else {
} else {
pos[0] = 1;
pos++;
// write position of null byte that is to be removed
uint32_t null_bit_position = get_overall_null_bit_position(src_table, curr_field);
uint32_t null_bit_position =
get_overall_null_bit_position(src_table, curr_field);
memcpy(pos, &null_bit_position, sizeof(null_bit_position));
pos += sizeof(null_bit_position);
//
// if adding a column, write the value of the default null_bit
//
if (is_add) {
is_null_default = is_overall_null_position_set(
src_table->s->default_values,
null_bit_position
);
is_null_default =
is_overall_null_position_set(
src_table->s->default_values,
null_bit_position);
pos[0] = is_null_default ? 1 : 0;
pos++;
}
@@ -364,7 +378,8 @@ static uint32_t fill_dynamic_row_mutator(
pos[0] = COL_FIXED;
pos++;
//store the offset
uint32_t fixed_field_offset = src_kc_info->cp_info[keynr][curr_index].col_pack_val;
uint32_t fixed_field_offset =
src_kc_info->cp_info[keynr][curr_index].col_pack_val;
memcpy(pos, &fixed_field_offset, sizeof(fixed_field_offset));
pos += sizeof(fixed_field_offset);
//store the number of bytes
@@ -374,38 +389,35 @@ static uint32_t fill_dynamic_row_mutator(
if (is_add && !is_null_default) {
uint curr_field_offset = field_offset(curr_field, src_table);
memcpy(
pos,
src_table->s->default_values + curr_field_offset,
num_bytes
);
pos,
src_table->s->default_values + curr_field_offset,
num_bytes);
pos += num_bytes;
}
}
else if (is_variable_field(src_kc_info, curr_index)) {
} else if (is_variable_field(src_kc_info, curr_index)) {
pos[0] = COL_VAR;
pos++;
//store the index of the variable column
uint32_t var_field_index = src_kc_info->cp_info[keynr][curr_index].col_pack_val;
uint32_t var_field_index =
src_kc_info->cp_info[keynr][curr_index].col_pack_val;
memcpy(pos, &var_field_index, sizeof(var_field_index));
pos += sizeof(var_field_index);
if (is_add && !is_null_default) {
uint curr_field_offset = field_offset(curr_field, src_table);
uint32_t len_bytes = src_kc_info->length_bytes[curr_index];
uint32_t data_length = get_var_data_length(
src_table->s->default_values + curr_field_offset,
len_bytes
);
uint32_t data_length =
get_var_data_length(
src_table->s->default_values + curr_field_offset,
len_bytes);
memcpy(pos, &data_length, sizeof(data_length));
pos += sizeof(data_length);
memcpy(
pos,
src_table->s->default_values + curr_field_offset + len_bytes,
data_length
);
data_length);
pos += data_length;
}
}
else {
} else {
pos[0] = COL_BLOB;
pos++;
has_blobs = true;
@@ -418,9 +430,8 @@ static uint32_t fill_dynamic_row_mutator(
static uint32_t fill_static_blob_row_mutator(
uchar* buf,
TABLE* src_table,
KEY_AND_COL_INFO* src_kc_info
)
{
KEY_AND_COL_INFO* src_kc_info) {
uchar* pos = buf;
// copy number of blobs
memcpy(pos, &src_kc_info->num_blobs, sizeof(src_kc_info->num_blobs));
@@ -430,11 +441,11 @@ static uint32_t fill_static_blob_row_mutator(
uint32_t curr_field_index = src_kc_info->blob_fields[i];
Field* field = src_table->field[curr_field_index];
uint32_t len_bytes = field->row_pack_length();
assert(len_bytes <= 4);
assert_always(len_bytes <= 4);
pos[0] = len_bytes;
pos++;
}
return pos-buf;
}
@@ -444,9 +455,8 @@ static uint32_t fill_dynamic_blob_row_mutator(
uint32_t num_columns,
TABLE* src_table,
KEY_AND_COL_INFO* src_kc_info,
bool is_add
)
{
bool is_add) {
uchar* pos = buf;
for (uint32_t i = 0; i < num_columns; i++) {
uint32_t curr_field_index = columns[i];
@@ -461,19 +471,19 @@ static uint32_t fill_dynamic_blob_row_mutator(
}
}
// assert we found blob in list
assert(blob_index < src_kc_info->num_blobs);
assert_always(blob_index < src_kc_info->num_blobs);
pos[0] = is_add ? COL_ADD : COL_DROP;
pos++;
memcpy(pos, &blob_index, sizeof(blob_index));
pos += sizeof(blob_index);
if (is_add) {
uint32_t len_bytes = curr_field->row_pack_length();
assert(len_bytes <= 4);
assert_always(len_bytes <= 4);
pos[0] = len_bytes;
pos++;
// create a zero length blob field that can be directly copied in
// for now, in MySQL, we can only have blob fields
// create a zero length blob field that can be directly copied
// in for now, in MySQL, we can only have blob fields
// that have no default value
memset(pos, 0, len_bytes);
pos += len_bytes;
@@ -487,93 +497,86 @@ static uint32_t fill_dynamic_blob_row_mutator(
// TODO: namely, when do we get stuff from share->kc_info and when do we
// TODO: get it from altered_kc_info, and when is keynr associated with the right thing
uint32_t ha_tokudb::fill_row_mutator(
uchar* buf,
uint32_t* columns,
uchar* buf,
uint32_t* columns,
uint32_t num_columns,
TABLE* altered_table,
KEY_AND_COL_INFO* altered_kc_info,
uint32_t keynr,
bool is_add
)
{
if (tokudb_debug & TOKUDB_DEBUG_ALTER_TABLE) {
printf("*****some info:*************\n");
printf(
"old things: num_null_bytes %d, num_offset_bytes %d, fixed_field_size %d, fixed_field_size %d\n",
bool is_add) {
if (TOKUDB_UNLIKELY(TOKUDB_DEBUG_FLAGS(TOKUDB_DEBUG_ALTER_TABLE))) {
TOKUDB_HANDLER_TRACE("*****some info:*************");
TOKUDB_HANDLER_TRACE(
"old things: num_null_bytes %d, num_offset_bytes %d, "
"fixed_field_size %d, fixed_field_size %d",
table->s->null_bytes,
share->kc_info.num_offset_bytes,
share->kc_info.mcp_info[keynr].fixed_field_size,
share->kc_info.mcp_info[keynr].len_of_offsets
);
printf(
"new things: num_null_bytes %d, num_offset_bytes %d, fixed_field_size %d, fixed_field_size %d\n",
share->kc_info.mcp_info[keynr].len_of_offsets);
TOKUDB_HANDLER_TRACE(
"new things: num_null_bytes %d, num_offset_bytes %d, "
"fixed_field_size %d, fixed_field_size %d",
altered_table->s->null_bytes,
altered_kc_info->num_offset_bytes,
altered_kc_info->mcp_info[keynr].fixed_field_size,
altered_kc_info->mcp_info[keynr].len_of_offsets
);
printf("****************************\n");
altered_kc_info->mcp_info[keynr].len_of_offsets);
TOKUDB_HANDLER_TRACE("****************************");
}
uchar* pos = buf;
bool has_blobs = false;
pos += fill_static_row_mutator(
pos,
table,
altered_table,
&share->kc_info,
altered_kc_info,
keynr
);
if (is_add) {
pos += fill_dynamic_row_mutator(
pos +=
fill_static_row_mutator(
pos,
columns,
num_columns,
table,
altered_table,
altered_kc_info,
keynr,
is_add,
&has_blobs
);
}
else {
pos += fill_dynamic_row_mutator(
pos,
columns,
num_columns,
table,
&share->kc_info,
keynr,
is_add,
&has_blobs
);
}
if (has_blobs) {
pos += fill_static_blob_row_mutator(
pos,
table,
&share->kc_info
);
if (is_add) {
pos += fill_dynamic_blob_row_mutator(
altered_kc_info,
keynr);
if (is_add) {
pos +=
fill_dynamic_row_mutator(
pos,
columns,
num_columns,
altered_table,
altered_kc_info,
is_add
);
}
else {
pos += fill_dynamic_blob_row_mutator(
keynr,
is_add,
&has_blobs);
} else {
pos +=
fill_dynamic_row_mutator(
pos,
columns,
num_columns,
table,
&share->kc_info,
is_add
);
keynr,
is_add,
&has_blobs);
}
if (has_blobs) {
pos += fill_static_blob_row_mutator(pos, table, &share->kc_info);
if (is_add) {
pos +=
fill_dynamic_blob_row_mutator(
pos,
columns,
num_columns,
altered_table,
altered_kc_info,
is_add);
} else {
pos +=
fill_dynamic_blob_row_mutator(
pos,
columns,
num_columns,
table,
&share->kc_info,
is_add);
}
}
return pos-buf;
@@ -583,16 +586,23 @@ static bool all_fields_are_same_type(TABLE *table_a, TABLE *table_b) {
if (table_a->s->fields != table_b->s->fields)
return false;
for (uint i = 0; i < table_a->s->fields; i++) {
Field *field_a = table_a->field[i];
Field *field_b = table_b->field[i];
Field* field_a = table_a->field[i];
Field* field_b = table_b->field[i];
if (!fields_are_same_type(field_a, field_b))
return false;
}
return true;
}
static bool column_rename_supported(TABLE* orig_table, TABLE* new_table, bool alter_column_order) __attribute__((unused));
static bool column_rename_supported(TABLE* orig_table, TABLE* new_table, bool alter_column_order) {
TOKUDB_UNUSED(static bool column_rename_supported(
TABLE* orig_table,
TABLE* new_table,
bool alter_column_order));
static bool column_rename_supported(
TABLE* orig_table,
TABLE* new_table,
bool alter_column_order) {
bool retval = false;
bool keys_same_for_cr;
uint num_fields_with_different_names = 0;
@@ -622,20 +632,20 @@ static bool column_rename_supported(TABLE* orig_table, TABLE* new_table, bool al
retval = false;
goto cleanup;
}
assert(field_with_different_name < orig_table->s->fields);
assert_always(field_with_different_name < orig_table->s->fields);
//
// at this point, we have verified that the two tables have
// the same field types and with ONLY one field with a different name.
// the same field types and with ONLY one field with a different name.
// We have also identified the field with the different name
//
// Now we need to check the indexes
//
keys_same_for_cr = tables_have_same_keys(
orig_table,
new_table,
false,
true
);
keys_same_for_cr =
tables_have_same_keys(
orig_table,
new_table,
false,
true);
if (!keys_same_for_cr) {
retval = false;
goto cleanup;
@@ -645,12 +655,21 @@ cleanup:
return retval;
}
static int find_changed_columns(uint32_t* changed_columns, uint32_t* num_changed_columns, TABLE* smaller_table, TABLE* bigger_table) __attribute__((unused));
static int find_changed_columns(uint32_t* changed_columns, uint32_t* num_changed_columns, TABLE* smaller_table, TABLE* bigger_table) {
TOKUDB_UNUSED(static int find_changed_columns(
uint32_t* changed_columns,
uint32_t* num_changed_columns,
TABLE* smaller_table,
TABLE* bigger_table));
static int find_changed_columns(
uint32_t* changed_columns,
uint32_t* num_changed_columns,
TABLE* smaller_table,
TABLE* bigger_table) {
int retval;
uint curr_new_col_index = 0;
uint32_t curr_num_changed_columns=0;
assert(bigger_table->s->fields > smaller_table->s->fields);
assert_always(bigger_table->s->fields > smaller_table->s->fields);
for (uint i = 0; i < smaller_table->s->fields; i++, curr_new_col_index++) {
if (curr_new_col_index >= bigger_table->s->fields) {
sql_print_error("error in determining changed columns");
@@ -670,15 +689,15 @@ static int find_changed_columns(uint32_t* changed_columns, uint32_t* num_changed
goto cleanup;
}
}
// at this point, curr_field_in_orig and curr_field_in_new should be the same, let's verify
// make sure the two fields that have the same name are ok
// at this point, curr_field_in_orig and curr_field_in_new should be
// the same; let's verify that the two fields that have the same
// name are ok
if (!are_two_fields_same(curr_field_in_orig, curr_field_in_new)) {
sql_print_error(
"Two fields that were supposedly the same are not: \
%s in original, %s in new",
"Two fields that were supposedly the same are not: %s in "
"original, %s in new",
curr_field_in_orig->field_name,
curr_field_in_new->field_name
);
curr_field_in_new->field_name);
retval = 1;
goto cleanup;
}
@@ -693,17 +712,23 @@ cleanup:
return retval;
}
static bool tables_have_same_keys_and_columns(TABLE* first_table, TABLE* second_table, bool print_error) __attribute__((unused));
static bool tables_have_same_keys_and_columns(TABLE* first_table, TABLE* second_table, bool print_error) {
TOKUDB_UNUSED(static bool tables_have_same_keys_and_columns(
TABLE* first_table,
TABLE* second_table,
bool print_error));
static bool tables_have_same_keys_and_columns(
TABLE* first_table,
TABLE* second_table,
bool print_error) {
bool retval;
if (first_table->s->null_bytes != second_table->s->null_bytes) {
retval = false;
if (print_error) {
sql_print_error(
"tables have different number of null bytes, %d, %d",
first_table->s->null_bytes,
second_table->s->null_bytes
);
"tables have different number of null bytes, %d, %d",
first_table->s->null_bytes,
second_table->s->null_bytes);
}
goto exit;
}
@@ -711,10 +736,9 @@ static bool tables_have_same_keys_and_columns(TABLE* first_table, TABLE* second_
retval = false;
if (print_error) {
sql_print_error(
"tables have different number of fields, %d, %d",
first_table->s->fields,
second_table->s->fields
);
"tables have different number of fields, %d, %d",
first_table->s->fields,
second_table->s->fields);
}
goto exit;
}
@@ -724,9 +748,8 @@ static bool tables_have_same_keys_and_columns(TABLE* first_table, TABLE* second_
if (!are_two_fields_same(a,b)) {
retval = false;
sql_print_error(
"tables have different fields at position %d",
i
);
"tables have different fields at position %d",
i);
goto exit;
}
}
@@ -741,21 +764,29 @@ exit:
}
#if TOKU_INCLUDE_WRITE_FRM_DATA
// write the new frm data to the status dictionary using the alter table transaction
int ha_tokudb::write_frm_data(const uchar *frm_data, size_t frm_len) {
// write the new frm data to the status dictionary using the alter table
// transaction
int ha_tokudb::write_frm_data(const uchar* frm_data, size_t frm_len) {
TOKUDB_DBUG_ENTER("write_frm_data");
int error = 0;
if (TOKU_PARTITION_WRITE_FRM_DATA || table->part_info == NULL) {
// write frmdata to status
THD *thd = ha_thd();
tokudb_trx_data *trx = (tokudb_trx_data *) thd_get_ha_data(thd, tokudb_hton);
assert(trx);
DB_TXN *txn = trx->stmt; // use alter table transaction
assert(txn);
error = write_to_status(share->status_block, hatoku_frm_data, (void *)frm_data, (uint)frm_len, txn);
THD* thd = ha_thd();
tokudb_trx_data* trx =
(tokudb_trx_data*)thd_get_ha_data(thd, tokudb_hton);
assert_always(trx);
DB_TXN* txn = trx->stmt; // use alter table transaction
assert_always(txn);
error =
write_to_status(
share->status_block,
hatoku_frm_data,
(void*)frm_data,
(uint)frm_len,
txn);
}
TOKUDB_DBUG_RETURN(error);
}
#endif

File diff suppressed because it is too large

View File

@@ -104,8 +104,7 @@ static void get_var_field_info(
data_end_offset = uint2korr(var_field_offset_ptr + 2*var_field_index);
break;
default:
assert(false);
break;
assert_unreachable();
}
if (var_field_index) {
@@ -117,8 +116,7 @@ static void get_var_field_info(
data_start_offset = uint2korr(var_field_offset_ptr + 2*(var_field_index-1));
break;
default:
assert(false);
break;
assert_unreachable();
}
}
else {
@@ -126,7 +124,7 @@ static void get_var_field_info(
}
*start_offset = data_start_offset;
assert(data_end_offset >= data_start_offset);
assert_always(data_end_offset >= data_start_offset);
*field_len = data_end_offset - data_start_offset;
}
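get_var_field_info() above decodes an array of per-field end offsets stored in either 1 or 2 bytes each; field i occupies the byte range [end[i-1], end[i]). A standalone sketch of that layout, assuming little-endian storage as with uint2korr (names here are illustrative):

#include <cassert>
#include <cstdint>
#include <cstring>

static uint32_t read_end_offset(const unsigned char* offsets,
                                uint32_t index,
                                uint32_t num_offset_bytes) {
    if (num_offset_bytes == 1)
        return offsets[index];
    assert(num_offset_bytes == 2);
    uint16_t v;
    memcpy(&v, offsets + 2 * index, sizeof(v));  // uint2korr on LE hosts
    return v;
}

int main() {
    // two variable-length fields ending at bytes 3 and 7 -> lengths 3 and 4
    const unsigned char offsets[] = {3, 7};
    uint32_t end0 = read_end_offset(offsets, 0, 1);
    uint32_t end1 = read_end_offset(offsets, 1, 1);
    assert(end0 == 3 && end1 - end0 == 4);
    return 0;
}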
@@ -153,8 +151,7 @@ static void get_blob_field_info(
data_end_offset = uint2korr(var_field_data_ptr - 2);
break;
default:
assert(false);
break;
assert_unreachable();
}
}
else {
@@ -245,7 +242,7 @@ static TOKU_TYPE mysql_to_toku_type (Field* field) {
case MYSQL_TYPE_DECIMAL:
case MYSQL_TYPE_VAR_STRING:
case MYSQL_TYPE_NULL:
assert(false);
assert_unreachable();
}
exit:
return ret_val;
@@ -312,7 +309,7 @@ static inline uchar* pack_toku_int (uchar* to_tokudb, uchar* from_mysql, uint32_
memcpy(to_tokudb, from_mysql, 8);
break;
default:
assert(false);
assert_unreachable();
}
return to_tokudb+num_bytes;
}
@@ -338,7 +335,7 @@ static inline uchar* unpack_toku_int(uchar* to_mysql, uchar* from_tokudb, uint32
memcpy(to_mysql, from_tokudb, 8);
break;
default:
assert(false);
assert_unreachable();
}
return from_tokudb+num_bytes;
}
@@ -390,7 +387,7 @@ static inline int cmp_toku_int (uchar* a_buf, uchar* b_buf, bool is_unsigned, ui
ret_val = 0;
goto exit;
default:
assert(false);
assert_unreachable();
}
}
//
@@ -438,13 +435,13 @@ static inline int cmp_toku_int (uchar* a_buf, uchar* b_buf, bool is_unsigned, ui
ret_val = 0;
goto exit;
default:
assert(false);
assert_unreachable();
}
}
//
// if this is hit, indicates bug in writing of this function
//
assert(false);
assert_unreachable();
exit:
return ret_val;
}
@@ -653,7 +650,7 @@ static inline uchar* unpack_toku_varbinary(
int4store(to_mysql, length);
break;
default:
assert(false);
assert_unreachable();
}
//
// copy the binary data
@@ -779,7 +776,7 @@ static inline uchar* unpack_toku_blob(
int4store(to_mysql, length);
break;
default:
assert(false);
assert_unreachable();
}
//
// copy the binary data
@@ -957,7 +954,9 @@ static inline int tokudb_compare_two_hidden_keys(
const void* saved_key_data,
const uint32_t saved_key_size
) {
assert( (new_key_size >= TOKUDB_HIDDEN_PRIMARY_KEY_LENGTH) && (saved_key_size >= TOKUDB_HIDDEN_PRIMARY_KEY_LENGTH) );
assert_always(
(new_key_size >= TOKUDB_HIDDEN_PRIMARY_KEY_LENGTH) &&
(saved_key_size >= TOKUDB_HIDDEN_PRIMARY_KEY_LENGTH));
ulonglong a = hpk_char_to_num((uchar *) new_key_data);
ulonglong b = hpk_char_to_num((uchar *) saved_key_data);
return a < b ? -1 : (a > b ? 1 : 0);
@@ -997,8 +996,7 @@ static uint32_t skip_field_in_descriptor(uchar* row_desc) {
row_desc_pos += sizeof(uint32_t);
break;
default:
assert(false);
break;
assert_unreachable();
}
return (uint32_t)(row_desc_pos - row_desc);
}
@@ -1026,7 +1024,7 @@ static int create_toku_key_descriptor_for_key(KEY* key, uchar* buf) {
// The second byte for each field is the type
//
TOKU_TYPE type = mysql_to_toku_type(field);
assert (type < 256);
assert_always((int)type < 256);
*pos = (uchar)(type & 255);
pos++;
@@ -1041,7 +1039,7 @@ static int create_toku_key_descriptor_for_key(KEY* key, uchar* buf) {
//
case (toku_type_int):
num_bytes_in_field = field->pack_length();
assert (num_bytes_in_field < 256);
assert_always (num_bytes_in_field < 256);
*pos = (uchar)(num_bytes_in_field & 255);
pos++;
*pos = (field->flags & UNSIGNED_FLAG) ? 1 : 0;
@@ -1059,7 +1057,7 @@ static int create_toku_key_descriptor_for_key(KEY* key, uchar* buf) {
case (toku_type_fixbinary):
num_bytes_in_field = field->pack_length();
set_if_smaller(num_bytes_in_field, key->key_part[i].length);
assert(num_bytes_in_field < 256);
assert_always(num_bytes_in_field < 256);
pos[0] = (uchar)(num_bytes_in_field & 255);
pos++;
break;
@@ -1087,8 +1085,7 @@ static int create_toku_key_descriptor_for_key(KEY* key, uchar* buf) {
pos += 4;
break;
default:
assert(false);
assert_unreachable();
}
}
return pos - buf;
@@ -1277,8 +1274,7 @@ static inline int compare_toku_field(
*read_string = true;
break;
default:
assert(false);
break;
assert_unreachable();
}
*row_desc_bytes_read = row_desc_pos - row_desc;
@@ -1301,7 +1297,7 @@ static uchar* pack_toku_key_field(
TOKU_TYPE toku_type = mysql_to_toku_type(field);
switch(toku_type) {
case (toku_type_int):
assert(key_part_length == field->pack_length());
assert_always(key_part_length == field->pack_length());
new_pos = pack_toku_int(
to_tokudb,
from_mysql,
@@ -1309,13 +1305,13 @@ static uchar* pack_toku_key_field(
);
goto exit;
case (toku_type_double):
assert(field->pack_length() == sizeof(double));
assert(key_part_length == sizeof(double));
assert_always(field->pack_length() == sizeof(double));
assert_always(key_part_length == sizeof(double));
new_pos = pack_toku_double(to_tokudb, from_mysql);
goto exit;
case (toku_type_float):
assert(field->pack_length() == sizeof(float));
assert(key_part_length == sizeof(float));
assert_always(field->pack_length() == sizeof(float));
assert_always(key_part_length == sizeof(float));
new_pos = pack_toku_float(to_tokudb, from_mysql);
goto exit;
case (toku_type_fixbinary):
@@ -1368,9 +1364,9 @@ static uchar* pack_toku_key_field(
);
goto exit;
default:
assert(false);
assert_unreachable();
}
assert(false);
assert_unreachable();
exit:
return new_pos;
}
@@ -1419,10 +1415,10 @@ static uchar* pack_key_toku_key_field(
);
goto exit;
default:
assert(false);
assert_unreachable();
}
assert(false);
assert_unreachable();
exit:
return new_pos;
}
@@ -1432,16 +1428,15 @@ uchar* unpack_toku_key_field(
uchar* to_mysql,
uchar* from_tokudb,
Field* field,
uint32_t key_part_length
)
{
uint32_t key_part_length) {
uchar* new_pos = NULL;
uint32_t num_bytes = 0;
uint32_t num_bytes_copied;
TOKU_TYPE toku_type = mysql_to_toku_type(field);
switch(toku_type) {
case (toku_type_int):
assert(key_part_length == field->pack_length());
assert_always(key_part_length == field->pack_length());
new_pos = unpack_toku_int(
to_mysql,
from_tokudb,
@@ -1449,13 +1444,13 @@ uchar* unpack_toku_key_field(
);
goto exit;
case (toku_type_double):
assert(field->pack_length() == sizeof(double));
assert(key_part_length == sizeof(double));
assert_always(field->pack_length() == sizeof(double));
assert_always(key_part_length == sizeof(double));
new_pos = unpack_toku_double(to_mysql, from_tokudb);
goto exit;
case (toku_type_float):
assert(field->pack_length() == sizeof(float));
assert(key_part_length == sizeof(float));
assert_always(field->pack_length() == sizeof(float));
assert_always(key_part_length == sizeof(float));
new_pos = unpack_toku_float(to_mysql, from_tokudb);
goto exit;
case (toku_type_fixbinary):
@@ -1464,8 +1459,7 @@ uchar* unpack_toku_key_field(
new_pos = unpack_toku_binary(
to_mysql,
from_tokudb,
num_bytes
);
num_bytes);
goto exit;
case (toku_type_fixstring):
num_bytes = field->pack_length();
@@ -1473,11 +1467,15 @@ uchar* unpack_toku_key_field(
to_mysql,
from_tokudb,
get_length_bytes_from_max(key_part_length),
0
);
num_bytes_copied = new_pos - (from_tokudb + get_length_bytes_from_max(key_part_length));
assert(num_bytes_copied <= num_bytes);
memset(to_mysql+num_bytes_copied, field->charset()->pad_char, num_bytes - num_bytes_copied);
0);
num_bytes_copied =
new_pos -
(from_tokudb + get_length_bytes_from_max(key_part_length));
assert_always(num_bytes_copied <= num_bytes);
memset(
to_mysql + num_bytes_copied,
field->charset()->pad_char,
num_bytes - num_bytes_copied);
goto exit;
case (toku_type_varbinary):
case (toku_type_varstring):
@@ -1485,21 +1483,20 @@ uchar* unpack_toku_key_field(
to_mysql,
from_tokudb,
get_length_bytes_from_max(key_part_length),
((Field_varstring *)field)->length_bytes
);
((Field_varstring*)field)->length_bytes);
goto exit;
case (toku_type_blob):
new_pos = unpack_toku_blob(
to_mysql,
from_tokudb,
get_length_bytes_from_max(key_part_length),
((Field_blob *)field)->row_pack_length() //only calling this because packlength is returned
);
//only calling this because packlength is returned
((Field_blob *)field)->row_pack_length());
goto exit;
default:
assert(false);
assert_unreachable();
}
assert(false);
assert_unreachable();
exit:
return new_pos;
}
@@ -1513,9 +1510,8 @@ static int tokudb_compare_two_keys(
const void* row_desc,
const uint32_t row_desc_size,
bool cmp_prefix,
bool* read_string
)
{
bool* read_string) {
int ret_val = 0;
int8_t new_key_inf_val = COL_NEG_INF;
int8_t saved_key_inf_val = COL_NEG_INF;
@@ -1538,11 +1534,9 @@ static int tokudb_compare_two_keys(
}
row_desc_ptr++;
while ( (uint32_t)(new_key_ptr - (uchar *)new_key_data) < new_key_size &&
(uint32_t)(saved_key_ptr - (uchar *)saved_key_data) < saved_key_size &&
(uint32_t)(row_desc_ptr - (uchar *)row_desc) < row_desc_size
)
{
while ((uint32_t)(new_key_ptr - (uchar*)new_key_data) < new_key_size &&
(uint32_t)(saved_key_ptr - (uchar*)saved_key_data) < saved_key_size &&
(uint32_t)(row_desc_ptr - (uchar*)row_desc) < row_desc_size) {
uint32_t new_key_field_length;
uint32_t saved_key_field_length;
uint32_t row_desc_field_length;
@@ -1583,8 +1577,7 @@ static int tokudb_compare_two_keys(
&new_key_field_length,
&saved_key_field_length,
&row_desc_field_length,
read_string
);
read_string);
new_key_ptr += new_key_field_length;
saved_key_ptr += saved_key_field_length;
row_desc_ptr += row_desc_field_length;
@@ -1592,35 +1585,30 @@ static int tokudb_compare_two_keys(
goto exit;
}
assert((uint32_t)(new_key_ptr - (uchar *)new_key_data) <= new_key_size);
assert((uint32_t)(saved_key_ptr - (uchar *)saved_key_data) <= saved_key_size);
assert((uint32_t)(row_desc_ptr - (uchar *)row_desc) <= row_desc_size);
assert_always(
(uint32_t)(new_key_ptr - (uchar*)new_key_data) <= new_key_size);
assert_always(
(uint32_t)(saved_key_ptr - (uchar*)saved_key_data) <= saved_key_size);
assert_always(
(uint32_t)(row_desc_ptr - (uchar*)row_desc) <= row_desc_size);
}
new_key_bytes_left = new_key_size - ((uint32_t)(new_key_ptr - (uchar *)new_key_data));
saved_key_bytes_left = saved_key_size - ((uint32_t)(saved_key_ptr - (uchar *)saved_key_data));
new_key_bytes_left =
new_key_size - ((uint32_t)(new_key_ptr - (uchar*)new_key_data));
saved_key_bytes_left =
saved_key_size - ((uint32_t)(saved_key_ptr - (uchar*)saved_key_data));
if (cmp_prefix) {
ret_val = 0;
}
//
// in this case, read both keys to completion, now read infinity byte
//
else if (new_key_bytes_left== 0 && saved_key_bytes_left== 0) {
} else if (new_key_bytes_left== 0 && saved_key_bytes_left== 0) {
// in this case, read both keys to completion, now read infinity byte
ret_val = new_key_inf_val - saved_key_inf_val;
}
//
// at this point, one SHOULD be 0
//
else if (new_key_bytes_left == 0 && saved_key_bytes_left > 0) {
} else if (new_key_bytes_left == 0 && saved_key_bytes_left > 0) {
// at this point, one SHOULD be 0
ret_val = (new_key_inf_val == COL_POS_INF ) ? 1 : -1;
}
else if (new_key_bytes_left > 0 && saved_key_bytes_left == 0) {
} else if (new_key_bytes_left > 0 && saved_key_bytes_left == 0) {
ret_val = (saved_key_inf_val == COL_POS_INF ) ? -1 : 1;
}
//
// this should never happen, perhaps we should assert(false)
//
else {
assert(false);
} else {
// this should never happen
assert_unreachable();
ret_val = new_key_bytes_left - saved_key_bytes_left;
}
exit:
@@ -1765,9 +1753,9 @@ static int tokudb_compare_two_key_parts(
goto exit;
}
assert((uint32_t)(new_key_ptr - (uchar *)new_key_data) <= new_key_size);
assert((uint32_t)(saved_key_ptr - (uchar *)saved_key_data) <= saved_key_size);
assert((uint32_t)(row_desc_ptr - (uchar *)row_desc) <= row_desc_size);
assert_always((uint32_t)(new_key_ptr - (uchar *)new_key_data) <= new_key_size);
assert_always((uint32_t)(saved_key_ptr - (uchar *)saved_key_data) <= saved_key_size);
assert_always((uint32_t)(row_desc_ptr - (uchar *)row_desc) <= row_desc_size);
}
ret_val = 0;
@@ -1776,7 +1764,7 @@ exit:
}
static int tokudb_cmp_dbt_key_parts(DB *file, const DBT *keya, const DBT *keyb, uint max_parts) {
assert(file->cmp_descriptor->dbt.size);
assert_always(file->cmp_descriptor->dbt.size);
return tokudb_compare_two_key_parts(
keya->data,
keya->size,
@@ -1847,7 +1835,7 @@ static uint32_t pack_desc_pk_info(uchar* buf, KEY_AND_COL_INFO* kc_info, TABLE_S
case (toku_type_float):
pos[0] = COL_FIX_FIELD;
pos++;
assert(kc_info->field_lengths[field_index] < 256);
assert_always(kc_info->field_lengths[field_index] < 256);
pos[0] = kc_info->field_lengths[field_index];
pos++;
break;
@@ -1856,7 +1844,7 @@ static uint32_t pack_desc_pk_info(uchar* buf, KEY_AND_COL_INFO* kc_info, TABLE_S
pos++;
field_length = field->pack_length();
set_if_smaller(key_part_length, field_length);
assert(key_part_length < 256);
assert_always(key_part_length < 256);
pos[0] = (uchar)key_part_length;
pos++;
break;
@@ -1871,7 +1859,7 @@ static uint32_t pack_desc_pk_info(uchar* buf, KEY_AND_COL_INFO* kc_info, TABLE_S
pos++;
break;
default:
assert(false);
assert_unreachable();
}
return pos - buf;
@@ -1908,7 +1896,7 @@ static uint32_t pack_desc_pk_offset_info(
}
offset += pk_info[2*i + 1];
}
assert(found_col_in_pk);
assert_always(found_col_in_pk);
if (is_constant_offset) {
pos[0] = COL_FIX_PK_OFFSET;
pos++;
@@ -1966,10 +1954,10 @@ static uint32_t pack_desc_offset_info(uchar* buf, KEY_AND_COL_INFO* kc_info, uin
break;
}
}
assert(found_index);
assert_always(found_index);
break;
default:
assert(false);
assert_unreachable();
}
return pos - buf;
@@ -2004,7 +1992,7 @@ static uint32_t pack_desc_key_length_info(uchar* buf, KEY_AND_COL_INFO* kc_info,
pos += sizeof(key_part_length);
break;
default:
assert(false);
assert_unreachable();
}
return pos - buf;
@@ -2041,7 +2029,7 @@ static uint32_t pack_desc_char_info(uchar* buf, KEY_AND_COL_INFO* kc_info, TABLE
pos += 4;
break;
default:
assert(false);
assert_unreachable();
}
return pos - buf;
@@ -2151,7 +2139,7 @@ static uint32_t create_toku_clustering_val_pack_descriptor (
bool col_filtered = bitmap_is_set(&kc_info->key_filters[keynr],i);
bool col_filtered_in_pk = bitmap_is_set(&kc_info->key_filters[pk_index],i);
if (col_filtered_in_pk) {
assert(col_filtered);
assert_always(col_filtered);
}
}
@@ -2321,7 +2309,7 @@ static uint32_t pack_clustering_val_from_desc(
memcpy(&end, desc_pos, sizeof(end));
desc_pos += sizeof(end);
assert (start <= end);
assert_always (start <= end);
if (curr == CK_FIX_RANGE) {
length = end - start;
@@ -2367,24 +2355,21 @@ static uint32_t pack_clustering_val_from_desc(
offset_diffs = (end_data_offset + end_data_size) - (uint32_t)(var_dest_data_ptr - orig_var_dest_data_ptr);
for (uint32_t i = start; i <= end; i++) {
if ( num_offset_bytes == 1 ) {
assert(offset_diffs < 256);
assert_always(offset_diffs < 256);
var_dest_offset_ptr[0] = var_src_offset_ptr[i] - (uchar)offset_diffs;
var_dest_offset_ptr++;
}
else if ( num_offset_bytes == 2 ) {
} else if ( num_offset_bytes == 2 ) {
uint32_t tmp = uint2korr(var_src_offset_ptr + 2*i);
uint32_t new_offset = tmp - offset_diffs;
assert(new_offset < 1<<16);
assert_always(new_offset < 1<<16);
int2store(var_dest_offset_ptr,new_offset);
var_dest_offset_ptr += 2;
}
else {
assert(false);
} else {
assert_unreachable();
}
}
}
else {
assert(false);
} else {
assert_unreachable();
}
}
//
@@ -2518,7 +2503,7 @@ static uint32_t create_toku_secondary_key_pack_descriptor (
//
// store number of parts
//
assert(get_key_parts(prim_key) < 128);
assert_always(get_key_parts(prim_key) < 128);
pos[0] = 2 * get_key_parts(prim_key);
pos++;
//
@@ -2540,7 +2525,7 @@ static uint32_t create_toku_secondary_key_pack_descriptor (
//
// asserting that we moved forward as much as we think we have
//
assert(tmp - pos == (2 * get_key_parts(prim_key)));
assert_always(tmp - pos == (2 * get_key_parts(prim_key)));
pos = tmp;
}
@@ -2551,7 +2536,7 @@ static uint32_t create_toku_secondary_key_pack_descriptor (
bool is_col_in_pk = false;
if (bitmap_is_set(&kc_info->key_filters[pk_index],field_index)) {
assert(!has_hpk && prim_key != NULL);
assert_always(!has_hpk && prim_key != NULL);
is_col_in_pk = true;
}
else {
@@ -2566,7 +2551,7 @@ static uint32_t create_toku_secondary_key_pack_descriptor (
// assert that columns in pk do not have a null bit
// because in MySQL, pk columns cannot be null
//
assert(!field->null_bit);
assert_always(!field->null_bit);
}
if (field->null_bit) {
@@ -2668,7 +2653,7 @@ static uint32_t max_key_size_from_desc(
// skip byte that states if main dictionary
bool is_main_dictionary = desc_pos[0];
desc_pos++;
assert(!is_main_dictionary);
assert_always(!is_main_dictionary);
// skip hpk byte
desc_pos++;
@@ -2731,7 +2716,7 @@ static uint32_t max_key_size_from_desc(
desc_pos += sizeof(charset_num);
}
else {
assert(has_charset == COL_HAS_NO_CHARSET);
assert_always(has_charset == COL_HAS_NO_CHARSET);
}
}
return max_size;
@@ -2742,9 +2727,8 @@ static uint32_t pack_key_from_desc(
void* row_desc,
uint32_t row_desc_size,
const DBT* pk_key,
const DBT* pk_val
)
{
const DBT* pk_val) {
MULTI_COL_PACK_INFO mcp_info;
uint32_t num_null_bytes;
uint32_t num_blobs;
@@ -2762,7 +2746,7 @@ static uint32_t pack_key_from_desc(
bool is_main_dictionary = desc_pos[0];
desc_pos++;
assert(!is_main_dictionary);
assert_always(!is_main_dictionary);
//
// get the constant info out of descriptor
@@ -2810,7 +2794,7 @@ static uint32_t pack_key_from_desc(
fixed_field_ptr = null_bytes_ptr + num_null_bytes;
var_field_offset_ptr = fixed_field_ptr + mcp_info.fixed_field_size;
var_field_data_ptr = var_field_offset_ptr + mcp_info.len_of_offsets;
while ( (uint32_t)(desc_pos - (uchar *)row_desc) < row_desc_size) {
while ((uint32_t)(desc_pos - (uchar*)row_desc) < row_desc_size) {
uchar col_fix_val;
uchar has_charset;
uint32_t col_pack_val = 0;
@@ -2834,8 +2818,7 @@ static uint32_t pack_key_from_desc(
packed_key_pos++;
desc_pos += skip_key_in_desc(desc_pos);
continue;
}
else {
} else {
packed_key_pos[0] = NONNULL_COL_VAL;
packed_key_pos++;
}
@@ -2859,42 +2842,46 @@ static uint32_t pack_key_from_desc(
if (has_charset == COL_HAS_CHARSET) {
memcpy(&charset_num, desc_pos, sizeof(charset_num));
desc_pos += sizeof(charset_num);
}
else {
assert(has_charset == COL_HAS_NO_CHARSET);
} else {
assert_always(has_charset == COL_HAS_NO_CHARSET);
}
//
// case where column is in pk val
//
if (col_fix_val == COL_FIX_FIELD || col_fix_val == COL_VAR_FIELD || col_fix_val == COL_BLOB_FIELD) {
if (col_fix_val == COL_FIX_FIELD && has_charset == COL_HAS_NO_CHARSET) {
memcpy(packed_key_pos, &fixed_field_ptr[col_pack_val], key_length);
if (col_fix_val == COL_FIX_FIELD ||
col_fix_val == COL_VAR_FIELD ||
col_fix_val == COL_BLOB_FIELD) {
if (col_fix_val == COL_FIX_FIELD &&
has_charset == COL_HAS_NO_CHARSET) {
memcpy(
packed_key_pos,
&fixed_field_ptr[col_pack_val],
key_length);
packed_key_pos += key_length;
}
else if (col_fix_val == COL_VAR_FIELD && has_charset == COL_HAS_NO_CHARSET) {
} else if (col_fix_val == COL_VAR_FIELD &&
has_charset == COL_HAS_NO_CHARSET) {
uint32_t data_start_offset = 0;
uint32_t data_size = 0;
get_var_field_info(
&data_size,
&data_start_offset,
col_pack_val,
var_field_offset_ptr,
num_offset_bytes
);
&data_size,
&data_start_offset,
col_pack_val,
var_field_offset_ptr,
num_offset_bytes);
//
// length of this field in this row is data_size
// data is located beginning at var_field_data_ptr + data_start_offset
//
packed_key_pos = pack_toku_varbinary_from_desc(
packed_key_pos,
var_field_data_ptr + data_start_offset,
key_length, //number of bytes to use to encode the length in to_tokudb
data_size //length of field
);
}
else {
packed_key_pos,
var_field_data_ptr + data_start_offset,
//number of bytes to use to encode the length in to_tokudb
key_length,
//length of field
data_size);
} else {
const uchar* data_start = NULL;
uint32_t data_start_offset = 0;
uint32_t data_size = 0;
@@ -2903,76 +2890,59 @@ static uint32_t pack_key_from_desc(
data_start_offset = col_pack_val;
data_size = key_length;
data_start = fixed_field_ptr + data_start_offset;
}
else if (col_fix_val == COL_VAR_FIELD){
} else if (col_fix_val == COL_VAR_FIELD){
get_var_field_info(
&data_size,
&data_start_offset,
col_pack_val,
var_field_offset_ptr,
num_offset_bytes
);
&data_size,
&data_start_offset,
col_pack_val,
var_field_offset_ptr,
num_offset_bytes);
data_start = var_field_data_ptr + data_start_offset;
}
else if (col_fix_val == COL_BLOB_FIELD) {
} else if (col_fix_val == COL_BLOB_FIELD) {
uint32_t blob_index = col_pack_val;
uint32_t blob_offset;
const uchar* blob_ptr = NULL;
uint32_t field_len;
uint32_t field_len_bytes = blob_lengths[blob_index];
get_blob_field_info(
&blob_offset,
&blob_offset,
mcp_info.len_of_offsets,
var_field_data_ptr,
num_offset_bytes
);
var_field_data_ptr,
num_offset_bytes);
blob_ptr = var_field_data_ptr + blob_offset;
assert(num_blobs > 0);
//
// skip over other blobs to get to the one we want to make a key out of
//
assert_always(num_blobs > 0);
// skip over other blobs to get to the one we want to
// make a key out of
for (uint32_t i = 0; i < blob_index; i++) {
blob_ptr = unpack_toku_field_blob(
NULL,
blob_ptr,
blob_lengths[i],
true
);
true);
}
//
// at this point, blob_ptr is pointing to the blob we want to make a key from
//
// at this point, blob_ptr is pointing to the blob we
// want to make a key from
field_len = get_blob_field_len(blob_ptr, field_len_bytes);
//
// now we set the variables to make the key
//
data_start = blob_ptr + field_len_bytes;
data_size = field_len;
}
else {
assert(false);
} else {
assert_unreachable();
}
packed_key_pos = pack_toku_varstring_from_desc(
packed_key_pos,
packed_key_pos = pack_toku_varstring_from_desc(packed_key_pos,
data_start,
key_length,
data_size,
charset_num
);
charset_num);
}
}
//
// case where column is in pk key
//
else {
} else {
// case where column is in pk key
if (col_fix_val == COL_FIX_PK_OFFSET) {
memcpy(packed_key_pos, &pk_data_ptr[col_pack_val], key_length);
packed_key_pos += key_length;
}
else if (col_fix_val == COL_VAR_PK_OFFSET) {
} else if (col_fix_val == COL_VAR_PK_OFFSET) {
uchar* tmp_pk_data_ptr = pk_data_ptr;
uint32_t index_in_pk = col_pack_val;
//
@@ -2981,25 +2951,21 @@ static uint32_t pack_key_from_desc(
for (uint32_t i = 0; i < index_in_pk; i++) {
if (pk_info[2*i] == COL_FIX_FIELD) {
tmp_pk_data_ptr += pk_info[2*i + 1];
}
else if (pk_info[2*i] == COL_VAR_FIELD) {
} else if (pk_info[2*i] == COL_VAR_FIELD) {
uint32_t len_bytes = pk_info[2*i + 1];
uint32_t len;
if (len_bytes == 1) {
len = tmp_pk_data_ptr[0];
tmp_pk_data_ptr++;
}
else if (len_bytes == 2) {
} else if (len_bytes == 2) {
len = uint2korr(tmp_pk_data_ptr);
tmp_pk_data_ptr += 2;
}
else {
assert(false);
} else {
assert_unreachable();
}
tmp_pk_data_ptr += len;
}
else {
assert(false);
} else {
assert_unreachable();
}
}
//
@@ -3009,21 +2975,18 @@ static uint32_t pack_key_from_desc(
if (is_fix_field == COL_FIX_FIELD) {
memcpy(packed_key_pos, tmp_pk_data_ptr, key_length);
packed_key_pos += key_length;
}
else if (is_fix_field == COL_VAR_FIELD) {
} else if (is_fix_field == COL_VAR_FIELD) {
const uchar* data_start = NULL;
uint32_t data_size = 0;
uint32_t len_bytes = pk_info[2*index_in_pk + 1];
if (len_bytes == 1) {
data_size = tmp_pk_data_ptr[0];
tmp_pk_data_ptr++;
}
else if (len_bytes == 2) {
} else if (len_bytes == 2) {
data_size = uint2korr(tmp_pk_data_ptr);
tmp_pk_data_ptr += 2;
}
else {
assert(false);
} else {
assert_unreachable();
}
data_start = tmp_pk_data_ptr;
@@ -3033,32 +2996,26 @@ static uint32_t pack_key_from_desc(
data_start,
key_length,
data_size,
charset_num
);
}
else if (has_charset == COL_HAS_NO_CHARSET) {
charset_num);
} else if (has_charset == COL_HAS_NO_CHARSET) {
packed_key_pos = pack_toku_varbinary_from_desc(
packed_key_pos,
data_start,
packed_key_pos,
data_start,
key_length,
data_size //length of field
);
}
else {
assert(false);
data_size);
} else {
assert_unreachable();
}
} else {
assert_unreachable();
}
else {
assert(false);
}
}
else {
assert(false);
} else {
assert_unreachable();
}
}
}
assert( (uint32_t)(desc_pos - (uchar *)row_desc) == row_desc_size);
assert_always( (uint32_t)(desc_pos - (uchar *)row_desc) == row_desc_size);
//
// now append the primary key to the end of the key
@@ -3066,13 +3023,12 @@ static uint32_t pack_key_from_desc(
if (hpk) {
memcpy(packed_key_pos, pk_key->data, pk_key->size);
packed_key_pos += pk_key->size;
}
else {
} else {
memcpy(packed_key_pos, (uchar *)pk_key->data + 1, pk_key->size - 1);
packed_key_pos += (pk_key->size - 1);
}
return (uint32_t)(packed_key_pos - buf); //
return (uint32_t)(packed_key_pos - buf);
}
static bool fields_have_same_name(Field* a, Field* b) {
@@ -3249,7 +3205,7 @@ static bool fields_are_same_type(Field* a, Field* b) {
case MYSQL_TYPE_DECIMAL:
case MYSQL_TYPE_VAR_STRING:
case MYSQL_TYPE_NULL:
assert(false);
assert_unreachable();
}
cleanup:

View File

@@ -26,9 +26,9 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
#ifndef _HATOKU_CMP
#define _HATOKU_CMP
#include "stdint.h"
#include "hatoku_defines.h"
#include "tokudb_debug.h"
#include <db.h>
//
// A MySQL row is encoded in TokuDB, as follows:
@@ -180,7 +180,7 @@ static inline uint32_t get_blob_field_len(const uchar* from_tokudb, uint32_t len
length = uint4korr(from_tokudb);
break;
default:
assert(false);
assert_unreachable();
}
return length;
}

View File

@@ -23,8 +23,49 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
#ifndef _TOKUDB_CONFIG_H
#define _TOKUDB_CONFIG_H
#ifndef _HATOKU_DEFINES_H
#define _HATOKU_DEFINES_H
#include <my_config.h>
#define MYSQL_SERVER 1
#include "mysql_version.h"
#include "sql_table.h"
#include "handler.h"
#include "table.h"
#include "log.h"
#include "sql_class.h"
#include "sql_show.h"
#include "discover.h"
#if (50600 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50699) || (50700 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50799)
#include <binlog.h>
#endif
#undef PACKAGE
#undef VERSION
#undef HAVE_DTRACE
#undef _DTRACE_VERSION
/* We define DTRACE after mysql_priv.h in case it disabled dtrace in the main server */
#ifdef HAVE_DTRACE
#define _DTRACE_VERSION 1
#else
#endif
#include <mysql/plugin.h>
#include <ctype.h>
#include <stdint.h>
#define __STDC_FORMAT_MACROS
#include <inttypes.h>
#if defined(_WIN32)
#include "misc.h"
#endif
#include "db.h"
#include "toku_os.h"
#include "toku_time.h"
#include "partitioned_counter.h"
#ifdef USE_PRAGMA_INTERFACE
#pragma interface /* gcc class implementation */
@@ -162,326 +203,51 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
/* Bits for share->status */
#define STATUS_PRIMARY_KEY_INIT 0x1
#endif // _TOKUDB_CONFIG_H
#ifndef _TOKUDB_DEBUG_H
#define _TOKUDB_DEBUG_H
#define TOKU_INCLUDE_BACKTRACE 0
#if TOKU_INCLUDE_BACKTRACE
static void tokudb_backtrace(void);
#endif
extern ulong tokudb_debug;
// tokudb debug tracing
#define TOKUDB_DEBUG_INIT 1
#define TOKUDB_DEBUG_OPEN 2
#define TOKUDB_DEBUG_ENTER 4
#define TOKUDB_DEBUG_RETURN 8
#define TOKUDB_DEBUG_ERROR 16
#define TOKUDB_DEBUG_TXN 32
#define TOKUDB_DEBUG_AUTO_INCREMENT 64
#define TOKUDB_DEBUG_INDEX_KEY 128
#define TOKUDB_DEBUG_LOCK 256
#define TOKUDB_DEBUG_CHECK_KEY 1024
#define TOKUDB_DEBUG_HIDE_DDL_LOCK_ERRORS 2048
#define TOKUDB_DEBUG_ALTER_TABLE 4096
#define TOKUDB_DEBUG_UPSERT 8192
#define TOKUDB_DEBUG_CHECK (1<<14)
#define TOKUDB_DEBUG_ANALYZE (1<<15)
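Each TOKUDB_DEBUG_* value above is a distinct power of two, so several trace classes can be enabled at once by OR-ing them into tokudb_debug. A small illustration (values only, not from the source):

// e.g. a session value of 12 enables ENTER (4) and RETURN (8) tracing
ulong flags = TOKUDB_DEBUG_ENTER | TOKUDB_DEBUG_RETURN;  // == 12
bool trace_enter = (flags & TOKUDB_DEBUG_ENTER) != 0;    // true
bool trace_txn   = (flags & TOKUDB_DEBUG_TXN) != 0;      // false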
#define TOKUDB_TRACE(f, ...) { \
fprintf(stderr, "%u %s:%u %s " f "\n", my_tid(), __FILE__, __LINE__, __FUNCTION__, ##__VA_ARGS__); \
}
static inline unsigned int my_tid() {
return (unsigned int)toku_os_gettid();
}
#define TOKUDB_DBUG_ENTER(f, ...) { \
if (tokudb_debug & TOKUDB_DEBUG_ENTER) { \
TOKUDB_TRACE(f, ##__VA_ARGS__); \
} \
} \
DBUG_ENTER(__FUNCTION__);
#define TOKUDB_DBUG_RETURN(r) { \
int rr = (r); \
if ((tokudb_debug & TOKUDB_DEBUG_RETURN) || (rr != 0 && (tokudb_debug & TOKUDB_DEBUG_ERROR))) { \
TOKUDB_TRACE("return %d", rr); \
} \
DBUG_RETURN(rr); \
}
#define TOKUDB_HANDLER_TRACE(f, ...) \
fprintf(stderr, "%u %p %s:%u ha_tokudb::%s " f "\n", my_tid(), this, __FILE__, __LINE__, __FUNCTION__, ##__VA_ARGS__);
#define TOKUDB_HANDLER_DBUG_ENTER(f, ...) { \
if (tokudb_debug & TOKUDB_DEBUG_ENTER) { \
TOKUDB_HANDLER_TRACE(f, ##__VA_ARGS__); \
} \
} \
DBUG_ENTER(__FUNCTION__);
#define TOKUDB_HANDLER_DBUG_RETURN(r) { \
int rr = (r); \
if ((tokudb_debug & TOKUDB_DEBUG_RETURN) || (rr != 0 && (tokudb_debug & TOKUDB_DEBUG_ERROR))) { \
TOKUDB_HANDLER_TRACE("return %d", rr); \
} \
DBUG_RETURN(rr); \
}
#define TOKUDB_HANDLER_DBUG_VOID_RETURN { \
if (tokudb_debug & TOKUDB_DEBUG_RETURN) { \
TOKUDB_HANDLER_TRACE("return"); \
} \
DBUG_VOID_RETURN; \
}
#define TOKUDB_DBUG_DUMP(s, p, len) \
{ \
TOKUDB_TRACE("%s", s); \
uint i; \
for (i=0; i<len; i++) { \
fprintf(stderr, "%2.2x", ((uchar*)p)[i]); \
} \
fprintf(stderr, "\n"); \
}
/* The purpose of this file is to define assert() for use by the handlerton.
* The intention is for a failed handlerton assert to invoke a failed assert
* in the fractal tree layer, which dumps engine status to the error log.
*/
void toku_hton_assert_fail(const char*/*expr_as_string*/,const char */*fun*/,const char*/*file*/,int/*line*/, int/*errno*/) __attribute__((__visibility__("default"))) __attribute__((__noreturn__));
#undef assert
#define assert(expr) ((expr) ? (void)0 : toku_hton_assert_fail(#expr, __FUNCTION__, __FILE__, __LINE__, errno))
#endif // _TOKUDB_DEBUG_H
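The assert override above is the standard custom-assert-handler pattern: a failed check calls a noreturn hook that can dump diagnostics (here, engine status via the fractal tree layer) before aborting. A minimal standalone sketch of the same pattern, with hypothetical names:

#include <cerrno>
#include <cstdio>
#include <cstdlib>

[[noreturn]] static void my_assert_fail(const char* expr, const char* fun,
                                        const char* file, int line, int err) {
    fprintf(stderr, "assertion `%s' failed in %s at %s:%d (errno=%d)\n",
            expr, fun, file, line, err);
    // a real handler would dump engine status here before aborting
    abort();
}

#define my_assert(expr) \
    ((expr) ? (void)0 \
            : my_assert_fail(#expr, __FUNCTION__, __FILE__, __LINE__, errno))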
#ifndef _TOKUDB_TXN_H
#define _TOKUDB_TXN_H
typedef enum {
hatoku_iso_not_set = 0,
hatoku_iso_read_uncommitted,
hatoku_iso_read_committed,
hatoku_iso_repeatable_read,
hatoku_iso_serializable
} HA_TOKU_ISO_LEVEL;
typedef struct st_tokudb_stmt_progress {
ulonglong inserted;
ulonglong updated;
ulonglong deleted;
ulonglong queried;
bool using_loader;
} tokudb_stmt_progress;
typedef struct st_tokudb_trx_data {
DB_TXN *all;
DB_TXN *stmt;
DB_TXN *sp_level;
DB_TXN *sub_sp_level;
uint tokudb_lock_count;
uint create_lock_count;
tokudb_stmt_progress stmt_progress;
bool checkpoint_lock_taken;
LIST *handlers;
} tokudb_trx_data;
extern char *tokudb_data_dir;
extern const char *ha_tokudb_ext;
static inline void reset_stmt_progress (tokudb_stmt_progress* val) {
val->deleted = 0;
val->inserted = 0;
val->updated = 0;
val->queried = 0;
}
static inline int get_name_length(const char *name) {
int n = 0;
const char *newname = name;
n += strlen(newname);
n += strlen(ha_tokudb_ext);
return n;
}
//
// returns maximum length of path to a dictionary
//
static inline int get_max_dict_name_path_length(const char *tablename) {
int n = 0;
n += get_name_length(tablename);
n += 1; //for the '-'
n += MAX_DICT_NAME_LEN;
return n;
}
static inline void make_name(char *newname, const char *tablename, const char *dictname) {
const char *newtablename = tablename;
char *nn = newname;
assert(tablename);
assert(dictname);
nn += sprintf(nn, "%s", newtablename);
nn += sprintf(nn, "-%s", dictname);
}
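make_name() above writes "<tablename>-<dictname>" into a caller-supplied buffer, so callers are expected to size it with get_max_dict_name_path_length(). A hypothetical usage sketch (the "main" dictionary name is purely illustrative):

#include <cstdlib>

void example_make_name(const char* tablename) {
    int bufsize = get_max_dict_name_path_length(tablename) + 1;
    char* newname = (char*)malloc(bufsize);
    if (newname == NULL)
        return;
    make_name(newname, tablename, "main");  // e.g. "./test/t1-main"
    // ... open the dictionary using newname ...
    free(newname);
}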
static inline int txn_begin(DB_ENV *env, DB_TXN *parent, DB_TXN **txn, uint32_t flags, THD *thd) {
*txn = NULL;
int r = env->txn_begin(env, parent, txn, flags);
if (r == 0 && thd) {
DB_TXN *this_txn = *txn;
this_txn->set_client_id(this_txn, thd_get_thread_id(thd));
}
if ((tokudb_debug & TOKUDB_DEBUG_TXN)) {
TOKUDB_TRACE("begin txn %p %p %u r=%d", parent, *txn, flags, r);
}
return r;
}
static inline void commit_txn(DB_TXN* txn, uint32_t flags) {
if (tokudb_debug & TOKUDB_DEBUG_TXN)
TOKUDB_TRACE("commit txn %p", txn);
int r = txn->commit(txn, flags);
if (r != 0) {
sql_print_error("tried committing transaction %p and got error code %d", txn, r);
}
assert(r == 0);
}
static inline void abort_txn(DB_TXN* txn) {
if (tokudb_debug & TOKUDB_DEBUG_TXN)
TOKUDB_TRACE("abort txn %p", txn);
int r = txn->abort(txn);
if (r != 0) {
sql_print_error("tried aborting transaction %p and got error code %d", txn, r);
}
assert(r == 0);
}
#endif // _TOKUDB_TXN_H
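txn_begin() above wraps env->txn_begin() and tags the new transaction with the client thread id; it pairs with commit_txn()/abort_txn(). A hedged usage sketch, assuming a valid DB_ENV* and THD* from the surrounding handlerton code:

static int example_txn_scope(DB_ENV* env, THD* thd) {
    DB_TXN* txn = NULL;
    int r = txn_begin(env, NULL, &txn, 0, thd);
    if (r != 0)
        return r;
    bool ok = true;
    // ... perform dictionary operations under txn, clearing ok on failure ...
    if (ok)
        commit_txn(txn, 0);
    else
        abort_txn(txn);
    return 0;
}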
#ifndef _TOKUDB_PORTABILITY_H
#define _TOKUDB_PORTABILITY_H
static inline void *tokudb_my_malloc(size_t s, myf flags) {
#if 50700 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50799
return my_malloc(0, s, flags);
#if defined(TOKUDB_VERSION_MAJOR) && defined(TOKUDB_VERSION_MINOR)
#define TOKUDB_PLUGIN_VERSION ((TOKUDB_VERSION_MAJOR << 8) + TOKUDB_VERSION_MINOR)
#else
return my_malloc(s, flags);
#define TOKUDB_PLUGIN_VERSION 0
#endif
}
static inline void *tokudb_my_realloc(void *p, size_t s, myf flags) {
    if (s == 0)
        return p;
#if 50700 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50799
    return my_realloc(0, p, s, flags);
#else
    return my_realloc(p, s, flags | MY_ALLOW_ZERO_PTR);
#endif
}
static inline void tokudb_my_free(void *ptr) {
    if (ptr)
        my_free(ptr);
}
// Branch prediction macros.
// If supported by the compiler, these hint the expected branch direction to
// improve instruction scheduling and caching. They should only be used where
// there is a very good idea of the correct branch heuristics, as determined
// by profiling. Mostly copied from InnoDB.
// Use:
// "if (TOKUDB_LIKELY(x))" where the chances of "x" evaluating true are higher
// "if (TOKUDB_UNLIKELY(x))" where the chances of "x" evaluating false are higher
#if defined(__GNUC__) && (__GNUC__ > 2) && ! defined(__INTEL_COMPILER)
// Tell the compiler that 'expr' probably evaluates to 'constant'.
#define TOKUDB_EXPECT(expr,constant) __builtin_expect(expr, constant)
#else
#error "No TokuDB branch prediction operations in use!"
#define TOKUDB_EXPECT(expr,constant) (expr)
#endif // defined(__GNUC__) && (__GNUC__ > 2) && ! defined(__INTEL_COMPILER)
static inline char *tokudb_my_strdup(const char *p, myf flags) {
#if 50700 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50799
    return my_strdup(0, p, flags);
#else
    return my_strdup(p, flags);
#endif
}
static inline void* tokudb_my_multi_malloc(myf myFlags, ...) {
    va_list args;
    char **ptr, *start, *res;
    size_t tot_length, length;
    va_start(args, myFlags);
    tot_length = 0;
    while ((ptr = va_arg(args, char **))) {
        length = va_arg(args, uint);
        tot_length += ALIGN_SIZE(length);
    }
    va_end(args);
    if (!(start = (char *) tokudb_my_malloc(tot_length, myFlags))) {
        return 0;
    }
    va_start(args, myFlags);
    res = start;
    while ((ptr = va_arg(args, char **))) {
        *ptr = res;
        length = va_arg(args, uint);
        res += ALIGN_SIZE(length);
    }
    va_end(args);
    return start;
}
// Tell the compiler that cond is likely to hold
#define TOKUDB_LIKELY(cond) TOKUDB_EXPECT(cond, 1)
// Tell the compiler that cond is unlikely to hold
#define TOKUDB_UNLIKELY(cond) TOKUDB_EXPECT(cond, 0)
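A minimal usage sketch (the function and buffer names are hypothetical): pointer/length pairs are passed in, the argument list is NULL-terminated, and only the single underlying block is ever freed.
static bool example_alloc_buffers(char **key_buf, char **rec_buf,
                                  uint key_len, uint rec_len) {
    // One allocation backs both buffers; freeing *key_buf frees everything.
    void *base = tokudb_my_multi_malloc(MYF(MY_WME),
                                        key_buf, key_len,
                                        rec_buf, rec_len,
                                        NullS);  // terminates the pair list
    if (TOKUDB_UNLIKELY(base == NULL))
        return false;
    return true;
}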
static inline void tokudb_pthread_mutex_init(pthread_mutex_t *mutex, const pthread_mutexattr_t *attr) {
int r = pthread_mutex_init(mutex, attr);
assert(r == 0);
}
static inline void tokudb_pthread_mutex_destroy(pthread_mutex_t *mutex) {
int r = pthread_mutex_destroy(mutex);
assert(r == 0);
}
static inline void tokudb_pthread_mutex_lock(pthread_mutex_t *mutex) {
int r = pthread_mutex_lock(mutex);
assert(r == 0);
}
static inline void tokudb_pthread_mutex_unlock(pthread_mutex_t *mutex) {
int r = pthread_mutex_unlock(mutex);
assert(r == 0);
}
static inline void tokudb_pthread_cond_init(pthread_cond_t *cond, const pthread_condattr_t *attr) {
int r = pthread_cond_init(cond, attr);
assert(r == 0);
}
static inline void tokudb_pthread_cond_destroy(pthread_cond_t *cond) {
int r = pthread_cond_destroy(cond);
assert(r == 0);
}
static inline void tokudb_pthread_cond_wait(pthread_cond_t *cond, pthread_mutex_t *mutex) {
int r = pthread_cond_wait(cond, mutex);
assert(r == 0);
}
static inline void tokudb_pthread_cond_broadcast(pthread_cond_t *cond) {
int r = pthread_cond_broadcast(cond);
assert(r == 0);
}
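Because every wrapper asserts success, callers can use them without checking return codes. A minimal guarded-wait sketch built only from the wrappers above (the names and the flag are hypothetical):
static pthread_mutex_t example_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t example_cond = PTHREAD_COND_INITIALIZER;
static bool example_ready = false;
static void example_wait_until_ready(void) {
    tokudb_pthread_mutex_lock(&example_mutex);
    while (!example_ready)  // loop guards against spurious wakeups
        tokudb_pthread_cond_wait(&example_cond, &example_mutex);
    tokudb_pthread_mutex_unlock(&example_mutex);
}
static void example_signal_ready(void) {
    tokudb_pthread_mutex_lock(&example_mutex);
    example_ready = true;
    tokudb_pthread_cond_broadcast(&example_cond);
    tokudb_pthread_mutex_unlock(&example_mutex);
}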
// Tell the compiler that the function/argument is unused
#define TOKUDB_UNUSED(_uu) _uu __attribute__((unused))
// mysql 5.6.15 removed the test macro, so we define our own
#define tokudb_test(e) ((e) ? 1 : 0)
inline const char* tokudb_thd_get_proc_info(const THD *thd) {
    return thd->proc_info;
}
// uint3korr reads 4 bytes, which makes valgrind report an out-of-bounds read,
// so we use this function instead: it copies the 3 valid bytes into a
// zero-initialized 4-byte buffer before decoding.
inline uint tokudb_uint3korr(const uchar *a) {
    uchar b[4] = {};
    memcpy(b, a, 3);
    return uint3korr(b);
}
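A small sketch of the decode (the value is illustrative): the three bytes are little-endian, so { 0x39, 0x30, 0x00 } decodes to 0x003039 = 12345.
static inline uint example_decode_mediumint(void) {
    static const uchar bytes[3] = { 0x39, 0x30, 0x00 };  // 12345, little-endian
    return tokudb_uint3korr(bytes);  // reads exactly 3 bytes, returns 12345
}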
#endif // _TOKUDB_PORTABILITY_H
#endif // _HATOKU_DEFINES_H

File diff suppressed because it is too large

View File

@@ -26,434 +26,187 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
#ifndef _HATOKU_HTON_H
#define _HATOKU_HTON_H
#include "db.h"
#include "hatoku_defines.h"
#include "tokudb_debug.h"
#include "tokudb_information_schema.h"
#include "tokudb_memory.h"
#include "tokudb_thread.h"
#include "tokudb_time.h"
#include "tokudb_txn.h"
#include "tokudb_sysvars.h"
extern handlerton* tokudb_hton;
extern DB_ENV* db_env;
inline tokudb::sysvars::row_format_t toku_compression_method_to_row_format(
    toku_compression_method method) {
    switch (method) {
    case TOKU_NO_COMPRESSION:
        return tokudb::sysvars::SRV_ROW_FORMAT_UNCOMPRESSED;
    case TOKU_ZLIB_WITHOUT_CHECKSUM_METHOD:
    case TOKU_ZLIB_METHOD:
        return tokudb::sysvars::SRV_ROW_FORMAT_ZLIB;
    case TOKU_SNAPPY_METHOD:
        return tokudb::sysvars::SRV_ROW_FORMAT_SNAPPY;
    case TOKU_QUICKLZ_METHOD:
        return tokudb::sysvars::SRV_ROW_FORMAT_QUICKLZ;
    case TOKU_LZMA_METHOD:
        return tokudb::sysvars::SRV_ROW_FORMAT_LZMA;
    case TOKU_DEFAULT_COMPRESSION_METHOD:
        return tokudb::sysvars::SRV_ROW_FORMAT_DEFAULT;
    case TOKU_FAST_COMPRESSION_METHOD:
        return tokudb::sysvars::SRV_ROW_FORMAT_FAST;
    case TOKU_SMALL_COMPRESSION_METHOD:
        return tokudb::sysvars::SRV_ROW_FORMAT_SMALL;
    default:
        assert_unreachable();
    }
}
inline toku_compression_method row_format_to_toku_compression_method(
    tokudb::sysvars::row_format_t row_format) {
    switch (row_format) {
    case tokudb::sysvars::SRV_ROW_FORMAT_UNCOMPRESSED:
        return TOKU_NO_COMPRESSION;
    case tokudb::sysvars::SRV_ROW_FORMAT_QUICKLZ:
    case tokudb::sysvars::SRV_ROW_FORMAT_FAST:
        return TOKU_QUICKLZ_METHOD;
    case tokudb::sysvars::SRV_ROW_FORMAT_SNAPPY:
        return TOKU_SNAPPY_METHOD;
    case tokudb::sysvars::SRV_ROW_FORMAT_ZLIB:
    case tokudb::sysvars::SRV_ROW_FORMAT_DEFAULT:
        return TOKU_ZLIB_WITHOUT_CHECKSUM_METHOD;
    case tokudb::sysvars::SRV_ROW_FORMAT_LZMA:
    case tokudb::sysvars::SRV_ROW_FORMAT_SMALL:
        return TOKU_LZMA_METHOD;
    default:
        assert_unreachable();
    }
}
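Note that the two mappings are intentionally many-to-one: FAST and QUICKLZ both select quicklz, and SMALL and LZMA both select lzma, so a round trip through the compression method normalizes the row format. A minimal sketch (the function name is illustrative):
static void example_row_format_round_trip(void) {
    toku_compression_method m = row_format_to_toku_compression_method(
        tokudb::sysvars::SRV_ROW_FORMAT_FAST);
    assert(m == TOKU_QUICKLZ_METHOD);
    // The round trip lands on QUICKLZ, not back on FAST.
    assert(toku_compression_method_to_row_format(m) ==
           tokudb::sysvars::SRV_ROW_FORMAT_QUICKLZ);
}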
inline enum row_type row_format_to_row_type(
    tokudb::sysvars::row_format_t row_format) {
#if TOKU_INCLUDE_ROW_TYPE_COMPRESSION
    switch (row_format) {
    case tokudb::sysvars::SRV_ROW_FORMAT_UNCOMPRESSED:
        return ROW_TYPE_TOKU_UNCOMPRESSED;
    case tokudb::sysvars::SRV_ROW_FORMAT_ZLIB:
        return ROW_TYPE_TOKU_ZLIB;
    case tokudb::sysvars::SRV_ROW_FORMAT_SNAPPY:
        return ROW_TYPE_TOKU_SNAPPY;
    case tokudb::sysvars::SRV_ROW_FORMAT_QUICKLZ:
        return ROW_TYPE_TOKU_QUICKLZ;
    case tokudb::sysvars::SRV_ROW_FORMAT_LZMA:
        return ROW_TYPE_TOKU_LZMA;
    case tokudb::sysvars::SRV_ROW_FORMAT_SMALL:
        return ROW_TYPE_TOKU_SMALL;
    case tokudb::sysvars::SRV_ROW_FORMAT_FAST:
        return ROW_TYPE_TOKU_FAST;
    case tokudb::sysvars::SRV_ROW_FORMAT_DEFAULT:
        return ROW_TYPE_DEFAULT;
    }
#endif
    return ROW_TYPE_DEFAULT;
}
inline tokudb::sysvars::row_format_t row_type_to_row_format(
    enum row_type type) {
#if TOKU_INCLUDE_ROW_TYPE_COMPRESSION
    switch (type) {
    case ROW_TYPE_TOKU_UNCOMPRESSED:
        return tokudb::sysvars::SRV_ROW_FORMAT_UNCOMPRESSED;
    case ROW_TYPE_TOKU_ZLIB:
        return tokudb::sysvars::SRV_ROW_FORMAT_ZLIB;
    case ROW_TYPE_TOKU_SNAPPY:
        return tokudb::sysvars::SRV_ROW_FORMAT_SNAPPY;
    case ROW_TYPE_TOKU_QUICKLZ:
        return tokudb::sysvars::SRV_ROW_FORMAT_QUICKLZ;
    case ROW_TYPE_TOKU_LZMA:
        return tokudb::sysvars::SRV_ROW_FORMAT_LZMA;
    case ROW_TYPE_TOKU_SMALL:
        return tokudb::sysvars::SRV_ROW_FORMAT_SMALL;
    case ROW_TYPE_TOKU_FAST:
        return tokudb::sysvars::SRV_ROW_FORMAT_FAST;
    case ROW_TYPE_DEFAULT:
        return tokudb::sysvars::SRV_ROW_FORMAT_DEFAULT;
    default:
        return tokudb::sysvars::SRV_ROW_FORMAT_DEFAULT;
    }
#endif
    return tokudb::sysvars::SRV_ROW_FORMAT_DEFAULT;
}
inline enum row_type toku_compression_method_to_row_type(
    toku_compression_method method) {
    return row_format_to_row_type(
        toku_compression_method_to_row_format(method));
}
inline toku_compression_method row_type_to_toku_compression_method(
    enum row_type type) {
    return row_format_to_toku_compression_method(row_type_to_row_format(type));
}
void tokudb_checkpoint_lock(THD * thd);
void tokudb_checkpoint_unlock(THD * thd);
inline uint64_t tokudb_get_lock_wait_time_callback(uint64_t default_wait_time) {
    THD *thd = current_thd;
    return tokudb::sysvars::lock_timeout(thd);
}
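A minimal sketch of how the lock-wait query resolves (the function name and the 4000 ms default, taken from the old header, are assumptions): the callback ignores the engine default and returns the session's tokudb_lock_timeout through the new tokudb::sysvars accessor.
static uint64_t example_effective_lock_wait(void) {
    const uint64_t default_wait_ms = 4000;  // assumed engine default (ms)
    return tokudb_get_lock_wait_time_callback(default_wait_ms);
}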
inline uint64_t tokudb_get_loader_memory_size_callback(void) {
    THD *thd = current_thd;
    return tokudb::sysvars::loader_memory_size(thd);
}
inline uint64_t tokudb_get_killed_time_callback(uint64_t default_killed_time) {
    THD *thd = current_thd;
    return tokudb::sysvars::killed_time(thd);
}
inline int tokudb_killed_callback(void) {
    THD *thd = current_thd;
    return thd_killed(thd);
}
inline bool tokudb_killed_thd_callback(void *extra, uint64_t deleted_rows) {
    THD *thd = static_cast<THD *>(extra);
    return thd_killed(thd) != 0;
}
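These callbacks exist so long-running engine loops can notice KILL promptly. A minimal polling sketch (the function, the batch size, and the returned error code are assumptions, not the engine's actual loop):
static int example_scan_rows(THD *thd, uint64_t nrows) {
    for (uint64_t i = 0; i < nrows; i++) {
        // Poll once per batch so a KILL is noticed without per-row overhead.
        if ((i % 1024) == 0 && tokudb_killed_thd_callback(thd, i))
            return ER_QUERY_INTERRUPTED;  // assumed server error mapping
        // ... process one row ...
    }
    return 0;
}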
extern HASH tokudb_open_tables;
extern tokudb::thread::mutex_t tokudb_mutex;
extern const char* tokudb_hton_name;
extern int tokudb_hton_initialized;
extern tokudb::thread::rwlock_t tokudb_hton_initialized_lock;
void toku_hton_update_primary_key_bytes_inserted(uint64_t row_size);
void tokudb_split_dname(
const char* dname,
String& database_name,
String& table_name,
String& dictionary_name);
void tokudb_pretty_left_key(const DB* db, const DBT* key, String* out);
void tokudb_pretty_right_key(const DB* db, const DBT* key, String* out);
const char *tokudb_get_index_name(DB* db);
inline uint get_key_parts(const KEY *key) {
#if (50609 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50699) || \
(50700 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50799) || \
(100009 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 100099)
return key->user_defined_key_parts;
#else
return key->key_parts;
#endif
}
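A small sketch of the accessor in use (the walker function is hypothetical): iterating an index's user-defined parts without touching the version-dependent KEY member directly.
static void example_walk_key(const KEY *key) {
    for (uint p = 0; p < get_key_parts(key); p++) {
        const KEY_PART_INFO *part = &key->key_part[p];
        (void) part;  // e.g. inspect part->field or part->length
    }
}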
#endif // _HATOKU_HTON_H

View File

@@ -0,0 +1,8 @@
[row]
binlog-format=row
[stmt]
binlog-format=statement
[mix]
binlog-format=mixed

View File

@@ -13,4 +13,3 @@ rpl_tokudb_write_unique_uc1: unreliable, uses timestamp differences
rpl_tokudb_read_only_ff: unreliable, uses timestamp differences
rpl_tokudb_read_only_tf: unreliable, uses timestamp differences
rpl_tokudb_read_only_tt: unreliable, uses timestamp differences
rpl_row_sp002_tokudb : tokudb does not support foreign keys

View File

@@ -1,6 +1,13 @@
include/master-slave.inc
[connection master]
drop table if exists t;
show variables like 'tokudb_rpl_%';
Variable_name Value
tokudb_rpl_check_readonly ON
tokudb_rpl_lookup_rows OFF
tokudb_rpl_lookup_rows_delay 10000
tokudb_rpl_unique_checks OFF
tokudb_rpl_unique_checks_delay 10000
create table t (a bigint not null, primary key(a)) engine=tokudb;
insert into t values (1);
insert into t values (2),(3);

View File

@@ -1,6 +1,13 @@
include/master-slave.inc
[connection master]
drop table if exists t;
show variables like 'tokudb_rpl_%';
Variable_name Value
tokudb_rpl_check_readonly ON
tokudb_rpl_lookup_rows OFF
tokudb_rpl_lookup_rows_delay 10000
tokudb_rpl_unique_checks OFF
tokudb_rpl_unique_checks_delay 10000
create table t (a bigint not null, b bigint not null, primary key(a)) engine=tokudb;
insert into t values (1,0);
insert into t values (2,0),(3,0);

View File

@@ -1,6 +1,10 @@
include/master-slave.inc
[connection master]
drop table if exists t;
show variables like 'tokudb_rpl_unique_checks%';
Variable_name Value
tokudb_rpl_unique_checks OFF
tokudb_rpl_unique_checks_delay 5000
create table t (a bigint not null, primary key(a)) engine=tokudb;
select unix_timestamp() into @tstart;
insert into t values (1);

View File

@@ -4,6 +4,13 @@ Note #### Sending passwords in plain text without SSL/TLS is extremely insecure.
Note #### Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information.
[connection master]
drop table if exists t;
show variables like 'tokudb_rpl_%';
Variable_name Value
tokudb_rpl_check_readonly ON
tokudb_rpl_lookup_rows OFF
tokudb_rpl_lookup_rows_delay 10000
tokudb_rpl_unique_checks OFF
tokudb_rpl_unique_checks_delay 10000
create table t (a bigint not null, primary key(a)) engine=tokudb;
insert into t values (1);
insert into t values (2),(3);

View File

@@ -4,6 +4,13 @@ Note #### Sending passwords in plain text without SSL/TLS is extremely insecure.
Note #### Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information.
[connection master]
drop table if exists t;
show variables like 'tokudb_rpl_%';
Variable_name Value
tokudb_rpl_check_readonly ON
tokudb_rpl_lookup_rows ON
tokudb_rpl_lookup_rows_delay 10000
tokudb_rpl_unique_checks ON
tokudb_rpl_unique_checks_delay 0
create table t (a bigint not null, primary key(a)) engine=tokudb;
insert into t values (1);
insert into t values (2),(3);

View File

@@ -1,3 +1,4 @@
SET SESSION tokudb_pk_insert_mode = 2;
include/master-slave.inc
[connection master]
==========MASTER==========

View File

@@ -4,6 +4,13 @@ Note #### Sending passwords in plain text without SSL/TLS is extremely insecure.
Note #### Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information.
[connection master]
drop table if exists t;
show variables like 'tokudb_rpl%';
Variable_name Value
tokudb_rpl_check_readonly OFF
tokudb_rpl_lookup_rows ON
tokudb_rpl_lookup_rows_delay 0
tokudb_rpl_unique_checks OFF
tokudb_rpl_unique_checks_delay 5000
create table t (a bigint not null, primary key(a)) engine=tokudb;
select unix_timestamp() into @tstart;
insert into t values (1);

View File

@@ -1,6 +1,13 @@
include/master-slave.inc
[connection master]
drop table if exists t;
show variables like 'tokudb_rpl%';
Variable_name Value
tokudb_rpl_check_readonly ON
tokudb_rpl_lookup_rows ON
tokudb_rpl_lookup_rows_delay 0
tokudb_rpl_unique_checks OFF
tokudb_rpl_unique_checks_delay 5000
create table t (a bigint not null, primary key(a)) engine=tokudb;
select unix_timestamp() into @tstart;
insert into t values (1);

View File

@@ -4,6 +4,13 @@ Note #### Sending passwords in plain text without SSL/TLS is extremely insecure.
Note #### Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information.
[connection master]
drop table if exists t;
show variables like 'tokudb_rpl%';
Variable_name Value
tokudb_rpl_check_readonly OFF
tokudb_rpl_lookup_rows ON
tokudb_rpl_lookup_rows_delay 0
tokudb_rpl_unique_checks OFF
tokudb_rpl_unique_checks_delay 5000
create table t (a bigint not null, primary key(a)) engine=tokudb;
select unix_timestamp() into @tstart;
insert into t values (1);

View File

@@ -4,6 +4,13 @@ Note #### Sending passwords in plain text without SSL/TLS is extremely insecure.
Note #### Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information.
[connection master]
drop table if exists t;
show variables like 'tokudb_rpl%';
Variable_name Value
tokudb_rpl_check_readonly ON
tokudb_rpl_lookup_rows ON
tokudb_rpl_lookup_rows_delay 0
tokudb_rpl_unique_checks OFF
tokudb_rpl_unique_checks_delay 5000
create table t (a bigint not null, primary key(a)) engine=tokudb;
select unix_timestamp() into @tstart;
insert into t values (1);

View File

@@ -4,6 +4,13 @@ Note #### Sending passwords in plain text without SSL/TLS is extremely insecure.
Note #### Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information.
[connection master]
drop table if exists t;
show variables like 'tokudb_rpl_%';
Variable_name Value
tokudb_rpl_check_readonly ON
tokudb_rpl_lookup_rows OFF
tokudb_rpl_lookup_rows_delay 10000
tokudb_rpl_unique_checks OFF
tokudb_rpl_unique_checks_delay 10000
create table t (a bigint not null, b bigint not null, primary key(a)) engine=tokudb;
insert into t values (1,0);
insert into t values (2,0),(3,0);

View File

@@ -4,6 +4,13 @@ Note #### Sending passwords in plain text without SSL/TLS is extremely insecure.
Note #### Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information.
[connection master]
drop table if exists t;
show variables like 'tokudb_rpl_%';
Variable_name Value
tokudb_rpl_check_readonly ON
tokudb_rpl_lookup_rows ON
tokudb_rpl_lookup_rows_delay 10000
tokudb_rpl_unique_checks OFF
tokudb_rpl_unique_checks_delay 10000
create table t (a bigint not null, b bigint not null, primary key(a)) engine=tokudb;
insert into t values (1,0);
insert into t values (2,0),(3,0);

View File

@@ -4,6 +4,13 @@ Note #### Sending passwords in plain text without SSL/TLS is extremely insecure.
Note #### Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information.
[connection master]
drop table if exists t;
show variables like 'tokudb_rpl_%';
Variable_name Value
tokudb_rpl_check_readonly ON
tokudb_rpl_lookup_rows OFF
tokudb_rpl_lookup_rows_delay 10000
tokudb_rpl_unique_checks ON
tokudb_rpl_unique_checks_delay 10000
create table t (a bigint not null, b bigint not null, primary key(a)) engine=tokudb;
insert into t values (1,0);
insert into t values (2,0),(3,0);

View File

@@ -4,6 +4,13 @@ Note #### Sending passwords in plain text without SSL/TLS is extremely insecure.
Note #### Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information.
[connection master]
drop table if exists t;
show variables like 'tokudb_rpl_%';
Variable_name Value
tokudb_rpl_check_readonly ON
tokudb_rpl_lookup_rows ON
tokudb_rpl_lookup_rows_delay 10000
tokudb_rpl_unique_checks ON
tokudb_rpl_unique_checks_delay 10000
create table t (a bigint not null, b bigint not null, primary key(a)) engine=tokudb;
insert into t values (1,0);
insert into t values (2,0),(3,0);

View File

@@ -4,6 +4,13 @@ Note #### Sending passwords in plain text without SSL/TLS is extremely insecure.
Note #### Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information.
[connection master]
drop table if exists t;
show variables like 'tokudb_rpl_%';
Variable_name Value
tokudb_rpl_check_readonly ON
tokudb_rpl_lookup_rows OFF
tokudb_rpl_lookup_rows_delay 10000
tokudb_rpl_unique_checks OFF
tokudb_rpl_unique_checks_delay 10000
create table t (a bigint not null, b bigint not null, c bigint not null, primary key(a), unique key(c)) engine=tokudb;
insert into t values (1,0,-1);
insert into t values (2,0,-2),(3,0,-3);

View File

@@ -4,6 +4,13 @@ Note #### Sending passwords in plain text without SSL/TLS is extremely insecure.
Note #### Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information.
[connection master]
drop table if exists t;
show variables like 'tokudb_rpl_%';
Variable_name Value
tokudb_rpl_check_readonly ON
tokudb_rpl_lookup_rows ON
tokudb_rpl_lookup_rows_delay 10000
tokudb_rpl_unique_checks OFF
tokudb_rpl_unique_checks_delay 10000
create table t (a bigint not null, b bigint not null, c bigint not null, primary key(a), unique key(c)) engine=tokudb;
insert into t values (1,0,-1);
insert into t values (2,0,-2),(3,0,-3);

View File

@@ -4,6 +4,10 @@ Note #### Sending passwords in plain text without SSL/TLS is extremely insecure.
Note #### Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information.
[connection master]
drop table if exists t;
show variables like 'tokudb_rpl_unique_checks%';
Variable_name Value
tokudb_rpl_unique_checks OFF
tokudb_rpl_unique_checks_delay 5000
create table t (a bigint not null, primary key(a)) engine=tokudb;
select unix_timestamp() into @tstart;
insert into t values (1);

View File

@@ -4,6 +4,10 @@ Note #### Sending passwords in plain text without SSL/TLS is extremely insecure.
Note #### Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information.
[connection master]
drop table if exists t;
show variables like 'tokudb_rpl_unique_checks%';
Variable_name Value
tokudb_rpl_unique_checks ON
tokudb_rpl_unique_checks_delay 10000
create table t (a bigint not null, primary key(a)) engine=tokudb;
select unix_timestamp() into @tstart;
insert into t values (1);

View File

@@ -4,6 +4,10 @@ Note #### Sending passwords in plain text without SSL/TLS is extremely insecure.
Note #### Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information.
[connection master]
drop table if exists t;
show variables like 'tokudb_rpl_unique_checks%';
Variable_name Value
tokudb_rpl_unique_checks OFF
tokudb_rpl_unique_checks_delay 5000
create table t (a bigint not null, b bigint not null, primary key(a), unique key(b)) engine=tokudb;
select unix_timestamp() into @tstart;
insert into t values (1,2);

View File

@@ -4,6 +4,10 @@ Note #### Sending passwords in plain text without SSL/TLS is extremely insecure.
Note #### Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information.
[connection master]
drop table if exists t;
show variables like 'tokudb_rpl_unique_checks%';
Variable_name Value
tokudb_rpl_unique_checks ON
tokudb_rpl_unique_checks_delay 5000
create table t (a bigint not null, b bigint not null, primary key(a), unique key(b)) engine=tokudb;
select unix_timestamp() into @tstart;
insert into t values (1,2);

View File

@@ -0,0 +1 @@
rpl_row_sp002_tokudb : tokudb does not support foreign keys

View File

@@ -1,3 +1,4 @@
--source include/have_tokudb.inc
#Want to skip this test from daily Valgrind execution
--source include/no_valgrind_without_big.inc
#

View File

@@ -1,6 +1,5 @@
--read-only=ON
--tokudb-rpl-unique-checks-delay=10000
--tokudb-rpl-unique-checks=OFF
--tokudb-rpl-lookup-rows-delay=10000
--tokudb-rpl-lookup-rows=OFF
--loose-tokudb-rpl-unique-checks-delay=10000
--loose-tokudb-rpl-unique-checks=OFF
--loose-tokudb-rpl-lookup-rows-delay=10000
--loose-tokudb-rpl-lookup-rows=OFF

View File

@@ -14,7 +14,7 @@ enable_warnings;
connection slave;
# show variables like 'read_only';
# show variables like 'tokudb_rpl_%';
show variables like 'tokudb_rpl_%';
# insert some rows
connection master;

View File

@@ -1,5 +1,5 @@
--read-only=ON
--tokudb-rpl-unique-checks-delay=10000
--tokudb-rpl-unique-checks=OFF
--tokudb-rpl-lookup-rows-delay=10000
--tokudb-rpl-lookup-rows=OFF
--loose-tokudb-rpl-unique-checks-delay=10000
--loose-tokudb-rpl-unique-checks=OFF
--loose-tokudb-rpl-lookup-rows-delay=10000
--loose-tokudb-rpl-lookup-rows=OFF

View File

@@ -13,7 +13,7 @@ enable_warnings;
connection slave;
# show variables like 'read_only';
# show variables like 'tokudb_rpl_%';
show variables like 'tokudb_rpl_%';
# insert some rows
connection master;

View File

@@ -1,3 +1,3 @@
--read-only=ON
--tokudb-rpl-unique-checks-delay=5000
--tokudb-rpl-unique-checks=OFF
--loose-tokudb-rpl-unique-checks-delay=5000
--loose-tokudb-rpl-unique-checks=OFF

View File

@@ -14,7 +14,7 @@ enable_warnings;
connection slave;
# show variables like 'read_only';
# show variables like 'tokudb_rpl_unique_checks%';
show variables like 'tokudb_rpl_unique_checks%';
# insert some rows
connection master;

View File

@@ -1 +1 @@
--read-only=ON --tokudb-rpl-unique-checks-delay=10000 --tokudb-rpl-unique-checks=OFF --tokudb-rpl-lookup-rows-delay=10000 --tokudb-rpl-lookup-rows=OFF
--read-only=ON --loose-tokudb-rpl-unique-checks-delay=10000 --loose-tokudb-rpl-unique-checks=OFF --loose-tokudb-rpl-lookup-rows-delay=10000 --loose-tokudb-rpl-lookup-rows=OFF

View File

@@ -14,7 +14,7 @@ enable_warnings;
connection slave;
# show variables like 'read_only';
# show variables like 'tokudb_rpl_%';
show variables like 'tokudb_rpl_%';
# insert some rows
connection master;

View File

@@ -1 +1 @@
--read-only=ON --tokudb-rpl-unique-checks-delay=0 --tokudb-rpl-unique-checks=ON --tokudb-rpl-lookup-rows-delay=10000 --tokudb-rpl-lookup-rows=ON
--read-only=ON --loose-tokudb-rpl-unique-checks-delay=0 --loose-tokudb-rpl-unique-checks=ON --loose-tokudb-rpl-lookup-rows-delay=10000 --loose-tokudb-rpl-lookup-rows=ON

View File

@@ -15,7 +15,7 @@ enable_warnings;
connection slave;
# show variables like 'read_only';
# show variables like 'tokudb_rpl_%';
show variables like 'tokudb_rpl_%';
# insert some rows
connection master;

View File

@@ -1,2 +0,0 @@
--tokudb_pk_insert_mode=2

View File

@@ -6,4 +6,7 @@
--source include/have_binlog_format_mixed.inc
--source include/have_tokudb.inc
let $engine_type=TokuDB;
SET SESSION tokudb_pk_insert_mode = 2;
--source suite/rpl/include/rpl_mixed_dml.inc

View File

@@ -1 +1 @@
--read-only=OFF --tokudb-rpl-check-readonly=OFF --tokudb-rpl-unique-checks-delay=5000 --tokudb-rpl-unique-checks=OFF
--read-only=OFF --loose-tokudb-rpl-check-readonly=OFF --loose-tokudb-rpl-unique-checks-delay=5000 --loose-tokudb-rpl-unique-checks=OFF

View File

@@ -14,7 +14,7 @@ enable_warnings;
connection slave;
# show variables like 'read_only';
# show variables like 'tokudb_rpl_unique_checks%';
show variables like 'tokudb_rpl%';
# insert some rows
connection master;

Some files were not shown because too many files have changed in this diff