From f62fa411a1f6d0af9ac1c90b6b4c5bdebce272ee Mon Sep 17 00:00:00 2001 From: John Esmet Date: Mon, 9 Jun 2014 14:58:28 -0400 Subject: [PATCH 001/190] refs #258 Add a test that verifys we can overwrite or delete a rows using different length (but equal) keys, using various overwrite methods and interleaved aborts. --- src/tests/test_keylen_diff.cc | 284 ++++++++++++++++++++++++++++++++++ 1 file changed, 284 insertions(+) create mode 100644 src/tests/test_keylen_diff.cc diff --git a/src/tests/test_keylen_diff.cc b/src/tests/test_keylen_diff.cc new file mode 100644 index 00000000000..55eb620b958 --- /dev/null +++ b/src/tests/test_keylen_diff.cc @@ -0,0 +1,284 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: + +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuDB, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2014 Tokutek, Inc. 
+ +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. + + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. 
If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#include "test.h" + +// test a comparison function that treats certain different-lengthed keys as equal + +struct packed_key { + char type; + char k[8]; + static packed_key as_int(int v) { + packed_key k; + k.type = 0; + memcpy(k.k, &v, sizeof(int)); + return k; + } + static packed_key as_double(double v) { + packed_key k; + k.type = 1; + memcpy(k.k, &v, sizeof(double)); + return k; + } + size_t size() const { + assert(type == 0 || type == 1); + return type == 0 ? 5 : 9; + } +}; + +// the point is that keys can be packed as integers or doubles, but +// we'll treat them both as doubles for the sake of comparison. +// this means a 4 byte number could equal an 8 byte number. +static int packed_key_cmp(DB *UU(db), const DBT *a, const DBT *b) { + assert(a->size == 5 || a->size == 9); + assert(b->size == 5 || b->size == 9); + char *k1 = reinterpret_cast(a->data); + char *k2 = reinterpret_cast(b->data); + assert(*k1 == 0 || *k1 == 1); + assert(*k2 == 0 || *k2 == 1); + double v1 = *k1 == 0 ? static_cast(*reinterpret_cast(k1 + 1)) : + *reinterpret_cast(k1 + 1); + double v2 = *k2 == 0 ? 
static_cast(*reinterpret_cast(k2 + 1)) : + *reinterpret_cast(k2 + 1); + if (v1 > v2) { + return 1; + } else if (v1 < v2) { + return -1; + } else { + return 0; + } +} + +static int update_callback(DB *UU(db), const DBT *UU(key), const DBT *old_val, const DBT *extra, + void (*set_val)(const DBT *new_val, void *setval_extra), void *setval_extra) { + assert(extra != nullptr); + assert(old_val != nullptr); + assert(extra->size == 0); + assert(old_val->size == 0); + if (extra->data == nullptr) { + set_val(nullptr, setval_extra); + } else { + DBT new_val; + char empty_v; + dbt_init(&new_val, &empty_v, 0); + set_val(&new_val, setval_extra); + } + return 0; +} + +enum overwrite_method { + VIA_UPDATE_OVERWRITE_BROADCAST, + VIA_UPDATE_DELETE_BROADCAST, + VIA_UPDATE_OVERWRITE, + VIA_UPDATE_DELETE, + VIA_DELETE, + VIA_INSERT, + NUM_OVERWRITE_METHODS +}; + +static void test_keylen_diff(enum overwrite_method method, bool control_test) { + int r; + + DB_ENV *env; + r = db_env_create(&env, 0); CKERR(r); + r = env->set_default_bt_compare(env, packed_key_cmp); CKERR(r); + env->set_update(env, update_callback); CKERR(r); + r = env->open(env, TOKU_TEST_FILENAME, DB_CREATE+DB_PRIVATE+DB_INIT_MPOOL+DB_INIT_TXN, 0); CKERR(r); + + DB *db; + r = db_create(&db, env, 0); CKERR(r); + r = db->set_pagesize(db, 16 * 1024); // smaller pages so we get a more lush tree + r = db->set_readpagesize(db, 1 * 1024); // smaller basements so we get more per leaf + r = db->open(db, nullptr, "db", nullptr, DB_BTREE, DB_CREATE, 0666); CKERR(r); + + DBT null_dbt, empty_dbt; + char empty_v; + dbt_init(&empty_dbt, &empty_v, 0); + dbt_init(&null_dbt, nullptr, 0); + + const int num_keys = 256 * 1000; + + for (int i = 0; i < num_keys; i++) { + // insert it using a 4 byte key .. 
+ packed_key key = packed_key::as_int(i); + + DBT dbt; + dbt_init(&dbt, &key, key.size()); + r = db->put(db, nullptr, &dbt, &empty_dbt, 0); CKERR(r); + } + + // overwrite keys randomly, so we induce flushes and get better / realistic coverage + int *XMALLOC_N(num_keys, shuffled_keys); + for (int i = 0; i < num_keys; i++) { + shuffled_keys[i] = i; + } + for (int i = num_keys - 1; i >= 1; i--) { + long rnd = random64() % (i + 1); + int tmp = shuffled_keys[rnd]; + shuffled_keys[rnd] = shuffled_keys[i]; + shuffled_keys[i] = tmp; + } + + for (int i = 0; i < num_keys; i++) { + // for the control test, delete it using the same length key + // + // .. otherwise, delete it with an 8 byte key + packed_key key = control_test ? packed_key::as_int(shuffled_keys[i]) : + packed_key::as_double(shuffled_keys[i]); + + DBT dbt; + dbt_init(&dbt, &key, key.size()); + DB_TXN *txn; + env->txn_begin(env, nullptr, &txn, DB_TXN_NOSYNC); CKERR(r); + switch (method) { + case VIA_INSERT: { + r = db->put(db, txn, &dbt, &empty_dbt, 0); CKERR(r); + break; + } + case VIA_DELETE: { + // we purposefully do not pass DB_DELETE_ANY because the hidden query acts as + // a sanity check for the control test and, overall, gives better code coverage + r = db->del(db, txn, &dbt, 0); CKERR(r); + break; + } + case VIA_UPDATE_OVERWRITE: + case VIA_UPDATE_DELETE: { + r = db->update(db, txn, &dbt, method == VIA_UPDATE_DELETE ? &null_dbt : &empty_dbt, 0); CKERR(r); + break; + } + case VIA_UPDATE_OVERWRITE_BROADCAST: + case VIA_UPDATE_DELETE_BROADCAST: { + r = db->update_broadcast(db, txn, method == VIA_UPDATE_DELETE_BROADCAST ? 
&null_dbt : &empty_dbt, 0); CKERR(r); + if (i > 1 ) { // only need to test broadcast twice - one with abort, one without + txn->abort(txn); // we opened a txn so we should abort it before exiting + goto done; + } + break; + } + default: { + assert(false); + } + } + const bool abort = i % 2 == 0; + if (abort) { + txn->abort(txn); + } else { + txn->commit(txn, 0); + } + } + +done: + toku_free(shuffled_keys); + + // optimize before close to ensure that all messages are applied and any potential bugs are exposed + r = db->optimize(db); + r = db->close(db, 0); CKERR(r); + r = env->close(env, 0); CKERR(r); +} + +int +test_main(int argc, char *const argv[]) { + parse_args(argc, argv); + + toku_os_recursive_delete(TOKU_TEST_FILENAME); + int r = toku_os_mkdir(TOKU_TEST_FILENAME, S_IRWXU+S_IRWXG+S_IRWXO); CKERR(r); + + for (int i = 0; i < NUM_OVERWRITE_METHODS; i++) { + enum overwrite_method method = static_cast(i); + + // control test - must pass for the 'real' test below to be interesting + printf("testing method %d (control)\n", i); + test_keylen_diff(method, true); + + // real test, actually mixes key lengths + printf("testing method %d (real)\n", i); + test_keylen_diff(method, false); + } + + return 0; +} From 521a9ec1ca3012cb4bc4e926f52a3014e3e2b3eb Mon Sep 17 00:00:00 2001 From: John Esmet Date: Mon, 9 Jun 2014 15:37:26 -0400 Subject: [PATCH 002/190] refs #258 Pass the old key length down to le_pack, so that mempool free has the right value --- ft/bndata.cc | 5 +++-- ft/bndata.h | 3 ++- ft/ft-internal.h | 1 + ft/ft-ops.cc | 15 +++++++++------ ft/ftloader.cc | 2 +- ft/leafentry.h | 1 + ft/tests/make-tree.cc | 2 +- ft/tests/mempool-115.cc | 1 + ft/tests/orthopush-flush.cc | 4 ++-- ft/tests/test-leafentry-nested.cc | 17 +++++++++-------- ft/tests/verify-bad-msn.cc | 2 +- ft/tests/verify-bad-pivots.cc | 2 +- ft/tests/verify-dup-in-leaf.cc | 2 +- ft/tests/verify-dup-pivots.cc | 2 +- ft/tests/verify-misrouted-msgs.cc | 2 +- ft/tests/verify-unsorted-leaf.cc | 2 +- 
ft/tests/verify-unsorted-pivots.cc | 2 +- ft/ule-internal.h | 1 + ft/ule.cc | 13 ++++++++++--- util/mempool.cc | 3 ++- 20 files changed, 50 insertions(+), 32 deletions(-) diff --git a/ft/bndata.cc b/ft/bndata.cc index eb543a03ab4..36e97b8c934 100644 --- a/ft/bndata.cc +++ b/ft/bndata.cc @@ -441,6 +441,7 @@ void bn_data::get_space_for_overwrite( uint32_t idx, const void* keyp UU(), uint32_t keylen UU(), + uint32_t old_keylen, uint32_t old_le_size, uint32_t new_size, LEAFENTRY* new_le_space, @@ -455,8 +456,8 @@ void bn_data::get_space_for_overwrite( int r = m_buffer.fetch(idx, &klpair_len, &klp); invariant_zero(r); paranoid_invariant(klp!=nullptr); - // Key never changes. - paranoid_invariant(keylen_from_klpair_len(klpair_len) == keylen); + // Old key length should be consistent with what is stored in the DMT + invariant(keylen_from_klpair_len(klpair_len) == old_keylen); size_t new_le_offset = toku_mempool_get_offset_from_pointer_and_base(&this->m_buffer_mempool, new_le); paranoid_invariant(new_le_offset <= UINT32_MAX - new_size); // Not using > 4GB diff --git a/ft/bndata.h b/ft/bndata.h index 79daf1e5bf0..75db59daea9 100644 --- a/ft/bndata.h +++ b/ft/bndata.h @@ -304,7 +304,8 @@ public: // Allocates space in the mempool to store a new leafentry. // This may require reorganizing the mempool and updating the dmt. 
__attribute__((__nonnull__)) - void get_space_for_overwrite(uint32_t idx, const void* keyp, uint32_t keylen, uint32_t old_size, uint32_t new_size, LEAFENTRY* new_le_space, void **const maybe_free); + void get_space_for_overwrite(uint32_t idx, const void* keyp, uint32_t keylen, uint32_t old_keylen, uint32_t old_size, + uint32_t new_size, LEAFENTRY* new_le_space, void **const maybe_free); // Allocates space in the mempool to store a new leafentry // and inserts a new key into the dmt diff --git a/ft/ft-internal.h b/ft/ft-internal.h index f182a4f6aed..2681c900d73 100644 --- a/ft/ft-internal.h +++ b/ft/ft-internal.h @@ -1224,6 +1224,7 @@ toku_ft_bn_apply_msg_once( BASEMENTNODE bn, const FT_MSG msg, uint32_t idx, + uint32_t le_keylen, LEAFENTRY le, txn_gc_info *gc_info, uint64_t *workdonep, diff --git a/ft/ft-ops.cc b/ft/ft-ops.cc index f9701ec34b1..6a21bec69ec 100644 --- a/ft/ft-ops.cc +++ b/ft/ft-ops.cc @@ -1740,6 +1740,7 @@ toku_ft_bn_apply_msg_once ( BASEMENTNODE bn, const FT_MSG msg, uint32_t idx, + uint32_t le_keylen, LEAFENTRY le, txn_gc_info *gc_info, uint64_t *workdone, @@ -1767,6 +1768,7 @@ toku_ft_bn_apply_msg_once ( le, &bn->data_buffer, idx, + le_keylen, gc_info, &new_le, &numbytes_delta @@ -1816,6 +1818,7 @@ struct setval_extra_s { XIDS xids; const DBT *key; uint32_t idx; + uint32_t le_keylen; LEAFENTRY le; txn_gc_info *gc_info; uint64_t * workdone; // set by toku_ft_bn_apply_msg_once() @@ -1849,7 +1852,7 @@ static void setval_fun (const DBT *new_val, void *svextra_v) { msg.u.id.val = &val; } toku_ft_bn_apply_msg_once(svextra->bn, &msg, - svextra->idx, svextra->le, + svextra->idx, svextra->le_keylen, svextra->le, svextra->gc_info, svextra->workdone, svextra->stats_to_update); svextra->setval_r = 0; @@ -1909,7 +1912,7 @@ static int do_update(ft_update_func update_fun, DESCRIPTOR desc, BASEMENTNODE bn le_for_update = le; struct setval_extra_s setval_extra = {setval_tag, false, 0, bn, msg->msn, msg->xids, - keyp, idx, le_for_update, gc_info, + keyp, idx, 
keylen, le_for_update, gc_info, workdone, stats_to_update}; // call handlerton's ft->update_fun(), which passes setval_extra to setval_fun() FAKE_DB(db, desc); @@ -1980,7 +1983,7 @@ toku_ft_bn_apply_msg ( } else { assert_zero(r); } - toku_ft_bn_apply_msg_once(bn, msg, idx, storeddata, gc_info, workdone, stats_to_update); + toku_ft_bn_apply_msg_once(bn, msg, idx, keylen, storeddata, gc_info, workdone, stats_to_update); // if the insertion point is within a window of the right edge of // the leaf then it is sequential @@ -2012,7 +2015,7 @@ toku_ft_bn_apply_msg ( ); if (r == DB_NOTFOUND) break; assert_zero(r); - toku_ft_bn_apply_msg_once(bn, msg, idx, storeddata, gc_info, workdone, stats_to_update); + toku_ft_bn_apply_msg_once(bn, msg, idx, keylen, storeddata, gc_info, workdone, stats_to_update); break; } @@ -2034,7 +2037,7 @@ toku_ft_bn_apply_msg ( msg->u.id.key = &curr_keydbt; int deleted = 0; if (!le_is_clean(storeddata)) { //If already clean, nothing to do. - toku_ft_bn_apply_msg_once(bn, msg, idx, storeddata, gc_info, workdone, stats_to_update); + toku_ft_bn_apply_msg_once(bn, msg, idx, curr_keylen, storeddata, gc_info, workdone, stats_to_update); // at this point, we cannot trust msg->u.id.key to be valid. 
uint32_t new_dmt_size = bn->data_buffer.num_klpairs(); if (new_dmt_size != num_klpairs) { @@ -2067,7 +2070,7 @@ toku_ft_bn_apply_msg ( msg->u.id.key = &curr_keydbt; int deleted = 0; if (le_has_xids(storeddata, msg->xids)) { - toku_ft_bn_apply_msg_once(bn, msg, idx, storeddata, gc_info, workdone, stats_to_update); + toku_ft_bn_apply_msg_once(bn, msg, idx, curr_keylen, storeddata, gc_info, workdone, stats_to_update); uint32_t new_dmt_size = bn->data_buffer.num_klpairs(); if (new_dmt_size != num_klpairs) { paranoid_invariant(new_dmt_size + 1 == num_klpairs); diff --git a/ft/ftloader.cc b/ft/ftloader.cc index 67b3cf9905e..6b5e9ae986b 100644 --- a/ft/ftloader.cc +++ b/ft/ftloader.cc @@ -2948,7 +2948,7 @@ static void add_pair_to_leafnode (struct leaf_buf *lbuf, unsigned char *key, int uint64_t workdone=0; // there's no mvcc garbage in a bulk-loaded FT, so there's no need to pass useful gc info txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, true); - toku_ft_bn_apply_msg_once(BLB(leafnode,0), &msg, idx, NULL, &gc_info, &workdone, stats_to_update); + toku_ft_bn_apply_msg_once(BLB(leafnode,0), &msg, idx, keylen, NULL, &gc_info, &workdone, stats_to_update); } static int write_literal(struct dbout *out, void*data, size_t len) { diff --git a/ft/leafentry.h b/ft/leafentry.h index 5c525db5c19..735e43207e6 100644 --- a/ft/leafentry.h +++ b/ft/leafentry.h @@ -246,6 +246,7 @@ toku_le_apply_msg(FT_MSG msg, LEAFENTRY old_leafentry, // NULL if there was no stored data. 
bn_data* data_buffer, // bn_data storing leafentry, if NULL, means there is no bn_data uint32_t idx, // index in data_buffer where leafentry is stored (and should be replaced + uint32_t old_keylen, txn_gc_info *gc_info, LEAFENTRY *new_leafentry_p, int64_t * numbytes_delta_p); diff --git a/ft/tests/make-tree.cc b/ft/tests/make-tree.cc index a8a04b7a6a4..d4b91008d37 100644 --- a/ft/tests/make-tree.cc +++ b/ft/tests/make-tree.cc @@ -126,7 +126,7 @@ append_leaf(FTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen) // apply an insert to the leaf node txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, false); FT_MSG_S msg = { FT_INSERT, msn, xids_get_root_xids(), .u = {.id = { &thekey, &theval }} }; - toku_ft_bn_apply_msg_once(BLB(leafnode,0), &msg, idx, NULL, &gc_info, NULL, NULL); + toku_ft_bn_apply_msg_once(BLB(leafnode,0), &msg, idx, keylen, NULL, &gc_info, NULL, NULL); leafnode->max_msn_applied_to_node_on_disk = msn; diff --git a/ft/tests/mempool-115.cc b/ft/tests/mempool-115.cc index 3b9280364e8..0f44cf39da1 100644 --- a/ft/tests/mempool-115.cc +++ b/ft/tests/mempool-115.cc @@ -123,6 +123,7 @@ le_overwrite(bn_data* bn, uint32_t idx, const char *key, int keysize, const cha idx, key, keysize, + keysize, // old_keylen size_needed, // old_le_size size_needed, &r, diff --git a/ft/tests/orthopush-flush.cc b/ft/tests/orthopush-flush.cc index 7c198a3cb46..e467c70b6ed 100644 --- a/ft/tests/orthopush-flush.cc +++ b/ft/tests/orthopush-flush.cc @@ -218,7 +218,7 @@ insert_random_message_to_bn( *keylenp = keydbt->size; *keyp = toku_xmemdup(keydbt->data, keydbt->size); int64_t numbytes; - toku_le_apply_msg(&msg, NULL, NULL, 0, &non_mvcc_gc_info, save, &numbytes); + toku_le_apply_msg(&msg, NULL, NULL, 0, keydbt->size, &non_mvcc_gc_info, save, &numbytes); toku_ft_bn_apply_msg(t->ft->compare_fun, t->ft->update_fun, NULL, blb, &msg, &non_mvcc_gc_info, NULL, NULL); if (msn.msn > blb->max_msn_applied.msn) { blb->max_msn_applied = msn; @@ -268,7 +268,7 @@ 
insert_same_message_to_bns( *keylenp = keydbt->size; *keyp = toku_xmemdup(keydbt->data, keydbt->size); int64_t numbytes; - toku_le_apply_msg(&msg, NULL, NULL, 0, &non_mvcc_gc_info, save, &numbytes); + toku_le_apply_msg(&msg, NULL, NULL, 0, keydbt->size, &non_mvcc_gc_info, save, &numbytes); toku_ft_bn_apply_msg(t->ft->compare_fun, t->ft->update_fun, NULL, blb1, &msg, &non_mvcc_gc_info, NULL, NULL); if (msn.msn > blb1->max_msn_applied.msn) { blb1->max_msn_applied = msn; diff --git a/ft/tests/test-leafentry-nested.cc b/ft/tests/test-leafentry-nested.cc index 0335c284cb3..2126dde2011 100644 --- a/ft/tests/test-leafentry-nested.cc +++ b/ft/tests/test-leafentry-nested.cc @@ -213,7 +213,7 @@ test_le_offsets (void) { static void test_ule_packs_to_nothing (ULE ule) { LEAFENTRY le; - int r = le_pack(ule, NULL, 0, NULL, 0, 0, &le, nullptr); + int r = le_pack(ule, NULL, 0, NULL, 0, 0, 0, &le, nullptr); assert(r==0); assert(le==NULL); } @@ -319,7 +319,7 @@ test_le_pack_committed (void) { size_t memsize; LEAFENTRY le; - int r = le_pack(&ule, nullptr, 0, nullptr, 0, 0, &le, nullptr); + int r = le_pack(&ule, nullptr, 0, nullptr, 0, 0, 0, &le, nullptr); assert(r==0); assert(le!=NULL); memsize = le_memsize_from_ule(&ule); @@ -329,7 +329,7 @@ test_le_pack_committed (void) { verify_ule_equal(&ule, &tmp_ule); LEAFENTRY tmp_le; size_t tmp_memsize; - r = le_pack(&tmp_ule, nullptr, 0, nullptr, 0, 0, &tmp_le, nullptr); + r = le_pack(&tmp_ule, nullptr, 0, nullptr, 0, 0, 0, &tmp_le, nullptr); tmp_memsize = le_memsize_from_ule(&tmp_ule); assert(r==0); assert(tmp_memsize == memsize); @@ -377,7 +377,7 @@ test_le_pack_uncommitted (uint8_t committed_type, uint8_t prov_type, int num_pla size_t memsize; LEAFENTRY le; - int r = le_pack(&ule, nullptr, 0, nullptr, 0, 0, &le, nullptr); + int r = le_pack(&ule, nullptr, 0, nullptr, 0, 0, 0, &le, nullptr); assert(r==0); assert(le!=NULL); memsize = le_memsize_from_ule(&ule); @@ -387,7 +387,7 @@ test_le_pack_uncommitted (uint8_t committed_type, uint8_t 
prov_type, int num_pla verify_ule_equal(&ule, &tmp_ule); LEAFENTRY tmp_le; size_t tmp_memsize; - r = le_pack(&tmp_ule, nullptr, 0, nullptr, 0, 0, &tmp_le, nullptr); + r = le_pack(&tmp_ule, nullptr, 0, nullptr, 0, 0, 0, &tmp_le, nullptr); tmp_memsize = le_memsize_from_ule(&tmp_ule); assert(r==0); assert(tmp_memsize == memsize); @@ -448,7 +448,7 @@ test_le_apply(ULE ule_initial, FT_MSG msg, ULE ule_expected) { LEAFENTRY le_expected; LEAFENTRY le_result; - r = le_pack(ule_initial, nullptr, 0, nullptr, 0, 0, &le_initial, nullptr); + r = le_pack(ule_initial, nullptr, 0, nullptr, 0, 0, 0, &le_initial, nullptr); CKERR(r); size_t result_memsize = 0; @@ -458,6 +458,7 @@ test_le_apply(ULE ule_initial, FT_MSG msg, ULE ule_expected) { le_initial, nullptr, 0, + 0, &gc_info, &le_result, &ignoreme); @@ -467,7 +468,7 @@ test_le_apply(ULE ule_initial, FT_MSG msg, ULE ule_expected) { } size_t expected_memsize = 0; - r = le_pack(ule_expected, nullptr, 0, nullptr, 0, 0, &le_expected, nullptr); + r = le_pack(ule_expected, nullptr, 0, nullptr, 0, 0, 0, &le_expected, nullptr); CKERR(r); if (le_expected) { expected_memsize = leafentry_memsize(le_expected); @@ -749,7 +750,7 @@ test_le_apply_messages(void) { static bool ule_worth_running_garbage_collection(ULE ule, TXNID oldest_referenced_xid_known) { LEAFENTRY le; - int r = le_pack(ule, nullptr, 0, nullptr, 0, 0, &le, nullptr); CKERR(r); + int r = le_pack(ule, nullptr, 0, nullptr, 0, 0, 0, &le, nullptr); CKERR(r); invariant_notnull(le); txn_gc_info gc_info(nullptr, oldest_referenced_xid_known, oldest_referenced_xid_known, true); bool worth_running = toku_le_worth_running_garbage_collection(le, &gc_info); diff --git a/ft/tests/verify-bad-msn.cc b/ft/tests/verify-bad-msn.cc index 5eef196f611..fd8200ca860 100644 --- a/ft/tests/verify-bad-msn.cc +++ b/ft/tests/verify-bad-msn.cc @@ -129,7 +129,7 @@ append_leaf(FTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen) // apply an insert to the leaf node FT_MSG_S msg = { FT_INSERT, 
msn, xids_get_root_xids(), .u={.id = { &thekey, &theval }} }; txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, false); - toku_ft_bn_apply_msg_once(BLB(leafnode, 0), &msg, idx, NULL, &gc_info, NULL, NULL); + toku_ft_bn_apply_msg_once(BLB(leafnode, 0), &msg, idx, keylen, NULL, &gc_info, NULL, NULL); // Create bad tree (don't do following): // leafnode->max_msn_applied_to_node = msn; diff --git a/ft/tests/verify-bad-pivots.cc b/ft/tests/verify-bad-pivots.cc index 0d477ea22c4..ea8b4ed1714 100644 --- a/ft/tests/verify-bad-pivots.cc +++ b/ft/tests/verify-bad-pivots.cc @@ -117,7 +117,7 @@ append_leaf(FTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen) MSN msn = next_dummymsn(); FT_MSG_S msg = { FT_INSERT, msn, xids_get_root_xids(), .u={.id = { &thekey, &theval }} }; txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, false); - toku_ft_bn_apply_msg_once(BLB(leafnode, 0), &msg, idx, NULL, &gc_info, NULL, NULL); + toku_ft_bn_apply_msg_once(BLB(leafnode, 0), &msg, idx, keylen, NULL, &gc_info, NULL, NULL); // dont forget to dirty the node leafnode->dirty = 1; diff --git a/ft/tests/verify-dup-in-leaf.cc b/ft/tests/verify-dup-in-leaf.cc index ee0256b4882..78d36a8bc4e 100644 --- a/ft/tests/verify-dup-in-leaf.cc +++ b/ft/tests/verify-dup-in-leaf.cc @@ -118,7 +118,7 @@ append_leaf(FTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen) MSN msn = next_dummymsn(); FT_MSG_S msg = { FT_INSERT, msn, xids_get_root_xids(), .u={.id = { &thekey, &theval }} }; txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, false); - toku_ft_bn_apply_msg_once(BLB(leafnode, 0), &msg, idx, NULL, &gc_info, NULL, NULL); + toku_ft_bn_apply_msg_once(BLB(leafnode, 0), &msg, idx, keylen, NULL, &gc_info, NULL, NULL); // dont forget to dirty the node leafnode->dirty = 1; diff --git a/ft/tests/verify-dup-pivots.cc b/ft/tests/verify-dup-pivots.cc index f2b74dc3891..5d2c129fc01 100644 --- a/ft/tests/verify-dup-pivots.cc +++ b/ft/tests/verify-dup-pivots.cc @@ -117,7 +117,7 @@ 
append_leaf(FTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen) MSN msn = next_dummymsn(); FT_MSG_S msg = { FT_INSERT, msn, xids_get_root_xids(), .u={.id = { &thekey, &theval }} }; txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, false); - toku_ft_bn_apply_msg_once(BLB(leafnode, 0), &msg, idx, NULL, &gc_info, NULL, NULL); + toku_ft_bn_apply_msg_once(BLB(leafnode, 0), &msg, idx, keylen, NULL, &gc_info, NULL, NULL); // dont forget to dirty the node leafnode->dirty = 1; diff --git a/ft/tests/verify-misrouted-msgs.cc b/ft/tests/verify-misrouted-msgs.cc index 7efdd374f01..e90f0e53f6d 100644 --- a/ft/tests/verify-misrouted-msgs.cc +++ b/ft/tests/verify-misrouted-msgs.cc @@ -118,7 +118,7 @@ append_leaf(FTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen) MSN msn = next_dummymsn(); FT_MSG_S msg = { FT_INSERT, msn, xids_get_root_xids(), .u={.id = { &thekey, &theval }} }; txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, false); - toku_ft_bn_apply_msg_once(BLB(leafnode,0), &msg, idx, NULL, &gc_info, NULL, NULL); + toku_ft_bn_apply_msg_once(BLB(leafnode,0), &msg, idx, keylen, NULL, &gc_info, NULL, NULL); // dont forget to dirty the node leafnode->dirty = 1; diff --git a/ft/tests/verify-unsorted-leaf.cc b/ft/tests/verify-unsorted-leaf.cc index 92d68dd56ee..83ef9ed52ec 100644 --- a/ft/tests/verify-unsorted-leaf.cc +++ b/ft/tests/verify-unsorted-leaf.cc @@ -120,7 +120,7 @@ append_leaf(FTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen) MSN msn = next_dummymsn(); FT_MSG_S msg = { FT_INSERT, msn, xids_get_root_xids(), .u={.id = { &thekey, &theval }} }; txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, false); - toku_ft_bn_apply_msg_once(BLB(leafnode, 0), &msg, idx, NULL, &gc_info, NULL, NULL); + toku_ft_bn_apply_msg_once(BLB(leafnode, 0), &msg, idx, keylen, NULL, &gc_info, NULL, NULL); // dont forget to dirty the node leafnode->dirty = 1; diff --git a/ft/tests/verify-unsorted-pivots.cc b/ft/tests/verify-unsorted-pivots.cc 
index e1b9d9aba22..3b5277a8a2c 100644 --- a/ft/tests/verify-unsorted-pivots.cc +++ b/ft/tests/verify-unsorted-pivots.cc @@ -117,7 +117,7 @@ append_leaf(FTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen) MSN msn = next_dummymsn(); FT_MSG_S msg = { FT_INSERT, msn, xids_get_root_xids(), .u={.id = { &thekey, &theval }} }; txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, false); - toku_ft_bn_apply_msg_once(BLB(leafnode, 0), &msg, idx, NULL, &gc_info, NULL, NULL); + toku_ft_bn_apply_msg_once(BLB(leafnode, 0), &msg, idx, keylen, NULL, &gc_info, NULL, NULL); // dont forget to dirty the node leafnode->dirty = 1; diff --git a/ft/ule-internal.h b/ft/ule-internal.h index 00b9847a13f..d2dd212850b 100644 --- a/ft/ule-internal.h +++ b/ft/ule-internal.h @@ -148,6 +148,7 @@ le_pack(ULE ule, // data to be packed into new leafentry uint32_t idx, void* keyp, uint32_t keylen, + uint32_t old_keylen, uint32_t old_le_size, LEAFENTRY * const new_leafentry_p, // this is what this function creates void **const maybe_free diff --git a/ft/ule.cc b/ft/ule.cc index c364fc4603e..b5be3e075f9 100644 --- a/ft/ule.cc +++ b/ft/ule.cc @@ -256,6 +256,7 @@ static void get_space_for_le( uint32_t idx, void* keyp, uint32_t keylen, + uint32_t old_keylen, uint32_t old_le_size, size_t size, LEAFENTRY* new_le_space, @@ -268,7 +269,7 @@ static void get_space_for_le( else { // this means we are overwriting something if (old_le_size > 0) { - data_buffer->get_space_for_overwrite(idx, keyp, keylen, old_le_size, size, new_le_space, maybe_free); + data_buffer->get_space_for_overwrite(idx, keyp, keylen, old_keylen, old_le_size, size, new_le_space, maybe_free); } // this means we are inserting something new else { @@ -496,6 +497,7 @@ toku_le_apply_msg(FT_MSG msg, LEAFENTRY old_leafentry, // NULL if there was no stored data. 
bn_data* data_buffer, // bn_data storing leafentry, if NULL, means there is no bn_data uint32_t idx, // index in data_buffer where leafentry is stored (and should be replaced + uint32_t old_keylen, // length of the any key in data_buffer txn_gc_info *gc_info, LEAFENTRY *new_leafentry_p, int64_t * numbytes_delta_p) { // change in total size of key and val, not including any overhead @@ -552,6 +554,7 @@ toku_le_apply_msg(FT_MSG msg, idx, ft_msg_get_key(msg), // contract of this function is caller has this set, always keylen, // contract of this function is caller has this set, always + old_keylen, oldmemsize, new_leafentry_p, &maybe_free @@ -655,6 +658,7 @@ toku_le_garbage_collect(LEAFENTRY old_leaf_entry, idx, keyp, keylen, + keylen, // old_keylen, same because the key isn't going to change for gc old_mem_size, new_leaf_entry, &maybe_free @@ -974,6 +978,7 @@ le_pack(ULE ule, // data to be packed into new leafentry uint32_t idx, void* keyp, uint32_t keylen, + uint32_t old_keylen, uint32_t old_le_size, LEAFENTRY * const new_leafentry_p, // this is what this function creates void **const maybe_free @@ -996,7 +1001,8 @@ le_pack(ULE ule, // data to be packed into new leafentry } } if (data_buffer && old_le_size > 0) { - data_buffer->delete_leafentry(idx, keylen, old_le_size); + // must pass old_keylen and old_le_size, since that's what is actually stored in data_buffer + data_buffer->delete_leafentry(idx, old_keylen, old_le_size); } *new_leafentry_p = NULL; rval = 0; @@ -1005,7 +1011,7 @@ le_pack(ULE ule, // data to be packed into new leafentry found_insert: memsize = le_memsize_from_ule(ule); LEAFENTRY new_leafentry; - get_space_for_le(data_buffer, idx, keyp, keylen, old_le_size, memsize, &new_leafentry, maybe_free); + get_space_for_le(data_buffer, idx, keyp, keylen, old_keylen, old_le_size, memsize, &new_leafentry, maybe_free); //p always points to first unused byte after leafentry we are packing uint8_t *p; @@ -2467,6 +2473,7 @@ toku_le_upgrade_13_14(LEAFENTRY_13 
old_leafentry, nullptr, //only matters if we are passing in a bn_data 0, //only matters if we are passing in a bn_data 0, //only matters if we are passing in a bn_data + 0, //only matters if we are passing in a bn_data new_leafentry_p, nullptr //only matters if we are passing in a bn_data ); diff --git a/util/mempool.cc b/util/mempool.cc index 6f3e2c013db..9eea03338ad 100644 --- a/util/mempool.cc +++ b/util/mempool.cc @@ -232,7 +232,8 @@ void *toku_mempool_malloc(struct mempool *mp, size_t size, int alignment) { void toku_mempool_mfree(struct mempool *mp, void *vp, size_t size) { if (vp) { paranoid_invariant(toku_mempool_inrange(mp, vp, size)); } mp->frag_size += size; - paranoid_invariant(mp->frag_size <= mp->size); + invariant(mp->frag_size <= mp->free_offset); + invariant(mp->frag_size <= mp->size); } From cdf2227466df70b0c5594fde47d8533050c43f47 Mon Sep 17 00:00:00 2001 From: Leif Walsh Date: Sat, 14 Jun 2014 10:28:19 -0400 Subject: [PATCH 003/190] Revert "fixed typo #226" This reverts commit 467a5a99d21f032aeec9e083a4a8f7ec45872702. 
--- ft/ft_node-serialize.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ft/ft_node-serialize.cc b/ft/ft_node-serialize.cc index 91ea0890c30..202e341d1a0 100644 --- a/ft/ft_node-serialize.cc +++ b/ft/ft_node-serialize.cc @@ -1168,21 +1168,21 @@ deserialize_child_buffer(NONLEAF_CHILDINFO bnc, struct rbuf *rbuf) { // read in each message tree (fresh, stale, broadcast) nfresh = rbuf_int(rbuf); bytevec fresh_offsets_src_v; - rbuf_literal_bytes(rbuf, &fresh_offsets_src_v, nfresh * (sizeof *fresh_offsets)); + rbuf_literal_bytes(rbuf, &fresh_offsets_v, nfresh * (sizeof *fresh_offsets)); const int32_t *fresh_offsets_src = (const int32_t *) fresh_offsets_src_v; for (int i = 0; i < nfresh; i++) { fresh_offsets[i] = toku_dtoh32(fresh_offsets_src[i]); } nstale = rbuf_int(rbuf); bytevec stale_offsets_src_v; - rbuf_literal_bytes(rbuf, &stale_offsets_src_v, nstale * (sizeof *stale_offsets)); + rbuf_literal_bytes(rbuf, &stale_offsets_v, nstale * (sizeof *stale_offsets)); const int32_t *stale_offsets_src = (const int32_t *) stale_offsets_src_v; for (int i = 0; i < nstale; i++) { stale_offsets[i] = toku_dtoh32(stale_offsets_src[i]); } nbroadcast_offsets = rbuf_int(rbuf); bytevec broadcast_offsets_src_v; - rbuf_literal_bytes(rbuf, &broadcast_offsets_src_v, nbroadcast_offsets * (sizeof *broadcast_offsets)); + rbuf_literal_bytes(rbuf, &broadcast_offsets_v, nbroadcast_offsets * (sizeof *broadcast_offsets)); const int32_t *broadcast_offsets_src = (const int32_t *) broadcast_offsets_src_v; for (int i = 0; i < nbroadcast_offsets; i++) { broadcast_offsets[i] = toku_dtoh32(broadcast_offsets_src[i]); From 6f97e1b8e07d101c71a7bc1e63237a69a3f1261f Mon Sep 17 00:00:00 2001 From: Leif Walsh Date: Sat, 14 Jun 2014 10:28:20 -0400 Subject: [PATCH 004/190] Revert "vectorized loops in new deserialization code #226" This reverts commit d051cb716cdedf3f3409e43f589a543b550b9b8c. 
--- ft/ft_node-serialize.cc | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/ft/ft_node-serialize.cc b/ft/ft_node-serialize.cc index 202e341d1a0..51c93b6cc68 100644 --- a/ft/ft_node-serialize.cc +++ b/ft/ft_node-serialize.cc @@ -1135,7 +1135,7 @@ deserialize_child_buffer_v26(NONLEAF_CHILDINFO bnc, struct rbuf *rbuf, } } -// effect: deserialize a single message from rbuf and enqueue the result into the given fifo +// effect: deserialize a single message from rbuf and enque the result into the given fifo static void fifo_deserialize_msg_from_rbuf(FIFO fifo, struct rbuf *rbuf) { bytevec key, val; @@ -1167,25 +1167,16 @@ deserialize_child_buffer(NONLEAF_CHILDINFO bnc, struct rbuf *rbuf) { // read in each message tree (fresh, stale, broadcast) nfresh = rbuf_int(rbuf); - bytevec fresh_offsets_src_v; - rbuf_literal_bytes(rbuf, &fresh_offsets_v, nfresh * (sizeof *fresh_offsets)); - const int32_t *fresh_offsets_src = (const int32_t *) fresh_offsets_src_v; for (int i = 0; i < nfresh; i++) { - fresh_offsets[i] = toku_dtoh32(fresh_offsets_src[i]); + fresh_offsets[i] = rbuf_int(rbuf); } nstale = rbuf_int(rbuf); - bytevec stale_offsets_src_v; - rbuf_literal_bytes(rbuf, &stale_offsets_v, nstale * (sizeof *stale_offsets)); - const int32_t *stale_offsets_src = (const int32_t *) stale_offsets_src_v; for (int i = 0; i < nstale; i++) { - stale_offsets[i] = toku_dtoh32(stale_offsets_src[i]); + stale_offsets[i] = rbuf_int(rbuf); } nbroadcast_offsets = rbuf_int(rbuf); - bytevec broadcast_offsets_src_v; - rbuf_literal_bytes(rbuf, &broadcast_offsets_v, nbroadcast_offsets * (sizeof *broadcast_offsets)); - const int32_t *broadcast_offsets_src = (const int32_t *) broadcast_offsets_src_v; for (int i = 0; i < nbroadcast_offsets; i++) { - broadcast_offsets[i] = toku_dtoh32(broadcast_offsets_src[i]); + broadcast_offsets[i] = rbuf_int(rbuf); } // build OMTs out of each offset array From 39397aab93584e18f7068a9882f18c7f448f59f4 Mon Sep 17 00:00:00 2001 From: John 
Esmet Date: Sat, 14 Jun 2014 20:24:46 -0400 Subject: [PATCH 005/190] FT-242 Begin breaking up fttypes.h by moving many things to their appropriate headers --- ft/block_table.h | 2 +- ft/cachetable.h | 39 +++++++- ft/cursor.h | 112 ++++++++++++++++++++++ ft/fifo.cc | 2 +- ft/fifo.h | 28 +----- ft/ft-cachetable-wrappers.h | 4 +- ft/ft-flusher.h | 22 ++++- ft/ft-internal.h | 90 +++++------------- ft/ft-ops.cc | 31 +++++- ft/ft-ops.h | 6 +- ft/ft-search.h | 1 + ft/ft_msg.h | 116 ++++++++++++++++++++--- ft/ft_node-serialize.cc | 2 + ft/ftloader.h | 2 + ft/fttypes.h | 184 +----------------------------------- ft/le-cursor.cc | 7 +- ft/leafentry.h | 7 +- ft/log-internal.h | 5 + ft/logformat.cc | 2 +- ft/logger.h | 5 +- ft/rollback.h | 10 +- ft/sub_block.h | 1 + ft/txn.cc | 2 +- ft/txn.h | 2 +- ft/txn_manager.h | 17 ++++ 25 files changed, 391 insertions(+), 308 deletions(-) create mode 100644 ft/cursor.h diff --git a/ft/block_table.h b/ft/block_table.h index a9f17ad0e7e..72c914988fa 100644 --- a/ft/block_table.h +++ b/ft/block_table.h @@ -92,7 +92,7 @@ PATENT RIGHTS GRANT: #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." #include "fttypes.h" - +#include "ft/ft-internal.h" typedef struct block_table *BLOCK_TABLE; diff --git a/ft/cachetable.h b/ft/cachetable.h index 9c11db02e00..c4290b6f6d7 100644 --- a/ft/cachetable.h +++ b/ft/cachetable.h @@ -111,6 +111,42 @@ PATENT RIGHTS GRANT: typedef BLOCKNUM CACHEKEY; +class checkpointer; +typedef class checkpointer *CHECKPOINTER; +typedef struct cachetable *CACHETABLE; +typedef struct cachefile *CACHEFILE; +typedef struct ctpair *PAIR; + +// This struct hold information about values stored in the cachetable. 
+// As one can tell from the names, we are probably violating an +// abstraction layer by placing names. +// +// The purpose of having this struct is to have a way for the +// cachetable to accumulate the some totals we are interested in. +// Breaking this abstraction layer by having these names was the +// easiest way. +// +typedef struct pair_attr_s { + long size; // size PAIR's value takes in memory + long nonleaf_size; // size if PAIR is a nonleaf node, 0 otherwise, used only for engine status + long leaf_size; // size if PAIR is a leaf node, 0 otherwise, used only for engine status + long rollback_size; // size of PAIR is a rollback node, 0 otherwise, used only for engine status + long cache_pressure_size; // amount PAIR contributes to cache pressure, is sum of buffer sizes and workdone counts + bool is_valid; +} PAIR_ATTR; + +static inline PAIR_ATTR make_pair_attr(long size) { + PAIR_ATTR result={ + .size = size, + .nonleaf_size = 0, + .leaf_size = 0, + .rollback_size = 0, + .cache_pressure_size = 0, + .is_valid = true + }; + return result; +} + void toku_set_cleaner_period (CACHETABLE ct, uint32_t new_period); uint32_t toku_get_cleaner_period_unlocked (CACHETABLE ct); void toku_set_cleaner_iterations (CACHETABLE ct, uint32_t new_iterations); @@ -394,8 +430,9 @@ struct unlockers { bool locked; void (*f)(void* extra); void *extra; - UNLOCKERS next; + struct unlockers *next; }; +typedef struct unlockers *UNLOCKERS; // Effect: If the block is in the cachetable, then return it. // Otherwise call the functions in unlockers, fetch the data (but don't pin it, since we'll just end up pinning it again later), and return TOKUDB_TRY_AGAIN. 
diff --git a/ft/cursor.h b/ft/cursor.h new file mode 100644 index 00000000000..7636beb121c --- /dev/null +++ b/ft/cursor.h @@ -0,0 +1,112 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: + +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuDB, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2014 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 
11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. + + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. 
+*/ + +#pragma once + +#include + +typedef bool(*FT_CHECK_INTERRUPT_CALLBACK)(void* extra); + +/* an ft cursor is represented as a kv pair in a tree */ +struct ft_cursor { + FT_HANDLE ft_handle; + DBT key, val; // The key-value pair that the cursor currently points to + DBT range_lock_left_key, range_lock_right_key; + bool prefetching; + bool left_is_neg_infty, right_is_pos_infty; + bool is_snapshot_read; // true if query is read_committed, false otherwise + bool is_leaf_mode; + bool disable_prefetching; + bool is_temporary; + int out_of_range_error; + int direction; + TOKUTXN ttxn; + FT_CHECK_INTERRUPT_CALLBACK interrupt_cb; + void *interrupt_cb_extra; +}; +typedef struct ft_cursor *FT_CURSOR; diff --git a/ft/fifo.cc b/ft/fifo.cc index 07d7baec2a1..6acd29be67c 100644 --- a/ft/fifo.cc +++ b/ft/fifo.cc @@ -172,7 +172,7 @@ int toku_fifo_enq(FIFO fifo, const void *key, unsigned int keylen, const void *d toku_fifo_resize(fifo, next_2); } struct fifo_entry *entry = (struct fifo_entry *)(fifo->memory + fifo->memory_used); - fifo_entry_set_msg_type(entry, type); + entry->type = (unsigned char) type; entry->msn = msn; xids_cpy(&entry->xids_s, xids); entry->is_fresh = is_fresh; diff --git a/ft/fifo.h b/ft/fifo.h index e9f53248b98..5333ca905a7 100644 --- a/ft/fifo.h +++ b/ft/fifo.h @@ -91,10 +91,10 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
-#include "fttypes.h" -#include "xids-internal.h" -#include "xids.h" - +#include "ft/fttypes.h" +#include "ft/xids-internal.h" +#include "ft/xids.h" +#include "ft/ft_msg.h" // If the fifo_entry is unpacked, the compiler aligns the xids array and we waste a lot of space struct __attribute__((__packed__)) fifo_entry { @@ -106,24 +106,6 @@ struct __attribute__((__packed__)) fifo_entry { XIDS_S xids_s; }; -// get and set the ft message type for a fifo entry. -// it is internally stored as a single unsigned char. -static inline enum ft_msg_type -fifo_entry_get_msg_type(const struct fifo_entry * entry) -{ - enum ft_msg_type msg_type; - msg_type = (enum ft_msg_type) entry->type; - return msg_type; -} - -static inline void -fifo_entry_set_msg_type(struct fifo_entry * entry, - enum ft_msg_type msg_type) -{ - unsigned char type = (unsigned char) msg_type; - entry->type = type; -} - typedef struct fifo *FIFO; int toku_fifo_create(FIFO *); @@ -150,7 +132,7 @@ void toku_fifo_iterate(FIFO, void(*f)(bytevec key,ITEMLEN keylen,bytevec data,IT struct fifo_entry *e = toku_fifo_iterate_internal_get_entry(fifo, fifo_iterate_off); \ ITEMLEN keylenvar = e->keylen; \ ITEMLEN datalenvar = e->vallen; \ - enum ft_msg_type typevar = fifo_entry_get_msg_type(e); \ + enum ft_msg_type typevar = (enum ft_msg_type) e->type; \ MSN msnvar = e->msn; \ XIDS xidsvar = &e->xids_s; \ bytevec keyvar = xids_get_end_of_array(xidsvar); \ diff --git a/ft/ft-cachetable-wrappers.h b/ft/ft-cachetable-wrappers.h index dc84d7f006b..d276dd8af5f 100644 --- a/ft/ft-cachetable-wrappers.h +++ b/ft/ft-cachetable-wrappers.h @@ -92,8 +92,8 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 
11/760379 and to the patents and/or patent applications resulting from it." -#include -#include "cachetable.h" +#include "ft/ft-internal.h" +#include "ft/cachetable.h" /** * Put an empty node (that is, no fields filled) into the cachetable. diff --git a/ft/ft-flusher.h b/ft/ft-flusher.h index 0788bf665d3..0861669157a 100644 --- a/ft/ft-flusher.h +++ b/ft/ft-flusher.h @@ -91,8 +91,7 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -// This must be first to make the 64-bit file mode work right in Linux -#include "fttypes.h" +#include "ft/ft-internal.h" typedef enum { FT_FLUSHER_CLEANER_TOTAL_NODES = 0, // total number of nodes whose buffers are potentially flushed by cleaner thread @@ -158,6 +157,23 @@ toku_ft_flush_node_on_background_thread( FTNODE parent ); +// Given pinned node and pinned child, split child into two +// and update node with information about its new child. +void toku_ft_split_child( + FT h, + FTNODE node, + int childnum, + FTNODE child, + enum split_mode split_mode + ); +// Given pinned node, merge childnum with a neighbor and update node with +// information about the change +void toku_ft_merge_child( + FT ft, + FTNODE node, + int childnum + ); + /** * Effect: Split a leaf node. * Argument "node" is node to be split. @@ -166,6 +182,7 @@ toku_ft_flush_node_on_background_thread( * nodea is the left node that results from the split * splitk is the right-most key of nodea */ +// TODO: Rename toku_ft_leaf_split void ftleaf_split( FT h, @@ -189,6 +206,7 @@ ftleaf_split( * but it does not guarantee that the resulting nodes are smaller than nodesize. 
*/ void +// TODO: Rename toku_ft_nonleaf_split ft_nonleaf_split( FT h, FTNODE node, diff --git a/ft/ft-internal.h b/ft/ft-internal.h index 6534ab1431c..79090016f82 100644 --- a/ft/ft-internal.h +++ b/ft/ft-internal.h @@ -115,7 +115,9 @@ PATENT RIGHTS GRANT: #include "compress.h" #include #include -#include "bndata.h" +#include "ft/bndata.h" +#include "ft/rollback.h" +#include "ft/ft-search.h" enum { KEY_VALUE_OVERHEAD = 8 }; /* Must store the two lengths. */ enum { FT_MSG_OVERHEAD = (2 + sizeof(MSN)) }; // the type plus freshness plus MSN @@ -140,6 +142,18 @@ enum ftnode_fetch_type { ftnode_fetch_keymatch, // one child is needed if it holds both keys }; +enum split_mode { + SPLIT_EVENLY, + SPLIT_LEFT_HEAVY, + SPLIT_RIGHT_HEAVY +}; + +enum reactivity { + RE_STABLE, + RE_FUSIBLE, + RE_FISSIBLE +}; + static bool is_valid_ftnode_fetch_type(enum ftnode_fetch_type type) UU(); static bool is_valid_ftnode_fetch_type(enum ftnode_fetch_type type) { switch (type) { @@ -191,6 +205,7 @@ struct ftnode_fetch_extra { tokutime_t decompress_time; tokutime_t deserialize_time; }; +typedef struct ftnode_fetch_extra *FTNODE_FETCH_EXTRA; struct toku_fifo_entry_key_msn_heaviside_extra { DESCRIPTOR desc; @@ -593,6 +608,7 @@ struct ft { // - if our attempt fails because the key was not in range of the rightmost leaf, we reset the score back to 0 uint32_t seqinsert_score; }; +typedef struct ft *FT; // Allocate a DB struct off the stack and only set its comparison // descriptor. We don't bother setting any other fields because @@ -758,22 +774,6 @@ int toku_ftnode_cleaner_callback( void *ftnode_pv, BLOCKNUM blocknum, uint32_t f void toku_evict_bn_from_memory(FTNODE node, int childnum, FT h); BASEMENTNODE toku_detach_bn(FTNODE node, int childnum); -// Given pinned node and pinned child, split child into two -// and update node with information about its new child. 
-void toku_ft_split_child( - FT h, - FTNODE node, - int childnum, - FTNODE child, - enum split_mode split_mode - ); -// Given pinned node, merge childnum with a neighbor and update node with -// information about the change -void toku_ft_merge_child( - FT ft, - FTNODE node, - int childnum - ); static inline CACHETABLE_WRITE_CALLBACK get_write_callbacks_for_node(FT h) { CACHETABLE_WRITE_CALLBACK wc; wc.flush_callback = toku_ftnode_flush_callback; @@ -786,27 +786,6 @@ static inline CACHETABLE_WRITE_CALLBACK get_write_callbacks_for_node(FT h) { return wc; } -static const FTNODE null_ftnode=0; - -/* an ft cursor is represented as a kv pair in a tree */ -struct ft_cursor { - struct toku_list cursors_link; - FT_HANDLE ft_handle; - DBT key, val; // The key-value pair that the cursor currently points to - DBT range_lock_left_key, range_lock_right_key; - bool prefetching; - bool left_is_neg_infty, right_is_pos_infty; - bool is_snapshot_read; // true if query is read_committed, false otherwise - bool is_leaf_mode; - bool disable_prefetching; - bool is_temporary; - int out_of_range_error; - int direction; - TOKUTXN ttxn; - FT_CHECK_INTERRUPT_CALLBACK interrupt_cb; - void *interrupt_cb_extra; -}; - // // Helper function to fill a ftnode_fetch_extra with data // that will tell the fetch callback that the entire node is @@ -942,43 +921,22 @@ static inline void destroy_bfe_for_prefetch(struct ftnode_fetch_extra *bfe) { } // this is in a strange place because it needs the cursor struct to be defined -static inline void fill_bfe_for_prefetch(struct ftnode_fetch_extra *bfe, - FT h, - FT_CURSOR c) { - paranoid_invariant(h->h->type == FT_CURRENT); - bfe->type = ftnode_fetch_prefetch; - bfe->h = h; - bfe->search = NULL; - toku_init_dbt(&bfe->range_lock_left_key); - toku_init_dbt(&bfe->range_lock_right_key); - const DBT *left = &c->range_lock_left_key; - if (left->data) { - toku_clone_dbt(&bfe->range_lock_left_key, *left); - } - const DBT *right = &c->range_lock_right_key; - if 
(right->data) { - toku_clone_dbt(&bfe->range_lock_right_key, *right); - } - bfe->left_is_neg_infty = c->left_is_neg_infty; - bfe->right_is_pos_infty = c->right_is_pos_infty; - bfe->child_to_read = -1; - bfe->disable_prefetching = c->disable_prefetching; - bfe->read_all_partitions = false; - bfe->bytes_read = 0; - bfe->io_time = 0; - bfe->deserialize_time = 0; - bfe->decompress_time = 0; -} +void fill_bfe_for_prefetch(struct ftnode_fetch_extra *bfe, + FT h, + struct ft_cursor *c); struct ancestors { FTNODE node; // This is the root node if next is NULL. int childnum; // which buffer holds messages destined to the node whose ancestors this list represents. - ANCESTORS next; // Parent of this node (so next->node.(next->childnum) refers to this node). + struct ancestors *next; // Parent of this node (so next->node.(next->childnum) refers to this node). }; +typedef struct ancestors *ANCESTORS; + struct pivot_bounds { const DBT * const lower_bound_exclusive; const DBT * const upper_bound_inclusive; // NULL to indicate negative or positive infinity (which are in practice exclusive since there are now transfinite keys in messages). 
}; +typedef struct pivot_bounds const * const PIVOT_BOUNDS; __attribute__((nonnull)) void toku_move_ftnode_messages_to_stale(FT ft, FTNODE node); diff --git a/ft/ft-ops.cc b/ft/ft-ops.cc index 6a21bec69ec..9c1a99b7e36 100644 --- a/ft/ft-ops.cc +++ b/ft/ft-ops.cc @@ -422,6 +422,7 @@ toku_ft_get_status(FT_STATUS s) { } \ } while (0) + void toku_note_deserialized_basement_node(bool fixed_key_size) { if (fixed_key_size) { STATUS_INC(FT_BASEMENT_DESERIALIZE_FIXED_KEYSIZE, 1); @@ -4703,7 +4704,7 @@ do_bn_apply_msg(FT_HANDLE t, BASEMENTNODE bn, struct fifo_entry *entry, txn_gc_i if (entry->msn.msn > bn->max_msn_applied.msn) { ITEMLEN keylen = entry->keylen; ITEMLEN vallen = entry->vallen; - enum ft_msg_type type = fifo_entry_get_msg_type(entry); + enum ft_msg_type type = (enum ft_msg_type) entry->type; MSN msn = entry->msn; const XIDS xids = (XIDS) &entry->xids_s; bytevec key = xids_get_end_of_array(xids); @@ -5439,6 +5440,34 @@ ftnode_pf_callback_and_free_bfe(void *ftnode_pv, void* disk_data, void *read_ext return r; } +void fill_bfe_for_prefetch(struct ftnode_fetch_extra *bfe, + FT h, + struct ft_cursor *c) { + paranoid_invariant(h->h->type == FT_CURRENT); + bfe->type = ftnode_fetch_prefetch; + bfe->h = h; + bfe->search = NULL; + toku_init_dbt(&bfe->range_lock_left_key); + toku_init_dbt(&bfe->range_lock_right_key); + const DBT *left = &c->range_lock_left_key; + if (left->data) { + toku_clone_dbt(&bfe->range_lock_left_key, *left); + } + const DBT *right = &c->range_lock_right_key; + if (right->data) { + toku_clone_dbt(&bfe->range_lock_right_key, *right); + } + bfe->left_is_neg_infty = c->left_is_neg_infty; + bfe->right_is_pos_infty = c->right_is_pos_infty; + bfe->child_to_read = -1; + bfe->disable_prefetching = c->disable_prefetching; + bfe->read_all_partitions = false; + bfe->bytes_read = 0; + bfe->io_time = 0; + bfe->deserialize_time = 0; + bfe->decompress_time = 0; +} + static void ft_node_maybe_prefetch(FT_HANDLE ft_handle, FTNODE node, int childnum, FT_CURSOR 
ftcursor, bool *doprefetch) { // the number of nodes to prefetch diff --git a/ft/ft-ops.h b/ft/ft-ops.h index cfa6ba20f6f..b1c246f61f3 100644 --- a/ft/ft-ops.h +++ b/ft/ft-ops.h @@ -100,6 +100,8 @@ PATENT RIGHTS GRANT: #include "log.h" #include "ft-search.h" #include "compress.h" +#include "ft_msg.h" +#include "ft/cursor.h" // A callback function is invoked with the key, and the data. // The pointers (to the bytevecs) must not be modified. The data must be copied out before the callback function returns. @@ -114,8 +116,6 @@ PATENT RIGHTS GRANT: // When lock_only is true, the callback only does optional lock tree locking. typedef int(*FT_GET_CALLBACK_FUNCTION)(ITEMLEN keylen, bytevec key, ITEMLEN vallen, bytevec val, void *extra, bool lock_only); -typedef bool(*FT_CHECK_INTERRUPT_CALLBACK)(void* extra); - int toku_open_ft_handle (const char *fname, int is_create, FT_HANDLE *, int nodesize, int basementnodesize, enum toku_compression_method compression_method, CACHETABLE, TOKUTXN, int(*)(DB *,const DBT*,const DBT*)) __attribute__ ((warn_unused_result)); // effect: changes the descriptor for the ft of the given handle. 
@@ -249,6 +249,7 @@ void toku_ft_maybe_delete (FT_HANDLE ft_h, DBT *k, TOKUTXN txn, bool oplsn_valid TXNID toku_ft_get_oldest_referenced_xid_estimate(FT_HANDLE ft_h); TXN_MANAGER toku_ft_get_txn_manager(FT_HANDLE ft_h); +class txn_gc_info; void toku_ft_send_insert(FT_HANDLE ft_h, DBT *key, DBT *val, XIDS xids, enum ft_msg_type type, txn_gc_info *gc_info); void toku_ft_send_delete(FT_HANDLE ft_h, DBT *key, XIDS xids, txn_gc_info *gc_info); void toku_ft_send_commit_any(FT_HANDLE ft_h, DBT *key, XIDS xids, txn_gc_info *gc_info); @@ -261,7 +262,6 @@ extern int toku_ft_debug_mode; int toku_verify_ft (FT_HANDLE ft_h) __attribute__ ((warn_unused_result)); int toku_verify_ft_with_progress (FT_HANDLE ft_h, int (*progress_callback)(void *extra, float progress), void *extra, int verbose, int keep_going) __attribute__ ((warn_unused_result)); -typedef struct ft_cursor *FT_CURSOR; int toku_ft_cursor (FT_HANDLE, FT_CURSOR*, TOKUTXN, bool, bool) __attribute__ ((warn_unused_result)); void toku_ft_cursor_set_leaf_mode(FT_CURSOR); // Sets a boolean on the ft cursor that prevents uncessary copying of diff --git a/ft/ft-search.h b/ft/ft-search.h index 9c26be456de..8e8fece6a3c 100644 --- a/ft/ft-search.h +++ b/ft/ft-search.h @@ -92,6 +92,7 @@ PATENT RIGHTS GRANT: #ifndef FT_SEARCH_H #define FT_SEARCH_H +#include "ft/ybt.h" enum ft_search_direction_e { FT_SEARCH_LEFT = 1, /* search left -> right, finds min xy as defined by the compare function */ diff --git a/ft/ft_msg.h b/ft/ft_msg.h index f468d7f647b..2f996c6558f 100644 --- a/ft/ft_msg.h +++ b/ft/ft_msg.h @@ -97,6 +97,107 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
+/* tree command types */ +enum ft_msg_type { + FT_NONE = 0, + FT_INSERT = 1, + FT_DELETE_ANY = 2, // Delete any matching key. This used to be called FT_DELETE. + //FT_DELETE_BOTH = 3, + FT_ABORT_ANY = 4, // Abort any commands on any matching key. + //FT_ABORT_BOTH = 5, // Abort commands that match both the key and the value + FT_COMMIT_ANY = 6, + //FT_COMMIT_BOTH = 7, + FT_COMMIT_BROADCAST_ALL = 8, // Broadcast to all leafentries, (commit all transactions). + FT_COMMIT_BROADCAST_TXN = 9, // Broadcast to all leafentries, (commit specific transaction). + FT_ABORT_BROADCAST_TXN = 10, // Broadcast to all leafentries, (commit specific transaction). + FT_INSERT_NO_OVERWRITE = 11, + FT_OPTIMIZE = 12, // Broadcast + FT_OPTIMIZE_FOR_UPGRADE = 13, // same as FT_OPTIMIZE, but record version number in leafnode + FT_UPDATE = 14, + FT_UPDATE_BROADCAST_ALL = 15 +}; + +static inline bool +ft_msg_type_applies_once(enum ft_msg_type type) +{ + bool ret_val; + switch (type) { + case FT_INSERT_NO_OVERWRITE: + case FT_INSERT: + case FT_DELETE_ANY: + case FT_ABORT_ANY: + case FT_COMMIT_ANY: + case FT_UPDATE: + ret_val = true; + break; + case FT_COMMIT_BROADCAST_ALL: + case FT_COMMIT_BROADCAST_TXN: + case FT_ABORT_BROADCAST_TXN: + case FT_OPTIMIZE: + case FT_OPTIMIZE_FOR_UPGRADE: + case FT_UPDATE_BROADCAST_ALL: + case FT_NONE: + ret_val = false; + break; + default: + assert(false); + } + return ret_val; +} + +static inline bool +ft_msg_type_applies_all(enum ft_msg_type type) +{ + bool ret_val; + switch (type) { + case FT_NONE: + case FT_INSERT_NO_OVERWRITE: + case FT_INSERT: + case FT_DELETE_ANY: + case FT_ABORT_ANY: + case FT_COMMIT_ANY: + case FT_UPDATE: + ret_val = false; + break; + case FT_COMMIT_BROADCAST_ALL: + case FT_COMMIT_BROADCAST_TXN: + case FT_ABORT_BROADCAST_TXN: + case FT_OPTIMIZE: + case FT_OPTIMIZE_FOR_UPGRADE: + case FT_UPDATE_BROADCAST_ALL: + ret_val = true; + break; + default: + assert(false); + } + return ret_val; +} + +static inline bool 
+ft_msg_type_does_nothing(enum ft_msg_type type) +{ + return (type == FT_NONE); +} + +typedef struct xids_t *XIDS; + +/* tree commands */ +struct ft_msg { + enum ft_msg_type type; + MSN msn; // message sequence number + XIDS xids; + union { + /* insert or delete */ + struct ft_msg_insert_delete { + const DBT *key; // for insert, delete, upsertdel + const DBT *val; // for insert, delete, (and it is the "extra" for upsertdel, upsertdel_broadcast_all) + } id; + } u; +}; + +// Message sent into the ft to implement insert, delete, update, etc +typedef struct ft_msg FT_MSG_S; +typedef struct ft_msg *FT_MSG; uint32_t ft_msg_get_keylen(FT_MSG ft_msg); @@ -104,21 +205,10 @@ uint32_t ft_msg_get_vallen(FT_MSG ft_msg); XIDS ft_msg_get_xids(FT_MSG ft_msg); -void * ft_msg_get_key(FT_MSG ft_msg); +void *ft_msg_get_key(FT_MSG ft_msg); -void * ft_msg_get_val(FT_MSG ft_msg); +void *ft_msg_get_val(FT_MSG ft_msg); enum ft_msg_type ft_msg_get_type(FT_MSG ft_msg); -void ft_msg_from_fifo_msg(FT_MSG ft_msg, FIFO_MSG fifo_msg); - -#if 0 - -void ft_msg_from_dbts(FT_MSG ft_msg, DBT *key, DBT *val, XIDS xids, enum ft_msg_type type); - #endif - - - -#endif // FT_MSG_H - diff --git a/ft/ft_node-serialize.cc b/ft/ft_node-serialize.cc index 51c93b6cc68..22dbe994eb7 100644 --- a/ft/ft_node-serialize.cc +++ b/ft/ft_node-serialize.cc @@ -98,6 +98,8 @@ PATENT RIGHTS GRANT: #include "ft.h" #include #include +#include "ft/cachetable.h" +#include "ft/rollback.h" static FT_UPGRADE_STATUS_S ft_upgrade_status; diff --git a/ft/ftloader.h b/ft/ftloader.h index c920b4c5362..ab78af34ea2 100644 --- a/ft/ftloader.h +++ b/ft/ftloader.h @@ -92,6 +92,8 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 
11/760379 and to the patents and/or patent applications resulting from it." +#include "ft/cachetable.h" + // The loader callbacks are C functions and need to be defined as such typedef void (*ft_loader_error_func)(DB *, int which_db, int err, DBT *key, DBT *val, void *extra); diff --git a/ft/fttypes.h b/ft/fttypes.h index 73e228cf6ff..f291484433c 100644 --- a/ft/fttypes.h +++ b/ft/fttypes.h @@ -115,9 +115,6 @@ typedef struct ft *FT; typedef struct ft_header *FT_HEADER; typedef struct ft_options *FT_OPTIONS; -struct wbuf; -struct dbuf; - typedef unsigned int ITEMLEN; typedef const void *bytevec; @@ -143,36 +140,6 @@ typedef TOKU_XA_XID *XIDP; // this is the type that's passed to the logger code static inline BLOCKNUM make_blocknum(int64_t b) { BLOCKNUM result={b}; return result; } -// This struct hold information about values stored in the cachetable. -// As one can tell from the names, we are probably violating an -// abstraction layer by placing names. -// -// The purpose of having this struct is to have a way for the -// cachetable to accumulate the some totals we are interested in. -// Breaking this abstraction layer by having these names was the -// easiest way. 
-// -typedef struct pair_attr_s { - long size; // size PAIR's value takes in memory - long nonleaf_size; // size if PAIR is a nonleaf node, 0 otherwise, used only for engine status - long leaf_size; // size if PAIR is a leaf node, 0 otherwise, used only for engine status - long rollback_size; // size of PAIR is a rollback node, 0 otherwise, used only for engine status - long cache_pressure_size; // amount PAIR contributes to cache pressure, is sum of buffer sizes and workdone counts - bool is_valid; -} PAIR_ATTR; - -static inline PAIR_ATTR make_pair_attr(long size) { - PAIR_ATTR result={ - .size = size, - .nonleaf_size = 0, - .leaf_size = 0, - .rollback_size = 0, - .cache_pressure_size = 0, - .is_valid = true - }; - return result; -} - typedef struct { uint32_t len; char *data; @@ -218,165 +185,16 @@ typedef struct tokulogger *TOKULOGGER; typedef struct txn_manager *TXN_MANAGER; #define NULL_LOGGER ((TOKULOGGER)0) typedef struct tokutxn *TOKUTXN; -typedef struct txninfo *TXNINFO; #define NULL_TXN ((TOKUTXN)0) -struct logged_btt_pair { - DISKOFF off; - int32_t size; -}; - -typedef struct cachetable *CACHETABLE; -typedef struct cachefile *CACHEFILE; -typedef struct ctpair *PAIR; -typedef class checkpointer *CHECKPOINTER; -class bn_data; - -/* tree command types */ -enum ft_msg_type { - FT_NONE = 0, - FT_INSERT = 1, - FT_DELETE_ANY = 2, // Delete any matching key. This used to be called FT_DELETE. - //FT_DELETE_BOTH = 3, - FT_ABORT_ANY = 4, // Abort any commands on any matching key. - //FT_ABORT_BOTH = 5, // Abort commands that match both the key and the value - FT_COMMIT_ANY = 6, - //FT_COMMIT_BOTH = 7, - FT_COMMIT_BROADCAST_ALL = 8, // Broadcast to all leafentries, (commit all transactions). - FT_COMMIT_BROADCAST_TXN = 9, // Broadcast to all leafentries, (commit specific transaction). - FT_ABORT_BROADCAST_TXN = 10, // Broadcast to all leafentries, (commit specific transaction). 
- FT_INSERT_NO_OVERWRITE = 11, - FT_OPTIMIZE = 12, // Broadcast - FT_OPTIMIZE_FOR_UPGRADE = 13, // same as FT_OPTIMIZE, but record version number in leafnode - FT_UPDATE = 14, - FT_UPDATE_BROADCAST_ALL = 15 -}; - -static inline bool -ft_msg_type_applies_once(enum ft_msg_type type) -{ - bool ret_val; - switch (type) { - case FT_INSERT_NO_OVERWRITE: - case FT_INSERT: - case FT_DELETE_ANY: - case FT_ABORT_ANY: - case FT_COMMIT_ANY: - case FT_UPDATE: - ret_val = true; - break; - case FT_COMMIT_BROADCAST_ALL: - case FT_COMMIT_BROADCAST_TXN: - case FT_ABORT_BROADCAST_TXN: - case FT_OPTIMIZE: - case FT_OPTIMIZE_FOR_UPGRADE: - case FT_UPDATE_BROADCAST_ALL: - case FT_NONE: - ret_val = false; - break; - default: - assert(false); - } - return ret_val; -} - -static inline bool -ft_msg_type_applies_all(enum ft_msg_type type) -{ - bool ret_val; - switch (type) { - case FT_NONE: - case FT_INSERT_NO_OVERWRITE: - case FT_INSERT: - case FT_DELETE_ANY: - case FT_ABORT_ANY: - case FT_COMMIT_ANY: - case FT_UPDATE: - ret_val = false; - break; - case FT_COMMIT_BROADCAST_ALL: - case FT_COMMIT_BROADCAST_TXN: - case FT_ABORT_BROADCAST_TXN: - case FT_OPTIMIZE: - case FT_OPTIMIZE_FOR_UPGRADE: - case FT_UPDATE_BROADCAST_ALL: - ret_val = true; - break; - default: - assert(false); - } - return ret_val; -} - -static inline bool -ft_msg_type_does_nothing(enum ft_msg_type type) -{ - return (type == FT_NONE); -} - typedef struct xids_t *XIDS; -typedef struct fifo_msg_t *FIFO_MSG; -/* tree commands */ -struct ft_msg { - enum ft_msg_type type; - MSN msn; // message sequence number - XIDS xids; - union { - /* insert or delete */ - struct ft_msg_insert_delete { - const DBT *key; // for insert, delete, upsertdel - const DBT *val; // for insert, delete, (and it is the "extra" for upsertdel, upsertdel_broadcast_all) - } id; - } u; -}; - -// Message sent into the ft to implement insert, delete, update, etc -typedef struct ft_msg FT_MSG_S; -typedef struct ft_msg *FT_MSG; typedef int (*ft_compare_func)(DB *, 
const DBT *, const DBT *); typedef void (*setval_func)(const DBT *, void *); typedef int (*ft_update_func)(DB *, const DBT *, const DBT *, const DBT *, setval_func, void *); -typedef void (*on_redirect_callback)(FT_HANDLE, void*); typedef void (*remove_ft_ref_callback)(FT, void*); +typedef void (*on_redirect_callback)(FT_HANDLE, void*); #define UU(x) x __attribute__((__unused__)) -typedef struct memarena *MEMARENA; -typedef struct rollback_log_node *ROLLBACK_LOG_NODE; -typedef struct serialized_rollback_log_node *SERIALIZED_ROLLBACK_LOG_NODE; - -// -// Types of snapshots that can be taken by a tokutxn -// - TXN_SNAPSHOT_NONE: means that there is no snapshot. Reads do not use snapshot reads. -// used for SERIALIZABLE and READ UNCOMMITTED -// - TXN_SNAPSHOT_ROOT: means that all tokutxns use their root transaction's snapshot -// used for REPEATABLE READ -// - TXN_SNAPSHOT_CHILD: means that each child tokutxn creates its own snapshot -// used for READ COMMITTED -// - -typedef enum __TXN_SNAPSHOT_TYPE { - TXN_SNAPSHOT_NONE=0, - TXN_SNAPSHOT_ROOT=1, - TXN_SNAPSHOT_CHILD=2 -} TXN_SNAPSHOT_TYPE; - -typedef struct ancestors *ANCESTORS; -typedef struct pivot_bounds const * const PIVOT_BOUNDS; -typedef struct ftnode_fetch_extra *FTNODE_FETCH_EXTRA; -typedef struct unlockers *UNLOCKERS; - -enum reactivity { - RE_STABLE, - RE_FUSIBLE, - RE_FISSIBLE -}; - -enum split_mode { - SPLIT_EVENLY, - SPLIT_LEFT_HEAVY, - SPLIT_RIGHT_HEAVY -}; - #endif diff --git a/ft/le-cursor.cc b/ft/le-cursor.cc index b08fc62632c..3eb73f1345b 100644 --- a/ft/le-cursor.cc +++ b/ft/le-cursor.cc @@ -89,9 +89,10 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2010-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 
11/760379 and to the patents and/or patent applications resulting from it." -#include "ft.h" -#include "ft-internal.h" -#include "le-cursor.h" +#include "ft/ft.h" +#include "ft/ft-internal.h" +#include "ft/le-cursor.h" +#include "ft/cursor.h" // A LE_CURSOR is a special purpose FT_CURSOR that: // - enables prefetching diff --git a/ft/leafentry.h b/ft/leafentry.h index 735e43207e6..4563e26c384 100644 --- a/ft/leafentry.h +++ b/ft/leafentry.h @@ -98,8 +98,9 @@ PATENT RIGHTS GRANT: #include #include -#include "txn_manager.h" -#include "rbuf.h" +#include "ft/txn_manager.h" +#include "ft/rbuf.h" +#include "ft/ft_msg.h" /* Memory format of packed leaf entry @@ -241,6 +242,8 @@ toku_le_upgrade_13_14(LEAFENTRY_13 old_leafentry, // NULL if there was no stored size_t *new_leafentry_memorysize, LEAFENTRY *new_leafentry_p); +class bn_data; + void toku_le_apply_msg(FT_MSG msg, LEAFENTRY old_leafentry, // NULL if there was no stored data. diff --git a/ft/log-internal.h b/ft/log-internal.h index be8ab7a53da..e8acd2e91eb 100644 --- a/ft/log-internal.h +++ b/ft/log-internal.h @@ -119,6 +119,7 @@ using namespace toku; #define LOGGER_MIN_BUF_SIZE (1<<24) +// TODO: Remove mylock, it has no value struct mylock { toku_mutex_t lock; }; @@ -283,6 +284,7 @@ struct tokutxn { // txn to not transition to commit or abort uint64_t client_id; }; +typedef struct tokutxn *TOKUTXN; static inline int txn_has_current_rollback_log(TOKUTXN txn) { @@ -306,6 +308,9 @@ struct txninfo { BLOCKNUM current_rollback; }; +// TODO: Remove null txn +#define NULL_TXN ((TOKUTXN)0) + static inline int toku_logsizeof_uint8_t (uint32_t v __attribute__((__unused__))) { return 1; } diff --git a/ft/logformat.cc b/ft/logformat.cc index 4d32d9f6eac..6dab12bdf31 100644 --- a/ft/logformat.cc +++ b/ft/logformat.cc @@ -195,7 +195,7 @@ const struct logtype logtypes[] = { {"BYTESTRING", "iname", 0}, // pathname of file {"uint8_t", "unlink_on_close", 0}, NULLFIELD}, IGNORE_LOG_BEGIN}, - //We do not use a TXNINFO struct since 
recovery log has + //We do not use a txninfo struct since recovery log has //FILENUMS and TOKUTXN has FTs (for open_fts) {"xstillopen", 's', FA{{"TXNID_PAIR", "xid", 0}, {"TXNID_PAIR", "parentxid", 0}, diff --git a/ft/logger.h b/ft/logger.h index 6488ec0707d..11731fb5b97 100644 --- a/ft/logger.h +++ b/ft/logger.h @@ -92,8 +92,9 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#include "fttypes.h" -#include "ft_layout_version.h" +#include "ft/fttypes.h" +#include "ft/ft-internal.h" +#include "ft/ft_layout_version.h" enum { TOKU_LOG_VERSION_1 = 1, diff --git a/ft/rollback.h b/ft/rollback.h index 2e9493b0e6b..e9cb528b7a9 100644 --- a/ft/rollback.h +++ b/ft/rollback.h @@ -92,8 +92,13 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
-#include -#include "sub_block.h" +#include "ft/sub_block.h" +#include "ft/cachetable.h" + +#include "util/memarena.h" + +typedef struct rollback_log_node *ROLLBACK_LOG_NODE; +typedef struct serialized_rollback_log_node *SERIALIZED_ROLLBACK_LOG_NODE; void toku_poll_txn_progress_function(TOKUTXN txn, uint8_t is_commit, uint8_t stall_for_checkpoint); @@ -172,6 +177,7 @@ struct serialized_rollback_log_node { BLOCKNUM blocknum; struct sub_block sub_block[max_sub_blocks]; }; +typedef struct serialized_rollback_log_node *SERIALIZED_ROLLBACK_LOG_NODE; static inline void toku_static_serialized_rollback_log_destroy(SERIALIZED_ROLLBACK_LOG_NODE log) { diff --git a/ft/sub_block.h b/ft/sub_block.h index 23fad83c966..d00df6fa51a 100644 --- a/ft/sub_block.h +++ b/ft/sub_block.h @@ -112,6 +112,7 @@ struct sub_block { uint32_t xsum; // sub block checksum }; +typedef struct sub_block *SUB_BLOCK; struct stored_sub_block { uint32_t uncompressed_size; diff --git a/ft/txn.cc b/ft/txn.cc index 403c8e92c45..999dd242c3e 100644 --- a/ft/txn.cc +++ b/ft/txn.cc @@ -388,7 +388,7 @@ toku_txn_update_xids_in_txn(TOKUTXN txn, TXNID xid) //Used on recovery to recover a transaction. 
int -toku_txn_load_txninfo (TOKUTXN txn, TXNINFO info) { +toku_txn_load_txninfo (TOKUTXN txn, struct txninfo *info) { txn->roll_info.rollentry_raw_count = info->rollentry_raw_count; uint32_t i; for (i = 0; i < info->num_fts; i++) { diff --git a/ft/txn.h b/ft/txn.h index 5e83d6511a2..57f3b0ed805 100644 --- a/ft/txn.h +++ b/ft/txn.h @@ -142,7 +142,7 @@ int toku_txn_begin_with_xid ( void toku_txn_update_xids_in_txn(TOKUTXN txn, TXNID xid); -int toku_txn_load_txninfo (TOKUTXN txn, TXNINFO info); +int toku_txn_load_txninfo (TOKUTXN txn, struct txninfo *info); int toku_txn_commit_txn (TOKUTXN txn, int nosync, TXN_PROGRESS_POLL_FUNCTION poll, void *poll_extra); diff --git a/ft/txn_manager.h b/ft/txn_manager.h index 12267297a0e..58d7555dc05 100644 --- a/ft/txn_manager.h +++ b/ft/txn_manager.h @@ -123,6 +123,7 @@ struct txn_manager { TXNID last_xid_seen_for_recover; TXNID last_calculated_oldest_referenced_xid; }; +typedef struct txn_manager *TXN_MANAGER; struct txn_manager_state { txn_manager_state(TXN_MANAGER mgr) : @@ -189,6 +190,22 @@ TXNID toku_txn_manager_get_oldest_living_xid(TXN_MANAGER txn_manager); TXNID toku_txn_manager_get_oldest_referenced_xid_estimate(TXN_MANAGER txn_manager); +// +// Types of snapshots that can be taken by a tokutxn +// - TXN_SNAPSHOT_NONE: means that there is no snapshot. Reads do not use snapshot reads. 
+// used for SERIALIZABLE and READ UNCOMMITTED +// - TXN_SNAPSHOT_ROOT: means that all tokutxns use their root transaction's snapshot +// used for REPEATABLE READ +// - TXN_SNAPSHOT_CHILD: means that each child tokutxn creates its own snapshot +// used for READ COMMITTED +// + +typedef enum __TXN_SNAPSHOT_TYPE { + TXN_SNAPSHOT_NONE=0, + TXN_SNAPSHOT_ROOT=1, + TXN_SNAPSHOT_CHILD=2 +} TXN_SNAPSHOT_TYPE; + void toku_txn_manager_handle_snapshot_create_for_child_txn( TOKUTXN txn, TXN_MANAGER txn_manager, From ece1d4c4edb78f4e694dfc09557ce0c227c563bb Mon Sep 17 00:00:00 2001 From: John Esmet Date: Sat, 14 Jun 2014 20:24:46 -0400 Subject: [PATCH 006/190] FT-245 Move queue to util, use toku_ prefix for consistency with the rest of the code base --- ft/CMakeLists.txt | 1 - ft/ftloader-internal.h | 2 +- ft/ftloader.cc | 30 ++++++++++---------- ft/tests/ftloader-test-bad-generate.cc | 2 +- ft/tests/ftloader-test-extractor-errors.cc | 2 +- ft/tests/ftloader-test-extractor.cc | 2 +- ft/tests/ftloader-test-merge-files-dbufio.cc | 8 +++--- ft/tests/ftloader-test-writer-errors.cc | 14 ++++----- ft/tests/ftloader-test-writer.cc | 14 ++++----- ft/tests/ftloader-test.cc | 4 +-- util/CMakeLists.txt | 1 + {ft => util}/queue.cc | 10 +++---- {ft => util}/queue.h | 12 ++++---- {ft => util}/tests/queue-test.cc | 12 ++++---- 14 files changed, 56 insertions(+), 58 deletions(-) rename {ft => util}/queue.cc (96%) rename {ft => util}/queue.h (95%) rename {ft => util}/tests/queue-test.cc (94%) diff --git a/ft/CMakeLists.txt b/ft/CMakeLists.txt index 95d7866cb9d..57158a0972f 100644 --- a/ft/CMakeLists.txt +++ b/ft/CMakeLists.txt @@ -54,7 +54,6 @@ set(FT_SOURCES log_upgrade minicron pqueue - queue quicklz recover rollback diff --git a/ft/ftloader-internal.h b/ft/ftloader-internal.h index d60537490dd..11effdfb3da 100644 --- a/ft/ftloader-internal.h +++ b/ft/ftloader-internal.h @@ -93,7 +93,7 @@ PATENT RIGHTS GRANT: #include #include "fttypes.h" #include "ftloader.h" -#include "queue.h" +#include 
"util/queue.h" #include #include "dbufio.h" diff --git a/ft/ftloader.cc b/ft/ftloader.cc index 6b5e9ae986b..34ac684fe9c 100644 --- a/ft/ftloader.cc +++ b/ft/ftloader.cc @@ -423,7 +423,7 @@ void toku_ft_loader_internal_destroy (FTLOADER bl, bool is_error) { destroy_rowset(&bl->primary_rowset); if (bl->primary_rowset_queue) { - queue_destroy(bl->primary_rowset_queue); + toku_queue_destroy(bl->primary_rowset_queue); bl->primary_rowset_queue = nullptr; } @@ -629,7 +629,7 @@ int toku_ft_loader_internal_init (/* out */ FTLOADER *blp, int r = init_rowset(&bl->primary_rowset, memory_per_rowset_during_extract(bl)); if (r!=0) { toku_ft_loader_internal_destroy(bl, true); return r; } } - { int r = queue_create(&bl->primary_rowset_queue, EXTRACTOR_QUEUE_DEPTH); + { int r = toku_queue_create(&bl->primary_rowset_queue, EXTRACTOR_QUEUE_DEPTH); if (r!=0) { toku_ft_loader_internal_destroy(bl, true); return r; } } { @@ -1138,7 +1138,7 @@ static void* extractor_thread (void *blv) { while (1) { void *item; { - int rq = queue_deq(bl->primary_rowset_queue, &item, NULL, NULL); + int rq = toku_queue_deq(bl->primary_rowset_queue, &item, NULL, NULL); if (rq==EOF) break; invariant(rq==0); // other errors are arbitrarily bad. 
} @@ -1169,7 +1169,7 @@ static void enqueue_for_extraction (FTLOADER bl) { struct rowset *XMALLOC(enqueue_me); *enqueue_me = bl->primary_rowset; zero_rowset(&bl->primary_rowset); - int r = queue_enq(bl->primary_rowset_queue, (void*)enqueue_me, 1, NULL); + int r = toku_queue_enq(bl->primary_rowset_queue, (void*)enqueue_me, 1, NULL); resource_assert_zero(r); } @@ -1206,7 +1206,7 @@ finish_extractor (FTLOADER bl) { } //printf("%s:%d please finish extraction\n", __FILE__, __LINE__); { - int r = queue_eof(bl->primary_rowset_queue); + int r = toku_queue_eof(bl->primary_rowset_queue); invariant(r==0); } //printf("%s:%d joining\n", __FILE__, __LINE__); @@ -1218,7 +1218,7 @@ finish_extractor (FTLOADER bl) { bl->extractor_live = false; } { - int r = queue_destroy(bl->primary_rowset_queue); + int r = toku_queue_destroy(bl->primary_rowset_queue); invariant(r==0); bl->primary_rowset_queue = nullptr; } @@ -1882,7 +1882,7 @@ int toku_merge_some_files_using_dbufio (const bool to_q, FIDX dest_data, QUEUE q if (to_q) { if (row_wont_fit(output_rowset, keys[mini].size + vals[mini].size)) { { - int r = queue_enq(q, (void*)output_rowset, 1, NULL); + int r = toku_queue_enq(q, (void*)output_rowset, 1, NULL); if (r!=0) { result = r; break; @@ -1958,7 +1958,7 @@ int toku_merge_some_files_using_dbufio (const bool to_q, FIDX dest_data, QUEUE q } if (result==0 && to_q) { - int r = queue_enq(q, (void*)output_rowset, 1, NULL); + int r = toku_queue_enq(q, (void*)output_rowset, 1, NULL); if (r!=0) result = r; else @@ -2149,7 +2149,7 @@ int merge_files (struct merge_fileset *fs, if (result) ft_loader_set_panic(bl, result, true, which_db, nullptr, nullptr); { - int r = queue_eof(output_q); + int r = toku_queue_eof(output_q); if (r!=0 && result==0) result = r; } // It's conceivable that the progress_allocation could be nonzero (for example if bl->N==0) @@ -2371,7 +2371,7 @@ static int write_header (struct dbout *out, long long translation_location_on_di static void drain_writer_q(QUEUE q) { void 
*item; while (1) { - int r = queue_deq(q, &item, NULL, NULL); + int r = toku_queue_deq(q, &item, NULL, NULL); if (r == EOF) break; invariant(r == 0); @@ -2501,7 +2501,7 @@ static int toku_loader_write_ft_from_q (FTLOADER bl, while (result == 0) { void *item; { - int rr = queue_deq(q, &item, NULL, NULL); + int rr = toku_queue_deq(q, &item, NULL, NULL); if (rr == EOF) break; if (rr != 0) { ft_loader_set_panic(bl, rr, true, which_db, nullptr, nullptr); @@ -2723,7 +2723,7 @@ static int loader_do_i (FTLOADER bl, struct rowset *rows = &(bl->rows[which_db]); invariant(rows->data==NULL); // the rows should be all cleaned up already - int r = queue_create(&bl->fractal_queues[which_db], FRACTAL_WRITER_QUEUE_DEPTH); + int r = toku_queue_create(&bl->fractal_queues[which_db], FRACTAL_WRITER_QUEUE_DEPTH); if (r) goto error; { @@ -2767,7 +2767,7 @@ static int loader_do_i (FTLOADER bl, r = toku_pthread_create(bl->fractal_threads+which_db, NULL, fractal_thread, (void*)&fta); if (r) { - int r2 __attribute__((__unused__)) = queue_destroy(bl->fractal_queues[which_db]); + int r2 __attribute__((__unused__)) = toku_queue_destroy(bl->fractal_queues[which_db]); // ignore r2, since we already have an error bl->fractal_queues[which_db] = nullptr; goto error; @@ -2788,7 +2788,7 @@ static int loader_do_i (FTLOADER bl, if (r == 0) r = fta.errno_result; } } else { - queue_eof(bl->fractal_queues[which_db]); + toku_queue_eof(bl->fractal_queues[which_db]); r = toku_loader_write_ft_from_q(bl, descriptor, fd, progress_allocation, bl->fractal_queues[which_db], bl->extracted_datasizes[which_db], which_db, target_nodesize, target_basementnodesize, target_compression_method, target_fanout); @@ -2797,7 +2797,7 @@ static int loader_do_i (FTLOADER bl, error: // this is the cleanup code. Even if r==0 (no error) we fall through to here. 
if (bl->fractal_queues[which_db]) { - int r2 = queue_destroy(bl->fractal_queues[which_db]); + int r2 = toku_queue_destroy(bl->fractal_queues[which_db]); invariant(r2==0); bl->fractal_queues[which_db] = nullptr; } diff --git a/ft/tests/ftloader-test-bad-generate.cc b/ft/tests/ftloader-test-bad-generate.cc index 9ae24f7c4ec..ca3e649c565 100644 --- a/ft/tests/ftloader-test-bad-generate.cc +++ b/ft/tests/ftloader-test-bad-generate.cc @@ -183,7 +183,7 @@ static void test_extractor(int nrows, int nrowsets, bool expect_fail) { // feed rowsets to the extractor for (int i = 0; i < nrowsets; i++) { - r = queue_enq(loader->primary_rowset_queue, rowset[i], 1, NULL); + r = toku_queue_enq(loader->primary_rowset_queue, rowset[i], 1, NULL); assert(r == 0); } diff --git a/ft/tests/ftloader-test-extractor-errors.cc b/ft/tests/ftloader-test-extractor-errors.cc index 007fd39fe08..3cd5a1e586f 100644 --- a/ft/tests/ftloader-test-extractor-errors.cc +++ b/ft/tests/ftloader-test-extractor-errors.cc @@ -201,7 +201,7 @@ static void test_extractor(int nrows, int nrowsets, bool expect_fail, const char // feed rowsets to the extractor for (int i = 0; i < nrowsets; i++) { - r = queue_enq(loader->primary_rowset_queue, rowset[i], 1, NULL); + r = toku_queue_enq(loader->primary_rowset_queue, rowset[i], 1, NULL); assert(r == 0); } diff --git a/ft/tests/ftloader-test-extractor.cc b/ft/tests/ftloader-test-extractor.cc index afba44a7a22..b806f44ab2b 100644 --- a/ft/tests/ftloader-test-extractor.cc +++ b/ft/tests/ftloader-test-extractor.cc @@ -415,7 +415,7 @@ static void test_extractor(int nrows, int nrowsets, const char *testdir) { // feed rowsets to the extractor for (int i = 0; i < nrowsets; i++) { - r = queue_enq(loader->primary_rowset_queue, rowset[i], 1, NULL); + r = toku_queue_enq(loader->primary_rowset_queue, rowset[i], 1, NULL); assert(r == 0); } r = toku_ft_loader_finish_extractor(loader); diff --git a/ft/tests/ftloader-test-merge-files-dbufio.cc b/ft/tests/ftloader-test-merge-files-dbufio.cc 
index cdd4c1d6691..44f3d27fcd6 100644 --- a/ft/tests/ftloader-test-merge-files-dbufio.cc +++ b/ft/tests/ftloader-test-merge-files-dbufio.cc @@ -346,7 +346,7 @@ static void *consumer_thread (void *ctv) { struct consumer_thunk *cthunk = (struct consumer_thunk *)ctv; while (1) { void *item; - int r = queue_deq(cthunk->q, &item, NULL, NULL); + int r = toku_queue_deq(cthunk->q, &item, NULL, NULL); if (r==EOF) return NULL; assert(r==0); struct rowset *rowset = (struct rowset *)item; @@ -423,7 +423,7 @@ static void test (const char *directory, bool is_error) { ft_loader_set_fractal_workers_count_from_c(bl); QUEUE q; - { int r = queue_create(&q, 1000); assert(r==0); } + { int r = toku_queue_create(&q, 1000); assert(r==0); } DBUFIO_FILESET bfs; const int MERGE_BUF_SIZE = 100000; // bigger than 64K so that we will trigger malloc issues. { int r = create_dbufio_fileset(&bfs, N_SOURCES, fds, MERGE_BUF_SIZE, false); assert(r==0); } @@ -474,7 +474,7 @@ static void test (const char *directory, bool is_error) { panic_dbufio_fileset(bfs, r); } { - int r = queue_eof(q); + int r = toku_queue_eof(q); assert(r==0); } @@ -501,7 +501,7 @@ static void test (const char *directory, bool is_error) { } } { - int r = queue_destroy(q); + int r = toku_queue_destroy(q); assert(r==0); } toku_ft_loader_internal_destroy(bl, false); diff --git a/ft/tests/ftloader-test-writer-errors.cc b/ft/tests/ftloader-test-writer-errors.cc index 7767cee00e0..0309e6082f3 100644 --- a/ft/tests/ftloader-test-writer-errors.cc +++ b/ft/tests/ftloader-test-writer-errors.cc @@ -159,20 +159,20 @@ static int write_dbfile (char *tf_template, int n, char *output_name, bool expec ft_loader_fi_close_all(&bl.file_infos); QUEUE q; - r = queue_create(&q, 0xFFFFFFFF); // infinite queue. + r = toku_queue_create(&q, 0xFFFFFFFF); // infinite queue. assert(r==0); r = merge_files(&fs, &bl, 0, dest_db, compare_ints, 0, q); CKERR(r); assert(fs.n_temp_files==0); QUEUE q2; - r = queue_create(&q2, 0xFFFFFFFF); // infinite queue. 
+ r = toku_queue_create(&q2, 0xFFFFFFFF); // infinite queue. assert(r==0); size_t num_found = 0; size_t found_size_est = 0; while (1) { void *v; - r = queue_deq(q, &v, NULL, NULL); + r = toku_queue_deq(q, &v, NULL, NULL); if (r==EOF) break; struct rowset *rs = (struct rowset *)v; if (verbose) printf("v=%p\n", v); @@ -187,16 +187,16 @@ static int write_dbfile (char *tf_template, int n, char *output_name, bool expec num_found += rs->n_rows; - r = queue_enq(q2, v, 0, NULL); + r = toku_queue_enq(q2, v, 0, NULL); assert(r==0); } assert((int)num_found == n); if (!expect_error) assert(found_size_est == size_est); - r = queue_eof(q2); + r = toku_queue_eof(q2); assert(r==0); - r = queue_destroy(q); + r = toku_queue_destroy(q); assert(r==0); DESCRIPTOR_S desc; @@ -225,7 +225,7 @@ static int write_dbfile (char *tf_template, int n, char *output_name, bool expec ft_loader_destroy_poll_callback(&bl.poll_callback); ft_loader_lock_destroy(&bl); - r = queue_destroy(q2); + r = toku_queue_destroy(q2); assert(r==0); destroy_merge_fileset(&fs); diff --git a/ft/tests/ftloader-test-writer.cc b/ft/tests/ftloader-test-writer.cc index bf0641a3939..99cd5fdd3fd 100644 --- a/ft/tests/ftloader-test-writer.cc +++ b/ft/tests/ftloader-test-writer.cc @@ -215,20 +215,20 @@ static void test_write_dbfile (char *tf_template, int n, char *output_name, TXNI ft_loader_fi_close_all(&bl.file_infos); QUEUE q; - r = queue_create(&q, 0xFFFFFFFF); // infinite queue. + r = toku_queue_create(&q, 0xFFFFFFFF); // infinite queue. assert(r==0); r = merge_files(&fs, &bl, 0, dest_db, compare_ints, 0, q); CKERR(r); assert(fs.n_temp_files==0); QUEUE q2; - r = queue_create(&q2, 0xFFFFFFFF); // infinite queue. + r = toku_queue_create(&q2, 0xFFFFFFFF); // infinite queue. 
assert(r==0); size_t num_found = 0; size_t found_size_est = 0; while (1) { void *v; - r = queue_deq(q, &v, NULL, NULL); + r = toku_queue_deq(q, &v, NULL, NULL); if (r==EOF) break; struct rowset *rs = (struct rowset *)v; if (verbose) printf("v=%p\n", v); @@ -243,16 +243,16 @@ static void test_write_dbfile (char *tf_template, int n, char *output_name, TXNI num_found += rs->n_rows; - r = queue_enq(q2, v, 0, NULL); + r = toku_queue_enq(q2, v, 0, NULL); assert(r==0); } assert((int)num_found == n); assert(found_size_est == size_est); - r = queue_eof(q2); + r = toku_queue_eof(q2); assert(r==0); - r = queue_destroy(q); + r = toku_queue_destroy(q); assert(r==0); DESCRIPTOR_S desc; @@ -265,7 +265,7 @@ static void test_write_dbfile (char *tf_template, int n, char *output_name, TXNI r = toku_loader_write_ft_from_q_in_C(&bl, &desc, fd, 1000, q2, size_est, 0, 0, 0, TOKU_DEFAULT_COMPRESSION_METHOD, 16); assert(r==0); - r = queue_destroy(q2); + r = toku_queue_destroy(q2); assert_zero(r); destroy_merge_fileset(&fs); diff --git a/ft/tests/ftloader-test.cc b/ft/tests/ftloader-test.cc index 343262720a8..d1ed27f02d4 100644 --- a/ft/tests/ftloader-test.cc +++ b/ft/tests/ftloader-test.cc @@ -412,7 +412,7 @@ static void test_merge_files (const char *tf_template, const char *output_name) ft_loader_fi_close_all(&bl.file_infos); QUEUE q; - r = queue_create(&q, 0xFFFFFFFF); // infinite queue. + r = toku_queue_create(&q, 0xFFFFFFFF); // infinite queue. 
assert(r==0); r = merge_files(&fs, &bl, 0, dest_db, compare_ints, 0, q); CKERR(r); @@ -436,7 +436,7 @@ static void test_merge_files (const char *tf_template, const char *output_name) // verify the dbfile verify_dbfile(10, sorted_keys, sorted_vals, output_name); - r = queue_destroy(q); + r = toku_queue_destroy(q); assert(r==0); } diff --git a/util/CMakeLists.txt b/util/CMakeLists.txt index 6a0bb6208a5..b1faed58f2d 100644 --- a/util/CMakeLists.txt +++ b/util/CMakeLists.txt @@ -5,6 +5,7 @@ set(util_srcs memarena mempool partitioned_counter + queue threadpool scoped_malloc x1764 diff --git a/ft/queue.cc b/util/queue.cc similarity index 96% rename from ft/queue.cc rename to util/queue.cc index 37c3bc025f8..ecc6747c3e3 100644 --- a/ft/queue.cc +++ b/util/queue.cc @@ -128,7 +128,7 @@ struct queue { // q->mutex and q->cond are used as condition variables. -int queue_create (QUEUE *q, uint64_t weight_limit) +int toku_queue_create (QUEUE *q, uint64_t weight_limit) { QUEUE CALLOC(result); if (result==NULL) return get_error_errno(); @@ -143,7 +143,7 @@ int queue_create (QUEUE *q, uint64_t weight_limit) return 0; } -int queue_destroy (QUEUE q) +int toku_queue_destroy (QUEUE q) { if (q->head) return EINVAL; assert(q->contents_weight==0); @@ -153,7 +153,7 @@ int queue_destroy (QUEUE q) return 0; } -int queue_enq (QUEUE q, void *item, uint64_t weight, uint64_t *total_weight_after_enq) +int toku_queue_enq (QUEUE q, void *item, uint64_t weight, uint64_t *total_weight_after_enq) { toku_mutex_lock(&q->mutex); assert(!q->eof); @@ -189,7 +189,7 @@ int queue_enq (QUEUE q, void *item, uint64_t weight, uint64_t *total_weight_afte return 0; } -int queue_eof (QUEUE q) +int toku_queue_eof (QUEUE q) { toku_mutex_lock(&q->mutex); assert(!q->eof); @@ -199,7 +199,7 @@ int queue_eof (QUEUE q) return 0; } -int queue_deq (QUEUE q, void **item, uint64_t *weight, uint64_t *total_weight_after_deq) +int toku_queue_deq (QUEUE q, void **item, uint64_t *weight, uint64_t *total_weight_after_deq) { 
toku_mutex_lock(&q->mutex); int result; diff --git a/ft/queue.h b/util/queue.h similarity index 95% rename from ft/queue.h rename to util/queue.h index ec12a0193d2..d2feef5acde 100644 --- a/ft/queue.h +++ b/util/queue.h @@ -92,8 +92,6 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#include "fttypes.h" - // The abstraction: // // queue.h implements a queue suitable for a producer-consumer relationship between two pthreads. @@ -110,21 +108,21 @@ PATENT RIGHTS GRANT: typedef struct queue *QUEUE; -int queue_create (QUEUE *q, uint64_t weight_limit); +int toku_queue_create (QUEUE *q, uint64_t weight_limit); // Effect: Create a queue with a given weight limit. The queue is initially empty. -int queue_enq (QUEUE q, void *item, uint64_t weight, uint64_t *total_weight_after_enq); +int toku_queue_enq (QUEUE q, void *item, uint64_t weight, uint64_t *total_weight_after_enq); // Effect: Insert ITEM of weight WEIGHT into queue. If the resulting contents weight too much then block (don't return) until the total weight is low enough. // If total_weight_after_enq!=NULL then return the current weight of the items in the queue (after finishing blocking on overweight, and after enqueueing the item). // If successful return 0. // If an error occurs, return the error number, and the state of the queue is undefined. The item may have been enqueued or not, and in fact the queue may be badly corrupted if the condition variables go awry. If it's just a matter of out-of-memory, then the queue is probably OK. // Requires: There is only a single consumer. 
(We wake up the consumer using a pthread_cond_signal (which is suitable only for single consumers.) -int queue_eof (QUEUE q); +int toku_queue_eof (QUEUE q); // Effect: Inform the queue that no more values will be inserted. After all the values that have been inserted are dequeued, further dequeue operations will return EOF. // Returns 0 on success. On failure, things are pretty bad (likely to be some sort of mutex failure). -int queue_deq (QUEUE q, void **item, uint64_t *weight, uint64_t *total_weight_after_deq); +int toku_queue_deq (QUEUE q, void **item, uint64_t *weight, uint64_t *total_weight_after_deq); // Effect: Wait until the queue becomes nonempty. Then dequeue and return the oldest item. The item and its weight are returned in *ITEM. // If weight!=NULL then return the item's weight in *weight. // If total_weight_after_deq!=NULL then return the current weight of the items in the queue (after dequeuing the item). @@ -132,7 +130,7 @@ int queue_deq (QUEUE q, void **item, uint64_t *weight, uint64_t *total_weight_af // Return EOF is we no more items will be returned. // Usage note: The queue should be destroyed only after any consumers will no longer look at it (for example, they saw EOF). -int queue_destroy (QUEUE q); +int toku_queue_destroy (QUEUE q); // Effect: Destroy the queue. // Requires: The queue must be empty and no consumer should try to dequeue after this (one way to do this is to make sure the consumer saw EOF). // Returns 0 on success. If the queue is not empty, returns EINVAL. Other errors are likely to be bad (some sort of mutex or condvar failure). 
diff --git a/ft/tests/queue-test.cc b/util/tests/queue-test.cc similarity index 94% rename from ft/tests/queue-test.cc rename to util/tests/queue-test.cc index edc2c628f94..f613ed561f2 100644 --- a/ft/tests/queue-test.cc +++ b/util/tests/queue-test.cc @@ -94,7 +94,7 @@ PATENT RIGHTS GRANT: #include #include #include -#include "queue.h" +#include "util/queue.h" static int verbose=1; @@ -108,7 +108,7 @@ static void *start_0 (void *arg) { long count = 0; while (1) { uint64_t this_max_weight; - int r=queue_deq(q, &item, &weight, &this_max_weight); + int r=toku_queue_deq(q, &item, &weight, &this_max_weight); if (r==EOF) break; assert(r==0); if (this_max_weight>d_max_weight) d_max_weight=this_max_weight; @@ -123,7 +123,7 @@ static void *start_0 (void *arg) { static void enq (QUEUE q, long v, uint64_t weight) { uint64_t this_max_weight; - int r = queue_enq(q, (void*)v, (weight==0)?0:1, &this_max_weight); + int r = toku_queue_enq(q, (void*)v, (weight==0)?0:1, &this_max_weight); assert(r==0); if (this_max_weight>e_max_weight) e_max_weight=this_max_weight; //printf("E(%ld)=%ld %ld\n", v, this_max_weight, e_max_weight); @@ -138,7 +138,7 @@ static void queue_test_0 (uint64_t weight) d_max_weight = 0; QUEUE q; int r; - r = queue_create(&q, weight); assert(r==0); + r = toku_queue_create(&q, weight); assert(r==0); toku_pthread_t thread; r = toku_pthread_create(&thread, NULL, start_0, q); assert(r==0); enq(q, 0L, weight); @@ -148,12 +148,12 @@ static void queue_test_0 (uint64_t weight) sleep(1); enq(q, 4L, weight); enq(q, 5L, weight); - r = queue_eof(q); assert(r==0); + r = toku_queue_eof(q); assert(r==0); void *result; r = toku_pthread_join(thread, &result); assert(r==0); assert(result==NULL); assert(count_0==6); - r = queue_destroy(q); + r = toku_queue_destroy(q); assert(d_max_weight <= weight); assert(e_max_weight <= weight); } From d0542cb67b731c4da14340d1a71ed0213ebac9de Mon Sep 17 00:00:00 2001 From: John Esmet Date: Sat, 14 Jun 2014 20:24:46 -0400 Subject: [PATCH 007/190] 
FT-244 Move the one and only default key comparison implementation function to ft-ops.cc --- ft/CMakeLists.txt | 1 - ft/ft-ops.cc | 31 +++++++- ft/ft.h | 4 + ft/key.cc | 189 -------------------------------------------- ft/key.h | 104 ------------------------ ft/tests/keytest.cc | 11 +-- ft/tests/test.h | 1 - src/ydb.cc | 1 - 8 files changed, 40 insertions(+), 302 deletions(-) delete mode 100644 ft/key.cc delete mode 100644 ft/key.h diff --git a/ft/CMakeLists.txt b/ft/CMakeLists.txt index 57158a0972f..4b1530d0029 100644 --- a/ft/CMakeLists.txt +++ b/ft/CMakeLists.txt @@ -45,7 +45,6 @@ set(FT_SOURCES ft-serialize ft-test-helpers ft-verify - key leafentry le-cursor logcursor diff --git a/ft/ft-ops.cc b/ft/ft-ops.cc index 9c1a99b7e36..89063471f19 100644 --- a/ft/ft-ops.cc +++ b/ft/ft-ops.cc @@ -206,7 +206,6 @@ basement nodes, bulk fetch, and partial fetch: #include "ft-flusher.h" #include "ft-internal.h" #include "ft_layout_version.h" -#include "key.h" #include "log-internal.h" #include "sub_block.h" #include "txn_manager.h" @@ -7033,6 +7032,36 @@ int toku_ft_strerror_r(int error, char *buf, size_t buflen) } } +// when a and b are chars, return a-b is safe here because return type is int. No over/underflow possible. 
+int toku_keycompare (bytevec key1, ITEMLEN key1len, bytevec key2, ITEMLEN key2len) { + int comparelen = key1len<key2len ? key1len : key2len; + const unsigned char *k1; + const unsigned char *k2; + for (CAST_FROM_VOIDP(k1, key1), CAST_FROM_VOIDP(k2, key2); + comparelen>4; + k1+=4, k2+=4, comparelen-=4) { + { int v1=k1[0], v2=k2[0]; if (v1!=v2) return v1-v2; } + { int v1=k1[1], v2=k2[1]; if (v1!=v2) return v1-v2; } + { int v1=k1[2], v2=k2[2]; if (v1!=v2) return v1-v2; } + { int v1=k1[3], v2=k2[3]; if (v1!=v2) return v1-v2; } + } + for (; + comparelen>0; + k1++, k2++, comparelen--) { + if (*k1 != *k2) { + return (int)*k1-(int)*k2; + } + } + if (key1len<key2len) return -1; + if (key1len>key2len) return 1; + return 0; +} + +int +toku_builtin_compare_fun (DB *db __attribute__((__unused__)), const DBT *a, const DBT*b) { + return toku_keycompare(a->data, a->size, b->data, b->size); +} + #include <valgrind/helgrind.h> void __attribute__((__constructor__)) toku_ft_helgrind_ignore(void); void diff --git a/ft/ft.h b/ft/ft.h index e536241722c..baf20000f3a 100644 --- a/ft/ft.h +++ b/ft/ft.h @@ -220,4 +220,8 @@ struct toku_product_name_strings_struct { extern struct toku_product_name_strings_struct toku_product_name_strings; extern int tokudb_num_envs; + +int toku_keycompare (bytevec key1, ITEMLEN key1len, bytevec key2, ITEMLEN key2len); +int toku_builtin_compare_fun (DB *, const DBT *, const DBT*) __attribute__((__visibility__("default"))); + #endif diff --git a/ft/key.cc b/ft/key.cc deleted file mode 100644 index 3940e1e274a..00000000000 --- a/ft/key.cc +++ /dev/null @@ -1,189 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE 
(below), and the PATENT RIGHTS - GRANT (below). - - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. 
- - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. - - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
- -#include "key.h" -#include "fttypes.h" -#include - -#if 0 -int toku_keycompare (bytevec key1b, ITEMLEN key1len, bytevec key2b, ITEMLEN key2len) { - const unsigned char *key1 = key1b; - const unsigned char *key2 = key2b; - while (key1len > 0 && key2len > 0) { - unsigned char b1 = key1[0]; - unsigned char b2 = key2[0]; - if (b1b2) return 1; - key1len--; key1++; - key2len--; key2++; - } - if (key1lenkey2len) return 1; - return 0; -} - -#elif 0 -int toku_keycompare (bytevec key1, ITEMLEN key1len, bytevec key2, ITEMLEN key2len) { - if (key1len==key2len) { - return memcmp(key1,key2,key1len); - } else if (key1len=0) return 1; /* If the keys are the same up to 2's length, then return 1 since key1 is longer than key2 */ - else return -1; - } -} -#elif 0 -/* This one looks tighter, but it does use memcmp... */ -int toku_keycompare (bytevec key1, ITEMLEN key1len, bytevec key2, ITEMLEN key2len) { - int comparelen = key1len0; - k1++, k2++, comparelen--) { - if (*k1 != *k2) { - return (int)*k1-(int)*k2; - } - } - if (key1lenkey2len) return 1; - return 0; -} -#else -/* unroll that one four times */ -// when a and b are chars, return a-b is safe here because return type is int. No over/underflow possible. 
-int toku_keycompare (bytevec key1, ITEMLEN key1len, bytevec key2, ITEMLEN key2len) { - int comparelen = key1len4; - k1+=4, k2+=4, comparelen-=4) { - { int v1=k1[0], v2=k2[0]; if (v1!=v2) return v1-v2; } - { int v1=k1[1], v2=k2[1]; if (v1!=v2) return v1-v2; } - { int v1=k1[2], v2=k2[2]; if (v1!=v2) return v1-v2; } - { int v1=k1[3], v2=k2[3]; if (v1!=v2) return v1-v2; } - } - for (; - comparelen>0; - k1++, k2++, comparelen--) { - if (*k1 != *k2) { - return (int)*k1-(int)*k2; - } - } - if (key1lenkey2len) return 1; - return 0; -} - -#endif - -int -toku_builtin_compare_fun (DB *db __attribute__((__unused__)), const DBT *a, const DBT*b) { - return toku_keycompare(a->data, a->size, b->data, b->size); -} diff --git a/ft/key.h b/ft/key.h deleted file mode 100644 index cf32e9d7249..00000000000 --- a/ft/key.h +++ /dev/null @@ -1,104 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef TOKU_KEY_H -#define TOKU_KEY_H - -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). - - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. 
- - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. 
- - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
- -#include "ybt.h" -#include "fttypes.h" - -int toku_keycompare (bytevec key1, ITEMLEN key1len, bytevec key2, ITEMLEN key2len); - -void toku_test_keycompare (void) ; - -int toku_builtin_compare_fun (DB *, const DBT *, const DBT*) __attribute__((__visibility__("default"))); - -#endif diff --git a/ft/tests/keytest.cc b/ft/tests/keytest.cc index 93896a819b4..70beae0beca 100644 --- a/ft/tests/keytest.cc +++ b/ft/tests/keytest.cc @@ -88,11 +88,12 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#include "test.h" -#include "key.h" -void -toku_test_keycompare (void) { +#include "test.h" +#include "ft.h" + +static void +test_keycompare (void) { assert(toku_keycompare("a",1, "a",1)==0); assert(toku_keycompare("aa",2, "a",1)>0); assert(toku_keycompare("a",1, "aa",2)<0); @@ -109,7 +110,7 @@ int test_main (int argc , const char *argv[]) { default_parse_args(argc, argv); - toku_test_keycompare(); + test_keycompare(); if (verbose) printf("test ok\n"); return 0; } diff --git a/ft/tests/test.h b/ft/tests/test.h index f22d8cdbf67..5baa2433aea 100644 --- a/ft/tests/test.h +++ b/ft/tests/test.h @@ -100,7 +100,6 @@ PATENT RIGHTS GRANT: #include #include "ft.h" -#include "key.h" #include "block_table.h" #include "log-internal.h" #include "logger.h" diff --git a/src/ydb.cc b/src/ydb.cc index df4fd6baf87..4d947556cb5 100644 --- a/src/ydb.cc +++ b/src/ydb.cc @@ -117,7 +117,6 @@ const char *toku_copyright_string = "Copyright (c) 2007-2013 Tokutek Inc. 
All r #include #include #include -#include #include #include #include From d94a8bf751f677464318fd054460c1b958d6a4bd Mon Sep 17 00:00:00 2001 From: John Esmet Date: Sat, 14 Jun 2014 20:24:46 -0400 Subject: [PATCH 008/190] FT-243 Move minicron to util/ --- ft/CMakeLists.txt | 1 - ft/cachetable.h | 5 +++-- src/ydb-internal.h | 2 +- util/CMakeLists.txt | 1 + {ft => util}/minicron.cc | 5 ++--- {ft => util}/minicron.h | 1 - {ft => util}/tests/minicron-test.cc | 16 ++++++++-------- 7 files changed, 15 insertions(+), 16 deletions(-) rename {ft => util}/minicron.cc (99%) rename {ft => util}/minicron.h (99%) rename {ft => util}/tests/minicron-test.cc (96%) diff --git a/ft/CMakeLists.txt b/ft/CMakeLists.txt index 4b1530d0029..9c594bbf316 100644 --- a/ft/CMakeLists.txt +++ b/ft/CMakeLists.txt @@ -51,7 +51,6 @@ set(FT_SOURCES logfilemgr logger log_upgrade - minicron pqueue quicklz recover diff --git a/ft/cachetable.h b/ft/cachetable.h index c4290b6f6d7..32686640997 100644 --- a/ft/cachetable.h +++ b/ft/cachetable.h @@ -93,8 +93,9 @@ PATENT RIGHTS GRANT: #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." #include -#include "fttypes.h" -#include "minicron.h" + +#include "ft/fttypes.h" +#include "util/minicron.h" // Maintain a cache mapping from cachekeys to values (void*) // Some of the keys can be pinned. Don't pin too many or for too long. 
diff --git a/src/ydb-internal.h b/src/ydb-internal.h index 085a4dd0334..d2f0e95b280 100644 --- a/src/ydb-internal.h +++ b/src/ydb-internal.h @@ -96,9 +96,9 @@ PATENT RIGHTS GRANT: #include #include -#include #include +#include #include #include diff --git a/util/CMakeLists.txt b/util/CMakeLists.txt index b1faed58f2d..3af867238cf 100644 --- a/util/CMakeLists.txt +++ b/util/CMakeLists.txt @@ -4,6 +4,7 @@ set(util_srcs kibbutz memarena mempool + minicron partitioned_counter queue threadpool diff --git a/ft/minicron.cc b/util/minicron.cc similarity index 99% rename from ft/minicron.cc rename to util/minicron.cc index 03d4075e1b2..32e24f6bde5 100644 --- a/ft/minicron.cc +++ b/util/minicron.cc @@ -92,9 +92,8 @@ PATENT RIGHTS GRANT: #include #include -#include "toku_assert.h" -#include "fttypes.h" -#include "minicron.h" +#include "portability/toku_assert.h" +#include "util/minicron.h" static void toku_gettime (toku_timespec_t *a) { diff --git a/ft/minicron.h b/util/minicron.h similarity index 99% rename from ft/minicron.h rename to util/minicron.h index d6cb0f76c9f..05d9868c8ac 100644 --- a/ft/minicron.h +++ b/util/minicron.h @@ -93,7 +93,6 @@ PATENT RIGHTS GRANT: #include #include -#include "fttypes.h" // Specification: diff --git a/ft/tests/minicron-test.cc b/util/tests/minicron-test.cc similarity index 96% rename from ft/tests/minicron-test.cc rename to util/tests/minicron-test.cc index 5f953f1b694..2ec27a05310 100644 --- a/ft/tests/minicron-test.cc +++ b/util/tests/minicron-test.cc @@ -90,7 +90,7 @@ PATENT RIGHTS GRANT: #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
#include #include "test.h" -#include "minicron.h" +#include "util/minicron.h" #include #include @@ -125,7 +125,7 @@ static void* test1 (void* v) { struct minicron m; - ZERO_STRUCT(m); + memset(&m, 0, sizeof(struct minicron)); int r = toku_minicron_setup(&m, 0, never_run, 0); assert(r==0); sleep(1); r = toku_minicron_shutdown(&m); assert(r==0); @@ -137,7 +137,7 @@ static void* test2 (void* v) { struct minicron m; - ZERO_STRUCT(m); + memset(&m, 0, sizeof(struct minicron)); int r = toku_minicron_setup(&m, 10000, never_run, 0); assert(r==0); sleep(2); r = toku_minicron_shutdown(&m); assert(r==0); @@ -174,7 +174,7 @@ test3 (void* v) struct tenx tx; gettimeofday(&tx.tv, 0); tx.counter=0; - ZERO_STRUCT(m); + memset(&m, 0, sizeof(struct minicron)); int r = toku_minicron_setup(&m, 1000, run_5x, &tx); assert(r==0); sleep(5); r = toku_minicron_shutdown(&m); assert(r==0); @@ -197,7 +197,7 @@ static void* test4 (void *v) { struct minicron m; int counter = 0; - ZERO_STRUCT(m); + memset(&m, 0, sizeof(struct minicron)); int r = toku_minicron_setup(&m, 2000, run_3sec, &counter); assert(r==0); sleep(10); r = toku_minicron_shutdown(&m); assert(r==0); @@ -209,7 +209,7 @@ static void* test5 (void *v) { struct minicron m; int counter = 0; - ZERO_STRUCT(m); + memset(&m, 0, sizeof(struct minicron)); int r = toku_minicron_setup(&m, 10000, run_3sec, &counter); assert(r==0); toku_minicron_change_period(&m, 2000); sleep(10); @@ -221,7 +221,7 @@ test5 (void *v) { static void* test6 (void *v) { struct minicron m; - ZERO_STRUCT(m); + memset(&m, 0, sizeof(struct minicron)); int r = toku_minicron_setup(&m, 5000, never_run, 0); assert(r==0); toku_minicron_change_period(&m, 0); sleep(7); @@ -233,8 +233,8 @@ test6 (void *v) { static void* test7 (void *v) { struct minicron m; + memset(&m, 0, sizeof(struct minicron)); int counter = 0; - ZERO_STRUCT(m); int r = toku_minicron_setup(&m, 5000, run_3sec, &counter); assert(r==0); sleep(17); r = toku_minicron_shutdown(&m); assert(r==0); From 
904bdde01da8434a96c98947e818c46630a00e2b Mon Sep 17 00:00:00 2001 From: John Esmet Date: Sat, 14 Jun 2014 20:24:46 -0400 Subject: [PATCH 009/190] FT-254 Replace NULL_LOGGER etc with nullptr --- ft/fttypes.h | 2 - ft/log-internal.h | 3 -- ft/logger.cc | 2 +- ft/logger.h | 2 - ft/recover.h | 4 -- ft/tdb-recover.cc | 10 ++-- ft/tests/benchmark-test.cc | 4 +- ft/tests/cachetable-4357.cc | 2 +- ft/tests/cachetable-4365.cc | 2 +- ft/tests/cachetable-5097.cc | 2 +- ft/tests/cachetable-5978-2.cc | 2 +- ft/tests/cachetable-5978.cc | 2 +- ft/tests/cachetable-all-write.cc | 2 +- ft/tests/cachetable-checkpoint-pending.cc | 2 +- .../cachetable-checkpoint-pinned-nodes.cc | 2 +- .../cachetable-checkpoint-prefetched-nodes.cc | 2 +- ft/tests/cachetable-checkpoint-test.cc | 2 +- ft/tests/cachetable-cleaner-checkpoint.cc | 2 +- ft/tests/cachetable-cleaner-checkpoint2.cc | 2 +- ...hetable-cleaner-thread-attrs-accumulate.cc | 2 +- ...hetable-cleaner-thread-empty-cachetable.cc | 2 +- ...etable-cleaner-thread-everything-pinned.cc | 2 +- ...e-cleaner-thread-nothing-needs-flushing.cc | 2 +- ...cachetable-cleaner-thread-same-fullhash.cc | 2 +- ft/tests/cachetable-cleaner-thread-simple.cc | 2 +- ft/tests/cachetable-clock-all-pinned.cc | 2 +- ft/tests/cachetable-clock-eviction.cc | 2 +- ft/tests/cachetable-clock-eviction2.cc | 2 +- ft/tests/cachetable-clock-eviction3.cc | 2 +- ft/tests/cachetable-clock-eviction4.cc | 2 +- ft/tests/cachetable-clone-checkpoint.cc | 2 +- ...hetable-clone-partial-fetch-pinned-node.cc | 2 +- ft/tests/cachetable-clone-partial-fetch.cc | 2 +- ft/tests/cachetable-clone-pin-nonblocking.cc | 2 +- ft/tests/cachetable-clone-unpin-remove.cc | 2 +- ft/tests/cachetable-count-pinned-test.cc | 2 +- ft/tests/cachetable-debug-test.cc | 2 +- ft/tests/cachetable-eviction-close-test.cc | 2 +- ft/tests/cachetable-eviction-close-test2.cc | 2 +- .../cachetable-eviction-getandpin-test.cc | 2 +- .../cachetable-eviction-getandpin-test2.cc | 2 +- ft/tests/cachetable-fd-test.cc | 2 +- 
ft/tests/cachetable-fetch-inducing-evictor.cc | 2 +- ft/tests/cachetable-flush-during-cleaner.cc | 2 +- ft/tests/cachetable-flush-test.cc | 2 +- ft/tests/cachetable-getandpin-test.cc | 2 +- .../cachetable-kibbutz_and_flush_cachefile.cc | 2 +- ft/tests/cachetable-partial-fetch.cc | 4 +- ft/tests/cachetable-pin-checkpoint.cc | 2 +- ...etable-pin-nonblocking-checkpoint-clean.cc | 2 +- .../cachetable-prefetch-checkpoint-test.cc | 2 +- .../cachetable-prefetch-close-leak-test.cc | 2 +- ft/tests/cachetable-prefetch-close-test.cc | 2 +- .../cachetable-prefetch-flowcontrol-test.cc | 2 +- .../cachetable-prefetch-getandpin-test.cc | 2 +- ...cachetable-prefetch-maybegetandpin-test.cc | 2 +- ft/tests/cachetable-prefetch2-test.cc | 2 +- ft/tests/cachetable-put-checkpoint.cc | 2 +- ft/tests/cachetable-put-test.cc | 2 +- ft/tests/cachetable-simple-clone.cc | 2 +- ft/tests/cachetable-simple-clone2.cc | 2 +- ft/tests/cachetable-simple-close.cc | 8 ++-- ft/tests/cachetable-simple-maybe-get-pin.cc | 2 +- ft/tests/cachetable-simple-pin-cheap.cc | 2 +- ft/tests/cachetable-simple-pin-dep-nodes.cc | 2 +- ...cachetable-simple-pin-nonblocking-cheap.cc | 2 +- ft/tests/cachetable-simple-pin-nonblocking.cc | 2 +- ft/tests/cachetable-simple-pin.cc | 2 +- ft/tests/cachetable-simple-put-dep-nodes.cc | 2 +- .../cachetable-simple-read-pin-nonblocking.cc | 2 +- ft/tests/cachetable-simple-read-pin.cc | 2 +- ...chetable-simple-unpin-remove-checkpoint.cc | 2 +- ft/tests/cachetable-simple-verify.cc | 2 +- ft/tests/cachetable-test.cc | 12 ++--- ft/tests/cachetable-unpin-and-remove-test.cc | 4 +- .../cachetable-unpin-remove-and-checkpoint.cc | 2 +- ft/tests/cachetable-unpin-test.cc | 4 +- ft/tests/cachetable-writer-thread-limit.cc | 2 +- ft/tests/ft-serialize-sub-block-test.cc | 2 +- ft/tests/ft-test-cursor-2.cc | 2 +- ft/tests/ft-test-cursor.cc | 26 +++++----- ft/tests/ft-test-header.cc | 4 +- ft/tests/ft-test.cc | 48 +++++++++---------- ft/tests/ft-test0.cc | 2 +- ft/tests/ft-test1.cc | 2 +- 
ft/tests/ft-test2.cc | 2 +- ft/tests/ft-test3.cc | 2 +- ft/tests/ft-test4.cc | 2 +- ft/tests/ft-test5.cc | 2 +- ft/tests/ftloader-test-writer.cc | 2 +- ft/tests/ftloader-test.cc | 2 +- ft/tests/keyrange.cc | 2 +- ft/tests/le-cursor-right.cc | 6 +-- ft/tests/le-cursor-walk.cc | 2 +- ft/tests/make-tree.cc | 2 +- ft/tests/msnfilter.cc | 2 +- ft/tests/orthopush-flush.cc | 2 +- ft/tests/shortcut.cc | 2 +- ft/tests/test-checkpoint-during-flush.cc | 2 +- ft/tests/test-checkpoint-during-merge.cc | 2 +- ft/tests/test-checkpoint-during-rebalance.cc | 2 +- ft/tests/test-checkpoint-during-split.cc | 2 +- ft/tests/test-del-inorder.cc | 2 +- ft/tests/test-dirty-flushes-on-cleaner.cc | 2 +- ft/tests/test-dump-ft.cc | 2 +- ft/tests/test-flushes-on-cleaner.cc | 2 +- ft/tests/test-ft-overflow.cc | 2 +- ft/tests/test-hot-with-bounds.cc | 2 +- ft/tests/test-inc-split.cc | 2 +- ft/tests/test-merges-on-cleaner.cc | 2 +- ft/tests/test-oldest-referenced-xid-flush.cc | 2 +- ft/tests/test-pick-child-to-flush.cc | 2 +- ft/tests/test.h | 4 +- ft/tests/test3681.cc | 2 +- ft/tests/test3856.cc | 4 +- ft/tests/test3884.cc | 12 ++--- ft/tests/test4115.cc | 2 +- ft/tests/test4244.cc | 2 +- ...test_rightmost_leaf_seqinsert_heuristic.cc | 2 +- ft/tests/test_rightmost_leaf_split_merge.cc | 3 +- ft/tests/upgrade_test_simple.cc | 2 +- ft/tests/verify-bad-msn.cc | 2 +- ft/tests/verify-bad-pivots.cc | 2 +- ft/tests/verify-dup-in-leaf.cc | 2 +- ft/tests/verify-dup-pivots.cc | 2 +- ft/tests/verify-misrouted-msgs.cc | 2 +- ft/tests/verify-unsorted-leaf.cc | 2 +- ft/tests/verify-unsorted-pivots.cc | 2 +- src/ydb_db.cc | 2 +- 129 files changed, 188 insertions(+), 196 deletions(-) diff --git a/ft/fttypes.h b/ft/fttypes.h index f291484433c..a975e66efe2 100644 --- a/ft/fttypes.h +++ b/ft/fttypes.h @@ -183,9 +183,7 @@ typedef struct { typedef struct tokulogger *TOKULOGGER; typedef struct txn_manager *TXN_MANAGER; -#define NULL_LOGGER ((TOKULOGGER)0) typedef struct tokutxn *TOKUTXN; -#define NULL_TXN ((TOKUTXN)0) 
typedef struct xids_t *XIDS; diff --git a/ft/log-internal.h b/ft/log-internal.h index e8acd2e91eb..a0ed1df10ff 100644 --- a/ft/log-internal.h +++ b/ft/log-internal.h @@ -308,9 +308,6 @@ struct txninfo { BLOCKNUM current_rollback; }; -// TODO: Remove null txn -#define NULL_TXN ((TOKUTXN)0) - static inline int toku_logsizeof_uint8_t (uint32_t v __attribute__((__unused__))) { return 1; } diff --git a/ft/logger.cc b/ft/logger.cc index bbac5cf7de3..004c200cc62 100644 --- a/ft/logger.cc +++ b/ft/logger.cc @@ -282,7 +282,7 @@ toku_logger_open_rollback(TOKULOGGER logger, CACHETABLE cachetable, bool create) FT_HANDLE t = NULL; // Note, there is no DB associated with this FT. toku_ft_handle_create(&t); - int r = toku_ft_handle_open(t, toku_product_name_strings.rollback_cachefile, create, create, cachetable, NULL_TXN); + int r = toku_ft_handle_open(t, toku_product_name_strings.rollback_cachefile, create, create, cachetable, nullptr); if (r == 0) { logger->rollback_cachefile = t->ft->cf; toku_logger_initialize_rollback_cache(logger, t->ft); diff --git a/ft/logger.h b/ft/logger.h index 11731fb5b97..2b444f4499f 100644 --- a/ft/logger.h +++ b/ft/logger.h @@ -260,6 +260,4 @@ int toku_get_version_of_logs_on_disk(const char *log_dir, bool *found_any_logs, TXN_MANAGER toku_logger_get_txn_manager(TOKULOGGER logger); -static const TOKULOGGER NULL_logger __attribute__((__unused__)) = NULL; - #endif /* TOKU_LOGGER_H */ diff --git a/ft/recover.h b/ft/recover.h index 2ef84112784..0675b8b9ae4 100644 --- a/ft/recover.h +++ b/ft/recover.h @@ -136,8 +136,4 @@ int toku_recover_lock (const char *lock_dir, int *lockfd); int toku_recover_unlock(int lockfd); -static const prepared_txn_callback_t NULL_prepared_txn_callback __attribute__((__unused__)) = NULL; -static const keep_cachetable_callback_t NULL_keep_cachetable_callback __attribute__((__unused__)) = NULL; - - #endif // TOKURECOVER_H diff --git a/ft/tdb-recover.cc b/ft/tdb-recover.cc index 0d3fe0c75be..736fd6685c3 100644 --- 
a/ft/tdb-recover.cc +++ b/ft/tdb-recover.cc @@ -123,11 +123,11 @@ int recovery_main (int argc, const char *const argv[]) { return(1); } - int r = tokudb_recover(NULL, - NULL_prepared_txn_callback, - NULL_keep_cachetable_callback, - NULL_logger, - data_dir, log_dir, NULL, NULL, NULL, NULL, 0); + int r = tokudb_recover(nullptr, + nullptr, + nullptr, + nullptr, + data_dir, log_dir, nullptr, nullptr, nullptr, nullptr, 0); if (r!=0) { fprintf(stderr, "Recovery failed\n"); return(1); diff --git a/ft/tests/benchmark-test.cc b/ft/tests/benchmark-test.cc index 0f7a0d4f84b..b607d772cca 100644 --- a/ft/tests/benchmark-test.cc +++ b/ft/tests/benchmark-test.cc @@ -119,8 +119,8 @@ static FT_HANDLE t; static void setup (void) { int r; unlink(fname); - toku_cachetable_create(&ct, 0, ZERO_LSN, NULL_LOGGER); - r = toku_open_ft_handle(fname, 1, &t, nodesize, basementnodesize, compression_method, ct, NULL_TXN, toku_builtin_compare_fun); assert(r==0); + toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); + r = toku_open_ft_handle(fname, 1, &t, nodesize, basementnodesize, compression_method, ct, nullptr, toku_builtin_compare_fun); assert(r==0); } static void toku_shutdown (void) { diff --git a/ft/tests/cachetable-4357.cc b/ft/tests/cachetable-4357.cc index de75f6813d2..01c8875db5d 100644 --- a/ft/tests/cachetable-4357.cc +++ b/ft/tests/cachetable-4357.cc @@ -115,7 +115,7 @@ cachetable_test (void) { const int test_limit = 12; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); r = toku_cachetable_openf(&f1, ct, fname1, O_RDWR|O_CREAT, S_IRWXU|S_IRWXG|S_IRWXO); assert(r == 0); diff --git a/ft/tests/cachetable-4365.cc b/ft/tests/cachetable-4365.cc index ecaeea2d631..3396becbfdd 100644 --- a/ft/tests/cachetable-4365.cc +++ b/ft/tests/cachetable-4365.cc @@ -136,7 +136,7 @@ cachetable_test (void) { const int test_limit = 12; int r; 
CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); r = toku_cachetable_openf(&f1, ct, fname1, O_RDWR|O_CREAT, S_IRWXU|S_IRWXG|S_IRWXO); assert(r == 0); diff --git a/ft/tests/cachetable-5097.cc b/ft/tests/cachetable-5097.cc index 7c958dd3049..2da3439fd1f 100644 --- a/ft/tests/cachetable-5097.cc +++ b/ft/tests/cachetable-5097.cc @@ -169,7 +169,7 @@ cachetable_test (void) { check_flush = false; dirty_flush_called = false; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); evictor_test_helpers::disable_ev_thread(&ct->ev); // disable eviction thread toku_os_recursive_delete(TOKU_TEST_FILENAME); diff --git a/ft/tests/cachetable-5978-2.cc b/ft/tests/cachetable-5978-2.cc index be7c4fb2363..790be1719ab 100644 --- a/ft/tests/cachetable-5978-2.cc +++ b/ft/tests/cachetable-5978-2.cc @@ -210,7 +210,7 @@ cachetable_test (void) { int r; toku_pair_list_set_lock_size(2); // set two bucket mutexes CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); r = toku_cachetable_openf(&f1, ct, fname1, O_RDWR|O_CREAT, S_IRWXU|S_IRWXG|S_IRWXO); assert(r == 0); diff --git a/ft/tests/cachetable-5978.cc b/ft/tests/cachetable-5978.cc index c72d67909e1..ff140ade37c 100644 --- a/ft/tests/cachetable-5978.cc +++ b/ft/tests/cachetable-5978.cc @@ -227,7 +227,7 @@ cachetable_test (void) { int r; toku_pair_list_set_lock_size(2); // set two bucket mutexes CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); r = toku_cachetable_openf(&f1, ct, fname1, O_RDWR|O_CREAT, S_IRWXU|S_IRWXG|S_IRWXO); 
assert(r == 0); diff --git a/ft/tests/cachetable-all-write.cc b/ft/tests/cachetable-all-write.cc index 3af800e7edb..74e3a357529 100644 --- a/ft/tests/cachetable-all-write.cc +++ b/ft/tests/cachetable-all-write.cc @@ -116,7 +116,7 @@ cachetable_test (void) { const int test_limit = 12; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff --git a/ft/tests/cachetable-checkpoint-pending.cc b/ft/tests/cachetable-checkpoint-pending.cc index 615a544a7f7..ff6ea6a48a7 100644 --- a/ft/tests/cachetable-checkpoint-pending.cc +++ b/ft/tests/cachetable-checkpoint-pending.cc @@ -187,7 +187,7 @@ static void checkpoint_pending(void) { if (verbose) { printf("%s:%d n=%d\n", __FUNCTION__, __LINE__, N); fflush(stdout); } const int test_limit = N; int r; - toku_cachetable_create(&ct, test_limit*sizeof(int), ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit*sizeof(int), ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; r = unlink(fname1); if (r!=0) CKERR2(get_error_errno(), ENOENT); r = toku_cachetable_openf(&cf, ct, fname1, O_RDWR|O_CREAT, S_IRWXU|S_IRWXG|S_IRWXO); assert(r == 0); diff --git a/ft/tests/cachetable-checkpoint-pinned-nodes.cc b/ft/tests/cachetable-checkpoint-pinned-nodes.cc index cf0d4e28afd..9bc6da74086 100644 --- a/ft/tests/cachetable-checkpoint-pinned-nodes.cc +++ b/ft/tests/cachetable-checkpoint-pinned-nodes.cc @@ -152,7 +152,7 @@ cachetable_test (void) { const int test_limit = 20; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff --git a/ft/tests/cachetable-checkpoint-prefetched-nodes.cc b/ft/tests/cachetable-checkpoint-prefetched-nodes.cc index fded78d5ba0..59ae451234f 100644 --- 
a/ft/tests/cachetable-checkpoint-prefetched-nodes.cc +++ b/ft/tests/cachetable-checkpoint-prefetched-nodes.cc @@ -154,7 +154,7 @@ cachetable_test (void) { const int test_limit = 20; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff --git a/ft/tests/cachetable-checkpoint-test.cc b/ft/tests/cachetable-checkpoint-test.cc index e86e7de4bb0..2be864d5e3b 100644 --- a/ft/tests/cachetable-checkpoint-test.cc +++ b/ft/tests/cachetable-checkpoint-test.cc @@ -145,7 +145,7 @@ static void cachetable_checkpoint_test(int n, enum cachetable_dirty dirty) { const int test_limit = n; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff --git a/ft/tests/cachetable-cleaner-checkpoint.cc b/ft/tests/cachetable-cleaner-checkpoint.cc index 7e40d3c861f..283b781e524 100644 --- a/ft/tests/cachetable-cleaner-checkpoint.cc +++ b/ft/tests/cachetable-cleaner-checkpoint.cc @@ -147,7 +147,7 @@ cachetable_test (void) { const int test_limit = 12; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); r = toku_cachetable_openf(&f1, ct, fname1, O_RDWR|O_CREAT, S_IRWXU|S_IRWXG|S_IRWXO); assert(r == 0); diff --git a/ft/tests/cachetable-cleaner-checkpoint2.cc b/ft/tests/cachetable-cleaner-checkpoint2.cc index 4c9eacd004c..009f17f2be2 100644 --- a/ft/tests/cachetable-cleaner-checkpoint2.cc +++ b/ft/tests/cachetable-cleaner-checkpoint2.cc @@ -147,7 +147,7 @@ cachetable_test (void) { const int test_limit = 12; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + 
toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); r = toku_cachetable_openf(&f1, ct, fname1, O_RDWR|O_CREAT, S_IRWXU|S_IRWXG|S_IRWXO); assert(r == 0); diff --git a/ft/tests/cachetable-cleaner-thread-attrs-accumulate.cc b/ft/tests/cachetable-cleaner-thread-attrs-accumulate.cc index 1318f342f2b..745facfba99 100644 --- a/ft/tests/cachetable-cleaner-thread-attrs-accumulate.cc +++ b/ft/tests/cachetable-cleaner-thread-attrs-accumulate.cc @@ -143,7 +143,7 @@ run_test (void) { int r; CACHETABLE ct; toku_mutex_init(&attr_mutex, NULL); - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); diff --git a/ft/tests/cachetable-cleaner-thread-empty-cachetable.cc b/ft/tests/cachetable-cleaner-thread-empty-cachetable.cc index 3f771b58075..3f31729be06 100644 --- a/ft/tests/cachetable-cleaner-thread-empty-cachetable.cc +++ b/ft/tests/cachetable-cleaner-thread-empty-cachetable.cc @@ -99,7 +99,7 @@ cachetable_test (void) { const int test_limit = 1000; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); toku_set_cleaner_period(ct, 1); const char *fname1 = TOKU_TEST_FILENAME; diff --git a/ft/tests/cachetable-cleaner-thread-everything-pinned.cc b/ft/tests/cachetable-cleaner-thread-everything-pinned.cc index 0a809339b8e..92056b7fa13 100644 --- a/ft/tests/cachetable-cleaner-thread-everything-pinned.cc +++ b/ft/tests/cachetable-cleaner-thread-everything-pinned.cc @@ -111,7 +111,7 @@ run_test (void) { const int test_limit = 1000; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); toku_set_cleaner_period(ct, 1); const char *fname1 = TOKU_TEST_FILENAME; diff --git 
a/ft/tests/cachetable-cleaner-thread-nothing-needs-flushing.cc b/ft/tests/cachetable-cleaner-thread-nothing-needs-flushing.cc index 33a603baec9..57aac61ac96 100644 --- a/ft/tests/cachetable-cleaner-thread-nothing-needs-flushing.cc +++ b/ft/tests/cachetable-cleaner-thread-nothing-needs-flushing.cc @@ -111,7 +111,7 @@ run_test (void) { const int test_limit = 1000; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); toku_set_cleaner_period(ct, 1); const char *fname1 = TOKU_TEST_FILENAME; diff --git a/ft/tests/cachetable-cleaner-thread-same-fullhash.cc b/ft/tests/cachetable-cleaner-thread-same-fullhash.cc index 485224302b0..24524134864 100644 --- a/ft/tests/cachetable-cleaner-thread-same-fullhash.cc +++ b/ft/tests/cachetable-cleaner-thread-same-fullhash.cc @@ -119,7 +119,7 @@ run_test (void) { const int test_limit = 1000; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); my_cleaner_callback_called = false; const char *fname1 = TOKU_TEST_FILENAME; diff --git a/ft/tests/cachetable-cleaner-thread-simple.cc b/ft/tests/cachetable-cleaner-thread-simple.cc index 5d4fed42e50..89a170c296c 100644 --- a/ft/tests/cachetable-cleaner-thread-simple.cc +++ b/ft/tests/cachetable-cleaner-thread-simple.cc @@ -119,7 +119,7 @@ run_test (void) { const int test_limit = 1000; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); toku_set_cleaner_period(ct, 1); my_cleaner_callback_called = false; diff --git a/ft/tests/cachetable-clock-all-pinned.cc b/ft/tests/cachetable-clock-all-pinned.cc index af08020e4aa..18482a2dd42 100644 --- a/ft/tests/cachetable-clock-all-pinned.cc +++ b/ft/tests/cachetable-clock-all-pinned.cc @@ -96,7 +96,7 @@ cachetable_test (void) { int test_limit = 6; int r; CACHETABLE ct; - 
toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff --git a/ft/tests/cachetable-clock-eviction.cc b/ft/tests/cachetable-clock-eviction.cc index f024a79e51d..00c56c06522 100644 --- a/ft/tests/cachetable-clock-eviction.cc +++ b/ft/tests/cachetable-clock-eviction.cc @@ -143,7 +143,7 @@ cachetable_test (void) { num_entries = 0; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff --git a/ft/tests/cachetable-clock-eviction2.cc b/ft/tests/cachetable-clock-eviction2.cc index 23926241b97..3a8266f7b8a 100644 --- a/ft/tests/cachetable-clock-eviction2.cc +++ b/ft/tests/cachetable-clock-eviction2.cc @@ -186,7 +186,7 @@ cachetable_test (void) { const int test_limit = 16; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff --git a/ft/tests/cachetable-clock-eviction3.cc b/ft/tests/cachetable-clock-eviction3.cc index 735bde724d0..0c5210e238c 100644 --- a/ft/tests/cachetable-clock-eviction3.cc +++ b/ft/tests/cachetable-clock-eviction3.cc @@ -202,7 +202,7 @@ cachetable_test (void) { const int test_limit = 20; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); evictor_test_helpers::set_hysteresis_limits(&ct->ev, test_limit, 100*test_limit); evictor_test_helpers::disable_ev_thread(&ct->ev); const char *fname1 = TOKU_TEST_FILENAME; diff --git a/ft/tests/cachetable-clock-eviction4.cc b/ft/tests/cachetable-clock-eviction4.cc index 9dc1f1a5218..169c4f31e81 100644 --- 
a/ft/tests/cachetable-clock-eviction4.cc +++ b/ft/tests/cachetable-clock-eviction4.cc @@ -181,7 +181,7 @@ cachetable_test (void) { num_entries = 0; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff --git a/ft/tests/cachetable-clone-checkpoint.cc b/ft/tests/cachetable-clone-checkpoint.cc index f7904ffd73d..4c2d4bad3a8 100644 --- a/ft/tests/cachetable-clone-checkpoint.cc +++ b/ft/tests/cachetable-clone-checkpoint.cc @@ -145,7 +145,7 @@ cachetable_test (void) { const int test_limit = 200; int r; ct = NULL; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff --git a/ft/tests/cachetable-clone-partial-fetch-pinned-node.cc b/ft/tests/cachetable-clone-partial-fetch-pinned-node.cc index 4c5e1133555..5f75c91b4e0 100644 --- a/ft/tests/cachetable-clone-partial-fetch-pinned-node.cc +++ b/ft/tests/cachetable-clone-partial-fetch-pinned-node.cc @@ -139,7 +139,7 @@ cachetable_test (void) { const int test_limit = 12; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff --git a/ft/tests/cachetable-clone-partial-fetch.cc b/ft/tests/cachetable-clone-partial-fetch.cc index fed76332a45..d48efc69be0 100644 --- a/ft/tests/cachetable-clone-partial-fetch.cc +++ b/ft/tests/cachetable-clone-partial-fetch.cc @@ -144,7 +144,7 @@ cachetable_test (void) { const int test_limit = 12; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff 
--git a/ft/tests/cachetable-clone-pin-nonblocking.cc b/ft/tests/cachetable-clone-pin-nonblocking.cc index a56dc034202..29f3518c815 100644 --- a/ft/tests/cachetable-clone-pin-nonblocking.cc +++ b/ft/tests/cachetable-clone-pin-nonblocking.cc @@ -126,7 +126,7 @@ cachetable_test (enum cachetable_dirty dirty, bool cloneable) { const int test_limit = 12; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff --git a/ft/tests/cachetable-clone-unpin-remove.cc b/ft/tests/cachetable-clone-unpin-remove.cc index 1aeff2ee28e..b9b10739beb 100644 --- a/ft/tests/cachetable-clone-unpin-remove.cc +++ b/ft/tests/cachetable-clone-unpin-remove.cc @@ -137,7 +137,7 @@ cachetable_test (void) { const int test_limit = 12; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff --git a/ft/tests/cachetable-count-pinned-test.cc b/ft/tests/cachetable-count-pinned-test.cc index d4437278054..a1ffa8e4021 100644 --- a/ft/tests/cachetable-count-pinned-test.cc +++ b/ft/tests/cachetable-count-pinned-test.cc @@ -97,7 +97,7 @@ cachetable_count_pinned_test (int n) { const int test_limit = 2*n; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff --git a/ft/tests/cachetable-debug-test.cc b/ft/tests/cachetable-debug-test.cc index dde4a0c69b1..1e207fbd505 100644 --- a/ft/tests/cachetable-debug-test.cc +++ b/ft/tests/cachetable-debug-test.cc @@ -96,7 +96,7 @@ cachetable_debug_test (int n) { const int test_limit = n; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + 
toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff --git a/ft/tests/cachetable-eviction-close-test.cc b/ft/tests/cachetable-eviction-close-test.cc index 18a65729501..fbd7c3b04a8 100644 --- a/ft/tests/cachetable-eviction-close-test.cc +++ b/ft/tests/cachetable-eviction-close-test.cc @@ -155,7 +155,7 @@ static void cachetable_eviction_full_test (void) { const int test_limit = 12; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff --git a/ft/tests/cachetable-eviction-close-test2.cc b/ft/tests/cachetable-eviction-close-test2.cc index c8004ca1cb1..f92729b3481 100644 --- a/ft/tests/cachetable-eviction-close-test2.cc +++ b/ft/tests/cachetable-eviction-close-test2.cc @@ -168,7 +168,7 @@ static void cachetable_eviction_full_test (void) { const int test_limit = 12; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff --git a/ft/tests/cachetable-eviction-getandpin-test.cc b/ft/tests/cachetable-eviction-getandpin-test.cc index a1887fe6c94..e1bba60c755 100644 --- a/ft/tests/cachetable-eviction-getandpin-test.cc +++ b/ft/tests/cachetable-eviction-getandpin-test.cc @@ -123,7 +123,7 @@ static void cachetable_predef_fetch_maybegetandpin_test (void) { const int test_limit = 12; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); evictor_test_helpers::disable_ev_thread(&ct->ev); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); diff --git a/ft/tests/cachetable-eviction-getandpin-test2.cc b/ft/tests/cachetable-eviction-getandpin-test2.cc index 
d65048f797a..338871a1895 100644 --- a/ft/tests/cachetable-eviction-getandpin-test2.cc +++ b/ft/tests/cachetable-eviction-getandpin-test2.cc @@ -130,7 +130,7 @@ static void cachetable_prefetch_maybegetandpin_test (void) { const int test_limit = 12; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); evictor_test_helpers::disable_ev_thread(&ct->ev); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); diff --git a/ft/tests/cachetable-fd-test.cc b/ft/tests/cachetable-fd-test.cc index 16b757bebdf..65e0d78268f 100644 --- a/ft/tests/cachetable-fd-test.cc +++ b/ft/tests/cachetable-fd-test.cc @@ -98,7 +98,7 @@ cachetable_fd_test (void) { const int test_limit = 1; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); toku_os_recursive_delete(TOKU_TEST_FILENAME); r = toku_os_mkdir(TOKU_TEST_FILENAME, S_IRWXU); assert_zero(r); diff --git a/ft/tests/cachetable-fetch-inducing-evictor.cc b/ft/tests/cachetable-fetch-inducing-evictor.cc index ac3191b1a33..2e946aa7ae0 100644 --- a/ft/tests/cachetable-fetch-inducing-evictor.cc +++ b/ft/tests/cachetable-fetch-inducing-evictor.cc @@ -113,7 +113,7 @@ cachetable_test (enum pin_evictor_test_type test_type, bool nonblocking) { const int test_limit = 7; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); evictor_test_helpers::set_hysteresis_limits(&ct->ev, test_limit, test_limit); evictor_test_helpers::disable_ev_thread(&ct->ev); const char *fname1 = TOKU_TEST_FILENAME; diff --git a/ft/tests/cachetable-flush-during-cleaner.cc b/ft/tests/cachetable-flush-during-cleaner.cc index d4c8c85cfba..24a5e16caac 100644 --- a/ft/tests/cachetable-flush-during-cleaner.cc +++ b/ft/tests/cachetable-flush-during-cleaner.cc @@ -117,7 +117,7 @@ cachetable_test (void) 
{ const int test_limit = 400; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); toku_set_cleaner_period(ct, 1); const char *fname1 = TOKU_TEST_FILENAME; diff --git a/ft/tests/cachetable-flush-test.cc b/ft/tests/cachetable-flush-test.cc index c4c2da0577a..55a13159c9e 100644 --- a/ft/tests/cachetable-flush-test.cc +++ b/ft/tests/cachetable-flush-test.cc @@ -97,7 +97,7 @@ test_cachetable_def_flush (int n) { const int test_limit = 2*n; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); toku_os_recursive_delete(TOKU_TEST_FILENAME); r = toku_os_mkdir(TOKU_TEST_FILENAME, S_IRWXU); assert_zero(r); diff --git a/ft/tests/cachetable-getandpin-test.cc b/ft/tests/cachetable-getandpin-test.cc index 6165de34eb0..56d665160d0 100644 --- a/ft/tests/cachetable-getandpin-test.cc +++ b/ft/tests/cachetable-getandpin-test.cc @@ -134,7 +134,7 @@ cachetable_getandpin_test (int n) { const int test_limit = 1024*1024; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff --git a/ft/tests/cachetable-kibbutz_and_flush_cachefile.cc b/ft/tests/cachetable-kibbutz_and_flush_cachefile.cc index f44414cb667..749b3cdb8af 100644 --- a/ft/tests/cachetable-kibbutz_and_flush_cachefile.cc +++ b/ft/tests/cachetable-kibbutz_and_flush_cachefile.cc @@ -114,7 +114,7 @@ run_test (void) { const int test_limit = 12; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff --git a/ft/tests/cachetable-partial-fetch.cc b/ft/tests/cachetable-partial-fetch.cc index 27f5800d06f..3a8fc33316e 
100644 --- a/ft/tests/cachetable-partial-fetch.cc +++ b/ft/tests/cachetable-partial-fetch.cc @@ -166,7 +166,7 @@ cachetable_test (void) { int r; CACHETABLE ct; bool doing_prefetch = false; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; @@ -215,7 +215,7 @@ cachetable_test (void) { // close and reopen cachefile so we can do some simple prefetch tests toku_cachefile_close(&f1, false, ZERO_LSN); toku_cachetable_close(&ct); - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); r = toku_cachetable_openf(&f1, ct, fname1, O_RDWR|O_CREAT, S_IRWXU|S_IRWXG|S_IRWXO); assert(r == 0); // // verify that a prefetch of the node will succeed diff --git a/ft/tests/cachetable-pin-checkpoint.cc b/ft/tests/cachetable-pin-checkpoint.cc index e5022afee88..bf7ec984562 100644 --- a/ft/tests/cachetable-pin-checkpoint.cc +++ b/ft/tests/cachetable-pin-checkpoint.cc @@ -413,7 +413,7 @@ cachetable_test (void) { int r; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); r = toku_cachetable_openf(&f1, ct, fname1, O_RDWR|O_CREAT, S_IRWXU|S_IRWXG|S_IRWXO); assert(r == 0); diff --git a/ft/tests/cachetable-pin-nonblocking-checkpoint-clean.cc b/ft/tests/cachetable-pin-nonblocking-checkpoint-clean.cc index ba4bebab323..32fe84ea57a 100644 --- a/ft/tests/cachetable-pin-nonblocking-checkpoint-clean.cc +++ b/ft/tests/cachetable-pin-nonblocking-checkpoint-clean.cc @@ -100,7 +100,7 @@ run_test (void) { const int test_limit = 20; int r; ct = NULL; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); f1 = NULL; diff --git 
a/ft/tests/cachetable-prefetch-checkpoint-test.cc b/ft/tests/cachetable-prefetch-checkpoint-test.cc index 2122f61afa8..6eed6428cbf 100644 --- a/ft/tests/cachetable-prefetch-checkpoint-test.cc +++ b/ft/tests/cachetable-prefetch-checkpoint-test.cc @@ -153,7 +153,7 @@ static void cachetable_prefetch_checkpoint_test(int n, enum cachetable_dirty dir CACHETABLE ct; CACHETABLE_WRITE_CALLBACK wc = def_write_callback(NULL); wc.flush_callback = flush; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff --git a/ft/tests/cachetable-prefetch-close-leak-test.cc b/ft/tests/cachetable-prefetch-close-leak-test.cc index 3153c6f3a3c..719b6b40d21 100644 --- a/ft/tests/cachetable-prefetch-close-leak-test.cc +++ b/ft/tests/cachetable-prefetch-close-leak-test.cc @@ -139,7 +139,7 @@ static void cachetable_prefetch_close_leak_test (void) { const int test_limit = 1; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff --git a/ft/tests/cachetable-prefetch-close-test.cc b/ft/tests/cachetable-prefetch-close-test.cc index d013db1ab73..8495ad4c5ab 100644 --- a/ft/tests/cachetable-prefetch-close-test.cc +++ b/ft/tests/cachetable-prefetch-close-test.cc @@ -141,7 +141,7 @@ static void cachetable_prefetch_full_test (bool partial_fetch) { expect_pf = false; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff --git a/ft/tests/cachetable-prefetch-flowcontrol-test.cc b/ft/tests/cachetable-prefetch-flowcontrol-test.cc index 6159e8eb67f..6125799447b 100644 --- a/ft/tests/cachetable-prefetch-flowcontrol-test.cc +++ 
b/ft/tests/cachetable-prefetch-flowcontrol-test.cc @@ -152,7 +152,7 @@ fetch (CACHEFILE f __attribute__((__unused__)), static void cachetable_prefetch_flowcontrol_test (int cachetable_size_limit) { int r; CACHETABLE ct; - toku_cachetable_create(&ct, cachetable_size_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, cachetable_size_limit, ZERO_LSN, nullptr); evictor_test_helpers::set_hysteresis_limits(&ct->ev, cachetable_size_limit, cachetable_size_limit); evictor_test_helpers::disable_ev_thread(&ct->ev); const char *fname1 = TOKU_TEST_FILENAME; diff --git a/ft/tests/cachetable-prefetch-getandpin-test.cc b/ft/tests/cachetable-prefetch-getandpin-test.cc index 9aba0fdbafa..9474a46089e 100644 --- a/ft/tests/cachetable-prefetch-getandpin-test.cc +++ b/ft/tests/cachetable-prefetch-getandpin-test.cc @@ -162,7 +162,7 @@ static void cachetable_prefetch_maybegetandpin_test (bool do_partial_fetch) { const int test_limit = 2; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff --git a/ft/tests/cachetable-prefetch-maybegetandpin-test.cc b/ft/tests/cachetable-prefetch-maybegetandpin-test.cc index 14c12bbb817..72a1a5ada9f 100644 --- a/ft/tests/cachetable-prefetch-maybegetandpin-test.cc +++ b/ft/tests/cachetable-prefetch-maybegetandpin-test.cc @@ -119,7 +119,7 @@ static void cachetable_prefetch_maybegetandpin_test (void) { const int test_limit = 1; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff --git a/ft/tests/cachetable-prefetch2-test.cc b/ft/tests/cachetable-prefetch2-test.cc index 6c81ce49188..4285b586039 100644 --- a/ft/tests/cachetable-prefetch2-test.cc +++ b/ft/tests/cachetable-prefetch2-test.cc @@ -122,7 +122,7 @@ 
static void cachetable_prefetch_maybegetandpin_test (void) { const int test_limit = 1; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff --git a/ft/tests/cachetable-put-checkpoint.cc b/ft/tests/cachetable-put-checkpoint.cc index 8691e2b93b0..fb0c510442e 100644 --- a/ft/tests/cachetable-put-checkpoint.cc +++ b/ft/tests/cachetable-put-checkpoint.cc @@ -545,7 +545,7 @@ cachetable_test (void) { int r; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); r = toku_cachetable_openf(&f1, ct, fname1, O_RDWR|O_CREAT, S_IRWXU|S_IRWXG|S_IRWXO); assert(r == 0); diff --git a/ft/tests/cachetable-put-test.cc b/ft/tests/cachetable-put-test.cc index 0280681903e..fcfce830cd0 100644 --- a/ft/tests/cachetable-put-test.cc +++ b/ft/tests/cachetable-put-test.cc @@ -96,7 +96,7 @@ cachetable_put_test (int n) { const int test_limit = 2*n; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff --git a/ft/tests/cachetable-simple-clone.cc b/ft/tests/cachetable-simple-clone.cc index fe96b440248..1794a1982f5 100644 --- a/ft/tests/cachetable-simple-clone.cc +++ b/ft/tests/cachetable-simple-clone.cc @@ -150,7 +150,7 @@ test_clean (enum cachetable_dirty dirty, bool cloneable) { const int test_limit = 12; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff --git a/ft/tests/cachetable-simple-clone2.cc b/ft/tests/cachetable-simple-clone2.cc index 
7dcd2a2bb7c..0543d80981c 100644 --- a/ft/tests/cachetable-simple-clone2.cc +++ b/ft/tests/cachetable-simple-clone2.cc @@ -137,7 +137,7 @@ test_clean (enum cachetable_dirty dirty, bool cloneable) { const int test_limit = 200; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff --git a/ft/tests/cachetable-simple-close.cc b/ft/tests/cachetable-simple-close.cc index 03c66162aab..652d701f193 100644 --- a/ft/tests/cachetable-simple-close.cc +++ b/ft/tests/cachetable-simple-close.cc @@ -145,7 +145,7 @@ simple_test(bool unlink_on_close) { const int test_limit = 12; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; @@ -214,7 +214,7 @@ static void test_pair_stays_in_cache(enum cachetable_dirty dirty) { const int test_limit = 12; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; @@ -245,7 +245,7 @@ static void test_multiple_cachefiles(bool use_same_hash) { const int test_limit = 1000; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); char fname1[strlen(TOKU_TEST_FILENAME) + sizeof("_1")]; strcpy(fname1, TOKU_TEST_FILENAME); @@ -333,7 +333,7 @@ static void test_evictor(void) { const int test_limit = 12; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); char fname1[strlen(TOKU_TEST_FILENAME) + sizeof("_1")]; strcpy(fname1, TOKU_TEST_FILENAME); diff --git 
a/ft/tests/cachetable-simple-maybe-get-pin.cc b/ft/tests/cachetable-simple-maybe-get-pin.cc index 08c14191be6..be3f737fce3 100644 --- a/ft/tests/cachetable-simple-maybe-get-pin.cc +++ b/ft/tests/cachetable-simple-maybe-get-pin.cc @@ -100,7 +100,7 @@ cachetable_test (void) { const int test_limit = 12; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff --git a/ft/tests/cachetable-simple-pin-cheap.cc b/ft/tests/cachetable-simple-pin-cheap.cc index f5608b7572c..af5f134646a 100644 --- a/ft/tests/cachetable-simple-pin-cheap.cc +++ b/ft/tests/cachetable-simple-pin-cheap.cc @@ -125,7 +125,7 @@ run_test (pair_lock_type lock_type) { struct unlockers unlockers = {true, unlock_dummy, NULL, NULL}; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff --git a/ft/tests/cachetable-simple-pin-dep-nodes.cc b/ft/tests/cachetable-simple-pin-dep-nodes.cc index d8ced02318b..1a04dbbd1a3 100644 --- a/ft/tests/cachetable-simple-pin-dep-nodes.cc +++ b/ft/tests/cachetable-simple-pin-dep-nodes.cc @@ -158,7 +158,7 @@ cachetable_test (bool write_first, bool write_second, bool start_checkpoint) { const int test_limit = 12; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff --git a/ft/tests/cachetable-simple-pin-nonblocking-cheap.cc b/ft/tests/cachetable-simple-pin-nonblocking-cheap.cc index cec5aff8266..0f10c98a443 100644 --- a/ft/tests/cachetable-simple-pin-nonblocking-cheap.cc +++ b/ft/tests/cachetable-simple-pin-nonblocking-cheap.cc @@ -164,7 +164,7 @@ run_test (void) { const int 
test_limit = 12; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff --git a/ft/tests/cachetable-simple-pin-nonblocking.cc b/ft/tests/cachetable-simple-pin-nonblocking.cc index a96f7649226..d6e74270866 100644 --- a/ft/tests/cachetable-simple-pin-nonblocking.cc +++ b/ft/tests/cachetable-simple-pin-nonblocking.cc @@ -147,7 +147,7 @@ run_test (void) { const int test_limit = 12; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff --git a/ft/tests/cachetable-simple-pin.cc b/ft/tests/cachetable-simple-pin.cc index e40890ccc04..f4385821674 100644 --- a/ft/tests/cachetable-simple-pin.cc +++ b/ft/tests/cachetable-simple-pin.cc @@ -139,7 +139,7 @@ run_test (void) { const int test_limit = 12; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff --git a/ft/tests/cachetable-simple-put-dep-nodes.cc b/ft/tests/cachetable-simple-put-dep-nodes.cc index 1a5074a172f..d1ca984e8a8 100644 --- a/ft/tests/cachetable-simple-put-dep-nodes.cc +++ b/ft/tests/cachetable-simple-put-dep-nodes.cc @@ -173,7 +173,7 @@ cachetable_test (bool write_first, bool write_second, bool start_checkpoint) { const int test_limit = 12; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff --git a/ft/tests/cachetable-simple-read-pin-nonblocking.cc b/ft/tests/cachetable-simple-read-pin-nonblocking.cc index 6a3d7c34f4a..cb86aa5bad7 
100644 --- a/ft/tests/cachetable-simple-read-pin-nonblocking.cc +++ b/ft/tests/cachetable-simple-read-pin-nonblocking.cc @@ -152,7 +152,7 @@ run_test (void) { int r; void *ret; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); r = toku_cachetable_openf(&f1, ct, fname1, O_RDWR|O_CREAT, S_IRWXU|S_IRWXG|S_IRWXO); assert(r == 0); diff --git a/ft/tests/cachetable-simple-read-pin.cc b/ft/tests/cachetable-simple-read-pin.cc index 5bbc7455755..2683ea04bed 100644 --- a/ft/tests/cachetable-simple-read-pin.cc +++ b/ft/tests/cachetable-simple-read-pin.cc @@ -158,7 +158,7 @@ run_test (void) { int r; void *ret; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); r = toku_cachetable_openf(&f1, ct, fname1, O_RDWR|O_CREAT, S_IRWXU|S_IRWXG|S_IRWXO); assert(r == 0); diff --git a/ft/tests/cachetable-simple-unpin-remove-checkpoint.cc b/ft/tests/cachetable-simple-unpin-remove-checkpoint.cc index b94123ad9a6..15e3fbe10bb 100644 --- a/ft/tests/cachetable-simple-unpin-remove-checkpoint.cc +++ b/ft/tests/cachetable-simple-unpin-remove-checkpoint.cc @@ -114,7 +114,7 @@ cachetable_test (void) { const int test_limit = 120; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff --git a/ft/tests/cachetable-simple-verify.cc b/ft/tests/cachetable-simple-verify.cc index 99364660bd1..89453355bd2 100644 --- a/ft/tests/cachetable-simple-verify.cc +++ b/ft/tests/cachetable-simple-verify.cc @@ -95,7 +95,7 @@ cachetable_test (void) { const int test_limit = 12; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, 
NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff --git a/ft/tests/cachetable-test.cc b/ft/tests/cachetable-test.cc index e498df10a5c..e3085e37572 100644 --- a/ft/tests/cachetable-test.cc +++ b/ft/tests/cachetable-test.cc @@ -118,7 +118,7 @@ static inline void test_mutex_unlock(void) { static void test_cachetable_create(void) { CACHETABLE ct = NULL; - toku_cachetable_create(&ct, 0, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); toku_cachetable_close(&ct); } @@ -172,7 +172,7 @@ static void test_nested_pin (void) { void *vv,*vv2; const char *fname = TOKU_TEST_FILENAME; if (verbose) printf("creating cachetable\n"); - toku_cachetable_create(&t, 1, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&t, 1, ZERO_LSN, nullptr); toku_os_recursive_delete(fname); r = toku_cachetable_openf(&f, t, fname, O_RDWR|O_CREAT, S_IRWXU|S_IRWXG|S_IRWXO); assert(r==0); @@ -257,7 +257,7 @@ static void test_multi_filehandles (void) { unlink(fname1); unlink(fname2); - toku_cachetable_create(&t, 4, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&t, 4, ZERO_LSN, nullptr); r = toku_cachetable_openf(&f1, t, fname1, O_RDWR|O_CREAT, S_IRWXU|S_IRWXG|S_IRWXO); assert(r==0); r = link(fname1, fname2); assert(r==0); r = toku_cachetable_openf(&f2, t, fname2, O_RDWR|O_CREAT, S_IRWXU|S_IRWXG|S_IRWXO); assert(r==0); @@ -325,7 +325,7 @@ static void test_dirty(void) { int dirty; long long pinned; long entry_size; int r; - toku_cachetable_create(&t, 4, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&t, 4, ZERO_LSN, nullptr); const char *fname = TOKU_TEST_FILENAME; toku_os_recursive_delete(fname); @@ -455,7 +455,7 @@ static void test_size_resize(void) { int n = 3; long size = 1; - toku_cachetable_create(&t, n*size, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&t, n*size, ZERO_LSN, nullptr); const char *fname = TOKU_TEST_FILENAME; unlink(fname); @@ -509,7 +509,7 @@ static 
void test_size_flush(void) { const int n = 8; long long size = 1*1024*1024; - toku_cachetable_create(&t, n*size, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&t, n*size, ZERO_LSN, nullptr); const char *fname = TOKU_TEST_FILENAME; unlink(fname); diff --git a/ft/tests/cachetable-unpin-and-remove-test.cc b/ft/tests/cachetable-unpin-and-remove-test.cc index 8e199e153d6..9cd8dda74bd 100644 --- a/ft/tests/cachetable-unpin-and-remove-test.cc +++ b/ft/tests/cachetable-unpin-and-remove-test.cc @@ -116,7 +116,7 @@ cachetable_unpin_and_remove_test (int n) { int i; CACHETABLE ct; - toku_cachetable_create(&ct, table_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, table_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; @@ -172,7 +172,7 @@ cachetable_put_evict_remove_test (int n) { int i; CACHETABLE ct; - toku_cachetable_create(&ct, table_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, table_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff --git a/ft/tests/cachetable-unpin-remove-and-checkpoint.cc b/ft/tests/cachetable-unpin-remove-and-checkpoint.cc index e121f2165d9..23302bf3f45 100644 --- a/ft/tests/cachetable-unpin-remove-and-checkpoint.cc +++ b/ft/tests/cachetable-unpin-remove-and-checkpoint.cc @@ -114,7 +114,7 @@ run_test (void) { const int test_limit = 12; int r; ct = NULL; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff --git a/ft/tests/cachetable-unpin-test.cc b/ft/tests/cachetable-unpin-test.cc index 4d0fe46f5f4..cb2c92d575b 100644 --- a/ft/tests/cachetable-unpin-test.cc +++ b/ft/tests/cachetable-unpin-test.cc @@ -97,7 +97,7 @@ cachetable_unpin_test (int n) { const int test_limit = 2*n; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + 
toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; @@ -145,7 +145,7 @@ unpin_and_evictor_test(enum unpin_evictor_test_type test_type) { int r; CACHETABLE ct; int test_limit = 4; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff --git a/ft/tests/cachetable-writer-thread-limit.cc b/ft/tests/cachetable-writer-thread-limit.cc index fe7a26e4b3a..c3f61ebe7f1 100644 --- a/ft/tests/cachetable-writer-thread-limit.cc +++ b/ft/tests/cachetable-writer-thread-limit.cc @@ -125,7 +125,7 @@ cachetable_test (void) { test_limit = 6; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff --git a/ft/tests/ft-serialize-sub-block-test.cc b/ft/tests/ft-serialize-sub-block-test.cc index 47865bfcce7..0910595961d 100644 --- a/ft/tests/ft-serialize-sub-block-test.cc +++ b/ft/tests/ft-serialize-sub-block-test.cc @@ -112,7 +112,7 @@ static void test_sub_block(int n) { unlink(fname); - toku_cachetable_create(&ct, 0, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); error = toku_open_ft_handle(fname, true, &ft, nodesize, basementnodesize, compression_method, ct, null_txn, toku_builtin_compare_fun); assert(error == 0); diff --git a/ft/tests/ft-test-cursor-2.cc b/ft/tests/ft-test-cursor-2.cc index 8421e5eb39f..96c4d2085ec 100644 --- a/ft/tests/ft-test-cursor-2.cc +++ b/ft/tests/ft-test-cursor-2.cc @@ -118,7 +118,7 @@ static void test_multiple_ft_cursor_dbts(int n) { unlink(fname); - toku_cachetable_create(&ct, 0, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); r = toku_open_ft_handle(fname, 1, &ft, 1<<12, 1<<9, TOKU_DEFAULT_COMPRESSION_METHOD, 
ct, null_txn, toku_builtin_compare_fun); assert(r==0); diff --git a/ft/tests/ft-test-cursor.cc b/ft/tests/ft-test-cursor.cc index 4132ae968ed..3807db28f04 100644 --- a/ft/tests/ft-test-cursor.cc +++ b/ft/tests/ft-test-cursor.cc @@ -170,7 +170,7 @@ static void test_ft_cursor_first(int n) { unlink(fname); - toku_cachetable_create(&ct, 0, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); r = toku_open_ft_handle(fname, 1, &ft, 1<<12, 1<<9, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, test_ft_cursor_keycompare); assert(r==0); @@ -208,7 +208,7 @@ static void test_ft_cursor_last(int n) { unlink(fname); - toku_cachetable_create(&ct, 0, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); r = toku_open_ft_handle(fname, 1, &ft, 1<<12, 1<<9, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, test_ft_cursor_keycompare); assert(r==0); @@ -247,7 +247,7 @@ static void test_ft_cursor_first_last(int n) { unlink(fname); - toku_cachetable_create(&ct, 0, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); r = toku_open_ft_handle(fname, 1, &ft, 1<<12, 1<<9, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, test_ft_cursor_keycompare); assert(r==0); @@ -289,7 +289,7 @@ static void test_ft_cursor_rfirst(int n) { unlink(fname); - toku_cachetable_create(&ct, 0, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); r = toku_open_ft_handle(fname, 1, &ft, 1<<12, 1<<9, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, test_ft_cursor_keycompare); assert(r==0); @@ -353,7 +353,7 @@ static void test_ft_cursor_walk(int n) { unlink(fname); - toku_cachetable_create(&ct, 0, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); r = toku_open_ft_handle(fname, 1, &ft, 1<<12, 1<<9, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, test_ft_cursor_keycompare); assert(r==0); @@ -415,7 +415,7 @@ static void test_ft_cursor_rwalk(int n) { unlink(fname); - toku_cachetable_create(&ct, 0, ZERO_LSN, 
NULL_LOGGER); + toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); r = toku_open_ft_handle(fname, 1, &ft, 1<<12, 1<<9, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, test_ft_cursor_keycompare); assert(r==0); @@ -496,7 +496,7 @@ static void test_ft_cursor_rand(int n) { unlink(fname); - toku_cachetable_create(&ct, 0, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); r = toku_open_ft_handle(fname, 1, &ft, 1<<12, 1<<9, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, test_ft_cursor_keycompare); assert(r==0); @@ -546,7 +546,7 @@ static void test_ft_cursor_split(int n) { unlink(fname); - toku_cachetable_create(&ct, 0, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); r = toku_open_ft_handle(fname, 1, &ft, 1<<12, 1<<9, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, test_ft_cursor_keycompare); assert(r==0); @@ -617,7 +617,7 @@ static void test_multiple_ft_cursors(int n) { unlink(fname); - toku_cachetable_create(&ct, 0, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); r = toku_open_ft_handle(fname, 1, &ft, 1<<12, 1<<9, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, test_ft_cursor_keycompare); assert(r==0); @@ -663,7 +663,7 @@ static void test_multiple_ft_cursor_walk(int n) { int nodesize = 1<<12; int h = log16(n); int cachesize = 2 * h * ncursors * nodesize; - toku_cachetable_create(&ct, cachesize, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, cachesize, ZERO_LSN, nullptr); r = toku_open_ft_handle(fname, 1, &ft, 1<<12, 1<<9, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, test_ft_cursor_keycompare); assert(r==0); @@ -736,7 +736,7 @@ static void test_ft_cursor_set(int n, int cursor_op) { unlink(fname); - toku_cachetable_create(&ct, 0, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); r = toku_open_ft_handle(fname, 1, &ft, 1<<12, 1<<9, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, test_ft_cursor_keycompare); assert(r==0); @@ -804,7 +804,7 @@ static void 
test_ft_cursor_set_range(int n) { unlink(fname); - toku_cachetable_create(&ct, 0, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); r = toku_open_ft_handle(fname, 1, &ft, 1<<12, 1<<9, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, test_ft_cursor_keycompare); assert(r==0); @@ -864,7 +864,7 @@ static void test_ft_cursor_delete(int n) { unlink(fname); - toku_cachetable_create(&ct, 0, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); error = toku_open_ft_handle(fname, 1, &ft, 1<<12, 1<<9, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, test_ft_cursor_keycompare); assert(error == 0); diff --git a/ft/tests/ft-test-header.cc b/ft/tests/ft-test-header.cc index cf3a9838860..5b19ecd6245 100644 --- a/ft/tests/ft-test-header.cc +++ b/ft/tests/ft-test-header.cc @@ -104,7 +104,7 @@ static void test_header (void) { const char *fname = TOKU_TEST_FILENAME; // First create dictionary - toku_cachetable_create(&ct, 0, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); unlink(fname); r = toku_open_ft_handle(fname, 1, &t, 1024, 256, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, toku_builtin_compare_fun); assert(r==0); @@ -122,7 +122,7 @@ static void test_header (void) { toku_cachetable_close(&ct); // Now read dictionary back into memory and examine some header fields - toku_cachetable_create(&ct, 0, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); r = toku_open_ft_handle(fname, 0, &t, 1024, 256, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, toku_builtin_compare_fun); assert(r==0); diff --git a/ft/tests/ft-test.cc b/ft/tests/ft-test.cc index 7bd77595954..862a18243e9 100644 --- a/ft/tests/ft-test.cc +++ b/ft/tests/ft-test.cc @@ -101,7 +101,7 @@ static void test_dump_empty_db (void) { CACHETABLE ct; int r; - toku_cachetable_create(&ct, 0, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); unlink(fname); r = toku_open_ft_handle(fname, 1, &t, 1024, 256, 
TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, toku_builtin_compare_fun); assert(r==0); @@ -124,7 +124,7 @@ static void test_multiple_files_of_size (int size) { toku_os_recursive_delete(TOKU_TEST_FILENAME); r = toku_os_mkdir(TOKU_TEST_FILENAME, S_IRWXU); assert(r == 0); - toku_cachetable_create(&ct, 0, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); r = toku_open_ft_handle(n0, 1, &t0, size, size / 4, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, toku_builtin_compare_fun); assert(r==0); r = toku_open_ft_handle(n1, 1, &t1, size, size / 4, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, toku_builtin_compare_fun); assert(r==0); for (i=0; i<10000; i++) { @@ -148,7 +148,7 @@ static void test_multiple_files_of_size (int size) { /* Now see if the data is all there. */ - toku_cachetable_create(&ct, 0, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); r = toku_open_ft_handle(n0, 0, &t0, 1<<12, 1<<9, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, toku_builtin_compare_fun); if (verbose) printf("%s:%d r=%d\n", __FILE__, __LINE__,r); assert(r==0); @@ -184,7 +184,7 @@ static void test_multiple_ft_handles_one_db_one_file (void) { if (verbose) printf("test_multiple_ft_handles_one_db_one_file:"); unlink(fname); - toku_cachetable_create(&ct, 32, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 32, ZERO_LSN, nullptr); for (i=0; i #include +#include // Promotion tracks the rightmost blocknum in the FT when a message // is successfully promoted to a non-root leaf node on the right extreme. 
@@ -109,7 +110,7 @@ static void test_split_merge(void) { FT_HANDLE ft_handle; CACHETABLE ct; - toku_cachetable_create(&ct, 0, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); r = toku_open_ft_handle(name, 1, &ft_handle, 4*1024*1024, 64*1024, TOKU_DEFAULT_COMPRESSION_METHOD, ct, NULL, diff --git a/ft/tests/upgrade_test_simple.cc b/ft/tests/upgrade_test_simple.cc index 31811527aa2..e9c9d6cb9c7 100644 --- a/ft/tests/upgrade_test_simple.cc +++ b/ft/tests/upgrade_test_simple.cc @@ -176,7 +176,7 @@ with_open_tree(const char *fname, tree_cb cb, void *cb_extra) FT_HANDLE t; CACHETABLE ct; - toku_cachetable_create(&ct, 16*(1<<20), ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 16*(1<<20), ZERO_LSN, nullptr); r = toku_open_ft_handle(fname, 0, &t, diff --git a/ft/tests/verify-bad-msn.cc b/ft/tests/verify-bad-msn.cc index fd8200ca860..99a70b40f37 100644 --- a/ft/tests/verify-bad-msn.cc +++ b/ft/tests/verify-bad-msn.cc @@ -212,7 +212,7 @@ test_make_tree(int height, int fanout, int nperleaf, int do_verify) { // create a cachetable CACHETABLE ct = NULL; - toku_cachetable_create(&ct, 0, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); // create the ft TOKUTXN null_txn = NULL; diff --git a/ft/tests/verify-bad-pivots.cc b/ft/tests/verify-bad-pivots.cc index ea8b4ed1714..6d1ebfa85a6 100644 --- a/ft/tests/verify-bad-pivots.cc +++ b/ft/tests/verify-bad-pivots.cc @@ -182,7 +182,7 @@ test_make_tree(int height, int fanout, int nperleaf, int do_verify) { // create a cachetable CACHETABLE ct = NULL; - toku_cachetable_create(&ct, 0, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); // create the ft TOKUTXN null_txn = NULL; diff --git a/ft/tests/verify-dup-in-leaf.cc b/ft/tests/verify-dup-in-leaf.cc index 78d36a8bc4e..9806c6063a0 100644 --- a/ft/tests/verify-dup-in-leaf.cc +++ b/ft/tests/verify-dup-in-leaf.cc @@ -140,7 +140,7 @@ test_dup_in_leaf(int do_verify) { // create a cachetable CACHETABLE ct = 
NULL; - toku_cachetable_create(&ct, 0, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); // create the ft TOKUTXN null_txn = NULL; diff --git a/ft/tests/verify-dup-pivots.cc b/ft/tests/verify-dup-pivots.cc index 5d2c129fc01..c0766a4d035 100644 --- a/ft/tests/verify-dup-pivots.cc +++ b/ft/tests/verify-dup-pivots.cc @@ -185,7 +185,7 @@ test_make_tree(int height, int fanout, int nperleaf, int do_verify) { // create a cachetable CACHETABLE ct = NULL; - toku_cachetable_create(&ct, 0, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); // create the ft TOKUTXN null_txn = NULL; diff --git a/ft/tests/verify-misrouted-msgs.cc b/ft/tests/verify-misrouted-msgs.cc index e90f0e53f6d..556aaa31522 100644 --- a/ft/tests/verify-misrouted-msgs.cc +++ b/ft/tests/verify-misrouted-msgs.cc @@ -197,7 +197,7 @@ test_make_tree(int height, int fanout, int nperleaf, int do_verify) { // create a cachetable CACHETABLE ct = NULL; - toku_cachetable_create(&ct, 0, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); // create the ft TOKUTXN null_txn = NULL; diff --git a/ft/tests/verify-unsorted-leaf.cc b/ft/tests/verify-unsorted-leaf.cc index 83ef9ed52ec..6933606afd2 100644 --- a/ft/tests/verify-unsorted-leaf.cc +++ b/ft/tests/verify-unsorted-leaf.cc @@ -142,7 +142,7 @@ test_dup_in_leaf(int do_verify) { // create a cachetable CACHETABLE ct = NULL; - toku_cachetable_create(&ct, 0, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); // create the ft TOKUTXN null_txn = NULL; diff --git a/ft/tests/verify-unsorted-pivots.cc b/ft/tests/verify-unsorted-pivots.cc index 3b5277a8a2c..eae84382da3 100644 --- a/ft/tests/verify-unsorted-pivots.cc +++ b/ft/tests/verify-unsorted-pivots.cc @@ -182,7 +182,7 @@ test_make_tree(int height, int fanout, int nperleaf, int do_verify) { // create a cachetable CACHETABLE ct = NULL; - toku_cachetable_create(&ct, 0, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 0, 
ZERO_LSN, nullptr); // create the ft TOKUTXN null_txn = NULL; diff --git a/src/ydb_db.cc b/src/ydb_db.cc index b9fa32eb4a0..3d2e359e5a1 100644 --- a/src/ydb_db.cc +++ b/src/ydb_db.cc @@ -474,7 +474,7 @@ toku_db_open_iname(DB * db, DB_TXN * txn, const char *iname_in_env, uint32_t fla int r = toku_ft_handle_open(ft_handle, iname_in_env, is_db_create, is_db_excl, db->dbenv->i->cachetable, - txn ? db_txn_struct_i(txn)->tokutxn : NULL_TXN); + txn ? db_txn_struct_i(txn)->tokutxn : nullptr); if (r != 0) { goto error_cleanup; } From 4f1762f868b27c7c06547c7e099d2fe6bfe5f054 Mon Sep 17 00:00:00 2001 From: John Esmet Date: Sat, 14 Jun 2014 20:24:46 -0400 Subject: [PATCH 010/190] FT-247 Move files relating to the bulk loader to their own directory --- ft/CMakeLists.txt | 8 ++++---- ft/{ftloader-callback.cc => loader/callbacks.cc} | 2 +- ft/{ => loader}/dbufio.cc | 4 ++-- ft/{ => loader}/dbufio.h | 0 ft/{ftloader-internal.h => loader/loader-internal.h} | 4 ++-- ft/{ftloader.cc => loader/loader.cc} | 6 +++--- ft/{ftloader.h => loader/loader.h} | 0 ft/{ => loader}/pqueue.cc | 4 ++-- ft/{ => loader}/pqueue.h | 0 ft/tests/dbufio-test-destroy.cc | 2 +- ft/tests/dbufio-test.cc | 2 +- ft/tests/ftloader-test-bad-generate.cc | 4 ++-- ft/tests/ftloader-test-extractor-errors.cc | 4 ++-- ft/tests/ftloader-test-extractor.cc | 4 ++-- ft/tests/ftloader-test-merge-files-dbufio.cc | 2 +- ft/tests/ftloader-test-open.cc | 4 ++-- ft/tests/ftloader-test-writer-errors.cc | 2 +- ft/tests/ftloader-test-writer.cc | 2 +- ft/tests/ftloader-test.cc | 2 +- ft/tests/pqueue-test.cc | 4 ++-- src/loader.cc | 2 +- src/ydb.cc | 2 +- src/ydb_env_func.cc | 2 +- 23 files changed, 33 insertions(+), 33 deletions(-) rename ft/{ftloader-callback.cc => loader/callbacks.cc} (99%) rename ft/{ => loader}/dbufio.cc (99%) rename ft/{ => loader}/dbufio.h (100%) rename ft/{ftloader-internal.h => loader/loader-internal.h} (99%) rename ft/{ftloader.cc => loader/loader.cc} (99%) rename ft/{ftloader.h => loader/loader.h} (100%) 
rename ft/{ => loader}/pqueue.cc (99%) rename ft/{ => loader}/pqueue.h (100%) diff --git a/ft/CMakeLists.txt b/ft/CMakeLists.txt index 9c594bbf316..4a6b842bde4 100644 --- a/ft/CMakeLists.txt +++ b/ft/CMakeLists.txt @@ -30,14 +30,11 @@ set(FT_SOURCES cachetable checkpoint compress - dbufio fifo ft ft-cachetable-wrappers ft-flusher ft-hot-flusher - ftloader - ftloader-callback ft_msg ft_node-serialize ft-node-deserialize @@ -45,13 +42,16 @@ set(FT_SOURCES ft-serialize ft-test-helpers ft-verify + loader/callbacks + loader/dbufio + loader/loader + loader/pqueue leafentry le-cursor logcursor logfilemgr logger log_upgrade - pqueue quicklz recover rollback diff --git a/ft/ftloader-callback.cc b/ft/loader/callbacks.cc similarity index 99% rename from ft/ftloader-callback.cc rename to ft/loader/callbacks.cc index 3472d294551..a6b7686e023 100644 --- a/ft/ftloader-callback.cc +++ b/ft/loader/callbacks.cc @@ -95,7 +95,7 @@ PATENT RIGHTS GRANT: #include #include -#include "ftloader-internal.h" +#include "loader/loader-internal.h" #include "ybt.h" static void error_callback_lock(ft_loader_error_callback loader_error) { diff --git a/ft/dbufio.cc b/ft/loader/dbufio.cc similarity index 99% rename from ft/dbufio.cc rename to ft/loader/dbufio.cc index 69b3bd8e936..aa964f95353 100644 --- a/ft/dbufio.cc +++ b/ft/loader/dbufio.cc @@ -89,14 +89,14 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2010-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
-#include "dbufio.h" +#include "loader/dbufio.h" #include "fttypes.h" #include #include #include #include "memory.h" #include -#include "ftloader-internal.h" +#include "loader/loader-internal.h" #include "ft-internal.h" #include "ft.h" diff --git a/ft/dbufio.h b/ft/loader/dbufio.h similarity index 100% rename from ft/dbufio.h rename to ft/loader/dbufio.h diff --git a/ft/ftloader-internal.h b/ft/loader/loader-internal.h similarity index 99% rename from ft/ftloader-internal.h rename to ft/loader/loader-internal.h index 11effdfb3da..779d2da450c 100644 --- a/ft/ftloader-internal.h +++ b/ft/loader/loader-internal.h @@ -92,10 +92,10 @@ PATENT RIGHTS GRANT: #include #include "fttypes.h" -#include "ftloader.h" +#include "loader/loader.h" #include "util/queue.h" #include -#include "dbufio.h" +#include "loader/dbufio.h" enum { EXTRACTOR_QUEUE_DEPTH = 2, FILE_BUFFER_SIZE = 1<<24, diff --git a/ft/ftloader.cc b/ft/loader/loader.cc similarity index 99% rename from ft/ftloader.cc rename to ft/loader/loader.cc index 34ac684fe9c..d2914cf0511 100644 --- a/ft/ftloader.cc +++ b/ft/loader/loader.cc @@ -102,12 +102,12 @@ PATENT RIGHTS GRANT: #include -#include "ftloader-internal.h" +#include "loader/loader-internal.h" #include "ft-internal.h" #include "sub_block.h" #include "sub_block_map.h" -#include "pqueue.h" -#include "dbufio.h" +#include "loader/pqueue.h" +#include "loader/dbufio.h" #include "leafentry.h" #include "log-internal.h" #include "ft.h" diff --git a/ft/ftloader.h b/ft/loader/loader.h similarity index 100% rename from ft/ftloader.h rename to ft/loader/loader.h diff --git a/ft/pqueue.cc b/ft/loader/pqueue.cc similarity index 99% rename from ft/pqueue.cc rename to ft/loader/pqueue.cc index fa76551b81f..25620e37cc5 100644 --- a/ft/pqueue.cc +++ b/ft/loader/pqueue.cc @@ -92,8 +92,8 @@ PATENT RIGHTS GRANT: #include #include "toku_os.h" #include "ft-internal.h" -#include "ftloader-internal.h" -#include "pqueue.h" +#include "loader/loader-internal.h" +#include "loader/pqueue.h" 
#define pqueue_left(i) ((i) << 1) #define pqueue_right(i) (((i) << 1) + 1) diff --git a/ft/pqueue.h b/ft/loader/pqueue.h similarity index 100% rename from ft/pqueue.h rename to ft/loader/pqueue.h diff --git a/ft/tests/dbufio-test-destroy.cc b/ft/tests/dbufio-test-destroy.cc index 8110f9554ad..c09cbb683e6 100644 --- a/ft/tests/dbufio-test-destroy.cc +++ b/ft/tests/dbufio-test-destroy.cc @@ -88,7 +88,7 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#include "dbufio.h" +#include "loader/dbufio.h" #include #include #include diff --git a/ft/tests/dbufio-test.cc b/ft/tests/dbufio-test.cc index cffc081921b..2b32684de53 100644 --- a/ft/tests/dbufio-test.cc +++ b/ft/tests/dbufio-test.cc @@ -88,7 +88,7 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
-#include "dbufio.h" +#include "loader/dbufio.h" #include #include #include diff --git a/ft/tests/ftloader-test-bad-generate.cc b/ft/tests/ftloader-test-bad-generate.cc index ca3e649c565..8fd7c27401a 100644 --- a/ft/tests/ftloader-test-bad-generate.cc +++ b/ft/tests/ftloader-test-bad-generate.cc @@ -94,8 +94,8 @@ PATENT RIGHTS GRANT: #define DONT_DEPRECATE_MALLOC #define DONT_DEPRECATE_WRITES #include "test.h" -#include "ftloader.h" -#include "ftloader-internal.h" +#include "loader/loader.h" +#include "loader/loader-internal.h" #include "ftloader-error-injector.h" #include "memory.h" #include diff --git a/ft/tests/ftloader-test-extractor-errors.cc b/ft/tests/ftloader-test-extractor-errors.cc index 3cd5a1e586f..6d96dee9145 100644 --- a/ft/tests/ftloader-test-extractor-errors.cc +++ b/ft/tests/ftloader-test-extractor-errors.cc @@ -95,8 +95,8 @@ PATENT RIGHTS GRANT: #define DONT_DEPRECATE_MALLOC #define DONT_DEPRECATE_WRITES #include "test.h" -#include "ftloader.h" -#include "ftloader-internal.h" +#include "loader/loader.h" +#include "loader/loader-internal.h" #include "ftloader-error-injector.h" #include "memory.h" #include diff --git a/ft/tests/ftloader-test-extractor.cc b/ft/tests/ftloader-test-extractor.cc index b806f44ab2b..868ca41fa18 100644 --- a/ft/tests/ftloader-test-extractor.cc +++ b/ft/tests/ftloader-test-extractor.cc @@ -95,8 +95,8 @@ PATENT RIGHTS GRANT: #define DONT_DEPRECATE_MALLOC #define DONT_DEPRECATE_WRITES #include "test.h" -#include "ftloader.h" -#include "ftloader-internal.h" +#include "loader/loader.h" +#include "loader/loader-internal.h" #include "memory.h" #include diff --git a/ft/tests/ftloader-test-merge-files-dbufio.cc b/ft/tests/ftloader-test-merge-files-dbufio.cc index 44f3d27fcd6..5eaabf124ec 100644 --- a/ft/tests/ftloader-test-merge-files-dbufio.cc +++ b/ft/tests/ftloader-test-merge-files-dbufio.cc @@ -95,7 +95,7 @@ PATENT RIGHTS GRANT: #define DONT_DEPRECATE_MALLOC #include "test.h" -#include "ftloader-internal.h" +#include 
"loader/loader-internal.h" #include static int event_count, event_count_trigger; diff --git a/ft/tests/ftloader-test-open.cc b/ft/tests/ftloader-test-open.cc index cdf0a14ab00..c7149ce8113 100644 --- a/ft/tests/ftloader-test-open.cc +++ b/ft/tests/ftloader-test-open.cc @@ -94,8 +94,8 @@ PATENT RIGHTS GRANT: #define DONT_DEPRECATE_MALLOC #include "test.h" -#include "ftloader.h" -#include "ftloader-internal.h" +#include "loader/loader.h" +#include "loader/loader-internal.h" #include "memory.h" #include diff --git a/ft/tests/ftloader-test-writer-errors.cc b/ft/tests/ftloader-test-writer-errors.cc index 0309e6082f3..6464f1f9240 100644 --- a/ft/tests/ftloader-test-writer-errors.cc +++ b/ft/tests/ftloader-test-writer-errors.cc @@ -95,7 +95,7 @@ PATENT RIGHTS GRANT: #define DONT_DEPRECATE_MALLOC #include "test.h" -#include "ftloader-internal.h" +#include "loader/loader-internal.h" #include "ftloader-error-injector.h" #include diff --git a/ft/tests/ftloader-test-writer.cc b/ft/tests/ftloader-test-writer.cc index bb955bd65fc..6be385ec5db 100644 --- a/ft/tests/ftloader-test-writer.cc +++ b/ft/tests/ftloader-test-writer.cc @@ -93,7 +93,7 @@ PATENT RIGHTS GRANT: #include "test.h" -#include "ftloader-internal.h" +#include "loader/loader-internal.h" #include #include diff --git a/ft/tests/ftloader-test.cc b/ft/tests/ftloader-test.cc index 46b7cd751ee..34ff22b01b3 100644 --- a/ft/tests/ftloader-test.cc +++ b/ft/tests/ftloader-test.cc @@ -94,7 +94,7 @@ PATENT RIGHTS GRANT: #include #include #include -#include "ftloader-internal.h" +#include "loader/loader-internal.h" #include "memory.h" #include diff --git a/ft/tests/pqueue-test.cc b/ft/tests/pqueue-test.cc index a42cf830c9e..90a9fbb4e1e 100644 --- a/ft/tests/pqueue-test.cc +++ b/ft/tests/pqueue-test.cc @@ -91,8 +91,8 @@ PATENT RIGHTS GRANT: #include "test.h" -#include "ftloader-internal.h" -#include "pqueue.h" +#include "loader/loader-internal.h" +#include "loader/pqueue.h" int found_dup = -1; diff --git a/src/loader.cc 
b/src/loader.cc index 62b4f0b6cef..0805b2d7d16 100644 --- a/src/loader.cc +++ b/src/loader.cc @@ -99,7 +99,7 @@ PATENT RIGHTS GRANT: #include #include -#include +#include #include #include "ydb-internal.h" diff --git a/src/ydb.cc b/src/ydb.cc index 4d947556cb5..7149e235821 100644 --- a/src/ydb.cc +++ b/src/ydb.cc @@ -117,7 +117,7 @@ const char *toku_copyright_string = "Copyright (c) 2007-2013 Tokutek Inc. All r #include #include #include -#include +#include #include #include #include diff --git a/src/ydb_env_func.cc b/src/ydb_env_func.cc index 5247e699a23..550f853c2d9 100644 --- a/src/ydb_env_func.cc +++ b/src/ydb_env_func.cc @@ -99,7 +99,7 @@ PATENT RIGHTS GRANT: #include #include #include -#include +#include #include "ydb_env_func.h" From 2cd8b4378e43c8a936c9075590126147090caecd Mon Sep 17 00:00:00 2001 From: John Esmet Date: Sat, 14 Jun 2014 20:25:28 -0400 Subject: [PATCH 011/190] FT-249 FT-256 Add a message buffer class to replace FIFO. Use a real functor instead of a macro for iterate. 
--- ft/CMakeLists.txt | 2 +- ft/block_allocator.h | 3 - ft/fifo.cc | 253 -------------------------------- ft/ft-internal.h | 19 +-- ft/ft-ops.cc | 268 ++++++++++++++++++---------------- ft/ft-verify.cc | 171 +++++++++++++--------- ft/ft.cc | 2 +- ft/ft_msg.h | 3 +- ft/ft_node-serialize.cc | 120 ++++++++------- ft/msg_buffer.cc | 224 ++++++++++++++++++++++++++++ ft/{fifo.h => msg_buffer.h} | 139 +++++++++--------- ft/tests/fifo-test.cc | 79 +++++----- ft/tests/ft-serialize-test.cc | 12 +- ft/tests/orthopush-flush.cc | 246 +++++++++++++++++++------------ ft/tokuftdump.cc | 73 +++++---- ft/xids-internal.h | 2 + 16 files changed, 864 insertions(+), 752 deletions(-) delete mode 100644 ft/fifo.cc create mode 100644 ft/msg_buffer.cc rename ft/{fifo.h => msg_buffer.h} (50%) diff --git a/ft/CMakeLists.txt b/ft/CMakeLists.txt index 4a6b842bde4..3ee8cbd48d5 100644 --- a/ft/CMakeLists.txt +++ b/ft/CMakeLists.txt @@ -30,7 +30,6 @@ set(FT_SOURCES cachetable checkpoint compress - fifo ft ft-cachetable-wrappers ft-flusher @@ -52,6 +51,7 @@ set(FT_SOURCES logfilemgr logger log_upgrade + msg_buffer quicklz recover rollback diff --git a/ft/block_allocator.h b/ft/block_allocator.h index 289e7251c84..815692963fb 100644 --- a/ft/block_allocator.h +++ b/ft/block_allocator.h @@ -198,9 +198,6 @@ void block_allocator_print (BLOCK_ALLOCATOR ba); uint64_t block_allocator_allocated_limit (BLOCK_ALLOCATOR ba); // Effect: Return the unallocated block address of "infinite" size. // That is, return the smallest address that is above all the allocated blocks. -// Rationale: When writing the root FIFO we don't know how big the block is. -// So we start at the "infinite" block, write the fifo, and then -// allocate_block_at of the correct size and offset to account for the root FIFO. int block_allocator_get_nth_block_in_layout_order (BLOCK_ALLOCATOR ba, uint64_t b, uint64_t *offset, uint64_t *size); // Effect: Consider the blocks in sorted order. The reserved block at the beginning is number 0. 
The next one is number 1 and so forth. diff --git a/ft/fifo.cc b/ft/fifo.cc deleted file mode 100644 index 6acd29be67c..00000000000 --- a/ft/fifo.cc +++ /dev/null @@ -1,253 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). - - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. 
- -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. - - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. 
If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." - -#include "fifo.h" -#include "xids.h" -#include "ybt.h" -#include -#include - -struct fifo { - int n_items_in_fifo; - char *memory; // An array of bytes into which fifo_entries are embedded. - int memory_size; // How big is fifo_memory - int memory_used; // How many bytes are in use? -}; - -static void fifo_init(struct fifo *fifo) { - fifo->n_items_in_fifo = 0; - fifo->memory = 0; - fifo->memory_size = 0; - fifo->memory_used = 0; -} - -__attribute__((const,nonnull)) -static int fifo_entry_size(struct fifo_entry *entry) { - return sizeof (struct fifo_entry) + entry->keylen + entry->vallen - + xids_get_size(&entry->xids_s) - - sizeof(XIDS_S); //Prevent double counting from fifo_entry+xids_get_size -} - -__attribute__((const,nonnull)) -size_t toku_ft_msg_memsize_in_fifo(FT_MSG msg) { - // This must stay in sync with fifo_entry_size because that's what we - // really trust. But sometimes we only have an in-memory FT_MSG, not - // a serialized fifo_entry so we have to fake it. 
- return sizeof (struct fifo_entry) + msg->u.id.key->size + msg->u.id.val->size - + xids_get_size(msg->xids) - - sizeof(XIDS_S); -} - -int toku_fifo_create(FIFO *ptr) { - struct fifo *XMALLOC(fifo); - if (fifo == 0) return ENOMEM; - fifo_init(fifo); - *ptr = fifo; - return 0; -} - -void toku_fifo_resize(FIFO fifo, size_t new_size) { - XREALLOC_N(new_size, fifo->memory); - fifo->memory_size = new_size; -} - -void toku_fifo_free(FIFO *ptr) { - FIFO fifo = *ptr; - if (fifo->memory) toku_free(fifo->memory); - fifo->memory=0; - toku_free(fifo); - *ptr = 0; -} - -int toku_fifo_n_entries(FIFO fifo) { - return fifo->n_items_in_fifo; -} - -static int next_power_of_two (int n) { - int r = 4096; - while (r < n) { - r*=2; - assert(r>0); - } - return r; -} - -int toku_fifo_enq(FIFO fifo, const void *key, unsigned int keylen, const void *data, unsigned int datalen, enum ft_msg_type type, MSN msn, XIDS xids, bool is_fresh, int32_t *dest) { - int need_space_here = sizeof(struct fifo_entry) - + keylen + datalen - + xids_get_size(xids) - - sizeof(XIDS_S); //Prevent double counting - int need_space_total = fifo->memory_used+need_space_here; - if (fifo->memory == NULL || need_space_total > fifo->memory_size) { - // resize the fifo to the next power of 2 greater than the needed space - int next_2 = next_power_of_two(need_space_total); - toku_fifo_resize(fifo, next_2); - } - struct fifo_entry *entry = (struct fifo_entry *)(fifo->memory + fifo->memory_used); - entry->type = (unsigned char) type; - entry->msn = msn; - xids_cpy(&entry->xids_s, xids); - entry->is_fresh = is_fresh; - entry->keylen = keylen; - unsigned char *e_key = xids_get_end_of_array(&entry->xids_s); - memcpy(e_key, key, keylen); - entry->vallen = datalen; - memcpy(e_key + keylen, data, datalen); - if (dest) { - *dest = fifo->memory_used; - } - fifo->n_items_in_fifo++; - fifo->memory_used += need_space_here; - return 0; -} - -int toku_fifo_iterate_internal_start(FIFO UU(fifo)) { return 0; } -int 
toku_fifo_iterate_internal_has_more(FIFO fifo, int off) { return off < fifo->memory_used; } -int toku_fifo_iterate_internal_next(FIFO fifo, int off) { - struct fifo_entry *e = (struct fifo_entry *)(fifo->memory + off); - return off + fifo_entry_size(e); -} -struct fifo_entry * toku_fifo_iterate_internal_get_entry(FIFO fifo, int off) { - return (struct fifo_entry *)(fifo->memory + off); -} -size_t toku_fifo_internal_entry_memsize(struct fifo_entry *e) { - return fifo_entry_size(e); -} - -void toku_fifo_iterate (FIFO fifo, void(*f)(bytevec key,ITEMLEN keylen,bytevec data,ITEMLEN datalen, enum ft_msg_type type, MSN msn, XIDS xids, bool is_fresh, void*), void *arg) { - FIFO_ITERATE(fifo, - key, keylen, data, datalen, type, msn, xids, is_fresh, - f(key,keylen,data,datalen,type,msn,xids,is_fresh, arg)); -} - -unsigned int toku_fifo_buffer_size_in_use (FIFO fifo) { - return fifo->memory_used; -} - -unsigned long toku_fifo_memory_size_in_use(FIFO fifo) { - return sizeof(*fifo)+fifo->memory_used; -} - -unsigned long toku_fifo_memory_footprint(FIFO fifo) { - size_t size_used = toku_memory_footprint(fifo->memory, fifo->memory_used); - long rval = sizeof(*fifo) + size_used; - return rval; -} - -DBT *fill_dbt_for_fifo_entry(DBT *dbt, const struct fifo_entry *entry) { - return toku_fill_dbt(dbt, xids_get_end_of_array((XIDS) &entry->xids_s), entry->keylen); -} - -struct fifo_entry *toku_fifo_get_entry(FIFO fifo, int off) { - return toku_fifo_iterate_internal_get_entry(fifo, off); -} - -void toku_fifo_clone(FIFO orig_fifo, FIFO* cloned_fifo) { - struct fifo *XMALLOC(new_fifo); - assert(new_fifo); - new_fifo->n_items_in_fifo = orig_fifo->n_items_in_fifo; - new_fifo->memory_used = orig_fifo->memory_used; - new_fifo->memory_size = new_fifo->memory_used; - XMALLOC_N(new_fifo->memory_size, new_fifo->memory); - memcpy( - new_fifo->memory, - orig_fifo->memory, - new_fifo->memory_size - ); - *cloned_fifo = new_fifo; -} - -bool toku_are_fifos_same(FIFO fifo1, FIFO fifo2) { - return ( - 
fifo1->memory_used == fifo2->memory_used && - memcmp(fifo1->memory, fifo2->memory, fifo1->memory_used) == 0 - ); -} diff --git a/ft/ft-internal.h b/ft/ft-internal.h index 79090016f82..3d341e27523 100644 --- a/ft/ft-internal.h +++ b/ft/ft-internal.h @@ -106,7 +106,6 @@ PATENT RIGHTS GRANT: #include "ft_layout_version.h" #include "block_allocator.h" #include "cachetable.h" -#include "fifo.h" #include "ft-ops.h" #include "toku_list.h" #include @@ -118,6 +117,7 @@ PATENT RIGHTS GRANT: #include "ft/bndata.h" #include "ft/rollback.h" #include "ft/ft-search.h" +#include "ft/msg_buffer.h" enum { KEY_VALUE_OVERHEAD = 8 }; /* Must store the two lengths. */ enum { FT_MSG_OVERHEAD = (2 + sizeof(MSN)) }; // the type plus freshness plus MSN @@ -207,10 +207,10 @@ struct ftnode_fetch_extra { }; typedef struct ftnode_fetch_extra *FTNODE_FETCH_EXTRA; -struct toku_fifo_entry_key_msn_heaviside_extra { +struct toku_msg_buffer_key_msn_heaviside_extra { DESCRIPTOR desc; ft_compare_func cmp; - FIFO fifo; + message_buffer *msg_buffer; const DBT *key; MSN msn; }; @@ -218,24 +218,24 @@ struct toku_fifo_entry_key_msn_heaviside_extra { // comparison function for inserting messages into a // ftnode_nonleaf_childinfo's message_tree int -toku_fifo_entry_key_msn_heaviside(const int32_t &v, const struct toku_fifo_entry_key_msn_heaviside_extra &extra); +toku_msg_buffer_key_msn_heaviside(const int32_t &v, const struct toku_msg_buffer_key_msn_heaviside_extra &extra); -struct toku_fifo_entry_key_msn_cmp_extra { +struct toku_msg_buffer_key_msn_cmp_extra { DESCRIPTOR desc; ft_compare_func cmp; - FIFO fifo; + message_buffer *msg_buffer; }; // same thing for qsort_r int -toku_fifo_entry_key_msn_cmp(const struct toku_fifo_entry_key_msn_cmp_extra &extrap, const int &a, const int &b); +toku_msg_buffer_key_msn_cmp(const struct toku_msg_buffer_key_msn_cmp_extra &extrap, const int &a, const int &b); typedef toku::omt off_omt_t; typedef toku::omt marked_off_omt_t; // data of an available partition of a nonleaf 
ftnode struct ftnode_nonleaf_childinfo { - FIFO buffer; + message_buffer msg_buffer; off_omt_t broadcast_list; marked_off_omt_t fresh_message_tree; off_omt_t stale_message_tree; @@ -946,9 +946,6 @@ bool toku_ft_leaf_needs_ancestors_messages(FT ft, FTNODE node, ANCESTORS ancesto __attribute__((nonnull)) void toku_ft_bn_update_max_msn(FTNODE node, MSN max_msn_applied, int child_to_read); -__attribute__((const,nonnull)) -size_t toku_ft_msg_memsize_in_fifo(FT_MSG msg); - int toku_ft_search_which_child( DESCRIPTOR desc, diff --git a/ft/ft-ops.cc b/ft/ft-ops.cc index 89063471f19..4c44e839cda 100644 --- a/ft/ft-ops.cc +++ b/ft/ft-ops.cc @@ -168,7 +168,7 @@ Split_or_merge (node, childnum) { return; If the child needs to be merged (it's a leaf with too little stuff (less than 1/4 full) or a nonleaf with too little fanout (less than 1/4) fetch node, the child and a sibling of the child into main memory. - move all messages from the node to the two children (so that the FIFOs are empty) + move all messages from the node to the two children (so that the message buffers are empty) If the two siblings together fit into one node then merge the two siblings. fixup the node to point at one child @@ -491,7 +491,7 @@ get_node_reactivity(FT ft, FTNODE node) { unsigned int toku_bnc_nbytesinbuf(NONLEAF_CHILDINFO bnc) { - return toku_fifo_buffer_size_in_use(bnc->buffer); + return bnc->msg_buffer.buffer_size_in_use(); } // return true if the size of the buffers plus the amount of work done is large enough. (But return false if there is nothing to be flushed (the buffers empty)). 
@@ -538,7 +538,7 @@ uint32_t compute_child_fullhash (CACHEFILE cf, FTNODE node, int childnum) { int toku_bnc_n_entries(NONLEAF_CHILDINFO bnc) { - return toku_fifo_n_entries(bnc->buffer); + return bnc->msg_buffer.num_entries(); } static const DBT *prepivotkey (FTNODE node, int childnum, const DBT * const lower_bound_exclusive) { @@ -567,7 +567,7 @@ long toku_bnc_memory_size(NONLEAF_CHILDINFO bnc) { return (sizeof(*bnc) + - toku_fifo_memory_footprint(bnc->buffer) + + bnc->msg_buffer.memory_footprint() + bnc->fresh_message_tree.memory_size() + bnc->stale_message_tree.memory_size() + bnc->broadcast_list.memory_size()); @@ -579,7 +579,7 @@ long toku_bnc_memory_used(NONLEAF_CHILDINFO bnc) { return (sizeof(*bnc) + - toku_fifo_memory_size_in_use(bnc->buffer) + + bnc->msg_buffer.memory_size_in_use() + bnc->fresh_message_tree.memory_size() + bnc->stale_message_tree.memory_size() + bnc->broadcast_list.memory_size()); @@ -2162,46 +2162,43 @@ key_msn_cmp(const DBT *a, const DBT *b, const MSN amsn, const MSN bmsn, } int -toku_fifo_entry_key_msn_heaviside(const int32_t &offset, const struct toku_fifo_entry_key_msn_heaviside_extra &extra) +toku_msg_buffer_key_msn_heaviside(const int32_t &offset, const struct toku_msg_buffer_key_msn_heaviside_extra &extra) { - const struct fifo_entry *query = toku_fifo_get_entry(extra.fifo, offset); - DBT qdbt; - const DBT *query_key = fill_dbt_for_fifo_entry(&qdbt, query); - const DBT *target_key = extra.key; - return key_msn_cmp(query_key, target_key, query->msn, extra.msn, + MSN query_msn; + DBT query_key; + extra.msg_buffer->get_message_key_msn(offset, &query_key, &query_msn); + return key_msn_cmp(&query_key, extra.key, query_msn, extra.msn, extra.desc, extra.cmp); } int -toku_fifo_entry_key_msn_cmp(const struct toku_fifo_entry_key_msn_cmp_extra &extra, const int32_t &ao, const int32_t &bo) +toku_msg_buffer_key_msn_cmp(const struct toku_msg_buffer_key_msn_cmp_extra &extra, const int32_t &ao, const int32_t &bo) { - const struct fifo_entry *a = 
toku_fifo_get_entry(extra.fifo, ao); - const struct fifo_entry *b = toku_fifo_get_entry(extra.fifo, bo); - DBT adbt, bdbt; - const DBT *akey = fill_dbt_for_fifo_entry(&adbt, a); - const DBT *bkey = fill_dbt_for_fifo_entry(&bdbt, b); - return key_msn_cmp(akey, bkey, a->msn, b->msn, + MSN amsn, bmsn; + DBT akey, bkey; + extra.msg_buffer->get_message_key_msn(ao, &akey, &amsn); + extra.msg_buffer->get_message_key_msn(bo, &bkey, &bmsn); + return key_msn_cmp(&akey, &bkey, amsn, bmsn, extra.desc, extra.cmp); } -void toku_bnc_insert_msg(NONLEAF_CHILDINFO bnc, const void *key, ITEMLEN keylen, const void *data, ITEMLEN datalen, enum ft_msg_type type, MSN msn, XIDS xids, bool is_fresh, DESCRIPTOR desc, ft_compare_func cmp) // Effect: Enqueue the message represented by the parameters into the // bnc's buffer, and put it in either the fresh or stale message tree, // or the broadcast list. -// -// This is only exported for tests. -{ +static void bnc_insert_msg(NONLEAF_CHILDINFO bnc, FT_MSG msg, bool is_fresh, DESCRIPTOR desc, ft_compare_func cmp) { + int r = 0; int32_t offset; - int r = toku_fifo_enq(bnc->buffer, key, keylen, data, datalen, type, msn, xids, is_fresh, &offset); - assert_zero(r); + bnc->msg_buffer.enqueue(msg, is_fresh, &offset); + enum ft_msg_type type = ft_msg_get_type(msg); if (ft_msg_type_applies_once(type)) { - DBT keydbt; - struct toku_fifo_entry_key_msn_heaviside_extra extra = { .desc = desc, .cmp = cmp, .fifo = bnc->buffer, .key = toku_fill_dbt(&keydbt, key, keylen), .msn = msn }; + DBT key; + toku_fill_dbt(&key, ft_msg_get_key(msg), ft_msg_get_keylen(msg)); + struct toku_msg_buffer_key_msn_heaviside_extra extra = { .desc = desc, .cmp = cmp, .msg_buffer = &bnc->msg_buffer, .key = &key, .msn = msg->msn }; if (is_fresh) { - r = bnc->fresh_message_tree.insert(offset, extra, nullptr); + r = bnc->fresh_message_tree.insert(offset, extra, nullptr); assert_zero(r); } else { - r = bnc->stale_message_tree.insert(offset, extra, nullptr); + r = 
bnc->stale_message_tree.insert(offset, extra, nullptr); assert_zero(r); } } else { @@ -2212,14 +2209,32 @@ void toku_bnc_insert_msg(NONLEAF_CHILDINFO bnc, const void *key, ITEMLEN keylen, } } +// This is only exported for tests. +void toku_bnc_insert_msg(NONLEAF_CHILDINFO bnc, const void *key, ITEMLEN keylen, const void *data, ITEMLEN datalen, enum ft_msg_type type, MSN msn, XIDS xids, bool is_fresh, DESCRIPTOR desc, ft_compare_func cmp) +{ + DBT k, v; + FT_MSG_S msg = { + type, msn, xids, .u = { .id = { toku_fill_dbt(&k, key, keylen), toku_fill_dbt(&v, data, datalen) } } + }; + bnc_insert_msg(bnc, &msg, is_fresh, desc, cmp); +} + // append a msg to a nonleaf node's child buffer -// should be static, but used by test programs -void toku_ft_append_to_child_buffer(ft_compare_func compare_fun, DESCRIPTOR desc, FTNODE node, int childnum, enum ft_msg_type type, MSN msn, XIDS xids, bool is_fresh, const DBT *key, const DBT *val) { +static void ft_append_msg_to_child_buffer(ft_compare_func compare_fun, DESCRIPTOR desc, FTNODE node, + int childnum, FT_MSG msg, bool is_fresh) { paranoid_invariant(BP_STATE(node,childnum) == PT_AVAIL); - toku_bnc_insert_msg(BNC(node, childnum), key->data, key->size, val->data, val->size, type, msn, xids, is_fresh, desc, compare_fun); + bnc_insert_msg(BNC(node, childnum), msg, is_fresh, desc, compare_fun); node->dirty = 1; } +// This is only exported for tests. 
+void toku_ft_append_to_child_buffer(ft_compare_func compare_fun, DESCRIPTOR desc, FTNODE node, int childnum, enum ft_msg_type type, MSN msn, XIDS xids, bool is_fresh, const DBT *key, const DBT *val) { + FT_MSG_S msg = { + type, msn, xids, .u = { .id = { key, val } } + }; + ft_append_msg_to_child_buffer(compare_fun, desc, node, childnum, &msg, is_fresh); +} + static void ft_nonleaf_msg_once_to_child(ft_compare_func compare_fun, DESCRIPTOR desc, FTNODE node, int target_childnum, FT_MSG msg, bool is_fresh, size_t flow_deltas[]) // Previously we had passive aggressive promotion, but that causes a lot of I/O a the checkpoint. So now we are just putting it in the buffer here. // Also we don't worry about the node getting overfull here. It's the caller's problem. @@ -2227,7 +2242,7 @@ static void ft_nonleaf_msg_once_to_child(ft_compare_func compare_fun, DESCRIPTOR unsigned int childnum = (target_childnum >= 0 ? target_childnum : toku_ftnode_which_child(node, msg->u.id.key, desc, compare_fun)); - toku_ft_append_to_child_buffer(compare_fun, desc, node, childnum, msg->type, msg->msn, msg->xids, is_fresh, msg->u.id.key, msg->u.id.val); + ft_append_msg_to_child_buffer(compare_fun, desc, node, childnum, msg, is_fresh); NONLEAF_CHILDINFO bnc = BNC(node, childnum); bnc->flow[0] += flow_deltas[0]; bnc->flow[1] += flow_deltas[1]; @@ -2514,8 +2529,6 @@ void toku_bnc_flush_to_child( ) { paranoid_invariant(bnc); - STAT64INFO_S stats_delta = {0,0}; - size_t remaining_memsize = toku_fifo_buffer_size_in_use(bnc->buffer); TOKULOGGER logger = toku_cachefile_logger(ft->cf); TXN_MANAGER txn_manager = logger != nullptr ? 
toku_logger_get_txn_manager(logger) : nullptr; @@ -2531,21 +2544,30 @@ void toku_bnc_flush_to_child( oldest_referenced_xid_for_simple_gc, child->oldest_referenced_xid_known, true); - FIFO_ITERATE( - bnc->buffer, key, keylen, val, vallen, type, msn, xids, is_fresh, - ({ - DBT hk,hv; - FT_MSG_S ftmsg = { type, msn, xids, .u = { .id = { toku_fill_dbt(&hk, key, keylen), - toku_fill_dbt(&hv, val, vallen) } } }; + struct flush_msg_fn { + FT ft; + FTNODE child; + NONLEAF_CHILDINFO bnc; + txn_gc_info *gc_info; + + STAT64INFO_S stats_delta; + size_t remaining_memsize = bnc->msg_buffer.buffer_size_in_use(); + + flush_msg_fn(FT t, FTNODE n, NONLEAF_CHILDINFO nl, txn_gc_info *g) : + ft(t), child(n), bnc(nl), gc_info(g), remaining_memsize(bnc->msg_buffer.buffer_size_in_use()) { + stats_delta = { 0, 0 }; + } + int operator()(FT_MSG msg, bool is_fresh) { size_t flow_deltas[] = { 0, 0 }; + size_t memsize_in_buffer = message_buffer::msg_memsize_in_buffer(msg); if (remaining_memsize <= bnc->flow[0]) { // this message is in the current checkpoint's worth of - // the end of the fifo - flow_deltas[0] = FIFO_CURRENT_ENTRY_MEMSIZE; + // the end of the message buffer + flow_deltas[0] = memsize_in_buffer; } else if (remaining_memsize <= bnc->flow[0] + bnc->flow[1]) { // this message is in the last checkpoint's worth of the - // end of the fifo - flow_deltas[1] = FIFO_CURRENT_ENTRY_MEMSIZE; + // end of the message buffer + flow_deltas[1] = memsize_in_buffer; } toku_ft_node_put_msg( ft->compare_fun, @@ -2553,22 +2575,26 @@ void toku_bnc_flush_to_child( &ft->cmp_descriptor, child, -1, - &ftmsg, + msg, is_fresh, - &gc_info, + gc_info, flow_deltas, &stats_delta ); - remaining_memsize -= FIFO_CURRENT_ENTRY_MEMSIZE; - })); + remaining_memsize -= memsize_in_buffer; + return 0; + } + } flush_fn(ft, child, bnc, &gc_info); + bnc->msg_buffer.iterate(flush_fn); + child->oldest_referenced_xid_known = parent_oldest_referenced_xid_known; - invariant(remaining_memsize == 0); - if (stats_delta.numbytes || 
stats_delta.numrows) { - toku_ft_update_stats(&ft->in_memory_stats, stats_delta); + invariant(flush_fn.remaining_memsize == 0); + if (flush_fn.stats_delta.numbytes || flush_fn.stats_delta.numrows) { + toku_ft_update_stats(&ft->in_memory_stats, flush_fn.stats_delta); } if (do_garbage_collection) { - size_t buffsize = toku_fifo_buffer_size_in_use(bnc->buffer); + size_t buffsize = bnc->msg_buffer.buffer_size_in_use(); STATUS_INC(FT_MSG_BYTES_OUT, buffsize); // may be misleading if there's a broadcast message in there STATUS_INC(FT_MSG_BYTES_CURR, -buffsize); @@ -2597,7 +2623,7 @@ toku_ft_node_put_msg ( // Effect: Push message into the subtree rooted at NODE. // If NODE is a leaf, then // put message into leaf, applying it to the leafentries -// If NODE is a nonleaf, then push the message into the FIFO(s) of the relevent child(ren). +// If NODE is a nonleaf, then push the message into the message buffer(s) of the relevent child(ren). // The node may become overfull. That's not our problem. { toku_assert_entire_node_in_memory(node); @@ -3192,7 +3218,7 @@ void toku_ft_root_put_msg( struct ftnode_fetch_extra bfe; fill_bfe_for_full_read(&bfe, ft); - size_t flow_deltas[] = { toku_ft_msg_memsize_in_fifo(msg), 0 }; + size_t flow_deltas[] = { message_buffer::msg_memsize_in_buffer(msg), 0 }; pair_lock_type lock_type; lock_type = PL_READ; // try first for a read lock @@ -4656,13 +4682,13 @@ is_le_val_del(LEAFENTRY le, FT_CURSOR ftcursor) { return rval; } -struct store_fifo_offset_extra { +struct store_msg_buffer_offset_extra { int32_t *offsets; int i; }; -int store_fifo_offset(const int32_t &offset, const uint32_t UU(idx), struct store_fifo_offset_extra *const extra) __attribute__((nonnull(3))); -int store_fifo_offset(const int32_t &offset, const uint32_t UU(idx), struct store_fifo_offset_extra *const extra) +int store_msg_buffer_offset(const int32_t &offset, const uint32_t UU(idx), struct store_msg_buffer_offset_extra *const extra) __attribute__((nonnull(3))); +int 
store_msg_buffer_offset(const int32_t &offset, const uint32_t UU(idx), struct store_msg_buffer_offset_extra *const extra) { extra->offsets[extra->i] = offset; extra->i++; @@ -4670,55 +4696,46 @@ int store_fifo_offset(const int32_t &offset, const uint32_t UU(idx), struct stor } /** - * Given pointers to offsets within a FIFO where we can find messages, + * Given pointers to offsets within a message buffer where we can find messages, * figure out the MSN of each message, and compare those MSNs. Returns 1, * 0, or -1 if a is larger than, equal to, or smaller than b. */ -int fifo_offset_msn_cmp(FIFO &fifo, const int32_t &ao, const int32_t &bo); -int fifo_offset_msn_cmp(FIFO &fifo, const int32_t &ao, const int32_t &bo) +int msg_buffer_offset_msn_cmp(message_buffer &msg_buffer, const int32_t &ao, const int32_t &bo); +int msg_buffer_offset_msn_cmp(message_buffer &msg_buffer, const int32_t &ao, const int32_t &bo) { - const struct fifo_entry *a = toku_fifo_get_entry(fifo, ao); - const struct fifo_entry *b = toku_fifo_get_entry(fifo, bo); - if (a->msn.msn > b->msn.msn) { + MSN amsn, bmsn; + msg_buffer.get_message_key_msn(ao, nullptr, &amsn); + msg_buffer.get_message_key_msn(bo, nullptr, &bmsn); + if (amsn.msn > bmsn.msn) { return +1; } - if (a->msn.msn < b->msn.msn) { + if (amsn.msn < bmsn.msn) { return -1; } return 0; } /** - * Given a fifo_entry, either decompose it into its parameters and call - * toku_ft_bn_apply_msg, or discard it, based on its MSN and the MSN of the - * basement node. + * Given a message buffer and and offset, apply the message with toku_ft_bn_apply_msg, or discard it, + * based on its MSN and the MSN of the basement node. 
*/ static void -do_bn_apply_msg(FT_HANDLE t, BASEMENTNODE bn, struct fifo_entry *entry, txn_gc_info *gc_info, uint64_t *workdone, STAT64INFO stats_to_update) -{ +do_bn_apply_msg(FT_HANDLE ft_handle, BASEMENTNODE bn, message_buffer *msg_buffer, int32_t offset, + txn_gc_info *gc_info, uint64_t *workdone, STAT64INFO stats_to_update) { + DBT k, v; + FT_MSG_S msg = msg_buffer->get_message(offset, &k, &v); + // The messages are being iterated over in (key,msn) order or just in // msn order, so all the messages for one key, from one buffer, are in // ascending msn order. So it's ok that we don't update the basement // node's msn until the end. - if (entry->msn.msn > bn->max_msn_applied.msn) { - ITEMLEN keylen = entry->keylen; - ITEMLEN vallen = entry->vallen; - enum ft_msg_type type = (enum ft_msg_type) entry->type; - MSN msn = entry->msn; - const XIDS xids = (XIDS) &entry->xids_s; - bytevec key = xids_get_end_of_array(xids); - bytevec val = (uint8_t*)key + entry->keylen; - - DBT hk; - toku_fill_dbt(&hk, key, keylen); - DBT hv; - FT_MSG_S ftmsg = { type, msn, xids, .u = { .id = { &hk, toku_fill_dbt(&hv, val, vallen) } } }; + if (msg.msn.msn > bn->max_msn_applied.msn) { toku_ft_bn_apply_msg( - t->ft->compare_fun, - t->ft->update_fun, - &t->ft->cmp_descriptor, + ft_handle->ft->compare_fun, + ft_handle->ft->update_fun, + &ft_handle->ft->cmp_descriptor, bn, - &ftmsg, + &msg, gc_info, workdone, stats_to_update @@ -4726,13 +4743,15 @@ do_bn_apply_msg(FT_HANDLE t, BASEMENTNODE bn, struct fifo_entry *entry, txn_gc_i } else { STATUS_INC(FT_MSN_DISCARDS, 1); } - // We must always mark entry as stale since it has been marked + + // We must always mark message as stale since it has been marked // (using omt::iterate_and_mark_range) // It is possible to call do_bn_apply_msg even when it won't apply the message because // the node containing it could have been evicted and brought back in. 
- entry->is_fresh = false; + msg_buffer->set_freshness(offset, false); } + struct iterate_do_bn_apply_msg_extra { FT_HANDLE t; BASEMENTNODE bn; @@ -4745,8 +4764,7 @@ struct iterate_do_bn_apply_msg_extra { int iterate_do_bn_apply_msg(const int32_t &offset, const uint32_t UU(idx), struct iterate_do_bn_apply_msg_extra *const e) __attribute__((nonnull(3))); int iterate_do_bn_apply_msg(const int32_t &offset, const uint32_t UU(idx), struct iterate_do_bn_apply_msg_extra *const e) { - struct fifo_entry *entry = toku_fifo_get_entry(e->bnc->buffer, offset); - do_bn_apply_msg(e->t, e->bn, entry, e->gc_info, e->workdone, e->stats_to_update); + do_bn_apply_msg(e->t, e->bn, &e->bnc->msg_buffer, offset, e->gc_info, e->workdone, e->stats_to_update); return 0; } @@ -4770,8 +4788,8 @@ static void find_bounds_within_message_tree( DESCRIPTOR desc, /// used for cmp ft_compare_func cmp, /// used to compare keys - const find_bounds_omt_t &message_tree, /// tree holding FIFO offsets, in which we want to look for indices - FIFO buffer, /// buffer in which messages are found + const find_bounds_omt_t &message_tree, /// tree holding message buffer offsets, in which we want to look for indices + message_buffer *msg_buffer, /// message buffer in which messages are found struct pivot_bounds const * const bounds, /// key bounds within the basement node we're applying messages to uint32_t *lbi, /// (output) "lower bound inclusive" (index into message_tree) uint32_t *ube /// (output) "upper bound exclusive" (index into message_tree) @@ -4785,15 +4803,15 @@ find_bounds_within_message_tree( // message (with any msn) with the key lower_bound_exclusive. // This will be a message we want to try applying, so it is the // "lower bound inclusive" within the message_tree. 
- struct toku_fifo_entry_key_msn_heaviside_extra lbi_extra; + struct toku_msg_buffer_key_msn_heaviside_extra lbi_extra; ZERO_STRUCT(lbi_extra); lbi_extra.desc = desc; lbi_extra.cmp = cmp; - lbi_extra.fifo = buffer; + lbi_extra.msg_buffer = msg_buffer; lbi_extra.key = bounds->lower_bound_exclusive; lbi_extra.msn = MAX_MSN; int32_t found_lb; - r = message_tree.template find(lbi_extra, +1, &found_lb, lbi); + r = message_tree.template find(lbi_extra, +1, &found_lb, lbi); if (r == DB_NOTFOUND) { // There is no relevant data (the lower bound is bigger than // any message in this tree), so we have no range and we're @@ -4809,7 +4827,7 @@ find_bounds_within_message_tree( const DBT *ubi = bounds->upper_bound_inclusive; const int32_t offset = found_lb; DBT found_lbidbt; - fill_dbt_for_fifo_entry(&found_lbidbt, toku_fifo_get_entry(buffer, offset)); + msg_buffer->get_message_key_msn(offset, &found_lbidbt, nullptr); FAKE_DB(db, desc); int c = cmp(&db, &found_lbidbt, ubi); // These DBTs really are both inclusive bounds, so we need @@ -4833,14 +4851,14 @@ find_bounds_within_message_tree( // the first thing bigger than the upper_bound_inclusive key. // This is therefore the smallest thing we don't want to apply, // and omt::iterate_on_range will not examine it. 
- struct toku_fifo_entry_key_msn_heaviside_extra ube_extra; + struct toku_msg_buffer_key_msn_heaviside_extra ube_extra; ZERO_STRUCT(ube_extra); ube_extra.desc = desc; ube_extra.cmp = cmp; - ube_extra.fifo = buffer; + ube_extra.msg_buffer = msg_buffer; ube_extra.key = bounds->upper_bound_inclusive; ube_extra.msn = MAX_MSN; - r = message_tree.template find(ube_extra, +1, nullptr, ube); + r = message_tree.template find(ube_extra, +1, nullptr, ube); if (r == DB_NOTFOUND) { // Couldn't find anything in the buffer bigger than our key, // so we need to look at everything up to the end of @@ -4882,13 +4900,13 @@ bnc_apply_messages_to_basement_node( uint32_t stale_lbi, stale_ube; if (!bn->stale_ancestor_messages_applied) { - find_bounds_within_message_tree(&t->ft->cmp_descriptor, t->ft->compare_fun, bnc->stale_message_tree, bnc->buffer, bounds, &stale_lbi, &stale_ube); + find_bounds_within_message_tree(&t->ft->cmp_descriptor, t->ft->compare_fun, bnc->stale_message_tree, &bnc->msg_buffer, bounds, &stale_lbi, &stale_ube); } else { stale_lbi = 0; stale_ube = 0; } uint32_t fresh_lbi, fresh_ube; - find_bounds_within_message_tree(&t->ft->cmp_descriptor, t->ft->compare_fun, bnc->fresh_message_tree, bnc->buffer, bounds, &fresh_lbi, &fresh_ube); + find_bounds_within_message_tree(&t->ft->cmp_descriptor, t->ft->compare_fun, bnc->fresh_message_tree, &bnc->msg_buffer, bounds, &fresh_lbi, &fresh_ube); // We now know where all the messages we must apply are, so one of the // following 4 cases will do the application, depending on which of @@ -4905,30 +4923,29 @@ bnc_apply_messages_to_basement_node( const int buffer_size = ((stale_ube - stale_lbi) + (fresh_ube - fresh_lbi) + bnc->broadcast_list.size()); toku::scoped_malloc offsets_buf(buffer_size * sizeof(int32_t)); int32_t *offsets = reinterpret_cast(offsets_buf.get()); - struct store_fifo_offset_extra sfo_extra = { .offsets = offsets, .i = 0 }; + struct store_msg_buffer_offset_extra sfo_extra = { .offsets = offsets, .i = 0 }; // Populate 
offsets array with offsets to stale messages - r = bnc->stale_message_tree.iterate_on_range(stale_lbi, stale_ube, &sfo_extra); + r = bnc->stale_message_tree.iterate_on_range(stale_lbi, stale_ube, &sfo_extra); assert_zero(r); // Then store fresh offsets, and mark them to be moved to stale later. - r = bnc->fresh_message_tree.iterate_and_mark_range(fresh_lbi, fresh_ube, &sfo_extra); + r = bnc->fresh_message_tree.iterate_and_mark_range(fresh_lbi, fresh_ube, &sfo_extra); assert_zero(r); // Store offsets of all broadcast messages. - r = bnc->broadcast_list.iterate(&sfo_extra); + r = bnc->broadcast_list.iterate(&sfo_extra); assert_zero(r); invariant(sfo_extra.i == buffer_size); // Sort by MSN. - r = toku::sort::mergesort_r(offsets, buffer_size, bnc->buffer); + r = toku::sort::mergesort_r(offsets, buffer_size, bnc->msg_buffer); assert_zero(r); // Apply the messages in MSN order. for (int i = 0; i < buffer_size; ++i) { *msgs_applied = true; - struct fifo_entry *entry = toku_fifo_get_entry(bnc->buffer, offsets[i]); - do_bn_apply_msg(t, bn, entry, gc_info, &workdone_this_ancestor, &stats_delta); + do_bn_apply_msg(t, bn, &bnc->msg_buffer, offsets[i], gc_info, &workdone_this_ancestor, &stats_delta); } } else if (stale_lbi == stale_ube) { // No stale messages to apply, we just apply fresh messages, and mark them to be moved to stale later. 
@@ -5084,7 +5101,7 @@ static bool bn_needs_ancestors_messages( find_bounds_within_message_tree(&ft->cmp_descriptor, ft->compare_fun, bnc->stale_message_tree, - bnc->buffer, + &bnc->msg_buffer, &curr_bounds, &stale_lbi, &stale_ube); @@ -5097,7 +5114,7 @@ static bool bn_needs_ancestors_messages( find_bounds_within_message_tree(&ft->cmp_descriptor, ft->compare_fun, bnc->fresh_message_tree, - bnc->buffer, + &bnc->msg_buffer, &curr_bounds, &fresh_lbi, &fresh_ube); @@ -5208,11 +5225,11 @@ struct copy_to_stale_extra { int copy_to_stale(const int32_t &offset, const uint32_t UU(idx), struct copy_to_stale_extra *const extra) __attribute__((nonnull(3))); int copy_to_stale(const int32_t &offset, const uint32_t UU(idx), struct copy_to_stale_extra *const extra) { - struct fifo_entry *entry = toku_fifo_get_entry(extra->bnc->buffer, offset); - DBT keydbt; - DBT *key = fill_dbt_for_fifo_entry(&keydbt, entry); - struct toku_fifo_entry_key_msn_heaviside_extra heaviside_extra = { .desc = &extra->ft->cmp_descriptor, .cmp = extra->ft->compare_fun, .fifo = extra->bnc->buffer, .key = key, .msn = entry->msn }; - int r = extra->bnc->stale_message_tree.insert(offset, heaviside_extra, nullptr); + MSN msn; + DBT key; + extra->bnc->msg_buffer.get_message_key_msn(offset, &key, &msn); + struct toku_msg_buffer_key_msn_heaviside_extra heaviside_extra = { .desc = &extra->ft->cmp_descriptor, .cmp = extra->ft->compare_fun, .msg_buffer = &extra->bnc->msg_buffer, .key = &key, .msn = msn }; + int r = extra->bnc->stale_message_tree.insert(offset, heaviside_extra, nullptr); invariant_zero(r); return 0; } @@ -6786,13 +6803,20 @@ toku_dump_ftnode (FILE *file, FT_HANDLE ft_handle, BLOCKNUM blocknum, int depth, if (node->height > 0) { NONLEAF_CHILDINFO bnc = BNC(node, i); fprintf(file, "%*schild %d buffered (%d entries):", depth+1, "", i, toku_bnc_n_entries(bnc)); - FIFO_ITERATE(bnc->buffer, key, keylen, data, datalen, type, msn, xids, UU(is_fresh), - { - data=data; datalen=datalen; keylen=keylen; - 
fprintf(file, "%*s xid=%" PRIu64 " %u (type=%d) msn=0x%" PRIu64 "\n", depth+2, "", xids_get_innermost_xid(xids), (unsigned)toku_dtoh32(*(int*)key), type, msn.msn); - //assert(strlen((char*)key)+1==keylen); - //assert(strlen((char*)data)+1==datalen); - }); + struct print_msg_fn { + FILE *file; + int depth; + print_msg_fn(FILE *f, int d) : file(f), depth(d) { } + int operator()(FT_MSG msg, bool UU(is_fresh)) { + fprintf(file, "%*s xid=%" PRIu64 " %u (type=%d) msn=0x%" PRIu64 "\n", + depth+2, "", + xids_get_innermost_xid(ft_msg_get_xids(msg)), + (unsigned)toku_dtoh32(*(int*)ft_msg_get_key(msg)), + ft_msg_get_type(msg), msg->msn.msn); + return 0; + } + } print_fn(file, depth); + bnc->msg_buffer.iterate(print_fn); } else { int size = BLB_DATA(node, i)->num_klpairs(); diff --git a/ft/ft-verify.cc b/ft/ft-verify.cc index 7e8d241cce2..51df7be4881 100644 --- a/ft/ft-verify.cc +++ b/ft/ft-verify.cc @@ -120,6 +120,7 @@ static int verify_msg_in_child_buffer(FT_HANDLE ft_handle, enum ft_msg_type type, MSN msn, bytevec key, ITEMLEN keylen, bytevec UU(data), ITEMLEN UU(datalen), XIDS UU(xids), const DBT *lesser_pivot, const DBT *greatereq_pivot) __attribute__((warn_unused_result)); +UU() static int verify_msg_in_child_buffer(FT_HANDLE ft_handle, enum ft_msg_type type, MSN msn, bytevec key, ITEMLEN keylen, bytevec UU(data), ITEMLEN UU(datalen), XIDS UU(xids), const DBT *lesser_pivot, const DBT *greatereq_pivot) { int result = 0; @@ -169,7 +170,7 @@ get_ith_key_dbt (BASEMENTNODE bn, int i) { struct count_msgs_extra { int count; MSN msn; - FIFO fifo; + message_buffer *msg_buffer; }; // template-only function, but must be extern @@ -177,15 +178,16 @@ int count_msgs(const int32_t &offset, const uint32_t UU(idx), struct count_msgs_ __attribute__((nonnull(3))); int count_msgs(const int32_t &offset, const uint32_t UU(idx), struct count_msgs_extra *const e) { - const struct fifo_entry *entry = toku_fifo_get_entry(e->fifo, offset); - if (entry->msn.msn == e->msn.msn) { + MSN msn; + 
e->msg_buffer->get_message_key_msn(offset, nullptr, &msn); + if (msn.msn == e->msn.msn) { e->count++; } return 0; } struct verify_message_tree_extra { - FIFO fifo; + message_buffer *msg_buffer; bool broadcast; bool is_fresh; int i; @@ -202,20 +204,22 @@ int verify_message_tree(const int32_t &offset, const uint32_t UU(idx), struct ve BLOCKNUM blocknum = e->blocknum; int keep_going_on_failure = e->keep_going_on_failure; int result = 0; - const struct fifo_entry *entry = toku_fifo_get_entry(e->fifo, offset); + DBT k, v; + FT_MSG_S msg = e->msg_buffer->get_message(offset, &k, &v); + bool is_fresh = e->msg_buffer->get_freshness(offset); if (e->broadcast) { - VERIFY_ASSERTION(ft_msg_type_applies_all((enum ft_msg_type) entry->type) || ft_msg_type_does_nothing((enum ft_msg_type) entry->type), + VERIFY_ASSERTION(ft_msg_type_applies_all((enum ft_msg_type) msg.type) || ft_msg_type_does_nothing((enum ft_msg_type) msg.type), e->i, "message found in broadcast list that is not a broadcast"); } else { - VERIFY_ASSERTION(ft_msg_type_applies_once((enum ft_msg_type) entry->type), + VERIFY_ASSERTION(ft_msg_type_applies_once((enum ft_msg_type) msg.type), e->i, "message found in fresh or stale message tree that does not apply once"); if (e->is_fresh) { if (e->messages_have_been_moved) { - VERIFY_ASSERTION(entry->is_fresh, + VERIFY_ASSERTION(is_fresh, e->i, "message found in fresh message tree that is not fresh"); } } else { - VERIFY_ASSERTION(!entry->is_fresh, + VERIFY_ASSERTION(!is_fresh, e->i, "message found in stale message tree that is fresh"); } } @@ -235,15 +239,15 @@ int verify_marked_messages(const int32_t &offset, const uint32_t UU(idx), struct BLOCKNUM blocknum = e->blocknum; int keep_going_on_failure = e->keep_going_on_failure; int result = 0; - const struct fifo_entry *entry = toku_fifo_get_entry(e->fifo, offset); - VERIFY_ASSERTION(!entry->is_fresh, e->i, "marked message found in the fresh message tree that is fresh"); + bool is_fresh = e->msg_buffer->get_freshness(offset); 
+ VERIFY_ASSERTION(!is_fresh, e->i, "marked message found in the fresh message tree that is fresh"); done: return result; } template static int -verify_sorted_by_key_msn(FT_HANDLE ft_handle, FIFO fifo, const verify_omt_t &mt) { +verify_sorted_by_key_msn(FT_HANDLE ft_handle, message_buffer *msg_buffer, const verify_omt_t &mt) { int result = 0; size_t last_offset = 0; for (uint32_t i = 0; i < mt.size(); i++) { @@ -251,12 +255,12 @@ verify_sorted_by_key_msn(FT_HANDLE ft_handle, FIFO fifo, const verify_omt_t &mt) int r = mt.fetch(i, &offset); assert_zero(r); if (i > 0) { - struct toku_fifo_entry_key_msn_cmp_extra extra; + struct toku_msg_buffer_key_msn_cmp_extra extra; ZERO_STRUCT(extra); extra.desc = &ft_handle->ft->cmp_descriptor; extra.cmp = ft_handle->ft->compare_fun; - extra.fifo = fifo; - if (toku_fifo_entry_key_msn_cmp(extra, last_offset, offset) >= 0) { + extra.msg_buffer = msg_buffer; + if (toku_msg_buffer_key_msn_cmp(extra, last_offset, offset) >= 0) { result = TOKUDB_NEEDS_REPAIR; break; } @@ -268,15 +272,15 @@ verify_sorted_by_key_msn(FT_HANDLE ft_handle, FIFO fifo, const verify_omt_t &mt) template static int -count_eq_key_msn(FT_HANDLE ft_handle, FIFO fifo, const count_omt_t &mt, const DBT *key, MSN msn) { - struct toku_fifo_entry_key_msn_heaviside_extra extra; +count_eq_key_msn(FT_HANDLE ft_handle, message_buffer *msg_buffer, const count_omt_t &mt, const DBT *key, MSN msn) { + struct toku_msg_buffer_key_msn_heaviside_extra extra; ZERO_STRUCT(extra); extra.desc = &ft_handle->ft->cmp_descriptor; extra.cmp = ft_handle->ft->compare_fun; - extra.fifo = fifo; + extra.msg_buffer = msg_buffer; extra.key = key; extra.msn = msn; - int r = mt.template find_zero(extra, nullptr, nullptr); + int r = mt.template find_zero(extra, nullptr, nullptr); int count; if (r == 0) { count = 1; @@ -308,6 +312,80 @@ toku_get_node_for_verify( ); } +struct verify_msg_fn { + FT_HANDLE ft_handle; + NONLEAF_CHILDINFO bnc; + const DBT *curr_less_pivot; + const DBT *curr_geq_pivot; + 
BLOCKNUM blocknum; + MSN this_msn; + int verbose; + int keep_going_on_failure; + bool messages_have_been_moved; + + MSN last_msn; + int msg_i; + int result = 0; // needed by VERIFY_ASSERTION + + verify_msg_fn(FT_HANDLE handle, NONLEAF_CHILDINFO nl, const DBT *less, const DBT *geq, + BLOCKNUM b, MSN tmsn, int v, int k, bool m) : + ft_handle(handle), bnc(nl), curr_less_pivot(less), curr_geq_pivot(geq), + blocknum(b), this_msn(tmsn), verbose(v), keep_going_on_failure(k), messages_have_been_moved(m), last_msn(ZERO_MSN), msg_i(0) { + } + + int operator()(FT_MSG msg, bool is_fresh) { + enum ft_msg_type type = (enum ft_msg_type) msg->type; + MSN msn = msg->msn; + XIDS xid = msg->xids; + const void *key = ft_msg_get_key(msg); + const void *data = ft_msg_get_val(msg); + ITEMLEN keylen = ft_msg_get_keylen(msg); + ITEMLEN datalen = ft_msg_get_vallen(msg); + + int r = verify_msg_in_child_buffer(ft_handle, type, msn, key, keylen, data, datalen, xid, + curr_less_pivot, + curr_geq_pivot); + VERIFY_ASSERTION(r == 0, msg_i, "A message in the buffer is out of place"); + VERIFY_ASSERTION((msn.msn > last_msn.msn), msg_i, "msn per msg must be monotonically increasing toward newer messages in buffer"); + VERIFY_ASSERTION((msn.msn <= this_msn.msn), msg_i, "all messages must have msn within limit of this node's max_msn_applied_to_node_in_memory"); + if (ft_msg_type_applies_once(type)) { + int count; + DBT keydbt; + toku_fill_dbt(&keydbt, key, keylen); + int total_count = 0; + count = count_eq_key_msn(ft_handle, &bnc->msg_buffer, bnc->fresh_message_tree, toku_fill_dbt(&keydbt, key, keylen), msn); + total_count += count; + if (is_fresh) { + VERIFY_ASSERTION(count == 1, msg_i, "a fresh message was not found in the fresh message tree"); + } else if (messages_have_been_moved) { + VERIFY_ASSERTION(count == 0, msg_i, "a stale message was found in the fresh message tree"); + } + VERIFY_ASSERTION(count <= 1, msg_i, "a message was found multiple times in the fresh message tree"); + count = 
count_eq_key_msn(ft_handle, &bnc->msg_buffer, bnc->stale_message_tree, &keydbt, msn); + + total_count += count; + if (is_fresh) { + VERIFY_ASSERTION(count == 0, msg_i, "a fresh message was found in the stale message tree"); + } else if (messages_have_been_moved) { + VERIFY_ASSERTION(count == 1, msg_i, "a stale message was not found in the stale message tree"); + } + VERIFY_ASSERTION(count <= 1, msg_i, "a message was found multiple times in the stale message tree"); + + VERIFY_ASSERTION(total_count <= 1, msg_i, "a message was found in both message trees (or more than once in a single tree)"); + VERIFY_ASSERTION(total_count >= 1, msg_i, "a message was not found in either message tree"); + } else { + VERIFY_ASSERTION(ft_msg_type_applies_all(type) || ft_msg_type_does_nothing(type), msg_i, "a message was found that does not apply either to all or to only one key"); + struct count_msgs_extra extra = { .count = 0, .msn = msn, .msg_buffer = &bnc->msg_buffer }; + bnc->broadcast_list.iterate(&extra); + VERIFY_ASSERTION(extra.count == 1, msg_i, "a broadcast message was not found in the broadcast list"); + } + last_msn = msn; + msg_i++; +done: + return result; + } +}; + static int toku_verify_ftnode_internal(FT_HANDLE ft_handle, MSN rootmsn, MSN parentmsn_with_messages, bool messages_exist_above, @@ -351,55 +429,18 @@ toku_verify_ftnode_internal(FT_HANDLE ft_handle, const DBT *curr_less_pivot = (i==0) ? lesser_pivot : &node->childkeys[i-1]; const DBT *curr_geq_pivot = (i==node->n_children-1) ? greatereq_pivot : &node->childkeys[i]; if (node->height > 0) { - MSN last_msn = ZERO_MSN; - // Verify that messages in the buffers are in the right place. 
NONLEAF_CHILDINFO bnc = BNC(node, i); - VERIFY_ASSERTION(verify_sorted_by_key_msn(ft_handle, bnc->buffer, bnc->fresh_message_tree) == 0, i, "fresh_message_tree"); - VERIFY_ASSERTION(verify_sorted_by_key_msn(ft_handle, bnc->buffer, bnc->stale_message_tree) == 0, i, "stale_message_tree"); - FIFO_ITERATE(bnc->buffer, key, keylen, data, datalen, itype, msn, xid, is_fresh, - ({ - enum ft_msg_type type = (enum ft_msg_type) itype; - int r = verify_msg_in_child_buffer(ft_handle, type, msn, key, keylen, data, datalen, xid, - curr_less_pivot, - curr_geq_pivot); - VERIFY_ASSERTION(r==0, i, "A message in the buffer is out of place"); - VERIFY_ASSERTION((msn.msn > last_msn.msn), i, "msn per msg must be monotonically increasing toward newer messages in buffer"); - VERIFY_ASSERTION((msn.msn <= this_msn.msn), i, "all messages must have msn within limit of this node's max_msn_applied_to_node_in_memory"); - if (ft_msg_type_applies_once(type)) { - int count; - DBT keydbt; - toku_fill_dbt(&keydbt, key, keylen); - int total_count = 0; - count = count_eq_key_msn(ft_handle, bnc->buffer, bnc->fresh_message_tree, toku_fill_dbt(&keydbt, key, keylen), msn); - total_count += count; - if (is_fresh) { - VERIFY_ASSERTION(count == 1, i, "a fresh message was not found in the fresh message tree"); - } else if (messages_have_been_moved) { - VERIFY_ASSERTION(count == 0, i, "a stale message was found in the fresh message tree"); - } - VERIFY_ASSERTION(count <= 1, i, "a message was found multiple times in the fresh message tree"); - count = count_eq_key_msn(ft_handle, bnc->buffer, bnc->stale_message_tree, &keydbt, msn); + // Verify that messages in the buffers are in the right place. 
+ VERIFY_ASSERTION(verify_sorted_by_key_msn(ft_handle, &bnc->msg_buffer, bnc->fresh_message_tree) == 0, i, "fresh_message_tree"); + VERIFY_ASSERTION(verify_sorted_by_key_msn(ft_handle, &bnc->msg_buffer, bnc->stale_message_tree) == 0, i, "stale_message_tree"); - total_count += count; - if (is_fresh) { - VERIFY_ASSERTION(count == 0, i, "a fresh message was found in the stale message tree"); - } else if (messages_have_been_moved) { - VERIFY_ASSERTION(count == 1, i, "a stale message was not found in the stale message tree"); - } - VERIFY_ASSERTION(count <= 1, i, "a message was found multiple times in the stale message tree"); + verify_msg_fn verify_msg(ft_handle, bnc, curr_less_pivot, curr_geq_pivot, + blocknum, this_msn, verbose, keep_going_on_failure, messages_have_been_moved); + int r = bnc->msg_buffer.iterate(verify_msg); + if (r != 0) { result = r; goto done; } - VERIFY_ASSERTION(total_count <= 1, i, "a message was found in both message trees (or more than once in a single tree)"); - VERIFY_ASSERTION(total_count >= 1, i, "a message was not found in either message tree"); - } else { - VERIFY_ASSERTION(ft_msg_type_applies_all(type) || ft_msg_type_does_nothing(type), i, "a message was found that does not apply either to all or to only one key"); - struct count_msgs_extra extra = { .count = 0, .msn = msn, .fifo = bnc->buffer }; - bnc->broadcast_list.iterate(&extra); - VERIFY_ASSERTION(extra.count == 1, i, "a broadcast message was not found in the broadcast list"); - } - last_msn = msn; - })); - struct verify_message_tree_extra extra = { .fifo = bnc->buffer, .broadcast = false, .is_fresh = true, .i = i, .verbose = verbose, .blocknum = node->thisnodename, .keep_going_on_failure = keep_going_on_failure, .messages_have_been_moved = messages_have_been_moved }; - int r = bnc->fresh_message_tree.iterate(&extra); + struct verify_message_tree_extra extra = { .msg_buffer = &bnc->msg_buffer, .broadcast = false, .is_fresh = true, .i = i, .verbose = verbose, .blocknum = 
node->thisnodename, .keep_going_on_failure = keep_going_on_failure, .messages_have_been_moved = messages_have_been_moved }; + r = bnc->fresh_message_tree.iterate(&extra); if (r != 0) { result = r; goto done; } extra.is_fresh = false; r = bnc->stale_message_tree.iterate(&extra); diff --git a/ft/ft.cc b/ft/ft.cc index 5c8e439e644..26111334211 100644 --- a/ft/ft.cc +++ b/ft/ft.cc @@ -187,7 +187,7 @@ ft_log_fassociate_during_checkpoint (CACHEFILE cf, void *header_v) { } // Maps to cf->begin_checkpoint_userdata -// Create checkpoint-in-progress versions of header and translation (btt) (and fifo for now...). +// Create checkpoint-in-progress versions of header and translation (btt) // Has access to fd (it is protected). // // Not reentrant for a single FT (see ft_checkpoint) diff --git a/ft/ft_msg.h b/ft/ft_msg.h index 2f996c6558f..e0db4a51ddb 100644 --- a/ft/ft_msg.h +++ b/ft/ft_msg.h @@ -2,7 +2,8 @@ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: /* The purpose of this file is to provide access to the ft_msg, - * which is the ephemeral version of the fifo_msg. + * which is the ephemeral version of the messages that lives in + * a message buffer. 
*/ #ifndef FT_MSG_H diff --git a/ft/ft_node-serialize.cc b/ft/ft_node-serialize.cc index 22dbe994eb7..1e4951d00c7 100644 --- a/ft/ft_node-serialize.cc +++ b/ft/ft_node-serialize.cc @@ -310,26 +310,26 @@ serialize_ftnode_partition_size (FTNODE node, int i) } #define FTNODE_PARTITION_DMT_LEAVES 0xaa -#define FTNODE_PARTITION_FIFO_MSG 0xbb +#define FTNODE_PARTITION_MSG_BUFFER 0xbb UU() static int -assert_fresh(const int32_t &offset, const uint32_t UU(idx), struct fifo *const f) { - struct fifo_entry *entry = toku_fifo_get_entry(f, offset); - assert(entry->is_fresh); +assert_fresh(const int32_t &offset, const uint32_t UU(idx), message_buffer *const msg_buffer) { + bool is_fresh = msg_buffer->get_freshness(offset); + assert(is_fresh); return 0; } UU() static int -assert_stale(const int32_t &offset, const uint32_t UU(idx), struct fifo *const f) { - struct fifo_entry *entry = toku_fifo_get_entry(f, offset); - assert(!entry->is_fresh); +assert_stale(const int32_t &offset, const uint32_t UU(idx), message_buffer *const msg_buffer) { + bool is_fresh = msg_buffer->get_freshness(offset); + assert(!is_fresh); return 0; } static void bnc_verify_message_trees(NONLEAF_CHILDINFO UU(bnc)) { #ifdef TOKU_DEBUG_PARANOID - bnc->fresh_message_tree.iterate(bnc->buffer); - bnc->stale_message_tree.iterate(bnc->buffer); + bnc->fresh_message_tree.iterate(&bnc->msg_buffer); + bnc->stale_message_tree.iterate(&bnc->msg_buffer); #endif } @@ -342,21 +342,27 @@ wbuf_write_offset(const int32_t &offset, const uint32_t UU(idx), struct wbuf *co static void serialize_child_buffer(NONLEAF_CHILDINFO bnc, struct wbuf *wb) { - unsigned char ch = FTNODE_PARTITION_FIFO_MSG; + unsigned char ch = FTNODE_PARTITION_MSG_BUFFER; wbuf_nocrc_char(wb, ch); - // serialize the FIFO, first the number of entries, then the elements + + // serialize the message buffer, first the number of entries, then the elements wbuf_nocrc_int(wb, toku_bnc_n_entries(bnc)); - FIFO_ITERATE( - bnc->buffer, key, keylen, data, datalen, type, 
msn, xids, is_fresh, - { + struct msg_serialize_fn { + struct wbuf *wb; + msg_serialize_fn(struct wbuf *w) : wb(w) { } + int operator()(FT_MSG msg, bool is_fresh) { + enum ft_msg_type type = (enum ft_msg_type) msg->type; paranoid_invariant((int) type >= 0 && (int) type < 256); wbuf_nocrc_char(wb, (unsigned char) type); wbuf_nocrc_char(wb, (unsigned char) is_fresh); - wbuf_MSN(wb, msn); - wbuf_nocrc_xids(wb, xids); - wbuf_nocrc_bytes(wb, key, keylen); - wbuf_nocrc_bytes(wb, data, datalen); - }); + wbuf_MSN(wb, msg->msn); + wbuf_nocrc_xids(wb, ft_msg_get_xids(msg)); + wbuf_nocrc_bytes(wb, ft_msg_get_key(msg), ft_msg_get_keylen(msg)); + wbuf_nocrc_bytes(wb, ft_msg_get_val(msg), ft_msg_get_vallen(msg)); + return 0; + } + } serialize_fn(wb); + bnc->msg_buffer.iterate(serialize_fn); bnc_verify_message_trees(bnc); @@ -1084,7 +1090,7 @@ deserialize_child_buffer_v26(NONLEAF_CHILDINFO bnc, struct rbuf *rbuf, XMALLOC_N(n_in_this_buffer, fresh_offsets); XMALLOC_N(n_in_this_buffer, broadcast_offsets); } - toku_fifo_resize(bnc->buffer, rbuf->size + 64); + bnc->msg_buffer.resize(rbuf->size + 64); for (int i = 0; i < n_in_this_buffer; i++) { bytevec key; ITEMLEN keylen; bytevec val; ITEMLEN vallen; @@ -1116,19 +1122,24 @@ deserialize_child_buffer_v26(NONLEAF_CHILDINFO bnc, struct rbuf *rbuf, } else { dest = NULL; } - r = toku_fifo_enq(bnc->buffer, key, keylen, val, vallen, type, msn, xids, is_fresh, dest); /* Copies the data into the fifo */ - lazy_assert_zero(r); + // TODO: Function to parse stuff out of an rbuf into an FT_MSG + DBT k, v; + FT_MSG_S msg = { + type, msn, xids, + .u = { .id = { toku_fill_dbt(&k, key, keylen), toku_fill_dbt(&v, val, vallen) } } + }; + bnc->msg_buffer.enqueue(&msg, is_fresh, dest); xids_destroy(&xids); } invariant(rbuf->ndone == rbuf->size); if (cmp) { - struct toku_fifo_entry_key_msn_cmp_extra extra = { .desc = desc, .cmp = cmp, .fifo = bnc->buffer }; - r = toku::sort::mergesort_r(fresh_offsets, nfresh, extra); + struct 
toku_msg_buffer_key_msn_cmp_extra extra = { .desc = desc, .cmp = cmp, .msg_buffer = &bnc->msg_buffer }; + r = toku::sort::mergesort_r(fresh_offsets, nfresh, extra); assert_zero(r); bnc->fresh_message_tree.destroy(); bnc->fresh_message_tree.create_steal_sorted_array(&fresh_offsets, nfresh, n_in_this_buffer); - r = toku::sort::mergesort_r(stale_offsets, nstale, extra); + r = toku::sort::mergesort_r(stale_offsets, nstale, extra); assert_zero(r); bnc->stale_message_tree.destroy(); bnc->stale_message_tree.create_steal_sorted_array(&stale_offsets, nstale, n_in_this_buffer); @@ -1137,9 +1148,9 @@ deserialize_child_buffer_v26(NONLEAF_CHILDINFO bnc, struct rbuf *rbuf, } } -// effect: deserialize a single message from rbuf and enque the result into the given fifo +// effect: deserialize a single message from rbuf and enqueue the result into the given message buffer static void -fifo_deserialize_msg_from_rbuf(FIFO fifo, struct rbuf *rbuf) { +msg_buffer_deserialize_msg_from_rbuf(message_buffer *msg_buffer, struct rbuf *rbuf) { bytevec key, val; ITEMLEN keylen, vallen; enum ft_msg_type type = (enum ft_msg_type) rbuf_char(rbuf); @@ -1149,8 +1160,13 @@ fifo_deserialize_msg_from_rbuf(FIFO fifo, struct rbuf *rbuf) { xids_create_from_buffer(rbuf, &xids); rbuf_bytes(rbuf, &key, &keylen); /* Returns a pointer into the rbuf. 
*/ rbuf_bytes(rbuf, &val, &vallen); - int r = toku_fifo_enq(fifo, key, keylen, val, vallen, type, msn, xids, is_fresh, nullptr); - lazy_assert_zero(r); + // TODO: Function to parse stuff out of an rbuf into an FT_MSG + DBT k, v; + FT_MSG_S msg = { + type, msn, xids, + .u = { .id = { toku_fill_dbt(&k, key, keylen), toku_fill_dbt(&v, val, vallen) } } + }; + msg_buffer->enqueue(&msg, is_fresh, nullptr); xids_destroy(&xids); } @@ -1162,9 +1178,9 @@ deserialize_child_buffer(NONLEAF_CHILDINFO bnc, struct rbuf *rbuf) { int32_t *XMALLOC_N(n_in_this_buffer, fresh_offsets); int32_t *XMALLOC_N(n_in_this_buffer, broadcast_offsets); - toku_fifo_resize(bnc->buffer, rbuf->size + 64); + bnc->msg_buffer.resize(rbuf->size + 64); for (int i = 0; i < n_in_this_buffer; i++) { - fifo_deserialize_msg_from_rbuf(bnc->buffer, rbuf); + msg_buffer_deserialize_msg_from_rbuf(&bnc->msg_buffer, rbuf); } // read in each message tree (fresh, stale, broadcast) @@ -1253,7 +1269,7 @@ BASEMENTNODE toku_create_empty_bn_no_buffer(void) { NONLEAF_CHILDINFO toku_create_empty_nl(void) { NONLEAF_CHILDINFO XMALLOC(cn); - int r = toku_fifo_create(&cn->buffer); assert_zero(r); + cn->msg_buffer.create(); cn->fresh_message_tree.create_no_array(); cn->stale_message_tree.create_no_array(); cn->broadcast_list.create_no_array(); @@ -1261,10 +1277,10 @@ NONLEAF_CHILDINFO toku_create_empty_nl(void) { return cn; } -// must clone the OMTs, since we serialize them along with the FIFO +// must clone the OMTs, since we serialize them along with the message buffer NONLEAF_CHILDINFO toku_clone_nl(NONLEAF_CHILDINFO orig_childinfo) { NONLEAF_CHILDINFO XMALLOC(cn); - toku_fifo_clone(orig_childinfo->buffer, &cn->buffer); + cn->msg_buffer.clone(&orig_childinfo->msg_buffer); cn->fresh_message_tree.create_no_array(); cn->fresh_message_tree.clone(orig_childinfo->fresh_message_tree); cn->stale_message_tree.create_no_array(); @@ -1283,7 +1299,7 @@ void destroy_basement_node (BASEMENTNODE bn) void destroy_nonleaf_childinfo 
(NONLEAF_CHILDINFO nl) { - toku_fifo_free(&nl->buffer); + nl->msg_buffer.destroy(); nl->fresh_message_tree.destroy(); nl->stale_message_tree.destroy(); nl->broadcast_list.destroy(); @@ -1615,7 +1631,7 @@ deserialize_ftnode_partition( ch = rbuf_char(&rb); if (node->height > 0) { - assert(ch == FTNODE_PARTITION_FIFO_MSG); + assert(ch == FTNODE_PARTITION_MSG_BUFFER); NONLEAF_CHILDINFO bnc = BNC(node, childnum); if (node->layout_version_read_from_disk <= FT_LAYOUT_VERSION_26) { // Layout version <= 26 did not serialize sorted message trees to disk. @@ -1827,7 +1843,7 @@ deserialize_ftnode_header_from_rbuf_if_small_enough (FTNODE *ftnode, paranoid_invariant(is_valid_ftnode_fetch_type(bfe->type)); // setup the memory of the partitions - // for partitions being decompressed, create either FIFO or basement node + // for partitions being decompressed, create either message buffer or basement node // for partitions staying compressed, create sub_block setup_ftnode_partitions(node, bfe, false); @@ -1995,7 +2011,7 @@ deserialize_and_upgrade_internal_node(FTNODE node, highest_msn.msn = lowest.msn + n_in_this_buffer; } - // Create the FIFO entires from the deserialized buffer. + // Create the message buffers from the deserialized buffer. for (int j = 0; j < n_in_this_buffer; ++j) { bytevec key; ITEMLEN keylen; bytevec val; ITEMLEN vallen; @@ -2025,25 +2041,21 @@ deserialize_and_upgrade_internal_node(FTNODE node, // Increment our MSN, the last message should have the // newest/highest MSN. See above for a full explanation. 
lowest.msn++; - r = toku_fifo_enq(bnc->buffer, - key, - keylen, - val, - vallen, - type, - lowest, - xids, - true, - dest); - lazy_assert_zero(r); + // TODO: Function to parse stuff out of an rbuf into an FT_MSG + DBT k, v; + FT_MSG_S msg = { + type, lowest, xids, + .u = { .id = { toku_fill_dbt(&k, key, keylen), toku_fill_dbt(&v, val, vallen) } } + }; + bnc->msg_buffer.enqueue(&msg, true, dest); xids_destroy(&xids); } if (bfe->h->compare_fun) { - struct toku_fifo_entry_key_msn_cmp_extra extra = { .desc = &bfe->h->cmp_descriptor, + struct toku_msg_buffer_key_msn_cmp_extra extra = { .desc = &bfe->h->cmp_descriptor, .cmp = bfe->h->compare_fun, - .fifo = bnc->buffer }; - typedef toku::sort key_msn_sort; + .msg_buffer = &bnc->msg_buffer }; + typedef toku::sort key_msn_sort; r = key_msn_sort::mergesort_r(fresh_offsets, nfresh, extra); assert_zero(r); bnc->fresh_message_tree.destroy(); @@ -2053,7 +2065,7 @@ deserialize_and_upgrade_internal_node(FTNODE node, } } - // Assign the highest msn from our upgrade message FIFO queues. + // Assign the highest msn from our upgrade message buffers node->max_msn_applied_to_node_on_disk = highest_msn; // Since we assigned MSNs to this node's messages, we need to dirty it. 
node->dirty = 1; @@ -2433,7 +2445,7 @@ deserialize_ftnode_from_rbuf( paranoid_invariant(is_valid_ftnode_fetch_type(bfe->type)); // setup the memory of the partitions - // for partitions being decompressed, create either FIFO or basement node + // for partitions being decompressed, create either message buffer or basement node // for partitions staying compressed, create sub_block setup_ftnode_partitions(node, bfe, true); diff --git a/ft/msg_buffer.cc b/ft/msg_buffer.cc new file mode 100644 index 00000000000..47bf845b186 --- /dev/null +++ b/ft/msg_buffer.cc @@ -0,0 +1,224 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: + +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuDB, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2014 Tokutek, Inc. 
+ +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. + + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. 
If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#include "ft/msg_buffer.h" +#include "ft/ybt.h" + +void message_buffer::create() { + _num_entries = 0; + _memory = nullptr; + _memory_size = 0; + _memory_used = 0; +} + +void message_buffer::clone(message_buffer *src) { + _num_entries = src->_num_entries; + _memory_used = src->_memory_used; + _memory_size = src->_memory_size; + XMALLOC_N(_memory_size, _memory); + memcpy(_memory, src->_memory, _memory_size); +} + +void message_buffer::destroy() { + if (_memory != nullptr) { + toku_free(_memory); + } +} + +void message_buffer::resize(size_t new_size) { + XREALLOC_N(new_size, _memory); + _memory_size = new_size; +} + +static int next_power_of_two (int n) { + int r = 4096; + while (r < n) { + r*=2; + assert(r>0); + } + return r; +} + +struct message_buffer::buffer_entry *message_buffer::get_buffer_entry(int32_t offset) const { + return (struct buffer_entry *) (_memory + offset); +} + +void message_buffer::enqueue(FT_MSG msg, bool is_fresh, int32_t *offset) { + ITEMLEN keylen = ft_msg_get_keylen(msg); + ITEMLEN datalen = ft_msg_get_vallen(msg); + XIDS xids = ft_msg_get_xids(msg); + int need_space_here = sizeof(struct buffer_entry) + + keylen + datalen + + xids_get_size(xids) + - sizeof(XIDS_S); //Prevent double counting + int need_space_total = _memory_used + need_space_here; + if (_memory == nullptr || need_space_total > _memory_size) { + // resize the buffer 
to the next power of 2 greater than the needed space + int next_2 = next_power_of_two(need_space_total); + resize(next_2); + } + struct buffer_entry *entry = get_buffer_entry(_memory_used); + entry->type = (unsigned char) ft_msg_get_type(msg); + entry->msn = msg->msn; + xids_cpy(&entry->xids_s, xids); + entry->is_fresh = is_fresh; + unsigned char *e_key = xids_get_end_of_array(&entry->xids_s); + entry->keylen = keylen; + memcpy(e_key, ft_msg_get_key(msg), keylen); + entry->vallen = datalen; + memcpy(e_key + keylen, ft_msg_get_val(msg), datalen); + if (offset) { + *offset = _memory_used; + } + _num_entries++; + _memory_used += need_space_here; +} + +void message_buffer::set_freshness(int32_t offset, bool is_fresh) { + struct buffer_entry *entry = get_buffer_entry(offset); + entry->is_fresh = is_fresh; +} + +bool message_buffer::get_freshness(int32_t offset) const { + struct buffer_entry *entry = get_buffer_entry(offset); + return entry->is_fresh; +} + +FT_MSG_S message_buffer::get_message(int32_t offset, DBT *keydbt, DBT *valdbt) const { + struct buffer_entry *entry = get_buffer_entry(offset); + ITEMLEN keylen = entry->keylen; + ITEMLEN vallen = entry->vallen; + enum ft_msg_type type = (enum ft_msg_type) entry->type; + MSN msn = entry->msn; + const XIDS xids = (XIDS) &entry->xids_s; + bytevec key = xids_get_end_of_array(xids); + bytevec val = (uint8_t *) key + entry->keylen; + FT_MSG_S msg = { + type, msn, xids, + .u = { .id = { toku_fill_dbt(keydbt, key, keylen), toku_fill_dbt(valdbt, val, vallen) } } + }; + return msg; +} + +void message_buffer::get_message_key_msn(int32_t offset, DBT *key, MSN *msn) const { + struct buffer_entry *entry = get_buffer_entry(offset); + if (key != nullptr) { + toku_fill_dbt(key, xids_get_end_of_array((XIDS) &entry->xids_s), entry->keylen); + } + if (msn != nullptr) { + *msn = entry->msn; + } +} + +int message_buffer::num_entries() const { + return _num_entries; +} + +size_t message_buffer::buffer_size_in_use() const { + return 
_memory_used; +} + +size_t message_buffer::memory_size_in_use() const { + return sizeof(*this) + _memory_used; +} + +size_t message_buffer::memory_footprint() const { + return sizeof(*this) + toku_memory_footprint(_memory, _memory_used); +} + +bool message_buffer::equals(message_buffer *other) const { + return (_memory_used == other->_memory_used && + memcmp(_memory, other->_memory, _memory_used) == 0); +} + +size_t message_buffer::msg_memsize_in_buffer(FT_MSG msg) { + return sizeof(struct buffer_entry) + + msg->u.id.key->size + msg->u.id.val->size + + xids_get_size(msg->xids) + - sizeof(XIDS_S); +} diff --git a/ft/fifo.h b/ft/msg_buffer.h similarity index 50% rename from ft/fifo.h rename to ft/msg_buffer.h index 5333ca905a7..ded17820474 100644 --- a/ft/fifo.h +++ b/ft/msg_buffer.h @@ -1,8 +1,6 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef FIFO_H -#define FIFO_H -#ident "$Id$" + /* COPYING CONDITIONS NOTICE: @@ -32,7 +30,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. + Copyright (C) 2014 Tokutek, Inc. DISCLAIMER: @@ -88,77 +86,76 @@ PATENT RIGHTS GRANT: under this License. */ -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
+#pragma once #include "ft/fttypes.h" #include "ft/xids-internal.h" #include "ft/xids.h" #include "ft/ft_msg.h" +#include "ft/ybt.h" -// If the fifo_entry is unpacked, the compiler aligns the xids array and we waste a lot of space -struct __attribute__((__packed__)) fifo_entry { - unsigned int keylen; - unsigned int vallen; - unsigned char type; - bool is_fresh; - MSN msn; - XIDS_S xids_s; +class message_buffer { +public: + void create(); + + void clone(message_buffer *dst); + + void destroy(); + + void resize(size_t new_size); + + void enqueue(FT_MSG msg, bool is_fresh, int32_t *offset); + + void set_freshness(int32_t offset, bool is_fresh); + + bool get_freshness(int32_t offset) const; + + FT_MSG_S get_message(int32_t offset, DBT *keydbt, DBT *valdbt) const; + + void get_message_key_msn(int32_t offset, DBT *key, MSN *msn) const; + + int num_entries() const; + + size_t buffer_size_in_use() const; + + size_t memory_size_in_use() const; + + size_t memory_footprint() const; + + template + int iterate(F &fn) const { + for (int32_t offset = 0; offset < _memory_used; ) { + DBT k, v; + FT_MSG_S msg = get_message(offset, &k, &v); + bool is_fresh = get_freshness(offset); + int r = fn(&msg, is_fresh); + if (r != 0) { + return r; + } + offset += msg_memsize_in_buffer(&msg); + } + return 0; + } + + bool equals(message_buffer *other) const; + + static size_t msg_memsize_in_buffer(FT_MSG msg); + +private: + // If this isn't packged, the compiler aligns the xids array and we waste a lot of space + struct __attribute__((__packed__)) buffer_entry { + unsigned int keylen; + unsigned int vallen; + unsigned char type; + bool is_fresh; + MSN msn; + XIDS_S xids_s; + }; + + struct buffer_entry *get_buffer_entry(int32_t offset) const; + + int _num_entries; + char *_memory; // An array of bytes into which buffer entries are embedded. + int _memory_size; // How big is _memory + int _memory_used; // How many bytes are in use? 
}; - -typedef struct fifo *FIFO; - -int toku_fifo_create(FIFO *); - -void toku_fifo_resize(FIFO fifo, size_t new_size); - -void toku_fifo_free(FIFO *); - -int toku_fifo_n_entries(FIFO); - -int toku_fifo_enq (FIFO, const void *key, ITEMLEN keylen, const void *data, ITEMLEN datalen, enum ft_msg_type type, MSN msn, XIDS xids, bool is_fresh, int32_t *dest); - -unsigned int toku_fifo_buffer_size_in_use (FIFO fifo); -unsigned long toku_fifo_memory_size_in_use(FIFO fifo); // return how much memory in the fifo holds useful data - -unsigned long toku_fifo_memory_footprint(FIFO fifo); // return how much memory the fifo occupies - -void toku_fifo_iterate(FIFO, void(*f)(bytevec key,ITEMLEN keylen,bytevec data,ITEMLEN datalen, enum ft_msg_type type, MSN msn, XIDS xids, bool is_fresh, void*), void*); - -#define FIFO_ITERATE(fifo,keyvar,keylenvar,datavar,datalenvar,typevar,msnvar,xidsvar,is_freshvar,body) ({ \ - for (int fifo_iterate_off = toku_fifo_iterate_internal_start(fifo); \ - toku_fifo_iterate_internal_has_more(fifo, fifo_iterate_off); \ - fifo_iterate_off = toku_fifo_iterate_internal_next(fifo, fifo_iterate_off)) { \ - struct fifo_entry *e = toku_fifo_iterate_internal_get_entry(fifo, fifo_iterate_off); \ - ITEMLEN keylenvar = e->keylen; \ - ITEMLEN datalenvar = e->vallen; \ - enum ft_msg_type typevar = (enum ft_msg_type) e->type; \ - MSN msnvar = e->msn; \ - XIDS xidsvar = &e->xids_s; \ - bytevec keyvar = xids_get_end_of_array(xidsvar); \ - bytevec datavar = (const uint8_t*)keyvar + e->keylen; \ - bool is_freshvar = e->is_fresh; \ - body; \ - } }) - -#define FIFO_CURRENT_ENTRY_MEMSIZE toku_fifo_internal_entry_memsize(e) - -// Internal functions for the iterator. 
-int toku_fifo_iterate_internal_start(FIFO fifo); -int toku_fifo_iterate_internal_has_more(FIFO fifo, int off); -int toku_fifo_iterate_internal_next(FIFO fifo, int off); -struct fifo_entry * toku_fifo_iterate_internal_get_entry(FIFO fifo, int off); -size_t toku_fifo_internal_entry_memsize(struct fifo_entry *e) __attribute__((const,nonnull)); -size_t toku_ft_msg_memsize_in_fifo(FT_MSG msg) __attribute__((const,nonnull)); - -DBT *fill_dbt_for_fifo_entry(DBT *dbt, const struct fifo_entry *entry); -struct fifo_entry *toku_fifo_get_entry(FIFO fifo, int off); - -void toku_fifo_clone(FIFO orig_fifo, FIFO* cloned_fifo); - -bool toku_are_fifos_same(FIFO fifo1, FIFO fifo2); - - - - -#endif diff --git a/ft/tests/fifo-test.cc b/ft/tests/fifo-test.cc index 0a2047ab920..96e48d82674 100644 --- a/ft/tests/fifo-test.cc +++ b/ft/tests/fifo-test.cc @@ -94,28 +94,19 @@ PATENT RIGHTS GRANT: #include "test.h" static void -test_fifo_create (void) { - int r; - FIFO f; - - f = 0; - r = toku_fifo_create(&f); - assert(r == 0); assert(f != 0); - - toku_fifo_free(&f); - assert(f == 0); +test_create (void) { + message_buffer msg_buffer; + msg_buffer.create(); + msg_buffer.destroy(); } static void -test_fifo_enq (int n) { +test_enqueue(int n) { int r; - FIFO f; + message_buffer msg_buffer; MSN startmsn = ZERO_MSN; - f = 0; - r = toku_fifo_create(&f); - assert(r == 0); assert(f != 0); - + msg_buffer.create(); char *thekey = 0; int thekeylen; char *theval = 0; int thevallen; @@ -146,38 +137,56 @@ test_fifo_enq (int n) { if (startmsn.msn == ZERO_MSN.msn) startmsn = msn; enum ft_msg_type type = (enum ft_msg_type) i; - r = toku_fifo_enq(f, thekey, thekeylen, theval, thevallen, type, msn, xids, true, NULL); assert(r == 0); + DBT k, v; + FT_MSG_S msg = { + type, msn, xids, .u = { .id = { toku_fill_dbt(&k, thekey, thekeylen), toku_fill_dbt(&v, theval, thevallen) } } + }; + msg_buffer.enqueue(&msg, true, nullptr); xids_destroy(&xids); } - int i = 0; - FIFO_ITERATE(f, key, keylen, val, vallen, type, msn, 
xids, UU(is_fresh), { - if (verbose) printf("checkit %d %d %" PRIu64 "\n", i, type, msn.msn); - assert(msn.msn == startmsn.msn + i); - buildkey(i); - buildval(i); - assert((int) keylen == thekeylen); assert(memcmp(key, thekey, keylen) == 0); - assert((int) vallen == thevallen); assert(memcmp(val, theval, vallen) == 0); - assert(i % 256 == (int)type); - assert((TXNID)i==xids_get_innermost_xid(xids)); - i += 1; - }); - assert(i == n); + struct checkit_fn { + char *thekey; + int thekeylen; + char *theval; + int thevallen; + MSN startmsn; + int verbose; + int i; + checkit_fn(char *tk, int tkl, char *tv, int tvl, MSN smsn, bool v) + : thekey(tk), thekeylen(tkl), theval(tv), thevallen(tvl), startmsn(smsn), verbose(v), i(0) { + } + int operator()(FT_MSG msg, bool UU(is_fresh)) { + MSN msn = msg->msn; + enum ft_msg_type type = ft_msg_get_type(msg); + if (verbose) printf("checkit %d %d %" PRIu64 "\n", i, type, msn.msn); + assert(msn.msn == startmsn.msn + i); + buildkey(i); + buildval(i); + assert((int) ft_msg_get_keylen(msg) == thekeylen); assert(memcmp(ft_msg_get_key(msg), thekey, ft_msg_get_keylen(msg)) == 0); + assert((int) ft_msg_get_vallen(msg) == thevallen); assert(memcmp(ft_msg_get_val(msg), theval, ft_msg_get_vallen(msg)) == 0); + assert(i % 256 == (int)type); + assert((TXNID)i==xids_get_innermost_xid(ft_msg_get_xids(msg))); + i += 1; + return 0; + } + } checkit(thekey, thekeylen, theval, thevallen, startmsn, verbose); + msg_buffer.iterate(checkit); + assert(checkit.i == n); if (thekey) toku_free(thekey); if (theval) toku_free(theval); - toku_fifo_free(&f); - assert(f == 0); + msg_buffer.destroy(); } int test_main(int argc, const char *argv[]) { default_parse_args(argc, argv); initialize_dummymsn(); - test_fifo_create(); - test_fifo_enq(4); - test_fifo_enq(512); + test_create(); + test_enqueue(4); + test_enqueue(512); return 0; } diff --git a/ft/tests/ft-serialize-test.cc b/ft/tests/ft-serialize-test.cc index 25a6a0227bc..95e5f70919a 100644 --- 
a/ft/tests/ft-serialize-test.cc +++ b/ft/tests/ft-serialize-test.cc @@ -1160,13 +1160,13 @@ test_serialize_nonleaf(enum ftnode_verify_type bft, bool do_clone) { assert(BP_BLOCKNUM(dn,0).b==30); assert(BP_BLOCKNUM(dn,1).b==35); - FIFO src_fifo_1 = BNC(&sn, 0)->buffer; - FIFO src_fifo_2 = BNC(&sn, 1)->buffer; - FIFO dest_fifo_1 = BNC(dn, 0)->buffer; - FIFO dest_fifo_2 = BNC(dn, 1)->buffer; + message_buffer *src_msg_buffer1 = &BNC(&sn, 0)->msg_buffer; + message_buffer *src_msg_buffer2 = &BNC(&sn, 1)->msg_buffer; + message_buffer *dest_msg_buffer1 = &BNC(dn, 0)->msg_buffer; + message_buffer *dest_msg_buffer2 = &BNC(dn, 1)->msg_buffer; - assert(toku_are_fifos_same(src_fifo_1, dest_fifo_1)); - assert(toku_are_fifos_same(src_fifo_2, dest_fifo_2)); + assert(src_msg_buffer1->equals(dest_msg_buffer1)); + assert(src_msg_buffer2->equals(dest_msg_buffer2)); toku_ftnode_free(&dn); diff --git a/ft/tests/orthopush-flush.cc b/ft/tests/orthopush-flush.cc index eb3297807f4..83dce2a08ce 100644 --- a/ft/tests/orthopush-flush.cc +++ b/ft/tests/orthopush-flush.cc @@ -384,41 +384,60 @@ flush_to_internal(FT_HANDLE t) { memset(parent_messages_present, 0, sizeof parent_messages_present); memset(child_messages_present, 0, sizeof child_messages_present); - FIFO_ITERATE(child_bnc->buffer, key, keylen, val, vallen, type, msn, xids, is_fresh, - { - DBT keydbt; - DBT valdbt; - toku_fill_dbt(&keydbt, key, keylen); - toku_fill_dbt(&valdbt, val, vallen); - int found = 0; - for (i = 0; i < num_parent_messages; ++i) { - if (dummy_cmp(NULL, &keydbt, parent_messages[i]->u.id.key) == 0 && - msn.msn == parent_messages[i]->msn.msn) { - assert(parent_messages_present[i] == 0); - assert(found == 0); - assert(dummy_cmp(NULL, &valdbt, parent_messages[i]->u.id.val) == 0); - assert(type == parent_messages[i]->type); - assert(xids_get_innermost_xid(xids) == xids_get_innermost_xid(parent_messages[i]->xids)); - assert(parent_messages_is_fresh[i] == is_fresh); - parent_messages_present[i]++; - found++; - } - } - for 
(i = 0; i < num_child_messages; ++i) { - if (dummy_cmp(NULL, &keydbt, child_messages[i]->u.id.key) == 0 && - msn.msn == child_messages[i]->msn.msn) { - assert(child_messages_present[i] == 0); - assert(found == 0); - assert(dummy_cmp(NULL, &valdbt, child_messages[i]->u.id.val) == 0); - assert(type == child_messages[i]->type); - assert(xids_get_innermost_xid(xids) == xids_get_innermost_xid(child_messages[i]->xids)); - assert(child_messages_is_fresh[i] == is_fresh); - child_messages_present[i]++; - found++; - } - } - assert(found == 1); - }); + struct checkit_fn { + int num_parent_messages; + FT_MSG *parent_messages; + int *parent_messages_present; + bool *parent_messages_is_fresh; + int num_child_messages; + FT_MSG *child_messages; + int *child_messages_present; + bool *child_messages_is_fresh; + checkit_fn(int np, FT_MSG *pm, int *npp, bool *pmf, int nc, FT_MSG *cm, int *ncp, bool *cmf) : + num_parent_messages(np), parent_messages(pm), parent_messages_present(npp), parent_messages_is_fresh(pmf), + num_child_messages(nc), child_messages(cm), child_messages_present(ncp), child_messages_is_fresh(cmf) { + } + int operator()(FT_MSG msg, bool is_fresh) { + DBT keydbt; + DBT valdbt; + toku_fill_dbt(&keydbt, ft_msg_get_key(msg), ft_msg_get_keylen(msg)); + toku_fill_dbt(&valdbt, ft_msg_get_val(msg), ft_msg_get_vallen(msg)); + int found = 0; + MSN msn = msg->msn; + enum ft_msg_type type = ft_msg_get_type(msg); + XIDS xids = ft_msg_get_xids(msg); + for (int i = 0; i < num_parent_messages; ++i) { + if (dummy_cmp(NULL, &keydbt, parent_messages[i]->u.id.key) == 0 && + msn.msn == parent_messages[i]->msn.msn) { + assert(parent_messages_present[i] == 0); + assert(found == 0); + assert(dummy_cmp(NULL, &valdbt, parent_messages[i]->u.id.val) == 0); + assert(type == parent_messages[i]->type); + assert(xids_get_innermost_xid(xids) == xids_get_innermost_xid(parent_messages[i]->xids)); + assert(parent_messages_is_fresh[i] == is_fresh); + parent_messages_present[i]++; + found++; + } + } + 
for (int i = 0; i < num_child_messages; ++i) { + if (dummy_cmp(NULL, &keydbt, child_messages[i]->u.id.key) == 0 && + msn.msn == child_messages[i]->msn.msn) { + assert(child_messages_present[i] == 0); + assert(found == 0); + assert(dummy_cmp(NULL, &valdbt, child_messages[i]->u.id.val) == 0); + assert(type == child_messages[i]->type); + assert(xids_get_innermost_xid(xids) == xids_get_innermost_xid(child_messages[i]->xids)); + assert(child_messages_is_fresh[i] == is_fresh); + child_messages_present[i]++; + found++; + } + } + assert(found == 1); + return 0; + } + } checkit(num_parent_messages, parent_messages, parent_messages_present, parent_messages_is_fresh, + num_child_messages, child_messages, child_messages_present, child_messages_is_fresh); + child_bnc->msg_buffer.iterate(checkit); for (i = 0; i < num_parent_messages; ++i) { assert(parent_messages_present[i] == 1); @@ -525,41 +544,60 @@ flush_to_internal_multiple(FT_HANDLE t) { memset(child_messages_present, 0, sizeof child_messages_present); for (int j = 0; j < 8; ++j) { - FIFO_ITERATE(child_bncs[j]->buffer, key, keylen, val, vallen, type, msn, xids, is_fresh, - { - DBT keydbt; - DBT valdbt; - toku_fill_dbt(&keydbt, key, keylen); - toku_fill_dbt(&valdbt, val, vallen); - int found = 0; - for (i = 0; i < num_parent_messages; ++i) { - if (dummy_cmp(NULL, &keydbt, parent_messages[i]->u.id.key) == 0 && - msn.msn == parent_messages[i]->msn.msn) { - assert(parent_messages_present[i] == 0); - assert(found == 0); - assert(dummy_cmp(NULL, &valdbt, parent_messages[i]->u.id.val) == 0); - assert(type == parent_messages[i]->type); - assert(xids_get_innermost_xid(xids) == xids_get_innermost_xid(parent_messages[i]->xids)); - assert(parent_messages_is_fresh[i] == is_fresh); - parent_messages_present[i]++; - found++; - } - } - for (i = 0; i < num_child_messages; ++i) { - if (dummy_cmp(NULL, &keydbt, child_messages[i]->u.id.key) == 0 && - msn.msn == child_messages[i]->msn.msn) { - assert(child_messages_present[i] == 0); - 
assert(found == 0); - assert(dummy_cmp(NULL, &valdbt, child_messages[i]->u.id.val) == 0); - assert(type == child_messages[i]->type); - assert(xids_get_innermost_xid(xids) == xids_get_innermost_xid(child_messages[i]->xids)); - assert(child_messages_is_fresh[i] == is_fresh); - child_messages_present[i]++; - found++; - } - } - assert(found == 1); - }); + struct checkit_fn { + int num_parent_messages; + FT_MSG *parent_messages; + int *parent_messages_present; + bool *parent_messages_is_fresh; + int num_child_messages; + FT_MSG *child_messages; + int *child_messages_present; + bool *child_messages_is_fresh; + checkit_fn(int np, FT_MSG *pm, int *npp, bool *pmf, int nc, FT_MSG *cm, int *ncp, bool *cmf) : + num_parent_messages(np), parent_messages(pm), parent_messages_present(npp), parent_messages_is_fresh(pmf), + num_child_messages(nc), child_messages(cm), child_messages_present(ncp), child_messages_is_fresh(cmf) { + } + int operator()(FT_MSG msg, bool is_fresh) { + DBT keydbt; + DBT valdbt; + toku_fill_dbt(&keydbt, ft_msg_get_key(msg), ft_msg_get_keylen(msg)); + toku_fill_dbt(&valdbt, ft_msg_get_val(msg), ft_msg_get_vallen(msg)); + int found = 0; + MSN msn = msg->msn; + enum ft_msg_type type = ft_msg_get_type(msg); + XIDS xids = ft_msg_get_xids(msg); + for (int i = 0; i < num_parent_messages; ++i) { + if (dummy_cmp(NULL, &keydbt, parent_messages[i]->u.id.key) == 0 && + msn.msn == parent_messages[i]->msn.msn) { + assert(parent_messages_present[i] == 0); + assert(found == 0); + assert(dummy_cmp(NULL, &valdbt, parent_messages[i]->u.id.val) == 0); + assert(type == parent_messages[i]->type); + assert(xids_get_innermost_xid(xids) == xids_get_innermost_xid(parent_messages[i]->xids)); + assert(parent_messages_is_fresh[i] == is_fresh); + parent_messages_present[i]++; + found++; + } + } + for (int i = 0; i < num_child_messages; ++i) { + if (dummy_cmp(NULL, &keydbt, child_messages[i]->u.id.key) == 0 && + msn.msn == child_messages[i]->msn.msn) { + assert(child_messages_present[i] == 
0); + assert(found == 0); + assert(dummy_cmp(NULL, &valdbt, child_messages[i]->u.id.val) == 0); + assert(type == child_messages[i]->type); + assert(xids_get_innermost_xid(xids) == xids_get_innermost_xid(child_messages[i]->xids)); + assert(child_messages_is_fresh[i] == is_fresh); + child_messages_present[i]++; + found++; + } + } + assert(found == 1); + return 0; + } + } checkit(num_parent_messages, parent_messages, parent_messages_present, parent_messages_is_fresh, + num_child_messages, child_messages, child_messages_present, child_messages_is_fresh); + child_bncs[j]->msg_buffer.iterate(checkit); } for (i = 0; i < num_parent_messages; ++i) { @@ -721,11 +759,13 @@ flush_to_leaf(FT_HANDLE t, bool make_leaf_up_to_date, bool use_flush) { bool msgs_applied; toku_apply_ancestors_messages_to_node(t, child, &ancestors, &infinite_bounds, &msgs_applied, -1); - FIFO_ITERATE(parent_bnc->buffer, key, keylen, val, vallen, type, msn, xids, is_fresh, - { - key = key; keylen = keylen; val = val; vallen = vallen; type = type; msn = msn; xids = xids; - assert(!is_fresh); - }); + struct checkit_fn { + int operator()(FT_MSG UU(msg), bool is_fresh) { + assert(!is_fresh); + return 0; + } + } checkit; + parent_bnc->msg_buffer.iterate(checkit); invariant(parent_bnc->fresh_message_tree.size() + parent_bnc->stale_message_tree.size() == (uint32_t) num_parent_messages); @@ -947,23 +987,33 @@ flush_to_leaf_with_keyrange(FT_HANDLE t, bool make_leaf_up_to_date) { bool msgs_applied; toku_apply_ancestors_messages_to_node(t, child, &ancestors, &bounds, &msgs_applied, -1); - FIFO_ITERATE(parent_bnc->buffer, key, keylen, val, vallen, type, msn, xids, is_fresh, - { - val = val; vallen = vallen; type = type; msn = msn; xids = xids; - DBT keydbt; - toku_fill_dbt(&keydbt, key, keylen); - if (dummy_cmp(NULL, &keydbt, &childkeys[7]) > 0) { - for (i = 0; i < num_parent_messages; ++i) { - if (dummy_cmp(NULL, &keydbt, parent_messages[i]->u.id.key) == 0 && - msn.msn == parent_messages[i]->msn.msn) { - 
assert(is_fresh == parent_messages_is_fresh[i]); - break; - } - } - } else { - assert(!is_fresh); - } - }); + struct checkit_fn { + DBT *childkeys; + int num_parent_messages; + FT_MSG *parent_messages; + bool *parent_messages_is_fresh; + checkit_fn(DBT *ck, int np, FT_MSG *pm, bool *pmf) : + childkeys(ck), num_parent_messages(np), parent_messages(pm), parent_messages_is_fresh(pmf) { + } + int operator()(FT_MSG msg, bool is_fresh) { + DBT keydbt; + toku_fill_dbt(&keydbt, ft_msg_get_key(msg), ft_msg_get_keylen(msg)); + MSN msn = msg->msn; + if (dummy_cmp(NULL, &keydbt, &childkeys[7]) > 0) { + for (int i = 0; i < num_parent_messages; ++i) { + if (dummy_cmp(NULL, &keydbt, parent_messages[i]->u.id.key) == 0 && + msn.msn == parent_messages[i]->msn.msn) { + assert(is_fresh == parent_messages_is_fresh[i]); + break; + } + } + } else { + assert(!is_fresh); + } + return 0; + } + } checkit(childkeys, num_parent_messages, parent_messages, parent_messages_is_fresh); + parent_bnc->msg_buffer.iterate(checkit); toku_ftnode_free(&parentnode); @@ -1134,11 +1184,13 @@ compare_apply_and_flush(FT_HANDLE t, bool make_leaf_up_to_date) { bool msgs_applied; toku_apply_ancestors_messages_to_node(t, child2, &ancestors, &infinite_bounds, &msgs_applied, -1); - FIFO_ITERATE(parent_bnc->buffer, key, keylen, val, vallen, type, msn, xids, is_fresh, - { - key = key; keylen = keylen; val = val; vallen = vallen; type = type; msn = msn; xids = xids; - assert(!is_fresh); - }); + struct checkit_fn { + int operator()(FT_MSG UU(msg), bool is_fresh) { + assert(!is_fresh); + return 0; + } + } checkit; + parent_bnc->msg_buffer.iterate(checkit); invariant(parent_bnc->fresh_message_tree.size() + parent_bnc->stale_message_tree.size() == (uint32_t) num_parent_messages); diff --git a/ft/tokuftdump.cc b/ft/tokuftdump.cc index a7d94f41d78..45c4d154087 100644 --- a/ft/tokuftdump.cc +++ b/ft/tokuftdump.cc @@ -276,38 +276,47 @@ static void dump_node(int fd, BLOCKNUM blocknum, FT h) { printf(" buffer contains %u bytes 
(%d items)\n", n_bytes, n_entries); } if (do_dump_data) { - FIFO_ITERATE(bnc->buffer, key, keylen, data, datalen, typ, msn, xids, UU(is_fresh), - { - printf(" msn=%" PRIu64 " (0x%" PRIx64 ") ", msn.msn, msn.msn); - printf(" TYPE="); - switch ((enum ft_msg_type)typ) { - case FT_NONE: printf("NONE"); goto ok; - case FT_INSERT: printf("INSERT"); goto ok; - case FT_INSERT_NO_OVERWRITE: printf("INSERT_NO_OVERWRITE"); goto ok; - case FT_DELETE_ANY: printf("DELETE_ANY"); goto ok; - case FT_ABORT_ANY: printf("ABORT_ANY"); goto ok; - case FT_COMMIT_ANY: printf("COMMIT_ANY"); goto ok; - case FT_COMMIT_BROADCAST_ALL: printf("COMMIT_BROADCAST_ALL"); goto ok; - case FT_COMMIT_BROADCAST_TXN: printf("COMMIT_BROADCAST_TXN"); goto ok; - case FT_ABORT_BROADCAST_TXN: printf("ABORT_BROADCAST_TXN"); goto ok; - case FT_OPTIMIZE: printf("OPTIMIZE"); goto ok; - case FT_OPTIMIZE_FOR_UPGRADE: printf("OPTIMIZE_FOR_UPGRADE"); goto ok; - case FT_UPDATE: printf("UPDATE"); goto ok; - case FT_UPDATE_BROADCAST_ALL: printf("UPDATE_BROADCAST_ALL"); goto ok; - } - printf("HUH?"); - ok: - printf(" xid="); - xids_fprintf(stdout, xids); - printf(" "); - print_item(key, keylen); - if (datalen>0) { - printf(" "); - print_item(data, datalen); - } - printf("\n"); - } - ); + struct dump_data_fn { + int operator()(FT_MSG msg, bool UU(is_fresh)) { + enum ft_msg_type type = (enum ft_msg_type) msg->type; + MSN msn = msg->msn; + XIDS xids = msg->xids; + const void *key = ft_msg_get_key(msg); + const void *data = ft_msg_get_val(msg); + ITEMLEN keylen = ft_msg_get_keylen(msg); + ITEMLEN datalen = ft_msg_get_vallen(msg); + printf(" msn=%" PRIu64 " (0x%" PRIx64 ") ", msn.msn, msn.msn); + printf(" TYPE="); + switch (type) { + case FT_NONE: printf("NONE"); goto ok; + case FT_INSERT: printf("INSERT"); goto ok; + case FT_INSERT_NO_OVERWRITE: printf("INSERT_NO_OVERWRITE"); goto ok; + case FT_DELETE_ANY: printf("DELETE_ANY"); goto ok; + case FT_ABORT_ANY: printf("ABORT_ANY"); goto ok; + case FT_COMMIT_ANY: 
printf("COMMIT_ANY"); goto ok; + case FT_COMMIT_BROADCAST_ALL: printf("COMMIT_BROADCAST_ALL"); goto ok; + case FT_COMMIT_BROADCAST_TXN: printf("COMMIT_BROADCAST_TXN"); goto ok; + case FT_ABORT_BROADCAST_TXN: printf("ABORT_BROADCAST_TXN"); goto ok; + case FT_OPTIMIZE: printf("OPTIMIZE"); goto ok; + case FT_OPTIMIZE_FOR_UPGRADE: printf("OPTIMIZE_FOR_UPGRADE"); goto ok; + case FT_UPDATE: printf("UPDATE"); goto ok; + case FT_UPDATE_BROADCAST_ALL: printf("UPDATE_BROADCAST_ALL"); goto ok; + } + printf("HUH?"); +ok: + printf(" xid="); + xids_fprintf(stdout, xids); + printf(" "); + print_item(key, keylen); + if (datalen>0) { + printf(" "); + print_item(data, datalen); + } + printf("\n"); + return 0; + } + } dump_fn; + bnc->msg_buffer.iterate(dump_fn); } } else { printf(" n_bytes_in_buffer= %" PRIu64 "", BLB_DATA(n, i)->get_disk_size()); diff --git a/ft/xids-internal.h b/ft/xids-internal.h index 6ceae6ee35e..b0c2d20bc6b 100644 --- a/ft/xids-internal.h +++ b/ft/xids-internal.h @@ -98,6 +98,8 @@ PATENT RIGHTS GRANT: // ids[num_xids - 1] is the innermost transaction. // Should only be accessed by accessor functions xids_xxx, not directly. +#include + // If the xids struct is unpacked, the compiler aligns the ids[] and we waste a lot of space typedef struct __attribute__((__packed__)) xids_t { uint8_t num_xids; // maximum value of MAX_TRANSACTION_RECORDS - 1 ... 
From d2ab8a6253f5c044752afa3d72265aeba5b7e3b4 Mon Sep 17 00:00:00 2001 From: John Esmet Date: Sat, 14 Jun 2014 20:25:34 -0400 Subject: [PATCH 012/190] FT-241 Use #pragma once as a header guard --- ft/background_job_manager.h | 6 ++---- ft/block_allocator.h | 7 ++----- ft/block_table.h | 7 ++----- ft/bndata.h | 2 -- ft/cachetable-internal.h | 7 ++----- ft/cachetable.h | 6 ++---- ft/checkpoint.h | 7 ++----- ft/comparator.h | 5 ++--- ft/compress.h | 8 ++------ ft/ft-cachetable-wrappers.h | 6 ++---- ft/ft-flusher-internal.h | 6 ++---- ft/ft-flusher.h | 6 ++---- ft/ft-internal.h | 6 ++---- ft/ft-ops.h | 6 ++---- ft/ft-search.h | 7 ++----- ft/ft.h | 6 ++---- ft/ft_layout_version.h | 6 ++---- ft/ft_msg.h | 7 ++----- ft/fttypes.h | 6 ++---- ft/le-cursor.h | 7 ++----- ft/leafentry.h | 8 ++------ ft/loader/dbufio.h | 6 ++---- ft/loader/loader-internal.h | 6 ++---- ft/loader/loader.h | 6 ++---- ft/loader/pqueue.h | 7 ++----- ft/log-internal.h | 6 ++---- ft/log.h | 7 ++----- ft/logcursor.h | 8 ++------ ft/logfilemgr.h | 8 ++------ ft/logformat.cc | 4 +--- ft/logger.h | 6 ++---- ft/quicklz.h | 7 ++----- ft/rbuf.h | 7 ++----- ft/recover.h | 6 ++---- ft/rollback-apply.h | 8 ++------ ft/rollback-ct-callbacks.h | 7 ++----- ft/rollback.h | 7 ++----- ft/rollback_log_node_cache.h | 6 ++---- ft/sub_block.h | 8 ++------ ft/sub_block_map.h | 6 ++---- ft/tests/cachetable-test.h | 3 ++- ft/tests/ftloader-error-injector.h | 7 ++----- ft/tests/test-ft-txns.h | 6 ++---- ft/tests/test.h | 3 ++- ft/tokuconst.h | 9 ++------- ft/txn.h | 6 ++---- ft/txn_child_manager.h | 6 ++---- ft/txn_manager.h | 6 ++---- ft/txn_state.h | 6 ++---- ft/ule-internal.h | 9 ++------- ft/ule.h | 7 ++----- ft/wbuf.h | 7 ++----- ft/workset.h | 6 ++---- ft/xids-internal.h | 7 ++----- ft/xids.h | 9 ++------- ft/ybt.h | 6 ++---- locktree/concurrent_tree.h | 7 ++----- locktree/keyrange.h | 7 ++----- locktree/lock_request.h | 7 ++----- locktree/range_buffer.h | 7 ++----- locktree/tests/concurrent_tree_unit_test.h | 2 ++ 
locktree/tests/lock_request_unit_test.h | 7 ++----- locktree/tests/locktree_unit_test.h | 7 ++----- locktree/tests/manager_unit_test.h | 7 ++----- locktree/tests/test.h | 7 ++----- locktree/treenode.h | 7 ++----- locktree/txnid_set.h | 7 ++----- locktree/wfg.h | 7 ++----- portability/memory.h | 7 ++----- portability/rdtsc.h | 2 ++ portability/toku_assert.h | 7 +++---- portability/toku_atomic.h | 7 ++----- portability/toku_byteswap.h | 7 ++----- portability/toku_crash.h | 7 ++----- portability/toku_htod.h | 11 ++--------- portability/toku_htonl.h | 11 ++--------- portability/toku_list.h | 12 ++---------- portability/toku_os.h | 7 ++----- portability/toku_os_types.h | 7 ++----- portability/toku_path.h | 7 ++----- portability/toku_portability.h | 7 ++----- portability/toku_pthread.h | 7 ++----- portability/toku_race_tools.h | 6 ++---- portability/toku_random.h | 7 ++----- portability/toku_stdint.h | 8 ++------ portability/toku_stdlib.h | 3 +++ portability/toku_time.h | 7 ++----- src/indexer-internal.h | 5 +---- src/indexer.h | 7 +------ src/loader.h | 6 +----- src/tests/checkpoint_test.h | 9 ++------- src/tests/key-val.h | 9 ++------- src/tests/recover-test_crash_in_flusher_thread.h | 3 +++ src/tests/stress_openclose.h | 2 ++ src/tests/test.h | 9 +++------ src/tests/test_kv_gen.h | 9 ++------- src/tests/threaded_stress_test_helpers.h | 7 ++----- src/ydb-internal.h | 6 ++---- src/ydb.h | 7 +------ src/ydb_cursor.h | 10 ++-------- src/ydb_db.h | 5 +---- src/ydb_env_func.h | 8 ++------ src/ydb_load.h | 6 +----- src/ydb_row_lock.h | 5 +---- src/ydb_txn.h | 8 ++------ src/ydb_write.h | 11 +---------- tools/tokudb_common.h | 6 ++---- tools/tokudb_common_funcs.h | 6 ++---- util/circular_buffer.h | 7 ++----- util/constexpr.h | 4 ++-- util/dmt.h | 3 ++- util/doubly_linked_list.h | 6 ++---- util/fmutex.h | 5 +---- util/frwlock.h | 6 ++---- util/growable_array.h | 6 ++---- util/kibbutz.h | 7 ++----- util/memarena.h | 6 ++---- util/mempool.h | 9 ++------- util/minicron.h | 7 
+------ util/nb_mutex.h | 6 ++---- util/omt.h | 6 ++---- util/partitioned_counter.h | 7 ++----- util/queue.h | 5 ++--- util/rwlock.h | 5 ++--- util/sort.h | 5 +---- util/status.h | 1 + util/threadpool.h | 5 +---- util/x1764.h | 7 ++----- 128 files changed, 244 insertions(+), 590 deletions(-) diff --git a/ft/background_job_manager.h b/ft/background_job_manager.h index 5474a569454..c16902987b2 100644 --- a/ft/background_job_manager.h +++ b/ft/background_job_manager.h @@ -1,7 +1,5 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef BACKGROUND_JOB_MANAGER_H -#define BACKGROUND_JOB_MANAGER_H #ident "$Id$" /* COPYING CONDITIONS NOTICE: @@ -88,6 +86,8 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." @@ -130,5 +130,3 @@ void bjm_remove_background_job(BACKGROUND_JOB_MANAGER bjm); // has completed, bjm_add_background_job returns an error. // void bjm_wait_for_jobs_to_finish(BACKGROUND_JOB_MANAGER bjm); - -#endif diff --git a/ft/block_allocator.h b/ft/block_allocator.h index 815692963fb..adc9b5369ac 100644 --- a/ft/block_allocator.h +++ b/ft/block_allocator.h @@ -1,7 +1,5 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef BLOCK_ALLOCATOR_H -#define BLOCK_ALLOCATOR_H #ident "$Id$" /* @@ -89,6 +87,8 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." 
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." @@ -222,6 +222,3 @@ void block_allocator_merge_blockpairs_into (uint64_t d, struct block_alloc // dst must be large enough. // No blocks may overlap. // Rationale: This is exposed so it can be tested by a glass box tester. Otherwise it would be static (file-scope) function inside block_allocator.c - - -#endif diff --git a/ft/block_table.h b/ft/block_table.h index 72c914988fa..9fbf4f3dcf1 100644 --- a/ft/block_table.h +++ b/ft/block_table.h @@ -1,7 +1,5 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef BLOCKTABLE_H -#define BLOCKTABLE_H #ident "$Id$" /* COPYING CONDITIONS NOTICE: @@ -88,6 +86,8 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." @@ -171,6 +171,3 @@ enum {RESERVED_BLOCKNUM_NULL =0, RESERVED_BLOCKNUM_DESCRIPTOR =2, RESERVED_BLOCKNUMS}; - -#endif - diff --git a/ft/bndata.h b/ft/bndata.h index 75db59daea9..6c34833c00e 100644 --- a/ft/bndata.h +++ b/ft/bndata.h @@ -88,7 +88,6 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." 
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." - #pragma once #include @@ -384,4 +383,3 @@ private: uint32_t key_data_size, uint32_t val_data_size, bool all_keys_same_length, uint32_t fixed_klpair_length); }; - diff --git a/ft/cachetable-internal.h b/ft/cachetable-internal.h index a02449f3c07..0e0d1ad0f64 100644 --- a/ft/cachetable-internal.h +++ b/ft/cachetable-internal.h @@ -1,9 +1,6 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef TokuDB_cachetable_internal_h -#define TokuDB_cachetable_internal_h - #ident "$Id$" /* COPYING CONDITIONS NOTICE: @@ -90,6 +87,8 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." @@ -654,5 +653,3 @@ struct cachetable { char *env_dir; }; - -#endif // End of header guardian. diff --git a/ft/cachetable.h b/ft/cachetable.h index 32686640997..5c5eb575909 100644 --- a/ft/cachetable.h +++ b/ft/cachetable.h @@ -1,7 +1,5 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef CACHETABLE_H -#define CACHETABLE_H #ident "$Id$" /* @@ -89,6 +87,8 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." 
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." @@ -639,5 +639,3 @@ void toku_pair_list_set_lock_size(uint32_t num_locks); // layer. __attribute__((const,nonnull)) bool toku_ctpair_is_write_locked(PAIR pair); - -#endif /* CACHETABLE_H */ diff --git a/ft/checkpoint.h b/ft/checkpoint.h index 9e1725af91b..63acfa7c0bc 100644 --- a/ft/checkpoint.h +++ b/ft/checkpoint.h @@ -1,7 +1,5 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef TOKU_CHECKPOINT_H -#define TOKU_CHECKPOINT_H /* COPYING CONDITIONS NOTICE: @@ -88,6 +86,8 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2009-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." #ident "$Id$" @@ -200,6 +200,3 @@ typedef struct { } CHECKPOINT_STATUS_S, *CHECKPOINT_STATUS; void toku_checkpoint_get_status(CACHETABLE ct, CHECKPOINT_STATUS stat); - - -#endif diff --git a/ft/comparator.h b/ft/comparator.h index 98c20b82aa5..6b78dcfc69d 100644 --- a/ft/comparator.h +++ b/ft/comparator.h @@ -85,12 +85,11 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 
11/760379 and to the patents and/or patent applications resulting from it." - -#pragma once - #include #include diff --git a/ft/compress.h b/ft/compress.h index bc25b55be8b..8e39fc96220 100644 --- a/ft/compress.h +++ b/ft/compress.h @@ -86,13 +86,11 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#ifndef TOKU_COMPRESS_H -#define TOKU_COMPRESS_H - - #include #include @@ -131,5 +129,3 @@ void toku_decompress (Bytef *dest, uLongf destLen, // This function can decompress data compressed with either zlib or quicklz compression methods (calling toku_compress(), which puts an appropriate header on so we know which it is.) // Requires: destLen is equal to the actual decompressed size of the data. // Requires: The source must have been properly compressed. - -#endif diff --git a/ft/ft-cachetable-wrappers.h b/ft/ft-cachetable-wrappers.h index d276dd8af5f..b98cdd0fd19 100644 --- a/ft/ft-cachetable-wrappers.h +++ b/ft/ft-cachetable-wrappers.h @@ -1,7 +1,5 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef FT_CACHETABLE_WRAPPERS_H -#define FT_CACHETABLE_WRAPPERS_H #ident "$Id$" /* @@ -89,6 +87,8 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 
11/760379 and to the patents and/or patent applications resulting from it." @@ -192,5 +192,3 @@ void toku_unpin_ftnode_read_only(FT ft, FTNODE node); // Effect: Swaps pair values of two pinned nodes void toku_ftnode_swap_pair_values(FTNODE nodea, FTNODE nodeb); - -#endif diff --git a/ft/ft-flusher-internal.h b/ft/ft-flusher-internal.h index 512f5ffd27d..d2aeea3c4ff 100644 --- a/ft/ft-flusher-internal.h +++ b/ft/ft-flusher-internal.h @@ -1,7 +1,5 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef FT_FLUSHER_INTERNAL_H -#define FT_FLUSHER_INTERNAL_H #ident "$Id$" /* COPYING CONDITIONS NOTICE: @@ -88,6 +86,8 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." @@ -236,5 +236,3 @@ default_pick_child_after_split(FT h, int childnumb, void *extra); - -#endif // End of header guardian. diff --git a/ft/ft-flusher.h b/ft/ft-flusher.h index 0861669157a..0111827653b 100644 --- a/ft/ft-flusher.h +++ b/ft/ft-flusher.h @@ -1,7 +1,5 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef FT_FLUSHER_H -#define FT_FLUSHER_H #ident "$Id$" /* COPYING CONDITIONS NOTICE: @@ -88,6 +86,8 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 
11/760379 and to the patents and/or patent applications resulting from it." @@ -251,5 +251,3 @@ int toku_ft_hot_optimize(FT_HANDLE ft_h, DBT* left, DBT* right, int (*progress_callback)(void *extra, float progress), void *progress_extra, uint64_t* loops_run); - -#endif // End of header guardian. diff --git a/ft/ft-internal.h b/ft/ft-internal.h index 3d341e27523..0c9dcb844e3 100644 --- a/ft/ft-internal.h +++ b/ft/ft-internal.h @@ -1,7 +1,5 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef FT_INTERNAL_H -#define FT_INTERNAL_H #ident "$Id$" /* @@ -89,6 +87,8 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." @@ -1229,5 +1229,3 @@ void toku_flusher_thread_set_callback(void (*callback_f)(int, void*), void* extr int toku_upgrade_subtree_estimates_to_stat64info(int fd, FT h) __attribute__((nonnull)); int toku_upgrade_msn_from_root_to_header(int fd, FT h) __attribute__((nonnull)); - -#endif diff --git a/ft/ft-ops.h b/ft/ft-ops.h index b1c246f61f3..fdfe3d56f06 100644 --- a/ft/ft-ops.h +++ b/ft/ft-ops.h @@ -1,7 +1,5 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef FT_OPS_H -#define FT_OPS_H #ident "$Id$" /* COPYING CONDITIONS NOTICE: @@ -88,6 +86,8 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." 
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." @@ -362,5 +362,3 @@ void toku_ft_set_direct_io(bool direct_io_on); void toku_ft_set_compress_buffers_before_eviction(bool compress_buffers); void toku_note_deserialized_basement_node(bool fixed_key_size); - -#endif diff --git a/ft/ft-search.h b/ft/ft-search.h index 8e8fece6a3c..2c7f935a022 100644 --- a/ft/ft-search.h +++ b/ft/ft-search.h @@ -86,12 +86,11 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#ifndef FT_SEARCH_H -#define FT_SEARCH_H - #include "ft/ybt.h" enum ft_search_direction_e { @@ -155,5 +154,3 @@ static inline ft_search_t *ft_search_init(ft_search_t *so, ft_search_compare_fun static inline void ft_search_finish(ft_search_t *so) { toku_destroy_dbt(&so->pivot_bound); } - -#endif diff --git a/ft/ft.h b/ft/ft.h index baf20000f3a..d6b9914e279 100644 --- a/ft/ft.h +++ b/ft/ft.h @@ -1,7 +1,5 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef FT_H -#define FT_H #ident "$Id$" /* COPYING CONDITIONS NOTICE: @@ -88,6 +86,8 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." 
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." @@ -223,5 +223,3 @@ extern int tokudb_num_envs; int toku_keycompare (bytevec key1, ITEMLEN key1len, bytevec key2, ITEMLEN key2len); int toku_builtin_compare_fun (DB *, const DBT *, const DBT*) __attribute__((__visibility__("default"))); - -#endif diff --git a/ft/ft_layout_version.h b/ft/ft_layout_version.h index 01c7363e98d..2479aff9cb0 100644 --- a/ft/ft_layout_version.h +++ b/ft/ft_layout_version.h @@ -1,7 +1,5 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef FT_LAYOUT_VERSION_H -#define FT_LAYOUT_VERSION_H #ident "$Id$" /* @@ -89,6 +87,8 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." @@ -130,5 +130,3 @@ enum ft_layout_version_e { FT_FIRST_LAYOUT_VERSION_WITH_END_TO_END_CHECKSUM = FT_LAYOUT_VERSION_14, FT_FIRST_LAYOUT_VERSION_WITH_BASEMENT_NODES = FT_LAYOUT_VERSION_15, }; - -#endif diff --git a/ft/ft_msg.h b/ft/ft_msg.h index e0db4a51ddb..8a0b80be969 100644 --- a/ft/ft_msg.h +++ b/ft/ft_msg.h @@ -6,9 +6,6 @@ * a message buffer. */ -#ifndef FT_MSG_H -#define FT_MSG_H - #ident "$Id$" /* COPYING CONDITIONS NOTICE: @@ -95,6 +92,8 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." 
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." @@ -211,5 +210,3 @@ void *ft_msg_get_key(FT_MSG ft_msg); void *ft_msg_get_val(FT_MSG ft_msg); enum ft_msg_type ft_msg_get_type(FT_MSG ft_msg); - -#endif diff --git a/ft/fttypes.h b/ft/fttypes.h index a975e66efe2..f65d3e3a6a8 100644 --- a/ft/fttypes.h +++ b/ft/fttypes.h @@ -1,7 +1,5 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef FTTYPES_H -#define FTTYPES_H #ident "$Id$" /* @@ -89,6 +87,8 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." @@ -194,5 +194,3 @@ typedef void (*remove_ft_ref_callback)(FT, void*); typedef void (*on_redirect_callback)(FT_HANDLE, void*); #define UU(x) x __attribute__((__unused__)) - -#endif diff --git a/ft/le-cursor.h b/ft/le-cursor.h index d443666492c..7295e59ae14 100644 --- a/ft/le-cursor.h +++ b/ft/le-cursor.h @@ -86,12 +86,11 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2010-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
-#ifndef LE_CURSOR_H -#define LE_CURSOR_H - #include "ft-ops.h" // A leaf entry cursor (LE_CURSOR) is a special type of FT_CURSOR that visits all of the leaf entries in a tree @@ -127,5 +126,3 @@ bool toku_le_cursor_is_key_greater_or_equal(LE_CURSOR le_cursor, const DBT *key) // extracts position of le_cursor into estimate. Responsibility of caller to handle // thread safety. Caller (the indexer), does so by ensuring indexer lock is held void toku_le_cursor_update_estimate(LE_CURSOR le_cursor, DBT* estimate); - -#endif diff --git a/ft/leafentry.h b/ft/leafentry.h index 4563e26c384..bf954940c91 100644 --- a/ft/leafentry.h +++ b/ft/leafentry.h @@ -1,9 +1,6 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef TOKU_LEAFENTRY_H -#define TOKU_LEAFENTRY_H - #ident "$Id$" /* COPYING CONDITIONS NOTICE: @@ -90,6 +87,8 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." @@ -265,6 +264,3 @@ toku_le_garbage_collect(LEAFENTRY old_leaf_entry, txn_gc_info *gc_info, LEAFENTRY *new_leaf_entry, int64_t * numbytes_delta_p); - -#endif /* TOKU_LEAFENTRY_H */ - diff --git a/ft/loader/dbufio.h b/ft/loader/dbufio.h index 0762bf9a8c6..8bdcbc122ee 100644 --- a/ft/loader/dbufio.h +++ b/ft/loader/dbufio.h @@ -1,7 +1,5 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef TOKU_DBUFIO_H -#define TOKU_DBUFIO_H #ident "$Id$" /* COPYING CONDITIONS NOTICE: @@ -88,6 +86,8 @@ PATENT RIGHTS GRANT: under this License. 
*/ +#pragma once + #ident "Copyright (c) 2010-2013 Tokutek Inc. All rights reserved." #include @@ -108,5 +108,3 @@ int dbufio_fileset_read (DBUFIO_FILESET bfs, int filenum, void *buf_v, size_t co int panic_dbufio_fileset(DBUFIO_FILESET, int error); void dbufio_print(DBUFIO_FILESET); - -#endif diff --git a/ft/loader/loader-internal.h b/ft/loader/loader-internal.h index 779d2da450c..c02d9619f1f 100644 --- a/ft/loader/loader-internal.h +++ b/ft/loader/loader-internal.h @@ -1,7 +1,5 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef FTLOADER_INTERNAL_H -#define FTLOADER_INTERNAL_H #ident "$Id$" /* COPYING CONDITIONS NOTICE: @@ -88,6 +86,8 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2010-2013 Tokutek Inc. All rights reserved." #include @@ -362,5 +362,3 @@ int toku_ft_loader_get_error(FTLOADER bl, int *loader_errno); void ft_loader_lock_init(FTLOADER bl); void ft_loader_lock_destroy(FTLOADER bl); void ft_loader_set_fractal_workers_count_from_c(FTLOADER bl); - -#endif // FTLOADER_INTERNAL_H diff --git a/ft/loader/loader.h b/ft/loader/loader.h index ab78af34ea2..ba4ee839262 100644 --- a/ft/loader/loader.h +++ b/ft/loader/loader.h @@ -1,7 +1,5 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef FTLOADER_H -#define FTLOADER_H #ident "$Id$" /* @@ -89,6 +87,8 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
@@ -133,5 +133,3 @@ void toku_ft_loader_set_size_factor (uint32_t factor); void ft_loader_set_os_fwrite (size_t (*fwrite_fun)(const void*,size_t,size_t,FILE*)); size_t ft_loader_leafentry_size(size_t key_size, size_t val_size, TXNID xid); - -#endif // FTLOADER_H diff --git a/ft/loader/pqueue.h b/ft/loader/pqueue.h index cd550d70572..9a8045111bc 100644 --- a/ft/loader/pqueue.h +++ b/ft/loader/pqueue.h @@ -1,7 +1,5 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef TOKU_PQUEUE_H -#define TOKU_PQUEUE_H #ident "$Id$" /* @@ -89,6 +87,8 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." @@ -121,6 +121,3 @@ void pqueue_free(pqueue_t *q); size_t pqueue_size(pqueue_t *q); int pqueue_insert(pqueue_t *q, pqueue_node_t *d); int pqueue_pop(pqueue_t *q, pqueue_node_t **d); - - -#endif //TOKU_PQUEUE_H diff --git a/ft/log-internal.h b/ft/log-internal.h index a0ed1df10ff..3d935c79810 100644 --- a/ft/log-internal.h +++ b/ft/log-internal.h @@ -1,7 +1,5 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef LOG_INTERNAL_H -#define LOG_INTERNAL_H #ident "$Id$" /* @@ -89,6 +87,8 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 
11/760379 and to the patents and/or patent applications resulting from it." @@ -373,5 +373,3 @@ static inline char *fixup_fname(BYTESTRING *f) { fname[f->len]=0; return fname; } - -#endif diff --git a/ft/log.h b/ft/log.h index 18ba802df6d..c59c981a45f 100644 --- a/ft/log.h +++ b/ft/log.h @@ -1,7 +1,5 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef TOKU_LOGGGER_H -#define TOKU_LOGGGER_H #ident "$Id$" /* @@ -89,6 +87,8 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." @@ -129,6 +129,3 @@ static inline void toku_free_FILENUMS(FILENUMS val) { toku_free(val.filenums); } int toku_maybe_upgrade_log (const char *env_dir, const char *log_dir, LSN * lsn_of_clean_shutdown, bool * upgrade_in_progress); uint64_t toku_log_upgrade_get_footprint(void); - - -#endif diff --git a/ft/logcursor.h b/ft/logcursor.h index f374f6c2874..e3ae366bbce 100644 --- a/ft/logcursor.h +++ b/ft/logcursor.h @@ -1,7 +1,5 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef TOKULOGCURSOR_H -#define TOKULOGCURSOR_H #ident "$Id$" /* @@ -89,12 +87,13 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 
11/760379 and to the patents and/or patent applications resulting from it." #include - struct toku_logcursor; typedef struct toku_logcursor *TOKULOGCURSOR; @@ -127,6 +126,3 @@ int toku_logcursor_last(const TOKULOGCURSOR lc, struct log_entry **le); int toku_logcursor_log_exists(const TOKULOGCURSOR lc); void toku_logcursor_print(TOKULOGCURSOR lc); - - -#endif // TOKULOGCURSOR_H diff --git a/ft/logfilemgr.h b/ft/logfilemgr.h index de9322604bc..73e0a335496 100644 --- a/ft/logfilemgr.h +++ b/ft/logfilemgr.h @@ -1,7 +1,5 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef TOKULOGFILEMGR_H -#define TOKULOGFILEMGR_H #ident "$Id$" /* @@ -89,12 +87,13 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
#include - // this is the basic information we need to keep per logfile struct toku_logfile_info { int64_t index; @@ -118,6 +117,3 @@ LSN toku_logfilemgr_get_last_lsn(TOKULOGFILEMGR lfm); void toku_logfilemgr_update_last_lsn(TOKULOGFILEMGR lfm, LSN lsn); void toku_logfilemgr_print(TOKULOGFILEMGR lfm); - - -#endif //TOKULOGFILEMGR_H diff --git a/ft/logformat.cc b/ft/logformat.cc index 6dab12bdf31..8fdbaf45428 100644 --- a/ft/logformat.cc +++ b/ft/logformat.cc @@ -849,8 +849,7 @@ int main (int argc, const char *const argv[]) { pf = fopen(printpath, "w"); assert(pf!=0); fprintf2(cf, hf, "/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */\n"); fprintf2(cf, hf, "// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:\n"); - fprintf(hf, "#ifndef LOG_HEADER_H\n"); - fprintf(hf, "#define LOG_HEADER_H\n"); + fprintf(hf, "#pragma once\n"); fprintf2(cf, hf, "/* Do not edit this file. This code generated by logformat.c. Copyright (c) 2007-2013 Tokutek Inc. */\n"); fprintf2(cf, hf, "#ident \"Copyright (c) 2007-2013 Tokutek Inc. All rights reserved.\"\n"); fprintf2(cf, pf, "#include \n"); @@ -867,7 +866,6 @@ int main (int argc, const char *const argv[]) { generate_rollbacks(); generate_log_entry_functions(); generate_logprint(); - fprintf(hf, "#endif\n"); { int r=fclose(hf); assert(r==0); r=fclose(cf); assert(r==0); diff --git a/ft/logger.h b/ft/logger.h index 2b444f4499f..9a3ab3a248d 100644 --- a/ft/logger.h +++ b/ft/logger.h @@ -1,7 +1,5 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef TOKU_LOGGER_H -#define TOKU_LOGGER_H #ident "$Id$" /* @@ -89,6 +87,8 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." 
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." @@ -259,5 +259,3 @@ void toku_logger_get_status(TOKULOGGER logger, LOGGER_STATUS s); int toku_get_version_of_logs_on_disk(const char *log_dir, bool *found_any_logs, uint32_t *version_found); TXN_MANAGER toku_logger_get_txn_manager(TOKULOGGER logger); - -#endif /* TOKU_LOGGER_H */ diff --git a/ft/quicklz.h b/ft/quicklz.h index 2f2db8cd739..23b6e10f8a6 100644 --- a/ft/quicklz.h +++ b/ft/quicklz.h @@ -86,10 +86,10 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#ifndef QLZ_HEADER -#define QLZ_HEADER // Fast data compression library // Copyright (C) 2006-2011 Lasse Mikkel Reinhold @@ -228,6 +228,3 @@ int qlz_get_setting(int setting); #if defined (__cplusplus) } #endif - -#endif - diff --git a/ft/rbuf.h b/ft/rbuf.h index a21123bfb73..755d0182ff2 100644 --- a/ft/rbuf.h +++ b/ft/rbuf.h @@ -1,7 +1,5 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef RBUF_H -#define RBUF_H #ident "$Id$" /* @@ -89,6 +87,8 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." 
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." @@ -275,6 +275,3 @@ static inline void rbuf_ma_BYTESTRING (struct rbuf *r, MEMARENA ma, BYTESTRING assert(bs->data); r->ndone = newndone; } - - -#endif diff --git a/ft/recover.h b/ft/recover.h index 0675b8b9ae4..9d4d081cd7d 100644 --- a/ft/recover.h +++ b/ft/recover.h @@ -1,7 +1,5 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef TOKURECOVER_H -#define TOKURECOVER_H #ident "$Id$" /* @@ -89,6 +87,8 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." @@ -135,5 +135,3 @@ extern int tokudb_recovery_trace; int toku_recover_lock (const char *lock_dir, int *lockfd); int toku_recover_unlock(int lockfd); - -#endif // TOKURECOVER_H diff --git a/ft/rollback-apply.h b/ft/rollback-apply.h index 50e53ea6d24..af93d62cebe 100644 --- a/ft/rollback-apply.h +++ b/ft/rollback-apply.h @@ -1,7 +1,5 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef ROLLBACK_APPLY_H -#define ROLLBACK_APPLY_H #ident "$Id$" /* @@ -89,16 +87,14 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." 
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." - typedef int(*apply_rollback_item)(TOKUTXN txn, struct roll_entry *item, LSN lsn); int toku_commit_rollback_item (TOKUTXN txn, struct roll_entry *item, LSN lsn); int toku_abort_rollback_item (TOKUTXN txn, struct roll_entry *item, LSN lsn); int toku_rollback_commit(TOKUTXN txn, LSN lsn); int toku_rollback_abort(TOKUTXN txn, LSN lsn); - - -#endif // ROLLBACK_APPLY_H diff --git a/ft/rollback-ct-callbacks.h b/ft/rollback-ct-callbacks.h index aeb4650e17d..d8494c8a9bd 100644 --- a/ft/rollback-ct-callbacks.h +++ b/ft/rollback-ct-callbacks.h @@ -1,7 +1,5 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef ROLLBACK_CT_CALLBACKS_H -#define ROLLBACK_CT_CALLBACKS_H #ident "$Id$" /* @@ -89,6 +87,8 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
@@ -134,6 +134,3 @@ static inline CACHETABLE_WRITE_CALLBACK get_write_callbacks_for_rollback_log(FT wc.write_extraargs = h; return wc; } - - -#endif // ROLLBACK_CT_CALLBACKS_H diff --git a/ft/rollback.h b/ft/rollback.h index e9cb528b7a9..b1441a9b17b 100644 --- a/ft/rollback.h +++ b/ft/rollback.h @@ -1,7 +1,5 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef TOKU_ROLLBACK_H -#define TOKU_ROLLBACK_H #ident "$Id$" /* @@ -89,6 +87,8 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." @@ -196,6 +196,3 @@ void make_rollback_log_empty(ROLLBACK_LOG_NODE log); static inline bool rollback_log_is_unused(ROLLBACK_LOG_NODE log) { return (log->txnid.parent_id64 == TXNID_NONE); } - - -#endif // TOKU_ROLLBACK_H diff --git a/ft/rollback_log_node_cache.h b/ft/rollback_log_node_cache.h index 0db99faf23b..8b234250569 100644 --- a/ft/rollback_log_node_cache.h +++ b/ft/rollback_log_node_cache.h @@ -1,7 +1,5 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef TOKU_ROLLBACK_LOG_NODE_CACHE_H -#define TOKU_ROLLBACK_LOG_NODE_CACHE_H #ident "$Id$" /* @@ -89,6 +87,8 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 
11/760379 and to the patents and/or patent applications resulting from it." @@ -115,5 +115,3 @@ private: }; ENSURE_POD(rollback_log_node_cache); - -#endif // TOKU_ROLLBACK_LOG_NODE_CACHE_H diff --git a/ft/sub_block.h b/ft/sub_block.h index d00df6fa51a..64df17f55e9 100644 --- a/ft/sub_block.h +++ b/ft/sub_block.h @@ -1,7 +1,5 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef TOKU_SUB_BLOCK_H -#define TOKU_SUB_BLOCK_H #ident "$Id$" /* @@ -89,13 +87,14 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." #include "compress.h" #include "fttypes.h" - static const int max_sub_blocks = 8; static const int target_sub_block_size = 512*1024; static const int max_basement_nodes = 32; @@ -213,6 +212,3 @@ int decompress_all_sub_blocks(int n_sub_blocks, struct sub_block sub_block[], unsigned char *compressed_data, unsigned char *uncompressed_data, int num_cores, struct toku_thread_pool *pool); extern int verbose_decompress_sub_block; - - -#endif diff --git a/ft/sub_block_map.h b/ft/sub_block_map.h index 3c1d71078d8..f2246279982 100644 --- a/ft/sub_block_map.h +++ b/ft/sub_block_map.h @@ -1,7 +1,5 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef _TOKU_SUB_BLOCK_MAP_H -#define _TOKU_SUB_BLOCK_MAP_H #ident "$Id$" /* @@ -89,6 +87,8 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." 
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." @@ -123,5 +123,3 @@ sub_block_map_deserialize(struct sub_block_map *sbmap, struct rbuf *rbuf) { sbmap->offset = rbuf_int(rbuf); sbmap->size = rbuf_int(rbuf); } - -#endif diff --git a/ft/tests/cachetable-test.h b/ft/tests/cachetable-test.h index 6d143237c11..75316469f6e 100644 --- a/ft/tests/cachetable-test.h +++ b/ft/tests/cachetable-test.h @@ -86,8 +86,9 @@ PATENT RIGHTS GRANT: under this License. */ -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." +#pragma once +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #include "cachetable-internal.h" diff --git a/ft/tests/ftloader-error-injector.h b/ft/tests/ftloader-error-injector.h index 656e8a3dfe1..e0ba18aa235 100644 --- a/ft/tests/ftloader-error-injector.h +++ b/ft/tests/ftloader-error-injector.h @@ -86,12 +86,11 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2010-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
-#ifndef FTLOADER_ERROR_INJECTOR_H -#define FTLOADER_ERROR_INJECTOR_H - #include static toku_mutex_t event_mutex = TOKU_MUTEX_INITIALIZER; @@ -224,5 +223,3 @@ static void *my_realloc(void *p, size_t n) { } return realloc(p, n); } - -#endif diff --git a/ft/tests/test-ft-txns.h b/ft/tests/test-ft-txns.h index bc887391589..645281ae867 100644 --- a/ft/tests/test-ft-txns.h +++ b/ft/tests/test-ft-txns.h @@ -1,7 +1,5 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef TEST_FT_TXNS_H -#define TEST_FT_TXNS_H #ident "$Id$" /* @@ -89,6 +87,8 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2010-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." @@ -179,5 +179,3 @@ static inline void shutdown_after_recovery(TOKULOGGER *loggerp, CACHETABLE *ctp) int r = toku_logger_close(loggerp); CKERR(r); } - -#endif /* TEST_FT_TXNS_H */ diff --git a/ft/tests/test.h b/ft/tests/test.h index ea02abb527f..78c4e2afb05 100644 --- a/ft/tests/test.h +++ b/ft/tests/test.h @@ -86,6 +86,8 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
@@ -393,4 +395,3 @@ main(int argc, const char *argv[]) { toku_ft_layer_destroy(); return r; } - diff --git a/ft/tokuconst.h b/ft/tokuconst.h index 73ac3a6a693..9593cd5761c 100644 --- a/ft/tokuconst.h +++ b/ft/tokuconst.h @@ -1,7 +1,5 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef TOKUCONST_H -#define TOKUCONST_H #ident "$Id$" /* @@ -89,6 +87,8 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." @@ -99,10 +99,5 @@ PATENT RIGHTS GRANT: * root transaction (id 0). */ - enum {MAX_NESTED_TRANSACTIONS = 253}; enum {MAX_TRANSACTION_RECORDS = MAX_NESTED_TRANSACTIONS + 1}; - - -#endif - diff --git a/ft/txn.h b/ft/txn.h index 57f3b0ed805..c9be49a1a36 100644 --- a/ft/txn.h +++ b/ft/txn.h @@ -1,7 +1,5 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef TOKUTXN_H -#define TOKUTXN_H #ident "$Id$" /* @@ -89,6 +87,8 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
@@ -221,5 +221,3 @@ bool toku_txn_has_spilled_rollback(TOKUTXN txn); uint64_t toku_txn_get_client_id(TOKUTXN txn); void toku_txn_set_client_id(TOKUTXN txn, uint64_t client_id); - -#endif //TOKUTXN_H diff --git a/ft/txn_child_manager.h b/ft/txn_child_manager.h index 07cf2ee3b5e..537f800cdd8 100644 --- a/ft/txn_child_manager.h +++ b/ft/txn_child_manager.h @@ -1,7 +1,5 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef TOKU_TXN_CHILD_MANAGER_H -#define TOKU_TXN_CHILD_MANAGER_H #ident "$Id: rollback.h 49033 2012-10-17 18:48:30Z zardosht $" /* @@ -89,6 +87,8 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." @@ -116,5 +116,3 @@ friend class txn_child_manager_unit_test; ENSURE_POD(txn_child_manager); - -#endif // TOKU_TXN_CHILD_MANAGER_H diff --git a/ft/txn_manager.h b/ft/txn_manager.h index 58d7555dc05..6a0ce1fadac 100644 --- a/ft/txn_manager.h +++ b/ft/txn_manager.h @@ -1,7 +1,5 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef TOKUTXN_MANAGER_H -#define TOKUTXN_MANAGER_H #ident "$Id$" /* @@ -89,6 +87,8 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 
11/760379 and to the patents and/or patent applications resulting from it." @@ -285,5 +285,3 @@ bool toku_txn_manager_txns_exist(TXN_MANAGER mgr); void toku_txn_manager_increase_last_xid(TXN_MANAGER mgr, uint64_t increment); TXNID toku_get_youngest_live_list_txnid_for(TXNID xc, const xid_omt_t &snapshot_txnids, const rx_omt_t &referenced_xids); - -#endif // TOKUTXN_MANAGER_H diff --git a/ft/txn_state.h b/ft/txn_state.h index d8e192edec3..0375cdcc542 100644 --- a/ft/txn_state.h +++ b/ft/txn_state.h @@ -86,10 +86,10 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#if !defined(TOKUTXN_STATE_H) -#define TOKUTXN_STATE_H // this is a separate file so that the hotindexing tests can see the txn states @@ -101,5 +101,3 @@ enum tokutxn_state { TOKUTXN_RETIRED, // txn no longer exists }; typedef enum tokutxn_state TOKUTXN_STATE; - -#endif diff --git a/ft/ule-internal.h b/ft/ule-internal.h index d2dd212850b..8e295195c4d 100644 --- a/ft/ule-internal.h +++ b/ft/ule-internal.h @@ -5,9 +5,6 @@ * ule mechanisms that do not belong in the public interface. */ -#ifndef TOKU_ULE_INTERNAL_H -#define TOKU_ULE_INTERNAL_H - #ident "$Id$" /* COPYING CONDITIONS NOTICE: @@ -94,6 +91,8 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 
11/760379 and to the patents and/or patent applications resulting from it." @@ -157,7 +156,3 @@ le_pack(ULE ule, // data to be packed into new leafentry size_t le_memsize_from_ule (ULE ule); void ule_cleanup(ULE ule); - - -#endif // TOKU_ULE_H - diff --git a/ft/ule.h b/ft/ule.h index 0dd34212ff1..1441e39c7dd 100644 --- a/ft/ule.h +++ b/ft/ule.h @@ -6,9 +6,6 @@ * requirements of the nested transaction logic belongs here. */ -#ifndef TOKU_ULE_H -#define TOKU_ULE_H - #ident "$Id$" /* COPYING CONDITIONS NOTICE: @@ -95,6 +92,8 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." @@ -130,5 +129,3 @@ TXNID uxr_get_txnid(UXRHANDLE uxr); //1 does much slower debugging #define GARBAGE_COLLECTION_DEBUG 0 - -#endif // TOKU_ULE_H diff --git a/ft/wbuf.h b/ft/wbuf.h index 93cfe0c7185..5f5ab3e65c8 100644 --- a/ft/wbuf.h +++ b/ft/wbuf.h @@ -1,7 +1,5 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef WBUF_H -#define WBUF_H #ident "$Id$" /* COPYING CONDITIONS NOTICE: @@ -88,6 +86,8 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
@@ -332,6 +332,3 @@ static inline void wbuf_FILENUMS (struct wbuf *w, FILENUMS v) { wbuf_FILENUM(w, v.filenums[i]); } } - - -#endif diff --git a/ft/workset.h b/ft/workset.h index 27dd9778006..b2451d1e169 100644 --- a/ft/workset.h +++ b/ft/workset.h @@ -1,7 +1,5 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef _TOKU_WORKSET_H -#define _TOKU_WORKSET_H #ident "$Id$" /* @@ -89,6 +87,8 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." @@ -187,5 +187,3 @@ workset_join(struct workset *ws) { } workset_unlock(ws); } - -#endif diff --git a/ft/xids-internal.h b/ft/xids-internal.h index b0c2d20bc6b..7e074d90ffa 100644 --- a/ft/xids-internal.h +++ b/ft/xids-internal.h @@ -1,9 +1,6 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef XIDS_INTERNAL_H -#define XIDS_INTERNAL_H - #ident "$Id$" /* COPYING CONDITIONS NOTICE: @@ -90,6 +87,8 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." @@ -106,5 +105,3 @@ typedef struct __attribute__((__packed__)) xids_t { // ... 
because transaction 0 is implicit TXNID ids[]; } XIDS_S; - -#endif diff --git a/ft/xids.h b/ft/xids.h index 45246785775..55a2440c0fd 100644 --- a/ft/xids.h +++ b/ft/xids.h @@ -12,9 +12,6 @@ * TokuWiki/Imp/TransactionsOverview. */ -#ifndef XIDS_H -#define XIDS_H - #ident "$Id$" /* COPYING CONDITIONS NOTICE: @@ -101,6 +98,8 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." @@ -145,7 +144,3 @@ unsigned char *xids_get_end_of_array(XIDS xids); void wbuf_nocrc_xids(struct wbuf *wb, XIDS xids); void xids_fprintf(FILE* fp, XIDS xids); - - - -#endif diff --git a/ft/ybt.h b/ft/ybt.h index ae19f527493..4ddffbafc00 100644 --- a/ft/ybt.h +++ b/ft/ybt.h @@ -1,7 +1,5 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef TOKU_YBT_H -#define TOKU_YBT_H #ident "$Id$" /* @@ -89,6 +87,8 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
@@ -137,5 +137,3 @@ int toku_dbt_infinite_compare(const DBT *a, const DBT *b); // returns: true if the given dbts have the same data pointer and size bool toku_dbt_equals(const DBT *a, const DBT *b); - -#endif /* TOKU_YBT_H */ diff --git a/locktree/concurrent_tree.h b/locktree/concurrent_tree.h index 740a5f1311c..4a6b10bdcfc 100644 --- a/locktree/concurrent_tree.h +++ b/locktree/concurrent_tree.h @@ -86,12 +86,11 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#ifndef CONCURRENT_TREE_H -#define CONCURRENT_TREE_H - #include #include "treenode.h" @@ -203,5 +202,3 @@ private: #include "concurrent_tree.cc" } /* namespace toku */ - -#endif /* CONCURRENT_TREE_H */ diff --git a/locktree/keyrange.h b/locktree/keyrange.h index cab5866a5da..9843703c79e 100644 --- a/locktree/keyrange.h +++ b/locktree/keyrange.h @@ -86,12 +86,11 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
-#ifndef KEYRANGE_H -#define KEYRANGE_H - #include namespace toku { @@ -184,5 +183,3 @@ private: }; } /* namespace toku */ - -#endif /* KEYRANGE_H */ diff --git a/locktree/lock_request.h b/locktree/lock_request.h index 0916a6529e0..f7f302c3298 100644 --- a/locktree/lock_request.h +++ b/locktree/lock_request.h @@ -86,12 +86,11 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#ifndef TOKU_LOCK_REQUEST_H -#define TOKU_LOCK_REQUEST_H - #include #include @@ -243,5 +242,3 @@ private: ENSURE_POD(lock_request); } /* namespace toku */ - -#endif /* TOKU_LOCK_REQUEST_H */ diff --git a/locktree/range_buffer.h b/locktree/range_buffer.h index ac019ba18ce..22bb5c43463 100644 --- a/locktree/range_buffer.h +++ b/locktree/range_buffer.h @@ -86,12 +86,11 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
-#ifndef RANGE_BUFFER_H -#define RANGE_BUFFER_H - #include #include @@ -217,5 +216,3 @@ public: }; } /* namespace toku */ - -#endif /* RANGE_BUFFER_H */ diff --git a/locktree/tests/concurrent_tree_unit_test.h b/locktree/tests/concurrent_tree_unit_test.h index bda34978e50..f57c45f9d5c 100644 --- a/locktree/tests/concurrent_tree_unit_test.h +++ b/locktree/tests/concurrent_tree_unit_test.h @@ -86,6 +86,8 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." diff --git a/locktree/tests/lock_request_unit_test.h b/locktree/tests/lock_request_unit_test.h index 3183bf2b734..a20f2f1326f 100644 --- a/locktree/tests/lock_request_unit_test.h +++ b/locktree/tests/lock_request_unit_test.h @@ -86,12 +86,11 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#ifndef TOKU_LOCK_REQUEST_UNIT_TEST_H -#define TOKU_LOCK_REQUEST_UNIT_TEST_H - #include "test.h" #include "locktree_unit_test.h" @@ -132,5 +131,3 @@ private: }; } - -#endif diff --git a/locktree/tests/locktree_unit_test.h b/locktree/tests/locktree_unit_test.h index b074cc837ba..ba7a934340b 100644 --- a/locktree/tests/locktree_unit_test.h +++ b/locktree/tests/locktree_unit_test.h @@ -86,12 +86,11 @@ PATENT RIGHTS GRANT: under this License. 
*/ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#ifndef TOKU_LOCKTREE_UNIT_TEST_H -#define TOKU_LOCKTREE_UNIT_TEST_H - #include "test.h" #include "locktree.h" @@ -157,5 +156,3 @@ private: }; } /* namespace toku */ - -#endif /* TOKU_LOCKTREE_UNIT_TEST_H */ diff --git a/locktree/tests/manager_unit_test.h b/locktree/tests/manager_unit_test.h index ba38b97989e..bd6e6db5f52 100644 --- a/locktree/tests/manager_unit_test.h +++ b/locktree/tests/manager_unit_test.h @@ -86,12 +86,11 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#ifndef TOKU_MANAGER_TEST_H -#define TOKU_MANAGER_TEST_H - #include #include @@ -111,5 +110,3 @@ public: }; } /* namespace toku */ - -#endif /* TOKU_MANAGER_TEST_H */ diff --git a/locktree/tests/test.h b/locktree/tests/test.h index cf9a805543c..ea701a90a4a 100644 --- a/locktree/tests/test.h +++ b/locktree/tests/test.h @@ -86,12 +86,11 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 
11/760379 and to the patents and/or patent applications resulting from it." -#ifndef TOKU_TEST_H -#define TOKU_TEST_H - #include #include @@ -160,5 +159,3 @@ static int compare_dbts(DB *db, const DBT *key1, const DBT *key2) { } } /* namespace toku */ - -#endif diff --git a/locktree/treenode.h b/locktree/treenode.h index e48dc50d72b..24bd4ed49e1 100644 --- a/locktree/treenode.h +++ b/locktree/treenode.h @@ -86,12 +86,11 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#ifndef TREENODE_H -#define TREENODE_H - #include #include @@ -283,5 +282,3 @@ private: #include "treenode.cc" } /* namespace toku */ - -#endif /* TREENODE_H */ diff --git a/locktree/txnid_set.h b/locktree/txnid_set.h index d2971c5c167..2caf4038995 100644 --- a/locktree/txnid_set.h +++ b/locktree/txnid_set.h @@ -86,12 +86,11 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
-#ifndef TOKU_TXNID_SET_H -#define TOKU_TXNID_SET_H - #include #include @@ -130,5 +129,3 @@ private: ENSURE_POD(txnid_set); } /* namespace toku */ - -#endif /* TOKU_TXNID_SET_H */ diff --git a/locktree/wfg.h b/locktree/wfg.h index 2bfd3797f9b..8f9abd67d42 100644 --- a/locktree/wfg.h +++ b/locktree/wfg.h @@ -86,12 +86,11 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#ifndef TOKU_WFG_H -#define TOKU_WFG_H - #include #include @@ -159,5 +158,3 @@ private: ENSURE_POD(wfg); } /* namespace toku */ - -#endif /* TOKU_WFG_H */ diff --git a/portability/memory.h b/portability/memory.h index e4608d6108f..4e90d0afc14 100644 --- a/portability/memory.h +++ b/portability/memory.h @@ -1,8 +1,6 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: #ident "$Id$" -#ifndef MEMORY_H -#define MEMORY_H /* COPYING CONDITIONS NOTICE: @@ -89,12 +87,13 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #include #include - /* Tokutek memory allocation functions and macros. * These are functions for malloc and free */ @@ -225,5 +224,3 @@ typedef struct memory_status { void toku_memory_get_status(LOCAL_MEMORY_STATUS s); size_t toku_memory_footprint(void * p, size_t touched); - -#endif diff --git a/portability/rdtsc.h b/portability/rdtsc.h index e70f636e169..0a5e5374947 100644 --- a/portability/rdtsc.h +++ b/portability/rdtsc.h @@ -86,6 +86,8 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. 
All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." // read the processor time stamp register diff --git a/portability/toku_assert.h b/portability/toku_assert.h index 0214018c11f..8767f048355 100644 --- a/portability/toku_assert.h +++ b/portability/toku_assert.h @@ -86,11 +86,12 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#ifndef TOKU_ASSERT_H -#define TOKU_ASSERT_H /* The problem with assert.h: If NDEBUG is set then it doesn't execute the function, if NDEBUG isn't set then we get a branch that isn't taken. */ + /* This version will complain if NDEBUG is set. */ /* It evaluates the argument and then calls a function toku_do_assert() which takes all the hits for the branches not taken. */ @@ -201,5 +202,3 @@ get_error_errno(void) } extern bool toku_gdb_dump_on_assert; - -#endif diff --git a/portability/toku_atomic.h b/portability/toku_atomic.h index e897d0b7e4a..2243a4ca4b2 100644 --- a/portability/toku_atomic.h +++ b/portability/toku_atomic.h @@ -85,12 +85,11 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2012-2013 Tokutek Inc. All rights reserved." 
#ident "$Id$" -#ifndef TOKU_ATOMIC_H -#define TOKU_ATOMIC_H - #include #include @@ -159,5 +158,3 @@ static inline bool toku_sync_bool_compare_and_swap(T *addr, U oldval, V newval) #pragma GCC poison __sync_synchronize #pragma GCC poison __sync_lock_test_and_set #pragma GCC poison __sync_release - -#endif // TOKU_ATOMIC_H diff --git a/portability/toku_byteswap.h b/portability/toku_byteswap.h index 4ddeefe2c1b..5895738abe6 100644 --- a/portability/toku_byteswap.h +++ b/portability/toku_byteswap.h @@ -85,12 +85,11 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2012-2013 Tokutek Inc. All rights reserved." #ident "$Id$" -#ifndef TOKU_BYTESWAP_H -#define TOKU_BYTESWAP_H - #include #if defined(HAVE_BYTESWAP_H) @@ -102,5 +101,3 @@ PATENT RIGHTS GRANT: # include # define bswap_64 OSSwapInt64 #endif - -#endif /* TOKU_BYTESWAP_H */ diff --git a/portability/toku_crash.h b/portability/toku_crash.h index 153ab26d460..acb060323e7 100644 --- a/portability/toku_crash.h +++ b/portability/toku_crash.h @@ -86,10 +86,9 @@ PATENT RIGHTS GRANT: under this License. */ -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." +#pragma once -#ifndef PORTABILITY_TOKU_CRASH_H -#define PORTABILITY_TOKU_CRASH_H +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #include #include @@ -192,5 +191,3 @@ toku_crash_and_dump_core_on_purpose(void) { } void toku_try_gdb_stack_trace(const char *gdb_path); - -#endif // PORTABILITY_TOKU_CRASH_H diff --git a/portability/toku_htod.h b/portability/toku_htod.h index c6a7a143563..0053a93f2fc 100644 --- a/portability/toku_htod.h +++ b/portability/toku_htod.h @@ -86,8 +86,9 @@ PATENT RIGHTS GRANT: under this License. */ -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." +#pragma once +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." 
/* Purpose of this file is to provide definitions of * Host to Disk byte transposition functions, an abstraction of @@ -106,9 +107,6 @@ PATENT RIGHTS GRANT: * HOST AND A LITTLE-ENDIAN DISK. */ -#ifndef _TOKU_HTOD_H -#define _TOKU_HTOD_H - #include #if defined(HAVE_ENDIAN_H) @@ -166,8 +164,3 @@ toku_htod32(uint32_t i) { #else #error Not supported #endif - - - -#endif - diff --git a/portability/toku_htonl.h b/portability/toku_htonl.h index 126ba932b87..f3dcb9aaba9 100644 --- a/portability/toku_htonl.h +++ b/portability/toku_htonl.h @@ -86,15 +86,10 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -#ifndef _TOKU_HTONL_H -#define _TOKU_HTONL_H - -#if !__linux__ && !__FreeBSD__ && !__sun__ -//#error -#endif - #include #include @@ -105,5 +100,3 @@ static inline uint32_t toku_htonl(uint32_t i) { static inline uint32_t toku_ntohl(uint32_t i) { return ntohl(i); } - -#endif diff --git a/portability/toku_list.h b/portability/toku_list.h index b39d56ebd32..534f1179444 100644 --- a/portability/toku_list.h +++ b/portability/toku_list.h @@ -1,7 +1,5 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef _TOKUDB_LIST_H -#define _TOKUDB_LIST_H #ident "$Id$" /* @@ -89,13 +87,11 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007, 2008, 2009 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -//TODO: #1378 This is not threadsafe. Make sure when splitting locks -//that we protect these calls. - - // This toku_list is intended to be embedded in other data structures. 
struct toku_list { struct toku_list *next, *prev; @@ -177,7 +173,3 @@ static inline void toku_list_move(struct toku_list *newhead, struct toku_list *o #else #define toku_list_struct(p, t, f) ((t*)((char*)(p) - ((char*)&((t*)0)->f))) #endif - - - -#endif diff --git a/portability/toku_os.h b/portability/toku_os.h index c232919f450..ba21a56de91 100644 --- a/portability/toku_os.h +++ b/portability/toku_os.h @@ -86,12 +86,11 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#ifndef TOKU_OS_H -#define TOKU_OS_H - #include #include @@ -179,5 +178,3 @@ int toku_fstat(int fd, toku_struct_stat *statbuf) __attribute__((__visibility__( // Portable linux 'dup2' int toku_dup2(int fd, int fd2) __attribute__((__visibility__("default"))); - -#endif /* TOKU_OS_H */ diff --git a/portability/toku_os_types.h b/portability/toku_os_types.h index 698bb9f2524..47c7e53dfda 100644 --- a/portability/toku_os_types.h +++ b/portability/toku_os_types.h @@ -86,10 +86,9 @@ PATENT RIGHTS GRANT: under this License. */ -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." +#pragma once -#if !defined(TOKU_OS_TYPES_H) -#define TOKU_OS_TYPES_H +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #include #include @@ -128,5 +127,3 @@ typedef struct stat toku_struct_stat; #if !defined(O_BINARY) #define O_BINARY 0 #endif - -#endif diff --git a/portability/toku_path.h b/portability/toku_path.h index 4c0df9660a9..bf0af6bbb64 100644 --- a/portability/toku_path.h +++ b/portability/toku_path.h @@ -86,12 +86,11 @@ PATENT RIGHTS GRANT: under this License. 
*/ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#ifndef PORTABILITY_TOKU_PATH_H -#define PORTABILITY_TOKU_PATH_H - #include #include #include @@ -124,5 +123,3 @@ char *toku_path_join(char *dest, int n, const char *base, ...); // There are n path components, including base. // Returns: // dest (useful for chaining function calls) - -#endif // PORTABILITY_TOKU_PATH_H diff --git a/portability/toku_portability.h b/portability/toku_portability.h index dc2ac683113..e19fca5d626 100644 --- a/portability/toku_portability.h +++ b/portability/toku_portability.h @@ -86,10 +86,10 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#ifndef TOKU_PORTABILITY_H -#define TOKU_PORTABILITY_H #include "toku_config.h" @@ -363,6 +363,3 @@ static inline uint64_t roundup_to_multiple(uint64_t alignment, uint64_t v) assert(result #include @@ -407,5 +406,3 @@ static inline int toku_pthread_setspecific(toku_pthread_key_t key, void *data) { return pthread_setspecific(key, data); } - -#endif /* TOKU_PTHREAD_H */ diff --git a/portability/toku_race_tools.h b/portability/toku_race_tools.h index 9d3795eae95..eb97e55c6d3 100644 --- a/portability/toku_race_tools.h +++ b/portability/toku_race_tools.h @@ -86,10 +86,10 @@ PATENT RIGHTS GRANT: under this License. 
*/ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#ifndef TOKU_RACE_TOOLS_H -#define TOKU_RACE_TOOLS_H #include @@ -138,5 +138,3 @@ PATENT RIGHTS GRANT: # define RUNNING_ON_VALGRIND (0U) #endif - -#endif // TOKU_RACE_TOOLS_H diff --git a/portability/toku_random.h b/portability/toku_random.h index 956e73990a7..0a9df169be1 100644 --- a/portability/toku_random.h +++ b/portability/toku_random.h @@ -86,10 +86,9 @@ PATENT RIGHTS GRANT: under this License. */ -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." +#pragma once -#ifndef TOKU_RANDOM_H -#define TOKU_RANDOM_H +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #include #include @@ -169,5 +168,3 @@ rand_choices(struct random_data *buf, uint32_t choices) { return result; } - -#endif // TOKU_RANDOM_H diff --git a/portability/toku_stdint.h b/portability/toku_stdint.h index 0105c94c50b..d75e48a11d0 100644 --- a/portability/toku_stdint.h +++ b/portability/toku_stdint.h @@ -86,13 +86,9 @@ PATENT RIGHTS GRANT: under this License. */ -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." +#pragma once -#ifndef TOKU_STDINT_H -#define TOKU_STDINT_H +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #include #include - -#endif - diff --git a/portability/toku_stdlib.h b/portability/toku_stdlib.h index 9d2a7f78778..f5764868b83 100644 --- a/portability/toku_stdlib.h +++ b/portability/toku_stdlib.h @@ -86,5 +86,8 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." 
+ #include diff --git a/portability/toku_time.h b/portability/toku_time.h index 89b8dcb8524..f5eb778eeec 100644 --- a/portability/toku_time.h +++ b/portability/toku_time.h @@ -86,10 +86,9 @@ PATENT RIGHTS GRANT: under this License. */ -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." +#pragma once -#ifndef TOKU_TIME_H -#define TOKU_TIME_H +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #include "toku_config.h" @@ -161,5 +160,3 @@ static inline uint64_t toku_current_time_microsec(void) { gettimeofday(&t, NULL); return t.tv_sec * (1UL * 1000 * 1000) + t.tv_usec; } - -#endif diff --git a/src/indexer-internal.h b/src/indexer-internal.h index a3f1f96f096..fb06c8f8391 100644 --- a/src/indexer-internal.h +++ b/src/indexer-internal.h @@ -89,8 +89,7 @@ PATENT RIGHTS GRANT: #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." #ident "$Id$" -#ifndef TOKU_INDEXER_INTERNAL_H -#define TOKU_INDEXER_INTERNAL_H +#pragma once #include #include @@ -168,5 +167,3 @@ void indexer_undo_do_init(DB_INDEXER *indexer); void indexer_undo_do_destroy(DB_INDEXER *indexer); int indexer_undo_do(DB_INDEXER *indexer, DB *hotdb, struct ule_prov_info *prov_info, DBT_ARRAY *hot_keys, DBT_ARRAY *hot_vals); - -#endif diff --git a/src/indexer.h b/src/indexer.h index 3a7842af989..5eaecaf5f3b 100644 --- a/src/indexer.h +++ b/src/indexer.h @@ -89,9 +89,7 @@ PATENT RIGHTS GRANT: #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
#ident "$Id$" -#ifndef TOKU_INDEXER_H -#define TOKU_INDEXER_H - +#pragma once // locking and unlocking functions to synchronize cursor position with // XXX_multiple APIs @@ -178,6 +176,3 @@ typedef struct { } INDEXER_STATUS_S, *INDEXER_STATUS; void toku_indexer_get_status(INDEXER_STATUS s); - - -#endif // TOKU_INDEXER_H diff --git a/src/loader.h b/src/loader.h index bd8e85aed93..e6316a5cea8 100644 --- a/src/loader.h +++ b/src/loader.h @@ -89,8 +89,7 @@ PATENT RIGHTS GRANT: #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." #ident "$Id$" -#ifndef TOKU_LOADER_H -#define TOKU_LOADER_H +#pragma once /* Create and set up a loader. @@ -208,6 +207,3 @@ typedef struct { void toku_loader_get_status(LOADER_STATUS s); - - -#endif diff --git a/src/tests/checkpoint_test.h b/src/tests/checkpoint_test.h index e9d4290a406..0ded9104bf0 100644 --- a/src/tests/checkpoint_test.h +++ b/src/tests/checkpoint_test.h @@ -86,13 +86,11 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2009-2013 Tokutek Inc. All rights reserved." #ident "$Id$" -#ifndef CHECKPOINT_TEST_H -#define CHECKPOINT_TEST_H - - DB_ENV *env; enum {MAX_NAME=128}; @@ -537,6 +535,3 @@ snapshot(DICTIONARY d, int do_checkpoint) { db_startup(d, NULL); } } - - -#endif diff --git a/src/tests/key-val.h b/src/tests/key-val.h index d77b8b00e05..1da48508ad5 100644 --- a/src/tests/key-val.h +++ b/src/tests/key-val.h @@ -86,10 +86,10 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2010-2013 Tokutek Inc. All rights reserved." -#ifndef KEY_VAL_H -#define KEY_VAL_H // // Functions to create unique key/value pairs, row generators, checkers, ... 
for each of NUM_DBS // @@ -295,8 +295,3 @@ static int UU() generate_initial_table(DB *db, DB_TXN *txn, uint32_t rows) return r; } - - - - -#endif // KEY_VAL_H diff --git a/src/tests/recover-test_crash_in_flusher_thread.h b/src/tests/recover-test_crash_in_flusher_thread.h index 56087ba16fa..014a6428777 100644 --- a/src/tests/recover-test_crash_in_flusher_thread.h +++ b/src/tests/recover-test_crash_in_flusher_thread.h @@ -85,8 +85,11 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "$Id$" + #include "test.h" #include diff --git a/src/tests/stress_openclose.h b/src/tests/stress_openclose.h index 4e61dcef356..7477f4c16d4 100644 --- a/src/tests/stress_openclose.h +++ b/src/tests/stress_openclose.h @@ -85,6 +85,8 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "$Id$" diff --git a/src/tests/test.h b/src/tests/test.h index 4cbfcf426d6..db1ef1e4012 100644 --- a/src/tests/test.h +++ b/src/tests/test.h @@ -2,10 +2,6 @@ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: #ident "$Id$" -#ifndef __TEST_H -#define __TEST_H - - /* COPYING CONDITIONS NOTICE: @@ -91,7 +87,10 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." + #include #include @@ -513,5 +512,3 @@ main(int argc, char * const argv[]) #ifndef DB_GID_SIZE #define DB_GID_SIZE DB_XIDDATASIZE #endif - -#endif // __TEST_H diff --git a/src/tests/test_kv_gen.h b/src/tests/test_kv_gen.h index f17b6c18641..0d54f8680a7 100644 --- a/src/tests/test_kv_gen.h +++ b/src/tests/test_kv_gen.h @@ -2,10 +2,6 @@ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: #ident "$Id$" -#ifndef __TEST_KV_GEN_H -#define __TEST_KV_GEN_H - - /* COPYING CONDITIONS NOTICE: @@ -91,6 +87,8 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. 
All rights reserved." #include "test.h" @@ -279,6 +277,3 @@ put_multiple_generate(DB *dest_db, DB *src_db, DBT *dest_key, DBT *dest_val, con } return 0; } - - -#endif // __TEST_KV_GEN_H diff --git a/src/tests/threaded_stress_test_helpers.h b/src/tests/threaded_stress_test_helpers.h index c173d2d2d63..2a841d1a16d 100644 --- a/src/tests/threaded_stress_test_helpers.h +++ b/src/tests/threaded_stress_test_helpers.h @@ -86,6 +86,8 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2009-2013 Tokutek Inc. All rights reserved." #ident "$Id$" @@ -102,9 +104,6 @@ PATENT RIGHTS GRANT: // with keys in the range [0, table_size - 1] unless disperse_keys is true, // then the keys are scrambled up in the integer key space. -#ifndef _THREADED_STRESS_TEST_HELPERS_H_ -#define _THREADED_STRESS_TEST_HELPERS_H_ - #include "toku_config.h" #include "test.h" @@ -2924,5 +2923,3 @@ UU() perf_test_main_with_cmp(struct cli_args *args, int (*cmp)(DB *, const DBT * // We want to control the row size and its compressibility. open_and_stress_tables(args, false, cmp); } - -#endif diff --git a/src/ydb-internal.h b/src/ydb-internal.h index d2f0e95b280..ae508425263 100644 --- a/src/ydb-internal.h +++ b/src/ydb-internal.h @@ -1,7 +1,5 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef YDB_INTERNAL_H -#define YDB_INTERNAL_H /* COPYING CONDITIONS NOTICE: @@ -88,6 +86,8 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." 
#ident "$Id$" @@ -312,5 +312,3 @@ txn_is_read_only(DB_TXN* txn) { void env_panic(DB_ENV * env, int cause, const char * msg); void env_note_db_opened(DB_ENV *env, DB *db); void env_note_db_closed(DB_ENV *env, DB *db); - -#endif diff --git a/src/ydb.h b/src/ydb.h index e7de82b5db2..7fcc460b6fe 100644 --- a/src/ydb.h +++ b/src/ydb.h @@ -88,11 +88,8 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -// This file defines the public interface to the ydb library - -#if !defined(TOKU_YDB_INTERFACE_H) -#define TOKU_YDB_INTERFACE_H +#pragma once // Initialize the ydb library globals. // Called when the ydb library is loaded. @@ -114,5 +111,3 @@ extern "C" uint64_t toku_test_get_latest_lsn(DB_ENV *env) __attribute__((__visib // test-only function extern "C" int toku_test_get_checkpointing_user_data_status(void) __attribute__((__visibility__("default"))); - -#endif diff --git a/src/ydb_cursor.h b/src/ydb_cursor.h index 9666cc4e61e..1eab0523570 100644 --- a/src/ydb_cursor.h +++ b/src/ydb_cursor.h @@ -86,14 +86,12 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
// This file defines the public interface to the ydb library -#if !defined(TOKU_YDB_CURSOR_H) -#define TOKU_YDB_CURSOR_H - - typedef enum { YDB_C_LAYER_STATUS_NUM_ROWS = 0 /* number of rows in this status array */ } ydb_c_lock_layer_status_entry; @@ -110,7 +108,3 @@ int toku_c_getf_set(DBC *c, uint32_t flag, DBT *key, YDB_CALLBACK_FUNCTION f, vo int toku_c_close(DBC * c); int toku_db_cursor_internal(DB *db, DB_TXN * txn, DBC **c, uint32_t flags, int is_temporary_cursor); int toku_db_cursor(DB *db, DB_TXN *txn, DBC **c, uint32_t flags); - - - -#endif diff --git a/src/ydb_db.h b/src/ydb_db.h index 54f0d178731..db3300cfed0 100644 --- a/src/ydb_db.h +++ b/src/ydb_db.h @@ -89,8 +89,7 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#ifndef TOKU_YDB_DB_H -#define TOKU_YDB_DB_H +#pragma once #include @@ -173,5 +172,3 @@ toku_db_destruct_autotxn(DB_TXN *txn, int r, bool changed) { } return r; } - -#endif /* TOKU_YDB_DB_H */ diff --git a/src/ydb_env_func.h b/src/ydb_env_func.h index cf193b64216..08202a334fe 100644 --- a/src/ydb_env_func.h +++ b/src/ydb_env_func.h @@ -86,12 +86,10 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
-// This file defines the public interface to the ydb library - -#if !defined(TOKU_YDB_ENV_FUNC_H) -#define TOKU_YDB_ENV_FUNC_H extern void (*checkpoint_callback_f)(void*); extern void * checkpoint_callback_extra; @@ -105,5 +103,3 @@ void setup_dlmalloc(void) __attribute__((__visibility__("default"))); // Test-only function void toku_env_increase_last_xid(DB_ENV *env, uint64_t increment); - -#endif diff --git a/src/ydb_load.h b/src/ydb_load.h index 6496a92eeec..f5e140d9d5d 100644 --- a/src/ydb_load.h +++ b/src/ydb_load.h @@ -1,8 +1,6 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: #ident "$Id$" -#ifndef YDB_LOAD_H -#define YDB_LOAD_H /* COPYING CONDITIONS NOTICE: @@ -89,7 +87,7 @@ PATENT RIGHTS GRANT: under this License. */ -#ident "Copyright (c) 2010-2013 Tokutek Inc. All rights reserved." +#pragma once /* ydb functions used by loader */ @@ -113,5 +111,3 @@ int locked_load_inames(DB_ENV * env, char * new_inames_in_env[/*N*/], /* out */ LSN *load_lsn, bool mark_as_loader); - -#endif diff --git a/src/ydb_row_lock.h b/src/ydb_row_lock.h index 2a1a4ffb5c8..8c50cf87cc9 100644 --- a/src/ydb_row_lock.h +++ b/src/ydb_row_lock.h @@ -89,8 +89,7 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
-#ifndef TOKU_YDB_ROW_LOCK_H -#define TOKU_YDB_ROW_LOCK_H +#pragma once #include @@ -113,5 +112,3 @@ int toku_db_get_point_write_lock(DB *db, DB_TXN *txn, const DBT *key); void toku_db_grab_write_lock(DB *db, DBT *key, TOKUTXN tokutxn); void toku_db_release_lt_key_ranges(DB_TXN *txn, txn_lt_key_ranges *ranges); - -#endif /* TOKU_YDB_ROW_LOCK_H */ diff --git a/src/ydb_txn.h b/src/ydb_txn.h index 454b6578e9f..57b2201d6cc 100644 --- a/src/ydb_txn.h +++ b/src/ydb_txn.h @@ -86,12 +86,10 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -// This file defines the public interface to the ydb library - -#if !defined(TOKU_YDB_TXN_H) -#define TOKU_YDB_TXN_H // begin, commit, and abort use the multi operation lock // internally to synchronize with begin checkpoint. callers @@ -112,5 +110,3 @@ bool toku_is_big_tokutxn(TOKUTXN tokutxn); // Test-only function extern "C" void toku_increase_last_xid(DB_ENV *env, uint64_t increment) __attribute__((__visibility__("default"))); - -#endif diff --git a/src/ydb_write.h b/src/ydb_write.h index a890089d895..ba26b5106b7 100644 --- a/src/ydb_write.h +++ b/src/ydb_write.h @@ -88,11 +88,8 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
-// This file defines the public interface to the ydb library - -#if !defined(TOKU_YDB_WRITE_H) -#define TOKU_YDB_WRITE_H +#pragma once typedef enum { YDB_LAYER_NUM_INSERTS = 0, @@ -119,7 +116,6 @@ typedef struct { void ydb_write_layer_get_status(YDB_WRITE_LAYER_STATUS statp); - int toku_db_del(DB *db, DB_TXN *txn, DBT *key, uint32_t flags, bool holds_mo_lock); int toku_db_put(DB *db, DB_TXN *txn, DBT *key, DBT *val, uint32_t flags, bool holds_mo_lock); int autotxn_db_del(DB* db, DB_TXN* txn, DBT* key, uint32_t flags); @@ -159,8 +155,3 @@ int env_update_multiple( uint32_t num_keys, DBT_ARRAY keys[], uint32_t num_vals, DBT_ARRAY vals[] ); - - - - -#endif diff --git a/tools/tokudb_common.h b/tools/tokudb_common.h index aeda0ae5027..f37f1c802c9 100644 --- a/tools/tokudb_common.h +++ b/tools/tokudb_common.h @@ -1,8 +1,6 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: #ident "$Id$" -#if !defined(TOKUDB_COMMON_H) -#define TOKUDB_COMMON_H /* COPYING CONDITIONS NOTICE: @@ -89,6 +87,8 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #include @@ -105,5 +105,3 @@ PATENT RIGHTS GRANT: #define IS_SET_ALL(bitvector, bits) (((bitvector) & (bits)) == (bits)) #define IS_POWER_OF_2(num) ((num) > 0 && ((num) & ((num) - 1)) == 0) - -#endif /* #if !defined(TOKUDB_COMMON_H) */ diff --git a/tools/tokudb_common_funcs.h b/tools/tokudb_common_funcs.h index c2737025acc..10edb5e1143 100644 --- a/tools/tokudb_common_funcs.h +++ b/tools/tokudb_common_funcs.h @@ -1,8 +1,6 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: #ident "$Id$" -#if !defined(TOKUDB_COMMON_FUNCS_H) -#define TOKUDB_COMMON_FUNCS_H /* COPYING CONDITIONS NOTICE: @@ -89,6 +87,8 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." 
#include "tokudb_common.h" @@ -333,5 +333,3 @@ main(int argc, char *const argv[]) { r = test_main(argc, argv); return r; } - -#endif /* #if !defined(TOKUDB_COMMON_H) */ diff --git a/util/circular_buffer.h b/util/circular_buffer.h index 6f40cf3046f..13e5f06ab11 100644 --- a/util/circular_buffer.h +++ b/util/circular_buffer.h @@ -86,12 +86,11 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#ifndef UTIL_CIRCULAR_BUFFER_H -#define UTIL_CIRCULAR_BUFFER_H - #include #include #include @@ -210,5 +209,3 @@ private: } #include "circular_buffer.cc" - -#endif // UTIL_CIRCULAR_BUFFER_H diff --git a/util/constexpr.h b/util/constexpr.h index cfea0b46924..444f0f718da 100644 --- a/util/constexpr.h +++ b/util/constexpr.h @@ -86,11 +86,11 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#pragma once - constexpr char UU() static_tolower(const char a) { return a >= 'A' && a <= 'Z' ? 
a - 'A' + 'a' : a; } diff --git a/util/dmt.h b/util/dmt.h index 374fa785e42..5bde11ab378 100644 --- a/util/dmt.h +++ b/util/dmt.h @@ -1,6 +1,5 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#pragma once /* COPYING CONDITIONS NOTICE: @@ -86,6 +85,8 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." diff --git a/util/doubly_linked_list.h b/util/doubly_linked_list.h index fb125d243be..444d4f30cd9 100644 --- a/util/doubly_linked_list.h +++ b/util/doubly_linked_list.h @@ -1,7 +1,5 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef UTIL_DOUBLY_LINKED_LIST_H -#define UTIL_DOUBLY_LINKED_LIST_H #ident "$Id$" /* COPYING CONDITIONS NOTICE: @@ -88,6 +86,8 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
@@ -225,5 +225,3 @@ int DoublyLinkedList::iterate(int (*fun)(T container, extra_t extra), extra_t } } - -#endif // UTIL_DOUBLY_LINKED_LIST_H diff --git a/util/fmutex.h b/util/fmutex.h index 075925dd03f..224a6972ba7 100644 --- a/util/fmutex.h +++ b/util/fmutex.h @@ -1,5 +1,4 @@ -#ifndef FMUTEX_H -#define FMUTEX_H +#pragma once // fair mutex struct fmutex { @@ -105,5 +104,3 @@ int fmutex_users(struct fmutex *fm) const { int fmutex_blocked_users(struct fmutex *fm) const { return fm->num_want_mutex; } - -#endif // FMUTEX_H diff --git a/util/frwlock.h b/util/frwlock.h index 7811e0d2427..8698c96b279 100644 --- a/util/frwlock.h +++ b/util/frwlock.h @@ -1,7 +1,5 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef UTIL_FRWLOCK_H -#define UTIL_FRWLOCK_H #ident "$Id$" /* COPYING CONDITIONS NOTICE: @@ -88,6 +86,8 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." @@ -176,5 +176,3 @@ ENSURE_POD(frwlock); // include the implementation here // #include "frwlock.cc" - -#endif // UTIL_FRWLOCK_H diff --git a/util/growable_array.h b/util/growable_array.h index 763377d0ab0..cdb7973c9e3 100644 --- a/util/growable_array.h +++ b/util/growable_array.h @@ -1,7 +1,5 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef UTIL_GROWABLE_ARRAY_H -#define UTIL_GROWABLE_ARRAY_H #ident "$Id$" /* COPYING CONDITIONS NOTICE: @@ -88,6 +86,8 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." 
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." @@ -175,5 +175,3 @@ template class GrowableArray { }; } - -#endif // UTIL_GROWABLE_ARRAY_H diff --git a/util/kibbutz.h b/util/kibbutz.h index 83e981b916c..6e9d3cf6643 100644 --- a/util/kibbutz.h +++ b/util/kibbutz.h @@ -86,12 +86,11 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#ifndef UTIL_KIBBUTZ_H -#define UTIL_KIBBUTZ_H - // // The kibbutz is another threadpool meant to do arbitrary work. // @@ -116,5 +115,3 @@ void toku_kibbutz_enq (KIBBUTZ k, void (*f)(void*), void *extra); // destroys the kibbutz // void toku_kibbutz_destroy (KIBBUTZ k); - -#endif // UTIL_KIBBUTZ_H diff --git a/util/memarena.h b/util/memarena.h index 0dac262ba46..46b901063d6 100644 --- a/util/memarena.h +++ b/util/memarena.h @@ -1,7 +1,5 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef TOKU_MEMARENA_H -#define TOKU_MEMARENA_H #ident "$Id$" /* @@ -89,6 +87,8 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." 
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." @@ -132,5 +132,3 @@ size_t toku_memarena_total_memory_size (MEMARENA); size_t toku_memarena_total_size_in_use (MEMARENA); size_t toku_memarena_total_footprint (MEMARENA); - -#endif diff --git a/util/mempool.h b/util/mempool.h index c8be5e13297..ee6d6bc5a08 100644 --- a/util/mempool.h +++ b/util/mempool.h @@ -86,12 +86,11 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#ifndef UTIL_MEMPOOL_H -#define UTIL_MEMPOOL_H - /* a memory pool is a contiguous region of memory that supports single allocations from the pool. these allocated regions are never recycled. when the memory pool no longer has free space, the allocated chunks @@ -181,7 +180,3 @@ static inline int toku_mempool_inrange(struct mempool *mp, void *vp, size_t size size_t toku_mempool_footprint(struct mempool *mp); void toku_mempool_clone(const struct mempool* orig_mp, struct mempool* new_mp); - - - -#endif // UTIL_MEMPOOL_H diff --git a/util/minicron.h b/util/minicron.h index 05d9868c8ac..74e75a86181 100644 --- a/util/minicron.h +++ b/util/minicron.h @@ -88,13 +88,11 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." 
#ident "$Id$" -#ifndef TOKU_MINICRON_H -#define TOKU_MINICRON_H +#pragma once #include #include - // Specification: // A minicron is a miniature cron job for executing a job periodically inside a pthread. // To create a minicron, @@ -126,6 +124,3 @@ uint32_t toku_minicron_get_period_in_seconds_unlocked(struct minicron *p); uint32_t toku_minicron_get_period_in_ms_unlocked(struct minicron *p); int toku_minicron_shutdown(struct minicron *p); bool toku_minicron_has_been_shutdown(struct minicron *p); - - -#endif diff --git a/util/nb_mutex.h b/util/nb_mutex.h index f781e9d6dda..6124eba2437 100644 --- a/util/nb_mutex.h +++ b/util/nb_mutex.h @@ -1,7 +1,5 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef UTIL_NB_MUTEX_H -#define UTIL_NB_MUTEX_H #ident "$Id$" /* COPYING CONDITIONS NOTICE: @@ -88,6 +86,8 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." @@ -155,5 +155,3 @@ static inline int nb_mutex_writers(NB_MUTEX nb_mutex) { static inline int nb_mutex_users(NB_MUTEX nb_mutex) { return rwlock_users(&nb_mutex->lock); } - -#endif // UTIL_NB_MUTEX_H diff --git a/util/omt.h b/util/omt.h index 6e963badafa..86f39e1fe5b 100644 --- a/util/omt.h +++ b/util/omt.h @@ -1,7 +1,5 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef UTIL_OMT_H -#define UTIL_OMT_H #ident "$Id$" /* @@ -89,6 +87,8 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." 
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." @@ -813,5 +813,3 @@ private: // include the implementation here #include "omt.cc" - -#endif // UTIL_OMT_H diff --git a/util/partitioned_counter.h b/util/partitioned_counter.h index b7401080f11..832309c1935 100644 --- a/util/partitioned_counter.h +++ b/util/partitioned_counter.h @@ -86,12 +86,11 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#ifndef UTIL_PARTITIONED_COUNTER_H -#define UTIL_PARTITIONED_COUNTER_H - // Overview: A partitioned_counter provides a counter that can be incremented and the running sum can be read at any time. // We assume that increments are frequent, whereas reading is infrequent. // Implementation hint: Use thread-local storage so each thread increments its own data. The increment does not require a lock or atomic operation. @@ -187,5 +186,3 @@ private: friend void destroy_thread_local_part_of_partitioned_counters (void *); }; #endif - -#endif // UTIL_PARTITIONED_COUNTER_H diff --git a/util/queue.h b/util/queue.h index d2feef5acde..51a9662886c 100644 --- a/util/queue.h +++ b/util/queue.h @@ -1,7 +1,5 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef TOKU_QUEUE_H -#define TOKU_QUEUE_H #ident "$Id$" /* @@ -89,6 +87,8 @@ PATENT RIGHTS GRANT: under this License. 
*/ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." @@ -135,4 +135,3 @@ int toku_queue_destroy (QUEUE q); // Requires: The queue must be empty and no consumer should try to dequeue after this (one way to do this is to make sure the consumer saw EOF). // Returns 0 on success. If the queue is not empty, returns EINVAL. Other errors are likely to be bad (some sort of mutex or condvar failure). -#endif diff --git a/util/rwlock.h b/util/rwlock.h index cb72e153eb6..6ee4c6cec1a 100644 --- a/util/rwlock.h +++ b/util/rwlock.h @@ -1,7 +1,5 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef UTIL_RWLOCK_H -#define UTIL_RWLOCK_H #ident "$Id$" /* COPYING CONDITIONS NOTICE: @@ -88,6 +86,8 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." @@ -353,4 +353,3 @@ static inline void rwlock_wait_for_users( toku_cond_destroy(&cond); } -#endif // UTIL_RWLOCK_H diff --git a/util/sort.h b/util/sort.h index 825909d4e9f..d3dd2459ee5 100644 --- a/util/sort.h +++ b/util/sort.h @@ -89,8 +89,7 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." 
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#ifndef UTIL_SORT_H -#define UTIL_SORT_H +#pragma once #include #include @@ -272,5 +271,3 @@ namespace toku { }; }; - -#endif // UTIL_SORT_H diff --git a/util/status.h b/util/status.h index 16a709237dd..0706185a856 100644 --- a/util/status.h +++ b/util/status.h @@ -90,6 +90,7 @@ PATENT RIGHTS GRANT: #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." #pragma once + #include #include diff --git a/util/threadpool.h b/util/threadpool.h index 3fada1f6e54..1882a4c2f7a 100644 --- a/util/threadpool.h +++ b/util/threadpool.h @@ -89,8 +89,7 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
-#ifndef UTIL_THREADPOOL_H -#define UTIL_THREADPOOL_H +#pragma once #include @@ -137,5 +136,3 @@ int toku_thread_pool_run(struct toku_thread_pool *pool, int dowait, int *nthread // Print the state of the thread pool void toku_thread_pool_print(struct toku_thread_pool *pool, FILE *out); - -#endif // UTIL_THREADPOOL_H diff --git a/util/x1764.h b/util/x1764.h index 1f87f50f09e..ff6b3ea0d8d 100644 --- a/util/x1764.h +++ b/util/x1764.h @@ -1,7 +1,5 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef X1764_H -#define X1764_H #ident "$Id$" /* COPYING CONDITIONS NOTICE: @@ -88,6 +86,8 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." @@ -121,6 +121,3 @@ void toku_x1764_add (struct x1764 *l, const void *vbuf, int len); uint32_t toku_x1764_finish (struct x1764 *l); // Effect: Return the final 32-bit result. 
- - -#endif From 5546c6cd323b944c68f98126393846d31321a2ea Mon Sep 17 00:00:00 2001 From: John Esmet Date: Sat, 14 Jun 2014 20:25:34 -0400 Subject: [PATCH 013/190] FT-248 Remove dead code from tools/ --- tools/CMakeLists.txt | 4 +- tools/parseTraceFiles.py | 82 --- tools/tokudb_common.h | 107 ---- tools/tokudb_common_funcs.h | 335 ------------- tools/tokudb_dump.cc | 250 ++++++++- tools/tokudb_gen.cc | 471 ----------------- tools/tokudb_load.cc | 977 ------------------------------------ 7 files changed, 247 insertions(+), 1979 deletions(-) delete mode 100755 tools/parseTraceFiles.py delete mode 100644 tools/tokudb_common.h delete mode 100644 tools/tokudb_common_funcs.h delete mode 100644 tools/tokudb_gen.cc delete mode 100644 tools/tokudb_load.cc diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt index 498918951e2..bd5f288b0f1 100644 --- a/tools/CMakeLists.txt +++ b/tools/CMakeLists.txt @@ -1,10 +1,8 @@ set_property(DIRECTORY APPEND PROPERTY COMPILE_DEFINITIONS _GNU_SOURCE DONT_DEPRECATE_ERRNO) -set(utils tokudb_gen tokudb_load tokudb_dump) +set(utils tokudb_dump) foreach(util ${utils}) add_executable(${util} ${util}) - set_target_properties(${util} PROPERTIES - COMPILE_DEFINITIONS "IS_TDB=1;USE_TDB=1;TDB_IS_STATIC=1") target_link_libraries(${util} ${LIBTOKUDB}_static ft_static z lzma ${LIBTOKUPORTABILITY}_static ${CMAKE_THREAD_LIBS_INIT} ${EXTRA_SYSTEM_LIBS}) add_space_separated_property(TARGET ${util} COMPILE_FLAGS -fvisibility=hidden) diff --git a/tools/parseTraceFiles.py b/tools/parseTraceFiles.py deleted file mode 100755 index f53ef620111..00000000000 --- a/tools/parseTraceFiles.py +++ /dev/null @@ -1,82 +0,0 @@ -#!/usr/bin/env python - -import sys -try: - data = open(sys.argv[1]) -except: - print "Could not open '%s'" % (sys.argv[1][0]) - exit(0) - -ts_factor = 1. -ts_prev = 0. 
- -threadlist = [] - -for line in data: - line = line.rstrip("\n") - vals = line.split() - [n, tid, ts, funcline] = vals[0:4] - # 'note' is all text following funcline - note = '' - for v in vals[4:-1]: - note += v+' ' - note += vals[-1] - - if ( note == 'calibrate done' ): - ts_factor = float(ts) - ts_prev - print "Factor = ", ts_factor, "("+str(ts_factor/1000000000)[0:4]+"GHz)" - - time = (float(ts)-ts_prev)/ts_factor - - # create a list of threads - # - each thread has a list of pairs, where time is the accumulated time for that note - # - search threadlist for thread_id (tid) - # - if found, search corresponding list of pairs for the current note - # - if found, update (+=) the time - # - if not found, create a new pair - # - if not found, create a new thread, entry - found_thread = 0 - for thread in threadlist: - if tid == thread[0]: - found_thread = 1 - notetimelist = thread[1] - found_note = 0 - for notetime in notetimelist: - if note == notetime[0]: - found_note = 1 - notetime[1] += time - break - if found_note == 0: - thread[1].append([note, time]) - break - if found_thread == 0: - notetime = [] - notetime.append([note, time]) - threadlist.append([tid, notetime]) - - ts_prev = float(ts) - -# trim out unneeded -for thread in threadlist: - trimlist = [] - for notetime in thread[1]: - if notetime[0][0:9] == 'calibrate': - trimlist.append(notetime) - for notetime in trimlist: - thread[1].remove(notetime) -print '' - -# sum times to calculate percent (of 100) -total_time = 0 -for thread in threadlist: - for [note, time] in thread[1]: - total_time += time - -print ' thread operation time(sec) percent' -for thread in threadlist: - print 'tid : %5s' % thread[0] - for [note, time] in thread[1]: - print ' %20s %f %5d' % (note, time, 100. 
* time/total_time) - - - diff --git a/tools/tokudb_common.h b/tools/tokudb_common.h deleted file mode 100644 index f37f1c802c9..00000000000 --- a/tools/tokudb_common.h +++ /dev/null @@ -1,107 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ident "$Id$" - -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). - - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. 
- -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. - - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. 
If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#pragma once - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." - -#include -#include -#include -#include -#include -#include -#include - -#define SET_BITS(bitvector, bits) ((bitvector) |= (bits)) -#define REMOVE_BITS(bitvector, bits) ((bitvector) &= ~(bits)) -#define IS_SET_ANY(bitvector, bits) ((bitvector) & (bits)) -#define IS_SET_ALL(bitvector, bits) (((bitvector) & (bits)) == (bits)) - -#define IS_POWER_OF_2(num) ((num) > 0 && ((num) & ((num) - 1)) == 0) diff --git a/tools/tokudb_common_funcs.h b/tools/tokudb_common_funcs.h deleted file mode 100644 index 10edb5e1143..00000000000 --- a/tools/tokudb_common_funcs.h +++ /dev/null @@ -1,335 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ident "$Id$" - -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). 
- - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. 
- - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#pragma once - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." - -#include "tokudb_common.h" - -//DB_ENV->err disabled since it does not use db_strerror -#define PRINT_ERROR(retval, ...) \ -do { \ -if (0) g.dbenv->err(g.dbenv, retval, __VA_ARGS__); \ -else { \ - fprintf(stderr, "\tIn %s:%d %s()\n", __FILE__, __LINE__, __FUNCTION__); \ - fprintf(stderr, "%s: %s:", g.progname, db_strerror(retval)); \ - fprintf(stderr, __VA_ARGS__); \ - fprintf(stderr, "\n"); \ - fflush(stderr); \ -} \ -} while (0) - -//DB_ENV->err disabled since it does not use db_strerror, errx does not exist. -#define PRINT_ERRORX(...) 
\ -do { \ -if (0) g.dbenv->err(g.dbenv, 0, __VA_ARGS__); \ -else { \ - fprintf(stderr, "\tIn %s:%d %s()\n", __FILE__, __LINE__, __FUNCTION__); \ - fprintf(stderr, "%s: ", g.progname); \ - fprintf(stderr, __VA_ARGS__); \ - fprintf(stderr, "\n"); \ - fflush(stderr); \ -} \ -} while (0) - -int strtoint32 (char* str, int32_t* num, int32_t min, int32_t max, int base); -int strtouint32 (char* str, uint32_t* num, uint32_t min, uint32_t max, int base); -int strtoint64 (char* str, int64_t* num, int64_t min, int64_t max, int base); -int strtouint64 (char* str, uint64_t* num, uint64_t min, uint64_t max, int base); - -/* - * Convert a string to an integer of type "type". - * - * - * Sets errno and returns: - * EINVAL: str == NULL, num == NULL, or string not of the form [ \t]*[+-]?[0-9]+ - * ERANGE: value out of range specified. (Range of [min, max]) - * - * *num is unchanged on error. - * Returns: - * - */ -#define DEF_STR_TO(name, type, bigtype, strtofunc, frmt) \ -int name(char* str, type* num, type min, type max, int base) \ -{ \ - char* test; \ - bigtype value; \ - \ - assert(str); \ - assert(num); \ - assert(min <= max); \ - assert(g.dbenv || g.progname); \ - assert(base == 0 || (base >= 2 && base <= 36)); \ - \ - errno = 0; \ - while (isspace(*str)) str++; \ - value = strtofunc(str, &test, base); \ - if ((*test != '\0' && *test != '\n') || test == str) { \ - PRINT_ERRORX("%s: Invalid numeric argument\n", str); \ - errno = EINVAL; \ - goto error; \ - } \ - if (errno != 0) { \ - PRINT_ERROR(errno, "%s\n", str); \ - } \ - if (value < min) { \ - PRINT_ERRORX("%s: Less than minimum value (%" frmt ")\n", str, min); \ - goto error; \ - } \ - if (value > max) { \ - PRINT_ERRORX("%s: Greater than maximum value (%" frmt ")\n", str, max); \ - goto error; \ - } \ - *num = value; \ - return EXIT_SUCCESS; \ -error: \ - return errno; \ -} - -DEF_STR_TO(strtoint32, int32_t, int64_t, strtoll, PRId32) -DEF_STR_TO(strtouint32, uint32_t, uint64_t, strtoull, PRIu32) -DEF_STR_TO(strtoint64, 
int64_t, int64_t, strtoll, PRId64) -DEF_STR_TO(strtouint64, uint64_t, uint64_t, strtoull, PRIu64) - -static inline void -outputbyte(uint8_t ch) -{ - if (g.plaintext) { - if (ch == '\\') printf("\\\\"); - else if (isprint(ch)) printf("%c", ch); - else printf("\\%02x", ch); - } - else printf("%02x", ch); -} - -static inline void -outputstring(char* str) -{ - char* p; - - for (p = str; *p != '\0'; p++) { - outputbyte((uint8_t)*p); - } -} - -static inline void -outputplaintextstring(char* str) -{ - bool old_plaintext = g.plaintext; - g.plaintext = true; - outputstring(str); - g.plaintext = old_plaintext; -} - -static inline int -hextoint(int ch) -{ - if (ch >= '0' && ch <= '9') { - return ch - '0'; - } - if (ch >= 'a' && ch <= 'z') { - return ch - 'a' + 10; - } - if (ch >= 'A' && ch <= 'Z') { - return ch - 'A' + 10; - } - return EOF; -} - -static inline int -printabletocstring(char* inputstr, char** poutputstr) -{ - char highch; - char lowch; - char nextch; - char* cstring; - - assert(inputstr); - assert(poutputstr); - assert(*poutputstr == NULL); - - cstring = (char*)toku_malloc((strlen(inputstr) + 1) * sizeof(char)); - if (cstring == NULL) { - PRINT_ERROR(errno, "printabletocstring"); - goto error; - } - - for (*poutputstr = cstring; *inputstr != '\0'; inputstr++) { - if (*inputstr == '\\') { - if ((highch = *++inputstr) == '\\') { - *cstring++ = '\\'; - continue; - } - if (highch == '\0' || (lowch = *++inputstr) == '\0') { - PRINT_ERROR(0, "unexpected end of input data or key/data pair"); - goto error; - } - if (!isxdigit(highch)) { - PRINT_ERROR(0, "Unexpected '%c' (non-hex) input.\n", highch); - goto error; - } - if (!isxdigit(lowch)) { - PRINT_ERROR(0, "Unexpected '%c' (non-hex) input.\n", lowch); - goto error; - } - nextch = (char)((hextoint(highch) << 4) | hextoint(lowch)); - if (nextch == '\0') { - /* Database names are c strings, and cannot have extra NULL terminators. 
*/ - PRINT_ERROR(0, "Unexpected '\\00' in input.\n"); - goto error; - } - *cstring++ = nextch; - } - else *cstring++ = *inputstr; - } - /* Terminate the string. */ - *cstring = '\0'; - return EXIT_SUCCESS; - -error: - PRINT_ERROR(0, "Quitting out due to errors.\n"); - return EXIT_FAILURE; -} - -static inline int -verify_library_version(void) -{ - int major; - int minor; - - db_version(&major, &minor, NULL); - if (major != DB_VERSION_MAJOR || minor != DB_VERSION_MINOR) { - PRINT_ERRORX("version %d.%d doesn't match library version %d.%d\n", - DB_VERSION_MAJOR, DB_VERSION_MINOR, major, minor); - return EXIT_FAILURE; - } - return EXIT_SUCCESS; -} - -static int last_caught = 0; - -static void catch_signal(int which_signal) { - last_caught = which_signal; - if (last_caught == 0) last_caught = SIGINT; -} - -static inline void -init_catch_signals(void) { - signal(SIGINT, catch_signal); - signal(SIGTERM, catch_signal); -#ifdef SIGHUP - signal(SIGHUP, catch_signal); -#endif -#ifdef SIGPIPE - signal(SIGPIPE, catch_signal); -#endif -} - -static inline int -caught_any_signals(void) { - return last_caught != 0; -} - -static inline void -resend_signals(void) { - if (last_caught) { - signal(last_caught, SIG_DFL); - raise(last_caught); - } -} - -#include -static int test_main (int argc, char *const argv[]); -int -main(int argc, char *const argv[]) { - int r; - r = test_main(argc, argv); - return r; -} diff --git a/tools/tokudb_dump.cc b/tools/tokudb_dump.cc index 1020afb70e0..83578ed9f35 100644 --- a/tools/tokudb_dump.cc +++ b/tools/tokudb_dump.cc @@ -88,6 +88,10 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007, 2008 Tokutek Inc. All rights reserved." 
+#include + +#include +#include #include #include #include @@ -97,8 +101,8 @@ PATENT RIGHTS GRANT: #include #include #include -#include -#include "tokudb_common.h" +#include +#include typedef struct { bool leadingspace; @@ -120,7 +124,245 @@ typedef struct { } dump_globals; dump_globals g; -#include "tokudb_common_funcs.h" + +#define SET_BITS(bitvector, bits) ((bitvector) |= (bits)) +#define REMOVE_BITS(bitvector, bits) ((bitvector) &= ~(bits)) +#define IS_SET_ANY(bitvector, bits) ((bitvector) & (bits)) +#define IS_SET_ALL(bitvector, bits) (((bitvector) & (bits)) == (bits)) + +#define IS_POWER_OF_2(num) ((num) > 0 && ((num) & ((num) - 1)) == 0) + +//DB_ENV->err disabled since it does not use db_strerror +#define PRINT_ERROR(retval, ...) \ +do { \ +if (0) g.dbenv->err(g.dbenv, retval, __VA_ARGS__); \ +else { \ + fprintf(stderr, "\tIn %s:%d %s()\n", __FILE__, __LINE__, __FUNCTION__); \ + fprintf(stderr, "%s: %s:", g.progname, db_strerror(retval)); \ + fprintf(stderr, __VA_ARGS__); \ + fprintf(stderr, "\n"); \ + fflush(stderr); \ +} \ +} while (0) + +//DB_ENV->err disabled since it does not use db_strerror, errx does not exist. +#define PRINT_ERRORX(...) \ +do { \ +if (0) g.dbenv->err(g.dbenv, 0, __VA_ARGS__); \ +else { \ + fprintf(stderr, "\tIn %s:%d %s()\n", __FILE__, __LINE__, __FUNCTION__); \ + fprintf(stderr, "%s: ", g.progname); \ + fprintf(stderr, __VA_ARGS__); \ + fprintf(stderr, "\n"); \ + fflush(stderr); \ +} \ +} while (0) + +int strtoint32 (char* str, int32_t* num, int32_t min, int32_t max, int base); +int strtouint32 (char* str, uint32_t* num, uint32_t min, uint32_t max, int base); +int strtoint64 (char* str, int64_t* num, int64_t min, int64_t max, int base); +int strtouint64 (char* str, uint64_t* num, uint64_t min, uint64_t max, int base); + +/* + * Convert a string to an integer of type "type". + * + * + * Sets errno and returns: + * EINVAL: str == NULL, num == NULL, or string not of the form [ \t]*[+-]?[0-9]+ + * ERANGE: value out of range specified. 
(Range of [min, max]) + * + * *num is unchanged on error. + * Returns: + * + */ +#define DEF_STR_TO(name, type, bigtype, strtofunc, frmt) \ +int name(char* str, type* num, type min, type max, int base) \ +{ \ + char* test; \ + bigtype value; \ + \ + assert(str); \ + assert(num); \ + assert(min <= max); \ + assert(g.dbenv || g.progname); \ + assert(base == 0 || (base >= 2 && base <= 36)); \ + \ + errno = 0; \ + while (isspace(*str)) str++; \ + value = strtofunc(str, &test, base); \ + if ((*test != '\0' && *test != '\n') || test == str) { \ + PRINT_ERRORX("%s: Invalid numeric argument\n", str); \ + errno = EINVAL; \ + goto error; \ + } \ + if (errno != 0) { \ + PRINT_ERROR(errno, "%s\n", str); \ + } \ + if (value < min) { \ + PRINT_ERRORX("%s: Less than minimum value (%" frmt ")\n", str, min); \ + goto error; \ + } \ + if (value > max) { \ + PRINT_ERRORX("%s: Greater than maximum value (%" frmt ")\n", str, max); \ + goto error; \ + } \ + *num = value; \ + return EXIT_SUCCESS; \ +error: \ + return errno; \ +} + +DEF_STR_TO(strtoint32, int32_t, int64_t, strtoll, PRId32) +DEF_STR_TO(strtouint32, uint32_t, uint64_t, strtoull, PRIu32) +DEF_STR_TO(strtoint64, int64_t, int64_t, strtoll, PRId64) +DEF_STR_TO(strtouint64, uint64_t, uint64_t, strtoull, PRIu64) + +static inline void +outputbyte(uint8_t ch) +{ + if (g.plaintext) { + if (ch == '\\') printf("\\\\"); + else if (isprint(ch)) printf("%c", ch); + else printf("\\%02x", ch); + } + else printf("%02x", ch); +} + +static inline void +outputstring(char* str) +{ + char* p; + + for (p = str; *p != '\0'; p++) { + outputbyte((uint8_t)*p); + } +} + +static inline void +outputplaintextstring(char* str) +{ + bool old_plaintext = g.plaintext; + g.plaintext = true; + outputstring(str); + g.plaintext = old_plaintext; +} + +static inline int +hextoint(int ch) +{ + if (ch >= '0' && ch <= '9') { + return ch - '0'; + } + if (ch >= 'a' && ch <= 'z') { + return ch - 'a' + 10; + } + if (ch >= 'A' && ch <= 'Z') { + return ch - 'A' + 10; + } + 
return EOF; +} + +static inline int +printabletocstring(char* inputstr, char** poutputstr) +{ + char highch; + char lowch; + char nextch; + char* cstring; + + assert(inputstr); + assert(poutputstr); + assert(*poutputstr == NULL); + + cstring = (char*)toku_malloc((strlen(inputstr) + 1) * sizeof(char)); + if (cstring == NULL) { + PRINT_ERROR(errno, "printabletocstring"); + goto error; + } + + for (*poutputstr = cstring; *inputstr != '\0'; inputstr++) { + if (*inputstr == '\\') { + if ((highch = *++inputstr) == '\\') { + *cstring++ = '\\'; + continue; + } + if (highch == '\0' || (lowch = *++inputstr) == '\0') { + PRINT_ERROR(0, "unexpected end of input data or key/data pair"); + goto error; + } + if (!isxdigit(highch)) { + PRINT_ERROR(0, "Unexpected '%c' (non-hex) input.\n", highch); + goto error; + } + if (!isxdigit(lowch)) { + PRINT_ERROR(0, "Unexpected '%c' (non-hex) input.\n", lowch); + goto error; + } + nextch = (char)((hextoint(highch) << 4) | hextoint(lowch)); + if (nextch == '\0') { + /* Database names are c strings, and cannot have extra NULL terminators. */ + PRINT_ERROR(0, "Unexpected '\\00' in input.\n"); + goto error; + } + *cstring++ = nextch; + } + else *cstring++ = *inputstr; + } + /* Terminate the string. 
*/ + *cstring = '\0'; + return EXIT_SUCCESS; + +error: + PRINT_ERROR(0, "Quitting out due to errors.\n"); + return EXIT_FAILURE; +} + +static inline int +verify_library_version(void) +{ + int major; + int minor; + + db_version(&major, &minor, NULL); + if (major != DB_VERSION_MAJOR || minor != DB_VERSION_MINOR) { + PRINT_ERRORX("version %d.%d doesn't match library version %d.%d\n", + DB_VERSION_MAJOR, DB_VERSION_MINOR, major, minor); + return EXIT_FAILURE; + } + return EXIT_SUCCESS; +} + +static int last_caught = 0; + +static void catch_signal(int which_signal) { + last_caught = which_signal; + if (last_caught == 0) last_caught = SIGINT; +} + +static inline void +init_catch_signals(void) { + signal(SIGINT, catch_signal); + signal(SIGTERM, catch_signal); +#ifdef SIGHUP + signal(SIGHUP, catch_signal); +#endif +#ifdef SIGPIPE + signal(SIGPIPE, catch_signal); +#endif +} + +static inline int +caught_any_signals(void) { + return last_caught != 0; +} + +static inline void +resend_signals(void) { + if (last_caught) { + signal(last_caught, SIG_DFL); + raise(last_caught); + } +} static int usage (void); static int create_init_env(void); @@ -131,7 +373,7 @@ static int dump_footer (void); static int dump_header (void); static int close_database (void); -int test_main(int argc, char *const argv[]) { +int main(int argc, char *const argv[]) { int ch; int retval; diff --git a/tools/tokudb_gen.cc b/tools/tokudb_gen.cc deleted file mode 100644 index c23567116a8..00000000000 --- a/tools/tokudb_gen.cc +++ /dev/null @@ -1,471 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this 
COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). - - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. 
- - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. - - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2007, 2008 Tokutek Inc. All rights reserved." 
- -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "tokudb_common.h" - -typedef struct { - DB_ENV* dbenv; - bool plaintext; - char* progname; -} gen_globals; - -gen_globals g; -#include "tokudb_common_funcs.h" - -static int usage(void); -static void generate_keys(void); -static int get_delimiter(char* str); - - - -char dbt_delimiter = '\n'; -char sort_delimiter[3]; -uint32_t lengthmin = 0; -bool set_lengthmin = false; -uint32_t lengthlimit = 0; -bool set_lengthlimit= false; -uint64_t numkeys = 0; -bool set_numkeys = false; -bool header = true; -bool footer = true; -bool justheader = false; -bool justfooter = false; -bool outputkeys = true; -uint32_t seed = 1; -bool set_seed = false; -bool printableonly = false; -bool leadingspace = true; -bool force_unique = true; -bool dupsort = false; - -static int test_main (int argc, char *const argv[]) { - int ch; - - /* Set up the globals. */ - memset(&g, 0, sizeof(g)); - - g.progname = argv[0]; - - if (verify_library_version() != 0) goto error; - - strcpy(sort_delimiter, ""); - - while ((ch = getopt(argc, argv, "PpTo:r:m:M:n:uVhHfFd:s:DS")) != EOF) { - switch (ch) { - case ('P'): { - printableonly = true; - break; - } - case ('p'): { - g.plaintext = true; - leadingspace = true; - break; - } - case ('T'): { - g.plaintext = true; - leadingspace = false; - header = false; - footer = false; - break; - } - case ('o'): { - if (freopen(optarg, "w", stdout) == NULL) { - PRINT_ERROR(errno, "%s: reopen\n", optarg); - goto error; - } - break; - } - case ('r'): { - if (strtouint32(optarg, &seed, 0, UINT32_MAX, 10)) { - PRINT_ERRORX("%s: (-r) Random seed invalid.", optarg); - goto error; - } - set_seed = true; - break; - } - case ('m'): { - if (strtouint32(optarg, &lengthmin, 0, UINT32_MAX, 10)) { - PRINT_ERRORX("%s: (-m) Min length of keys/values invalid.", optarg); - goto error; - } - set_lengthmin = true; - break; - } - case ('M'): { - if (strtouint32(optarg, 
&lengthlimit, 1, UINT32_MAX, 10)) { - PRINT_ERRORX("%s: (-M) Limit of key/value length invalid.", optarg); - goto error; - } - set_lengthlimit = true; - break; - } - case ('n'): { - if (strtouint64(optarg, &numkeys, 0, UINT64_MAX, 10)) { - PRINT_ERRORX("%s: (-n) Number of keys to generate invalid.", optarg); - goto error; - } - set_numkeys = true; - break; - } - case ('u'): { - force_unique = false; - break; - } - case ('h'): { - header = false; - break; - } - case ('H'): { - justheader = true; - break; - } - case ('f'): { - footer = false; - break; - } - case ('F'): { - justfooter = true; - break; - } - case ('d'): { - int temp = get_delimiter(optarg); - if (temp == EOF) { - PRINT_ERRORX("%s: (-d) Key (or value) delimiter must be one character.", - optarg); - goto error; - } - if (isxdigit(temp)) { - PRINT_ERRORX("%c: (-d) Key (or value) delimiter cannot be a hex digit.", - temp); - goto error; - } - dbt_delimiter = (char)temp; - break; - } - case ('s'): { - int temp = get_delimiter(optarg); - if (temp == EOF) { - PRINT_ERRORX("%s: (-s) Sorting (Between key/value pairs) delimiter must be one character.", - optarg); - goto error; - } - if (isxdigit(temp)) { - PRINT_ERRORX("%c: (-s) Sorting (Between key/value pairs) delimiter cannot be a hex digit.", - temp); - goto error; - } - sort_delimiter[0] = (char)temp; - sort_delimiter[1] = '\0'; - break; - } - case ('V'): { - printf("%s\n", db_version(NULL, NULL, NULL)); - return EXIT_SUCCESS; - } - case 'D': { - fprintf(stderr, "Duplicates no longer supported by tokudb\n"); - return EXIT_FAILURE; - } - case 'S': { - fprintf(stderr, "Dupsort no longer supported by tokudb\n"); - return EXIT_FAILURE; - } - case ('?'): - default: { - return (usage()); - } - } - } - argc -= optind; - argv += optind; - - if (justheader && !header) { - PRINT_ERRORX("The -h and -H options may not both be specified.\n"); - goto error; - } - if (justfooter && !footer) { - PRINT_ERRORX("The -f and -F options may not both be specified.\n"); - goto 
error; - } - if (justfooter && justheader) { - PRINT_ERRORX("The -H and -F options may not both be specified.\n"); - goto error; - } - if (justfooter && header) { - PRINT_ERRORX("-F implies -h\n"); - header = false; - } - if (justheader && footer) { - PRINT_ERRORX("-H implies -f\n"); - footer = false; - } - if (!leadingspace) { - if (footer) { - PRINT_ERRORX("-p implies -f\n"); - footer = false; - } - if (header) { - PRINT_ERRORX("-p implies -h\n"); - header = false; - } - } - if (justfooter || justheader) outputkeys = false; - else if (!set_numkeys) - { - PRINT_ERRORX("Using default number of keys. (-n 1024).\n"); - numkeys = 1024; - } - if (outputkeys && !set_seed) { - PRINT_ERRORX("Using default seed. (-r 1).\n"); - seed = 1; - } - if (outputkeys && !set_lengthmin) { - PRINT_ERRORX("Using default lengthmin. (-m 0).\n"); - lengthmin = 0; - } - if (outputkeys && !set_lengthlimit) { - PRINT_ERRORX("Using default lengthlimit. (-M 1024).\n"); - lengthlimit = 1024; - } - if (outputkeys && lengthmin >= lengthlimit) { - PRINT_ERRORX("Max key size must be greater than min key size.\n"); - goto error; - } - - if (argc != 0) { - return usage(); - } - if (header) { - printf("VERSION=3\n"); - printf("format=%s\n", g.plaintext ? "print" : "bytevalue"); - printf("type=btree\n"); - // printf("db_pagesize=%d\n", 4096); //Don't write pagesize which would be useless. 
- if (dupsort) - printf("dupsort=%d\n", dupsort); - printf("HEADER=END\n"); - } - if (outputkeys) generate_keys(); - if (footer) printf("DATA=END\n"); - return EXIT_SUCCESS; - -error: - fprintf(stderr, "Quitting out due to errors.\n"); - return EXIT_FAILURE; -} - -static int usage() -{ - fprintf(stderr, - "usage: %s [-PpTuVhHfFDS] [-o output] [-r seed] [-m minsize] [-M limitsize]\n" - " %*s[-n numpairs] [-d delimiter] [-s delimiter]\n", - g.progname, (int)strlen(g.progname) + 1, ""); - return EXIT_FAILURE; -} - -static uint8_t randbyte(void) -{ - static uint32_t numsavedbits = 0; - static uint64_t savedbits = 0; - uint8_t retval; - - if (numsavedbits < 8) { - savedbits |= ((uint64_t)random()) << numsavedbits; - numsavedbits += 31; /* Random generates 31 random bits. */ - } - retval = savedbits & 0xff; - numsavedbits -= 8; - savedbits >>= 8; - return retval; -} - -/* Almost-uniformly random int from [0,limit) */ -static int32_t random_below(int32_t limit) -{ - assert(limit > 0); - return random() % limit; -} - -static void generate_keys() -{ - bool usedemptykey = false; - uint64_t numgenerated = 0; - uint64_t totalsize = 0; - char identifier[24]; /* 8 bytes * 2 = 16; 16+1=17; 17+null terminator = 18. Extra padding. */ - int length; - int i; - uint8_t ch; - - srandom(seed); - while (numgenerated < numkeys) { - numgenerated++; - - /* Each key is preceded by a space (unless using -T). */ - if (leadingspace) printf(" "); - - /* Generate a key. */ - { - /* Pick a key length. */ - length = random_below(lengthlimit - lengthmin) + lengthmin; - - /* Output 'length' random bytes. */ - for (i = 0; i < length; i++) { - do {ch = randbyte();} - while (printableonly && !isprint(ch)); - outputbyte(ch); - } - totalsize += length; - if (force_unique) { - if (length == 0 && !usedemptykey) usedemptykey = true; - else { - /* Append identifier to ensure uniqueness. 
*/ - sprintf(identifier, "x%" PRIx64, numgenerated); - outputstring(identifier); - totalsize += strlen(identifier); - } - } - } - printf("%c", dbt_delimiter); - - /* Each value is preceded by a space (unless using -T). */ - if (leadingspace) printf(" "); - - /* Generate a value. */ - { - /* Pick a key length. */ - length = random_below(lengthlimit - lengthmin) + lengthmin; - - /* Output 'length' random bytes. */ - for (i = 0; i < length; i++) { - do {ch = randbyte();} - while (printableonly && !isprint(ch)); - outputbyte(ch); - } - totalsize += length; - } - printf("%c", dbt_delimiter); - - printf("%s", sort_delimiter); - } -} - -int get_delimiter(char* str) -{ - if (strlen(str) == 2 && str[0] == '\\') { - switch (str[1]) { - case ('a'): return '\a'; - case ('b'): return '\b'; -#ifndef __ICL - case ('e'): return '\e'; -#endif - case ('f'): return '\f'; - case ('n'): return '\n'; - case ('r'): return '\r'; - case ('t'): return '\t'; - case ('v'): return '\v'; - case ('0'): return '\0'; - case ('\\'): return '\\'; - default: return EOF; - } - } - if (strlen(str) == 1) return str[0]; - return EOF; -} diff --git a/tools/tokudb_load.cc b/tools/tokudb_load.cc deleted file mode 100644 index 2072b2f7f8d..00000000000 --- a/tools/tokudb_load.cc +++ /dev/null @@ -1,977 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). 
- - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. 
- - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2007, 2008 Tokutek Inc. All rights reserved." 
- -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "tokudb_common.h" - -typedef struct { - bool leadingspace; - bool plaintext; - bool overwritekeys; - bool header; - bool eof; - bool keys; - bool is_private; - char* progname; - char* homedir; - char* database; - char* subdatabase; - char** config_options; - int32_t version; - int exitcode; - uint64_t linenumber; - DBTYPE dbtype; - DB* db; - DB_ENV* dbenv; - struct { - char* data[2]; - } get_dbt; - struct { - char* data; - } read_header; -} load_globals; - -load_globals g; -#include "tokudb_common_funcs.h" - -static int usage (void); -static int load_database (void); -static int create_init_env(void); -static int read_header (void); -static int open_database (void); -static int read_keys (void); -static int apply_commandline_options(void); -static int close_database (void); -static int doublechararray(char** pmem, uint64_t* size); - -int test_main(int argc, char *const argv[]) { - int ch; - int retval; - char** next_config_option; - - /* Set up the globals. 
*/ - memset(&g, 0, sizeof(g)); - g.leadingspace = true; - g.overwritekeys = true; - g.dbtype = DB_UNKNOWN; - //g.dbtype = DB_BTREE; - g.progname = argv[0]; - g.header = true; - - if (verify_library_version() != 0) goto error; - - next_config_option = g.config_options = (char**) calloc(argc, sizeof(char*)); - if (next_config_option == NULL) { - PRINT_ERROR(errno, "main: calloc\n"); - goto error; - } - while ((ch = getopt(argc, argv, "c:f:h:nP:r:Tt:V")) != EOF) { - switch (ch) { - case ('c'): { - *next_config_option++ = optarg; - break; - } - case ('f'): { - if (freopen(optarg, "r", stdin) == NULL) { - fprintf(stderr, - "%s: %s: reopen: %s\n", - g.progname, optarg, strerror(errno)); - goto error; - } - break; - } - case ('h'): { - g.homedir = optarg; - break; - } - case ('n'): { - /* g.overwritekeys = false; */ - PRINT_ERRORX("-%c option not supported.\n", ch); - goto error; - } - case ('P'): { - /* Clear password. */ - memset(optarg, 0, strlen(optarg)); - PRINT_ERRORX("-%c option not supported.\n", ch); - goto error; - } - case ('r'): { - PRINT_ERRORX("-%c option not supported.\n", ch); - goto error; - } - case ('T'): { - g.plaintext = true; - g.leadingspace = false; - g.header = false; - break; - } - case ('t'): { - if (!strcmp(optarg, "btree")) { - g.dbtype = DB_BTREE; - break; - } - if (!strcmp(optarg, "hash") || !strcmp(optarg, "recno") || !strcmp(optarg, "queue")) { - fprintf(stderr, "%s: db type %s not supported.\n", g.progname, optarg); - goto error; - } - fprintf(stderr, "%s: Unrecognized db type %s.\n", g.progname, optarg); - goto error; - } - case ('V'): { - printf("%s\n", db_version(NULL, NULL, NULL)); - goto cleanup; - } - case ('?'): - default: { - g.exitcode = usage(); - goto cleanup; - } - } - } - argc -= optind; - argv += optind; - - if (argc != 1) { - g.exitcode = usage(); - goto cleanup; - } - init_catch_signals(); - - g.database = argv[0]; - if (create_init_env() != 0) goto error; - if (caught_any_signals()) goto cleanup; - while (!g.eof) { - if 
(load_database() != 0) goto error; - if (caught_any_signals()) goto cleanup; - } - if (false) { -error: - g.exitcode = EXIT_FAILURE; - fprintf(stderr, "%s: Quitting out due to errors.\n", g.progname); - } -cleanup: - if (g.dbenv && (retval = g.dbenv->close(g.dbenv, 0)) != 0) { - g.exitcode = EXIT_FAILURE; - fprintf(stderr, "%s: dbenv->close: %s\n", g.progname, db_strerror(retval)); - } - if (g.config_options) toku_free(g.config_options); - if (g.subdatabase) toku_free(g.subdatabase); - if (g.read_header.data) toku_free(g.read_header.data); - if (g.get_dbt.data[0]) toku_free(g.get_dbt.data[0]); - if (g.get_dbt.data[1]) toku_free(g.get_dbt.data[1]); - resend_signals(); - - return g.exitcode; -} - -int load_database() -{ - int retval; - - /* Create a database handle. */ - retval = db_create(&g.db, g.dbenv, 0); - if (retval != 0) { - PRINT_ERROR(retval, "db_create"); - return EXIT_FAILURE; - } - - if (g.header && read_header() != 0) goto error; - if (g.eof) goto cleanup; - if (caught_any_signals()) goto cleanup; - if (apply_commandline_options() != 0) goto error; - if (g.eof) goto cleanup; - if (caught_any_signals()) goto cleanup; - - /* - TODO: If/when supporting encryption - if (g.password && (retval = db->set_flags(db, DB_ENCRYPT))) { - PRINT_ERROR(ret, "DB->set_flags: DB_ENCRYPT"); - goto error; - } - */ - if (open_database() != 0) goto error; - if (g.eof) goto cleanup; - if (caught_any_signals()) goto cleanup; - if (read_keys() != 0) goto error; - if (g.eof) goto cleanup; - if (caught_any_signals()) goto cleanup; - - if (false) { -error: - g.exitcode = EXIT_FAILURE; - } -cleanup: - - if (close_database() != 0) g.exitcode = EXIT_FAILURE; - - return g.exitcode; -} - -int usage() -{ - fprintf(stderr, - "usage: %s [-TV] [-c name=value] [-f file] [-h home] [-t btree] db_file\n", - g.progname); - return EXIT_FAILURE; -} - -int create_init_env() -{ - int retval; - DB_ENV* dbenv; - int flags; - //TODO: Experiments to determine right cache size for tokudb, or maybe command 
line argument. - //int cache = 1 << 20; /* 1 megabyte */ - - retval = db_env_create(&dbenv, 0); - if (retval) { - fprintf(stderr, "%s: db_dbenv_create: %s\n", g.progname, db_strerror(retval)); - goto error; - } - ///TODO: UNCOMMENT/IMPLEMENT dbenv->set_errfile(dbenv, stderr); - dbenv->set_errpfx(dbenv, g.progname); - /* - TODO: If/when supporting encryption - if (g.password && (retval = dbenv->set_encrypt(dbenv, g.password, DB_ENCRYPT_AES))) { - PRINT_ERROR(retval, "set_passwd"); - goto error; - } - */ - - /* Open the dbenvironment. */ - g.is_private = false; - flags = DB_INIT_LOCK | DB_INIT_LOG | DB_INIT_MPOOL|DB_INIT_TXN|DB_INIT_LOG; ///TODO: UNCOMMENT/IMPLEMENT | DB_USE_ENVIRON; - //TODO: Transactions.. SET_BITS(flags, DB_INIT_TXN); - - /* - ///TODO: UNCOMMENT/IMPLEMENT Notes: We require DB_PRIVATE - if (!dbenv->open(dbenv, g.homedir, flags, 0)) goto success; - */ - - /* - ///TODO: UNCOMMENT/IMPLEMENT - retval = dbenv->set_cachesize(dbenv, 0, cache, 1); - if (retval) { - PRINT_ERROR(retval, "DB_ENV->set_cachesize"); - goto error; - } - */ - g.is_private = true; - //TODO: Do we want to support transactions/logging even in single-process mode? - //Maybe if the db already exists. - //If db does not exist.. makes sense not to log or have transactions - //REMOVE_BITS(flags, DB_INIT_LOCK | DB_INIT_LOG | DB_INIT_TXN); - SET_BITS(flags, DB_CREATE | DB_PRIVATE); - - retval = dbenv->open(dbenv, g.homedir ? 
g.homedir : ".", flags, 0); - if (retval) { - PRINT_ERROR(retval, "DB_ENV->open"); - goto error; - } - g.dbenv = dbenv; - return EXIT_SUCCESS; - -error: - return EXIT_FAILURE; -} - -#define PARSE_NUMBER(match, dbfunction) \ -if (!strcmp(field, match)) { \ - if (strtoint32(value, &num, 1, INT32_MAX, 10)) goto error; \ - if ((retval = dbfunction(db, num)) != 0) goto printerror; \ - continue; \ -} -#define PARSE_UNSUPPORTEDNUMBER(match, dbfunction) \ -if (!strcmp(field, match)) { \ - if (strtoint32(value, &num, 1, INT32_MAX, 10)) goto error; \ - PRINT_ERRORX("%s option not supported.\n", field); \ - goto error; \ -} -#define PARSE_IGNOREDNUMBER(match, dbfunction) \ -if (!strcmp(field, match)) { \ - if (strtoint32(value, &num, 1, INT32_MAX, 10)) goto error; \ - PRINT_ERRORX("%s option not supported yet (ignored).\n", field); \ - continue; \ -} - -#define PARSE_FLAG(match, flag) \ -if (!strcmp(field, match)) { \ - if (strtoint32(value, &num, 0, 1, 10)) { \ - PRINT_ERRORX("%s: boolean name=value pairs require a value of 0 or 1", \ - field); \ - goto error; \ - } \ - if ((retval = db->set_flags(db, flag)) != 0) { \ - PRINT_ERROR(retval, "set_flags: %s", field); \ - goto error; \ - } \ - continue; \ -} - -#define PARSE_UNSUPPORTEDFLAG(match, flag) \ -if (!strcmp(field, match)) { \ - if (strtoint32(value, &num, 0, 1, 10)) { \ - PRINT_ERRORX("%s: boolean name=value pairs require a value of 0 or 1", \ - field); \ - goto error; \ - } \ - PRINT_ERRORX("%s option not supported.\n", field); \ - goto error; \ -} - -#define PARSE_IGNOREDFLAG(match, flag) \ -if (!strcmp(field, match)) { \ - if (strtoint32(value, &num, 0, 1, 10)) { \ - PRINT_ERRORX("%s: boolean name=value pairs require a value of 0 or 1", \ - field); \ - goto error; \ - } \ - PRINT_ERRORX("%s option not supported yet (ignored).\n", field); \ - continue; \ -} - -#define PARSE_CHAR(match, dbfunction) \ -if (!strcmp(field, match)) { \ - if (strlen(value) != 1) { \ - PRINT_ERRORX("%s=%s: Expected 1-byte value", \ - 
field, value); \ - goto error; \ - } \ - if ((retval = dbfunction(db, value[0])) != 0) { \ - goto printerror; \ - } \ - continue; \ -} - -#define PARSE_UNSUPPORTEDCHAR(match, dbfunction) \ -if (!strcmp(field, match)) { \ - if (strlen(value) != 1) { \ - PRINT_ERRORX("%s=%s: Expected 1-byte value", \ - field, value); \ - goto error; \ - } \ - PRINT_ERRORX("%s option not supported.\n", field); \ - goto error; \ -} - -#define PARSE_COMMON_CONFIGURATIONS() \ - PARSE_IGNOREDNUMBER( "bt_minkey", db->set_bt_minkey); \ - PARSE_IGNOREDFLAG( "chksum", DB_CHKSUM); \ - PARSE_IGNOREDNUMBER( "db_lorder", db->set_lorder); \ - PARSE_IGNOREDNUMBER( "db_pagesize", db->set_pagesize); \ - PARSE_UNSUPPORTEDNUMBER("extentsize", db->set_q_extentsize); \ - PARSE_UNSUPPORTEDNUMBER("h_ffactor", db->set_h_ffactor); \ - PARSE_UNSUPPORTEDNUMBER("h_nelem", db->set_h_nelem); \ - PARSE_UNSUPPORTEDNUMBER("re_len", db->set_re_len); \ - PARSE_UNSUPPORTEDCHAR( "re_pad", db->set_re_pad); \ - PARSE_UNSUPPORTEDFLAG( "recnum", DB_RECNUM); \ - PARSE_UNSUPPORTEDFLAG( "renumber", DB_RENUMBER); - - - -int read_header() -{ - static uint64_t datasize = 1 << 10; - uint64_t idx = 0; - char* field; - char* value; - int ch; - int32_t num; - int retval; - int r; - - assert(g.header); - - if (g.read_header.data == NULL && (g.read_header.data = (char*)toku_malloc(datasize * sizeof(char))) == NULL) { - PRINT_ERROR(errno, "read_header: malloc"); - goto error; - } - while (!g.eof) { - if (caught_any_signals()) goto success; - g.linenumber++; - idx = 0; - /* Read a line. */ - while (true) { - if ((ch = getchar()) == EOF) { - g.eof = true; - if (ferror(stdin)) goto formaterror; - break; - } - if (ch == '\n') break; - - g.read_header.data[idx] = (char)ch; - idx++; - - /* Ensure room exists for next character/null terminator. 
*/ - if (idx == datasize && doublechararray(&g.read_header.data, &datasize)) goto error; - } - if (idx == 0 && g.eof) goto success; - g.read_header.data[idx] = '\0'; - - field = g.read_header.data; - if ((value = strchr(g.read_header.data, '=')) == NULL) goto formaterror; - value[0] = '\0'; - value++; - - if (field[0] == '\0' || value[0] == '\0') goto formaterror; - - if (!strcmp(field, "HEADER")) break; - if (!strcmp(field, "VERSION")) { - if (strtoint32(value, &g.version, 1, INT32_MAX, 10)) goto error; - if (g.version != 3) { - PRINT_ERRORX("line %" PRIu64 ": VERSION %d is unsupported", g.linenumber, g.version); - goto error; - } - continue; - } - if (!strcmp(field, "format")) { - if (!strcmp(value, "bytevalue")) { - g.plaintext = false; - continue; - } - if (!strcmp(value, "print")) { - g.plaintext = true; - continue; - } - goto formaterror; - } - if (!strcmp(field, "type")) { - if (!strcmp(value, "btree")) { - g.dbtype = DB_BTREE; - continue; - } - if (!strcmp(value, "hash") || strcmp(value, "recno") || strcmp(value, "queue")) { - PRINT_ERRORX("db type %s not supported.\n", value); - goto error; - } - PRINT_ERRORX("line %" PRIu64 ": unknown type %s", g.linenumber, value); - goto error; - } - if (!strcmp(field, "database") || !strcmp(field, "subdatabase")) { - if (g.subdatabase != NULL) { - toku_free(g.subdatabase); - g.subdatabase = NULL; - } - if ((retval = printabletocstring(value, &g.subdatabase))) { - PRINT_ERROR(retval, "error reading db name"); - goto error; - } - continue; - } - if (!strcmp(field, "keys")) { - int32_t temp; - if (strtoint32(value, &temp, 0, 1, 10)) { - PRINT_ERROR(0, - "%s: boolean name=value pairs require a value of 0 or 1", - field); - goto error; - } - g.keys = (bool)temp; - if (!g.keys) { - PRINT_ERRORX("keys=0 not supported"); - goto error; - } - continue; - } - PARSE_COMMON_CONFIGURATIONS(); - - PRINT_ERRORX("unknown input-file header configuration keyword \"%s\"", field); - goto error; - } -success: - r = 0; - - if (false) { 
-formaterror: - r = EXIT_FAILURE; - PRINT_ERRORX("line %" PRIu64 ": unexpected format", g.linenumber); - } - if (false) { -error: - r = EXIT_FAILURE; - } - return r; -} - -int apply_commandline_options() -{ - int r = -1; - unsigned idx; - char* field; - char* value = NULL; - int32_t num; - int retval; - - for (idx = 0; g.config_options[idx]; idx++) { - if (value) { - /* Restore the field=value format. */ - value[-1] = '='; - value = NULL; - } - field = g.config_options[idx]; - - if ((value = strchr(field, '=')) == NULL) { - PRINT_ERRORX("command-line configuration uses name=value format"); - goto error; - } - value[0] = '\0'; - value++; - - if (field[0] == '\0' || value[0] == '\0') { - PRINT_ERRORX("command-line configuration uses name=value format"); - goto error; - } - - if (!strcmp(field, "database") || !strcmp(field, "subdatabase")) { - if (g.subdatabase != NULL) { - toku_free(g.subdatabase); - g.subdatabase = NULL; - } - if ((retval = printabletocstring(value, &g.subdatabase))) { - PRINT_ERROR(retval, "error reading db name"); - goto error; - } - continue; - } - if (!strcmp(field, "keys")) { - int32_t temp; - if (strtoint32(value, &temp, 0, 1, 10)) { - PRINT_ERROR(0, - "%s: boolean name=value pairs require a value of 0 or 1", - field); - goto error; - } - g.keys = (bool)temp; - if (!g.keys) { - PRINT_ERRORX("keys=0 not supported"); - goto error; - } - continue; - } - PARSE_COMMON_CONFIGURATIONS(); - - PRINT_ERRORX("unknown input-file header configuration keyword \"%s\"", field); - goto error; - } - if (value) { - /* Restore the field=value format. */ - value[-1] = '='; - value = NULL; - } - r = 0; - -error: - return r; -} - -int open_database() -{ - DB* db = g.db; - int retval; - - int open_flags = 0; - //TODO: Transaction auto commit stuff - //if (TXN_ON(dbenv)) SET_BITS(open_flags, DB_AUTO_COMMIT); - - //Try to see if it exists first. 
- retval = db->open(db, NULL, g.database, g.subdatabase, g.dbtype, open_flags, 0666); - if (retval == ENOENT) { - //Does not exist and we did not specify a type. - //TODO: Uncomment when DB_UNKNOWN + db->get_type are implemented. - /* - if (g.dbtype == DB_UNKNOWN) { - PRINT_ERRORX("no database type specified"); - goto error; - }*/ - SET_BITS(open_flags, DB_CREATE); - //Try creating it. - retval = db->open(db, NULL, g.database, g.subdatabase, g.dbtype, open_flags, 0666); - } - if (retval != 0) { - PRINT_ERROR(retval, "DB->open: %s", g.database); - goto error; - } - //TODO: Uncomment when DB_UNKNOWN + db->get_type are implemented. - /* - if ((retval = db->get_type(db, &opened_type)) != 0) { - PRINT_ERROR(retval, "DB->get_type"); - goto error; - } - if (opened_type != DB_BTREE) { - PRINT_ERRORX("Unsupported db type %d\n", opened_type); - goto error; - } - if (g.dbtype != DB_UNKNOWN && opened_type != g.dbtype) { - PRINT_ERRORX("DBTYPE %d does not match opened DBTYPE %d.\n", g.dbtype, opened_type); - goto error; - }*/ - return EXIT_SUCCESS; -error: - fprintf(stderr, "Quitting out due to errors.\n"); - return EXIT_FAILURE; -} - -int doublechararray(char** pmem, uint64_t* size) -{ - assert(pmem); - assert(size); - assert(IS_POWER_OF_2(*size)); - - *size <<= 1; - if (*size == 0) { - /* Overflowed uint64_t. */ - PRINT_ERRORX("Line %" PRIu64 ": Line too long.\n", g.linenumber); - goto error; - } - if ((*pmem = (char*)toku_realloc(*pmem, *size)) == NULL) { - PRINT_ERROR(errno, "doublechararray: realloc"); - goto error; - } - return EXIT_SUCCESS; - -error: - return EXIT_FAILURE; -} - -static int get_dbt(DBT* pdbt) -{ - /* Need to store a key and value. */ - static uint64_t datasize[2] = {1 << 10, 1 << 10}; - static int which = 0; - char* datum; - uint64_t idx = 0; - int highch; - int lowch; - - /* *pdbt should have been memset to 0 before being called. 
*/ - which = 1 - which; - if (g.get_dbt.data[which] == NULL && - (g.get_dbt.data[which] = (char*)toku_malloc(datasize[which] * sizeof(char))) == NULL) { - PRINT_ERROR(errno, "get_dbt: malloc"); - goto error; - } - - datum = g.get_dbt.data[which]; - - if (g.plaintext) { - int firstch; - int nextch = EOF; - - for (firstch = getchar(); firstch != EOF; firstch = getchar()) { - switch (firstch) { - case ('\n'): { - /* Done reading this key/value. */ - nextch = EOF; - break; - } - case ('\\'): { - /* Escaped \ or two hex digits. */ - highch = getchar(); - if (highch == '\\') { - nextch = '\\'; - break; - } - else if (highch == EOF) { - g.eof = true; - PRINT_ERRORX("Line %" PRIu64 ": Unexpected end of file (2 hex digits per byte).\n", g.linenumber); - goto error; - } - else if (!isxdigit(highch)) { - PRINT_ERRORX("Line %" PRIu64 ": Unexpected '%c' (non-hex) input.\n", g.linenumber, highch); - goto error; - } - - lowch = getchar(); - if (lowch == EOF) { - g.eof = true; - PRINT_ERRORX("Line %" PRIu64 ": Unexpected end of file (2 hex digits per byte).\n", g.linenumber); - goto error; - } - else if (!isxdigit(lowch)) { - PRINT_ERRORX("Line %" PRIu64 ": Unexpected '%c' (non-hex) input.\n", g.linenumber, lowch); - goto error; - } - - nextch = (hextoint(highch) << 4) | hextoint(lowch); - break; - } - default: { - if (isprint(firstch)) { - nextch = firstch; - break; - } - PRINT_ERRORX("Line %" PRIu64 ": Nonprintable character found.", g.linenumber); - goto error; - } - } - if (nextch == EOF) { - break; - } - if (idx == datasize[which]) { - /* Overflow, double the memory. */ - if (doublechararray(&g.get_dbt.data[which], &datasize[which])) goto error; - datum = g.get_dbt.data[which]; - } - datum[idx] = (char)nextch; - idx++; - } - if (firstch == EOF) g.eof = true; - } - else { - for (highch = getchar(); highch != EOF; highch = getchar()) { - if (highch == '\n') { - /* Done reading this key/value. 
*/ - break; - } - - lowch = getchar(); - if (lowch == EOF) { - g.eof = true; - PRINT_ERRORX("Line %" PRIu64 ": Unexpected end of file (2 hex digits per byte).\n", g.linenumber); - goto error; - } - if (!isxdigit(highch)) { - PRINT_ERRORX("Line %" PRIu64 ": Unexpected '%c' (non-hex) input.\n", g.linenumber, highch); - goto error; - } - if (!isxdigit(lowch)) { - PRINT_ERRORX("Line %" PRIu64 ": Unexpected '%c' (non-hex) input.\n", g.linenumber, lowch); - goto error; - } - if (idx == datasize[which]) { - /* Overflow, double the memory. */ - if (doublechararray(&g.get_dbt.data[which], &datasize[which])) goto error; - datum = g.get_dbt.data[which]; - } - datum[idx] = (char)((hextoint(highch) << 4) | hextoint(lowch)); - idx++; - } - if (highch == EOF) g.eof = true; - } - - /* Done reading. */ - pdbt->size = idx; - pdbt->data = (void*)datum; - return EXIT_SUCCESS; -error: - return EXIT_FAILURE; -} - -static int insert_pair(DBT* key, DBT* data) -{ - DB* db = g.db; - - int retval = db->put(db, NULL, key, data, g.overwritekeys ? 0 : DB_NOOVERWRITE); - if (retval != 0) { - //TODO: Check for transaction failures/etc.. retry if necessary. - PRINT_ERROR(retval, "DB->put"); - if (!(retval == DB_KEYEXIST && g.overwritekeys)) goto error; - } - return EXIT_SUCCESS; -error: - return EXIT_FAILURE; -} - -int read_keys() -{ - DBT key; - DBT data; - int spacech; - - char footer[sizeof("ATA=END\n")]; - - memset(&key, 0, sizeof(key)); - memset(&data, 0, sizeof(data)); - - - //TODO: Start transaction/end transaction/abort/retry/etc - - if (!g.leadingspace) { - assert(g.plaintext); - while (!g.eof) { - if (caught_any_signals()) goto success; - g.linenumber++; - if (get_dbt(&key) != 0) goto error; - if (g.eof) { - if (key.size == 0) { - //Last entry had no newline. Done. 
- break; - } - PRINT_ERRORX("Line %" PRIu64 ": Key exists but value missing.", g.linenumber); - goto error; - } - g.linenumber++; - if (get_dbt(&data) != 0) goto error; - if (insert_pair(&key, &data) != 0) goto error; - } - } - else while (!g.eof) { - if (caught_any_signals()) goto success; - g.linenumber++; - spacech = getchar(); - switch (spacech) { - case (EOF): { - /* Done. */ - g.eof = true; - goto success; - } - case (' '): { - /* Time to read a key. */ - if (get_dbt(&key) != 0) goto error; - break; - } - case ('D'): { - if (fgets(footer, sizeof("ATA=END\n"), stdin) != NULL && - (!strcmp(footer, "ATA=END") || !strcmp(footer, "ATA=END\n"))) - { - goto success; - } - goto unexpectedinput; - } - default: { -unexpectedinput: - PRINT_ERRORX("Line %" PRIu64 ": Unexpected input while reading key.\n", g.linenumber); - goto error; - } - } - - if (g.eof) { - PRINT_ERRORX("Line %" PRIu64 ": Key exists but value missing.", g.linenumber); - goto error; - } - g.linenumber++; - spacech = getchar(); - switch (spacech) { - case (EOF): { - g.eof = true; - PRINT_ERRORX("Line %" PRIu64 ": Unexpected end of file while reading value.\n", g.linenumber); - goto error; - } - case (' '): { - /* Time to read a key. 
*/ - if (get_dbt(&data) != 0) goto error; - break; - } - default: { - PRINT_ERRORX("Line %" PRIu64 ": Unexpected input while reading value.\n", g.linenumber); - goto error; - } - } - if (insert_pair(&key, &data) != 0) goto error; - } -success: - return EXIT_SUCCESS; -error: - return EXIT_FAILURE; -} - -int close_database() -{ - DB* db = g.db; - int retval; - - assert(db); - if ((retval = db->close(db, 0)) != 0) { - PRINT_ERROR(retval, "DB->close"); - goto error; - } - return EXIT_SUCCESS; -error: - return EXIT_FAILURE; -} From 741efcd4ee0689cdb7ddaa7f71ca1153310813bc Mon Sep 17 00:00:00 2001 From: John Esmet Date: Sat, 14 Jun 2014 20:25:34 -0400 Subject: [PATCH 014/190] FT-197 Store cachefiles in OMTs only, remove linked list pointers --- ft/cachetable-internal.h | 6 +- ft/cachetable.cc | 300 ++++++++++------------ ft/tests/cachetable-checkpointer-class.cc | 40 +-- 3 files changed, 154 insertions(+), 192 deletions(-) diff --git a/ft/cachetable-internal.h b/ft/cachetable-internal.h index 0e0d1ad0f64..9eb3ec66568 100644 --- a/ft/cachetable-internal.h +++ b/ft/cachetable-internal.h @@ -178,8 +178,6 @@ class pair_list; // Maps to a file on disk. // struct cachefile { - CACHEFILE next; - CACHEFILE prev; // these next two fields are protected by cachetable's list lock // they are managed whenever we add or remove a pair from // the cachetable. 
As of Riddler, this linked list is only used to @@ -439,14 +437,12 @@ public: bool evict_some_stale_pair(evictor* ev); void free_stale_data(evictor* ev); // access to these fields are protected by the lock - CACHEFILE m_active_head; // head of CACHEFILEs that are active - CACHEFILE m_stale_head; // head of CACHEFILEs that are stale - CACHEFILE m_stale_tail; // tail of CACHEFILEs that are stale FILENUM m_next_filenum_to_use; uint32_t m_next_hash_id_to_use; toku_pthread_rwlock_t m_lock; // this field is publoc so we are still POD toku::omt m_active_filenum; toku::omt m_active_fileid; + toku::omt m_stale_fileid; private: CACHEFILE find_cachefile_in_list_unlocked(CACHEFILE start, struct fileid* fileid); }; diff --git a/ft/cachetable.cc b/ft/cachetable.cc index d7c734cc5fd..d57592286a4 100644 --- a/ft/cachetable.cc +++ b/ft/cachetable.cc @@ -4414,43 +4414,48 @@ void checkpointer::increment_num_txns() { m_checkpoint_num_txns++; } +struct iterate_begin_checkpoint { + LSN lsn_of_checkpoint_in_progress; + iterate_begin_checkpoint(LSN lsn) : lsn_of_checkpoint_in_progress(lsn) { } + static int fn(const CACHEFILE &cf, const uint32_t UU(idx), struct iterate_begin_checkpoint *info) { + assert(cf->begin_checkpoint_userdata); + if (cf->for_checkpoint) { + cf->begin_checkpoint_userdata(info->lsn_of_checkpoint_in_progress, cf->userdata); + } + return 0; + } +}; + // // Update the user data in any cachefiles in our checkpoint list. 
// void checkpointer::update_cachefiles() { - CACHEFILE cf; - for(cf = m_cf_list->m_active_head; cf; cf=cf->next) { - assert(cf->begin_checkpoint_userdata); - if (cf->for_checkpoint) { - cf->begin_checkpoint_userdata(m_lsn_of_checkpoint_in_progress, - cf->userdata); - } - } + struct iterate_begin_checkpoint iterate(m_lsn_of_checkpoint_in_progress); + int r = m_cf_list->m_active_fileid.iterate(&iterate); + assert_zero(r); } +struct iterate_note_pin { + static int fn(const CACHEFILE &cf, uint32_t UU(idx), void **UU(extra)) { + assert(cf->note_pin_by_checkpoint); + cf->note_pin_by_checkpoint(cf, cf->userdata); + cf->for_checkpoint = true; + return 0; + } +}; + // // Sets up and kicks off a checkpoint. // void checkpointer::begin_checkpoint() { // 1. Initialize the accountability counters. - m_checkpoint_num_files = 0; m_checkpoint_num_txns = 0; // 2. Make list of cachefiles to be included in the checkpoint. - // TODO: How do we remove the non-lock cachetable reference here? m_cf_list->read_lock(); - for (CACHEFILE cf = m_cf_list->m_active_head; cf; cf = cf->next) { - // The caller must serialize open, close, and begin checkpoint. - // So we should never see a closing cachefile here. - // Is there an assert we can add here? - - // Putting this check here so that this method may be called - // by cachetable tests. - assert(cf->note_pin_by_checkpoint); - cf->note_pin_by_checkpoint(cf, cf->userdata); - cf->for_checkpoint = true; - m_checkpoint_num_files++; - } + m_cf_list->m_active_fileid.iterate(nullptr); + m_checkpoint_num_files = m_cf_list->m_active_fileid.size(); m_cf_list->read_unlock(); // 3. Create log entries for this checkpoint. 
@@ -4475,6 +4480,14 @@ void checkpointer::begin_checkpoint() { m_list->write_pending_exp_unlock(); } +struct iterate_log_fassociate { + static int fn(const CACHEFILE &cf, uint32_t UU(idx), void **UU(extra)) { + assert(cf->log_fassociate_during_checkpoint); + cf->log_fassociate_during_checkpoint(cf, cf->userdata); + return 0; + } +}; + // // Assuming the logger exists, this will write out the folloing // information to the log. @@ -4498,10 +4511,7 @@ void checkpointer::log_begin_checkpoint() { m_lsn_of_checkpoint_in_progress = begin_lsn; // Log the list of open dictionaries. - for (CACHEFILE cf = m_cf_list->m_active_head; cf; cf = cf->next) { - assert(cf->log_fassociate_during_checkpoint); - cf->log_fassociate_during_checkpoint(cf, cf->userdata); - } + m_cf_list->m_active_fileid.iterate(nullptr); // Write open transactions to the log. r = toku_txn_manager_iter_over_live_txns( @@ -4576,17 +4586,29 @@ void checkpointer::end_checkpoint(void (*testcallback_f)(void*), void* testextr toku_free(checkpoint_cfs); } -void checkpointer::fill_checkpoint_cfs(CACHEFILE* checkpoint_cfs) { - m_cf_list->read_lock(); - uint32_t curr_index = 0; - for (CACHEFILE cf = m_cf_list->m_active_head; cf; cf = cf->next) { - if (cf->for_checkpoint) { - assert(curr_index < m_checkpoint_num_files); - checkpoint_cfs[curr_index] = cf; - curr_index++; - } +struct iterate_checkpoint_cfs { + CACHEFILE *checkpoint_cfs; + uint32_t checkpoint_num_files; + uint32_t curr_index; + iterate_checkpoint_cfs(CACHEFILE *cfs, uint32_t num_files) : + checkpoint_cfs(cfs), checkpoint_num_files(num_files), curr_index(0) { } - assert(curr_index == m_checkpoint_num_files); + static int fn(const CACHEFILE &cf, uint32_t UU(idx), struct iterate_checkpoint_cfs *info) { + if (cf->for_checkpoint) { + assert(info->curr_index < info->checkpoint_num_files); + info->checkpoint_cfs[info->curr_index] = cf; + info->curr_index++; + } + return 0; + } +}; + +void checkpointer::fill_checkpoint_cfs(CACHEFILE* checkpoint_cfs) { + struct 
iterate_checkpoint_cfs iterate(checkpoint_cfs, m_checkpoint_num_files); + + m_cf_list->read_lock(); + m_cf_list->m_active_fileid.iterate(&iterate); + assert(iterate.curr_index == m_checkpoint_num_files); m_cf_list->read_unlock(); } @@ -4671,19 +4693,18 @@ void checkpointer::remove_cachefiles(CACHEFILE* checkpoint_cfs) { static_assert(std::is_pod::value, "cachefile_list isn't POD"); void cachefile_list::init() { - m_active_head = NULL; - m_stale_head = NULL; - m_stale_tail = NULL; m_next_filenum_to_use.fileid = 0; m_next_hash_id_to_use = 0; toku_pthread_rwlock_init(&m_lock, NULL); m_active_filenum.create(); m_active_fileid.create(); + m_stale_fileid.create(); } void cachefile_list::destroy() { m_active_filenum.destroy(); m_active_fileid.destroy(); + m_stale_fileid.destroy(); toku_pthread_rwlock_destroy(&m_lock); } @@ -4702,34 +4723,31 @@ void cachefile_list::write_lock() { void cachefile_list::write_unlock() { toku_pthread_rwlock_wrunlock(&m_lock); } -int cachefile_list::cachefile_of_iname_in_env(const char *iname_in_env, CACHEFILE *cf) { - read_lock(); - CACHEFILE extant; - int r; - r = ENOENT; - for (extant = m_active_head; extant; extant = extant->next) { - if (extant->fname_in_env && - !strcmp(extant->fname_in_env, iname_in_env)) { - *cf = extant; - r = 0; - break; - } - } - read_unlock(); - return r; -} -int cachefile_list::cachefile_of_filenum(FILENUM filenum, CACHEFILE *cf) { - read_lock(); - CACHEFILE extant; - int r = ENOENT; - *cf = NULL; - for (extant = m_active_head; extant; extant = extant->next) { - if (extant->filenum.fileid==filenum.fileid) { - *cf = extant; - r = 0; - break; +struct iterate_find_iname { + const char *iname_in_env; + CACHEFILE found_cf; + iterate_find_iname(const char *iname) : iname_in_env(iname), found_cf(nullptr) { } + static int fn(const CACHEFILE &cf, uint32_t UU(idx), struct iterate_find_iname *info) { + if (cf->fname_in_env && strcmp(cf->fname_in_env, info->iname_in_env) == 0) { + info->found_cf = cf; + return -1; } + return 
0; + } +}; + +int cachefile_list::cachefile_of_iname_in_env(const char *iname_in_env, CACHEFILE *cf) { + struct iterate_find_iname iterate(iname_in_env); + + read_lock(); + int r = m_active_fileid.iterate(&iterate); + if (iterate.found_cf != nullptr) { + assert(strcmp(iterate.found_cf->fname_in_env, iname_in_env) == 0); + *cf = iterate.found_cf; + r = 0; + } else { + r = ENOENT; } read_unlock(); return r; @@ -4746,20 +4764,23 @@ static int cachefile_find_by_filenum(const CACHEFILE &a_cf, const FILENUM &b) { } } +int cachefile_list::cachefile_of_filenum(FILENUM filenum, CACHEFILE *cf) { + read_lock(); + int r = m_active_filenum.find_zero(filenum, cf, nullptr); + if (r == DB_NOTFOUND) { + r = ENOENT; + } else { + invariant_zero(r); + } + read_unlock(); + return r; +} + static int cachefile_find_by_fileid(const CACHEFILE &a_cf, const struct fileid &b) { return toku_fileid_cmp(a_cf->fileid, b); } void cachefile_list::add_cf_unlocked(CACHEFILE cf) { - invariant(cf->next == NULL); - invariant(cf->prev == NULL); - cf->next = m_active_head; - cf->prev = NULL; - if (m_active_head) { - m_active_head->prev = cf; - } - m_active_head = cf; - int r; r = m_active_filenum.insert(cf, cf->filenum, nullptr); assert_zero(r); @@ -4769,36 +4790,13 @@ void cachefile_list::add_cf_unlocked(CACHEFILE cf) { void cachefile_list::add_stale_cf(CACHEFILE cf) { write_lock(); - invariant(cf->next == NULL); - invariant(cf->prev == NULL); - - cf->next = m_stale_head; - cf->prev = NULL; - if (m_stale_head) { - m_stale_head->prev = cf; - } - m_stale_head = cf; - if (m_stale_tail == NULL) { - m_stale_tail = cf; - } + int r = m_stale_fileid.insert(cf, cf->fileid, nullptr); + assert_zero(r); write_unlock(); } void cachefile_list::remove_cf(CACHEFILE cf) { write_lock(); - invariant(m_active_head != NULL); - if (cf->next) { - cf->next->prev = cf->prev; - } - if (cf->prev) { - cf->prev->next = cf->next; - } - if (cf == m_active_head) { - invariant(cf->prev == NULL); - m_active_head = cf->next; - } - 
cf->prev = NULL; - cf->next = NULL; uint32_t idx; int r; @@ -4816,24 +4814,12 @@ void cachefile_list::remove_cf(CACHEFILE cf) { } void cachefile_list::remove_stale_cf_unlocked(CACHEFILE cf) { - invariant(m_stale_head != NULL); - invariant(m_stale_tail != NULL); - if (cf->next) { - cf->next->prev = cf->prev; - } - if (cf->prev) { - cf->prev->next = cf->next; - } - if (cf == m_stale_head) { - invariant(cf->prev == NULL); - m_stale_head = cf->next; - } - if (cf == m_stale_tail) { - invariant(cf->next == NULL); - m_stale_tail = cf->prev; - } - cf->prev = NULL; - cf->next = NULL; + uint32_t idx; + int r; + r = m_stale_fileid.find_zero(cf->fileid, nullptr, &idx); + assert_zero(r); + r = m_stale_fileid.delete_at(idx); + assert_zero(r); } FILENUM cachefile_list::reserve_filenum() { @@ -4849,11 +4835,6 @@ FILENUM cachefile_list::reserve_filenum() { break; } FILENUM filenum = m_next_filenum_to_use; -#if TOKU_DEBUG_PARANOID - for (CACHEFILE extant = m_active_head; extant; extant = extant->next) { - assert(filenum.fileid != extant->filenum.fileid); - } -#endif m_next_filenum_to_use.fileid++; write_unlock(); return filenum; @@ -4865,91 +4846,77 @@ uint32_t cachefile_list::get_new_hash_id_unlocked() { return retval; } -CACHEFILE cachefile_list::find_cachefile_in_list_unlocked( - CACHEFILE start, - struct fileid* fileid - ) -{ - CACHEFILE retval = NULL; - for (CACHEFILE extant = start; extant; extant = extant->next) { - if (toku_fileids_are_equal(&extant->fileid, fileid)) { - // Clients must serialize cachefile open, close, and unlink - // So, during open, we should never see a closing cachefile - // or one that has been marked as unlink on close. 
- assert(!extant->unlink_on_close); - retval = extant; - goto exit; - } - } -exit: - return retval; -} - CACHEFILE cachefile_list::find_cachefile_unlocked(struct fileid* fileid) { CACHEFILE cf = nullptr; int r = m_active_fileid.find_zero(*fileid, &cf, nullptr); if (r == 0) { assert(!cf->unlink_on_close); } -#if TOKU_DEBUG_PARANOID - assert(cf == find_cachefile_in_list_unlocked(m_active_head, fileid)); -#endif return cf; } CACHEFILE cachefile_list::find_stale_cachefile_unlocked(struct fileid* fileid) { - return find_cachefile_in_list_unlocked(m_stale_head, fileid); + CACHEFILE cf = nullptr; + int r = m_stale_fileid.find_zero(*fileid, &cf, nullptr); + if (r == 0) { + assert(!cf->unlink_on_close); + } + return cf; } void cachefile_list::verify_unused_filenum(FILENUM filenum) { int r = m_active_filenum.find_zero(filenum, nullptr, nullptr); assert(r == DB_NOTFOUND); -#if TOKU_DEBUG_PARANOID - for (CACHEFILE extant = m_active_head; extant; extant = extant->next) { - invariant(extant->filenum.fileid != filenum.fileid); - } -#endif } // returns true if some eviction ran, false otherwise bool cachefile_list::evict_some_stale_pair(evictor* ev) { - PAIR p = NULL; - CACHEFILE cf_to_destroy = NULL; write_lock(); - if (m_stale_tail == NULL) { + if (m_stale_fileid.size() == 0) { write_unlock(); return false; } - p = m_stale_tail->cf_head; + + CACHEFILE stale_cf = nullptr; + int r = m_stale_fileid.fetch(0, &stale_cf); + assert_zero(r); + // we should not have a cf in the stale list // that does not have any pairs + PAIR p = stale_cf->cf_head; paranoid_invariant(p != NULL); - evict_pair_from_cachefile(p); // now that we have evicted something, // let's check if the cachefile is needed anymore - if (m_stale_tail->cf_head == NULL) { - cf_to_destroy = m_stale_tail; - remove_stale_cf_unlocked(m_stale_tail); + // + // it is not needed if the latest eviction caused + // the cf_head for that cf to become null + bool destroy_cf = stale_cf->cf_head == nullptr; + if (destroy_cf) { + 
remove_stale_cf_unlocked(stale_cf); } write_unlock(); ev->remove_pair_attr(p->attr); cachetable_free_pair(p); - if (cf_to_destroy) { - cachefile_destroy(cf_to_destroy); + if (destroy_cf) { + cachefile_destroy(stale_cf); } return true; } void cachefile_list::free_stale_data(evictor* ev) { write_lock(); - while (m_stale_tail != NULL) { - PAIR p = m_stale_tail->cf_head; + while (m_stale_fileid.size() != 0) { + CACHEFILE stale_cf = nullptr; + int r = m_stale_fileid.fetch(0, &stale_cf); + assert_zero(r); + // we should not have a cf in the stale list // that does not have any pairs + PAIR p = stale_cf->cf_head; paranoid_invariant(p != NULL); evict_pair_from_cachefile(p); @@ -4958,10 +4925,9 @@ void cachefile_list::free_stale_data(evictor* ev) { // now that we have evicted something, // let's check if the cachefile is needed anymore - if (m_stale_tail->cf_head == NULL) { - CACHEFILE cf_to_destroy = m_stale_tail; - remove_stale_cf_unlocked(m_stale_tail); - cachefile_destroy(cf_to_destroy); + if (stale_cf->cf_head == NULL) { + remove_stale_cf_unlocked(stale_cf); + cachefile_destroy(stale_cf); } } write_unlock(); diff --git a/ft/tests/cachetable-checkpointer-class.cc b/ft/tests/cachetable-checkpointer-class.cc index c2adc202fb5..fa950d3972a 100644 --- a/ft/tests/cachetable-checkpointer-class.cc +++ b/ft/tests/cachetable-checkpointer-class.cc @@ -112,6 +112,14 @@ struct checkpointer_test { uint32_t k); }; +static void init_cachefile(CACHEFILE cf, int which_cf, bool for_checkpoint) { + memset(cf, 0, sizeof(*cf)); + create_dummy_functions(cf); + cf->fileid = { 0, (unsigned) which_cf }; + cf->filenum = { (unsigned) which_cf }; + cf->for_checkpoint = for_checkpoint; +} + //------------------------------------------------------------------------------ // test_begin_checkpoint() - // @@ -135,33 +143,28 @@ void checkpointer_test::test_begin_checkpoint() { // 2. Call checkpoint with ONE cachefile. 
//cachefile cf; struct cachefile cf; - cf.next = NULL; - cf.for_checkpoint = false; - m_cp.m_cf_list->m_active_head = &cf; - create_dummy_functions(&cf); + init_cachefile(&cf, 0, false); + m_cp.m_cf_list->add_cf_unlocked(&cf); m_cp.begin_checkpoint(); assert(m_cp.m_checkpoint_num_files == 1); assert(cf.for_checkpoint == true); + m_cp.m_cf_list->remove_cf(&cf); // 3. Call checkpoint with MANY cachefiles. const uint32_t count = 3; struct cachefile cfs[count]; - m_cp.m_cf_list->m_active_head = &cfs[0]; for (uint32_t i = 0; i < count; ++i) { - cfs[i].for_checkpoint = false; + init_cachefile(&cfs[i], i, false); create_dummy_functions(&cfs[i]); - if (i == count - 1) { - cfs[i].next = NULL; - } else { - cfs[i].next = &cfs[i + 1]; - } + m_cp.m_cf_list->add_cf_unlocked(&cfs[i]); } m_cp.begin_checkpoint(); assert(m_cp.m_checkpoint_num_files == count); for (uint32_t i = 0; i < count; ++i) { assert(cfs[i].for_checkpoint == true); + cfl.remove_cf(&cfs[i]); } ctbl.list.destroy(); m_cp.destroy(); @@ -195,10 +198,8 @@ void checkpointer_test::test_pending_bits() { // struct cachefile cf; cf.cachetable = &ctbl; - memset(&cf, 0, sizeof(cf)); - cf.next = NULL; - cf.for_checkpoint = true; - m_cp.m_cf_list->m_active_head = &cf; + init_cachefile(&cf, 0, true); + m_cp.m_cf_list->add_cf_unlocked(&cf); create_dummy_functions(&cf); CACHEKEY k; @@ -258,6 +259,7 @@ void checkpointer_test::test_pending_bits() { ctbl.list.destroy(); m_cp.destroy(); + cfl.remove_cf(&cf); cfl.destroy(); } @@ -337,14 +339,11 @@ void checkpointer_test::test_end_checkpoint() { cfl.init(); struct cachefile cf; - memset(&cf, 0, sizeof(cf)); - cf.next = NULL; - cf.for_checkpoint = true; - create_dummy_functions(&cf); + init_cachefile(&cf, 0, true); ZERO_STRUCT(m_cp); m_cp.init(&ctbl.list, NULL, &ctbl.ev, &cfl); - m_cp.m_cf_list->m_active_head = &cf; + m_cp.m_cf_list->add_cf_unlocked(&cf); // 2. Add data before running checkpoint. 
const uint32_t count = 6; @@ -394,6 +393,7 @@ void checkpointer_test::test_end_checkpoint() { assert(pp); m_cp.m_list->evict_completely(pp); } + cfl.remove_cf(&cf); m_cp.destroy(); ctbl.list.destroy(); cfl.destroy(); From e583525a40f96febe3d0e8118aabf9fe07f7226a Mon Sep 17 00:00:00 2001 From: John Esmet Date: Sat, 14 Jun 2014 20:25:34 -0400 Subject: [PATCH 015/190] FT-258 Move cursor code into ft/cursor.h and ft/cursor.cc --- ft/CMakeLists.txt | 1 + ft/block_table.cc | 3 + ft/checkpoint.cc | 1 + ft/cursor.cc | 496 ++++++++++++++++++++++++++++ ft/cursor.h | 124 ++++++- ft/ft-internal.h | 29 +- ft/ft-ops.cc | 676 +++----------------------------------- ft/ft-ops.h | 47 --- ft/ft-search.h | 156 --------- ft/ft.h | 1 - ft/leafentry.h | 8 +- ft/tdb_logprint.cc | 3 +- ft/tests/ft-clock-test.cc | 6 +- ft/tests/test.h | 1 + ft/txn.cc | 15 + ft/txn.h | 14 + ft/ule.cc | 67 +++- src/ydb-internal.h | 4 +- src/ydb_cursor.cc | 1 + 19 files changed, 795 insertions(+), 858 deletions(-) create mode 100644 ft/cursor.cc delete mode 100644 ft/ft-search.h diff --git a/ft/CMakeLists.txt b/ft/CMakeLists.txt index 3ee8cbd48d5..ddf5eda4dfe 100644 --- a/ft/CMakeLists.txt +++ b/ft/CMakeLists.txt @@ -30,6 +30,7 @@ set(FT_SOURCES cachetable checkpoint compress + cursor ft ft-cachetable-wrappers ft-flusher diff --git a/ft/block_table.cc b/ft/block_table.cc index ba7d60b8d42..725aa9ec7d9 100644 --- a/ft/block_table.cc +++ b/ft/block_table.cc @@ -101,6 +101,9 @@ PATENT RIGHTS GRANT: #include "wbuf.h" #include +// TODO: reorganize this dependency +#include "ft/ft-ops.h" // for toku_maybe_truncate_file + //When the translation (btt) is stored on disk: // In Header: // size_on_disk diff --git a/ft/checkpoint.cc b/ft/checkpoint.cc index bc4629a1d08..98a903675a0 100644 --- a/ft/checkpoint.cc +++ b/ft/checkpoint.cc @@ -129,6 +129,7 @@ PATENT RIGHTS GRANT: #include #include +#include "ft/ft.h" #include "fttypes.h" #include "cachetable.h" #include "log-internal.h" diff --git a/ft/cursor.cc b/ft/cursor.cc 
new file mode 100644 index 00000000000..5976234f5b2 --- /dev/null +++ b/ft/cursor.cc @@ -0,0 +1,496 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: + +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuDB, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2014 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 
11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. + + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. 
+*/ + +#include "ft/ft-internal.h" + +#include "ft/cursor.h" +#include "ft/leafentry.h" +#include "ft/txn.h" + +int toku_ft_cursor(FT_HANDLE ft_handle, FT_CURSOR *cursorptr, TOKUTXN ttxn, + bool is_snapshot_read, bool disable_prefetching) { + if (is_snapshot_read) { + invariant(ttxn != NULL); + int accepted = toku_txn_reads_txnid(ft_handle->ft->h->root_xid_that_created, ttxn); + if (accepted!=TOKUDB_ACCEPT) { + invariant(accepted==0); + return TOKUDB_MVCC_DICTIONARY_TOO_NEW; + } + } + FT_CURSOR XCALLOC(cursor); + cursor->ft_handle = ft_handle; + cursor->prefetching = false; + toku_init_dbt(&cursor->range_lock_left_key); + toku_init_dbt(&cursor->range_lock_right_key); + cursor->left_is_neg_infty = false; + cursor->right_is_pos_infty = false; + cursor->is_snapshot_read = is_snapshot_read; + cursor->is_leaf_mode = false; + cursor->ttxn = ttxn; + cursor->disable_prefetching = disable_prefetching; + cursor->is_temporary = false; + *cursorptr = cursor; + return 0; +} + +void toku_ft_cursor_close(FT_CURSOR cursor) { + toku_destroy_dbt(&cursor->key); + toku_destroy_dbt(&cursor->val); + toku_destroy_dbt(&cursor->range_lock_left_key); + toku_destroy_dbt(&cursor->range_lock_right_key); + toku_free(cursor); +} + +void toku_ft_cursor_remove_restriction(FT_CURSOR ftcursor) { + ftcursor->out_of_range_error = 0; + ftcursor->direction = 0; +} + +void toku_ft_cursor_set_check_interrupt_cb(FT_CURSOR ftcursor, FT_CHECK_INTERRUPT_CALLBACK cb, void *extra) { + ftcursor->interrupt_cb = cb; + ftcursor->interrupt_cb_extra = extra; +} + +void toku_ft_cursor_set_temporary(FT_CURSOR ftcursor) { + ftcursor->is_temporary = true; +} + +void toku_ft_cursor_set_leaf_mode(FT_CURSOR ftcursor) { + ftcursor->is_leaf_mode = true; +} + +int toku_ft_cursor_is_leaf_mode(FT_CURSOR ftcursor) { + return ftcursor->is_leaf_mode; +} + +// TODO: Rename / cleanup - this has nothing to do with locking +void toku_ft_cursor_set_range_lock(FT_CURSOR cursor, + const DBT *left, const DBT *right, + bool 
left_is_neg_infty, bool right_is_pos_infty, + int out_of_range_error) { + // Destroy any existing keys and then clone the given left, right keys + toku_destroy_dbt(&cursor->range_lock_left_key); + if (left_is_neg_infty) { + cursor->left_is_neg_infty = true; + } else { + toku_clone_dbt(&cursor->range_lock_left_key, *left); + } + + toku_destroy_dbt(&cursor->range_lock_right_key); + if (right_is_pos_infty) { + cursor->right_is_pos_infty = true; + } else { + toku_clone_dbt(&cursor->range_lock_right_key, *right); + } + + // TOKUDB_FOUND_BUT_REJECTED is a DB_NOTFOUND with instructions to stop looking. (Faster) + cursor->out_of_range_error = out_of_range_error == DB_NOTFOUND ? TOKUDB_FOUND_BUT_REJECTED : out_of_range_error; + cursor->direction = 0; +} + +void toku_ft_cursor_set_prefetching(FT_CURSOR cursor) { + cursor->prefetching = true; +} + +bool toku_ft_cursor_prefetching(FT_CURSOR cursor) { + return cursor->prefetching; +} + +//Return true if cursor is uninitialized. false otherwise. +bool toku_ft_cursor_not_set(FT_CURSOR cursor) { + assert((cursor->key.data==NULL) == (cursor->val.data==NULL)); + return (bool)(cursor->key.data == NULL); +} + +struct ft_cursor_search_struct { + FT_GET_CALLBACK_FUNCTION getf; + void *getf_v; + FT_CURSOR cursor; + ft_search *search; +}; + +/* search for the first kv pair that matches the search object */ +static int ft_cursor_search(FT_CURSOR cursor, ft_search *search, + FT_GET_CALLBACK_FUNCTION getf, void *getf_v, bool can_bulk_fetch) { + int r = toku_ft_search(cursor->ft_handle, search, getf, getf_v, cursor, can_bulk_fetch); + return r; +} + +static inline int compare_k_x(FT_HANDLE ft_handle, const DBT *k, const DBT *x) { + FAKE_DB(db, &ft_handle->ft->cmp_descriptor); + return ft_handle->ft->compare_fun(&db, k, x); +} + +int toku_ft_cursor_compare_one(const ft_search &UU(search), const DBT *UU(x)) { + return 1; +} + +static int ft_cursor_compare_set(const ft_search &search, const DBT *x) { + FT_HANDLE CAST_FROM_VOIDP(ft_handle, 
search.context); + return compare_k_x(ft_handle, search.k, x) <= 0; /* return min xy: kv <= xy */ +} + +static int +ft_cursor_current_getf(ITEMLEN keylen, bytevec key, + ITEMLEN vallen, bytevec val, + void *v, bool lock_only) { + struct ft_cursor_search_struct *CAST_FROM_VOIDP(bcss, v); + int r; + if (key==NULL) { + r = bcss->getf(0, NULL, 0, NULL, bcss->getf_v, lock_only); + } else { + FT_CURSOR cursor = bcss->cursor; + DBT newkey; + toku_fill_dbt(&newkey, key, keylen); + if (compare_k_x(cursor->ft_handle, &cursor->key, &newkey) != 0) { + r = bcss->getf(0, NULL, 0, NULL, bcss->getf_v, lock_only); // This was once DB_KEYEMPTY + if (r==0) r = TOKUDB_FOUND_BUT_REJECTED; + } + else + r = bcss->getf(keylen, key, vallen, val, bcss->getf_v, lock_only); + } + return r; +} + +static int ft_cursor_compare_next(const ft_search &search, const DBT *x) { + FT_HANDLE CAST_FROM_VOIDP(ft_handle, search.context); + return compare_k_x(ft_handle, search.k, x) < 0; /* return min xy: kv < xy */ +} + +int toku_ft_cursor_current(FT_CURSOR cursor, int op, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) { + if (toku_ft_cursor_not_set(cursor)) { + return EINVAL; + } + cursor->direction = 0; + if (op == DB_CURRENT) { + struct ft_cursor_search_struct bcss = {getf, getf_v, cursor, 0}; + ft_search search; + ft_search_init(&search, ft_cursor_compare_set, FT_SEARCH_LEFT, &cursor->key, nullptr, cursor->ft_handle); + int r = toku_ft_search(cursor->ft_handle, &search, ft_cursor_current_getf, &bcss, cursor, false); + ft_search_finish(&search); + return r; + } + return getf(cursor->key.size, cursor->key.data, cursor->val.size, cursor->val.data, getf_v, false); // ft_cursor_copyout(cursor, outkey, outval); +} + +int toku_ft_cursor_first(FT_CURSOR cursor, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) { + cursor->direction = 0; + ft_search search; + ft_search_init(&search, toku_ft_cursor_compare_one, FT_SEARCH_LEFT, nullptr, nullptr, cursor->ft_handle); + int r = ft_cursor_search(cursor, &search, getf, 
getf_v, false); + ft_search_finish(&search); + return r; +} + +int toku_ft_cursor_last(FT_CURSOR cursor, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) { + cursor->direction = 0; + ft_search search; + ft_search_init(&search, toku_ft_cursor_compare_one, FT_SEARCH_RIGHT, nullptr, nullptr, cursor->ft_handle); + int r = ft_cursor_search(cursor, &search, getf, getf_v, false); + ft_search_finish(&search); + return r; +} + +int toku_ft_cursor_check_restricted_range(FT_CURSOR c, bytevec key, ITEMLEN keylen) { + if (c->out_of_range_error) { + FT ft = c->ft_handle->ft; + FAKE_DB(db, &ft->cmp_descriptor); + DBT found_key; + toku_fill_dbt(&found_key, key, keylen); + if ((!c->left_is_neg_infty && c->direction <= 0 && ft->compare_fun(&db, &found_key, &c->range_lock_left_key) < 0) || + (!c->right_is_pos_infty && c->direction >= 0 && ft->compare_fun(&db, &found_key, &c->range_lock_right_key) > 0)) { + invariant(c->out_of_range_error); + return c->out_of_range_error; + } + } + // Reset cursor direction to mitigate risk if some query type doesn't set the direction. + // It is always correct to check both bounds (which happens when direction==0) but it can be slower. + c->direction = 0; + return 0; +} + +int toku_ft_cursor_shortcut(FT_CURSOR cursor, int direction, uint32_t index, bn_data *bd, + FT_GET_CALLBACK_FUNCTION getf, void *getf_v, + uint32_t *keylen, void **key, uint32_t *vallen, void **val) { + int r = 0; + // if we are searching towards the end, limit is last element + // if we are searching towards the beginning, limit is the first element + uint32_t limit = (direction > 0) ? (bd->num_klpairs() - 1) : 0; + + //Starting with the prev, find the first real (non-provdel) leafentry. 
+ while (index != limit) { + index += direction; + LEAFENTRY le; + void* foundkey = NULL; + uint32_t foundkeylen = 0; + + r = bd->fetch_klpair(index, &le, &foundkeylen, &foundkey); + invariant_zero(r); + + if (toku_ft_cursor_is_leaf_mode(cursor) || !le_val_is_del(le, cursor->is_snapshot_read, cursor->ttxn)) { + le_extract_val( + le, + toku_ft_cursor_is_leaf_mode(cursor), + cursor->is_snapshot_read, + cursor->ttxn, + vallen, + val + ); + *key = foundkey; + *keylen = foundkeylen; + + cursor->direction = direction; + r = toku_ft_cursor_check_restricted_range(cursor, *key, *keylen); + if (r!=0) { + paranoid_invariant(r == cursor->out_of_range_error); + // We already got at least one entry from the bulk fetch. + // Return 0 (instead of out of range error). + r = 0; + break; + } + r = getf(*keylen, *key, *vallen, *val, getf_v, false); + if (r == TOKUDB_CURSOR_CONTINUE) { + continue; + } + else { + break; + } + } + } + + return r; +} + +int toku_ft_cursor_next(FT_CURSOR cursor, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) { + cursor->direction = +1; + ft_search search; + ft_search_init(&search, ft_cursor_compare_next, FT_SEARCH_LEFT, &cursor->key, nullptr, cursor->ft_handle); + int r = ft_cursor_search(cursor, &search, getf, getf_v, true); + ft_search_finish(&search); + if (r == 0) { + toku_ft_cursor_set_prefetching(cursor); + } + return r; +} + +static int ft_cursor_search_eq_k_x_getf(ITEMLEN keylen, bytevec key, + ITEMLEN vallen, bytevec val, + void *v, bool lock_only) { + struct ft_cursor_search_struct *CAST_FROM_VOIDP(bcss, v); + int r; + if (key==NULL) { + r = bcss->getf(0, NULL, 0, NULL, bcss->getf_v, false); + } else { + FT_CURSOR cursor = bcss->cursor; + DBT newkey; + toku_fill_dbt(&newkey, key, keylen); + if (compare_k_x(cursor->ft_handle, bcss->search->k, &newkey) == 0) { + r = bcss->getf(keylen, key, vallen, val, bcss->getf_v, lock_only); + } else { + r = bcss->getf(0, NULL, 0, NULL, bcss->getf_v, lock_only); + if (r==0) r = TOKUDB_FOUND_BUT_REJECTED; + } + } + 
return r; +} + +/* search for the kv pair that matches the search object and is equal to k */ +static int ft_cursor_search_eq_k_x(FT_CURSOR cursor, ft_search *search, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) { + struct ft_cursor_search_struct bcss = {getf, getf_v, cursor, search}; + int r = toku_ft_search(cursor->ft_handle, search, ft_cursor_search_eq_k_x_getf, &bcss, cursor, false); + return r; +} + +static int ft_cursor_compare_prev(const ft_search &search, const DBT *x) { + FT_HANDLE CAST_FROM_VOIDP(ft_handle, search.context); + return compare_k_x(ft_handle, search.k, x) > 0; /* return max xy: kv > xy */ +} + +int toku_ft_cursor_prev(FT_CURSOR cursor, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) { + cursor->direction = -1; + ft_search search; + ft_search_init(&search, ft_cursor_compare_prev, FT_SEARCH_RIGHT, &cursor->key, nullptr, cursor->ft_handle); + int r = ft_cursor_search(cursor, &search, getf, getf_v, true); + ft_search_finish(&search); + return r; +} + +int toku_ft_cursor_compare_set_range(const ft_search &search, const DBT *x) { + FT_HANDLE CAST_FROM_VOIDP(ft_handle, search.context); + return compare_k_x(ft_handle, search.k, x) <= 0; /* return kv <= xy */ +} + +int toku_ft_cursor_set(FT_CURSOR cursor, DBT *key, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) { + cursor->direction = 0; + ft_search search; + ft_search_init(&search, toku_ft_cursor_compare_set_range, FT_SEARCH_LEFT, key, nullptr, cursor->ft_handle); + int r = ft_cursor_search_eq_k_x(cursor, &search, getf, getf_v); + ft_search_finish(&search); + return r; +} + +int toku_ft_cursor_set_range(FT_CURSOR cursor, DBT *key, DBT *key_bound, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) { + cursor->direction = 0; + ft_search search; + ft_search_init(&search, toku_ft_cursor_compare_set_range, FT_SEARCH_LEFT, key, key_bound, cursor->ft_handle); + int r = ft_cursor_search(cursor, &search, getf, getf_v, false); + ft_search_finish(&search); + return r; +} + +static int 
ft_cursor_compare_set_range_reverse(const ft_search &search, const DBT *x) { + FT_HANDLE CAST_FROM_VOIDP(ft_handle, search.context); + return compare_k_x(ft_handle, search.k, x) >= 0; /* return kv >= xy */ +} + +int toku_ft_cursor_set_range_reverse(FT_CURSOR cursor, DBT *key, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) { + cursor->direction = 0; + ft_search search; + ft_search_init(&search, ft_cursor_compare_set_range_reverse, FT_SEARCH_RIGHT, key, nullptr, cursor->ft_handle); + int r = ft_cursor_search(cursor, &search, getf, getf_v, false); + ft_search_finish(&search); + return r; +} + +//TODO: When tests have been rewritten, get rid of this function. +//Only used by tests. +int toku_ft_cursor_get (FT_CURSOR cursor, DBT *key, FT_GET_CALLBACK_FUNCTION getf, void *getf_v, int get_flags) { + int op = get_flags & DB_OPFLAGS_MASK; + if (get_flags & ~DB_OPFLAGS_MASK) + return EINVAL; + + switch (op) { + case DB_CURRENT: + case DB_CURRENT_BINDING: + return toku_ft_cursor_current(cursor, op, getf, getf_v); + case DB_FIRST: + return toku_ft_cursor_first(cursor, getf, getf_v); + case DB_LAST: + return toku_ft_cursor_last(cursor, getf, getf_v); + case DB_NEXT: + if (toku_ft_cursor_not_set(cursor)) { + return toku_ft_cursor_first(cursor, getf, getf_v); + } else { + return toku_ft_cursor_next(cursor, getf, getf_v); + } + case DB_PREV: + if (toku_ft_cursor_not_set(cursor)) { + return toku_ft_cursor_last(cursor, getf, getf_v); + } else { + return toku_ft_cursor_prev(cursor, getf, getf_v); + } + case DB_SET: + return toku_ft_cursor_set(cursor, key, getf, getf_v); + case DB_SET_RANGE: + return toku_ft_cursor_set_range(cursor, key, nullptr, getf, getf_v); + default: ;// Fall through + } + return EINVAL; +} + +void toku_ft_cursor_peek(FT_CURSOR cursor, const DBT **pkey, const DBT **pval) { + *pkey = &cursor->key; + *pval = &cursor->val; +} + +bool toku_ft_cursor_uninitialized(FT_CURSOR c) { + return toku_ft_cursor_not_set(c); +} + +int toku_ft_lookup(FT_HANDLE ft_handle, DBT *k, 
FT_GET_CALLBACK_FUNCTION getf, void *getf_v) { + FT_CURSOR cursor; + int r = toku_ft_cursor(ft_handle, &cursor, NULL, false, false); + if (r != 0) { + return r; + } + + r = toku_ft_cursor_set(cursor, k, getf, getf_v); + + toku_ft_cursor_close(cursor); + return r; +} diff --git a/ft/cursor.h b/ft/cursor.h index 7636beb121c..b433694ae8d 100644 --- a/ft/cursor.h +++ b/ft/cursor.h @@ -90,7 +90,7 @@ PATENT RIGHTS GRANT: #include -typedef bool(*FT_CHECK_INTERRUPT_CALLBACK)(void* extra); +#include "ft/ft-internal.h" /* an ft cursor is represented as a kv pair in a tree */ struct ft_cursor { @@ -110,3 +110,125 @@ struct ft_cursor { void *interrupt_cb_extra; }; typedef struct ft_cursor *FT_CURSOR; + +enum ft_search_direction_e { + FT_SEARCH_LEFT = 1, /* search left -> right, finds min xy as defined by the compare function */ + FT_SEARCH_RIGHT = 2, /* search right -> left, finds max xy as defined by the compare function */ +}; + +struct ft_search; + +/* the search compare function should return 0 for all xy < kv and 1 for all xy >= kv + the compare function should be a step function from 0 to 1 for a left to right search + and 1 to 0 for a right to left search */ + +typedef int (*ft_search_compare_func_t)(const struct ft_search &, const DBT *); + +/* the search object contains the compare function, search direction, and the kv pair that + is used in the compare function. the context is the user's private data */ + +struct ft_search { + ft_search_compare_func_t compare; + enum ft_search_direction_e direction; + const DBT *k; + void *context; + + // To fix #3522, we need to remember the pivots that we have searched unsuccessfully. + // For example, when searching right (left), we call search->compare() on the ith pivot key. If search->compare(0 returns + // nonzero, then we search the ith subtree. If that subsearch returns DB_NOTFOUND then maybe the key isn't present in the + // tree. But maybe we are doing a DB_NEXT (DB_PREV), and everything was deleted. 
So we remember the pivot, and later we + // will only search subtrees which contain keys that are bigger than (less than) the pivot. + // The code is a kludge (even before this fix), and interacts strangely with the TOKUDB_FOUND_BUT_REJECTED (which is there + // because a failed DB_GET we would keep searching the rest of the tree). We probably should write the various lookup + // codes (NEXT, PREV, CURRENT, etc) more directly, and we should probably use a binary search within a node to search the + // pivots so that we can support a larger fanout. + // These changes (3312+3522) also (probably) introduce an isolation error (#3529). + // We must make sure we lock the right range for proper isolation level. + // There's probably a bug in which the following could happen. + // Thread A: Searches through deleted keys A,B,D,E and finds nothing, so searches the next leaf, releasing the YDB lock. + // Thread B: Inserts key C, and acquires the write lock, then commits. + // Thread A: Resumes, searching F,G,H and return success. Thread A then read-locks the range A-H, and doesn't notice + // the value C inserted by thread B. Thus a failure of serialization. + // See #3529. + // There also remains a potential thrashing problem. When we get a TOKUDB_TRY_AGAIN, we unpin everything. There's + // no guarantee that we will get everything pinned again. We ought to keep nodes pinned when we retry, except that on the + // way out with a DB_NOTFOUND we ought to unpin those nodes. See #3528. 
+ DBT pivot_bound; + const DBT *k_bound; +}; + +/* initialize the search compare object */ +static inline ft_search *ft_search_init(ft_search *search, ft_search_compare_func_t compare, + enum ft_search_direction_e direction, + const DBT *k, const DBT *k_bound, void *context) { + search->compare = compare; + search->direction = direction; + search->k = k; + search->context = context; + toku_init_dbt(&search->pivot_bound); + search->k_bound = k_bound; + return search; +} + +static inline void ft_search_finish(ft_search *search) { + toku_destroy_dbt(&search->pivot_bound); +} + +int toku_ft_lookup (FT_HANDLE ft_h, DBT *k, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) __attribute__ ((warn_unused_result)); + +int toku_ft_cursor(FT_HANDLE ft_handle, FT_CURSOR*, TOKUTXN, bool, bool) __attribute__ ((warn_unused_result)); + +void toku_ft_cursor_set_prefetching(FT_CURSOR cursor); + +bool toku_ft_cursor_prefetching(FT_CURSOR cursor); + +bool toku_ft_cursor_not_set(FT_CURSOR cursor); + +void toku_ft_cursor_set_leaf_mode(FT_CURSOR cursor); + +// Sets a boolean on the ft cursor that prevents uncessary copying of the cursor duing a one query. 
+void toku_ft_cursor_set_temporary(FT_CURSOR cursor); + +void toku_ft_cursor_remove_restriction(FT_CURSOR cursor); + +void toku_ft_cursor_set_check_interrupt_cb(FT_CURSOR cursor, FT_CHECK_INTERRUPT_CALLBACK cb, void *extra); + +int toku_ft_cursor_is_leaf_mode(FT_CURSOR cursor); + +void toku_ft_cursor_set_range_lock(FT_CURSOR, const DBT *, const DBT *, bool, bool, int); + +int toku_ft_cursor_first(FT_CURSOR cursor, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) __attribute__ ((warn_unused_result)); + +int toku_ft_cursor_last(FT_CURSOR cursor, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) __attribute__ ((warn_unused_result)); + +int toku_ft_cursor_next(FT_CURSOR cursor, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) __attribute__ ((warn_unused_result)); + +int toku_ft_cursor_prev(FT_CURSOR cursor, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) __attribute__ ((warn_unused_result)); + +int toku_ft_cursor_current(FT_CURSOR cursor, int op, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) __attribute__ ((warn_unused_result)); + +int toku_ft_cursor_set(FT_CURSOR cursor, DBT *key, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) __attribute__ ((warn_unused_result)); + +int toku_ft_cursor_set_range(FT_CURSOR cursor, DBT *key, DBT *key_bound, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) __attribute__ ((warn_unused_result)); + +int toku_ft_cursor_set_range_reverse(FT_CURSOR cursor, DBT *key, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) __attribute__ ((warn_unused_result)); + +void toku_ft_cursor_close(FT_CURSOR cursor); + +bool toku_ft_cursor_uninitialized(FT_CURSOR cursor) __attribute__ ((warn_unused_result)); + +void toku_ft_cursor_peek(FT_CURSOR cursor, const DBT **pkey, const DBT **pval); + +int toku_ft_cursor_check_restricted_range(FT_CURSOR cursor, bytevec key, ITEMLEN keylen); + +int toku_ft_cursor_shortcut(FT_CURSOR cursor, int direction, uint32_t index, bn_data *bd, + FT_GET_CALLBACK_FUNCTION getf, void *getf_v, + uint32_t *keylen, void **key, uint32_t *vallen, void **val); + +// 
deprecated +int toku_ft_cursor_get(FT_CURSOR cursor, DBT *key, FT_GET_CALLBACK_FUNCTION getf, void *getf_v, int get_flags); +int toku_ft_cursor_delete(FT_CURSOR cursor, int flags, TOKUTXN txn); + +// used by get_key_after_bytes +int toku_ft_cursor_compare_one(const ft_search &search, const DBT *x); +int toku_ft_cursor_compare_set_range(const ft_search &search, const DBT *x); diff --git a/ft/ft-internal.h b/ft/ft-internal.h index 0c9dcb844e3..239892db796 100644 --- a/ft/ft-internal.h +++ b/ft/ft-internal.h @@ -106,7 +106,6 @@ PATENT RIGHTS GRANT: #include "ft_layout_version.h" #include "block_allocator.h" #include "cachetable.h" -#include "ft-ops.h" #include "toku_list.h" #include #include "leafentry.h" @@ -116,9 +115,10 @@ PATENT RIGHTS GRANT: #include #include "ft/bndata.h" #include "ft/rollback.h" -#include "ft/ft-search.h" #include "ft/msg_buffer.h" +struct ft_search; + enum { KEY_VALUE_OVERHEAD = 8 }; /* Must store the two lengths. */ enum { FT_MSG_OVERHEAD = (2 + sizeof(MSN)) }; // the type plus freshness plus MSN enum { FT_DEFAULT_FANOUT = 16 }; @@ -181,7 +181,7 @@ struct ftnode_fetch_extra { FT h; // used in the case where type == ftnode_fetch_subset // parameters needed to find out which child needs to be decompressed (so it can be read) - ft_search_t* search; + ft_search *search; DBT range_lock_left_key, range_lock_right_key; bool left_is_neg_infty, right_is_pos_infty; // states if we should try to aggressively fetch basement nodes @@ -858,7 +858,7 @@ static inline void fill_bfe_for_keymatch( static inline void fill_bfe_for_subset_read( struct ftnode_fetch_extra *bfe, FT h, - ft_search_t* search, + ft_search *search, const DBT *left, const DBT *right, bool left_is_neg_infty, @@ -951,7 +951,7 @@ toku_ft_search_which_child( DESCRIPTOR desc, ft_compare_func cmp, FTNODE node, - ft_search_t *search + ft_search *search ); bool @@ -1229,3 +1229,22 @@ void toku_flusher_thread_set_callback(void (*callback_f)(int, void*), void* extr int 
toku_upgrade_subtree_estimates_to_stat64info(int fd, FT h) __attribute__((nonnull)); int toku_upgrade_msn_from_root_to_header(int fd, FT h) __attribute__((nonnull)); + +// A callback function is invoked with the key, and the data. +// The pointers (to the bytevecs) must not be modified. The data must be copied out before the callback function returns. +// Note: In the thread-safe version, the ftnode remains locked while the callback function runs. So return soon, and don't call the ft code from the callback function. +// If the callback function returns a nonzero value (an error code), then that error code is returned from the get function itself. +// The cursor object will have been updated (so that if result==0 the current value is the value being passed) +// (If r!=0 then the cursor won't have been updated.) +// If r!=0, it's up to the callback function to return that value of r. +// A 'key' bytevec of NULL means that element is not found (effectively infinity or +// -infinity depending on direction) +// When lock_only is false, the callback does optional lock tree locking and then processes the key and val. +// When lock_only is true, the callback only does optional lock tree locking. 
+typedef int (*FT_GET_CALLBACK_FUNCTION)(ITEMLEN keylen, bytevec key, ITEMLEN vallen, bytevec val, void *extra, bool lock_only); + +typedef bool (*FT_CHECK_INTERRUPT_CALLBACK)(void *extra); + +struct ft_search; +struct ft_cursor; +int toku_ft_search(FT_HANDLE ft_handle, ft_search *search, FT_GET_CALLBACK_FUNCTION getf, void *getf_v, struct ft_cursor *ftcursor, bool can_bulk_fetch); diff --git a/ft/ft-ops.cc b/ft/ft-ops.cc index 4c44e839cda..2029607e912 100644 --- a/ft/ft-ops.cc +++ b/ft/ft-ops.cc @@ -201,6 +201,7 @@ basement nodes, bulk fetch, and partial fetch: */ #include "checkpoint.h" +#include "cursor.h" #include "ft.h" #include "ft-cachetable-wrappers.h" #include "ft-flusher.h" @@ -4463,225 +4464,6 @@ void toku_ft_handle_create(FT_HANDLE *ft_handle_ptr) { *ft_handle_ptr = ft_handle; } -/* ************* CURSORS ********************* */ - -static inline void -ft_cursor_cleanup_dbts(FT_CURSOR c) { - toku_destroy_dbt(&c->key); - toku_destroy_dbt(&c->val); -} - -// -// This function is used by the leafentry iterators. -// returns TOKUDB_ACCEPT if live transaction context is allowed to read a value -// that is written by transaction with LSN of id -// live transaction context may read value if either id is the root ancestor of context, or if -// id was committed before context's snapshot was taken. 
-// For id to be committed before context's snapshot was taken, the following must be true: -// - id < context->snapshot_txnid64 AND id is not in context's live root transaction list -// For the above to NOT be true: -// - id > context->snapshot_txnid64 OR id is in context's live root transaction list -// -static int -does_txn_read_entry(TXNID id, TOKUTXN context) { - int rval; - TXNID oldest_live_in_snapshot = toku_get_oldest_in_live_root_txn_list(context); - if (oldest_live_in_snapshot == TXNID_NONE && id < context->snapshot_txnid64) { - rval = TOKUDB_ACCEPT; - } - else if (id < oldest_live_in_snapshot || id == context->txnid.parent_id64) { - rval = TOKUDB_ACCEPT; - } - else if (id > context->snapshot_txnid64 || toku_is_txn_in_live_root_txn_list(*context->live_root_txn_list, id)) { - rval = 0; - } - else { - rval = TOKUDB_ACCEPT; - } - return rval; -} - -static inline void -ft_cursor_extract_val(LEAFENTRY le, - FT_CURSOR cursor, - uint32_t *vallen, - void **val) { - if (toku_ft_cursor_is_leaf_mode(cursor)) { - *val = le; - *vallen = leafentry_memsize(le); - } else if (cursor->is_snapshot_read) { - int r = le_iterate_val( - le, - does_txn_read_entry, - val, - vallen, - cursor->ttxn - ); - lazy_assert_zero(r); - } else { - *val = le_latest_val_and_len(le, vallen); - } -} - -int toku_ft_cursor ( - FT_HANDLE ft_handle, - FT_CURSOR *cursorptr, - TOKUTXN ttxn, - bool is_snapshot_read, - bool disable_prefetching - ) -{ - if (is_snapshot_read) { - invariant(ttxn != NULL); - int accepted = does_txn_read_entry(ft_handle->ft->h->root_xid_that_created, ttxn); - if (accepted!=TOKUDB_ACCEPT) { - invariant(accepted==0); - return TOKUDB_MVCC_DICTIONARY_TOO_NEW; - } - } - FT_CURSOR XCALLOC(cursor); - cursor->ft_handle = ft_handle; - cursor->prefetching = false; - toku_init_dbt(&cursor->range_lock_left_key); - toku_init_dbt(&cursor->range_lock_right_key); - cursor->left_is_neg_infty = false; - cursor->right_is_pos_infty = false; - cursor->is_snapshot_read = is_snapshot_read; - 
cursor->is_leaf_mode = false; - cursor->ttxn = ttxn; - cursor->disable_prefetching = disable_prefetching; - cursor->is_temporary = false; - *cursorptr = cursor; - return 0; -} - -void toku_ft_cursor_remove_restriction(FT_CURSOR ftcursor) { - ftcursor->out_of_range_error = 0; - ftcursor->direction = 0; -} - -void toku_ft_cursor_set_check_interrupt_cb(FT_CURSOR ftcursor, FT_CHECK_INTERRUPT_CALLBACK cb, void *extra) { - ftcursor->interrupt_cb = cb; - ftcursor->interrupt_cb_extra = extra; -} - - -void -toku_ft_cursor_set_temporary(FT_CURSOR ftcursor) { - ftcursor->is_temporary = true; -} - -void -toku_ft_cursor_set_leaf_mode(FT_CURSOR ftcursor) { - ftcursor->is_leaf_mode = true; -} - -int -toku_ft_cursor_is_leaf_mode(FT_CURSOR ftcursor) { - return ftcursor->is_leaf_mode; -} - -void -toku_ft_cursor_set_range_lock(FT_CURSOR cursor, const DBT *left, const DBT *right, - bool left_is_neg_infty, bool right_is_pos_infty, - int out_of_range_error) -{ - // Destroy any existing keys and then clone the given left, right keys - toku_destroy_dbt(&cursor->range_lock_left_key); - if (left_is_neg_infty) { - cursor->left_is_neg_infty = true; - } else { - toku_clone_dbt(&cursor->range_lock_left_key, *left); - } - - toku_destroy_dbt(&cursor->range_lock_right_key); - if (right_is_pos_infty) { - cursor->right_is_pos_infty = true; - } else { - toku_clone_dbt(&cursor->range_lock_right_key, *right); - } - - // TOKUDB_FOUND_BUT_REJECTED is a DB_NOTFOUND with instructions to stop looking. (Faster) - cursor->out_of_range_error = out_of_range_error == DB_NOTFOUND ? 
TOKUDB_FOUND_BUT_REJECTED : out_of_range_error; - cursor->direction = 0; -} - -void toku_ft_cursor_close(FT_CURSOR cursor) { - ft_cursor_cleanup_dbts(cursor); - toku_destroy_dbt(&cursor->range_lock_left_key); - toku_destroy_dbt(&cursor->range_lock_right_key); - toku_free(cursor); -} - -static inline void ft_cursor_set_prefetching(FT_CURSOR cursor) { - cursor->prefetching = true; -} - -static inline bool ft_cursor_prefetching(FT_CURSOR cursor) { - return cursor->prefetching; -} - -//Return true if cursor is uninitialized. false otherwise. -static bool -ft_cursor_not_set(FT_CURSOR cursor) { - assert((cursor->key.data==NULL) == (cursor->val.data==NULL)); - return (bool)(cursor->key.data == NULL); -} - -// -// -// -// -// -// -// -// -// -// TODO: ask Yoni why second parameter here is not const -// -// -// -// -// -// -// -// -// -static int -heaviside_from_search_t(const DBT &kdbt, ft_search_t &search) { - int cmp = search.compare(search, - search.k ? &kdbt : 0); - // The search->compare function returns only 0 or 1 - switch (search.direction) { - case FT_SEARCH_LEFT: return cmp==0 ? -1 : +1; - case FT_SEARCH_RIGHT: return cmp==0 ? +1 : -1; // Because the comparison runs backwards for right searches. - } - abort(); return 0; -} - - -// -// Returns true if the value that is to be read is empty. 
-// -static inline int -is_le_val_del(LEAFENTRY le, FT_CURSOR ftcursor) { - int rval; - if (ftcursor->is_snapshot_read) { - bool is_del; - le_iterate_is_del( - le, - does_txn_read_entry, - &is_del, - ftcursor->ttxn - ); - rval = is_del; - } - else { - rval = le_latest_is_del(le); - } - return rval; -} - struct store_msg_buffer_offset_extra { int32_t *offsets; int i; @@ -5257,38 +5039,6 @@ toku_move_ftnode_messages_to_stale(FT ft, FTNODE node) { } } -static int cursor_check_restricted_range(FT_CURSOR c, bytevec key, ITEMLEN keylen) { - if (c->out_of_range_error) { - FT ft = c->ft_handle->ft; - FAKE_DB(db, &ft->cmp_descriptor); - DBT found_key; - toku_fill_dbt(&found_key, key, keylen); - if ((!c->left_is_neg_infty && c->direction <= 0 && ft->compare_fun(&db, &found_key, &c->range_lock_left_key) < 0) || - (!c->right_is_pos_infty && c->direction >= 0 && ft->compare_fun(&db, &found_key, &c->range_lock_right_key) > 0)) { - invariant(c->out_of_range_error); - return c->out_of_range_error; - } - } - // Reset cursor direction to mitigate risk if some query type doesn't set the direction. - // It is always correct to check both bounds (which happens when direction==0) but it can be slower. - c->direction = 0; - return 0; -} - -static int -ft_cursor_shortcut ( - FT_CURSOR cursor, - int direction, - uint32_t index, - bn_data* bd, - FT_GET_CALLBACK_FUNCTION getf, - void *getf_v, - uint32_t *keylen, - void **key, - uint32_t *vallen, - void **val - ); - // Return true if this key is within the search bound. If there is no search bound then the tree search continues. static bool search_continue(ft_search *search, void *key, uint32_t key_len) { bool result = true; @@ -5302,11 +5052,22 @@ static bool search_continue(ft_search *search, void *key, uint32_t key_len) { return result; } +static int heaviside_from_search_t(const DBT &kdbt, ft_search &search) { + int cmp = search.compare(search, + search.k ? 
&kdbt : 0); + // The search->compare function returns only 0 or 1 + switch (search.direction) { + case FT_SEARCH_LEFT: return cmp==0 ? -1 : +1; + case FT_SEARCH_RIGHT: return cmp==0 ? +1 : -1; // Because the comparison runs backwards for right searches. + } + abort(); return 0; +} + // This is a bottom layer of the search functions. static int ft_search_basement_node( BASEMENTNODE bn, - ft_search_t *search, + ft_search *search, FT_GET_CALLBACK_FUNCTION getf, void *getf_v, bool *doprefetch, @@ -5314,7 +5075,7 @@ ft_search_basement_node( bool can_bulk_fetch ) { - // Now we have to convert from ft_search_t to the heaviside function with a direction. What a pain... + // Now we have to convert from ft_search to the heaviside function with a direction. What a pain... int direction; switch (search->direction) { @@ -5339,7 +5100,7 @@ ok: ; if (toku_ft_cursor_is_leaf_mode(ftcursor)) goto got_a_good_value; // leaf mode cursors see all leaf entries - if (is_le_val_del(le,ftcursor)) { + if (le_val_is_del(le, ftcursor->is_snapshot_read, ftcursor->ttxn)) { // Provisionally deleted stuff is gone. // So we need to scan in the direction to see if we can find something. // Every 100 deleted leaf entries check if the leaf's key is within the search bounds. 
@@ -5369,7 +5130,9 @@ ok: ; } r = bn->data_buffer.fetch_klpair(idx, &le, &keylen, &key); assert_zero(r); // we just validated the index - if (!is_le_val_del(le,ftcursor)) goto got_a_good_value; + if (!le_val_is_del(le, ftcursor->is_snapshot_read, ftcursor->ttxn)) { + goto got_a_good_value; + } } } got_a_good_value: @@ -5377,42 +5140,31 @@ got_a_good_value: uint32_t vallen; void *val; - ft_cursor_extract_val(le, - ftcursor, - &vallen, - &val - ); - r = cursor_check_restricted_range(ftcursor, key, keylen); - if (r==0) { + le_extract_val(le, toku_ft_cursor_is_leaf_mode(ftcursor), + ftcursor->is_snapshot_read, ftcursor->ttxn, + &vallen, &val); + r = toku_ft_cursor_check_restricted_range(ftcursor, key, keylen); + if (r == 0) { r = getf(keylen, key, vallen, val, getf_v, false); } - if (r==0 || r == TOKUDB_CURSOR_CONTINUE) { + if (r == 0 || r == TOKUDB_CURSOR_CONTINUE) { // // IMPORTANT: bulk fetch CANNOT go past the current basement node, // because there is no guarantee that messages have been applied // to other basement nodes, as part of #5770 // if (r == TOKUDB_CURSOR_CONTINUE && can_bulk_fetch) { - r = ft_cursor_shortcut( - ftcursor, - direction, - idx, - &bn->data_buffer, - getf, - getf_v, - &keylen, - &key, - &vallen, - &val - ); + r = toku_ft_cursor_shortcut(ftcursor, direction, idx, &bn->data_buffer, + getf, getf_v, &keylen, &key, &vallen, &val); } - ft_cursor_cleanup_dbts(ftcursor); + toku_destroy_dbt(&ftcursor->key); + toku_destroy_dbt(&ftcursor->val); if (!ftcursor->is_temporary) { toku_memdup_dbt(&ftcursor->key, key, keylen); toku_memdup_dbt(&ftcursor->val, val, vallen); } - //The search was successful. Prefetching can continue. + // The search was successful. Prefetching can continue. 
*doprefetch = true; } } @@ -5424,7 +5176,7 @@ static int ft_search_node ( FT_HANDLE ft_handle, FTNODE node, - ft_search_t *search, + ft_search *search, int child_to_search, FT_GET_CALLBACK_FUNCTION getf, void *getf_v, @@ -5491,7 +5243,7 @@ ft_node_maybe_prefetch(FT_HANDLE ft_handle, FTNODE node, int childnum, FT_CURSOR // if we want to prefetch in the tree // then prefetch the next children if there are any - if (*doprefetch && ft_cursor_prefetching(ftcursor) && !ftcursor->disable_prefetching) { + if (*doprefetch && toku_ft_cursor_prefetching(ftcursor) && !ftcursor->disable_prefetching) { int rc = ft_cursor_rightmost_child_wanted(ftcursor, ft_handle, node); for (int i = childnum + 1; (i <= childnum + num_nodes_to_prefetch) && (i <= rc); i++) { BLOCKNUM nextchildblocknum = BP_BLOCKNUM(node, i); @@ -5543,7 +5295,7 @@ unlock_ftnode_fun (void *v) { /* search in a node's child */ static int -ft_search_child(FT_HANDLE ft_handle, FTNODE node, int childnum, ft_search_t *search, FT_GET_CALLBACK_FUNCTION getf, void *getf_v, bool *doprefetch, FT_CURSOR ftcursor, UNLOCKERS unlockers, +ft_search_child(FT_HANDLE ft_handle, FTNODE node, int childnum, ft_search *search, FT_GET_CALLBACK_FUNCTION getf, void *getf_v, bool *doprefetch, FT_CURSOR ftcursor, UNLOCKERS unlockers, ANCESTORS ancestors, struct pivot_bounds const * const bounds, bool can_bulk_fetch) // Effect: Search in a node's child. Searches are read-only now (at least as far as the hardcopy is concerned). 
{ @@ -5622,7 +5374,7 @@ ft_search_child(FT_HANDLE ft_handle, FTNODE node, int childnum, ft_search_t *sea } static inline int -search_which_child_cmp_with_bound(DB *db, ft_compare_func cmp, FTNODE node, int childnum, ft_search_t *search, DBT *dbt) +search_which_child_cmp_with_bound(DB *db, ft_compare_func cmp, FTNODE node, int childnum, ft_search *search, DBT *dbt) { return cmp(db, toku_copy_dbt(dbt, node->childkeys[childnum]), &search->pivot_bound); } @@ -5632,7 +5384,7 @@ toku_ft_search_which_child( DESCRIPTOR desc, ft_compare_func cmp, FTNODE node, - ft_search_t *search + ft_search *search ) { if (node->n_children <= 1) return 0; @@ -5696,7 +5448,7 @@ static void maybe_search_save_bound( FTNODE node, int child_searched, - ft_search_t *search) + ft_search *search) { int p = (search->direction == FT_SEARCH_LEFT) ? child_searched : child_searched - 1; if (p >= 0 && p < node->n_children-1) { @@ -5706,7 +5458,7 @@ maybe_search_save_bound( } // Returns true if there are still children left to search in this node within the search bound (if any). -static bool search_try_again(FTNODE node, int child_to_search, ft_search_t *search) { +static bool search_try_again(FTNODE node, int child_to_search, ft_search *search) { bool try_again = false; if (search->direction == FT_SEARCH_LEFT) { if (child_to_search < node->n_children-1) { @@ -5729,7 +5481,7 @@ static int ft_search_node( FT_HANDLE ft_handle, FTNODE node, - ft_search_t *search, + ft_search *search, int child_to_search, FT_GET_CALLBACK_FUNCTION getf, void *getf_v, @@ -5824,8 +5576,7 @@ ft_search_node( return r; } -static int -toku_ft_search (FT_HANDLE ft_handle, ft_search_t *search, FT_GET_CALLBACK_FUNCTION getf, void *getf_v, FT_CURSOR ftcursor, bool can_bulk_fetch) +int toku_ft_search(FT_HANDLE ft_handle, ft_search *search, FT_GET_CALLBACK_FUNCTION getf, void *getf_v, FT_CURSOR ftcursor, bool can_bulk_fetch) // Effect: Perform a search. Associate cursor with a leaf if possible. 
// All searches are performed through this function. { @@ -5855,7 +5606,7 @@ try_again: // and the partial fetch callback (in case the node is perhaps partially in memory) to the fetch the node // - This eventually calls either toku_ftnode_fetch_callback or toku_ftnode_pf_req_callback depending on whether the node is in // memory at all or not. - // - Within these functions, the "ft_search_t search" parameter is used to evaluate which child the search is interested in. + // - Within these functions, the "ft_search search" parameter is used to evaluate which child the search is interested in. // If the node is not in memory at all, toku_ftnode_fetch_callback will read the node and decompress only the partition for the // relevant child, be it a message buffer or basement node. If the node is in memory, then toku_ftnode_pf_req_callback // will tell the cachetable that a partial fetch is required if and only if the relevant child is not in memory. If the relevant child @@ -5956,355 +5707,20 @@ try_again: return r; } -struct ft_cursor_search_struct { - FT_GET_CALLBACK_FUNCTION getf; - void *getf_v; - FT_CURSOR cursor; - ft_search_t *search; -}; - -/* search for the first kv pair that matches the search object */ -static int -ft_cursor_search(FT_CURSOR cursor, ft_search_t *search, FT_GET_CALLBACK_FUNCTION getf, void *getf_v, bool can_bulk_fetch) -{ - int r = toku_ft_search(cursor->ft_handle, search, getf, getf_v, cursor, can_bulk_fetch); - return r; -} - -static inline int compare_k_x(FT_HANDLE ft_handle, const DBT *k, const DBT *x) { - FAKE_DB(db, &ft_handle->ft->cmp_descriptor); - return ft_handle->ft->compare_fun(&db, k, x); -} - -static int -ft_cursor_compare_one(const ft_search_t &search __attribute__((__unused__)), const DBT *x __attribute__((__unused__))) -{ - return 1; -} - -static int ft_cursor_compare_set(const ft_search_t &search, const DBT *x) { - FT_HANDLE CAST_FROM_VOIDP(ft_handle, search.context); - return compare_k_x(ft_handle, search.k, x) <= 0; /* 
return min xy: kv <= xy */ -} - -static int -ft_cursor_current_getf(ITEMLEN keylen, bytevec key, - ITEMLEN vallen, bytevec val, - void *v, bool lock_only) { - struct ft_cursor_search_struct *CAST_FROM_VOIDP(bcss, v); - int r; - if (key==NULL) { - r = bcss->getf(0, NULL, 0, NULL, bcss->getf_v, lock_only); - } else { - FT_CURSOR cursor = bcss->cursor; - DBT newkey; - toku_fill_dbt(&newkey, key, keylen); - if (compare_k_x(cursor->ft_handle, &cursor->key, &newkey) != 0) { - r = bcss->getf(0, NULL, 0, NULL, bcss->getf_v, lock_only); // This was once DB_KEYEMPTY - if (r==0) r = TOKUDB_FOUND_BUT_REJECTED; - } - else - r = bcss->getf(keylen, key, vallen, val, bcss->getf_v, lock_only); - } - return r; -} - -int -toku_ft_cursor_current(FT_CURSOR cursor, int op, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) -{ - if (ft_cursor_not_set(cursor)) - return EINVAL; - cursor->direction = 0; - if (op == DB_CURRENT) { - struct ft_cursor_search_struct bcss = {getf, getf_v, cursor, 0}; - ft_search_t search; - ft_search_init(&search, ft_cursor_compare_set, FT_SEARCH_LEFT, &cursor->key, nullptr, cursor->ft_handle); - int r = toku_ft_search(cursor->ft_handle, &search, ft_cursor_current_getf, &bcss, cursor, false); - ft_search_finish(&search); - return r; - } - return getf(cursor->key.size, cursor->key.data, cursor->val.size, cursor->val.data, getf_v, false); // ft_cursor_copyout(cursor, outkey, outval); -} - -int -toku_ft_cursor_first(FT_CURSOR cursor, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) -{ - cursor->direction = 0; - ft_search_t search; - ft_search_init(&search, ft_cursor_compare_one, FT_SEARCH_LEFT, nullptr, nullptr, cursor->ft_handle); - int r = ft_cursor_search(cursor, &search, getf, getf_v, false); - ft_search_finish(&search); - return r; -} - -int -toku_ft_cursor_last(FT_CURSOR cursor, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) -{ - cursor->direction = 0; - ft_search_t search; - ft_search_init(&search, ft_cursor_compare_one, FT_SEARCH_RIGHT, nullptr, nullptr, 
cursor->ft_handle); - int r = ft_cursor_search(cursor, &search, getf, getf_v, false); - ft_search_finish(&search); - return r; -} - -static int ft_cursor_compare_next(const ft_search_t &search, const DBT *x) { - FT_HANDLE CAST_FROM_VOIDP(ft_handle, search.context); - return compare_k_x(ft_handle, search.k, x) < 0; /* return min xy: kv < xy */ -} - -static int -ft_cursor_shortcut ( - FT_CURSOR cursor, - int direction, - uint32_t index, - bn_data* bd, - FT_GET_CALLBACK_FUNCTION getf, - void *getf_v, - uint32_t *keylen, - void **key, - uint32_t *vallen, - void **val - ) -{ - int r = 0; - // if we are searching towards the end, limit is last element - // if we are searching towards the beginning, limit is the first element - uint32_t limit = (direction > 0) ? (bd->num_klpairs() - 1) : 0; - - //Starting with the prev, find the first real (non-provdel) leafentry. - while (index != limit) { - index += direction; - LEAFENTRY le; - void* foundkey = NULL; - uint32_t foundkeylen = 0; - - r = bd->fetch_klpair(index, &le, &foundkeylen, &foundkey); - invariant_zero(r); - - if (toku_ft_cursor_is_leaf_mode(cursor) || !is_le_val_del(le, cursor)) { - ft_cursor_extract_val( - le, - cursor, - vallen, - val - ); - *key = foundkey; - *keylen = foundkeylen; - - cursor->direction = direction; - r = cursor_check_restricted_range(cursor, *key, *keylen); - if (r!=0) { - paranoid_invariant(r == cursor->out_of_range_error); - // We already got at least one entry from the bulk fetch. - // Return 0 (instead of out of range error). 
- r = 0; - break; - } - r = getf(*keylen, *key, *vallen, *val, getf_v, false); - if (r == TOKUDB_CURSOR_CONTINUE) { - continue; - } - else { - break; - } - } - } - - return r; -} - -int -toku_ft_cursor_next(FT_CURSOR cursor, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) -{ - cursor->direction = +1; - ft_search_t search; - ft_search_init(&search, ft_cursor_compare_next, FT_SEARCH_LEFT, &cursor->key, nullptr, cursor->ft_handle); - int r = ft_cursor_search(cursor, &search, getf, getf_v, true); - ft_search_finish(&search); - if (r == 0) ft_cursor_set_prefetching(cursor); - return r; -} - -static int -ft_cursor_search_eq_k_x_getf(ITEMLEN keylen, bytevec key, - ITEMLEN vallen, bytevec val, - void *v, bool lock_only) { - struct ft_cursor_search_struct *CAST_FROM_VOIDP(bcss, v); - int r; - if (key==NULL) { - r = bcss->getf(0, NULL, 0, NULL, bcss->getf_v, false); - } else { - FT_CURSOR cursor = bcss->cursor; - DBT newkey; - toku_fill_dbt(&newkey, key, keylen); - if (compare_k_x(cursor->ft_handle, bcss->search->k, &newkey) == 0) { - r = bcss->getf(keylen, key, vallen, val, bcss->getf_v, lock_only); - } else { - r = bcss->getf(0, NULL, 0, NULL, bcss->getf_v, lock_only); - if (r==0) r = TOKUDB_FOUND_BUT_REJECTED; - } - } - return r; -} - -/* search for the kv pair that matches the search object and is equal to k */ -static int -ft_cursor_search_eq_k_x(FT_CURSOR cursor, ft_search_t *search, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) -{ - struct ft_cursor_search_struct bcss = {getf, getf_v, cursor, search}; - int r = toku_ft_search(cursor->ft_handle, search, ft_cursor_search_eq_k_x_getf, &bcss, cursor, false); - return r; -} - -static int ft_cursor_compare_prev(const ft_search_t &search, const DBT *x) { - FT_HANDLE CAST_FROM_VOIDP(ft_handle, search.context); - return compare_k_x(ft_handle, search.k, x) > 0; /* return max xy: kv > xy */ -} - -int -toku_ft_cursor_prev(FT_CURSOR cursor, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) -{ - cursor->direction = -1; - ft_search_t search; 
- ft_search_init(&search, ft_cursor_compare_prev, FT_SEARCH_RIGHT, &cursor->key, nullptr, cursor->ft_handle); - int r = ft_cursor_search(cursor, &search, getf, getf_v, true); - ft_search_finish(&search); - return r; -} - -static int ft_cursor_compare_set_range(const ft_search_t &search, const DBT *x) { - FT_HANDLE CAST_FROM_VOIDP(ft_handle, search.context); - return compare_k_x(ft_handle, search.k, x) <= 0; /* return kv <= xy */ -} - -int -toku_ft_cursor_set(FT_CURSOR cursor, DBT *key, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) -{ - cursor->direction = 0; - ft_search_t search; - ft_search_init(&search, ft_cursor_compare_set_range, FT_SEARCH_LEFT, key, nullptr, cursor->ft_handle); - int r = ft_cursor_search_eq_k_x(cursor, &search, getf, getf_v); - ft_search_finish(&search); - return r; -} - -int -toku_ft_cursor_set_range(FT_CURSOR cursor, DBT *key, DBT *key_bound, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) -{ - cursor->direction = 0; - ft_search_t search; - ft_search_init(&search, ft_cursor_compare_set_range, FT_SEARCH_LEFT, key, key_bound, cursor->ft_handle); - int r = ft_cursor_search(cursor, &search, getf, getf_v, false); - ft_search_finish(&search); - return r; -} - -static int ft_cursor_compare_set_range_reverse(const ft_search_t &search, const DBT *x) { - FT_HANDLE CAST_FROM_VOIDP(ft_handle, search.context); - return compare_k_x(ft_handle, search.k, x) >= 0; /* return kv >= xy */ -} - -int -toku_ft_cursor_set_range_reverse(FT_CURSOR cursor, DBT *key, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) -{ - cursor->direction = 0; - ft_search_t search; - ft_search_init(&search, ft_cursor_compare_set_range_reverse, FT_SEARCH_RIGHT, key, nullptr, cursor->ft_handle); - int r = ft_cursor_search(cursor, &search, getf, getf_v, false); - ft_search_finish(&search); - return r; -} - - -//TODO: When tests have been rewritten, get rid of this function. -//Only used by tests. 
-int -toku_ft_cursor_get (FT_CURSOR cursor, DBT *key, FT_GET_CALLBACK_FUNCTION getf, void *getf_v, int get_flags) -{ - int op = get_flags & DB_OPFLAGS_MASK; - if (get_flags & ~DB_OPFLAGS_MASK) - return EINVAL; - - switch (op) { - case DB_CURRENT: - case DB_CURRENT_BINDING: - return toku_ft_cursor_current(cursor, op, getf, getf_v); - case DB_FIRST: - return toku_ft_cursor_first(cursor, getf, getf_v); - case DB_LAST: - return toku_ft_cursor_last(cursor, getf, getf_v); - case DB_NEXT: - if (ft_cursor_not_set(cursor)) { - return toku_ft_cursor_first(cursor, getf, getf_v); - } else { - return toku_ft_cursor_next(cursor, getf, getf_v); - } - case DB_PREV: - if (ft_cursor_not_set(cursor)) { - return toku_ft_cursor_last(cursor, getf, getf_v); - } else { - return toku_ft_cursor_prev(cursor, getf, getf_v); - } - case DB_SET: - return toku_ft_cursor_set(cursor, key, getf, getf_v); - case DB_SET_RANGE: - return toku_ft_cursor_set_range(cursor, key, nullptr, getf, getf_v); - default: ;// Fall through - } - return EINVAL; -} - -void -toku_ft_cursor_peek(FT_CURSOR cursor, const DBT **pkey, const DBT **pval) -// Effect: Retrieves a pointer to the DBTs for the current key and value. -// Requires: The caller may not modify the DBTs or the memory at which they points. 
-// Requires: The caller must be in the context of a -// FT_GET_(STRADDLE_)CALLBACK_FUNCTION -{ - *pkey = &cursor->key; - *pval = &cursor->val; -} - -bool toku_ft_cursor_uninitialized(FT_CURSOR c) { - return ft_cursor_not_set(c); -} - - -/* ********************************* lookup **************************************/ - -int -toku_ft_lookup (FT_HANDLE ft_handle, DBT *k, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) -{ - int r, rr; - FT_CURSOR cursor; - - rr = toku_ft_cursor(ft_handle, &cursor, NULL, false, false); - if (rr != 0) return rr; - - int op = DB_SET; - r = toku_ft_cursor_get(cursor, k, getf, getf_v, op); - - toku_ft_cursor_close(cursor); - - return r; -} - /* ********************************* delete **************************************/ static int getf_nothing (ITEMLEN UU(keylen), bytevec UU(key), ITEMLEN UU(vallen), bytevec UU(val), void *UU(pair_v), bool UU(lock_only)) { return 0; } -int -toku_ft_cursor_delete(FT_CURSOR cursor, int flags, TOKUTXN txn) { +int toku_ft_cursor_delete(FT_CURSOR cursor, int flags, TOKUTXN txn) { int r; int unchecked_flags = flags; bool error_if_missing = (bool) !(flags&DB_DELETE_ANY); unchecked_flags &= ~DB_DELETE_ANY; if (unchecked_flags!=0) r = EINVAL; - else if (ft_cursor_not_set(cursor)) r = EINVAL; + else if (toku_ft_cursor_not_set(cursor)) r = EINVAL; else { r = 0; if (error_if_missing) { @@ -6628,9 +6044,9 @@ static int get_key_after_bytes_in_basementnode(FT ft, BASEMENTNODE bn, const DBT return r; } -static int get_key_after_bytes_in_subtree(FT_HANDLE ft_h, FT ft, FTNODE node, UNLOCKERS unlockers, ANCESTORS ancestors, PIVOT_BOUNDS bounds, FTNODE_FETCH_EXTRA bfe, ft_search_t *search, uint64_t subtree_bytes, const DBT *start_key, uint64_t skip_len, void (*callback)(const DBT *, uint64_t, void *), void *cb_extra, uint64_t *skipped); +static int get_key_after_bytes_in_subtree(FT_HANDLE ft_h, FT ft, FTNODE node, UNLOCKERS unlockers, ANCESTORS ancestors, PIVOT_BOUNDS bounds, FTNODE_FETCH_EXTRA bfe, ft_search *search, 
uint64_t subtree_bytes, const DBT *start_key, uint64_t skip_len, void (*callback)(const DBT *, uint64_t, void *), void *cb_extra, uint64_t *skipped); -static int get_key_after_bytes_in_child(FT_HANDLE ft_h, FT ft, FTNODE node, UNLOCKERS unlockers, ANCESTORS ancestors, PIVOT_BOUNDS bounds, FTNODE_FETCH_EXTRA bfe, ft_search_t *search, int childnum, uint64_t subtree_bytes, const DBT *start_key, uint64_t skip_len, void (*callback)(const DBT *, uint64_t, void *), void *cb_extra, uint64_t *skipped) { +static int get_key_after_bytes_in_child(FT_HANDLE ft_h, FT ft, FTNODE node, UNLOCKERS unlockers, ANCESTORS ancestors, PIVOT_BOUNDS bounds, FTNODE_FETCH_EXTRA bfe, ft_search *search, int childnum, uint64_t subtree_bytes, const DBT *start_key, uint64_t skip_len, void (*callback)(const DBT *, uint64_t, void *), void *cb_extra, uint64_t *skipped) { int r; struct ancestors next_ancestors = {node, childnum, ancestors}; BLOCKNUM childblocknum = BP_BLOCKNUM(node, childnum); @@ -6649,7 +6065,7 @@ static int get_key_after_bytes_in_child(FT_HANDLE ft_h, FT ft, FTNODE node, UNLO return get_key_after_bytes_in_subtree(ft_h, ft, child, &next_unlockers, &next_ancestors, &next_bounds, bfe, search, subtree_bytes, start_key, skip_len, callback, cb_extra, skipped); } -static int get_key_after_bytes_in_subtree(FT_HANDLE ft_h, FT ft, FTNODE node, UNLOCKERS unlockers, ANCESTORS ancestors, PIVOT_BOUNDS bounds, FTNODE_FETCH_EXTRA bfe, ft_search_t *search, uint64_t subtree_bytes, const DBT *start_key, uint64_t skip_len, void (*callback)(const DBT *, uint64_t, void *), void *cb_extra, uint64_t *skipped) { +static int get_key_after_bytes_in_subtree(FT_HANDLE ft_h, FT ft, FTNODE node, UNLOCKERS unlockers, ANCESTORS ancestors, PIVOT_BOUNDS bounds, FTNODE_FETCH_EXTRA bfe, ft_search *search, uint64_t subtree_bytes, const DBT *start_key, uint64_t skip_len, void (*callback)(const DBT *, uint64_t, void *), void *cb_extra, uint64_t *skipped) { int r; int childnum = 
toku_ft_search_which_child(&ft->cmp_descriptor, ft->compare_fun, node, search); const uint64_t child_subtree_bytes = subtree_bytes / node->n_children; @@ -6724,8 +6140,8 @@ int toku_ft_get_key_after_bytes(FT_HANDLE ft_h, const DBT *start_key, uint64_t s } struct unlock_ftnode_extra unlock_extra = {ft_h, root, false}; struct unlockers unlockers = {true, unlock_ftnode_fun, (void*)&unlock_extra, (UNLOCKERS) nullptr}; - ft_search_t search; - ft_search_init(&search, (start_key == nullptr ? ft_cursor_compare_one : ft_cursor_compare_set_range), FT_SEARCH_LEFT, start_key, nullptr, ft_h); + ft_search search; + ft_search_init(&search, (start_key == nullptr ? toku_ft_cursor_compare_one : toku_ft_cursor_compare_set_range), FT_SEARCH_LEFT, start_key, nullptr, ft_h); int r; // We can't do this because of #5768, there may be dictionaries in the wild that have negative stats. This won't affect mongo so it's ok: diff --git a/ft/ft-ops.h b/ft/ft-ops.h index fdfe3d56f06..2d62394b5ee 100644 --- a/ft/ft-ops.h +++ b/ft/ft-ops.h @@ -98,23 +98,8 @@ PATENT RIGHTS GRANT: #include #include "cachetable.h" #include "log.h" -#include "ft-search.h" #include "compress.h" #include "ft_msg.h" -#include "ft/cursor.h" - -// A callback function is invoked with the key, and the data. -// The pointers (to the bytevecs) must not be modified. The data must be copied out before the callback function returns. -// Note: In the thread-safe version, the ftnode remains locked while the callback function runs. So return soon, and don't call the ft code from the callback function. -// If the callback function returns a nonzero value (an error code), then that error code is returned from the get function itself. -// The cursor object will have been updated (so that if result==0 the current value is the value being passed) -// (If r!=0 then the cursor won't have been updated.) -// If r!=0, it's up to the callback function to return that value of r. 
-// A 'key' bytevec of NULL means that element is not found (effectively infinity or -// -infinity depending on direction) -// When lock_only is false, the callback does optional lock tree locking and then processes the key and val. -// When lock_only is true, the callback only does optional lock tree locking. -typedef int(*FT_GET_CALLBACK_FUNCTION)(ITEMLEN keylen, bytevec key, ITEMLEN vallen, bytevec val, void *extra, bool lock_only); int toku_open_ft_handle (const char *fname, int is_create, FT_HANDLE *, int nodesize, int basementnodesize, enum toku_compression_method compression_method, CACHETABLE, TOKUTXN, int(*)(DB *,const DBT*,const DBT*)) __attribute__ ((warn_unused_result)); @@ -208,8 +193,6 @@ toku_ft_handle_open_with_dict_id( DICTIONARY_ID use_dictionary_id ) __attribute__ ((warn_unused_result)); -int toku_ft_lookup (FT_HANDLE ft_h, DBT *k, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) __attribute__ ((warn_unused_result)); - // Effect: Insert a key and data pair into an ft void toku_ft_insert (FT_HANDLE ft_h, DBT *k, DBT *v, TOKUTXN txn); @@ -262,36 +245,6 @@ extern int toku_ft_debug_mode; int toku_verify_ft (FT_HANDLE ft_h) __attribute__ ((warn_unused_result)); int toku_verify_ft_with_progress (FT_HANDLE ft_h, int (*progress_callback)(void *extra, float progress), void *extra, int verbose, int keep_going) __attribute__ ((warn_unused_result)); -int toku_ft_cursor (FT_HANDLE, FT_CURSOR*, TOKUTXN, bool, bool) __attribute__ ((warn_unused_result)); -void toku_ft_cursor_set_leaf_mode(FT_CURSOR); -// Sets a boolean on the ft cursor that prevents uncessary copying of -// the cursor duing a one query. 
-void toku_ft_cursor_set_temporary(FT_CURSOR); -void toku_ft_cursor_remove_restriction(FT_CURSOR); -void toku_ft_cursor_set_check_interrupt_cb(FT_CURSOR ftcursor, FT_CHECK_INTERRUPT_CALLBACK cb, void *extra); -int toku_ft_cursor_is_leaf_mode(FT_CURSOR); -void toku_ft_cursor_set_range_lock(FT_CURSOR, const DBT *, const DBT *, bool, bool, int); - -// get is deprecated in favor of the individual functions below -int toku_ft_cursor_get (FT_CURSOR cursor, DBT *key, FT_GET_CALLBACK_FUNCTION getf, void *getf_v, int get_flags) __attribute__ ((warn_unused_result)); - -int toku_ft_cursor_first(FT_CURSOR cursor, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) __attribute__ ((warn_unused_result)); -int toku_ft_cursor_last(FT_CURSOR cursor, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) __attribute__ ((warn_unused_result)); -int toku_ft_cursor_next(FT_CURSOR cursor, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) __attribute__ ((warn_unused_result)); -int toku_ft_cursor_prev(FT_CURSOR cursor, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) __attribute__ ((warn_unused_result)); -int toku_ft_cursor_current(FT_CURSOR cursor, int op, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) __attribute__ ((warn_unused_result)); -int toku_ft_cursor_set(FT_CURSOR cursor, DBT *key, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) __attribute__ ((warn_unused_result)); -int toku_ft_cursor_set_range(FT_CURSOR cursor, DBT *key, DBT *key_bound, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) __attribute__ ((warn_unused_result)); -int toku_ft_cursor_set_range_reverse(FT_CURSOR cursor, DBT *key, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) __attribute__ ((warn_unused_result)); -int toku_ft_cursor_get_both_range(FT_CURSOR cursor, DBT *key, DBT *val, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) __attribute__ ((warn_unused_result)); -int toku_ft_cursor_get_both_range_reverse(FT_CURSOR cursor, DBT *key, DBT *val, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) __attribute__ ((warn_unused_result)); - -int 
toku_ft_cursor_delete(FT_CURSOR cursor, int flags, TOKUTXN) __attribute__ ((warn_unused_result)); -void toku_ft_cursor_close (FT_CURSOR curs); -bool toku_ft_cursor_uninitialized(FT_CURSOR c) __attribute__ ((warn_unused_result)); - -void toku_ft_cursor_peek(FT_CURSOR cursor, const DBT **pkey, const DBT **pval); - DICTIONARY_ID toku_ft_get_dictionary_id(FT_HANDLE); enum ft_flags { diff --git a/ft/ft-search.h b/ft/ft-search.h deleted file mode 100644 index 2c7f935a022..00000000000 --- a/ft/ft-search.h +++ /dev/null @@ -1,156 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). - - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. 
- -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. - - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. 
If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#pragma once - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." - -#include "ft/ybt.h" - -enum ft_search_direction_e { - FT_SEARCH_LEFT = 1, /* search left -> right, finds min xy as defined by the compare function */ - FT_SEARCH_RIGHT = 2, /* search right -> left, finds max xy as defined by the compare function */ -}; - -struct ft_search; - -/* the search compare function should return 0 for all xy < kv and 1 for all xy >= kv - the compare function should be a step function from 0 to 1 for a left to right search - and 1 to 0 for a right to left search */ - -typedef int (*ft_search_compare_func_t)(const struct ft_search &, const DBT *); - -/* the search object contains the compare function, search direction, and the kv pair that - is used in the compare function. the context is the user's private data */ - -typedef struct ft_search { - ft_search_compare_func_t compare; - enum ft_search_direction_e direction; - const DBT *k; - void *context; - - // To fix #3522, we need to remember the pivots that we have searched unsuccessfully. 
- // For example, when searching right (left), we call search->compare() on the ith pivot key. If search->compare(0 returns - // nonzero, then we search the ith subtree. If that subsearch returns DB_NOTFOUND then maybe the key isn't present in the - // tree. But maybe we are doing a DB_NEXT (DB_PREV), and everything was deleted. So we remember the pivot, and later we - // will only search subtrees which contain keys that are bigger than (less than) the pivot. - // The code is a kludge (even before this fix), and interacts strangely with the TOKUDB_FOUND_BUT_REJECTED (which is there - // because a failed DB_GET we would keep searching the rest of the tree). We probably should write the various lookup - // codes (NEXT, PREV, CURRENT, etc) more directly, and we should probably use a binary search within a node to search the - // pivots so that we can support a larger fanout. - // These changes (3312+3522) also (probably) introduce an isolation error (#3529). - // We must make sure we lock the right range for proper isolation level. - // There's probably a bug in which the following could happen. - // Thread A: Searches through deleted keys A,B,D,E and finds nothing, so searches the next leaf, releasing the YDB lock. - // Thread B: Inserts key C, and acquires the write lock, then commits. - // Thread A: Resumes, searching F,G,H and return success. Thread A then read-locks the range A-H, and doesn't notice - // the value C inserted by thread B. Thus a failure of serialization. - // See #3529. - // There also remains a potential thrashing problem. When we get a TOKUDB_TRY_AGAIN, we unpin everything. There's - // no guarantee that we will get everything pinned again. We ought to keep nodes pinned when we retry, except that on the - // way out with a DB_NOTFOUND we ought to unpin those nodes. See #3528. 
- DBT pivot_bound; - const DBT *k_bound; -} ft_search_t; - -/* initialize the search compare object */ -static inline ft_search_t *ft_search_init(ft_search_t *so, ft_search_compare_func_t compare, enum ft_search_direction_e direction, - const DBT *k, const DBT *k_bound, void *context) { - so->compare = compare; - so->direction = direction; - so->k = k; - so->context = context; - toku_init_dbt(&so->pivot_bound); - so->k_bound = k_bound; - return so; -} - -static inline void ft_search_finish(ft_search_t *so) { - toku_destroy_dbt(&so->pivot_bound); -} diff --git a/ft/ft.h b/ft/ft.h index d6b9914e279..8ef74644cf3 100644 --- a/ft/ft.h +++ b/ft/ft.h @@ -96,7 +96,6 @@ PATENT RIGHTS GRANT: #include #include "cachetable.h" #include "log.h" -#include "ft-search.h" #include "ft-ops.h" #include "compress.h" diff --git a/ft/leafentry.h b/ft/leafentry.h index bf954940c91..5e9e9d77714 100644 --- a/ft/leafentry.h +++ b/ft/leafentry.h @@ -211,6 +211,7 @@ void wbuf_nocrc_LEAFENTRY(struct wbuf *w, LEAFENTRY le); int print_klpair (FILE *outf, const void* key, uint32_t keylen, LEAFENTRY v); // Print a leafentry out in human-readable form. int le_latest_is_del(LEAFENTRY le); // Return true if it is a provisional delete. 
+int le_val_is_del(LEAFENTRY le, bool is_snapshot_read, TOKUTXN txn); // Returns true if the value that is to be read is empty bool le_is_clean(LEAFENTRY le); //Return how many xids exist (0 does not count) bool le_has_xids(LEAFENTRY le, XIDS xids); // Return true transaction represented by xids is still provisional in this leafentry (le's xid stack is a superset or equal to xids) void* le_latest_val (LEAFENTRY le); // Return the latest val (return NULL for provisional deletes) @@ -227,10 +228,13 @@ uint64_t le_outermost_uncommitted_xid (LEAFENTRY le); // r|r!=0&&r!=TOKUDB_ACCEPT: Quit early, return r, because something unexpected went wrong (error case) typedef int(*LE_ITERATE_CALLBACK)(TXNID id, TOKUTXN context); -int le_iterate_is_del(LEAFENTRY le, LE_ITERATE_CALLBACK f, bool *is_empty, TOKUTXN context); - int le_iterate_val(LEAFENTRY le, LE_ITERATE_CALLBACK f, void** valpp, uint32_t *vallenp, TOKUTXN context); +void le_extract_val(LEAFENTRY le, + // should we return the entire leafentry as the val? + bool is_leaf_mode, bool is_snapshot_read, + TOKUTXN ttxn, uint32_t *vallen, void **val); + size_t leafentry_disksize_13(LEAFENTRY_13 le); diff --git a/ft/tdb_logprint.cc b/ft/tdb_logprint.cc index c221a88e36c..87952b45dae 100644 --- a/ft/tdb_logprint.cc +++ b/ft/tdb_logprint.cc @@ -91,7 +91,8 @@ PATENT RIGHTS GRANT: /* Dump the log from stdin to stdout. 
*/ -#include +#include "ft/log_header.h" +#include "ft/logger.h" static void newmain (int count) { int i; diff --git a/ft/tests/ft-clock-test.cc b/ft/tests/ft-clock-test.cc index b637b9d3986..6af8e797431 100644 --- a/ft/tests/ft-clock-test.cc +++ b/ft/tests/ft-clock-test.cc @@ -90,7 +90,7 @@ PATENT RIGHTS GRANT: #include "test.h" - +#include "ft/cursor.h" enum ftnode_verify_type { read_all=1, @@ -224,13 +224,13 @@ test2(int fd, FT ft_h, FTNODE *dn) { memset(&dummy_db, 0, sizeof(dummy_db)); memset(&left, 0, sizeof(left)); memset(&right, 0, sizeof(right)); - ft_search_t search_t; + ft_search search; ft_h->compare_fun = string_key_cmp; fill_bfe_for_subset_read( &bfe_subset, ft_h, - ft_search_init(&search_t, search_cmp, FT_SEARCH_LEFT, nullptr, nullptr, nullptr), + ft_search_init(&search, search_cmp, FT_SEARCH_LEFT, nullptr, nullptr, nullptr), &left, &right, true, diff --git a/ft/tests/test.h b/ft/tests/test.h index 78c4e2afb05..bb3440788c6 100644 --- a/ft/tests/test.h +++ b/ft/tests/test.h @@ -107,6 +107,7 @@ PATENT RIGHTS GRANT: #include "logger.h" #include "fttypes.h" #include "ft-ops.h" +#include "cursor.h" #include "cachetable.h" #include "cachetable-internal.h" diff --git a/ft/txn.cc b/ft/txn.cc index 999dd242c3e..c19985c81a5 100644 --- a/ft/txn.cc +++ b/ft/txn.cc @@ -786,6 +786,21 @@ void toku_txn_set_client_id(TOKUTXN txn, uint64_t client_id) { txn->client_id = client_id; } +int toku_txn_reads_txnid(TXNID txnid, TOKUTXN txn) { + int r = 0; + TXNID oldest_live_in_snapshot = toku_get_oldest_in_live_root_txn_list(txn); + if (oldest_live_in_snapshot == TXNID_NONE && txnid < txn->snapshot_txnid64) { + r = TOKUDB_ACCEPT; + } else if (txnid < oldest_live_in_snapshot || txnid == txn->txnid.parent_id64) { + r = TOKUDB_ACCEPT; + } else if (txnid > txn->snapshot_txnid64 || toku_is_txn_in_live_root_txn_list(*txn->live_root_txn_list, txnid)) { + r = 0; + } else { + r = TOKUDB_ACCEPT; + } + return r; +} + #include void __attribute__((__constructor__)) 
toku_txn_status_helgrind_ignore(void); void toku_txn_status_helgrind_ignore(void) { diff --git a/ft/txn.h b/ft/txn.h index c9be49a1a36..0bdb48b80c0 100644 --- a/ft/txn.h +++ b/ft/txn.h @@ -221,3 +221,17 @@ bool toku_txn_has_spilled_rollback(TOKUTXN txn); uint64_t toku_txn_get_client_id(TOKUTXN txn); void toku_txn_set_client_id(TOKUTXN txn, uint64_t client_id); + + +// +// This function is used by the leafentry iterators. +// returns TOKUDB_ACCEPT if live transaction context is allowed to read a value +// that is written by transaction with LSN of id +// live transaction context may read value if either id is the root ancestor of context, or if +// id was committed before context's snapshot was taken. +// For id to be committed before context's snapshot was taken, the following must be true: +// - id < context->snapshot_txnid64 AND id is not in context's live root transaction list +// For the above to NOT be true: +// - id > context->snapshot_txnid64 OR id is in context's live root transaction list +// +int toku_txn_reads_txnid(TXNID txnid, TOKUTXN txn); diff --git a/ft/ule.cc b/ft/ule.cc index b5be3e075f9..4be71314c62 100644 --- a/ft/ule.cc +++ b/ft/ule.cc @@ -103,17 +103,17 @@ PATENT RIGHTS GRANT: // TokuWiki/Imp/TransactionsOverview. 
#include -#include "fttypes.h" -#include "ft-internal.h" - +#include "ft/fttypes.h" +#include "ft/ft-internal.h" +#include "ft/ft_msg.h" +#include "ft/leafentry.h" +#include "ft/logger.h" +#include "ft/txn.h" +#include "ft/txn_manager.h" +#include "ft/ule.h" +#include "ft/ule-internal.h" +#include "ft/xids.h" #include - -#include "leafentry.h" -#include "xids.h" -#include "ft_msg.h" -#include "ule.h" -#include "txn_manager.h" -#include "ule-internal.h" #include #include #include @@ -362,6 +362,9 @@ ule_simple_garbage_collection(ULE ule, txn_gc_info *gc_info) { done:; } +// TODO: Clean this up +extern bool garbage_collection_debug; + static void ule_garbage_collect(ULE ule, const xid_omt_t &snapshot_xids, const rx_omt_t &referenced_xids, const xid_omt_t &live_root_txns) { if (ule->num_cuxrs == 1) goto done; @@ -2079,7 +2082,7 @@ ule_verify_xids(ULE ule, uint32_t interesting, TXNID *xids) { // is_delp - output parameter that returns answer // context - parameter for f // -int +static int le_iterate_is_del(LEAFENTRY le, LE_ITERATE_CALLBACK f, bool *is_delp, TOKUTXN context) { #if ULE_DEBUG ULE_S ule; @@ -2147,6 +2150,27 @@ cleanup: return r; } +// +// Returns true if the value that is to be read is empty. +// +int le_val_is_del(LEAFENTRY le, bool is_snapshot_read, TOKUTXN txn) { + int rval; + if (is_snapshot_read) { + bool is_del = false; + le_iterate_is_del( + le, + toku_txn_reads_txnid, + &is_del, + txn + ); + rval = is_del; + } + else { + rval = le_latest_is_del(le); + } + return rval; +} + // // Iterates over "possible" TXNIDs in a leafentry's stack, until one is accepted by 'f'. Set // valpp and vallenp to value and length associated with accepted TXNID @@ -2267,6 +2291,27 @@ cleanup: return r; } +void le_extract_val(LEAFENTRY le, + // should we return the entire leafentry as the val? 
+ bool is_leaf_mode, bool is_snapshot_read, + TOKUTXN ttxn, uint32_t *vallen, void **val) { + if (is_leaf_mode) { + *val = le; + *vallen = leafentry_memsize(le); + } else if (is_snapshot_read) { + int r = le_iterate_val( + le, + toku_txn_reads_txnid, + val, + vallen, + ttxn + ); + lazy_assert_zero(r); + } else { + *val = le_latest_val_and_len(le, vallen); + } +} + // This is an on-disk format. static_asserts verify everything is packed and aligned correctly. struct __attribute__ ((__packed__)) leafentry_13 { struct leafentry_committed_13 { diff --git a/src/ydb-internal.h b/src/ydb-internal.h index ae508425263..60bc4b4314c 100644 --- a/src/ydb-internal.h +++ b/src/ydb-internal.h @@ -94,8 +94,10 @@ PATENT RIGHTS GRANT: #include #include +#include #include -#include +#include +#include #include #include diff --git a/src/ydb_cursor.cc b/src/ydb_cursor.cc index aa236ab0324..c4f25d4d574 100644 --- a/src/ydb_cursor.cc +++ b/src/ydb_cursor.cc @@ -97,6 +97,7 @@ PATENT RIGHTS GRANT: #include "ydb-internal.h" #include "ydb_cursor.h" #include "ydb_row_lock.h" +#include "ft/cursor.h" static YDB_C_LAYER_STATUS_S ydb_c_layer_status; #ifdef STATUS_VALUE From e2c20624e032addc70796496e75b26def3b20cdd Mon Sep 17 00:00:00 2001 From: John Esmet Date: Sat, 14 Jun 2014 20:25:34 -0400 Subject: [PATCH 016/190] FT-258 Remove a malloc from the cursor creation path --- ft/cursor.cc | 64 +++++++++++++++++++++++++++------------------- ft/cursor.h | 19 ++++++++------ src/ydb-internal.h | 7 ++++- src/ydb_cursor.cc | 51 +++++++++++++++++------------------- 4 files changed, 79 insertions(+), 62 deletions(-) diff --git a/ft/cursor.cc b/ft/cursor.cc index 5976234f5b2..f64785db8bb 100644 --- a/ft/cursor.cc +++ b/ft/cursor.cc @@ -91,61 +91,73 @@ PATENT RIGHTS GRANT: #include "ft/cursor.h" #include "ft/leafentry.h" #include "ft/txn.h" +#include "ft/ybt.h" -int toku_ft_cursor(FT_HANDLE ft_handle, FT_CURSOR *cursorptr, TOKUTXN ttxn, - bool is_snapshot_read, bool disable_prefetching) { +int 
toku_ft_cursor_create(FT_HANDLE ft_handle, FT_CURSOR cursor, TOKUTXN ttxn, + bool is_snapshot_read, bool disable_prefetching) { if (is_snapshot_read) { invariant(ttxn != NULL); int accepted = toku_txn_reads_txnid(ft_handle->ft->h->root_xid_that_created, ttxn); - if (accepted!=TOKUDB_ACCEPT) { - invariant(accepted==0); + if (accepted != TOKUDB_ACCEPT) { + invariant(accepted == 0); return TOKUDB_MVCC_DICTIONARY_TOO_NEW; } } - FT_CURSOR XCALLOC(cursor); + + memset(cursor, 0, sizeof(*cursor)); cursor->ft_handle = ft_handle; - cursor->prefetching = false; - toku_init_dbt(&cursor->range_lock_left_key); - toku_init_dbt(&cursor->range_lock_right_key); - cursor->left_is_neg_infty = false; - cursor->right_is_pos_infty = false; cursor->is_snapshot_read = is_snapshot_read; - cursor->is_leaf_mode = false; cursor->ttxn = ttxn; cursor->disable_prefetching = disable_prefetching; - cursor->is_temporary = false; - *cursorptr = cursor; return 0; } -void toku_ft_cursor_close(FT_CURSOR cursor) { +void toku_ft_cursor_destroy(FT_CURSOR cursor) { toku_destroy_dbt(&cursor->key); toku_destroy_dbt(&cursor->val); toku_destroy_dbt(&cursor->range_lock_left_key); toku_destroy_dbt(&cursor->range_lock_right_key); +} + +// deprecated, should only be used by tests +int toku_ft_cursor(FT_HANDLE ft_handle, FT_CURSOR *cursorptr, TOKUTXN ttxn, + bool is_snapshot_read, bool disable_prefetching) { + FT_CURSOR XCALLOC(cursor); + int r = toku_ft_cursor_create(ft_handle, cursor, ttxn, is_snapshot_read, disable_prefetching); + if (r == 0) { + *cursorptr = cursor; + } else { + toku_free(cursor); + } + return r; +} + +// deprecated, should only be used by tests +void toku_ft_cursor_close(FT_CURSOR cursor) { + toku_ft_cursor_destroy(cursor); toku_free(cursor); } -void toku_ft_cursor_remove_restriction(FT_CURSOR ftcursor) { - ftcursor->out_of_range_error = 0; - ftcursor->direction = 0; +void toku_ft_cursor_remove_restriction(FT_CURSOR cursor) { + cursor->out_of_range_error = 0; + cursor->direction = 0; } -void 
toku_ft_cursor_set_check_interrupt_cb(FT_CURSOR ftcursor, FT_CHECK_INTERRUPT_CALLBACK cb, void *extra) { - ftcursor->interrupt_cb = cb; - ftcursor->interrupt_cb_extra = extra; +void toku_ft_cursor_set_check_interrupt_cb(FT_CURSOR cursor, FT_CHECK_INTERRUPT_CALLBACK cb, void *extra) { + cursor->interrupt_cb = cb; + cursor->interrupt_cb_extra = extra; } -void toku_ft_cursor_set_temporary(FT_CURSOR ftcursor) { - ftcursor->is_temporary = true; +void toku_ft_cursor_set_temporary(FT_CURSOR cursor) { + cursor->is_temporary = true; } -void toku_ft_cursor_set_leaf_mode(FT_CURSOR ftcursor) { - ftcursor->is_leaf_mode = true; +void toku_ft_cursor_set_leaf_mode(FT_CURSOR cursor) { + cursor->is_leaf_mode = true; } -int toku_ft_cursor_is_leaf_mode(FT_CURSOR ftcursor) { - return ftcursor->is_leaf_mode; +int toku_ft_cursor_is_leaf_mode(FT_CURSOR cursor) { + return cursor->is_leaf_mode; } // TODO: Rename / cleanup - this has nothing to do with locking diff --git a/ft/cursor.h b/ft/cursor.h index b433694ae8d..d1c5620c7f2 100644 --- a/ft/cursor.h +++ b/ft/cursor.h @@ -174,9 +174,12 @@ static inline void ft_search_finish(ft_search *search) { toku_destroy_dbt(&search->pivot_bound); } -int toku_ft_lookup (FT_HANDLE ft_h, DBT *k, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) __attribute__ ((warn_unused_result)); -int toku_ft_cursor(FT_HANDLE ft_handle, FT_CURSOR*, TOKUTXN, bool, bool) __attribute__ ((warn_unused_result)); +int toku_ft_cursor_create(FT_HANDLE ft_handle, FT_CURSOR cursor, TOKUTXN txn, bool, bool); + +void toku_ft_cursor_destroy(FT_CURSOR cursor); + +int toku_ft_lookup(FT_HANDLE ft_h, DBT *k, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) __attribute__ ((warn_unused_result)); void toku_ft_cursor_set_prefetching(FT_CURSOR cursor); @@ -213,8 +216,6 @@ int toku_ft_cursor_set_range(FT_CURSOR cursor, DBT *key, DBT *key_bound, FT_GET_ int toku_ft_cursor_set_range_reverse(FT_CURSOR cursor, DBT *key, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) __attribute__ ((warn_unused_result)); 
-void toku_ft_cursor_close(FT_CURSOR cursor); - bool toku_ft_cursor_uninitialized(FT_CURSOR cursor) __attribute__ ((warn_unused_result)); void toku_ft_cursor_peek(FT_CURSOR cursor, const DBT **pkey, const DBT **pval); @@ -225,10 +226,12 @@ int toku_ft_cursor_shortcut(FT_CURSOR cursor, int direction, uint32_t index, bn_ FT_GET_CALLBACK_FUNCTION getf, void *getf_v, uint32_t *keylen, void **key, uint32_t *vallen, void **val); -// deprecated -int toku_ft_cursor_get(FT_CURSOR cursor, DBT *key, FT_GET_CALLBACK_FUNCTION getf, void *getf_v, int get_flags); -int toku_ft_cursor_delete(FT_CURSOR cursor, int flags, TOKUTXN txn); - // used by get_key_after_bytes int toku_ft_cursor_compare_one(const ft_search &search, const DBT *x); int toku_ft_cursor_compare_set_range(const ft_search &search, const DBT *x); + +// deprecated, should only be used by tests, and eventually removed +int toku_ft_cursor(FT_HANDLE ft_handle, FT_CURSOR *ftcursor_p, TOKUTXN txn, bool, bool) __attribute__ ((warn_unused_result)); +void toku_ft_cursor_close(FT_CURSOR cursor); +int toku_ft_cursor_get(FT_CURSOR cursor, DBT *key, FT_GET_CALLBACK_FUNCTION getf, void *getf_v, int get_flags); +int toku_ft_cursor_delete(FT_CURSOR cursor, int flags, TOKUTXN txn); diff --git a/src/ydb-internal.h b/src/ydb-internal.h index 60bc4b4314c..68ce2291924 100644 --- a/src/ydb-internal.h +++ b/src/ydb-internal.h @@ -95,6 +95,7 @@ PATENT RIGHTS GRANT: #include #include +#include #include #include #include @@ -278,7 +279,7 @@ struct __toku_db_txn_external { #define db_txn_struct_i(x) (&((struct __toku_db_txn_external *)x)->internal_part) struct __toku_dbc_internal { - struct ft_cursor *c; + struct ft_cursor ftcursor; DB_TXN *txn; TOKU_ISOLATION iso; struct simple_dbt skey_s,sval_s; @@ -296,6 +297,10 @@ struct __toku_dbc_external { #define dbc_struct_i(x) (&((struct __toku_dbc_external *)x)->internal_part) +static inline struct ft_cursor *dbc_ftcursor(DBC *c) { + return &dbc_struct_i(c)->ftcursor; +} + static inline int 
env_opened(DB_ENV *env) { return env->i->cachetable != 0; diff --git a/src/ydb_cursor.cc b/src/ydb_cursor.cc index c4f25d4d574..b784e9b6667 100644 --- a/src/ydb_cursor.cc +++ b/src/ydb_cursor.cc @@ -134,8 +134,8 @@ get_nonmain_cursor_flags(uint32_t flags) { } static inline bool -c_uninitialized(DBC* c) { - return toku_ft_cursor_uninitialized(dbc_struct_i(c)->c); +c_uninitialized(DBC *c) { + return toku_ft_cursor_uninitialized(dbc_ftcursor(c)); } typedef struct query_context_wrapped_t { @@ -201,7 +201,7 @@ typedef struct query_context_with_input_t { static void query_context_base_init(QUERY_CONTEXT_BASE context, DBC *c, uint32_t flag, bool is_write_op, YDB_CALLBACK_FUNCTION f, void *extra) { - context->c = dbc_struct_i(c)->c; + context->c = dbc_ftcursor(c); context->txn = dbc_struct_i(c)->txn; context->db = c->dbp; context->f = f; @@ -278,7 +278,7 @@ c_getf_first(DBC *c, uint32_t flag, YDB_CALLBACK_FUNCTION f, void *extra) { c_query_context_init(&context, c, flag, f, extra); while (r == 0) { //toku_ft_cursor_first will call c_getf_first_callback(..., context) (if query is successful) - r = toku_ft_cursor_first(dbc_struct_i(c)->c, c_getf_first_callback, &context); + r = toku_ft_cursor_first(dbc_ftcursor(c), c_getf_first_callback, &context); if (r == DB_LOCK_NOTGRANTED) { r = toku_db_wait_range_lock(context.base.db, context.base.txn, &context.base.request); } else { @@ -329,7 +329,7 @@ c_getf_last(DBC *c, uint32_t flag, YDB_CALLBACK_FUNCTION f, void *extra) { c_query_context_init(&context, c, flag, f, extra); while (r == 0) { //toku_ft_cursor_last will call c_getf_last_callback(..., context) (if query is successful) - r = toku_ft_cursor_last(dbc_struct_i(c)->c, c_getf_last_callback, &context); + r = toku_ft_cursor_last(dbc_ftcursor(c), c_getf_last_callback, &context); if (r == DB_LOCK_NOTGRANTED) { r = toku_db_wait_range_lock(context.base.db, context.base.txn, &context.base.request); } else { @@ -384,7 +384,7 @@ c_getf_next(DBC *c, uint32_t flag, YDB_CALLBACK_FUNCTION 
f, void *extra) { c_query_context_init(&context, c, flag, f, extra); while (r == 0) { //toku_ft_cursor_next will call c_getf_next_callback(..., context) (if query is successful) - r = toku_ft_cursor_next(dbc_struct_i(c)->c, c_getf_next_callback, &context); + r = toku_ft_cursor_next(dbc_ftcursor(c), c_getf_next_callback, &context); if (r == DB_LOCK_NOTGRANTED) { r = toku_db_wait_range_lock(context.base.db, context.base.txn, &context.base.request); } else { @@ -443,7 +443,7 @@ c_getf_prev(DBC *c, uint32_t flag, YDB_CALLBACK_FUNCTION f, void *extra) { c_query_context_init(&context, c, flag, f, extra); while (r == 0) { //toku_ft_cursor_prev will call c_getf_prev_callback(..., context) (if query is successful) - r = toku_ft_cursor_prev(dbc_struct_i(c)->c, c_getf_prev_callback, &context); + r = toku_ft_cursor_prev(dbc_ftcursor(c), c_getf_prev_callback, &context); if (r == DB_LOCK_NOTGRANTED) { r = toku_db_wait_range_lock(context.base.db, context.base.txn, &context.base.request); } else { @@ -496,7 +496,7 @@ c_getf_current(DBC *c, uint32_t flag, YDB_CALLBACK_FUNCTION f, void *extra) { QUERY_CONTEXT_S context; //Describes the context of this query. 
c_query_context_init(&context, c, flag, f, extra); //toku_ft_cursor_current will call c_getf_current_callback(..., context) (if query is successful) - int r = toku_ft_cursor_current(dbc_struct_i(c)->c, DB_CURRENT, c_getf_current_callback, &context); + int r = toku_ft_cursor_current(dbc_ftcursor(c), DB_CURRENT, c_getf_current_callback, &context); c_query_context_destroy(&context); return r; } @@ -535,7 +535,7 @@ toku_c_getf_set(DBC *c, uint32_t flag, DBT *key, YDB_CALLBACK_FUNCTION f, void * query_context_with_input_init(&context, c, flag, key, NULL, f, extra); while (r == 0) { //toku_ft_cursor_set will call c_getf_set_callback(..., context) (if query is successful) - r = toku_ft_cursor_set(dbc_struct_i(c)->c, key, c_getf_set_callback, &context); + r = toku_ft_cursor_set(dbc_ftcursor(c), key, c_getf_set_callback, &context); if (r == DB_LOCK_NOTGRANTED) { r = toku_db_wait_range_lock(context.base.db, context.base.txn, &context.base.request); } else { @@ -588,7 +588,7 @@ c_getf_set_range(DBC *c, uint32_t flag, DBT *key, YDB_CALLBACK_FUNCTION f, void query_context_with_input_init(&context, c, flag, key, NULL, f, extra); while (r == 0) { //toku_ft_cursor_set_range will call c_getf_set_range_callback(..., context) (if query is successful) - r = toku_ft_cursor_set_range(dbc_struct_i(c)->c, key, nullptr, c_getf_set_range_callback, &context); + r = toku_ft_cursor_set_range(dbc_ftcursor(c), key, nullptr, c_getf_set_range_callback, &context); if (r == DB_LOCK_NOTGRANTED) { r = toku_db_wait_range_lock(context.base.db, context.base.txn, &context.base.request); } else { @@ -642,7 +642,7 @@ c_getf_set_range_with_bound(DBC *c, uint32_t flag, DBT *key, DBT *key_bound, YDB query_context_with_input_init(&context, c, flag, key, NULL, f, extra); while (r == 0) { //toku_ft_cursor_set_range will call c_getf_set_range_callback(..., context) (if query is successful) - r = toku_ft_cursor_set_range(dbc_struct_i(c)->c, key, key_bound, c_getf_set_range_callback, &context); + r = 
toku_ft_cursor_set_range(dbc_ftcursor(c), key, key_bound, c_getf_set_range_callback, &context); if (r == DB_LOCK_NOTGRANTED) { r = toku_db_wait_range_lock(context.base.db, context.base.txn, &context.base.request); } else { @@ -665,7 +665,7 @@ c_getf_set_range_reverse(DBC *c, uint32_t flag, DBT *key, YDB_CALLBACK_FUNCTION query_context_with_input_init(&context, c, flag, key, NULL, f, extra); while (r == 0) { //toku_ft_cursor_set_range_reverse will call c_getf_set_range_reverse_callback(..., context) (if query is successful) - r = toku_ft_cursor_set_range_reverse(dbc_struct_i(c)->c, key, c_getf_set_range_reverse_callback, &context); + r = toku_ft_cursor_set_range_reverse(dbc_ftcursor(c), key, c_getf_set_range_reverse_callback, &context); if (r == DB_LOCK_NOTGRANTED) { r = toku_db_wait_range_lock(context.base.db, context.base.txn, &context.base.request); } else { @@ -710,11 +710,10 @@ c_getf_set_range_reverse_callback(ITEMLEN keylen, bytevec key, ITEMLEN vallen, b } // Close a cursor. -int -toku_c_close(DBC * c) { +int toku_c_close(DBC *c) { HANDLE_PANICKED_DB(c->dbp); HANDLE_CURSOR_ILLEGAL_WORKING_PARENT_TXN(c); - toku_ft_cursor_close(dbc_struct_i(c)->c); + toku_ft_cursor_destroy(dbc_ftcursor(c)); toku_sdbt_cleanup(&dbc_struct_i(c)->skey_s); toku_sdbt_cleanup(&dbc_struct_i(c)->sval_s); toku_free(c); @@ -740,7 +739,7 @@ c_set_bounds(DBC *dbc, const DBT *left_key, const DBT *right_key, bool pre_acqui DB *db = dbc->dbp; DB_TXN *txn = dbc_struct_i(dbc)->txn; HANDLE_PANICKED_DB(db); - toku_ft_cursor_set_range_lock(dbc_struct_i(dbc)->c, left_key, right_key, + toku_ft_cursor_set_range_lock(dbc_ftcursor(dbc), left_key, right_key, (left_key == toku_dbt_negative_infinity()), (right_key == toku_dbt_positive_infinity()), out_of_range_error); @@ -758,12 +757,12 @@ c_set_bounds(DBC *dbc, const DBT *left_key, const DBT *right_key, bool pre_acqui static void c_remove_restriction(DBC *dbc) { - toku_ft_cursor_remove_restriction(dbc_struct_i(dbc)->c); + 
toku_ft_cursor_remove_restriction(dbc_ftcursor(dbc)); } static void c_set_check_interrupt_callback(DBC* dbc, bool (*interrupt_callback)(void*), void *extra) { - toku_ft_cursor_set_check_interrupt_cb(dbc_struct_i(dbc)->c, interrupt_callback, extra); + toku_ft_cursor_set_check_interrupt_cb(dbc_ftcursor(dbc), interrupt_callback, extra); } int @@ -842,8 +841,6 @@ toku_db_cursor_internal(DB * db, DB_TXN * txn, DBC ** c, uint32_t flags, int is_ ); } - int r = 0; - struct __toku_dbc_external *XMALLOC(eresult); // so the internal stuff is stuck on the end memset(eresult, 0, sizeof(*eresult)); DBC *result = &eresult->external_part; @@ -889,25 +886,25 @@ toku_db_cursor_internal(DB * db, DB_TXN * txn, DBC ** c, uint32_t flags, int is_ is_snapshot_read = (dbc_struct_i(result)->iso == TOKU_ISO_READ_COMMITTED || dbc_struct_i(result)->iso == TOKU_ISO_SNAPSHOT); } - r = toku_ft_cursor( + int r = toku_ft_cursor_create( db->i->ft_handle, - &dbc_struct_i(result)->c, + dbc_ftcursor(result), txn ? db_txn_struct_i(txn)->tokutxn : NULL, is_snapshot_read, ((flags & DBC_DISABLE_PREFETCHING) != 0) ); - assert(r == 0 || r == TOKUDB_MVCC_DICTIONARY_TOO_NEW); if (r == 0) { // Set the is_temporary_cursor boolean inside the ftnode so // that a query only needing one cursor will not perform // unecessary malloc calls. + // + // TODO: Move me to toku_ft_cursor_create constructor if (is_temporary_cursor) { - toku_ft_cursor_set_temporary(dbc_struct_i(result)->c); + toku_ft_cursor_set_temporary(dbc_ftcursor(result)); } - *c = result; - } - else { + } else { + invariant(r == TOKUDB_MVCC_DICTIONARY_TOO_NEW); toku_free(result); } return r; From d8358af74af53f83f85bd7d7272e342fbb7a65b9 Mon Sep 17 00:00:00 2001 From: John Esmet Date: Sat, 14 Jun 2014 20:25:34 -0400 Subject: [PATCH 017/190] FT-258 Remove another malloc from the cursor create path. Queries using db->getf_set now perform zero mallocs. Also, move the is_temporary bit into the cursor constructor. 
--- buildheader/make_tdb.cc | 1 + ft/cursor.cc | 13 +++---- ft/cursor.h | 8 ++-- src/ydb-internal.h | 17 ++++++--- src/ydb_cursor.cc | 81 ++++++++++++++++++++--------------------- src/ydb_cursor.h | 7 +++- src/ydb_db.cc | 12 +++--- 7 files changed, 72 insertions(+), 67 deletions(-) diff --git a/buildheader/make_tdb.cc b/buildheader/make_tdb.cc index d185aa352fb..bd0e01e2960 100644 --- a/buildheader/make_tdb.cc +++ b/buildheader/make_tdb.cc @@ -612,6 +612,7 @@ static void print_dbc_struct (void) { "int (*c_set_bounds)(DBC*, const DBT*, const DBT*, bool pre_acquire, int out_of_range_error)", "void (*c_set_check_interrupt_callback)(DBC*, bool (*)(void*), void *)", "void (*c_remove_restriction)(DBC*)", + "char _internal[512]", NULL}; sort_and_dump_fields("dbc", false, extra); } diff --git a/ft/cursor.cc b/ft/cursor.cc index f64785db8bb..154bbaa9dff 100644 --- a/ft/cursor.cc +++ b/ft/cursor.cc @@ -94,7 +94,9 @@ PATENT RIGHTS GRANT: #include "ft/ybt.h" int toku_ft_cursor_create(FT_HANDLE ft_handle, FT_CURSOR cursor, TOKUTXN ttxn, - bool is_snapshot_read, bool disable_prefetching) { + bool is_snapshot_read, + bool disable_prefetching, + bool is_temporary) { if (is_snapshot_read) { invariant(ttxn != NULL); int accepted = toku_txn_reads_txnid(ft_handle->ft->h->root_xid_that_created, ttxn); @@ -106,9 +108,10 @@ int toku_ft_cursor_create(FT_HANDLE ft_handle, FT_CURSOR cursor, TOKUTXN ttxn, memset(cursor, 0, sizeof(*cursor)); cursor->ft_handle = ft_handle; - cursor->is_snapshot_read = is_snapshot_read; cursor->ttxn = ttxn; + cursor->is_snapshot_read = is_snapshot_read; cursor->disable_prefetching = disable_prefetching; + cursor->is_temporary = is_temporary; return 0; } @@ -123,7 +126,7 @@ void toku_ft_cursor_destroy(FT_CURSOR cursor) { int toku_ft_cursor(FT_HANDLE ft_handle, FT_CURSOR *cursorptr, TOKUTXN ttxn, bool is_snapshot_read, bool disable_prefetching) { FT_CURSOR XCALLOC(cursor); - int r = toku_ft_cursor_create(ft_handle, cursor, ttxn, is_snapshot_read, 
disable_prefetching); + int r = toku_ft_cursor_create(ft_handle, cursor, ttxn, is_snapshot_read, disable_prefetching, false); if (r == 0) { *cursorptr = cursor; } else { @@ -148,10 +151,6 @@ void toku_ft_cursor_set_check_interrupt_cb(FT_CURSOR cursor, FT_CHECK_INTERRUPT_ cursor->interrupt_cb_extra = extra; } -void toku_ft_cursor_set_temporary(FT_CURSOR cursor) { - cursor->is_temporary = true; -} - void toku_ft_cursor_set_leaf_mode(FT_CURSOR cursor) { cursor->is_leaf_mode = true; } diff --git a/ft/cursor.h b/ft/cursor.h index d1c5620c7f2..21194f91140 100644 --- a/ft/cursor.h +++ b/ft/cursor.h @@ -175,7 +175,10 @@ static inline void ft_search_finish(ft_search *search) { } -int toku_ft_cursor_create(FT_HANDLE ft_handle, FT_CURSOR cursor, TOKUTXN txn, bool, bool); +int toku_ft_cursor_create(FT_HANDLE ft_handle, FT_CURSOR cursor, TOKUTXN txn, + bool is_snapshot_read, + bool disable_prefetching, + bool is_temporary); void toku_ft_cursor_destroy(FT_CURSOR cursor); @@ -189,9 +192,6 @@ bool toku_ft_cursor_not_set(FT_CURSOR cursor); void toku_ft_cursor_set_leaf_mode(FT_CURSOR cursor); -// Sets a boolean on the ft cursor that prevents uncessary copying of the cursor duing a one query. 
-void toku_ft_cursor_set_temporary(FT_CURSOR cursor); - void toku_ft_cursor_remove_restriction(FT_CURSOR cursor); void toku_ft_cursor_set_check_interrupt_cb(FT_CURSOR cursor, FT_CHECK_INTERRUPT_CALLBACK cb, void *extra); diff --git a/src/ydb-internal.h b/src/ydb-internal.h index 68ce2291924..8edda008e80 100644 --- a/src/ydb-internal.h +++ b/src/ydb-internal.h @@ -290,12 +290,17 @@ struct __toku_dbc_internal { bool rmw; }; -struct __toku_dbc_external { - struct __toku_dbc external_part; - struct __toku_dbc_internal internal_part; -}; - -#define dbc_struct_i(x) (&((struct __toku_dbc_external *)x)->internal_part) +static_assert(sizeof(__toku_dbc_internal) <= sizeof(((DBC *) nullptr)->_internal), + "__toku_dbc_internal doesn't fit in the internal portion of a DBC"); + +static inline __toku_dbc_internal *dbc_struct_i(DBC *c) { + union dbc_union { + __toku_dbc_internal *dbc_internal; + char *buf; + } u; + u.buf = c->_internal; + return u.dbc_internal; +} static inline struct ft_cursor *dbc_ftcursor(DBC *c) { return &dbc_struct_i(c)->ftcursor; diff --git a/src/ydb_cursor.cc b/src/ydb_cursor.cc index b784e9b6667..a372fab3006 100644 --- a/src/ydb_cursor.cc +++ b/src/ydb_cursor.cc @@ -709,13 +709,19 @@ c_getf_set_range_reverse_callback(ITEMLEN keylen, bytevec key, ITEMLEN vallen, b return r; } -// Close a cursor. -int toku_c_close(DBC *c) { + +int toku_c_close_internal(DBC *c) { HANDLE_PANICKED_DB(c->dbp); HANDLE_CURSOR_ILLEGAL_WORKING_PARENT_TXN(c); toku_ft_cursor_destroy(dbc_ftcursor(c)); toku_sdbt_cleanup(&dbc_struct_i(c)->skey_s); toku_sdbt_cleanup(&dbc_struct_i(c)->sval_s); + return 0; +} + +// Close a cursor. 
+int toku_c_close(DBC *c) { + toku_c_close_internal(c); toku_free(c); return 0; } @@ -828,7 +834,7 @@ toku_c_get(DBC* c, DBT* key, DBT* val, uint32_t flag) { } int -toku_db_cursor_internal(DB * db, DB_TXN * txn, DBC ** c, uint32_t flags, int is_temporary_cursor) { +toku_db_cursor_internal(DB * db, DB_TXN * txn, DBC *c, uint32_t flags, int is_temporary_cursor) { HANDLE_PANICKED_DB(db); HANDLE_DB_ILLEGAL_WORKING_PARENT_TXN(db, txn); DB_ENV* env = db->dbenv; @@ -841,11 +847,7 @@ toku_db_cursor_internal(DB * db, DB_TXN * txn, DBC ** c, uint32_t flags, int is_ ); } - struct __toku_dbc_external *XMALLOC(eresult); // so the internal stuff is stuck on the end - memset(eresult, 0, sizeof(*eresult)); - DBC *result = &eresult->external_part; - -#define SCRS(name) result->name = name +#define SCRS(name) c->name = name SCRS(c_getf_first); SCRS(c_getf_last); SCRS(c_getf_next); @@ -859,59 +861,49 @@ toku_db_cursor_internal(DB * db, DB_TXN * txn, DBC ** c, uint32_t flags, int is_ SCRS(c_set_check_interrupt_callback); #undef SCRS - result->c_get = toku_c_get; - result->c_getf_set = toku_c_getf_set; - result->c_close = toku_c_close; + c->c_get = toku_c_get; + c->c_getf_set = toku_c_getf_set; + c->c_close = toku_c_close; - result->dbp = db; + c->dbp = db; - dbc_struct_i(result)->txn = txn; - dbc_struct_i(result)->skey_s = (struct simple_dbt){0,0}; - dbc_struct_i(result)->sval_s = (struct simple_dbt){0,0}; + dbc_struct_i(c)->txn = txn; + dbc_struct_i(c)->skey_s = (struct simple_dbt){0,0}; + dbc_struct_i(c)->sval_s = (struct simple_dbt){0,0}; if (is_temporary_cursor) { - dbc_struct_i(result)->skey = &db->i->skey; - dbc_struct_i(result)->sval = &db->i->sval; + dbc_struct_i(c)->skey = &db->i->skey; + dbc_struct_i(c)->sval = &db->i->sval; } else { - dbc_struct_i(result)->skey = &dbc_struct_i(result)->skey_s; - dbc_struct_i(result)->sval = &dbc_struct_i(result)->sval_s; + dbc_struct_i(c)->skey = &dbc_struct_i(c)->skey_s; + dbc_struct_i(c)->sval = &dbc_struct_i(c)->sval_s; } if (flags & 
DB_SERIALIZABLE) { - dbc_struct_i(result)->iso = TOKU_ISO_SERIALIZABLE; + dbc_struct_i(c)->iso = TOKU_ISO_SERIALIZABLE; } else { - dbc_struct_i(result)->iso = txn ? db_txn_struct_i(txn)->iso : TOKU_ISO_SERIALIZABLE; + dbc_struct_i(c)->iso = txn ? db_txn_struct_i(txn)->iso : TOKU_ISO_SERIALIZABLE; } - dbc_struct_i(result)->rmw = (flags & DB_RMW) != 0; + dbc_struct_i(c)->rmw = (flags & DB_RMW) != 0; bool is_snapshot_read = false; if (txn) { - is_snapshot_read = (dbc_struct_i(result)->iso == TOKU_ISO_READ_COMMITTED || - dbc_struct_i(result)->iso == TOKU_ISO_SNAPSHOT); + is_snapshot_read = (dbc_struct_i(c)->iso == TOKU_ISO_READ_COMMITTED || + dbc_struct_i(c)->iso == TOKU_ISO_SNAPSHOT); } int r = toku_ft_cursor_create( db->i->ft_handle, - dbc_ftcursor(result), + dbc_ftcursor(c), txn ? db_txn_struct_i(txn)->tokutxn : NULL, is_snapshot_read, - ((flags & DBC_DISABLE_PREFETCHING) != 0) + ((flags & DBC_DISABLE_PREFETCHING) != 0), + is_temporary_cursor != 0 ); - if (r == 0) { - // Set the is_temporary_cursor boolean inside the ftnode so - // that a query only needing one cursor will not perform - // unecessary malloc calls. - // - // TODO: Move me to toku_ft_cursor_create constructor - if (is_temporary_cursor) { - toku_ft_cursor_set_temporary(dbc_ftcursor(result)); - } - *c = result; - } else { + if (r != 0) { invariant(r == TOKUDB_MVCC_DICTIONARY_TOO_NEW); - toku_free(result); } return r; } static inline int -autotxn_db_cursor(DB *db, DB_TXN *txn, DBC **c, uint32_t flags) { +autotxn_db_cursor(DB *db, DB_TXN *txn, DBC *c, uint32_t flags) { if (!txn && (db->dbenv->i->open_flags & DB_INIT_TXN)) { return toku_ydb_do_error(db->dbenv, EINVAL, "Cursors in a transaction environment must have transactions.\n"); @@ -920,9 +912,14 @@ autotxn_db_cursor(DB *db, DB_TXN *txn, DBC **c, uint32_t flags) { } // Create a cursor on a db. 
-int -toku_db_cursor(DB *db, DB_TXN *txn, DBC **c, uint32_t flags) { - int r = autotxn_db_cursor(db, txn, c, flags); +int toku_db_cursor(DB *db, DB_TXN *txn, DBC **c, uint32_t flags) { + DBC *XMALLOC(cursor); + int r = autotxn_db_cursor(db, txn, cursor, flags); + if (r == 0) { + *c = cursor; + } else { + toku_free(cursor); + } return r; } diff --git a/src/ydb_cursor.h b/src/ydb_cursor.h index 1eab0523570..ff7070f668b 100644 --- a/src/ydb_cursor.h +++ b/src/ydb_cursor.h @@ -105,6 +105,9 @@ void ydb_c_layer_get_status(YDB_C_LAYER_STATUS statp); int toku_c_get(DBC * c, DBT * key, DBT * data, uint32_t flag); int toku_c_getf_set(DBC *c, uint32_t flag, DBT *key, YDB_CALLBACK_FUNCTION f, void *extra); -int toku_c_close(DBC * c); -int toku_db_cursor_internal(DB *db, DB_TXN * txn, DBC **c, uint32_t flags, int is_temporary_cursor); + int toku_db_cursor(DB *db, DB_TXN *txn, DBC **c, uint32_t flags); +int toku_db_cursor_internal(DB *db, DB_TXN * txn, DBC *c, uint32_t flags, int is_temporary_cursor); + +int toku_c_close(DBC *c); +int toku_c_close_internal(DBC *c); diff --git a/src/ydb_db.cc b/src/ydb_db.cc index 3d2e359e5a1..87ad9189f5c 100644 --- a/src/ydb_db.cc +++ b/src/ydb_db.cc @@ -225,13 +225,13 @@ int db_getf_set(DB *db, DB_TXN *txn, uint32_t flags, DBT *key, YDB_CALLBACK_FUNCTION f, void *extra) { HANDLE_PANICKED_DB(db); HANDLE_DB_ILLEGAL_WORKING_PARENT_TXN(db, txn); - DBC *c; + DBC c; uint32_t create_flags = flags & (DB_ISOLATION_FLAGS | DB_RMW); flags &= ~DB_ISOLATION_FLAGS; int r = toku_db_cursor_internal(db, txn, &c, create_flags | DBC_DISABLE_PREFETCHING, 1); if (r==0) { - r = toku_c_getf_set(c, flags, key, f, extra); - int r2 = toku_c_close(c); + r = toku_c_getf_set(&c, flags, key, f, extra); + int r2 = toku_c_close_internal(&c); if (r==0) r = r2; } return r; @@ -258,12 +258,12 @@ toku_db_get (DB * db, DB_TXN * txn, DBT * key, DBT * data, uint32_t flags) { // And DB_GET_BOTH is no longer supported. #2862. 
if (flags != 0) return EINVAL; - DBC *dbc; + DBC dbc; r = toku_db_cursor_internal(db, txn, &dbc, iso_flags | DBC_DISABLE_PREFETCHING, 1); if (r!=0) return r; uint32_t c_get_flags = DB_SET; - r = toku_c_get(dbc, key, data, c_get_flags | lock_flags); - int r2 = toku_c_close(dbc); + r = toku_c_get(&dbc, key, data, c_get_flags | lock_flags); + int r2 = toku_c_close_internal(&dbc); return r ? r : r2; } From 79a9a40d5ede081b35705090d0442777c1d19ac6 Mon Sep 17 00:00:00 2001 From: John Esmet Date: Sat, 14 Jun 2014 20:25:34 -0400 Subject: [PATCH 018/190] FT-261 Don't clone an empty live root transaction OMT --- ft/txn_manager.cc | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/ft/txn_manager.cc b/ft/txn_manager.cc index 12c6ba4e887..877d6ebacfd 100644 --- a/ft/txn_manager.cc +++ b/ft/txn_manager.cc @@ -339,7 +339,11 @@ int live_root_txn_list_iter(const TOKUTXN &live_xid, const uint32_t UU(index), T // Create list of root transactions that were live when this txn began. static inline void setup_live_root_txn_list(xid_omt_t* live_root_txnid, xid_omt_t* live_root_txn_list) { - live_root_txn_list->clone(*live_root_txnid); + if (live_root_txnid->size() > 0) { + live_root_txn_list->clone(*live_root_txnid); + } else { + live_root_txn_list->create_no_array(); + } } //Heaviside function to search through an OMT by a TXNID From ac575d01ba4f064a02e89bbb93eab68619ae6dbf Mon Sep 17 00:00:00 2001 From: John Esmet Date: Sat, 14 Jun 2014 20:25:34 -0400 Subject: [PATCH 019/190] FT-260 Move ftnode related structures and declarations to ft/node.h and ft/node.cc --- ft/CMakeLists.txt | 1 + ft/ft-cachetable-wrappers.cc | 11 +- ft/ft-cachetable-wrappers.h | 3 +- ft/ft-flusher.cc | 161 +- ft/ft-flusher.h | 20 +- ft/ft-hot-flusher.cc | 19 +- ft/ft-internal.h | 627 +----- ft/ft-node-deserialize.cc | 4 +- ft/ft-ops.cc | 2123 ++----------------- ft/ft-test-helpers.cc | 3 +- ft/ft-verify.cc | 5 +- ft/ft.cc | 12 +- ft/ft_node-serialize.cc | 227 +- ft/ftverify.cc | 1 + 
ft/loader/loader.cc | 1 + ft/msg_buffer.cc | 20 +- ft/node.cc | 2034 ++++++++++++++++++ ft/node.h | 531 +++++ ft/tests/test-pick-child-to-flush.cc | 12 +- ft/tests/test.h | 1 + ft/tests/test_rightmost_leaf_split_merge.cc | 2 +- ft/tokuftdump.cc | 1 + 22 files changed, 2982 insertions(+), 2837 deletions(-) create mode 100644 ft/node.cc create mode 100644 ft/node.h diff --git a/ft/CMakeLists.txt b/ft/CMakeLists.txt index ddf5eda4dfe..b0916ef8de0 100644 --- a/ft/CMakeLists.txt +++ b/ft/CMakeLists.txt @@ -53,6 +53,7 @@ set(FT_SOURCES logger log_upgrade msg_buffer + node quicklz recover rollback diff --git a/ft/ft-cachetable-wrappers.cc b/ft/ft-cachetable-wrappers.cc index 91a0040b02e..14d6e874d1b 100644 --- a/ft/ft-cachetable-wrappers.cc +++ b/ft/ft-cachetable-wrappers.cc @@ -89,12 +89,13 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#include +#include "ft/fttypes.h" +#include "ft/ft-cachetable-wrappers.h" +#include "ft/ft-flusher.h" +#include "ft/ft-internal.h" +#include "ft/ft.h" +#include "ft/node.h" -#include -#include -#include -#include #include static void diff --git a/ft/ft-cachetable-wrappers.h b/ft/ft-cachetable-wrappers.h index b98cdd0fd19..12e55cfea23 100644 --- a/ft/ft-cachetable-wrappers.h +++ b/ft/ft-cachetable-wrappers.h @@ -92,8 +92,9 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 
11/760379 and to the patents and/or patent applications resulting from it." -#include "ft/ft-internal.h" #include "ft/cachetable.h" +#include "ft/ft-internal.h" +#include "ft/node.h" /** * Put an empty node (that is, no fields filled) into the cachetable. diff --git a/ft/ft-flusher.cc b/ft/ft-flusher.cc index dc4096a7993..2b51c55f040 100644 --- a/ft/ft-flusher.cc +++ b/ft/ft-flusher.cc @@ -89,15 +89,16 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include "ft/ft.h" +#include "ft/ft-cachetable-wrappers.h" +#include "ft/ft-internal.h" +#include "ft/ft-flusher.h" +#include "ft/ft-flusher-internal.h" +#include "ft/node.h" +#include "portability/toku_assert.h" +#include "portability/toku_atomic.h" +#include "util/status.h" +#include "util/context.h" /* Status is intended for display to humans to help understand system behavior. * It does not need to be perfectly thread-safe. 
@@ -307,7 +308,7 @@ static bool recurse_if_child_is_gorged(FTNODE child, void* extra) { struct flush_status_update_extra *fste = (flush_status_update_extra *)extra; - return toku_ft_nonleaf_is_gorged(child, fste->nodesize); + return toku_ftnode_nonleaf_is_gorged(child, fste->nodesize); } int @@ -497,7 +498,7 @@ ct_maybe_merge_child(struct flusher_advice *fa, struct ftnode_fetch_extra bfe; fill_bfe_for_full_read(&bfe, h); toku_pin_ftnode(h, root, fullhash, &bfe, PL_WRITE_EXPENSIVE, &root_node, true); - toku_assert_entire_node_in_memory(root_node); + toku_ftnode_assert_fully_in_memory(root_node); } (void) toku_sync_fetch_and_add(&STATUS_VALUE(FT_FLUSHER_CLEANER_NUM_LEAF_MERGES_STARTED), 1); @@ -545,13 +546,12 @@ ct_flusher_advice_init(struct flusher_advice *fa, struct flush_status_update_ext // a leaf node that is not entirely in memory. If so, then // we cannot be sure if the node is reactive. // -static bool may_node_be_reactive(FT ft, FTNODE node) +static bool ft_ftnode_may_be_reactive(FT ft, FTNODE node) { if (node->height == 0) { return true; - } - else { - return (get_nonleaf_reactivity(node, ft->h->fanout) != RE_STABLE); + } else { + return toku_ftnode_get_nonleaf_reactivity(node, ft->h->fanout) != RE_STABLE; } } @@ -576,9 +576,9 @@ handle_split_of_child( paranoid_invariant(node->height>0); paranoid_invariant(0 <= childnum); paranoid_invariant(childnum < node->n_children); - toku_assert_entire_node_in_memory(node); - toku_assert_entire_node_in_memory(childa); - toku_assert_entire_node_in_memory(childb); + toku_ftnode_assert_fully_in_memory(node); + toku_ftnode_assert_fully_in_memory(childa); + toku_ftnode_assert_fully_in_memory(childb); NONLEAF_CHILDINFO old_bnc = BNC(node, childnum); paranoid_invariant(toku_bnc_nbytesinbuf(old_bnc)==0); int cnum; @@ -653,9 +653,9 @@ handle_split_of_child( ) /* Keep pushing to the children, but not if the children would require a pushdown */ - toku_assert_entire_node_in_memory(node); - 
toku_assert_entire_node_in_memory(childa); - toku_assert_entire_node_in_memory(childb); + toku_ftnode_assert_fully_in_memory(node); + toku_ftnode_assert_fully_in_memory(childa); + toku_ftnode_assert_fully_in_memory(childb); VERIFY_NODE(t, node); VERIFY_NODE(t, childa); @@ -680,7 +680,7 @@ ftleaf_disk_size(FTNODE node) // Effect: get the disk size of a leafentry { paranoid_invariant(node->height == 0); - toku_assert_entire_node_in_memory(node); + toku_ftnode_assert_fully_in_memory(node); uint64_t retval = 0; for (int i = 0; i < node->n_children; i++) { retval += BLB_DATA(node, i)->get_disk_size(); @@ -771,8 +771,8 @@ move_leafentries( static void ftnode_finalize_split(FTNODE node, FTNODE B, MSN max_msn_applied_to_node) { // Effect: Finalizes a split by updating some bits and dirtying both nodes - toku_assert_entire_node_in_memory(node); - toku_assert_entire_node_in_memory(B); + toku_ftnode_assert_fully_in_memory(node); + toku_ftnode_assert_fully_in_memory(B); verify_all_in_mempool(node); verify_all_in_mempool(B); @@ -851,7 +851,7 @@ ftleaf_split( paranoid_invariant(node->height==0); - toku_assert_entire_node_in_memory(node); + toku_ftnode_assert_fully_in_memory(node); verify_all_in_mempool(node); MSN max_msn_applied_to_node = node->max_msn_applied_to_node_on_disk; @@ -996,7 +996,7 @@ ft_nonleaf_split( { //VERIFY_NODE(t,node); STATUS_VALUE(FT_FLUSHER_SPLIT_NONLEAF)++; - toku_assert_entire_node_in_memory(node); + toku_ftnode_assert_fully_in_memory(node); int old_n_children = node->n_children; int n_children_in_a = old_n_children/2; int n_children_in_b = old_n_children-n_children_in_a; @@ -1112,7 +1112,7 @@ ft_split_child( } static void bring_node_fully_into_memory(FTNODE node, FT ft) { - if (!is_entire_node_in_memory(node)) { + if (!toku_ftnode_fully_in_memory(node)) { struct ftnode_fetch_extra bfe; fill_bfe_for_full_read(&bfe, ft); toku_cachetable_pf_pinned_pair( @@ -1136,12 +1136,12 @@ flush_this_child( // Effect: Push everything in the CHILDNUMth buffer of node 
down into the child. { update_flush_status(child, 0); - toku_assert_entire_node_in_memory(node); + toku_ftnode_assert_fully_in_memory(node); if (fa->should_destroy_basement_nodes(fa)) { maybe_destroy_child_blbs(node, child, h); } bring_node_fully_into_memory(child, h); - toku_assert_entire_node_in_memory(child); + toku_ftnode_assert_fully_in_memory(child); paranoid_invariant(node->height>0); paranoid_invariant(child->thisnodename.b!=0); // VERIFY_NODE does not work off client thread as of now @@ -1163,8 +1163,8 @@ static void merge_leaf_nodes(FTNODE a, FTNODE b) { STATUS_VALUE(FT_FLUSHER_MERGE_LEAF)++; - toku_assert_entire_node_in_memory(a); - toku_assert_entire_node_in_memory(b); + toku_ftnode_assert_fully_in_memory(a); + toku_ftnode_assert_fully_in_memory(b); paranoid_invariant(a->height == 0); paranoid_invariant(b->height == 0); paranoid_invariant(a->n_children > 0); @@ -1268,7 +1268,7 @@ maybe_merge_pinned_leaf_nodes( { unsigned int sizea = toku_serialize_ftnode_size(a); unsigned int sizeb = toku_serialize_ftnode_size(b); - uint32_t num_leafentries = get_leaf_num_entries(a) + get_leaf_num_entries(b); + uint32_t num_leafentries = toku_ftnode_leaf_num_entries(a) + toku_ftnode_leaf_num_entries(b); if (num_leafentries > 1 && (sizea + sizeb)*4 > (nodesize*3)) { // the combined size is more than 3/4 of a node, so don't merge them. 
*did_merge = false; @@ -1301,8 +1301,8 @@ maybe_merge_pinned_nonleaf_nodes( bool *did_rebalance, DBT *splitk) { - toku_assert_entire_node_in_memory(a); - toku_assert_entire_node_in_memory(b); + toku_ftnode_assert_fully_in_memory(a); + toku_ftnode_assert_fully_in_memory(b); paranoid_invariant(parent_splitk->data); int old_n_children = a->n_children; int new_n_children = old_n_children + b->n_children; @@ -1366,9 +1366,9 @@ maybe_merge_pinned_nodes( { MSN msn_max; paranoid_invariant(a->height == b->height); - toku_assert_entire_node_in_memory(parent); - toku_assert_entire_node_in_memory(a); - toku_assert_entire_node_in_memory(b); + toku_ftnode_assert_fully_in_memory(parent); + toku_ftnode_assert_fully_in_memory(a); + toku_ftnode_assert_fully_in_memory(b); parent->dirty = 1; // just to make sure { MSN msna = a->max_msn_applied_to_node_on_disk; @@ -1413,7 +1413,7 @@ ft_merge_child( // this function should not be called // if the child is not mergable paranoid_invariant(node->n_children > 1); - toku_assert_entire_node_in_memory(node); + toku_ftnode_assert_fully_in_memory(node); int childnuma,childnumb; if (childnum_to_merge > 0) { @@ -1577,7 +1577,7 @@ void toku_ft_flush_some_child(FT ft, FTNODE parent, struct flusher_advice *fa) int dirtied = 0; NONLEAF_CHILDINFO bnc = NULL; paranoid_invariant(parent->height>0); - toku_assert_entire_node_in_memory(parent); + toku_ftnode_assert_fully_in_memory(parent); TXNID parent_oldest_referenced_xid_known = parent->oldest_referenced_xid_known; // pick the child we want to flush to @@ -1608,7 +1608,7 @@ void toku_ft_flush_some_child(FT ft, FTNODE parent, struct flusher_advice *fa) // Let's do a quick check to see if the child may be reactive // If the child cannot be reactive, then we can safely unlock // the parent before finishing reading in the entire child node. 
- bool may_child_be_reactive = may_node_be_reactive(ft, child); + bool may_child_be_reactive = ft_ftnode_may_be_reactive(ft, child); paranoid_invariant(child->thisnodename.b!=0); @@ -1649,7 +1649,7 @@ void toku_ft_flush_some_child(FT ft, FTNODE parent, struct flusher_advice *fa) // we wont be splitting/merging child // and we have already replaced the bnc // for the root with a fresh one - enum reactivity child_re = get_node_reactivity(ft, child); + enum reactivity child_re = toku_ftnode_get_reactivity(ft, child); if (parent && child_re == RE_STABLE) { toku_unpin_ftnode(ft, parent); parent = NULL; @@ -1679,7 +1679,7 @@ void toku_ft_flush_some_child(FT ft, FTNODE parent, struct flusher_advice *fa) // let's get the reactivity of the child again, // it is possible that the flush got rid of some values // and now the parent is no longer reactive - child_re = get_node_reactivity(ft, child); + child_re = toku_ftnode_get_reactivity(ft, child); // if the parent has been unpinned above, then // this is our only option, even if the child is not stable // if the child is not stable, we'll handle it the next @@ -1724,6 +1724,79 @@ void toku_ft_flush_some_child(FT ft, FTNODE parent, struct flusher_advice *fa) } } +void toku_bnc_flush_to_child(FT ft, NONLEAF_CHILDINFO bnc, FTNODE child, TXNID parent_oldest_referenced_xid_known) { + paranoid_invariant(bnc); + + TOKULOGGER logger = toku_cachefile_logger(ft->cf); + TXN_MANAGER txn_manager = logger != nullptr ? 
toku_logger_get_txn_manager(logger) : nullptr; + TXNID oldest_referenced_xid_for_simple_gc = TXNID_NONE; + + txn_manager_state txn_state_for_gc(txn_manager); + bool do_garbage_collection = child->height == 0 && txn_manager != nullptr; + if (do_garbage_collection) { + txn_state_for_gc.init(); + oldest_referenced_xid_for_simple_gc = toku_txn_manager_get_oldest_referenced_xid_estimate(txn_manager); + } + txn_gc_info gc_info(&txn_state_for_gc, + oldest_referenced_xid_for_simple_gc, + child->oldest_referenced_xid_known, + true); + struct flush_msg_fn { + FT ft; + FTNODE child; + NONLEAF_CHILDINFO bnc; + txn_gc_info *gc_info; + + STAT64INFO_S stats_delta; + size_t remaining_memsize = bnc->msg_buffer.buffer_size_in_use(); + + flush_msg_fn(FT t, FTNODE n, NONLEAF_CHILDINFO nl, txn_gc_info *g) : + ft(t), child(n), bnc(nl), gc_info(g), remaining_memsize(bnc->msg_buffer.buffer_size_in_use()) { + stats_delta = { 0, 0 }; + } + int operator()(FT_MSG msg, bool is_fresh) { + size_t flow_deltas[] = { 0, 0 }; + size_t memsize_in_buffer = message_buffer::msg_memsize_in_buffer(msg); + if (remaining_memsize <= bnc->flow[0]) { + // this message is in the current checkpoint's worth of + // the end of the message buffer + flow_deltas[0] = memsize_in_buffer; + } else if (remaining_memsize <= bnc->flow[0] + bnc->flow[1]) { + // this message is in the last checkpoint's worth of the + // end of the message buffer + flow_deltas[1] = memsize_in_buffer; + } + toku_ftnode_put_msg( + ft->compare_fun, + ft->update_fun, + &ft->cmp_descriptor, + child, + -1, + msg, + is_fresh, + gc_info, + flow_deltas, + &stats_delta + ); + remaining_memsize -= memsize_in_buffer; + return 0; + } + } flush_fn(ft, child, bnc, &gc_info); + bnc->msg_buffer.iterate(flush_fn); + + child->oldest_referenced_xid_known = parent_oldest_referenced_xid_known; + + invariant(flush_fn.remaining_memsize == 0); + if (flush_fn.stats_delta.numbytes || flush_fn.stats_delta.numrows) { + toku_ft_update_stats(&ft->in_memory_stats, 
flush_fn.stats_delta); + } + if (do_garbage_collection) { + size_t buffsize = bnc->msg_buffer.buffer_size_in_use(); + // may be misleading if there's a broadcast message in there + toku_ft_status_note_msg_bytes_out(buffsize); + } +} + static void update_cleaner_status( FTNODE node, @@ -1912,7 +1985,7 @@ static void flush_node_fun(void *fe_v) // If so, call toku_ft_flush_some_child on the node (because this flush intends to // pass a meaningful oldest referenced xid for simple garbage collection), and it is the // responsibility of the flush to unlock the node. otherwise, we unlock it here. - if (fe->node->height > 0 && toku_ft_nonleaf_is_gorged(fe->node, fe->h->h->nodesize)) { + if (fe->node->height > 0 && toku_ftnode_nonleaf_is_gorged(fe->node, fe->h->h->nodesize)) { toku_ft_flush_some_child(fe->h, fe->node, &fa); } else { @@ -1984,7 +2057,7 @@ void toku_ft_flush_node_on_background_thread(FT h, FTNODE parent) // // successfully locked child // - bool may_child_be_reactive = may_node_be_reactive(h, child); + bool may_child_be_reactive = ft_ftnode_may_be_reactive(h, child); if (!may_child_be_reactive) { // We're going to unpin the parent, so before we do, we must // check to see if we need to blow away the basement nodes to diff --git a/ft/ft-flusher.h b/ft/ft-flusher.h index 0111827653b..4ee76e2a3c8 100644 --- a/ft/ft-flusher.h +++ b/ft/ft-flusher.h @@ -151,11 +151,14 @@ toku_flusher_thread_set_callback( * Puts a workitem on the flusher thread queue, scheduling the node to be * flushed by toku_ft_flush_some_child. */ -void -toku_ft_flush_node_on_background_thread( - FT ft, - FTNODE parent - ); +void toku_ft_flush_node_on_background_thread(FT ft, FTNODE parent); + +enum split_mode { + SPLIT_EVENLY, + SPLIT_LEFT_HEAVY, + SPLIT_RIGHT_HEAVY +}; + // Given pinned node and pinned child, split child into two // and update node with information about its new child. 
@@ -166,6 +169,7 @@ void toku_ft_split_child( FTNODE child, enum split_mode split_mode ); + // Given pinned node, merge childnum with a neighbor and update node with // information about the change void toku_ft_merge_child( @@ -217,8 +221,6 @@ ft_nonleaf_split( FTNODE* dependent_nodes ); - - /************************************************************************ * HOT optimize, should perhaps be factored out to its own header file * ************************************************************************ @@ -249,5 +251,5 @@ void toku_ft_hot_get_status(FT_HOT_STATUS); */ int toku_ft_hot_optimize(FT_HANDLE ft_h, DBT* left, DBT* right, - int (*progress_callback)(void *extra, float progress), - void *progress_extra, uint64_t* loops_run); + int (*progress_callback)(void *extra, float progress), + void *progress_extra, uint64_t* loops_run); diff --git a/ft/ft-hot-flusher.cc b/ft/ft-hot-flusher.cc index 066e075ee0e..fa0b031e2f1 100644 --- a/ft/ft-hot-flusher.cc +++ b/ft/ft-hot-flusher.cc @@ -89,14 +89,15 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#include -#include -#include -#include -#include -#include -#include -#include +#include "ft/ft.h" +#include "ft/ft-cachetable-wrappers.h" +#include "ft/ft-flusher.h" +#include "ft/ft-flusher-internal.h" +#include "ft/ft-internal.h" +#include "ft/node.h" +#include "portability/toku_atomic.h" +#include "util/context.h" +#include "util/status.h" // Member Descirption: // 1. 
highest_pivot_key - this is the key that corresponds to the @@ -339,7 +340,7 @@ toku_ft_hot_optimize(FT_HANDLE ft_handle, DBT* left, DBT* right, PL_WRITE_EXPENSIVE, &root, true); - toku_assert_entire_node_in_memory(root); + toku_ftnode_assert_fully_in_memory(root); } // Prepare HOT diagnostics. diff --git a/ft/ft-internal.h b/ft/ft-internal.h index 239892db796..54574f56786 100644 --- a/ft/ft-internal.h +++ b/ft/ft-internal.h @@ -129,348 +129,6 @@ enum { FT_DEFAULT_BASEMENT_NODE_SIZE = 128 * 1024 }; // happen into the rightmost leaf node due to promotion. enum { FT_SEQINSERT_SCORE_THRESHOLD = 100 }; -// -// Field in ftnode_fetch_extra that tells the -// partial fetch callback what piece of the node -// is needed by the ydb -// -enum ftnode_fetch_type { - ftnode_fetch_none=1, // no partitions needed. - ftnode_fetch_subset, // some subset of partitions needed - ftnode_fetch_prefetch, // this is part of a prefetch call - ftnode_fetch_all, // every partition is needed - ftnode_fetch_keymatch, // one child is needed if it holds both keys -}; - -enum split_mode { - SPLIT_EVENLY, - SPLIT_LEFT_HEAVY, - SPLIT_RIGHT_HEAVY -}; - -enum reactivity { - RE_STABLE, - RE_FUSIBLE, - RE_FISSIBLE -}; - -static bool is_valid_ftnode_fetch_type(enum ftnode_fetch_type type) UU(); -static bool is_valid_ftnode_fetch_type(enum ftnode_fetch_type type) { - switch (type) { - case ftnode_fetch_none: - case ftnode_fetch_subset: - case ftnode_fetch_prefetch: - case ftnode_fetch_all: - case ftnode_fetch_keymatch: - return true; - default: - return false; - } -} - -// -// An extra parameter passed to cachetable functions -// That is used in all types of fetch callbacks. -// The contents help the partial fetch and fetch -// callbacks retrieve the pieces of a node necessary -// for the ensuing operation (flush, query, ...) 
-// -struct ftnode_fetch_extra { - enum ftnode_fetch_type type; - // needed for reading a node off disk - FT h; - // used in the case where type == ftnode_fetch_subset - // parameters needed to find out which child needs to be decompressed (so it can be read) - ft_search *search; - DBT range_lock_left_key, range_lock_right_key; - bool left_is_neg_infty, right_is_pos_infty; - // states if we should try to aggressively fetch basement nodes - // that are not specifically needed for current query, - // but may be needed for other cursor operations user is doing - // For example, if we have not disabled prefetching, - // and the user is doing a dictionary wide scan, then - // even though a query may only want one basement node, - // we fetch all basement nodes in a leaf node. - bool disable_prefetching; - // this value will be set during the fetch_callback call by toku_ftnode_fetch_callback or toku_ftnode_pf_req_callback - // thi callbacks need to evaluate this anyway, so we cache it here so the search code does not reevaluate it - int child_to_read; - // when we read internal nodes, we want to read all the data off disk in one I/O - // then we'll treat it as normal and only decompress the needed partitions etc. - - bool read_all_partitions; - // Accounting: How many bytes were read, and how much time did we spend doing I/O? 
- uint64_t bytes_read; - tokutime_t io_time; - tokutime_t decompress_time; - tokutime_t deserialize_time; -}; -typedef struct ftnode_fetch_extra *FTNODE_FETCH_EXTRA; - -struct toku_msg_buffer_key_msn_heaviside_extra { - DESCRIPTOR desc; - ft_compare_func cmp; - message_buffer *msg_buffer; - const DBT *key; - MSN msn; -}; - -// comparison function for inserting messages into a -// ftnode_nonleaf_childinfo's message_tree -int -toku_msg_buffer_key_msn_heaviside(const int32_t &v, const struct toku_msg_buffer_key_msn_heaviside_extra &extra); - -struct toku_msg_buffer_key_msn_cmp_extra { - DESCRIPTOR desc; - ft_compare_func cmp; - message_buffer *msg_buffer; -}; - -// same thing for qsort_r -int -toku_msg_buffer_key_msn_cmp(const struct toku_msg_buffer_key_msn_cmp_extra &extrap, const int &a, const int &b); - -typedef toku::omt off_omt_t; -typedef toku::omt marked_off_omt_t; - -// data of an available partition of a nonleaf ftnode -struct ftnode_nonleaf_childinfo { - message_buffer msg_buffer; - off_omt_t broadcast_list; - marked_off_omt_t fresh_message_tree; - off_omt_t stale_message_tree; - uint64_t flow[2]; // current and last checkpoint -}; - -unsigned int toku_bnc_nbytesinbuf(NONLEAF_CHILDINFO bnc); -int toku_bnc_n_entries(NONLEAF_CHILDINFO bnc); -long toku_bnc_memory_size(NONLEAF_CHILDINFO bnc); -long toku_bnc_memory_used(NONLEAF_CHILDINFO bnc); -void toku_bnc_insert_msg(NONLEAF_CHILDINFO bnc, const void *key, ITEMLEN keylen, const void *data, ITEMLEN datalen, enum ft_msg_type type, MSN msn, XIDS xids, bool is_fresh, DESCRIPTOR desc, ft_compare_func cmp); -void toku_bnc_empty(NONLEAF_CHILDINFO bnc); -void toku_bnc_flush_to_child(FT h, NONLEAF_CHILDINFO bnc, FTNODE child, TXNID parent_oldest_referenced_xid_known); -bool toku_bnc_should_promote(FT ft, NONLEAF_CHILDINFO bnc) __attribute__((const, nonnull)); -bool toku_ft_nonleaf_is_gorged(FTNODE node, uint32_t nodesize); - -enum reactivity get_nonleaf_reactivity(FTNODE node, unsigned int fanout); -enum reactivity 
get_node_reactivity(FT ft, FTNODE node); -uint32_t get_leaf_num_entries(FTNODE node); - -// data of an available partition of a leaf ftnode -struct ftnode_leaf_basement_node { - bn_data data_buffer; - unsigned int seqinsert; // number of sequential inserts to this leaf - MSN max_msn_applied; // max message sequence number applied - bool stale_ancestor_messages_applied; - STAT64INFO_S stat64_delta; // change in stat64 counters since basement was last written to disk -}; - -enum pt_state { // declare this to be packed so that when used below it will only take 1 byte. - PT_INVALID = 0, - PT_ON_DISK = 1, - PT_COMPRESSED = 2, - PT_AVAIL = 3}; - -enum ftnode_child_tag { - BCT_INVALID = 0, - BCT_NULL, - BCT_SUBBLOCK, - BCT_LEAF, - BCT_NONLEAF -}; - -typedef struct ftnode_child_pointer { - union { - struct sub_block *subblock; - struct ftnode_nonleaf_childinfo *nonleaf; - struct ftnode_leaf_basement_node *leaf; - } u; - enum ftnode_child_tag tag; -} FTNODE_CHILD_POINTER; - - -struct ftnode_disk_data { - // - // stores the offset to the beginning of the partition on disk from the ftnode, and the length, needed to read a partition off of disk - // the value is only meaningful if the node is clean. If the node is dirty, then the value is meaningless - // The START is the distance from the end of the compressed node_info data, to the beginning of the compressed partition - // The SIZE is the size of the compressed partition. - // Rationale: We cannot store the size from the beginning of the node since we don't know how big the header will be. - // However, later when we are doing aligned writes, we won't be able to store the size from the end since we want things to align. 
- uint32_t start; - uint32_t size; -}; -#define BP_START(node_dd,i) ((node_dd)[i].start) -#define BP_SIZE(node_dd,i) ((node_dd)[i].size) - - -// a ftnode partition, associated with a child of a node -struct ftnode_partition { - // the following three variables are used for nonleaf nodes - // for leaf nodes, they are meaningless - BLOCKNUM blocknum; // blocknum of child - - // How many bytes worth of work was performed by messages in each buffer. - uint64_t workdone; - - // - // pointer to the partition. Depending on the state, they may be different things - // if state == PT_INVALID, then the node was just initialized and ptr == NULL - // if state == PT_ON_DISK, then ptr == NULL - // if state == PT_COMPRESSED, then ptr points to a struct sub_block* - // if state == PT_AVAIL, then ptr is: - // a struct ftnode_nonleaf_childinfo for internal nodes, - // a struct ftnode_leaf_basement_node for leaf nodes - // - struct ftnode_child_pointer ptr; - // - // at any time, the partitions may be in one of the following three states (stored in pt_state): - // PT_INVALID - means that the partition was just initialized - // PT_ON_DISK - means that the partition is not in memory and needs to be read from disk. To use, must read off disk and decompress - // PT_COMPRESSED - means that the partition is compressed in memory. To use, must decompress - // PT_AVAIL - means the partition is decompressed and in memory - // - enum pt_state state; // make this an enum to make debugging easier. - - // clock count used to for pe_callback to determine if a node should be evicted or not - // for now, saturating the count at 1 - uint8_t clock_count; -}; - -struct ftnode { - MSN max_msn_applied_to_node_on_disk; // max_msn_applied that will be written to disk - unsigned int flags; - BLOCKNUM thisnodename; // Which block number is this node? - int layout_version; // What version of the data structure? 
- int layout_version_original; // different (<) from layout_version if upgraded from a previous version (useful for debugging) - int layout_version_read_from_disk; // transient, not serialized to disk, (useful for debugging) - uint32_t build_id; // build_id (svn rev number) of software that wrote this node to disk - int height; /* height is always >= 0. 0 for leaf, >0 for nonleaf. */ - int dirty; - uint32_t fullhash; - int n_children; //for internal nodes, if n_children==fanout+1 then the tree needs to be rebalanced. - // for leaf nodes, represents number of basement nodes - unsigned int totalchildkeylens; - DBT *childkeys; /* Pivot keys. Child 0's keys are <= childkeys[0]. Child 1's keys are <= childkeys[1]. - Child 1's keys are > childkeys[0]. */ - - // What's the oldest referenced xid that this node knows about? The real oldest - // referenced xid might be younger, but this is our best estimate. We use it - // as a heuristic to transition provisional mvcc entries from provisional to - // committed (from implicity committed to really committed). - // - // A better heuristic would be the oldest live txnid, but we use this since it - // still works well most of the time, and its readily available on the inject - // code path. 
- TXNID oldest_referenced_xid_known; - - // array of size n_children, consisting of ftnode partitions - // each one is associated with a child - // for internal nodes, the ith partition corresponds to the ith message buffer - // for leaf nodes, the ith partition corresponds to the ith basement node - struct ftnode_partition *bp; - PAIR ct_pair; -}; - -// ftnode partition macros -// BP stands for ftnode_partition -#define BP_BLOCKNUM(node,i) ((node)->bp[i].blocknum) -#define BP_STATE(node,i) ((node)->bp[i].state) -#define BP_WORKDONE(node, i)((node)->bp[i].workdone) - -// -// macros for managing a node's clock -// Should be managed by ft-ops.c, NOT by serialize/deserialize -// - -// -// BP_TOUCH_CLOCK uses a compare and swap because multiple threads -// that have a read lock on an internal node may try to touch the clock -// simultaneously -// -#define BP_TOUCH_CLOCK(node, i) ((node)->bp[i].clock_count = 1) -#define BP_SWEEP_CLOCK(node, i) ((node)->bp[i].clock_count = 0) -#define BP_SHOULD_EVICT(node, i) ((node)->bp[i].clock_count == 0) -// not crazy about having these two here, one is for the case where we create new -// nodes, such as in splits and creating new roots, and the other is for when -// we are deserializing a node and not all bp's are touched -#define BP_INIT_TOUCHED_CLOCK(node, i) ((node)->bp[i].clock_count = 1) -#define BP_INIT_UNTOUCHED_CLOCK(node, i) ((node)->bp[i].clock_count = 0) - -// internal node macros -static inline void set_BNULL(FTNODE node, int i) { - paranoid_invariant(i >= 0); - paranoid_invariant(i < node->n_children); - node->bp[i].ptr.tag = BCT_NULL; -} -static inline bool is_BNULL (FTNODE node, int i) { - paranoid_invariant(i >= 0); - paranoid_invariant(i < node->n_children); - return node->bp[i].ptr.tag == BCT_NULL; -} -static inline NONLEAF_CHILDINFO BNC(FTNODE node, int i) { - paranoid_invariant(i >= 0); - paranoid_invariant(i < node->n_children); - FTNODE_CHILD_POINTER p = node->bp[i].ptr; - 
paranoid_invariant(p.tag==BCT_NONLEAF); - return p.u.nonleaf; -} -static inline void set_BNC(FTNODE node, int i, NONLEAF_CHILDINFO nl) { - paranoid_invariant(i >= 0); - paranoid_invariant(i < node->n_children); - FTNODE_CHILD_POINTER *p = &node->bp[i].ptr; - p->tag = BCT_NONLEAF; - p->u.nonleaf = nl; -} - -static inline BASEMENTNODE BLB(FTNODE node, int i) { - paranoid_invariant(i >= 0); - // The optimizer really doesn't like it when we compare - // i to n_children as signed integers. So we assert that - // n_children is in fact positive before doing a comparison - // on the values forcibly cast to unsigned ints. - paranoid_invariant(node->n_children > 0); - paranoid_invariant((unsigned) i < (unsigned) node->n_children); - FTNODE_CHILD_POINTER p = node->bp[i].ptr; - paranoid_invariant(p.tag==BCT_LEAF); - return p.u.leaf; -} -static inline void set_BLB(FTNODE node, int i, BASEMENTNODE bn) { - paranoid_invariant(i >= 0); - paranoid_invariant(i < node->n_children); - FTNODE_CHILD_POINTER *p = &node->bp[i].ptr; - p->tag = BCT_LEAF; - p->u.leaf = bn; -} - -static inline SUB_BLOCK BSB(FTNODE node, int i) { - paranoid_invariant(i >= 0); - paranoid_invariant(i < node->n_children); - FTNODE_CHILD_POINTER p = node->bp[i].ptr; - paranoid_invariant(p.tag==BCT_SUBBLOCK); - return p.u.subblock; -} -static inline void set_BSB(FTNODE node, int i, SUB_BLOCK sb) { - paranoid_invariant(i >= 0); - paranoid_invariant(i < node->n_children); - FTNODE_CHILD_POINTER *p = &node->bp[i].ptr; - p->tag = BCT_SUBBLOCK; - p->u.subblock = sb; -} - -// ftnode leaf basementnode macros, -#define BLB_MAX_MSN_APPLIED(node,i) (BLB(node,i)->max_msn_applied) -#define BLB_MAX_DSN_APPLIED(node,i) (BLB(node,i)->max_dsn_applied) -#define BLB_DATA(node,i) (&(BLB(node,i)->data_buffer)) -#define BLB_NBYTESINDATA(node,i) (BLB_DATA(node,i)->get_disk_size()) -#define BLB_SEQINSERT(node,i) (BLB(node,i)->seqinsert) - -/* pivot flags (must fit in 8 bits) */ -enum { - FT_PIVOT_TRUNC = 4, - FT_PIVOT_FRONT_COMPRESS = 8, 
-}; - uint32_t compute_child_fullhash (CACHEFILE cf, FTNODE node, int childnum); // The ft_header is not managed by the cachetable. Instead, it hangs off the cachefile as userdata. @@ -638,18 +296,25 @@ struct ft_handle { struct ft_options options; }; +// TODO: Move to cachetable header PAIR_ATTR make_ftnode_pair_attr(FTNODE node); PAIR_ATTR make_invalid_pair_attr(void); +// Cachetable callbacks for ftnodes. +void toku_ftnode_clone_callback(void* value_data, void** cloned_value_data, long* clone_size, PAIR_ATTR* new_attr, bool for_checkpoint, void* write_extraargs); +void toku_ftnode_checkpoint_complete_callback(void *value_data); +void toku_ftnode_flush_callback (CACHEFILE cachefile, int fd, BLOCKNUM nodename, void *ftnode_v, void** UU(disk_data), void *extraargs, PAIR_ATTR size, PAIR_ATTR* new_size, bool write_me, bool keep_me, bool for_checkpoint, bool is_clone); +int toku_ftnode_fetch_callback (CACHEFILE cachefile, PAIR p, int fd, BLOCKNUM nodename, uint32_t fullhash, void **ftnode_pv, void** UU(disk_data), PAIR_ATTR *sizep, int*dirty, void*extraargs); +void toku_ftnode_pe_est_callback(void* ftnode_pv, void* disk_data, long* bytes_freed_estimate, enum partial_eviction_cost *cost, void* write_extraargs); +int toku_ftnode_pe_callback(void *ftnode_pv, PAIR_ATTR old_attr, void *extraargs, + void (*finalize)(PAIR_ATTR new_attr, void *extra), void *finalize_extra); +bool toku_ftnode_pf_req_callback(void* ftnode_pv, void* read_extraargs); +int toku_ftnode_pf_callback(void* ftnode_pv, void* UU(disk_data), void* read_extraargs, int fd, PAIR_ATTR* sizep); +int toku_ftnode_cleaner_callback( void *ftnode_pv, BLOCKNUM blocknum, uint32_t fullhash, void *extraargs); /* serialization code */ -void -toku_create_compressed_partition_from_available( - FTNODE node, - int childnum, - enum toku_compression_method compression_method, - SUB_BLOCK sb - ); -void rebalance_ftnode_leaf(FTNODE node, unsigned int basementnodesize); +void 
toku_create_compressed_partition_from_available(FTNODE node, int childnum, + enum toku_compression_method compression_method, + SUB_BLOCK sb); int toku_serialize_ftnode_to_memory (FTNODE node, FTNODE_DISK_DATA* ndd, unsigned int basementnodesize, @@ -727,19 +392,6 @@ void toku_serialize_ft_to_wbuf ( int toku_deserialize_ft_from (int fd, LSN max_acceptable_lsn, FT *ft); void toku_serialize_descriptor_contents_to_fd(int fd, const DESCRIPTOR desc, DISKOFF offset); void toku_serialize_descriptor_contents_to_wbuf(struct wbuf *wb, const DESCRIPTOR desc); -BASEMENTNODE toku_create_empty_bn(void); -BASEMENTNODE toku_create_empty_bn_no_buffer(void); // create a basement node with a null buffer. -NONLEAF_CHILDINFO toku_clone_nl(NONLEAF_CHILDINFO orig_childinfo); -BASEMENTNODE toku_clone_bn(BASEMENTNODE orig_bn); -NONLEAF_CHILDINFO toku_create_empty_nl(void); -// FIXME needs toku prefix -void destroy_basement_node (BASEMENTNODE bn); -// FIXME needs toku prefix -void destroy_nonleaf_childinfo (NONLEAF_CHILDINFO nl); -void toku_destroy_ftnode_internals(FTNODE node); -void toku_ftnode_free (FTNODE *node); -bool is_entire_node_in_memory(FTNODE node); -void toku_assert_entire_node_in_memory(FTNODE node); // append a child node to a parent node void toku_ft_nonleaf_append_child(FTNODE node, FTNODE child, const DBT *pivotkey); @@ -760,31 +412,10 @@ void toku_ft_status_update_pivot_fetch_reason(struct ftnode_fetch_extra *bfe); void toku_ft_status_update_flush_reason(FTNODE node, uint64_t uncompressed_bytes_flushed, uint64_t bytes_written, tokutime_t write_time, bool for_checkpoint); void toku_ft_status_update_serialize_times(FTNODE node, tokutime_t serialize_time, tokutime_t compress_time); void toku_ft_status_update_deserialize_times(FTNODE node, tokutime_t deserialize_time, tokutime_t decompress_time); - -void toku_ftnode_clone_callback(void* value_data, void** cloned_value_data, long* clone_size, PAIR_ATTR* new_attr, bool for_checkpoint, void* write_extraargs); -void 
toku_ftnode_checkpoint_complete_callback(void *value_data); -void toku_ftnode_flush_callback (CACHEFILE cachefile, int fd, BLOCKNUM nodename, void *ftnode_v, void** UU(disk_data), void *extraargs, PAIR_ATTR size, PAIR_ATTR* new_size, bool write_me, bool keep_me, bool for_checkpoint, bool is_clone); -int toku_ftnode_fetch_callback (CACHEFILE cachefile, PAIR p, int fd, BLOCKNUM nodename, uint32_t fullhash, void **ftnode_pv, void** UU(disk_data), PAIR_ATTR *sizep, int*dirty, void*extraargs); -void toku_ftnode_pe_est_callback(void* ftnode_pv, void* disk_data, long* bytes_freed_estimate, enum partial_eviction_cost *cost, void* write_extraargs); -int toku_ftnode_pe_callback(void *ftnode_pv, PAIR_ATTR old_attr, void *extraargs, - void (*finalize)(PAIR_ATTR new_attr, void *extra), void *finalize_extra); -bool toku_ftnode_pf_req_callback(void* ftnode_pv, void* read_extraargs); -int toku_ftnode_pf_callback(void* ftnode_pv, void* UU(disk_data), void* read_extraargs, int fd, PAIR_ATTR* sizep); -int toku_ftnode_cleaner_callback( void *ftnode_pv, BLOCKNUM blocknum, uint32_t fullhash, void *extraargs); -void toku_evict_bn_from_memory(FTNODE node, int childnum, FT h); -BASEMENTNODE toku_detach_bn(FTNODE node, int childnum); - -static inline CACHETABLE_WRITE_CALLBACK get_write_callbacks_for_node(FT h) { - CACHETABLE_WRITE_CALLBACK wc; - wc.flush_callback = toku_ftnode_flush_callback; - wc.pe_est_callback = toku_ftnode_pe_est_callback; - wc.pe_callback = toku_ftnode_pe_callback; - wc.cleaner_callback = toku_ftnode_cleaner_callback; - wc.clone_callback = toku_ftnode_clone_callback; - wc.checkpoint_complete_callback = toku_ftnode_checkpoint_complete_callback; - wc.write_extraargs = h; - return wc; -} +void toku_ft_status_note_msn_discard(void); +void toku_ft_status_note_update(bool broadcast); +void toku_ft_status_note_msg_bytes_out(size_t buffsize); +void toku_ft_status_note_ftnode(int height, bool created); // created = false means destroyed // // Helper function to fill a 
ftnode_fetch_extra with data @@ -792,22 +423,7 @@ static inline CACHETABLE_WRITE_CALLBACK get_write_callbacks_for_node(FT h) { // necessary. Used in cases where the entire node // is required, such as for flushes. // -static inline void fill_bfe_for_full_read(struct ftnode_fetch_extra *bfe, FT h) { - bfe->type = ftnode_fetch_all; - bfe->h = h; - bfe->search = NULL; - toku_init_dbt(&bfe->range_lock_left_key); - toku_init_dbt(&bfe->range_lock_right_key); - bfe->left_is_neg_infty = false; - bfe->right_is_pos_infty = false; - bfe->child_to_read = -1; - bfe->disable_prefetching = false; - bfe->read_all_partitions = false; - bfe->bytes_read = 0; - bfe->io_time = 0; - bfe->deserialize_time = 0; - bfe->decompress_time = 0; -} +void fill_bfe_for_full_read(struct ftnode_fetch_extra *bfe, FT ft); // // Helper function to fill a ftnode_fetch_extra with data @@ -816,79 +432,19 @@ static inline void fill_bfe_for_full_read(struct ftnode_fetch_extra *bfe, FT h) // is known in advance, e.g. for keysrange when the left and right key // are in the same basement node. 
// -static inline void fill_bfe_for_keymatch( - struct ftnode_fetch_extra *bfe, - FT h, - const DBT *left, - const DBT *right, - bool disable_prefetching, - bool read_all_partitions - ) -{ - paranoid_invariant(h->h->type == FT_CURRENT); - bfe->type = ftnode_fetch_keymatch; - bfe->h = h; - bfe->search = nullptr; - toku_init_dbt(&bfe->range_lock_left_key); - toku_init_dbt(&bfe->range_lock_right_key); - if (left) { - toku_copyref_dbt(&bfe->range_lock_left_key, *left); - } - - if (right) { - toku_copyref_dbt(&bfe->range_lock_right_key, *right); - } - bfe->left_is_neg_infty = left == nullptr; - bfe->right_is_pos_infty = right == nullptr; - bfe->child_to_read = -1; - bfe->disable_prefetching = disable_prefetching; - bfe->read_all_partitions = read_all_partitions; - bfe->bytes_read = 0; - bfe->io_time = 0; - bfe->deserialize_time = 0; - bfe->decompress_time = 0; -} - +void fill_bfe_for_keymatch(struct ftnode_fetch_extra *bfe, FT ft, + const DBT *left, const DBT *right, + bool disable_prefetching, bool read_all_partitions); // // Helper function to fill a ftnode_fetch_extra with data // that will tell the fetch callback that some subset of the node // necessary. Used in cases where some of the node is required // such as for a point query. 
// -static inline void fill_bfe_for_subset_read( - struct ftnode_fetch_extra *bfe, - FT h, - ft_search *search, - const DBT *left, - const DBT *right, - bool left_is_neg_infty, - bool right_is_pos_infty, - bool disable_prefetching, - bool read_all_partitions - ) -{ - paranoid_invariant(h->h->type == FT_CURRENT); - bfe->type = ftnode_fetch_subset; - bfe->h = h; - bfe->search = search; - toku_init_dbt(&bfe->range_lock_left_key); - toku_init_dbt(&bfe->range_lock_right_key); - if (left) { - toku_copyref_dbt(&bfe->range_lock_left_key, *left); - } - if (right) { - toku_copyref_dbt(&bfe->range_lock_right_key, *right); - } - bfe->left_is_neg_infty = left_is_neg_infty; - bfe->right_is_pos_infty = right_is_pos_infty; - bfe->child_to_read = -1; - bfe->disable_prefetching = disable_prefetching; - bfe->read_all_partitions = read_all_partitions; - bfe->bytes_read = 0; - bfe->io_time = 0; - bfe->deserialize_time = 0; - bfe->decompress_time = 0; -} +void fill_bfe_for_subset_read(struct ftnode_fetch_extra *bfe, FT ft, ft_search *search, + const DBT *left, const DBT *right, + bool left_is_neg_infty, bool right_is_pos_infty, + bool disable_prefetching, bool read_all_partitions); // // Helper function to fill a ftnode_fetch_extra with data @@ -896,41 +452,11 @@ static inline void fill_bfe_for_subset_read( // necessary, only the pivots and/or subtree estimates. // Currently used for stat64. 
// -static inline void fill_bfe_for_min_read(struct ftnode_fetch_extra *bfe, FT h) { - paranoid_invariant(h->h->type == FT_CURRENT); - bfe->type = ftnode_fetch_none; - bfe->h = h; - bfe->search = NULL; - toku_init_dbt(&bfe->range_lock_left_key); - toku_init_dbt(&bfe->range_lock_right_key); - bfe->left_is_neg_infty = false; - bfe->right_is_pos_infty = false; - bfe->child_to_read = -1; - bfe->disable_prefetching = false; - bfe->read_all_partitions = false; - bfe->bytes_read = 0; - bfe->io_time = 0; - bfe->deserialize_time = 0; - bfe->decompress_time = 0; -} +void fill_bfe_for_min_read(struct ftnode_fetch_extra *bfe, FT ft); -static inline void destroy_bfe_for_prefetch(struct ftnode_fetch_extra *bfe) { - paranoid_invariant(bfe->type == ftnode_fetch_prefetch); - toku_destroy_dbt(&bfe->range_lock_left_key); - toku_destroy_dbt(&bfe->range_lock_right_key); -} +void fill_bfe_for_prefetch(struct ftnode_fetch_extra *bfe, FT ft, struct ft_cursor *cursor); -// this is in a strange place because it needs the cursor struct to be defined -void fill_bfe_for_prefetch(struct ftnode_fetch_extra *bfe, - FT h, - struct ft_cursor *c); - -struct ancestors { - FTNODE node; // This is the root node if next is NULL. - int childnum; // which buffer holds messages destined to the node whose ancestors this list represents. - struct ancestors *next; // Parent of this node (so next->node.(next->childnum) refers to this node). 
-}; -typedef struct ancestors *ANCESTORS; +void destroy_bfe_for_prefetch(struct ftnode_fetch_extra *bfe); struct pivot_bounds { const DBT * const lower_bound_exclusive; @@ -938,21 +464,9 @@ struct pivot_bounds { }; typedef struct pivot_bounds const * const PIVOT_BOUNDS; -__attribute__((nonnull)) -void toku_move_ftnode_messages_to_stale(FT ft, FTNODE node); -void toku_apply_ancestors_messages_to_node (FT_HANDLE t, FTNODE node, ANCESTORS ancestors, struct pivot_bounds const * const bounds, bool* msgs_applied, int child_to_read); -__attribute__((nonnull)) -bool toku_ft_leaf_needs_ancestors_messages(FT ft, FTNODE node, ANCESTORS ancestors, struct pivot_bounds const * const bounds, MSN *const max_msn_in_path, int child_to_read); -__attribute__((nonnull)) -void toku_ft_bn_update_max_msn(FTNODE node, MSN max_msn_applied, int child_to_read); - -int -toku_ft_search_which_child( - DESCRIPTOR desc, - ft_compare_func cmp, - FTNODE node, - ft_search *search - ); +const DBT *prepivotkey (FTNODE node, int childnum, const DBT * const lower_bound_exclusive); +const DBT *postpivotkey (FTNODE node, int childnum, const DBT * const upper_bound_inclusive); +struct pivot_bounds next_pivot_keys (FTNODE node, int childnum, struct pivot_bounds const * const old_pb); bool toku_bfe_wants_child_available (struct ftnode_fetch_extra* bfe, int childnum); @@ -967,29 +481,6 @@ toku_bfe_rightmost_child_wanted(struct ftnode_fetch_extra *bfe, FTNODE node); // put the ftnode into the cache table void toku_create_new_ftnode (FT_HANDLE t, FTNODE *result, int height, int n_children); -// Effect: Fill in N as an empty ftnode. 
-void toku_initialize_empty_ftnode (FTNODE n, BLOCKNUM nodename, int height, int num_children, - int layout_version, unsigned int flags); - -int toku_ftnode_which_child(FTNODE node, const DBT *k, - DESCRIPTOR desc, ft_compare_func cmp) - __attribute__((__warn_unused_result__)); - -/** - * Finds the next child for HOT to flush to, given that everything up to - * and including k has been flattened. - * - * If k falls between pivots in node, then we return the childnum where k - * lies. - * - * If k is equal to some pivot, then we return the next (to the right) - * childnum. - */ -int toku_ftnode_hot_next_child(FTNODE node, - const DBT *k, - DESCRIPTOR desc, - ft_compare_func cmp); - /* Stuff for testing */ // toku_testsetup_initialize() must be called before any other test_setup_xxx() functions are called. void toku_testsetup_initialize(void); @@ -1174,57 +665,6 @@ typedef struct { void toku_ft_get_status(FT_STATUS); -void -toku_ft_bn_apply_msg_once( - BASEMENTNODE bn, - const FT_MSG msg, - uint32_t idx, - uint32_t le_keylen, - LEAFENTRY le, - txn_gc_info *gc_info, - uint64_t *workdonep, - STAT64INFO stats_to_update - ); - -void -toku_ft_bn_apply_msg( - ft_compare_func compare_fun, - ft_update_func update_fun, - DESCRIPTOR desc, - BASEMENTNODE bn, - FT_MSG msg, - txn_gc_info *gc_info, - uint64_t *workdone, - STAT64INFO stats_to_update - ); - -void -toku_ft_leaf_apply_msg( - ft_compare_func compare_fun, - ft_update_func update_fun, - DESCRIPTOR desc, - FTNODE node, - int target_childnum, - FT_MSG msg, - txn_gc_info *gc_info, - uint64_t *workdone, - STAT64INFO stats_to_update - ); - -void -toku_ft_node_put_msg( - ft_compare_func compare_fun, - ft_update_func update_fun, - DESCRIPTOR desc, - FTNODE node, - int target_childnum, - FT_MSG msg, - bool is_fresh, - txn_gc_info *gc_info, - size_t flow_deltas[], - STAT64INFO stats_to_update - ); - void toku_flusher_thread_set_callback(void (*callback_f)(int, void*), void* extra); int 
toku_upgrade_subtree_estimates_to_stat64info(int fd, FT h) __attribute__((nonnull)); @@ -1245,6 +685,5 @@ typedef int (*FT_GET_CALLBACK_FUNCTION)(ITEMLEN keylen, bytevec key, ITEMLEN val typedef bool (*FT_CHECK_INTERRUPT_CALLBACK)(void *extra); -struct ft_search; struct ft_cursor; int toku_ft_search(FT_HANDLE ft_handle, ft_search *search, FT_GET_CALLBACK_FUNCTION getf, void *getf_v, struct ft_cursor *ftcursor, bool can_bulk_fetch); diff --git a/ft/ft-node-deserialize.cc b/ft/ft-node-deserialize.cc index f309a32b44a..a5cbfa1511d 100644 --- a/ft/ft-node-deserialize.cc +++ b/ft/ft-node-deserialize.cc @@ -89,8 +89,8 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
-#include -#include +#include "ft/node.h" +#include "ft-internal.h" /* * ft-node-deserialize.c - diff --git a/ft/ft-ops.cc b/ft/ft-ops.cc index 2029607e912..7aa9aa6ed74 100644 --- a/ft/ft-ops.cc +++ b/ft/ft-ops.cc @@ -206,6 +206,7 @@ basement nodes, bulk fetch, and partial fetch: #include "ft-cachetable-wrappers.h" #include "ft-flusher.h" #include "ft-internal.h" +#include "node.h" #include "ft_layout_version.h" #include "log-internal.h" #include "sub_block.h" @@ -431,99 +432,6 @@ void toku_note_deserialized_basement_node(bool fixed_key_size) { } } -bool is_entire_node_in_memory(FTNODE node) { - for (int i = 0; i < node->n_children; i++) { - if(BP_STATE(node,i) != PT_AVAIL) { - return false; - } - } - return true; -} - -void -toku_assert_entire_node_in_memory(FTNODE UU() node) { - paranoid_invariant(is_entire_node_in_memory(node)); -} - -uint32_t -get_leaf_num_entries(FTNODE node) { - uint32_t result = 0; - int i; - toku_assert_entire_node_in_memory(node); - for ( i = 0; i < node->n_children; i++) { - result += BLB_DATA(node, i)->num_klpairs(); - } - return result; -} - -static enum reactivity -get_leaf_reactivity (FTNODE node, uint32_t nodesize) { - enum reactivity re = RE_STABLE; - toku_assert_entire_node_in_memory(node); - paranoid_invariant(node->height==0); - unsigned int size = toku_serialize_ftnode_size(node); - if (size > nodesize && get_leaf_num_entries(node) > 1) { - re = RE_FISSIBLE; - } - else if ((size*4) < nodesize && !BLB_SEQINSERT(node, node->n_children-1)) { - re = RE_FUSIBLE; - } - return re; -} - -enum reactivity -get_nonleaf_reactivity(FTNODE node, unsigned int fanout) { - paranoid_invariant(node->height>0); - int n_children = node->n_children; - if (n_children > (int) fanout) return RE_FISSIBLE; - if (n_children*4 < (int) fanout) return RE_FUSIBLE; - return RE_STABLE; -} - -enum reactivity -get_node_reactivity(FT ft, FTNODE node) { - toku_assert_entire_node_in_memory(node); - if (node->height==0) - return get_leaf_reactivity(node, 
ft->h->nodesize); - else - return get_nonleaf_reactivity(node, ft->h->fanout); -} - -unsigned int -toku_bnc_nbytesinbuf(NONLEAF_CHILDINFO bnc) -{ - return bnc->msg_buffer.buffer_size_in_use(); -} - -// return true if the size of the buffers plus the amount of work done is large enough. (But return false if there is nothing to be flushed (the buffers empty)). -bool -toku_ft_nonleaf_is_gorged (FTNODE node, uint32_t nodesize) { - uint64_t size = toku_serialize_ftnode_size(node); - - bool buffers_are_empty = true; - toku_assert_entire_node_in_memory(node); - // - // the nonleaf node is gorged if the following holds true: - // - the buffers are non-empty - // - the total workdone by the buffers PLUS the size of the buffers - // is greater than nodesize (which as of Maxwell should be - // 4MB) - // - paranoid_invariant(node->height > 0); - for (int child = 0; child < node->n_children; ++child) { - size += BP_WORKDONE(node, child); - } - for (int child = 0; child < node->n_children; ++child) { - if (toku_bnc_nbytesinbuf(BNC(node, child)) > 0) { - buffers_are_empty = false; - break; - } - } - return ((size > nodesize) - && - (!buffers_are_empty)); -} - static void ft_verify_flags(FT UU(ft), FTNODE UU(node)) { paranoid_invariant(ft->h->flags == node->flags); } @@ -536,13 +444,7 @@ uint32_t compute_child_fullhash (CACHEFILE cf, FTNODE node, int childnum) { return toku_cachetable_hash(cf, BP_BLOCKNUM(node, childnum)); } -int -toku_bnc_n_entries(NONLEAF_CHILDINFO bnc) -{ - return bnc->msg_buffer.num_entries(); -} - -static const DBT *prepivotkey (FTNODE node, int childnum, const DBT * const lower_bound_exclusive) { +const DBT *prepivotkey (FTNODE node, int childnum, const DBT * const lower_bound_exclusive) { if (childnum==0) return lower_bound_exclusive; else { @@ -550,53 +452,26 @@ static const DBT *prepivotkey (FTNODE node, int childnum, const DBT * const lowe } } -static const DBT *postpivotkey (FTNODE node, int childnum, const DBT * const upper_bound_inclusive) { +const 
DBT *postpivotkey (FTNODE node, int childnum, const DBT * const upper_bound_inclusive) { if (childnum+1 == node->n_children) return upper_bound_inclusive; else { return &node->childkeys[childnum]; } } -static struct pivot_bounds next_pivot_keys (FTNODE node, int childnum, struct pivot_bounds const * const old_pb) { + +struct pivot_bounds next_pivot_keys (FTNODE node, int childnum, struct pivot_bounds const * const old_pb) { struct pivot_bounds pb = {.lower_bound_exclusive = prepivotkey(node, childnum, old_pb->lower_bound_exclusive), .upper_bound_inclusive = postpivotkey(node, childnum, old_pb->upper_bound_inclusive)}; return pb; } -// how much memory does this child buffer consume? -long -toku_bnc_memory_size(NONLEAF_CHILDINFO bnc) -{ - return (sizeof(*bnc) + - bnc->msg_buffer.memory_footprint() + - bnc->fresh_message_tree.memory_size() + - bnc->stale_message_tree.memory_size() + - bnc->broadcast_list.memory_size()); -} - -// how much memory in this child buffer holds useful data? -// originally created solely for use by test program(s). 
-long -toku_bnc_memory_used(NONLEAF_CHILDINFO bnc) -{ - return (sizeof(*bnc) + - bnc->msg_buffer.memory_size_in_use() + - bnc->fresh_message_tree.memory_size() + - bnc->stale_message_tree.memory_size() + - bnc->broadcast_list.memory_size()); -} - -static long -get_avail_internal_node_partition_size(FTNODE node, int i) -{ +static long get_avail_internal_node_partition_size(FTNODE node, int i) { paranoid_invariant(node->height > 0); return toku_bnc_memory_size(BNC(node, i)); } - -static long -ftnode_cachepressure_size(FTNODE node) -{ +static long ftnode_cachepressure_size(FTNODE node) { long retval = 0; bool totally_empty = true; if (node->height == 0) { @@ -802,45 +677,6 @@ void toku_ft_status_update_flush_reason(FTNODE node, } } -static void ftnode_update_disk_stats( - FTNODE ftnode, - FT ft, - bool for_checkpoint - ) -{ - STAT64INFO_S deltas = ZEROSTATS; - // capture deltas before rebalancing basements for serialization - deltas = toku_get_and_clear_basement_stats(ftnode); - // locking not necessary here with respect to checkpointing - // in Clayface (because of the pending lock and cachetable lock - // in toku_cachetable_begin_checkpoint) - // essentially, if we are dealing with a for_checkpoint - // parameter in a function that is called by the flush_callback, - // then the cachetable needs to ensure that this is called in a safe - // manner that does not interfere with the beginning - // of a checkpoint, which it does with the cachetable lock - // and pending lock - toku_ft_update_stats(&ft->h->on_disk_stats, deltas); - if (for_checkpoint) { - toku_ft_update_stats(&ft->checkpoint_header->on_disk_stats, deltas); - } -} - -static void ftnode_clone_partitions(FTNODE node, FTNODE cloned_node) { - for (int i = 0; i < node->n_children; i++) { - BP_BLOCKNUM(cloned_node,i) = BP_BLOCKNUM(node,i); - paranoid_invariant(BP_STATE(node,i) == PT_AVAIL); - BP_STATE(cloned_node,i) = PT_AVAIL; - BP_WORKDONE(cloned_node, i) = BP_WORKDONE(node, i); - if (node->height == 0) { - 
set_BLB(cloned_node, i, toku_clone_bn(BLB(node,i))); - } - else { - set_BNC(cloned_node, i, toku_clone_nl(BNC(node,i))); - } - } -} - void toku_ftnode_checkpoint_complete_callback(void *value_data) { FTNODE node = static_cast(value_data); if (node->height > 0) { @@ -864,14 +700,14 @@ void toku_ftnode_clone_callback( ) { FTNODE node = static_cast(value_data); - toku_assert_entire_node_in_memory(node); + toku_ftnode_assert_fully_in_memory(node); FT ft = static_cast(write_extraargs); FTNODE XCALLOC(cloned_node); if (node->height == 0) { // set header stats, must be done before rebalancing - ftnode_update_disk_stats(node, ft, for_checkpoint); + toku_ftnode_update_disk_stats(node, ft, for_checkpoint); // rebalance the leaf node - rebalance_ftnode_leaf(node, ft->h->basementnodesize); + toku_ftnode_leaf_rebalance(node, ft->h->basementnodesize); } cloned_node->oldest_referenced_xid_known = node->oldest_referenced_xid_known; @@ -900,7 +736,7 @@ void toku_ftnode_clone_callback( toku_move_ftnode_messages_to_stale(ft, node); } // clone partition - ftnode_clone_partitions(node, cloned_node); + toku_ftnode_clone_partitions(node, cloned_node); // clear dirty bit node->dirty = 0; @@ -917,8 +753,6 @@ void toku_ftnode_clone_callback( *cloned_value_data = cloned_node; } -static void ft_leaf_run_gc(FT ft, FTNODE node); - void toku_ftnode_flush_callback( CACHEFILE UU(cachefile), int fd, @@ -940,14 +774,14 @@ void toku_ftnode_flush_callback( assert(ftnode->thisnodename.b==nodename.b); int height = ftnode->height; if (write_me) { - toku_assert_entire_node_in_memory(ftnode); + toku_ftnode_assert_fully_in_memory(ftnode); if (height > 0 && !is_clone) { // cloned nodes already had their stale messages moved, see toku_ftnode_clone_callback() toku_move_ftnode_messages_to_stale(h, ftnode); } else if (height == 0) { - ft_leaf_run_gc(h, ftnode); + toku_ftnode_leaf_run_gc(h, ftnode); if (!is_clone) { - ftnode_update_disk_stats(ftnode, h, for_checkpoint); + toku_ftnode_update_disk_stats(ftnode, h, 
for_checkpoint); } } int r = toku_serialize_ftnode_to(fd, ftnode->thisnodename, ftnode, ndd, !is_clone, h, for_checkpoint); @@ -1091,12 +925,8 @@ exit: return; } -static void ft_bnc_move_messages_to_stale(FT ft, NONLEAF_CHILDINFO bnc); - // replace the child buffer with a compressed version of itself. -static void -compress_internal_node_partition(FTNODE node, int i, enum toku_compression_method compression_method) -{ +static void compress_internal_node_partition(FTNODE node, int i, enum toku_compression_method compression_method) { // if we should evict, compress the // message buffer into a sub_block assert(BP_STATE(node, i) == PT_AVAIL); @@ -1110,24 +940,6 @@ compress_internal_node_partition(FTNODE node, int i, enum toku_compression_metho BP_STATE(node,i) = PT_COMPRESSED; } -void toku_evict_bn_from_memory(FTNODE node, int childnum, FT h) { - // free the basement node - assert(!node->dirty); - BASEMENTNODE bn = BLB(node, childnum); - toku_ft_decrease_stats(&h->in_memory_stats, bn->stat64_delta); - destroy_basement_node(bn); - set_BNULL(node, childnum); - BP_STATE(node, childnum) = PT_ON_DISK; -} - -BASEMENTNODE toku_detach_bn(FTNODE node, int childnum) { - assert(BP_STATE(node, childnum) == PT_AVAIL); - BASEMENTNODE bn = BLB(node, childnum); - set_BNULL(node, childnum); - BP_STATE(node, childnum) = PT_ON_DISK; - return bn; -} - // callback for partially evicting a node int toku_ftnode_pe_callback(void *ftnode_pv, PAIR_ATTR old_attr, void *write_extraargs, void (*finalize)(PAIR_ATTR new_attr, void *extra), void *finalize_extra) { @@ -1171,7 +983,7 @@ int toku_ftnode_pe_callback(void *ftnode_pv, PAIR_ATTR old_attr, void *write_ext // this rule would cause upgrade code to upgrade this partition // again after we serialize it as the current version, which is bad. 
node->layout_version == node->layout_version_read_from_disk) { - ft_bnc_move_messages_to_stale(ft, bnc); + toku_ft_bnc_move_messages_to_stale(ft, bnc); compress_internal_node_partition( node, i, @@ -1466,6 +1278,34 @@ void toku_ft_status_update_deserialize_times(FTNODE node, tokutime_t deserialize } } +void toku_ft_status_note_msn_discard(void) { + STATUS_INC(FT_MSN_DISCARDS, 1); +} + +void toku_ft_status_note_update(bool broadcast) { + if (broadcast) { + STATUS_INC(FT_UPDATES_BROADCAST, 1); + } else { + STATUS_INC(FT_UPDATES, 1); + } +} + +void toku_ft_status_note_msg_bytes_out(size_t buffsize) { + STATUS_INC(FT_MSG_BYTES_OUT, buffsize); + STATUS_INC(FT_MSG_BYTES_CURR, -buffsize); +} +void toku_ft_status_note_ftnode(int height, bool created) { + if (created) { + if (height == 0) { + STATUS_INC(FT_CREATE_LEAF, 1); + } else { + STATUS_INC(FT_CREATE_NONLEAF, 1); + } + } else { + // created = false means destroyed + } +} + // callback for partially reading a node // could have just used toku_ftnode_fetch_callback, but wanted to separate the two cases to separate functions int toku_ftnode_pf_callback(void* ftnode_pv, void* disk_data, void* read_extraargs, int fd, PAIR_ATTR* sizep) { @@ -1522,118 +1362,131 @@ int toku_ftnode_pf_callback(void* ftnode_pv, void* disk_data, void* read_extraar return 0; } -struct msg_leafval_heaviside_extra { - ft_compare_func compare_fun; - DESCRIPTOR desc; - DBT const * const key; -}; - -//TODO: #1125 optimize -static int -toku_msg_leafval_heaviside(DBT const &kdbt, const struct msg_leafval_heaviside_extra &be) { +int toku_msg_leafval_heaviside(DBT const &kdbt, const struct toku_msg_leafval_heaviside_extra &be) { FAKE_DB(db, be.desc); - DBT const * const key = be.key; + DBT const *const key = be.key; return be.compare_fun(&db, &kdbt, key); } -static int -ft_compare_pivot(DESCRIPTOR desc, ft_compare_func cmp, const DBT *key, const DBT *pivot) -{ - int r; - FAKE_DB(db, desc); - r = cmp(&db, key, pivot); - return r; +void 
fill_bfe_for_full_read(struct ftnode_fetch_extra *bfe, FT h) { + bfe->type = ftnode_fetch_all; + bfe->h = h; + bfe->search = nullptr; + toku_init_dbt(&bfe->range_lock_left_key); + toku_init_dbt(&bfe->range_lock_right_key); + bfe->left_is_neg_infty = false; + bfe->right_is_pos_infty = false; + bfe->child_to_read = -1; + bfe->disable_prefetching = false; + bfe->read_all_partitions = false; + bfe->bytes_read = 0; + bfe->io_time = 0; + bfe->deserialize_time = 0; + bfe->decompress_time = 0; } - -// destroys the internals of the ftnode, but it does not free the values -// that are stored -// this is common functionality for toku_ftnode_free and rebalance_ftnode_leaf -// MUST NOT do anything besides free the structures that have been allocated -void toku_destroy_ftnode_internals(FTNODE node) -{ - for (int i=0; in_children-1; i++) { - toku_destroy_dbt(&node->childkeys[i]); +void fill_bfe_for_keymatch(struct ftnode_fetch_extra *bfe, FT h, + const DBT *left, const DBT *right, + bool disable_prefetching, bool read_all_partitions) { + paranoid_invariant(h->h->type == FT_CURRENT); + bfe->type = ftnode_fetch_keymatch; + bfe->h = h; + bfe->search = nullptr; + toku_init_dbt(&bfe->range_lock_left_key); + toku_init_dbt(&bfe->range_lock_right_key); + if (left) { + toku_copyref_dbt(&bfe->range_lock_left_key, *left); } - toku_free(node->childkeys); - node->childkeys = NULL; - for (int i=0; i < node->n_children; i++) { - if (BP_STATE(node,i) == PT_AVAIL) { - if (node->height > 0) { - destroy_nonleaf_childinfo(BNC(node,i)); - } else { - destroy_basement_node(BLB(node, i)); - } - } else if (BP_STATE(node,i) == PT_COMPRESSED) { - SUB_BLOCK sb = BSB(node,i); - toku_free(sb->compressed_ptr); - toku_free(sb); - } else { - paranoid_invariant(is_BNULL(node, i)); - } - set_BNULL(node, i); + if (right) { + toku_copyref_dbt(&bfe->range_lock_right_key, *right); } - toku_free(node->bp); - node->bp = NULL; + bfe->left_is_neg_infty = left == nullptr; + bfe->right_is_pos_infty = right == nullptr; + 
bfe->child_to_read = -1; + bfe->disable_prefetching = disable_prefetching; + bfe->read_all_partitions = read_all_partitions; + bfe->bytes_read = 0; + bfe->io_time = 0; + bfe->deserialize_time = 0; + bfe->decompress_time = 0; } -/* Frees a node, including all the stuff in the hash table. */ -void toku_ftnode_free(FTNODE *nodep) { - FTNODE node = *nodep; - if (node->height == 0) { - STATUS_INC(FT_DESTROY_LEAF, 1); - } else { - STATUS_INC(FT_DESTROY_NONLEAF, 1); +void fill_bfe_for_subset_read(struct ftnode_fetch_extra *bfe, FT h, ft_search *search, + const DBT *left, const DBT *right, + bool left_is_neg_infty, bool right_is_pos_infty, + bool disable_prefetching, bool read_all_partitions) { + paranoid_invariant(h->h->type == FT_CURRENT); + bfe->type = ftnode_fetch_subset; + bfe->h = h; + bfe->search = search; + toku_init_dbt(&bfe->range_lock_left_key); + toku_init_dbt(&bfe->range_lock_right_key); + if (left) { + toku_copyref_dbt(&bfe->range_lock_left_key, *left); } - toku_destroy_ftnode_internals(node); - toku_free(node); - *nodep = nullptr; + if (right) { + toku_copyref_dbt(&bfe->range_lock_right_key, *right); + } + bfe->left_is_neg_infty = left_is_neg_infty; + bfe->right_is_pos_infty = right_is_pos_infty; + bfe->child_to_read = -1; + bfe->disable_prefetching = disable_prefetching; + bfe->read_all_partitions = read_all_partitions; + bfe->bytes_read = 0; + bfe->io_time = 0; + bfe->deserialize_time = 0; + bfe->decompress_time = 0; } -void -toku_initialize_empty_ftnode (FTNODE n, BLOCKNUM nodename, int height, int num_children, int layout_version, unsigned int flags) -// Effect: Fill in N as an empty ftnode. 
-{ - paranoid_invariant(layout_version != 0); - paranoid_invariant(height >= 0); +void fill_bfe_for_min_read(struct ftnode_fetch_extra *bfe, FT ft) { + paranoid_invariant(ft->h->type == FT_CURRENT); + bfe->type = ftnode_fetch_none; + bfe->h = ft; + bfe->search = nullptr; + toku_init_dbt(&bfe->range_lock_left_key); + toku_init_dbt(&bfe->range_lock_right_key); + bfe->left_is_neg_infty = false; + bfe->right_is_pos_infty = false; + bfe->child_to_read = -1; + bfe->disable_prefetching = false; + bfe->read_all_partitions = false; + bfe->bytes_read = 0; + bfe->io_time = 0; + bfe->deserialize_time = 0; + bfe->decompress_time = 0; +} - if (height == 0) { - STATUS_INC(FT_CREATE_LEAF, 1); - } else { - STATUS_INC(FT_CREATE_NONLEAF, 1); +void fill_bfe_for_prefetch(struct ftnode_fetch_extra *bfe, FT ft, struct ft_cursor *cursor) { + paranoid_invariant(ft->h->type == FT_CURRENT); + bfe->type = ftnode_fetch_prefetch; + bfe->h = ft; + bfe->search = nullptr; + toku_init_dbt(&bfe->range_lock_left_key); + toku_init_dbt(&bfe->range_lock_right_key); + const DBT *left = &cursor->range_lock_left_key; + if (left->data) { + toku_clone_dbt(&bfe->range_lock_left_key, *left); } - - n->max_msn_applied_to_node_on_disk = ZERO_MSN; // correct value for root node, harmless for others - n->flags = flags; - n->thisnodename = nodename; - n->layout_version = layout_version; - n->layout_version_original = layout_version; - n->layout_version_read_from_disk = layout_version; - n->height = height; - n->totalchildkeylens = 0; - n->childkeys = 0; - n->bp = 0; - n->n_children = num_children; - n->oldest_referenced_xid_known = TXNID_NONE; - - if (num_children > 0) { - XMALLOC_N(num_children-1, n->childkeys); - XMALLOC_N(num_children, n->bp); - for (int i = 0; i < num_children; i++) { - BP_BLOCKNUM(n,i).b=0; - BP_STATE(n,i) = PT_INVALID; - BP_WORKDONE(n,i) = 0; - BP_INIT_TOUCHED_CLOCK(n, i); - set_BNULL(n,i); - if (height > 0) { - set_BNC(n, i, toku_create_empty_nl()); - } else { - set_BLB(n, i, 
toku_create_empty_bn()); - } - } + const DBT *right = &cursor->range_lock_right_key; + if (right->data) { + toku_clone_dbt(&bfe->range_lock_right_key, *right); } - n->dirty = 1; // special case exception, it's okay to mark as dirty because the basements are empty + bfe->left_is_neg_infty = cursor->left_is_neg_infty; + bfe->right_is_pos_infty = cursor->right_is_pos_infty; + bfe->child_to_read = -1; + bfe->disable_prefetching = cursor->disable_prefetching; + bfe->read_all_partitions = false; + bfe->bytes_read = 0; + bfe->io_time = 0; + bfe->deserialize_time = 0; + bfe->decompress_time = 0; +} + +void destroy_bfe_for_prefetch(struct ftnode_fetch_extra *bfe) { + paranoid_invariant(bfe->type == ftnode_fetch_prefetch); + toku_destroy_dbt(&bfe->range_lock_left_key); + toku_destroy_dbt(&bfe->range_lock_right_key); } static void @@ -1707,608 +1560,6 @@ ft_init_new_root(FT ft, FTNODE oldroot, FTNODE *newrootp) ); } -static void -init_childinfo(FTNODE node, int childnum, FTNODE child) { - BP_BLOCKNUM(node,childnum) = child->thisnodename; - BP_STATE(node,childnum) = PT_AVAIL; - BP_WORKDONE(node, childnum) = 0; - set_BNC(node, childnum, toku_create_empty_nl()); -} - -static void -init_childkey(FTNODE node, int childnum, const DBT *pivotkey) { - toku_clone_dbt(&node->childkeys[childnum], *pivotkey); - node->totalchildkeylens += pivotkey->size; -} - -// Used only by test programs: append a child node to a parent node -void -toku_ft_nonleaf_append_child(FTNODE node, FTNODE child, const DBT *pivotkey) { - int childnum = node->n_children; - node->n_children++; - XREALLOC_N(node->n_children, node->bp); - init_childinfo(node, childnum, child); - XREALLOC_N(node->n_children-1, node->childkeys); - if (pivotkey) { - invariant(childnum > 0); - init_childkey(node, childnum-1, pivotkey); - } - node->dirty = 1; -} - -void -toku_ft_bn_apply_msg_once ( - BASEMENTNODE bn, - const FT_MSG msg, - uint32_t idx, - uint32_t le_keylen, - LEAFENTRY le, - txn_gc_info *gc_info, - uint64_t *workdone, - 
STAT64INFO stats_to_update - ) -// Effect: Apply msg to leafentry (msn is ignored) -// Calculate work done by message on leafentry and add it to caller's workdone counter. -// idx is the location where it goes -// le is old leafentry -{ - size_t newsize=0, oldsize=0, workdone_this_le=0; - LEAFENTRY new_le=0; - int64_t numbytes_delta = 0; // how many bytes of user data (not including overhead) were added or deleted from this row - int64_t numrows_delta = 0; // will be +1 or -1 or 0 (if row was added or deleted or not) - uint32_t key_storage_size = ft_msg_get_keylen(msg) + sizeof(uint32_t); - if (le) { - oldsize = leafentry_memsize(le) + key_storage_size; - } - - // toku_le_apply_msg() may call bn_data::mempool_malloc_and_update_dmt() to allocate more space. - // That means le is guaranteed to not cause a sigsegv but it may point to a mempool that is - // no longer in use. We'll have to release the old mempool later. - toku_le_apply_msg( - msg, - le, - &bn->data_buffer, - idx, - le_keylen, - gc_info, - &new_le, - &numbytes_delta - ); - // at this point, we cannot trust cmd->u.id.key to be valid. - // The dmt may have realloced its mempool and freed the one containing key. - - newsize = new_le ? (leafentry_memsize(new_le) + + key_storage_size) : 0; - if (le && new_le) { - workdone_this_le = (oldsize > newsize ? oldsize : newsize); // work done is max of le size before and after message application - - } else { // we did not just replace a row, so ... - if (le) { - // ... we just deleted a row ... - workdone_this_le = oldsize; - numrows_delta = -1; - } - if (new_le) { - // ... 
or we just added a row - workdone_this_le = newsize; - numrows_delta = 1; - } - } - if (workdone) { // test programs may call with NULL - *workdone += workdone_this_le; - } - - // now update stat64 statistics - bn->stat64_delta.numrows += numrows_delta; - bn->stat64_delta.numbytes += numbytes_delta; - // the only reason stats_to_update may be null is for tests - if (stats_to_update) { - stats_to_update->numrows += numrows_delta; - stats_to_update->numbytes += numbytes_delta; - } - -} - -static const uint32_t setval_tag = 0xee0ccb99; // this was gotten by doing "cat /dev/random|head -c4|od -x" to get a random number. We want to make sure that the user actually passes us the setval_extra_s that we passed in. -struct setval_extra_s { - uint32_t tag; - bool did_set_val; - int setval_r; // any error code that setval_fun wants to return goes here. - // need arguments for toku_ft_bn_apply_msg_once - BASEMENTNODE bn; - MSN msn; // captured from original message, not currently used - XIDS xids; - const DBT *key; - uint32_t idx; - uint32_t le_keylen; - LEAFENTRY le; - txn_gc_info *gc_info; - uint64_t * workdone; // set by toku_ft_bn_apply_msg_once() - STAT64INFO stats_to_update; -}; - -/* - * If new_val == NULL, we send a delete message instead of an insert. - * This happens here instead of in do_delete() for consistency. - * setval_fun() is called from handlerton, passing in svextra_v - * from setval_extra_s input arg to ft->update_fun(). 
- */ -static void setval_fun (const DBT *new_val, void *svextra_v) { - struct setval_extra_s *CAST_FROM_VOIDP(svextra, svextra_v); - paranoid_invariant(svextra->tag==setval_tag); - paranoid_invariant(!svextra->did_set_val); - svextra->did_set_val = true; - - { - // can't leave scope until toku_ft_bn_apply_msg_once if - // this is a delete - DBT val; - FT_MSG_S msg = { FT_NONE, svextra->msn, svextra->xids, - .u = { .id = {svextra->key, NULL} } }; - if (new_val) { - msg.type = FT_INSERT; - msg.u.id.val = new_val; - } else { - msg.type = FT_DELETE_ANY; - toku_init_dbt(&val); - msg.u.id.val = &val; - } - toku_ft_bn_apply_msg_once(svextra->bn, &msg, - svextra->idx, svextra->le_keylen, svextra->le, - svextra->gc_info, - svextra->workdone, svextra->stats_to_update); - svextra->setval_r = 0; - } -} - -// We are already past the msn filter (in toku_ft_bn_apply_msg(), which calls do_update()), -// so capturing the msn in the setval_extra_s is not strictly required. The alternative -// would be to put a dummy msn in the messages created by setval_fun(), but preserving -// the original msn seems cleaner and it preserves accountability at a lower layer. 
-static int do_update(ft_update_func update_fun, DESCRIPTOR desc, BASEMENTNODE bn, FT_MSG msg, uint32_t idx, - LEAFENTRY le, - void* keydata, - uint32_t keylen, - txn_gc_info *gc_info, - uint64_t * workdone, - STAT64INFO stats_to_update) { - LEAFENTRY le_for_update; - DBT key; - const DBT *keyp; - const DBT *update_function_extra; - DBT vdbt; - const DBT *vdbtp; - - // the location of data depends whether this is a regular or - // broadcast update - if (msg->type == FT_UPDATE) { - // key is passed in with command (should be same as from le) - // update function extra is passed in with command - STATUS_INC(FT_UPDATES, 1); - keyp = msg->u.id.key; - update_function_extra = msg->u.id.val; - } else if (msg->type == FT_UPDATE_BROADCAST_ALL) { - // key is not passed in with broadcast, it comes from le - // update function extra is passed in with command - paranoid_invariant(le); // for broadcast updates, we just hit all leafentries - // so this cannot be null - paranoid_invariant(keydata); - paranoid_invariant(keylen); - paranoid_invariant(msg->u.id.key->size == 0); - STATUS_INC(FT_UPDATES_BROADCAST, 1); - keyp = toku_fill_dbt(&key, keydata, keylen); - update_function_extra = msg->u.id.val; - } else { - abort(); - } - - if (le && !le_latest_is_del(le)) { - // if the latest val exists, use it, and we'll use the leafentry later - uint32_t vallen; - void *valp = le_latest_val_and_len(le, &vallen); - vdbtp = toku_fill_dbt(&vdbt, valp, vallen); - } else { - // otherwise, the val and leafentry are both going to be null - vdbtp = NULL; - } - le_for_update = le; - - struct setval_extra_s setval_extra = {setval_tag, false, 0, bn, msg->msn, msg->xids, - keyp, idx, keylen, le_for_update, gc_info, - workdone, stats_to_update}; - // call handlerton's ft->update_fun(), which passes setval_extra to setval_fun() - FAKE_DB(db, desc); - int r = update_fun( - &db, - keyp, - vdbtp, - update_function_extra, - setval_fun, &setval_extra - ); - - if (r == 0) { r = setval_extra.setval_r; } - 
return r; -} - -// Should be renamed as something like "apply_msg_to_basement()." -void -toku_ft_bn_apply_msg ( - ft_compare_func compare_fun, - ft_update_func update_fun, - DESCRIPTOR desc, - BASEMENTNODE bn, - FT_MSG msg, - txn_gc_info *gc_info, - uint64_t *workdone, - STAT64INFO stats_to_update - ) -// Effect: -// Put a msg into a leaf. -// Calculate work done by message on leafnode and add it to caller's workdone counter. -// The leaf could end up "too big" or "too small". The caller must fix that up. -{ - LEAFENTRY storeddata; - void* key = NULL; - uint32_t keylen = 0; - - uint32_t num_klpairs; - int r; - struct msg_leafval_heaviside_extra be = {compare_fun, desc, msg->u.id.key}; - - unsigned int doing_seqinsert = bn->seqinsert; - bn->seqinsert = 0; - - switch (msg->type) { - case FT_INSERT_NO_OVERWRITE: - case FT_INSERT: { - uint32_t idx; - if (doing_seqinsert) { - idx = bn->data_buffer.num_klpairs(); - DBT kdbt; - r = bn->data_buffer.fetch_key_and_len(idx-1, &kdbt.size, &kdbt.data); - if (r != 0) goto fz; - int cmp = toku_msg_leafval_heaviside(kdbt, be); - if (cmp >= 0) goto fz; - r = DB_NOTFOUND; - } else { - fz: - r = bn->data_buffer.find_zero( - be, - &storeddata, - &key, - &keylen, - &idx - ); - } - if (r==DB_NOTFOUND) { - storeddata = 0; - } else { - assert_zero(r); - } - toku_ft_bn_apply_msg_once(bn, msg, idx, keylen, storeddata, gc_info, workdone, stats_to_update); - - // if the insertion point is within a window of the right edge of - // the leaf then it is sequential - // window = min(32, number of leaf entries/16) - { - uint32_t s = bn->data_buffer.num_klpairs(); - uint32_t w = s / 16; - if (w == 0) w = 1; - if (w > 32) w = 32; - - // within the window? 
- if (s - idx <= w) - bn->seqinsert = doing_seqinsert + 1; - } - break; - } - case FT_DELETE_ANY: - case FT_ABORT_ANY: - case FT_COMMIT_ANY: { - uint32_t idx; - // Apply to all the matches - - r = bn->data_buffer.find_zero( - be, - &storeddata, - &key, - &keylen, - &idx - ); - if (r == DB_NOTFOUND) break; - assert_zero(r); - toku_ft_bn_apply_msg_once(bn, msg, idx, keylen, storeddata, gc_info, workdone, stats_to_update); - - break; - } - case FT_OPTIMIZE_FOR_UPGRADE: - // fall through so that optimize_for_upgrade performs rest of the optimize logic - case FT_COMMIT_BROADCAST_ALL: - case FT_OPTIMIZE: - // Apply to all leafentries - num_klpairs = bn->data_buffer.num_klpairs(); - for (uint32_t idx = 0; idx < num_klpairs; ) { - DBT curr_keydbt; - void* curr_keyp = NULL; - uint32_t curr_keylen = 0; - r = bn->data_buffer.fetch_klpair(idx, &storeddata, &curr_keylen, &curr_keyp); - assert_zero(r); - toku_fill_dbt(&curr_keydbt, curr_keyp, curr_keylen); - // because this is a broadcast message, we need - // to fill the key in the message that we pass into toku_ft_bn_apply_msg_once - msg->u.id.key = &curr_keydbt; - int deleted = 0; - if (!le_is_clean(storeddata)) { //If already clean, nothing to do. - toku_ft_bn_apply_msg_once(bn, msg, idx, curr_keylen, storeddata, gc_info, workdone, stats_to_update); - // at this point, we cannot trust msg->u.id.key to be valid. - uint32_t new_dmt_size = bn->data_buffer.num_klpairs(); - if (new_dmt_size != num_klpairs) { - paranoid_invariant(new_dmt_size + 1 == num_klpairs); - //Item was deleted. 
- deleted = 1; - } - } - if (deleted) - num_klpairs--; - else - idx++; - } - paranoid_invariant(bn->data_buffer.num_klpairs() == num_klpairs); - - break; - case FT_COMMIT_BROADCAST_TXN: - case FT_ABORT_BROADCAST_TXN: - // Apply to all leafentries if txn is represented - num_klpairs = bn->data_buffer.num_klpairs(); - for (uint32_t idx = 0; idx < num_klpairs; ) { - DBT curr_keydbt; - void* curr_keyp = NULL; - uint32_t curr_keylen = 0; - r = bn->data_buffer.fetch_klpair(idx, &storeddata, &curr_keylen, &curr_keyp); - assert_zero(r); - toku_fill_dbt(&curr_keydbt, curr_keyp, curr_keylen); - // because this is a broadcast message, we need - // to fill the key in the message that we pass into toku_ft_bn_apply_msg_once - msg->u.id.key = &curr_keydbt; - int deleted = 0; - if (le_has_xids(storeddata, msg->xids)) { - toku_ft_bn_apply_msg_once(bn, msg, idx, curr_keylen, storeddata, gc_info, workdone, stats_to_update); - uint32_t new_dmt_size = bn->data_buffer.num_klpairs(); - if (new_dmt_size != num_klpairs) { - paranoid_invariant(new_dmt_size + 1 == num_klpairs); - //Item was deleted. - deleted = 1; - } - } - if (deleted) - num_klpairs--; - else - idx++; - } - paranoid_invariant(bn->data_buffer.num_klpairs() == num_klpairs); - - break; - case FT_UPDATE: { - uint32_t idx; - r = bn->data_buffer.find_zero( - be, - &storeddata, - &key, - &keylen, - &idx - ); - if (r==DB_NOTFOUND) { - { - //Point to msg's copy of the key so we don't worry about le being freed - //TODO: 46 MAYBE Get rid of this when le_apply message memory is better handled - key = msg->u.id.key->data; - keylen = msg->u.id.key->size; - } - r = do_update(update_fun, desc, bn, msg, idx, NULL, NULL, 0, gc_info, workdone, stats_to_update); - } else if (r==0) { - r = do_update(update_fun, desc, bn, msg, idx, storeddata, key, keylen, gc_info, workdone, stats_to_update); - } // otherwise, a worse error, just return it - break; - } - case FT_UPDATE_BROADCAST_ALL: { - // apply to all leafentries. 
- uint32_t idx = 0; - uint32_t num_leafentries_before; - while (idx < (num_leafentries_before = bn->data_buffer.num_klpairs())) { - void* curr_key = nullptr; - uint32_t curr_keylen = 0; - r = bn->data_buffer.fetch_klpair(idx, &storeddata, &curr_keylen, &curr_key); - assert_zero(r); - - //TODO: 46 replace this with something better than cloning key - // TODO: (Zardosht) This may be unnecessary now, due to how the key - // is handled in the bndata. Investigate and determine - char clone_mem[curr_keylen]; // only lasts one loop, alloca would overflow (end of function) - memcpy((void*)clone_mem, curr_key, curr_keylen); - curr_key = (void*)clone_mem; - - // This is broken below. Have a compilation error checked - // in as a reminder - r = do_update(update_fun, desc, bn, msg, idx, storeddata, curr_key, curr_keylen, gc_info, workdone, stats_to_update); - assert_zero(r); - - if (num_leafentries_before == bn->data_buffer.num_klpairs()) { - // we didn't delete something, so increment the index. - idx++; - } - } - break; - } - case FT_NONE: break; // don't do anything - } - - return; -} - -static inline int -key_msn_cmp(const DBT *a, const DBT *b, const MSN amsn, const MSN bmsn, - DESCRIPTOR descriptor, ft_compare_func key_cmp) -{ - FAKE_DB(db, descriptor); - int r = key_cmp(&db, a, b); - if (r == 0) { - if (amsn.msn > bmsn.msn) { - r = +1; - } else if (amsn.msn < bmsn.msn) { - r = -1; - } else { - r = 0; - } - } - return r; -} - -int -toku_msg_buffer_key_msn_heaviside(const int32_t &offset, const struct toku_msg_buffer_key_msn_heaviside_extra &extra) -{ - MSN query_msn; - DBT query_key; - extra.msg_buffer->get_message_key_msn(offset, &query_key, &query_msn); - return key_msn_cmp(&query_key, extra.key, query_msn, extra.msn, - extra.desc, extra.cmp); -} - -int -toku_msg_buffer_key_msn_cmp(const struct toku_msg_buffer_key_msn_cmp_extra &extra, const int32_t &ao, const int32_t &bo) -{ - MSN amsn, bmsn; - DBT akey, bkey; - extra.msg_buffer->get_message_key_msn(ao, &akey, &amsn); 
- extra.msg_buffer->get_message_key_msn(bo, &bkey, &bmsn); - return key_msn_cmp(&akey, &bkey, amsn, bmsn, - extra.desc, extra.cmp); -} - -// Effect: Enqueue the message represented by the parameters into the -// bnc's buffer, and put it in either the fresh or stale message tree, -// or the broadcast list. -static void bnc_insert_msg(NONLEAF_CHILDINFO bnc, FT_MSG msg, bool is_fresh, DESCRIPTOR desc, ft_compare_func cmp) { - int r = 0; - int32_t offset; - bnc->msg_buffer.enqueue(msg, is_fresh, &offset); - enum ft_msg_type type = ft_msg_get_type(msg); - if (ft_msg_type_applies_once(type)) { - DBT key; - toku_fill_dbt(&key, ft_msg_get_key(msg), ft_msg_get_keylen(msg)); - struct toku_msg_buffer_key_msn_heaviside_extra extra = { .desc = desc, .cmp = cmp, .msg_buffer = &bnc->msg_buffer, .key = &key, .msn = msg->msn }; - if (is_fresh) { - r = bnc->fresh_message_tree.insert(offset, extra, nullptr); - assert_zero(r); - } else { - r = bnc->stale_message_tree.insert(offset, extra, nullptr); - assert_zero(r); - } - } else { - invariant(ft_msg_type_applies_all(type) || ft_msg_type_does_nothing(type)); - const uint32_t idx = bnc->broadcast_list.size(); - r = bnc->broadcast_list.insert_at(offset, idx); - assert_zero(r); - } -} - -// This is only exported for tests. 
-void toku_bnc_insert_msg(NONLEAF_CHILDINFO bnc, const void *key, ITEMLEN keylen, const void *data, ITEMLEN datalen, enum ft_msg_type type, MSN msn, XIDS xids, bool is_fresh, DESCRIPTOR desc, ft_compare_func cmp) -{ - DBT k, v; - FT_MSG_S msg = { - type, msn, xids, .u = { .id = { toku_fill_dbt(&k, key, keylen), toku_fill_dbt(&v, data, datalen) } } - }; - bnc_insert_msg(bnc, &msg, is_fresh, desc, cmp); -} - -// append a msg to a nonleaf node's child buffer -static void ft_append_msg_to_child_buffer(ft_compare_func compare_fun, DESCRIPTOR desc, FTNODE node, - int childnum, FT_MSG msg, bool is_fresh) { - paranoid_invariant(BP_STATE(node,childnum) == PT_AVAIL); - bnc_insert_msg(BNC(node, childnum), msg, is_fresh, desc, compare_fun); - node->dirty = 1; -} - -// This is only exported for tests. -void toku_ft_append_to_child_buffer(ft_compare_func compare_fun, DESCRIPTOR desc, FTNODE node, int childnum, enum ft_msg_type type, MSN msn, XIDS xids, bool is_fresh, const DBT *key, const DBT *val) { - FT_MSG_S msg = { - type, msn, xids, .u = { .id = { key, val } } - }; - ft_append_msg_to_child_buffer(compare_fun, desc, node, childnum, &msg, is_fresh); -} - -static void ft_nonleaf_msg_once_to_child(ft_compare_func compare_fun, DESCRIPTOR desc, FTNODE node, int target_childnum, FT_MSG msg, bool is_fresh, size_t flow_deltas[]) -// Previously we had passive aggressive promotion, but that causes a lot of I/O a the checkpoint. So now we are just putting it in the buffer here. -// Also we don't worry about the node getting overfull here. It's the caller's problem. -{ - unsigned int childnum = (target_childnum >= 0 - ? target_childnum - : toku_ftnode_which_child(node, msg->u.id.key, desc, compare_fun)); - ft_append_msg_to_child_buffer(compare_fun, desc, node, childnum, msg, is_fresh); - NONLEAF_CHILDINFO bnc = BNC(node, childnum); - bnc->flow[0] += flow_deltas[0]; - bnc->flow[1] += flow_deltas[1]; -} - -/* Find the leftmost child that may contain the key. 
- * If the key exists it will be in the child whose number - * is the return value of this function. - */ -int toku_ftnode_which_child(FTNODE node, const DBT *k, - DESCRIPTOR desc, ft_compare_func cmp) { - // a funny case of no pivots - if (node->n_children <= 1) return 0; - - // check the last key to optimize seq insertions - int n = node->n_children-1; - int c = ft_compare_pivot(desc, cmp, k, &node->childkeys[n-1]); - if (c > 0) return n; - - // binary search the pivots - int lo = 0; - int hi = n-1; // skip the last one, we checked it above - int mi; - while (lo < hi) { - mi = (lo + hi) / 2; - c = ft_compare_pivot(desc, cmp, k, &node->childkeys[mi]); - if (c > 0) { - lo = mi+1; - continue; - } - if (c < 0) { - hi = mi; - continue; - } - return mi; - } - return lo; -} - -// Used for HOT. -int -toku_ftnode_hot_next_child(FTNODE node, - const DBT *k, - DESCRIPTOR desc, - ft_compare_func cmp) { - int low = 0; - int hi = node->n_children - 1; - int mi; - while (low < hi) { - mi = (low + hi) / 2; - int r = ft_compare_pivot(desc, cmp, k, &node->childkeys[mi]); - if (r > 0) { - low = mi + 1; - } else if (r < 0) { - hi = mi; - } else { - // if they were exactly equal, then we want the sub-tree under - // the next pivot. - return mi + 1; - } - } - invariant(low == hi); - return low; -} - // TODO Use this function to clean up other places where bits of messages are passed around // such as toku_bnc_insert_msg() and the call stack above it. static uint64_t @@ -2318,418 +1569,6 @@ ft_msg_size(FT_MSG msg) { return keyval_size + KEY_VALUE_OVERHEAD + FT_MSG_OVERHEAD + xids_size; } -static void -ft_nonleaf_msg_all(ft_compare_func compare_fun, DESCRIPTOR desc, FTNODE node, FT_MSG msg, bool is_fresh, size_t flow_deltas[]) -// Effect: Put the message into a nonleaf node. We put it into all children, possibly causing the children to become reactive. -// We don't do the splitting and merging. That's up to the caller after doing all the puts it wants to do. 
-// The re_array[i] gets set to the reactivity of any modified child i. (And there may be several such children.) -{ - for (int i = 0; i < node->n_children; i++) { - ft_nonleaf_msg_once_to_child(compare_fun, desc, node, i, msg, is_fresh, flow_deltas); - } -} - -static bool -ft_msg_applies_once(FT_MSG msg) -{ - return ft_msg_type_applies_once(msg->type); -} - -static bool -ft_msg_applies_all(FT_MSG msg) -{ - return ft_msg_type_applies_all(msg->type); -} - -static bool -ft_msg_does_nothing(FT_MSG msg) -{ - return ft_msg_type_does_nothing(msg->type); -} - -static void -ft_nonleaf_put_msg(ft_compare_func compare_fun, DESCRIPTOR desc, FTNODE node, int target_childnum, FT_MSG msg, bool is_fresh, size_t flow_deltas[]) -// Effect: Put the message into a nonleaf node. We may put it into a child, possibly causing the child to become reactive. -// We don't do the splitting and merging. That's up to the caller after doing all the puts it wants to do. -// The re_array[i] gets set to the reactivity of any modified child i. (And there may be several such children.) -// -{ - - // - // see comments in toku_ft_leaf_apply_msg - // to understand why we handle setting - // node->max_msn_applied_to_node_on_disk here, - // and don't do it in toku_ft_node_put_msg - // - MSN msg_msn = msg->msn; - invariant(msg_msn.msn > node->max_msn_applied_to_node_on_disk.msn); - node->max_msn_applied_to_node_on_disk = msg_msn; - - if (ft_msg_applies_once(msg)) { - ft_nonleaf_msg_once_to_child(compare_fun, desc, node, target_childnum, msg, is_fresh, flow_deltas); - } else if (ft_msg_applies_all(msg)) { - ft_nonleaf_msg_all(compare_fun, desc, node, msg, is_fresh, flow_deltas); - } else { - paranoid_invariant(ft_msg_does_nothing(msg)); - } -} - -// Garbage collect one leaf entry. 
-static void -ft_basement_node_gc_once(BASEMENTNODE bn, - uint32_t index, - void* keyp, - uint32_t keylen, - LEAFENTRY leaf_entry, - txn_gc_info *gc_info, - STAT64INFO_S * delta) -{ - paranoid_invariant(leaf_entry); - - // Don't run garbage collection on non-mvcc leaf entries. - if (leaf_entry->type != LE_MVCC) { - goto exit; - } - - // Don't run garbage collection if this leafentry decides it's not worth it. - if (!toku_le_worth_running_garbage_collection(leaf_entry, gc_info)) { - goto exit; - } - - LEAFENTRY new_leaf_entry; - new_leaf_entry = NULL; - - // The mempool doesn't free itself. When it allocates new memory, - // this pointer will be set to the older memory that must now be - // freed. - void * maybe_free; - maybe_free = NULL; - - // These will represent the number of bytes and rows changed as - // part of the garbage collection. - int64_t numbytes_delta; - int64_t numrows_delta; - toku_le_garbage_collect(leaf_entry, - &bn->data_buffer, - index, - keyp, - keylen, - gc_info, - &new_leaf_entry, - &numbytes_delta); - - numrows_delta = 0; - if (new_leaf_entry) { - numrows_delta = 0; - } else { - numrows_delta = -1; - } - - // If we created a new mempool buffer we must free the - // old/original buffer. - if (maybe_free) { - toku_free(maybe_free); - } - - // Update stats. - bn->stat64_delta.numrows += numrows_delta; - bn->stat64_delta.numbytes += numbytes_delta; - delta->numrows += numrows_delta; - delta->numbytes += numbytes_delta; - -exit: - return; -} - -// Garbage collect all leaf entries for a given basement node. 
-static void -basement_node_gc_all_les(BASEMENTNODE bn, - txn_gc_info *gc_info, - STAT64INFO_S * delta) -{ - int r = 0; - uint32_t index = 0; - uint32_t num_leafentries_before; - while (index < (num_leafentries_before = bn->data_buffer.num_klpairs())) { - void* keyp = NULL; - uint32_t keylen = 0; - LEAFENTRY leaf_entry; - r = bn->data_buffer.fetch_klpair(index, &leaf_entry, &keylen, &keyp); - assert_zero(r); - ft_basement_node_gc_once( - bn, - index, - keyp, - keylen, - leaf_entry, - gc_info, - delta - ); - // Check if the leaf entry was deleted or not. - if (num_leafentries_before == bn->data_buffer.num_klpairs()) { - ++index; - } - } -} - -// Garbage collect all leaf entires in all basement nodes. -static void -ft_leaf_gc_all_les(FT ft, FTNODE node, txn_gc_info *gc_info) -{ - toku_assert_entire_node_in_memory(node); - paranoid_invariant_zero(node->height); - // Loop through each leaf entry, garbage collecting as we go. - for (int i = 0; i < node->n_children; ++i) { - // Perform the garbage collection. - BASEMENTNODE bn = BLB(node, i); - STAT64INFO_S delta; - delta.numrows = 0; - delta.numbytes = 0; - basement_node_gc_all_les(bn, gc_info, &delta); - toku_ft_update_stats(&ft->in_memory_stats, delta); - } -} - -static void -ft_leaf_run_gc(FT ft, FTNODE node) { - TOKULOGGER logger = toku_cachefile_logger(ft->cf); - if (logger) { - TXN_MANAGER txn_manager = toku_logger_get_txn_manager(logger); - txn_manager_state txn_state_for_gc(txn_manager); - txn_state_for_gc.init(); - TXNID oldest_referenced_xid_for_simple_gc = toku_txn_manager_get_oldest_referenced_xid_estimate(txn_manager); - - // Perform full garbage collection. - // - // - txn_state_for_gc - // a fresh snapshot of the transaction system. - // - oldest_referenced_xid_for_simple_gc - // the oldest xid in any live list as of right now - suitible for simple gc - // - node->oldest_referenced_xid_known - // the last known oldest referenced xid for this node and any unapplied messages. 
- // it is a lower bound on the actual oldest referenced xid - but becasue there - // may be abort messages above us, we need to be careful to only use this value - // for implicit promotion (as opposed to the oldest referenced xid for simple gc) - // - // The node has its own oldest referenced xid because it must be careful not to implicitly promote - // provisional entries for transactions that are no longer live, but may have abort messages - // somewhere above us in the tree. - txn_gc_info gc_info(&txn_state_for_gc, - oldest_referenced_xid_for_simple_gc, - node->oldest_referenced_xid_known, - true); - ft_leaf_gc_all_les(ft, node, &gc_info); - } -} - -void toku_bnc_flush_to_child( - FT ft, - NONLEAF_CHILDINFO bnc, - FTNODE child, - TXNID parent_oldest_referenced_xid_known - ) -{ - paranoid_invariant(bnc); - - TOKULOGGER logger = toku_cachefile_logger(ft->cf); - TXN_MANAGER txn_manager = logger != nullptr ? toku_logger_get_txn_manager(logger) : nullptr; - TXNID oldest_referenced_xid_for_simple_gc = TXNID_NONE; - - txn_manager_state txn_state_for_gc(txn_manager); - bool do_garbage_collection = child->height == 0 && txn_manager != nullptr; - if (do_garbage_collection) { - txn_state_for_gc.init(); - oldest_referenced_xid_for_simple_gc = toku_txn_manager_get_oldest_referenced_xid_estimate(txn_manager); - } - txn_gc_info gc_info(&txn_state_for_gc, - oldest_referenced_xid_for_simple_gc, - child->oldest_referenced_xid_known, - true); - struct flush_msg_fn { - FT ft; - FTNODE child; - NONLEAF_CHILDINFO bnc; - txn_gc_info *gc_info; - - STAT64INFO_S stats_delta; - size_t remaining_memsize = bnc->msg_buffer.buffer_size_in_use(); - - flush_msg_fn(FT t, FTNODE n, NONLEAF_CHILDINFO nl, txn_gc_info *g) : - ft(t), child(n), bnc(nl), gc_info(g), remaining_memsize(bnc->msg_buffer.buffer_size_in_use()) { - stats_delta = { 0, 0 }; - } - int operator()(FT_MSG msg, bool is_fresh) { - size_t flow_deltas[] = { 0, 0 }; - size_t memsize_in_buffer = 
message_buffer::msg_memsize_in_buffer(msg); - if (remaining_memsize <= bnc->flow[0]) { - // this message is in the current checkpoint's worth of - // the end of the message buffer - flow_deltas[0] = memsize_in_buffer; - } else if (remaining_memsize <= bnc->flow[0] + bnc->flow[1]) { - // this message is in the last checkpoint's worth of the - // end of the message buffer - flow_deltas[1] = memsize_in_buffer; - } - toku_ft_node_put_msg( - ft->compare_fun, - ft->update_fun, - &ft->cmp_descriptor, - child, - -1, - msg, - is_fresh, - gc_info, - flow_deltas, - &stats_delta - ); - remaining_memsize -= memsize_in_buffer; - return 0; - } - } flush_fn(ft, child, bnc, &gc_info); - bnc->msg_buffer.iterate(flush_fn); - - child->oldest_referenced_xid_known = parent_oldest_referenced_xid_known; - - invariant(flush_fn.remaining_memsize == 0); - if (flush_fn.stats_delta.numbytes || flush_fn.stats_delta.numrows) { - toku_ft_update_stats(&ft->in_memory_stats, flush_fn.stats_delta); - } - if (do_garbage_collection) { - size_t buffsize = bnc->msg_buffer.buffer_size_in_use(); - STATUS_INC(FT_MSG_BYTES_OUT, buffsize); - // may be misleading if there's a broadcast message in there - STATUS_INC(FT_MSG_BYTES_CURR, -buffsize); - } -} - -bool toku_bnc_should_promote(FT ft, NONLEAF_CHILDINFO bnc) { - static const double factor = 0.125; - const uint64_t flow_threshold = ft->h->nodesize * factor; - return bnc->flow[0] >= flow_threshold || bnc->flow[1] >= flow_threshold; -} - -void -toku_ft_node_put_msg ( - ft_compare_func compare_fun, - ft_update_func update_fun, - DESCRIPTOR desc, - FTNODE node, - int target_childnum, - FT_MSG msg, - bool is_fresh, - txn_gc_info *gc_info, - size_t flow_deltas[], - STAT64INFO stats_to_update - ) -// Effect: Push message into the subtree rooted at NODE. -// If NODE is a leaf, then -// put message into leaf, applying it to the leafentries -// If NODE is a nonleaf, then push the message into the message buffer(s) of the relevent child(ren). 
-// The node may become overfull. That's not our problem. -{ - toku_assert_entire_node_in_memory(node); - // - // see comments in toku_ft_leaf_apply_msg - // to understand why we don't handle setting - // node->max_msn_applied_to_node_on_disk here, - // and instead defer to these functions - // - if (node->height==0) { - toku_ft_leaf_apply_msg(compare_fun, update_fun, desc, node, target_childnum, msg, gc_info, nullptr, stats_to_update); - } else { - ft_nonleaf_put_msg(compare_fun, desc, node, target_childnum, msg, is_fresh, flow_deltas); - } -} - -static const struct pivot_bounds infinite_bounds = {.lower_bound_exclusive=NULL, - .upper_bound_inclusive=NULL}; - - -// Effect: applies the message to the leaf if the appropriate basement node is in memory. -// This function is called during message injection and/or flushing, so the entire -// node MUST be in memory. -void toku_ft_leaf_apply_msg( - ft_compare_func compare_fun, - ft_update_func update_fun, - DESCRIPTOR desc, - FTNODE node, - int target_childnum, // which child to inject to, or -1 if unknown - FT_MSG msg, - txn_gc_info *gc_info, - uint64_t *workdone, - STAT64INFO stats_to_update - ) -{ - VERIFY_NODE(t, node); - toku_assert_entire_node_in_memory(node); - - // - // Because toku_ft_leaf_apply_msg is called with the intent of permanently - // applying a message to a leaf node (meaning the message is permanently applied - // and will be purged from the system after this call, as opposed to - // toku_apply_ancestors_messages_to_node, which applies a message - // for a query, but the message may still reside in the system and - // be reapplied later), we mark the node as dirty and - // take the opportunity to update node->max_msn_applied_to_node_on_disk. 
- // - node->dirty = 1; - - // - // we cannot blindly update node->max_msn_applied_to_node_on_disk, - // we must check to see if the msn is greater that the one already stored, - // because the message may have already been applied earlier (via - // toku_apply_ancestors_messages_to_node) to answer a query - // - // This is why we handle node->max_msn_applied_to_node_on_disk both here - // and in ft_nonleaf_put_msg, as opposed to in one location, toku_ft_node_put_msg. - // - MSN msg_msn = msg->msn; - if (msg_msn.msn > node->max_msn_applied_to_node_on_disk.msn) { - node->max_msn_applied_to_node_on_disk = msg_msn; - } - - if (ft_msg_applies_once(msg)) { - unsigned int childnum = (target_childnum >= 0 - ? target_childnum - : toku_ftnode_which_child(node, msg->u.id.key, desc, compare_fun)); - BASEMENTNODE bn = BLB(node, childnum); - if (msg->msn.msn > bn->max_msn_applied.msn) { - bn->max_msn_applied = msg->msn; - toku_ft_bn_apply_msg(compare_fun, - update_fun, - desc, - bn, - msg, - gc_info, - workdone, - stats_to_update); - } else { - STATUS_INC(FT_MSN_DISCARDS, 1); - } - } - else if (ft_msg_applies_all(msg)) { - for (int childnum=0; childnumn_children; childnum++) { - if (msg->msn.msn > BLB(node, childnum)->max_msn_applied.msn) { - BLB(node, childnum)->max_msn_applied = msg->msn; - toku_ft_bn_apply_msg(compare_fun, - update_fun, - desc, - BLB(node, childnum), - msg, - gc_info, - workdone, - stats_to_update); - } else { - STATUS_INC(FT_MSN_DISCARDS, 1); - } - } - } - else if (!ft_msg_does_nothing(msg)) { - abort(); - } - VERIFY_NODE(t, node); -} - static void inject_message_in_locked_node( FT ft, FTNODE node, @@ -2744,7 +1583,7 @@ static void inject_message_in_locked_node( // check in frwlock. Should be possible with TOKU_PTHREAD_DEBUG, nop // otherwise. 
invariant(toku_ctpair_is_write_locked(node->ct_pair)); - toku_assert_entire_node_in_memory(node); + toku_ftnode_assert_fully_in_memory(node); // Take the newer of the two oldest referenced xid values from the node and gc_info. // The gc_info usually has a newer value, because we got it at the top of this call @@ -2762,7 +1601,7 @@ static void inject_message_in_locked_node( msg->msn.msn = toku_sync_add_and_fetch(&ft->h->max_msn_in_ft.msn, 1); paranoid_invariant(msg->msn.msn > node->max_msn_applied_to_node_on_disk.msn); STAT64INFO_S stats_delta = {0,0}; - toku_ft_node_put_msg( + toku_ftnode_put_msg( ft->compare_fun, ft->update_fun, &ft->cmp_descriptor, @@ -2778,20 +1617,19 @@ static void inject_message_in_locked_node( toku_ft_update_stats(&ft->in_memory_stats, stats_delta); } // - // assumption is that toku_ft_node_put_msg will + // assumption is that toku_ftnode_put_msg will // mark the node as dirty. // enforcing invariant here. // paranoid_invariant(node->dirty != 0); - // TODO: Why not at height 0? 
// update some status variables if (node->height != 0) { uint64_t msgsize = ft_msg_size(msg); STATUS_INC(FT_MSG_BYTES_IN, msgsize); STATUS_INC(FT_MSG_BYTES_CURR, msgsize); STATUS_INC(FT_MSG_NUM, 1); - if (ft_msg_applies_all(msg)) { + if (ft_msg_type_applies_all(msg->type)) { STATUS_INC(FT_MSG_NUM_BROADCAST, 1); } } @@ -2811,7 +1649,7 @@ static void inject_message_in_locked_node( // if we call toku_ft_flush_some_child, then that function unpins the root // otherwise, we unpin ourselves - if (node->height > 0 && toku_ft_nonleaf_is_gorged(node, ft->h->nodesize)) { + if (node->height > 0 && toku_ftnode_nonleaf_is_gorged(node, ft->h->nodesize)) { toku_ft_flush_node_on_background_thread(ft, node); } else { @@ -2836,7 +1674,7 @@ static bool process_maybe_reactive_child(FT ft, FTNODE parent, FTNODE child, int // true if relocking is needed // false otherwise { - enum reactivity re = get_node_reactivity(ft, child); + enum reactivity re = toku_ftnode_get_reactivity(ft, child); enum reactivity newre; BLOCKNUM child_blocknum; uint32_t child_fullhash; @@ -2870,7 +1708,7 @@ static bool process_maybe_reactive_child(FT ft, FTNODE parent, FTNODE child, int child_blocknum = BP_BLOCKNUM(newparent, childnum); child_fullhash = compute_child_fullhash(ft->cf, newparent, childnum); toku_pin_ftnode_with_dep_nodes(ft, child_blocknum, child_fullhash, &bfe, PL_WRITE_CHEAP, 1, &newparent, &newchild, true); - newre = get_node_reactivity(ft, newchild); + newre = toku_ftnode_get_reactivity(ft, newchild); if (newre == RE_FISSIBLE) { enum split_mode split_mode; if (newparent->height == 1 && (loc & LEFT_EXTREME) && childnum == 0) { @@ -2916,7 +1754,7 @@ static bool process_maybe_reactive_child(FT ft, FTNODE parent, FTNODE child, int child_blocknum = BP_BLOCKNUM(newparent, childnum); child_fullhash = compute_child_fullhash(ft->cf, newparent, childnum); toku_pin_ftnode_with_dep_nodes(ft, child_blocknum, child_fullhash, &bfe, PL_READ, 1, &newparent, &newchild, true); - newre = get_node_reactivity(ft, 
newchild); + newre = toku_ftnode_get_reactivity(ft, newchild); if (newre == RE_FUSIBLE && newparent->n_children >= 2) { toku_unpin_ftnode_read_only(ft, newchild); toku_ft_merge_child(ft, newparent, childnum); @@ -2949,7 +1787,7 @@ static void inject_message_at_this_blocknum(FT ft, CACHEKEY cachekey, uint32_t f struct ftnode_fetch_extra bfe; fill_bfe_for_full_read(&bfe, ft); toku_pin_ftnode(ft, cachekey, fullhash, &bfe, PL_WRITE_CHEAP, &node, true); - toku_assert_entire_node_in_memory(node); + toku_ftnode_assert_fully_in_memory(node); paranoid_invariant(node->fullhash==fullhash); ft_verify_flags(ft, node); inject_message_in_locked_node(ft, node, -1, msg, flow_deltas, gc_info); @@ -2980,6 +1818,12 @@ static void ft_set_or_verify_rightmost_blocknum(FT ft, BLOCKNUM b) invariant(ft->rightmost_blocknum.b == b.b); } +bool toku_bnc_should_promote(FT ft, NONLEAF_CHILDINFO bnc) { + static const double factor = 0.125; + const uint64_t flow_threshold = ft->h->nodesize * factor; + return bnc->flow[0] >= flow_threshold || bnc->flow[1] >= flow_threshold; +} + static void push_something_in_subtree( FT ft, FTNODE subtree_root, @@ -3013,7 +1857,7 @@ static void push_something_in_subtree( // When the birdie is still saying we should promote, we use get_and_pin so that we wait to get the node. // If the birdie doesn't say to promote, we try maybe_get_and_pin. If we get the node cheaply, and it's dirty, we promote anyway. { - toku_assert_entire_node_in_memory(subtree_root); + toku_ftnode_assert_fully_in_memory(subtree_root); if (should_inject_in_node(loc, subtree_root->height, depth)) { switch (depth) { case 0: @@ -3042,7 +1886,7 @@ static void push_something_in_subtree( NONLEAF_CHILDINFO bnc; // toku_ft_root_put_msg should not have called us otherwise. - paranoid_invariant(ft_msg_applies_once(msg)); + paranoid_invariant(ft_msg_type_applies_once(msg->type)); childnum = (target_childnum >= 0 ? 
target_childnum : toku_ftnode_which_child(subtree_root, msg->u.id.key, &ft->cmp_descriptor, ft->compare_fun)); @@ -3107,7 +1951,7 @@ static void push_something_in_subtree( STATUS_INC(FT_PRO_NUM_STOP_LOCK_CHILD, 1); goto relock_and_push_here; } - if (is_entire_node_in_memory(child)) { + if (toku_ftnode_fully_in_memory(child)) { // toku_pin_ftnode... touches the clock but toku_maybe_pin_ftnode... doesn't. // This prevents partial eviction. for (int i = 0; i < child->n_children; ++i) { @@ -3229,7 +2073,7 @@ void toku_ft_root_put_msg( change_lock_type: // get the root node toku_pin_ftnode(ft, root_key, fullhash, &bfe, lock_type, &node, true); - toku_assert_entire_node_in_memory(node); + toku_ftnode_assert_fully_in_memory(node); paranoid_invariant(node->fullhash==fullhash); ft_verify_flags(ft, node); @@ -3238,7 +2082,7 @@ void toku_ft_root_put_msg( // injection thread to change lock type back and forth, when only one // of them needs to in order to handle the split. That's not great, // but root splits are incredibly rare. - enum reactivity re = get_node_reactivity(ft, node); + enum reactivity re = toku_ftnode_get_reactivity(ft, node); switch (re) { case RE_STABLE: case RE_FUSIBLE: // cannot merge anything at the root @@ -3281,7 +2125,7 @@ void toku_ft_root_put_msg( // anyway. // Now, either inject here or promote. We decide based on a heuristic: - if (node->height == 0 || !ft_msg_applies_once(msg)) { + if (node->height == 0 || !ft_msg_type_applies_once(msg->type)) { // If the root's a leaf or we're injecting a broadcast, drop the read lock and inject here. 
toku_unpin_ftnode_read_only(ft, node); STATUS_INC(FT_PRO_NUM_ROOT_H0_INJECT, 1); @@ -3404,7 +2248,7 @@ static int ft_leaf_get_relative_key_pos(FT ft, FTNODE leaf, const DBT *key, bool LEAFENTRY target_le; int childnum = toku_ftnode_which_child(leaf, key, &ft->cmp_descriptor, ft->compare_fun); BASEMENTNODE bn = BLB(leaf, childnum); - struct msg_leafval_heaviside_extra extra = { ft->compare_fun, &ft->cmp_descriptor, key }; + struct toku_msg_leafval_heaviside_extra extra = { ft->compare_fun, &ft->cmp_descriptor, key }; int r = bn->data_buffer.find_zero( extra, &target_le, @@ -3479,7 +2323,7 @@ static int ft_maybe_insert_into_rightmost_leaf(FT ft, DBT *key, DBT *val, XIDS m // If the rightmost leaf is reactive, bail out out and let the normal promotion pass // take care of it. This also ensures that if any of our ancestors are reactive, // they'll be taken care of too. - if (get_leaf_reactivity(rightmost_leaf, ft->h->nodesize) != RE_STABLE) { + if (toku_ftnode_get_leaf_reactivity(rightmost_leaf, ft->h->nodesize) != RE_STABLE) { STATUS_INC(FT_PRO_RIGHTMOST_LEAF_SHORTCUT_FAIL_REACTIVE, 1); goto cleanup; } @@ -4464,580 +3308,7 @@ void toku_ft_handle_create(FT_HANDLE *ft_handle_ptr) { *ft_handle_ptr = ft_handle; } -struct store_msg_buffer_offset_extra { - int32_t *offsets; - int i; -}; - -int store_msg_buffer_offset(const int32_t &offset, const uint32_t UU(idx), struct store_msg_buffer_offset_extra *const extra) __attribute__((nonnull(3))); -int store_msg_buffer_offset(const int32_t &offset, const uint32_t UU(idx), struct store_msg_buffer_offset_extra *const extra) -{ - extra->offsets[extra->i] = offset; - extra->i++; - return 0; -} - -/** - * Given pointers to offsets within a message buffer where we can find messages, - * figure out the MSN of each message, and compare those MSNs. Returns 1, - * 0, or -1 if a is larger than, equal to, or smaller than b. 
- */ -int msg_buffer_offset_msn_cmp(message_buffer &msg_buffer, const int32_t &ao, const int32_t &bo); -int msg_buffer_offset_msn_cmp(message_buffer &msg_buffer, const int32_t &ao, const int32_t &bo) -{ - MSN amsn, bmsn; - msg_buffer.get_message_key_msn(ao, nullptr, &amsn); - msg_buffer.get_message_key_msn(bo, nullptr, &bmsn); - if (amsn.msn > bmsn.msn) { - return +1; - } - if (amsn.msn < bmsn.msn) { - return -1; - } - return 0; -} - -/** - * Given a message buffer and and offset, apply the message with toku_ft_bn_apply_msg, or discard it, - * based on its MSN and the MSN of the basement node. - */ -static void -do_bn_apply_msg(FT_HANDLE ft_handle, BASEMENTNODE bn, message_buffer *msg_buffer, int32_t offset, - txn_gc_info *gc_info, uint64_t *workdone, STAT64INFO stats_to_update) { - DBT k, v; - FT_MSG_S msg = msg_buffer->get_message(offset, &k, &v); - - // The messages are being iterated over in (key,msn) order or just in - // msn order, so all the messages for one key, from one buffer, are in - // ascending msn order. So it's ok that we don't update the basement - // node's msn until the end. - if (msg.msn.msn > bn->max_msn_applied.msn) { - toku_ft_bn_apply_msg( - ft_handle->ft->compare_fun, - ft_handle->ft->update_fun, - &ft_handle->ft->cmp_descriptor, - bn, - &msg, - gc_info, - workdone, - stats_to_update - ); - } else { - STATUS_INC(FT_MSN_DISCARDS, 1); - } - - // We must always mark message as stale since it has been marked - // (using omt::iterate_and_mark_range) - // It is possible to call do_bn_apply_msg even when it won't apply the message because - // the node containing it could have been evicted and brought back in. 
- msg_buffer->set_freshness(offset, false); -} - - -struct iterate_do_bn_apply_msg_extra { - FT_HANDLE t; - BASEMENTNODE bn; - NONLEAF_CHILDINFO bnc; - txn_gc_info *gc_info; - uint64_t *workdone; - STAT64INFO stats_to_update; -}; - -int iterate_do_bn_apply_msg(const int32_t &offset, const uint32_t UU(idx), struct iterate_do_bn_apply_msg_extra *const e) __attribute__((nonnull(3))); -int iterate_do_bn_apply_msg(const int32_t &offset, const uint32_t UU(idx), struct iterate_do_bn_apply_msg_extra *const e) -{ - do_bn_apply_msg(e->t, e->bn, &e->bnc->msg_buffer, offset, e->gc_info, e->workdone, e->stats_to_update); - return 0; -} - -/** - * Given the bounds of the basement node to which we will apply messages, - * find the indexes within message_tree which contain the range of - * relevant messages. - * - * The message tree contains offsets into the buffer, where messages are - * found. The pivot_bounds are the lower bound exclusive and upper bound - * inclusive, because they come from pivot keys in the tree. We want OMT - * indices, which must have the lower bound be inclusive and the upper - * bound exclusive. We will get these by telling omt::find to look - * for something strictly bigger than each of our pivot bounds. - * - * Outputs the OMT indices in lbi (lower bound inclusive) and ube (upper - * bound exclusive). 
- */ -template -static void -find_bounds_within_message_tree( - DESCRIPTOR desc, /// used for cmp - ft_compare_func cmp, /// used to compare keys - const find_bounds_omt_t &message_tree, /// tree holding message buffer offsets, in which we want to look for indices - message_buffer *msg_buffer, /// message buffer in which messages are found - struct pivot_bounds const * const bounds, /// key bounds within the basement node we're applying messages to - uint32_t *lbi, /// (output) "lower bound inclusive" (index into message_tree) - uint32_t *ube /// (output) "upper bound exclusive" (index into message_tree) - ) -{ - int r = 0; - - if (bounds->lower_bound_exclusive) { - // By setting msn to MAX_MSN and by using direction of +1, we will - // get the first message greater than (in (key, msn) order) any - // message (with any msn) with the key lower_bound_exclusive. - // This will be a message we want to try applying, so it is the - // "lower bound inclusive" within the message_tree. - struct toku_msg_buffer_key_msn_heaviside_extra lbi_extra; - ZERO_STRUCT(lbi_extra); - lbi_extra.desc = desc; - lbi_extra.cmp = cmp; - lbi_extra.msg_buffer = msg_buffer; - lbi_extra.key = bounds->lower_bound_exclusive; - lbi_extra.msn = MAX_MSN; - int32_t found_lb; - r = message_tree.template find(lbi_extra, +1, &found_lb, lbi); - if (r == DB_NOTFOUND) { - // There is no relevant data (the lower bound is bigger than - // any message in this tree), so we have no range and we're - // done. - *lbi = 0; - *ube = 0; - return; - } - if (bounds->upper_bound_inclusive) { - // Check if what we found for lbi is greater than the upper - // bound inclusive that we have. If so, there are no relevant - // messages between these bounds. 
- const DBT *ubi = bounds->upper_bound_inclusive; - const int32_t offset = found_lb; - DBT found_lbidbt; - msg_buffer->get_message_key_msn(offset, &found_lbidbt, nullptr); - FAKE_DB(db, desc); - int c = cmp(&db, &found_lbidbt, ubi); - // These DBTs really are both inclusive bounds, so we need - // strict inequality in order to determine that there's - // nothing between them. If they're equal, then we actually - // need to apply the message pointed to by lbi, and also - // anything with the same key but a bigger msn. - if (c > 0) { - *lbi = 0; - *ube = 0; - return; - } - } - } else { - // No lower bound given, it's negative infinity, so we start at - // the first message in the OMT. - *lbi = 0; - } - if (bounds->upper_bound_inclusive) { - // Again, we use an msn of MAX_MSN and a direction of +1 to get - // the first thing bigger than the upper_bound_inclusive key. - // This is therefore the smallest thing we don't want to apply, - // and omt::iterate_on_range will not examine it. - struct toku_msg_buffer_key_msn_heaviside_extra ube_extra; - ZERO_STRUCT(ube_extra); - ube_extra.desc = desc; - ube_extra.cmp = cmp; - ube_extra.msg_buffer = msg_buffer; - ube_extra.key = bounds->upper_bound_inclusive; - ube_extra.msn = MAX_MSN; - r = message_tree.template find(ube_extra, +1, nullptr, ube); - if (r == DB_NOTFOUND) { - // Couldn't find anything in the buffer bigger than our key, - // so we need to look at everything up to the end of - // message_tree. - *ube = message_tree.size(); - } - } else { - // No upper bound given, it's positive infinity, so we need to go - // through the end of the OMT. - *ube = message_tree.size(); - } -} - -/** - * For each message in the ancestor's buffer (determined by childnum) that - * is key-wise between lower_bound_exclusive and upper_bound_inclusive, - * apply the message to the basement node. We treat the bounds as minus - * or plus infinity respectively if they are NULL. 
Do not mark the node - * as dirty (preserve previous state of 'dirty' bit). - */ -static void -bnc_apply_messages_to_basement_node( - FT_HANDLE t, // used for comparison function - BASEMENTNODE bn, // where to apply messages - FTNODE ancestor, // the ancestor node where we can find messages to apply - int childnum, // which child buffer of ancestor contains messages we want - struct pivot_bounds const * const bounds, // contains pivot key bounds of this basement node - txn_gc_info *gc_info, - bool* msgs_applied - ) -{ - int r; - NONLEAF_CHILDINFO bnc = BNC(ancestor, childnum); - - // Determine the offsets in the message trees between which we need to - // apply messages from this buffer - STAT64INFO_S stats_delta = {0,0}; - uint64_t workdone_this_ancestor = 0; - - uint32_t stale_lbi, stale_ube; - if (!bn->stale_ancestor_messages_applied) { - find_bounds_within_message_tree(&t->ft->cmp_descriptor, t->ft->compare_fun, bnc->stale_message_tree, &bnc->msg_buffer, bounds, &stale_lbi, &stale_ube); - } else { - stale_lbi = 0; - stale_ube = 0; - } - uint32_t fresh_lbi, fresh_ube; - find_bounds_within_message_tree(&t->ft->cmp_descriptor, t->ft->compare_fun, bnc->fresh_message_tree, &bnc->msg_buffer, bounds, &fresh_lbi, &fresh_ube); - - // We now know where all the messages we must apply are, so one of the - // following 4 cases will do the application, depending on which of - // the lists contains relevant messages: - // - // 1. broadcast messages and anything else, or a mix of fresh and stale - // 2. only fresh messages - // 3. only stale messages - if (bnc->broadcast_list.size() > 0 || - (stale_lbi != stale_ube && fresh_lbi != fresh_ube)) { - // We have messages in multiple trees, so we grab all - // the relevant messages' offsets and sort them by MSN, then apply - // them in MSN order. 
- const int buffer_size = ((stale_ube - stale_lbi) + (fresh_ube - fresh_lbi) + bnc->broadcast_list.size()); - toku::scoped_malloc offsets_buf(buffer_size * sizeof(int32_t)); - int32_t *offsets = reinterpret_cast(offsets_buf.get()); - struct store_msg_buffer_offset_extra sfo_extra = { .offsets = offsets, .i = 0 }; - - // Populate offsets array with offsets to stale messages - r = bnc->stale_message_tree.iterate_on_range(stale_lbi, stale_ube, &sfo_extra); - assert_zero(r); - - // Then store fresh offsets, and mark them to be moved to stale later. - r = bnc->fresh_message_tree.iterate_and_mark_range(fresh_lbi, fresh_ube, &sfo_extra); - assert_zero(r); - - // Store offsets of all broadcast messages. - r = bnc->broadcast_list.iterate(&sfo_extra); - assert_zero(r); - invariant(sfo_extra.i == buffer_size); - - // Sort by MSN. - r = toku::sort::mergesort_r(offsets, buffer_size, bnc->msg_buffer); - assert_zero(r); - - // Apply the messages in MSN order. - for (int i = 0; i < buffer_size; ++i) { - *msgs_applied = true; - do_bn_apply_msg(t, bn, &bnc->msg_buffer, offsets[i], gc_info, &workdone_this_ancestor, &stats_delta); - } - } else if (stale_lbi == stale_ube) { - // No stale messages to apply, we just apply fresh messages, and mark them to be moved to stale later. - struct iterate_do_bn_apply_msg_extra iter_extra = { .t = t, .bn = bn, .bnc = bnc, .gc_info = gc_info, .workdone = &workdone_this_ancestor, .stats_to_update = &stats_delta }; - if (fresh_ube - fresh_lbi > 0) *msgs_applied = true; - r = bnc->fresh_message_tree.iterate_and_mark_range(fresh_lbi, fresh_ube, &iter_extra); - assert_zero(r); - } else { - invariant(fresh_lbi == fresh_ube); - // No fresh messages to apply, we just apply stale messages. 
- - if (stale_ube - stale_lbi > 0) *msgs_applied = true; - struct iterate_do_bn_apply_msg_extra iter_extra = { .t = t, .bn = bn, .bnc = bnc, .gc_info = gc_info, .workdone = &workdone_this_ancestor, .stats_to_update = &stats_delta }; - - r = bnc->stale_message_tree.iterate_on_range(stale_lbi, stale_ube, &iter_extra); - assert_zero(r); - } - // - // update stats - // - if (workdone_this_ancestor > 0) { - (void) toku_sync_fetch_and_add(&BP_WORKDONE(ancestor, childnum), workdone_this_ancestor); - } - if (stats_delta.numbytes || stats_delta.numrows) { - toku_ft_update_stats(&t->ft->in_memory_stats, stats_delta); - } -} - -static void -apply_ancestors_messages_to_bn( - FT_HANDLE t, - FTNODE node, - int childnum, - ANCESTORS ancestors, - struct pivot_bounds const * const bounds, - txn_gc_info *gc_info, - bool* msgs_applied - ) -{ - BASEMENTNODE curr_bn = BLB(node, childnum); - struct pivot_bounds curr_bounds = next_pivot_keys(node, childnum, bounds); - for (ANCESTORS curr_ancestors = ancestors; curr_ancestors; curr_ancestors = curr_ancestors->next) { - if (curr_ancestors->node->max_msn_applied_to_node_on_disk.msn > curr_bn->max_msn_applied.msn) { - paranoid_invariant(BP_STATE(curr_ancestors->node, curr_ancestors->childnum) == PT_AVAIL); - bnc_apply_messages_to_basement_node( - t, - curr_bn, - curr_ancestors->node, - curr_ancestors->childnum, - &curr_bounds, - gc_info, - msgs_applied - ); - // We don't want to check this ancestor node again if the - // next time we query it, the msn hasn't changed. - curr_bn->max_msn_applied = curr_ancestors->node->max_msn_applied_to_node_on_disk; - } - } - // At this point, we know all the stale messages above this - // basement node have been applied, and any new messages will be - // fresh, so we don't need to look at stale messages for this - // basement node, unless it gets evicted (and this field becomes - // false when it's read in again). 
- curr_bn->stale_ancestor_messages_applied = true; -} - -void -toku_apply_ancestors_messages_to_node ( - FT_HANDLE t, - FTNODE node, - ANCESTORS ancestors, - struct pivot_bounds const * const bounds, - bool* msgs_applied, - int child_to_read - ) -// Effect: -// Bring a leaf node up-to-date according to all the messages in the ancestors. -// If the leaf node is already up-to-date then do nothing. -// If the leaf node is not already up-to-date, then record the work done -// for that leaf in each ancestor. -// Requires: -// This is being called when pinning a leaf node for the query path. -// The entire root-to-leaf path is pinned and appears in the ancestors list. -{ - VERIFY_NODE(t, node); - paranoid_invariant(node->height == 0); - - TXN_MANAGER txn_manager = toku_ft_get_txn_manager(t); - txn_manager_state txn_state_for_gc(txn_manager); - - TXNID oldest_referenced_xid_for_simple_gc = toku_ft_get_oldest_referenced_xid_estimate(t); - txn_gc_info gc_info(&txn_state_for_gc, - oldest_referenced_xid_for_simple_gc, - node->oldest_referenced_xid_known, - true); - if (!node->dirty && child_to_read >= 0) { - paranoid_invariant(BP_STATE(node, child_to_read) == PT_AVAIL); - apply_ancestors_messages_to_bn( - t, - node, - child_to_read, - ancestors, - bounds, - &gc_info, - msgs_applied - ); - } - else { - // know we are a leaf node - // An important invariant: - // We MUST bring every available basement node for a dirty node up to date. - // flushing on the cleaner thread depends on this. This invariant - // allows the cleaner thread to just pick an internal node and flush it - // as opposed to being forced to start from the root. 
- for (int i = 0; i < node->n_children; i++) { - if (BP_STATE(node, i) != PT_AVAIL) { continue; } - apply_ancestors_messages_to_bn( - t, - node, - i, - ancestors, - bounds, - &gc_info, - msgs_applied - ); - } - } - VERIFY_NODE(t, node); -} - -static bool bn_needs_ancestors_messages( - FT ft, - FTNODE node, - int childnum, - struct pivot_bounds const * const bounds, - ANCESTORS ancestors, - MSN* max_msn_applied - ) -{ - BASEMENTNODE bn = BLB(node, childnum); - struct pivot_bounds curr_bounds = next_pivot_keys(node, childnum, bounds); - bool needs_ancestors_messages = false; - for (ANCESTORS curr_ancestors = ancestors; curr_ancestors; curr_ancestors = curr_ancestors->next) { - if (curr_ancestors->node->max_msn_applied_to_node_on_disk.msn > bn->max_msn_applied.msn) { - paranoid_invariant(BP_STATE(curr_ancestors->node, curr_ancestors->childnum) == PT_AVAIL); - NONLEAF_CHILDINFO bnc = BNC(curr_ancestors->node, curr_ancestors->childnum); - if (bnc->broadcast_list.size() > 0) { - needs_ancestors_messages = true; - goto cleanup; - } - if (!bn->stale_ancestor_messages_applied) { - uint32_t stale_lbi, stale_ube; - find_bounds_within_message_tree(&ft->cmp_descriptor, - ft->compare_fun, - bnc->stale_message_tree, - &bnc->msg_buffer, - &curr_bounds, - &stale_lbi, - &stale_ube); - if (stale_lbi < stale_ube) { - needs_ancestors_messages = true; - goto cleanup; - } - } - uint32_t fresh_lbi, fresh_ube; - find_bounds_within_message_tree(&ft->cmp_descriptor, - ft->compare_fun, - bnc->fresh_message_tree, - &bnc->msg_buffer, - &curr_bounds, - &fresh_lbi, - &fresh_ube); - if (fresh_lbi < fresh_ube) { - needs_ancestors_messages = true; - goto cleanup; - } - if (curr_ancestors->node->max_msn_applied_to_node_on_disk.msn > max_msn_applied->msn) { - max_msn_applied->msn = curr_ancestors->node->max_msn_applied_to_node_on_disk.msn; - } - } - } -cleanup: - return needs_ancestors_messages; -} - -bool toku_ft_leaf_needs_ancestors_messages( - FT ft, - FTNODE node, - ANCESTORS ancestors, - struct 
pivot_bounds const * const bounds, - MSN *const max_msn_in_path, - int child_to_read - ) -// Effect: Determine whether there are messages in a node's ancestors -// which must be applied to it. These messages are in the correct -// keyrange for any available basement nodes, and are in nodes with the -// correct max_msn_applied_to_node_on_disk. -// Notes: -// This is an approximate query. -// Output: -// max_msn_in_path: max of "max_msn_applied_to_node_on_disk" over -// ancestors. This is used later to update basement nodes' -// max_msn_applied values in case we don't do the full algorithm. -// Returns: -// true if there may be some such messages -// false only if there are definitely no such messages -// Rationale: -// When we pin a node with a read lock, we want to quickly determine if -// we should exchange it for a write lock in preparation for applying -// messages. If there are no messages, we don't need the write lock. -{ - paranoid_invariant(node->height == 0); - bool needs_ancestors_messages = false; - // child_to_read may be -1 in test cases - if (!node->dirty && child_to_read >= 0) { - paranoid_invariant(BP_STATE(node, child_to_read) == PT_AVAIL); - needs_ancestors_messages = bn_needs_ancestors_messages( - ft, - node, - child_to_read, - bounds, - ancestors, - max_msn_in_path - ); - } - else { - for (int i = 0; i < node->n_children; ++i) { - if (BP_STATE(node, i) != PT_AVAIL) { continue; } - needs_ancestors_messages = bn_needs_ancestors_messages( - ft, - node, - i, - bounds, - ancestors, - max_msn_in_path - ); - if (needs_ancestors_messages) { - goto cleanup; - } - } - } -cleanup: - return needs_ancestors_messages; -} - -void toku_ft_bn_update_max_msn(FTNODE node, MSN max_msn_applied, int child_to_read) { - invariant(node->height == 0); - if (!node->dirty && child_to_read >= 0) { - paranoid_invariant(BP_STATE(node, child_to_read) == PT_AVAIL); - BASEMENTNODE bn = BLB(node, child_to_read); - if (max_msn_applied.msn > bn->max_msn_applied.msn) { - // see 
comment below - (void) toku_sync_val_compare_and_swap(&bn->max_msn_applied.msn, bn->max_msn_applied.msn, max_msn_applied.msn); - } - } - else { - for (int i = 0; i < node->n_children; ++i) { - if (BP_STATE(node, i) != PT_AVAIL) { continue; } - BASEMENTNODE bn = BLB(node, i); - if (max_msn_applied.msn > bn->max_msn_applied.msn) { - // This function runs in a shared access context, so to silence tools - // like DRD, we use a CAS and ignore the result. - // Any threads trying to update these basement nodes should be - // updating them to the same thing (since they all have a read lock on - // the same root-to-leaf path) so this is safe. - (void) toku_sync_val_compare_and_swap(&bn->max_msn_applied.msn, bn->max_msn_applied.msn, max_msn_applied.msn); - } - } - } -} - -struct copy_to_stale_extra { - FT ft; - NONLEAF_CHILDINFO bnc; -}; - -int copy_to_stale(const int32_t &offset, const uint32_t UU(idx), struct copy_to_stale_extra *const extra) __attribute__((nonnull(3))); -int copy_to_stale(const int32_t &offset, const uint32_t UU(idx), struct copy_to_stale_extra *const extra) -{ - MSN msn; - DBT key; - extra->bnc->msg_buffer.get_message_key_msn(offset, &key, &msn); - struct toku_msg_buffer_key_msn_heaviside_extra heaviside_extra = { .desc = &extra->ft->cmp_descriptor, .cmp = extra->ft->compare_fun, .msg_buffer = &extra->bnc->msg_buffer, .key = &key, .msn = msn }; - int r = extra->bnc->stale_message_tree.insert(offset, heaviside_extra, nullptr); - invariant_zero(r); - return 0; -} - -static void ft_bnc_move_messages_to_stale(FT ft, NONLEAF_CHILDINFO bnc) { - struct copy_to_stale_extra cts_extra = { .ft = ft, .bnc = bnc }; - int r = bnc->fresh_message_tree.iterate_over_marked(&cts_extra); - invariant_zero(r); - bnc->fresh_message_tree.delete_all_marked(); -} - -__attribute__((nonnull)) -void -toku_move_ftnode_messages_to_stale(FT ft, FTNODE node) { - invariant(node->height > 0); - for (int i = 0; i < node->n_children; ++i) { - if (BP_STATE(node, i) != PT_AVAIL) { - continue; 
- } - NONLEAF_CHILDINFO bnc = BNC(node, i); - // We can't delete things out of the fresh tree inside the above - // procedures because we're still looking at the fresh tree. Instead - // we have to move messages after we're done looking at it. - ft_bnc_move_messages_to_stale(ft, bnc); - } -} +/******************************* search ***************************************/ // Return true if this key is within the search bound. If there is no search bound then the tree search continues. static bool search_continue(ft_search *search, void *key, uint32_t key_len) { @@ -5208,32 +3479,16 @@ ftnode_pf_callback_and_free_bfe(void *ftnode_pv, void* disk_data, void *read_ext return r; } -void fill_bfe_for_prefetch(struct ftnode_fetch_extra *bfe, - FT h, - struct ft_cursor *c) { - paranoid_invariant(h->h->type == FT_CURRENT); - bfe->type = ftnode_fetch_prefetch; - bfe->h = h; - bfe->search = NULL; - toku_init_dbt(&bfe->range_lock_left_key); - toku_init_dbt(&bfe->range_lock_right_key); - const DBT *left = &c->range_lock_left_key; - if (left->data) { - toku_clone_dbt(&bfe->range_lock_left_key, *left); - } - const DBT *right = &c->range_lock_right_key; - if (right->data) { - toku_clone_dbt(&bfe->range_lock_right_key, *right); - } - bfe->left_is_neg_infty = c->left_is_neg_infty; - bfe->right_is_pos_infty = c->right_is_pos_infty; - bfe->child_to_read = -1; - bfe->disable_prefetching = c->disable_prefetching; - bfe->read_all_partitions = false; - bfe->bytes_read = 0; - bfe->io_time = 0; - bfe->deserialize_time = 0; - bfe->decompress_time = 0; +CACHETABLE_WRITE_CALLBACK get_write_callbacks_for_node(FT ft) { + CACHETABLE_WRITE_CALLBACK wc; + wc.flush_callback = toku_ftnode_flush_callback; + wc.pe_est_callback = toku_ftnode_pe_est_callback; + wc.pe_callback = toku_ftnode_pe_callback; + wc.cleaner_callback = toku_ftnode_cleaner_callback; + wc.clone_callback = toku_ftnode_clone_callback; + wc.checkpoint_complete_callback = toku_ftnode_checkpoint_complete_callback; + wc.write_extraargs = 
ft; + return wc; } static void @@ -5276,6 +3531,7 @@ struct unlock_ftnode_extra { FTNODE node; bool msgs_applied; }; + // When this is called, the cachetable lock is held static void unlock_ftnode_fun (void *v) { @@ -5576,6 +3832,11 @@ ft_search_node( return r; } +static const struct pivot_bounds infinite_bounds = { + .lower_bound_exclusive = nullptr, + .upper_bound_inclusive = nullptr, +}; + int toku_ft_search(FT_HANDLE ft_handle, ft_search *search, FT_GET_CALLBACK_FUNCTION getf, void *getf_v, FT_CURSOR ftcursor, bool can_bulk_fetch) // Effect: Perform a search. Associate cursor with a leaf if possible. // All searches are performed through this function. diff --git a/ft/ft-test-helpers.cc b/ft/ft-test-helpers.cc index 575bd69ab7e..68dd5cd408a 100644 --- a/ft/ft-test-helpers.cc +++ b/ft/ft-test-helpers.cc @@ -93,6 +93,7 @@ PATENT RIGHTS GRANT: #include "ft-flusher.h" #include "ft-internal.h" #include "ft.h" +#include "node.h" #include "fttypes.h" #include "ule.h" @@ -222,7 +223,7 @@ int toku_testsetup_insert_to_leaf (FT_HANDLE ft_handle, BLOCKNUM blocknum, const static size_t zero_flow_deltas[] = { 0, 0 }; txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, true); - toku_ft_node_put_msg ( + toku_ftnode_put_msg( ft_handle->ft->compare_fun, ft_handle->ft->update_fun, &ft_handle->ft->cmp_descriptor, diff --git a/ft/ft-verify.cc b/ft/ft-verify.cc index 51df7be4881..62591ba804b 100644 --- a/ft/ft-verify.cc +++ b/ft/ft-verify.cc @@ -100,6 +100,7 @@ PATENT RIGHTS GRANT: #include "ft-cachetable-wrappers.h" #include "ft-internal.h" #include "ft.h" +#include "node.h" static int compare_pairs (FT_HANDLE ft_handle, const DBT *a, const DBT *b) { @@ -399,7 +400,7 @@ toku_verify_ftnode_internal(FT_HANDLE ft_handle, BLOCKNUM blocknum = node->thisnodename; //printf("%s:%d pin %p\n", __FILE__, __LINE__, node_v); - toku_assert_entire_node_in_memory(node); + toku_ftnode_assert_fully_in_memory(node); this_msn = node->max_msn_applied_to_node_on_disk; if (height >= 0) { @@ -501,7 
+502,7 @@ toku_verify_ftnode (FT_HANDLE ft_handle, MSN this_msn; //printf("%s:%d pin %p\n", __FILE__, __LINE__, node_v); - toku_assert_entire_node_in_memory(node); + toku_ftnode_assert_fully_in_memory(node); this_msn = node->max_msn_applied_to_node_on_disk; int result = 0; diff --git a/ft/ft.cc b/ft/ft.cc index 26111334211..80b59293cbc 100644 --- a/ft/ft.cc +++ b/ft/ft.cc @@ -89,12 +89,12 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#include "ft.h" -#include "ft-internal.h" -#include "ft-cachetable-wrappers.h" -#include "log-internal.h" - -#include +#include "ft/ft.h" +#include "ft/ft-cachetable-wrappers.h" +#include "ft/ft-internal.h" +#include "ft/log-internal.h" +#include "ft/log_header.h" +#include "ft/node.h" #include #include diff --git a/ft/ft_node-serialize.cc b/ft/ft_node-serialize.cc index 1e4951d00c7..f5733fc7958 100644 --- a/ft/ft_node-serialize.cc +++ b/ft/ft_node-serialize.cc @@ -89,17 +89,18 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
-#include "ft-internal.h" -#include "log-internal.h" -#include -#include -#include -#include -#include "ft.h" -#include -#include #include "ft/cachetable.h" +#include "ft/compress.h" +#include "ft/ft.h" +#include "ft/ft-internal.h" +#include "ft/node.h" +#include "ft/log-internal.h" #include "ft/rollback.h" +#include "portability/toku_atomic.h" +#include "util/sort.h" +#include "util/threadpool.h" +#include "util/status.h" +#include "util/scoped_malloc.h" static FT_UPGRADE_STATUS_S ft_upgrade_status; @@ -532,7 +533,7 @@ toku_serialize_ftnode_size (FTNODE node) { // As of now, this seems to be called if and only if the entire node is supposed // to be in memory, so we will assert it. // - toku_assert_entire_node_in_memory(node); + toku_ftnode_assert_fully_in_memory(node); result += serialize_node_header_size(node); result += serialize_ftnode_info_size(node); for (int i = 0; i < node->n_children; i++) { @@ -541,208 +542,6 @@ toku_serialize_ftnode_size (FTNODE node) { return result; } -struct array_info { - uint32_t offset; - LEAFENTRY* le_array; - uint32_t* key_sizes_array; - const void** key_ptr_array; -}; - -static int -array_item(const void* key, const uint32_t keylen, const LEAFENTRY &le, const uint32_t idx, struct array_info *const ai) { - ai->le_array[idx+ai->offset] = le; - ai->key_sizes_array[idx+ai->offset] = keylen; - ai->key_ptr_array[idx+ai->offset] = key; - return 0; -} - -// There must still be at least one child -// Requires that all messages in buffers above have been applied. -// Because all messages above have been applied, setting msn of all new basements -// to max msn of existing basements is correct. (There cannot be any messages in -// buffers above that still need to be applied.) -void -rebalance_ftnode_leaf(FTNODE node, unsigned int basementnodesize) -{ - assert(node->height == 0); - assert(node->dirty); - - uint32_t num_orig_basements = node->n_children; - // Count number of leaf entries in this leaf (num_le). 
- uint32_t num_le = 0; - for (uint32_t i = 0; i < num_orig_basements; i++) { - num_le += BLB_DATA(node, i)->num_klpairs(); - } - - uint32_t num_alloc = num_le ? num_le : 1; // simplify logic below by always having at least one entry per array - - // Create an array of OMTVALUE's that store all the pointers to all the data. - // Each element in leafpointers is a pointer to a leaf. - toku::scoped_malloc leafpointers_buf(sizeof(LEAFENTRY) * num_alloc); - LEAFENTRY *leafpointers = reinterpret_cast(leafpointers_buf.get()); - leafpointers[0] = NULL; - - toku::scoped_malloc key_pointers_buf(sizeof(void *) * num_alloc); - const void **key_pointers = reinterpret_cast(key_pointers_buf.get()); - key_pointers[0] = NULL; - - toku::scoped_malloc key_sizes_buf(sizeof(uint32_t) * num_alloc); - uint32_t *key_sizes = reinterpret_cast(key_sizes_buf.get()); - - // Capture pointers to old mempools' buffers (so they can be destroyed) - toku::scoped_malloc old_bns_buf(sizeof(BASEMENTNODE) * num_orig_basements); - BASEMENTNODE *old_bns = reinterpret_cast(old_bns_buf.get()); - old_bns[0] = NULL; - - uint32_t curr_le = 0; - for (uint32_t i = 0; i < num_orig_basements; i++) { - bn_data* bd = BLB_DATA(node, i); - struct array_info ai {.offset = curr_le, .le_array = leafpointers, .key_sizes_array = key_sizes, .key_ptr_array = key_pointers }; - bd->iterate(&ai); - curr_le += bd->num_klpairs(); - } - - // Create an array that will store indexes of new pivots. - // Each element in new_pivots is the index of a pivot key. - // (Allocating num_le of them is overkill, but num_le is an upper bound.) - toku::scoped_malloc new_pivots_buf(sizeof(uint32_t) * num_alloc); - uint32_t *new_pivots = reinterpret_cast(new_pivots_buf.get()); - new_pivots[0] = 0; - - // Each element in le_sizes is the size of the leafentry pointed to by leafpointers. 
- toku::scoped_malloc le_sizes_buf(sizeof(size_t) * num_alloc); - size_t *le_sizes = reinterpret_cast(le_sizes_buf.get()); - le_sizes[0] = 0; - - // Create an array that will store the size of each basement. - // This is the sum of the leaf sizes of all the leaves in that basement. - // We don't know how many basements there will be, so we use num_le as the upper bound. - - // Sum of all le sizes in a single basement - toku::scoped_calloc bn_le_sizes_buf(sizeof(size_t) * num_alloc); - size_t *bn_le_sizes = reinterpret_cast(bn_le_sizes_buf.get()); - - // Sum of all key sizes in a single basement - toku::scoped_calloc bn_key_sizes_buf(sizeof(size_t) * num_alloc); - size_t *bn_key_sizes = reinterpret_cast(bn_key_sizes_buf.get()); - - // TODO 4050: All these arrays should be combined into a single array of some bn_info struct (pivot, msize, num_les). - // Each entry is the number of leafentries in this basement. (Again, num_le is overkill upper baound.) - toku::scoped_malloc num_les_this_bn_buf(sizeof(uint32_t) * num_alloc); - uint32_t *num_les_this_bn = reinterpret_cast(num_les_this_bn_buf.get()); - num_les_this_bn[0] = 0; - - // Figure out the new pivots. - // We need the index of each pivot, and for each basement we need - // the number of leaves and the sum of the sizes of the leaves (memory requirement for basement). 
- uint32_t curr_pivot = 0; - uint32_t num_le_in_curr_bn = 0; - uint32_t bn_size_so_far = 0; - for (uint32_t i = 0; i < num_le; i++) { - uint32_t curr_le_size = leafentry_disksize((LEAFENTRY) leafpointers[i]); - le_sizes[i] = curr_le_size; - if ((bn_size_so_far + curr_le_size + sizeof(uint32_t) + key_sizes[i] > basementnodesize) && (num_le_in_curr_bn != 0)) { - // cap off the current basement node to end with the element before i - new_pivots[curr_pivot] = i-1; - curr_pivot++; - num_le_in_curr_bn = 0; - bn_size_so_far = 0; - } - num_le_in_curr_bn++; - num_les_this_bn[curr_pivot] = num_le_in_curr_bn; - bn_le_sizes[curr_pivot] += curr_le_size; - bn_key_sizes[curr_pivot] += sizeof(uint32_t) + key_sizes[i]; // uint32_t le_offset - bn_size_so_far += curr_le_size + sizeof(uint32_t) + key_sizes[i]; - } - // curr_pivot is now the total number of pivot keys in the leaf node - int num_pivots = curr_pivot; - int num_children = num_pivots + 1; - - // now we need to fill in the new basement nodes and pivots - - // TODO: (Zardosht) this is an ugly thing right now - // Need to figure out how to properly deal with seqinsert. - // I am not happy with how this is being - // handled with basement nodes - uint32_t tmp_seqinsert = BLB_SEQINSERT(node, num_orig_basements - 1); - - // choose the max msn applied to any basement as the max msn applied to all new basements - MSN max_msn = ZERO_MSN; - for (uint32_t i = 0; i < num_orig_basements; i++) { - MSN curr_msn = BLB_MAX_MSN_APPLIED(node,i); - max_msn = (curr_msn.msn > max_msn.msn) ? 
curr_msn : max_msn; - } - // remove the basement node in the node, we've saved a copy - for (uint32_t i = 0; i < num_orig_basements; i++) { - // save a reference to the old basement nodes - // we will need them to ensure that the memory - // stays intact - old_bns[i] = toku_detach_bn(node, i); - } - // Now destroy the old basements, but do not destroy leaves - toku_destroy_ftnode_internals(node); - - // now reallocate pieces and start filling them in - invariant(num_children > 0); - node->totalchildkeylens = 0; - - XCALLOC_N(num_pivots, node->childkeys); // allocate pointers to pivot structs - node->n_children = num_children; - XCALLOC_N(num_children, node->bp); // allocate pointers to basements (bp) - for (int i = 0; i < num_children; i++) { - set_BLB(node, i, toku_create_empty_bn()); // allocate empty basements and set bp pointers - } - - // now we start to fill in the data - - // first the pivots - for (int i = 0; i < num_pivots; i++) { - uint32_t keylen = key_sizes[new_pivots[i]]; - const void *key = key_pointers[new_pivots[i]]; - toku_memdup_dbt(&node->childkeys[i], key, keylen); - node->totalchildkeylens += keylen; - } - - uint32_t baseindex_this_bn = 0; - // now the basement nodes - for (int i = 0; i < num_children; i++) { - // put back seqinsert - BLB_SEQINSERT(node, i) = tmp_seqinsert; - - // create start (inclusive) and end (exclusive) boundaries for data of basement node - uint32_t curr_start = (i==0) ? 0 : new_pivots[i-1]+1; // index of first leaf in basement - uint32_t curr_end = (i==num_pivots) ? 
num_le : new_pivots[i]+1; // index of first leaf in next basement - uint32_t num_in_bn = curr_end - curr_start; // number of leaves in this basement - - // create indexes for new basement - invariant(baseindex_this_bn == curr_start); - uint32_t num_les_to_copy = num_les_this_bn[i]; - invariant(num_les_to_copy == num_in_bn); - - bn_data* bd = BLB_DATA(node, i); - bd->set_contents_as_clone_of_sorted_array( - num_les_to_copy, - &key_pointers[baseindex_this_bn], - &key_sizes[baseindex_this_bn], - &leafpointers[baseindex_this_bn], - &le_sizes[baseindex_this_bn], - bn_key_sizes[i], // Total key sizes - bn_le_sizes[i] // total le sizes - ); - - BP_STATE(node,i) = PT_AVAIL; - BP_TOUCH_CLOCK(node,i); - BLB_MAX_MSN_APPLIED(node,i) = max_msn; - baseindex_this_bn += num_les_to_copy; // set to index of next bn - } - node->max_msn_applied_to_node_on_disk = max_msn; - - // destroy buffers of old mempools - for (uint32_t i = 0; i < num_orig_basements; i++) { - destroy_basement_node(old_bns[i]); - } -} // end of rebalance_ftnode_leaf() - struct serialize_times { tokutime_t serialize_time; tokutime_t compress_time; @@ -907,10 +706,10 @@ int toku_serialize_ftnode_to_memory(FTNODE node, // The resulting buffer is guaranteed to be 512-byte aligned and the total length is a multiple of 512 (so we pad with zeros at the end if needed). // 512-byte padding is for O_DIRECT to work. 
{ - toku_assert_entire_node_in_memory(node); + toku_ftnode_assert_fully_in_memory(node); if (do_rebalancing && node->height == 0) { - rebalance_ftnode_leaf(node, basementnodesize); + toku_ftnode_leaf_rebalance(node, basementnodesize); } const int npartitions = node->n_children; diff --git a/ft/ftverify.cc b/ft/ftverify.cc index 1b103abd55a..d82d4ae1240 100644 --- a/ft/ftverify.cc +++ b/ft/ftverify.cc @@ -96,6 +96,7 @@ PATENT RIGHTS GRANT: #include "fttypes.h" #include "ft-internal.h" +#include "node.h" #include "ft_layout_version.h" #include "block_table.h" #include "rbuf.h" diff --git a/ft/loader/loader.cc b/ft/loader/loader.cc index d2914cf0511..c030e595fb7 100644 --- a/ft/loader/loader.cc +++ b/ft/loader/loader.cc @@ -111,6 +111,7 @@ PATENT RIGHTS GRANT: #include "leafentry.h" #include "log-internal.h" #include "ft.h" +#include "node.h" static size_t (*os_fwrite_fun)(const void *,size_t,size_t,FILE*)=NULL; void ft_loader_set_os_fwrite (size_t (*fwrite_fun)(const void*,size_t,size_t,FILE*)) { diff --git a/ft/msg_buffer.cc b/ft/msg_buffer.cc index 47bf845b186..7e247f45250 100644 --- a/ft/msg_buffer.cc +++ b/ft/msg_buffer.cc @@ -129,23 +129,19 @@ struct message_buffer::buffer_entry *message_buffer::get_buffer_entry(int32_t of } void message_buffer::enqueue(FT_MSG msg, bool is_fresh, int32_t *offset) { - ITEMLEN keylen = ft_msg_get_keylen(msg); - ITEMLEN datalen = ft_msg_get_vallen(msg); - XIDS xids = ft_msg_get_xids(msg); - int need_space_here = sizeof(struct buffer_entry) - + keylen + datalen - + xids_get_size(xids) - - sizeof(XIDS_S); //Prevent double counting + int need_space_here = msg_memsize_in_buffer(msg); int need_space_total = _memory_used + need_space_here; if (_memory == nullptr || need_space_total > _memory_size) { // resize the buffer to the next power of 2 greater than the needed space int next_2 = next_power_of_two(need_space_total); resize(next_2); } + ITEMLEN keylen = ft_msg_get_keylen(msg); + ITEMLEN datalen = ft_msg_get_vallen(msg); struct 
buffer_entry *entry = get_buffer_entry(_memory_used); entry->type = (unsigned char) ft_msg_get_type(msg); entry->msn = msg->msn; - xids_cpy(&entry->xids_s, xids); + xids_cpy(&entry->xids_s, ft_msg_get_xids(msg)); entry->is_fresh = is_fresh; unsigned char *e_key = xids_get_end_of_array(&entry->xids_s); entry->keylen = keylen; @@ -217,8 +213,8 @@ bool message_buffer::equals(message_buffer *other) const { } size_t message_buffer::msg_memsize_in_buffer(FT_MSG msg) { - return sizeof(struct buffer_entry) - + msg->u.id.key->size + msg->u.id.val->size - + xids_get_size(msg->xids) - - sizeof(XIDS_S); + const uint32_t keylen = ft_msg_get_keylen(msg); + const uint32_t datalen = ft_msg_get_vallen(msg); + const size_t xidslen = xids_get_size(msg->xids); + return sizeof(struct buffer_entry) + keylen + datalen + xidslen - sizeof(XIDS_S); } diff --git a/ft/node.cc b/ft/node.cc new file mode 100644 index 00000000000..7e9334a1cec --- /dev/null +++ b/ft/node.cc @@ -0,0 +1,2034 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: + +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuDB, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. 
+ + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." +#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." + +#include "ft/ft.h" +#include "ft/ft-internal.h" +#include "ft/node.h" +#include "util/scoped_malloc.h" +#include "util/sort.h" + +// Effect: Fill in N as an empty ftnode. 
+// TODO: Rename toku_ftnode_create +void toku_initialize_empty_ftnode(FTNODE n, BLOCKNUM nodename, int height, int num_children, int layout_version, unsigned int flags) { + paranoid_invariant(layout_version != 0); + paranoid_invariant(height >= 0); + + n->max_msn_applied_to_node_on_disk = ZERO_MSN; // correct value for root node, harmless for others + n->flags = flags; + n->thisnodename = nodename; + n->layout_version = layout_version; + n->layout_version_original = layout_version; + n->layout_version_read_from_disk = layout_version; + n->height = height; + n->totalchildkeylens = 0; + n->childkeys = 0; + n->bp = 0; + n->n_children = num_children; + n->oldest_referenced_xid_known = TXNID_NONE; + + if (num_children > 0) { + XMALLOC_N(num_children-1, n->childkeys); + XMALLOC_N(num_children, n->bp); + for (int i = 0; i < num_children; i++) { + BP_BLOCKNUM(n,i).b=0; + BP_STATE(n,i) = PT_INVALID; + BP_WORKDONE(n,i) = 0; + BP_INIT_TOUCHED_CLOCK(n, i); + set_BNULL(n,i); + if (height > 0) { + set_BNC(n, i, toku_create_empty_nl()); + } else { + set_BLB(n, i, toku_create_empty_bn()); + } + } + } + n->dirty = 1; // special case exception, it's okay to mark as dirty because the basements are empty + + toku_ft_status_note_ftnode(height, true); +} + +// destroys the internals of the ftnode, but it does not free the values +// that are stored +// this is common functionality for toku_ftnode_free and rebalance_ftnode_leaf +// MUST NOT do anything besides free the structures that have been allocated +void toku_destroy_ftnode_internals(FTNODE node) { + for (int i=0; in_children-1; i++) { + toku_destroy_dbt(&node->childkeys[i]); + } + toku_free(node->childkeys); + node->childkeys = NULL; + + for (int i=0; i < node->n_children; i++) { + if (BP_STATE(node,i) == PT_AVAIL) { + if (node->height > 0) { + destroy_nonleaf_childinfo(BNC(node,i)); + } else { + destroy_basement_node(BLB(node, i)); + } + } else if (BP_STATE(node,i) == PT_COMPRESSED) { + SUB_BLOCK sb = BSB(node,i); + 
toku_free(sb->compressed_ptr); + toku_free(sb); + } else { + paranoid_invariant(is_BNULL(node, i)); + } + set_BNULL(node, i); + } + toku_free(node->bp); + node->bp = NULL; +} + +/* Frees a node, including all the stuff in the hash table. */ +void toku_ftnode_free(FTNODE *nodep) { + FTNODE node = *nodep; + toku_ft_status_note_ftnode(node->height, false); + toku_destroy_ftnode_internals(node); + toku_free(node); + *nodep = nullptr; +} + +void toku_ftnode_update_disk_stats(FTNODE ftnode, FT ft, bool for_checkpoint) { + STAT64INFO_S deltas = ZEROSTATS; + // capture deltas before rebalancing basements for serialization + deltas = toku_get_and_clear_basement_stats(ftnode); + // locking not necessary here with respect to checkpointing + // in Clayface (because of the pending lock and cachetable lock + // in toku_cachetable_begin_checkpoint) + // essentially, if we are dealing with a for_checkpoint + // parameter in a function that is called by the flush_callback, + // then the cachetable needs to ensure that this is called in a safe + // manner that does not interfere with the beginning + // of a checkpoint, which it does with the cachetable lock + // and pending lock + toku_ft_update_stats(&ft->h->on_disk_stats, deltas); + if (for_checkpoint) { + toku_ft_update_stats(&ft->checkpoint_header->on_disk_stats, deltas); + } +} + +void toku_ftnode_clone_partitions(FTNODE node, FTNODE cloned_node) { + for (int i = 0; i < node->n_children; i++) { + BP_BLOCKNUM(cloned_node,i) = BP_BLOCKNUM(node,i); + paranoid_invariant(BP_STATE(node,i) == PT_AVAIL); + BP_STATE(cloned_node,i) = PT_AVAIL; + BP_WORKDONE(cloned_node, i) = BP_WORKDONE(node, i); + if (node->height == 0) { + set_BLB(cloned_node, i, toku_clone_bn(BLB(node,i))); + } else { + set_BNC(cloned_node, i, toku_clone_nl(BNC(node,i))); + } + } +} + +void toku_evict_bn_from_memory(FTNODE node, int childnum, FT h) { + // free the basement node + assert(!node->dirty); + BASEMENTNODE bn = BLB(node, childnum); + 
toku_ft_decrease_stats(&h->in_memory_stats, bn->stat64_delta); + destroy_basement_node(bn); + set_BNULL(node, childnum); + BP_STATE(node, childnum) = PT_ON_DISK; +} + +BASEMENTNODE toku_detach_bn(FTNODE node, int childnum) { + assert(BP_STATE(node, childnum) == PT_AVAIL); + BASEMENTNODE bn = BLB(node, childnum); + set_BNULL(node, childnum); + BP_STATE(node, childnum) = PT_ON_DISK; + return bn; +} + +// +// Orthopush +// + +struct store_msg_buffer_offset_extra { + int32_t *offsets; + int i; +}; + +int store_msg_buffer_offset(const int32_t &offset, const uint32_t UU(idx), struct store_msg_buffer_offset_extra *const extra) __attribute__((nonnull(3))); +int store_msg_buffer_offset(const int32_t &offset, const uint32_t UU(idx), struct store_msg_buffer_offset_extra *const extra) +{ + extra->offsets[extra->i] = offset; + extra->i++; + return 0; +} + +/** + * Given pointers to offsets within a message buffer where we can find messages, + * figure out the MSN of each message, and compare those MSNs. Returns 1, + * 0, or -1 if a is larger than, equal to, or smaller than b. + */ +int msg_buffer_offset_msn_cmp(message_buffer &msg_buffer, const int32_t &ao, const int32_t &bo); +int msg_buffer_offset_msn_cmp(message_buffer &msg_buffer, const int32_t &ao, const int32_t &bo) +{ + MSN amsn, bmsn; + msg_buffer.get_message_key_msn(ao, nullptr, &amsn); + msg_buffer.get_message_key_msn(bo, nullptr, &bmsn); + if (amsn.msn > bmsn.msn) { + return +1; + } + if (amsn.msn < bmsn.msn) { + return -1; + } + return 0; +} + +/** + * Given a message buffer and and offset, apply the message with toku_ft_bn_apply_msg, or discard it, + * based on its MSN and the MSN of the basement node. 
+ */ +static void +do_bn_apply_msg(FT_HANDLE ft_handle, BASEMENTNODE bn, message_buffer *msg_buffer, int32_t offset, + txn_gc_info *gc_info, uint64_t *workdone, STAT64INFO stats_to_update) { + DBT k, v; + FT_MSG_S msg = msg_buffer->get_message(offset, &k, &v); + + // The messages are being iterated over in (key,msn) order or just in + // msn order, so all the messages for one key, from one buffer, are in + // ascending msn order. So it's ok that we don't update the basement + // node's msn until the end. + if (msg.msn.msn > bn->max_msn_applied.msn) { + toku_ft_bn_apply_msg( + ft_handle->ft->compare_fun, + ft_handle->ft->update_fun, + &ft_handle->ft->cmp_descriptor, + bn, + &msg, + gc_info, + workdone, + stats_to_update + ); + } else { + toku_ft_status_note_msn_discard(); + } + + // We must always mark message as stale since it has been marked + // (using omt::iterate_and_mark_range) + // It is possible to call do_bn_apply_msg even when it won't apply the message because + // the node containing it could have been evicted and brought back in. + msg_buffer->set_freshness(offset, false); +} + + +struct iterate_do_bn_apply_msg_extra { + FT_HANDLE t; + BASEMENTNODE bn; + NONLEAF_CHILDINFO bnc; + txn_gc_info *gc_info; + uint64_t *workdone; + STAT64INFO stats_to_update; +}; + +int iterate_do_bn_apply_msg(const int32_t &offset, const uint32_t UU(idx), struct iterate_do_bn_apply_msg_extra *const e) __attribute__((nonnull(3))); +int iterate_do_bn_apply_msg(const int32_t &offset, const uint32_t UU(idx), struct iterate_do_bn_apply_msg_extra *const e) +{ + do_bn_apply_msg(e->t, e->bn, &e->bnc->msg_buffer, offset, e->gc_info, e->workdone, e->stats_to_update); + return 0; +} + +/** + * Given the bounds of the basement node to which we will apply messages, + * find the indexes within message_tree which contain the range of + * relevant messages. + * + * The message tree contains offsets into the buffer, where messages are + * found. 
The pivot_bounds are the lower bound exclusive and upper bound + * inclusive, because they come from pivot keys in the tree. We want OMT + * indices, which must have the lower bound be inclusive and the upper + * bound exclusive. We will get these by telling omt::find to look + * for something strictly bigger than each of our pivot bounds. + * + * Outputs the OMT indices in lbi (lower bound inclusive) and ube (upper + * bound exclusive). + */ +template +static void +find_bounds_within_message_tree( + DESCRIPTOR desc, /// used for cmp + ft_compare_func cmp, /// used to compare keys + const find_bounds_omt_t &message_tree, /// tree holding message buffer offsets, in which we want to look for indices + message_buffer *msg_buffer, /// message buffer in which messages are found + struct pivot_bounds const * const bounds, /// key bounds within the basement node we're applying messages to + uint32_t *lbi, /// (output) "lower bound inclusive" (index into message_tree) + uint32_t *ube /// (output) "upper bound exclusive" (index into message_tree) + ) +{ + int r = 0; + + if (bounds->lower_bound_exclusive) { + // By setting msn to MAX_MSN and by using direction of +1, we will + // get the first message greater than (in (key, msn) order) any + // message (with any msn) with the key lower_bound_exclusive. + // This will be a message we want to try applying, so it is the + // "lower bound inclusive" within the message_tree. + struct toku_msg_buffer_key_msn_heaviside_extra lbi_extra; + ZERO_STRUCT(lbi_extra); + lbi_extra.desc = desc; + lbi_extra.cmp = cmp; + lbi_extra.msg_buffer = msg_buffer; + lbi_extra.key = bounds->lower_bound_exclusive; + lbi_extra.msn = MAX_MSN; + int32_t found_lb; + r = message_tree.template find(lbi_extra, +1, &found_lb, lbi); + if (r == DB_NOTFOUND) { + // There is no relevant data (the lower bound is bigger than + // any message in this tree), so we have no range and we're + // done. 
+ *lbi = 0; + *ube = 0; + return; + } + if (bounds->upper_bound_inclusive) { + // Check if what we found for lbi is greater than the upper + // bound inclusive that we have. If so, there are no relevant + // messages between these bounds. + const DBT *ubi = bounds->upper_bound_inclusive; + const int32_t offset = found_lb; + DBT found_lbidbt; + msg_buffer->get_message_key_msn(offset, &found_lbidbt, nullptr); + FAKE_DB(db, desc); + int c = cmp(&db, &found_lbidbt, ubi); + // These DBTs really are both inclusive bounds, so we need + // strict inequality in order to determine that there's + // nothing between them. If they're equal, then we actually + // need to apply the message pointed to by lbi, and also + // anything with the same key but a bigger msn. + if (c > 0) { + *lbi = 0; + *ube = 0; + return; + } + } + } else { + // No lower bound given, it's negative infinity, so we start at + // the first message in the OMT. + *lbi = 0; + } + if (bounds->upper_bound_inclusive) { + // Again, we use an msn of MAX_MSN and a direction of +1 to get + // the first thing bigger than the upper_bound_inclusive key. + // This is therefore the smallest thing we don't want to apply, + // and omt::iterate_on_range will not examine it. + struct toku_msg_buffer_key_msn_heaviside_extra ube_extra; + ZERO_STRUCT(ube_extra); + ube_extra.desc = desc; + ube_extra.cmp = cmp; + ube_extra.msg_buffer = msg_buffer; + ube_extra.key = bounds->upper_bound_inclusive; + ube_extra.msn = MAX_MSN; + r = message_tree.template find(ube_extra, +1, nullptr, ube); + if (r == DB_NOTFOUND) { + // Couldn't find anything in the buffer bigger than our key, + // so we need to look at everything up to the end of + // message_tree. + *ube = message_tree.size(); + } + } else { + // No upper bound given, it's positive infinity, so we need to go + // through the end of the OMT. 
+ *ube = message_tree.size(); + } +} + +/** + * For each message in the ancestor's buffer (determined by childnum) that + * is key-wise between lower_bound_exclusive and upper_bound_inclusive, + * apply the message to the basement node. We treat the bounds as minus + * or plus infinity respectively if they are NULL. Do not mark the node + * as dirty (preserve previous state of 'dirty' bit). + */ +static void +bnc_apply_messages_to_basement_node( + FT_HANDLE t, // used for comparison function + BASEMENTNODE bn, // where to apply messages + FTNODE ancestor, // the ancestor node where we can find messages to apply + int childnum, // which child buffer of ancestor contains messages we want + struct pivot_bounds const * const bounds, // contains pivot key bounds of this basement node + txn_gc_info *gc_info, + bool* msgs_applied + ) +{ + int r; + NONLEAF_CHILDINFO bnc = BNC(ancestor, childnum); + + // Determine the offsets in the message trees between which we need to + // apply messages from this buffer + STAT64INFO_S stats_delta = {0,0}; + uint64_t workdone_this_ancestor = 0; + + uint32_t stale_lbi, stale_ube; + if (!bn->stale_ancestor_messages_applied) { + find_bounds_within_message_tree(&t->ft->cmp_descriptor, t->ft->compare_fun, bnc->stale_message_tree, &bnc->msg_buffer, bounds, &stale_lbi, &stale_ube); + } else { + stale_lbi = 0; + stale_ube = 0; + } + uint32_t fresh_lbi, fresh_ube; + find_bounds_within_message_tree(&t->ft->cmp_descriptor, t->ft->compare_fun, bnc->fresh_message_tree, &bnc->msg_buffer, bounds, &fresh_lbi, &fresh_ube); + + // We now know where all the messages we must apply are, so one of the + // following 4 cases will do the application, depending on which of + // the lists contains relevant messages: + // + // 1. broadcast messages and anything else, or a mix of fresh and stale + // 2. only fresh messages + // 3. 
only stale messages + if (bnc->broadcast_list.size() > 0 || + (stale_lbi != stale_ube && fresh_lbi != fresh_ube)) { + // We have messages in multiple trees, so we grab all + // the relevant messages' offsets and sort them by MSN, then apply + // them in MSN order. + const int buffer_size = ((stale_ube - stale_lbi) + (fresh_ube - fresh_lbi) + bnc->broadcast_list.size()); + toku::scoped_malloc offsets_buf(buffer_size * sizeof(int32_t)); + int32_t *offsets = reinterpret_cast(offsets_buf.get()); + struct store_msg_buffer_offset_extra sfo_extra = { .offsets = offsets, .i = 0 }; + + // Populate offsets array with offsets to stale messages + r = bnc->stale_message_tree.iterate_on_range(stale_lbi, stale_ube, &sfo_extra); + assert_zero(r); + + // Then store fresh offsets, and mark them to be moved to stale later. + r = bnc->fresh_message_tree.iterate_and_mark_range(fresh_lbi, fresh_ube, &sfo_extra); + assert_zero(r); + + // Store offsets of all broadcast messages. + r = bnc->broadcast_list.iterate(&sfo_extra); + assert_zero(r); + invariant(sfo_extra.i == buffer_size); + + // Sort by MSN. + r = toku::sort::mergesort_r(offsets, buffer_size, bnc->msg_buffer); + assert_zero(r); + + // Apply the messages in MSN order. + for (int i = 0; i < buffer_size; ++i) { + *msgs_applied = true; + do_bn_apply_msg(t, bn, &bnc->msg_buffer, offsets[i], gc_info, &workdone_this_ancestor, &stats_delta); + } + } else if (stale_lbi == stale_ube) { + // No stale messages to apply, we just apply fresh messages, and mark them to be moved to stale later. + struct iterate_do_bn_apply_msg_extra iter_extra = { .t = t, .bn = bn, .bnc = bnc, .gc_info = gc_info, .workdone = &workdone_this_ancestor, .stats_to_update = &stats_delta }; + if (fresh_ube - fresh_lbi > 0) *msgs_applied = true; + r = bnc->fresh_message_tree.iterate_and_mark_range(fresh_lbi, fresh_ube, &iter_extra); + assert_zero(r); + } else { + invariant(fresh_lbi == fresh_ube); + // No fresh messages to apply, we just apply stale messages. 
+ + if (stale_ube - stale_lbi > 0) *msgs_applied = true; + struct iterate_do_bn_apply_msg_extra iter_extra = { .t = t, .bn = bn, .bnc = bnc, .gc_info = gc_info, .workdone = &workdone_this_ancestor, .stats_to_update = &stats_delta }; + + r = bnc->stale_message_tree.iterate_on_range(stale_lbi, stale_ube, &iter_extra); + assert_zero(r); + } + // + // update stats + // + if (workdone_this_ancestor > 0) { + (void) toku_sync_fetch_and_add(&BP_WORKDONE(ancestor, childnum), workdone_this_ancestor); + } + if (stats_delta.numbytes || stats_delta.numrows) { + toku_ft_update_stats(&t->ft->in_memory_stats, stats_delta); + } +} + +static void +apply_ancestors_messages_to_bn( + FT_HANDLE t, + FTNODE node, + int childnum, + ANCESTORS ancestors, + struct pivot_bounds const * const bounds, + txn_gc_info *gc_info, + bool* msgs_applied + ) +{ + BASEMENTNODE curr_bn = BLB(node, childnum); + struct pivot_bounds curr_bounds = next_pivot_keys(node, childnum, bounds); + for (ANCESTORS curr_ancestors = ancestors; curr_ancestors; curr_ancestors = curr_ancestors->next) { + if (curr_ancestors->node->max_msn_applied_to_node_on_disk.msn > curr_bn->max_msn_applied.msn) { + paranoid_invariant(BP_STATE(curr_ancestors->node, curr_ancestors->childnum) == PT_AVAIL); + bnc_apply_messages_to_basement_node( + t, + curr_bn, + curr_ancestors->node, + curr_ancestors->childnum, + &curr_bounds, + gc_info, + msgs_applied + ); + // We don't want to check this ancestor node again if the + // next time we query it, the msn hasn't changed. + curr_bn->max_msn_applied = curr_ancestors->node->max_msn_applied_to_node_on_disk; + } + } + // At this point, we know all the stale messages above this + // basement node have been applied, and any new messages will be + // fresh, so we don't need to look at stale messages for this + // basement node, unless it gets evicted (and this field becomes + // false when it's read in again). 
+ curr_bn->stale_ancestor_messages_applied = true; +} + +void +toku_apply_ancestors_messages_to_node ( + FT_HANDLE t, + FTNODE node, + ANCESTORS ancestors, + struct pivot_bounds const * const bounds, + bool* msgs_applied, + int child_to_read + ) +// Effect: +// Bring a leaf node up-to-date according to all the messages in the ancestors. +// If the leaf node is already up-to-date then do nothing. +// If the leaf node is not already up-to-date, then record the work done +// for that leaf in each ancestor. +// Requires: +// This is being called when pinning a leaf node for the query path. +// The entire root-to-leaf path is pinned and appears in the ancestors list. +{ + VERIFY_NODE(t, node); + paranoid_invariant(node->height == 0); + + TXN_MANAGER txn_manager = toku_ft_get_txn_manager(t); + txn_manager_state txn_state_for_gc(txn_manager); + + TXNID oldest_referenced_xid_for_simple_gc = toku_ft_get_oldest_referenced_xid_estimate(t); + txn_gc_info gc_info(&txn_state_for_gc, + oldest_referenced_xid_for_simple_gc, + node->oldest_referenced_xid_known, + true); + if (!node->dirty && child_to_read >= 0) { + paranoid_invariant(BP_STATE(node, child_to_read) == PT_AVAIL); + apply_ancestors_messages_to_bn( + t, + node, + child_to_read, + ancestors, + bounds, + &gc_info, + msgs_applied + ); + } + else { + // know we are a leaf node + // An important invariant: + // We MUST bring every available basement node for a dirty node up to date. + // flushing on the cleaner thread depends on this. This invariant + // allows the cleaner thread to just pick an internal node and flush it + // as opposed to being forced to start from the root. 
+ for (int i = 0; i < node->n_children; i++) { + if (BP_STATE(node, i) != PT_AVAIL) { continue; } + apply_ancestors_messages_to_bn( + t, + node, + i, + ancestors, + bounds, + &gc_info, + msgs_applied + ); + } + } + VERIFY_NODE(t, node); +} + +static bool bn_needs_ancestors_messages( + FT ft, + FTNODE node, + int childnum, + struct pivot_bounds const * const bounds, + ANCESTORS ancestors, + MSN* max_msn_applied + ) +{ + BASEMENTNODE bn = BLB(node, childnum); + struct pivot_bounds curr_bounds = next_pivot_keys(node, childnum, bounds); + bool needs_ancestors_messages = false; + for (ANCESTORS curr_ancestors = ancestors; curr_ancestors; curr_ancestors = curr_ancestors->next) { + if (curr_ancestors->node->max_msn_applied_to_node_on_disk.msn > bn->max_msn_applied.msn) { + paranoid_invariant(BP_STATE(curr_ancestors->node, curr_ancestors->childnum) == PT_AVAIL); + NONLEAF_CHILDINFO bnc = BNC(curr_ancestors->node, curr_ancestors->childnum); + if (bnc->broadcast_list.size() > 0) { + needs_ancestors_messages = true; + goto cleanup; + } + if (!bn->stale_ancestor_messages_applied) { + uint32_t stale_lbi, stale_ube; + find_bounds_within_message_tree(&ft->cmp_descriptor, + ft->compare_fun, + bnc->stale_message_tree, + &bnc->msg_buffer, + &curr_bounds, + &stale_lbi, + &stale_ube); + if (stale_lbi < stale_ube) { + needs_ancestors_messages = true; + goto cleanup; + } + } + uint32_t fresh_lbi, fresh_ube; + find_bounds_within_message_tree(&ft->cmp_descriptor, + ft->compare_fun, + bnc->fresh_message_tree, + &bnc->msg_buffer, + &curr_bounds, + &fresh_lbi, + &fresh_ube); + if (fresh_lbi < fresh_ube) { + needs_ancestors_messages = true; + goto cleanup; + } + if (curr_ancestors->node->max_msn_applied_to_node_on_disk.msn > max_msn_applied->msn) { + max_msn_applied->msn = curr_ancestors->node->max_msn_applied_to_node_on_disk.msn; + } + } + } +cleanup: + return needs_ancestors_messages; +} + +bool toku_ft_leaf_needs_ancestors_messages( + FT ft, + FTNODE node, + ANCESTORS ancestors, + struct 
pivot_bounds const * const bounds, + MSN *const max_msn_in_path, + int child_to_read + ) +// Effect: Determine whether there are messages in a node's ancestors +// which must be applied to it. These messages are in the correct +// keyrange for any available basement nodes, and are in nodes with the +// correct max_msn_applied_to_node_on_disk. +// Notes: +// This is an approximate query. +// Output: +// max_msn_in_path: max of "max_msn_applied_to_node_on_disk" over +// ancestors. This is used later to update basement nodes' +// max_msn_applied values in case we don't do the full algorithm. +// Returns: +// true if there may be some such messages +// false only if there are definitely no such messages +// Rationale: +// When we pin a node with a read lock, we want to quickly determine if +// we should exchange it for a write lock in preparation for applying +// messages. If there are no messages, we don't need the write lock. +{ + paranoid_invariant(node->height == 0); + bool needs_ancestors_messages = false; + // child_to_read may be -1 in test cases + if (!node->dirty && child_to_read >= 0) { + paranoid_invariant(BP_STATE(node, child_to_read) == PT_AVAIL); + needs_ancestors_messages = bn_needs_ancestors_messages( + ft, + node, + child_to_read, + bounds, + ancestors, + max_msn_in_path + ); + } + else { + for (int i = 0; i < node->n_children; ++i) { + if (BP_STATE(node, i) != PT_AVAIL) { continue; } + needs_ancestors_messages = bn_needs_ancestors_messages( + ft, + node, + i, + bounds, + ancestors, + max_msn_in_path + ); + if (needs_ancestors_messages) { + goto cleanup; + } + } + } +cleanup: + return needs_ancestors_messages; +} + +void toku_ft_bn_update_max_msn(FTNODE node, MSN max_msn_applied, int child_to_read) { + invariant(node->height == 0); + if (!node->dirty && child_to_read >= 0) { + paranoid_invariant(BP_STATE(node, child_to_read) == PT_AVAIL); + BASEMENTNODE bn = BLB(node, child_to_read); + if (max_msn_applied.msn > bn->max_msn_applied.msn) { + // see 
comment below + (void) toku_sync_val_compare_and_swap(&bn->max_msn_applied.msn, bn->max_msn_applied.msn, max_msn_applied.msn); + } + } + else { + for (int i = 0; i < node->n_children; ++i) { + if (BP_STATE(node, i) != PT_AVAIL) { continue; } + BASEMENTNODE bn = BLB(node, i); + if (max_msn_applied.msn > bn->max_msn_applied.msn) { + // This function runs in a shared access context, so to silence tools + // like DRD, we use a CAS and ignore the result. + // Any threads trying to update these basement nodes should be + // updating them to the same thing (since they all have a read lock on + // the same root-to-leaf path) so this is safe. + (void) toku_sync_val_compare_and_swap(&bn->max_msn_applied.msn, bn->max_msn_applied.msn, max_msn_applied.msn); + } + } + } +} + +struct copy_to_stale_extra { + FT ft; + NONLEAF_CHILDINFO bnc; +}; + +int copy_to_stale(const int32_t &offset, const uint32_t UU(idx), struct copy_to_stale_extra *const extra) __attribute__((nonnull(3))); +int copy_to_stale(const int32_t &offset, const uint32_t UU(idx), struct copy_to_stale_extra *const extra) +{ + MSN msn; + DBT key; + extra->bnc->msg_buffer.get_message_key_msn(offset, &key, &msn); + struct toku_msg_buffer_key_msn_heaviside_extra heaviside_extra = { .desc = &extra->ft->cmp_descriptor, .cmp = extra->ft->compare_fun, .msg_buffer = &extra->bnc->msg_buffer, .key = &key, .msn = msn }; + int r = extra->bnc->stale_message_tree.insert(offset, heaviside_extra, nullptr); + invariant_zero(r); + return 0; +} + +void toku_ft_bnc_move_messages_to_stale(FT ft, NONLEAF_CHILDINFO bnc) { + struct copy_to_stale_extra cts_extra = { .ft = ft, .bnc = bnc }; + int r = bnc->fresh_message_tree.iterate_over_marked(&cts_extra); + invariant_zero(r); + bnc->fresh_message_tree.delete_all_marked(); +} + +void toku_move_ftnode_messages_to_stale(FT ft, FTNODE node) { + invariant(node->height > 0); + for (int i = 0; i < node->n_children; ++i) { + if (BP_STATE(node, i) != PT_AVAIL) { + continue; + } + NONLEAF_CHILDINFO bnc 
= BNC(node, i); + // We can't delete things out of the fresh tree inside the above + // procedures because we're still looking at the fresh tree. Instead + // we have to move messages after we're done looking at it. + toku_ft_bnc_move_messages_to_stale(ft, bnc); + } +} + +// +// Balance // Availibility // Size + +struct rebalance_array_info { + uint32_t offset; + LEAFENTRY *le_array; + uint32_t *key_sizes_array; + const void **key_ptr_array; + static int fn(const void* key, const uint32_t keylen, const LEAFENTRY &le, + const uint32_t idx, struct rebalance_array_info *const ai) { + ai->le_array[idx+ai->offset] = le; + ai->key_sizes_array[idx+ai->offset] = keylen; + ai->key_ptr_array[idx+ai->offset] = key; + return 0; + } +}; + +// There must still be at least one child +// Requires that all messages in buffers above have been applied. +// Because all messages above have been applied, setting msn of all new basements +// to max msn of existing basements is correct. (There cannot be any messages in +// buffers above that still need to be applied.) +void toku_ftnode_leaf_rebalance(FTNODE node, unsigned int basementnodesize) { + + assert(node->height == 0); + assert(node->dirty); + + uint32_t num_orig_basements = node->n_children; + // Count number of leaf entries in this leaf (num_le). + uint32_t num_le = 0; + for (uint32_t i = 0; i < num_orig_basements; i++) { + num_le += BLB_DATA(node, i)->num_klpairs(); + } + + uint32_t num_alloc = num_le ? num_le : 1; // simplify logic below by always having at least one entry per array + + // Create an array of OMTVALUE's that store all the pointers to all the data. + // Each element in leafpointers is a pointer to a leaf. 
+ toku::scoped_malloc leafpointers_buf(sizeof(LEAFENTRY) * num_alloc); + LEAFENTRY *leafpointers = reinterpret_cast(leafpointers_buf.get()); + leafpointers[0] = NULL; + + toku::scoped_malloc key_pointers_buf(sizeof(void *) * num_alloc); + const void **key_pointers = reinterpret_cast(key_pointers_buf.get()); + key_pointers[0] = NULL; + + toku::scoped_malloc key_sizes_buf(sizeof(uint32_t) * num_alloc); + uint32_t *key_sizes = reinterpret_cast(key_sizes_buf.get()); + + // Capture pointers to old mempools' buffers (so they can be destroyed) + toku::scoped_malloc old_bns_buf(sizeof(BASEMENTNODE) * num_orig_basements); + BASEMENTNODE *old_bns = reinterpret_cast(old_bns_buf.get()); + old_bns[0] = NULL; + + uint32_t curr_le = 0; + for (uint32_t i = 0; i < num_orig_basements; i++) { + bn_data* bd = BLB_DATA(node, i); + struct rebalance_array_info ai {.offset = curr_le, .le_array = leafpointers, .key_sizes_array = key_sizes, .key_ptr_array = key_pointers }; + bd->iterate(&ai); + curr_le += bd->num_klpairs(); + } + + // Create an array that will store indexes of new pivots. + // Each element in new_pivots is the index of a pivot key. + // (Allocating num_le of them is overkill, but num_le is an upper bound.) + toku::scoped_malloc new_pivots_buf(sizeof(uint32_t) * num_alloc); + uint32_t *new_pivots = reinterpret_cast(new_pivots_buf.get()); + new_pivots[0] = 0; + + // Each element in le_sizes is the size of the leafentry pointed to by leafpointers. + toku::scoped_malloc le_sizes_buf(sizeof(size_t) * num_alloc); + size_t *le_sizes = reinterpret_cast(le_sizes_buf.get()); + le_sizes[0] = 0; + + // Create an array that will store the size of each basement. + // This is the sum of the leaf sizes of all the leaves in that basement. + // We don't know how many basements there will be, so we use num_le as the upper bound. 
+ + // Sum of all le sizes in a single basement + toku::scoped_calloc bn_le_sizes_buf(sizeof(size_t) * num_alloc); + size_t *bn_le_sizes = reinterpret_cast(bn_le_sizes_buf.get()); + + // Sum of all key sizes in a single basement + toku::scoped_calloc bn_key_sizes_buf(sizeof(size_t) * num_alloc); + size_t *bn_key_sizes = reinterpret_cast(bn_key_sizes_buf.get()); + + // TODO 4050: All these arrays should be combined into a single array of some bn_info struct (pivot, msize, num_les). + // Each entry is the number of leafentries in this basement. (Again, num_le is overkill upper baound.) + toku::scoped_malloc num_les_this_bn_buf(sizeof(uint32_t) * num_alloc); + uint32_t *num_les_this_bn = reinterpret_cast(num_les_this_bn_buf.get()); + num_les_this_bn[0] = 0; + + // Figure out the new pivots. + // We need the index of each pivot, and for each basement we need + // the number of leaves and the sum of the sizes of the leaves (memory requirement for basement). + uint32_t curr_pivot = 0; + uint32_t num_le_in_curr_bn = 0; + uint32_t bn_size_so_far = 0; + for (uint32_t i = 0; i < num_le; i++) { + uint32_t curr_le_size = leafentry_disksize((LEAFENTRY) leafpointers[i]); + le_sizes[i] = curr_le_size; + if ((bn_size_so_far + curr_le_size + sizeof(uint32_t) + key_sizes[i] > basementnodesize) && (num_le_in_curr_bn != 0)) { + // cap off the current basement node to end with the element before i + new_pivots[curr_pivot] = i-1; + curr_pivot++; + num_le_in_curr_bn = 0; + bn_size_so_far = 0; + } + num_le_in_curr_bn++; + num_les_this_bn[curr_pivot] = num_le_in_curr_bn; + bn_le_sizes[curr_pivot] += curr_le_size; + bn_key_sizes[curr_pivot] += sizeof(uint32_t) + key_sizes[i]; // uint32_t le_offset + bn_size_so_far += curr_le_size + sizeof(uint32_t) + key_sizes[i]; + } + // curr_pivot is now the total number of pivot keys in the leaf node + int num_pivots = curr_pivot; + int num_children = num_pivots + 1; + + // now we need to fill in the new basement nodes and pivots + + // TODO: (Zardosht) 
this is an ugly thing right now + // Need to figure out how to properly deal with seqinsert. + // I am not happy with how this is being + // handled with basement nodes + uint32_t tmp_seqinsert = BLB_SEQINSERT(node, num_orig_basements - 1); + + // choose the max msn applied to any basement as the max msn applied to all new basements + MSN max_msn = ZERO_MSN; + for (uint32_t i = 0; i < num_orig_basements; i++) { + MSN curr_msn = BLB_MAX_MSN_APPLIED(node,i); + max_msn = (curr_msn.msn > max_msn.msn) ? curr_msn : max_msn; + } + // remove the basement node in the node, we've saved a copy + for (uint32_t i = 0; i < num_orig_basements; i++) { + // save a reference to the old basement nodes + // we will need them to ensure that the memory + // stays intact + old_bns[i] = toku_detach_bn(node, i); + } + // Now destroy the old basements, but do not destroy leaves + toku_destroy_ftnode_internals(node); + + // now reallocate pieces and start filling them in + invariant(num_children > 0); + node->totalchildkeylens = 0; + + XCALLOC_N(num_pivots, node->childkeys); // allocate pointers to pivot structs + node->n_children = num_children; + XCALLOC_N(num_children, node->bp); // allocate pointers to basements (bp) + for (int i = 0; i < num_children; i++) { + set_BLB(node, i, toku_create_empty_bn()); // allocate empty basements and set bp pointers + } + + // now we start to fill in the data + + // first the pivots + for (int i = 0; i < num_pivots; i++) { + uint32_t keylen = key_sizes[new_pivots[i]]; + const void *key = key_pointers[new_pivots[i]]; + toku_memdup_dbt(&node->childkeys[i], key, keylen); + node->totalchildkeylens += keylen; + } + + uint32_t baseindex_this_bn = 0; + // now the basement nodes + for (int i = 0; i < num_children; i++) { + // put back seqinsert + BLB_SEQINSERT(node, i) = tmp_seqinsert; + + // create start (inclusive) and end (exclusive) boundaries for data of basement node + uint32_t curr_start = (i==0) ? 
0 : new_pivots[i-1]+1; // index of first leaf in basement + uint32_t curr_end = (i==num_pivots) ? num_le : new_pivots[i]+1; // index of first leaf in next basement + uint32_t num_in_bn = curr_end - curr_start; // number of leaves in this basement + + // create indexes for new basement + invariant(baseindex_this_bn == curr_start); + uint32_t num_les_to_copy = num_les_this_bn[i]; + invariant(num_les_to_copy == num_in_bn); + + bn_data* bd = BLB_DATA(node, i); + bd->set_contents_as_clone_of_sorted_array( + num_les_to_copy, + &key_pointers[baseindex_this_bn], + &key_sizes[baseindex_this_bn], + &leafpointers[baseindex_this_bn], + &le_sizes[baseindex_this_bn], + bn_key_sizes[i], // Total key sizes + bn_le_sizes[i] // total le sizes + ); + + BP_STATE(node,i) = PT_AVAIL; + BP_TOUCH_CLOCK(node,i); + BLB_MAX_MSN_APPLIED(node,i) = max_msn; + baseindex_this_bn += num_les_to_copy; // set to index of next bn + } + node->max_msn_applied_to_node_on_disk = max_msn; + + // destroy buffers of old mempools + for (uint32_t i = 0; i < num_orig_basements; i++) { + destroy_basement_node(old_bns[i]); + } +} + +bool toku_ftnode_fully_in_memory(FTNODE node) { + for (int i = 0; i < node->n_children; i++) { + if (BP_STATE(node,i) != PT_AVAIL) { + return false; + } + } + return true; +} + +void toku_ftnode_assert_fully_in_memory(FTNODE UU(node)) { + paranoid_invariant(toku_ftnode_fully_in_memory(node)); +} + +uint32_t toku_ftnode_leaf_num_entries(FTNODE node) { + toku_ftnode_assert_fully_in_memory(node); + uint32_t num_entries = 0; + for (int i = 0; i < node->n_children; i++) { + num_entries += BLB_DATA(node, i)->num_klpairs(); + } + return num_entries; +} + +enum reactivity toku_ftnode_get_leaf_reactivity(FTNODE node, uint32_t nodesize) { + enum reactivity re = RE_STABLE; + toku_ftnode_assert_fully_in_memory(node); + paranoid_invariant(node->height==0); + unsigned int size = toku_serialize_ftnode_size(node); + if (size > nodesize && toku_ftnode_leaf_num_entries(node) > 1) { + re = RE_FISSIBLE; 
+ } else if ((size*4) < nodesize && !BLB_SEQINSERT(node, node->n_children-1)) { + re = RE_FUSIBLE; + } + return re; +} + +enum reactivity toku_ftnode_get_nonleaf_reactivity(FTNODE node, unsigned int fanout) { + paranoid_invariant(node->height > 0); + int n_children = node->n_children; + if (n_children > (int) fanout) { + return RE_FISSIBLE; + } + if (n_children * 4 < (int) fanout) { + return RE_FUSIBLE; + } + return RE_STABLE; +} + +enum reactivity toku_ftnode_get_reactivity(FT ft, FTNODE node) { + toku_ftnode_assert_fully_in_memory(node); + if (node->height == 0) { + return toku_ftnode_get_leaf_reactivity(node, ft->h->nodesize); + } else { + return toku_ftnode_get_nonleaf_reactivity(node, ft->h->fanout); + } +} + +unsigned int toku_bnc_nbytesinbuf(NONLEAF_CHILDINFO bnc) { + return bnc->msg_buffer.buffer_size_in_use(); +} + +// Return true if the size of the buffers plus the amount of work done is large enough. +// Return false if there is nothing to be flushed (the buffers empty). +bool toku_ftnode_nonleaf_is_gorged(FTNODE node, uint32_t nodesize) { + uint64_t size = toku_serialize_ftnode_size(node); + + bool buffers_are_empty = true; + toku_ftnode_assert_fully_in_memory(node); + // + // the nonleaf node is gorged if the following holds true: + // - the buffers are non-empty + // - the total workdone by the buffers PLUS the size of the buffers + // is greater than nodesize (which as of Maxwell should be + // 4MB) + // + paranoid_invariant(node->height > 0); + for (int child = 0; child < node->n_children; ++child) { + size += BP_WORKDONE(node, child); + } + for (int child = 0; child < node->n_children; ++child) { + if (toku_bnc_nbytesinbuf(BNC(node, child)) > 0) { + buffers_are_empty = false; + break; + } + } + return ((size > nodesize) + && + (!buffers_are_empty)); +} + +int toku_bnc_n_entries(NONLEAF_CHILDINFO bnc) { + return bnc->msg_buffer.num_entries(); +} + +// how much memory does this child buffer consume? 
long toku_bnc_memory_size(NONLEAF_CHILDINFO bnc) {
    // Total footprint: the childinfo struct itself, the raw message buffer,
    // and the three offset trees that index into it.
    return (sizeof(*bnc) +
            bnc->msg_buffer.memory_footprint() +
            bnc->fresh_message_tree.memory_size() +
            bnc->stale_message_tree.memory_size() +
            bnc->broadcast_list.memory_size());
}

// how much memory in this child buffer holds useful data?
// originally created solely for use by test program(s).
// Differs from toku_bnc_memory_size only in using the buffer's
// in-use size rather than its full allocated footprint.
long toku_bnc_memory_used(NONLEAF_CHILDINFO bnc) {
    return (sizeof(*bnc) +
            bnc->msg_buffer.memory_size_in_use() +
            bnc->fresh_message_tree.memory_size() +
            bnc->stale_message_tree.memory_size() +
            bnc->broadcast_list.memory_size());
}

//
// Garbage collection
// Message injection
// Message application
//

// Initialize the partition slot 'childnum' of 'node' to point at 'child':
// record the child's blocknum, mark the partition available with zero
// workdone, and give it a fresh, empty nonleaf childinfo (message buffer).
static void
init_childinfo(FTNODE node, int childnum, FTNODE child) {
    BP_BLOCKNUM(node,childnum) = child->thisnodename;
    BP_STATE(node,childnum) = PT_AVAIL;
    BP_WORKDONE(node, childnum) = 0;
    set_BNC(node, childnum, toku_create_empty_nl());
}

// Install a copy of 'pivotkey' as pivot number 'childnum' of 'node' and
// account for its size in the node's total pivot-key length.
static void
init_childkey(FTNODE node, int childnum, const DBT *pivotkey) {
    toku_clone_dbt(&node->childkeys[childnum], *pivotkey);
    node->totalchildkeylens += pivotkey->size;
}

// Used only by test programs: append a child node to a parent node.
// Note the pivot array is one entry shorter than the child array
// (n_children-1 pivots), so the first appended child takes no pivot
// and 'pivotkey' may be NULL only in that case.
void
toku_ft_nonleaf_append_child(FTNODE node, FTNODE child, const DBT *pivotkey) {
    int childnum = node->n_children;
    node->n_children++;
    XREALLOC_N(node->n_children, node->bp);
    init_childinfo(node, childnum, child);
    XREALLOC_N(node->n_children-1, node->childkeys);
    if (pivotkey) {
        invariant(childnum > 0);
        init_childkey(node, childnum-1, pivotkey);
    }
    node->dirty = 1;
}

void
toku_ft_bn_apply_msg_once (
    BASEMENTNODE bn,
    const FT_MSG msg,
    uint32_t idx,
    uint32_t le_keylen,
    LEAFENTRY le,
    txn_gc_info *gc_info,
    uint64_t *workdone,
    STAT64INFO stats_to_update
    )
// Effect: Apply msg to leafentry (msn is ignored)
//   Calculate work done by message on leafentry and add it to caller's
//   workdone counter, and fold the row/byte deltas into both the basement
//   node's stat64_delta and (if non-NULL) stats_to_update.
// idx is the location where it goes
// le is old leafentry (may be NULL if no row with this key exists yet)
{
    size_t newsize=0, oldsize=0, workdone_this_le=0;
    LEAFENTRY new_le=0;
    int64_t numbytes_delta = 0;  // how many bytes of user data (not including overhead) were added or deleted from this row
    int64_t numrows_delta = 0;   // will be +1 or -1 or 0 (if row was added or deleted or not)
    // key storage cost: the key bytes plus the uint32_t length prefix
    uint32_t key_storage_size = ft_msg_get_keylen(msg) + sizeof(uint32_t);
    if (le) {
        oldsize = leafentry_memsize(le) + key_storage_size;
    }

    // toku_le_apply_msg() may call bn_data::mempool_malloc_and_update_dmt() to allocate more space.
    // That means le is guaranteed to not cause a sigsegv but it may point to a mempool that is
    // no longer in use.  We'll have to release the old mempool later.
    toku_le_apply_msg(
        msg,
        le,
        &bn->data_buffer,
        idx,
        le_keylen,
        gc_info,
        &new_le,
        &numbytes_delta
        );
    // at this point, we cannot trust msg->u.id.key to be valid.
    // The dmt may have realloced its mempool and freed the one containing key.

    newsize = new_le ? (leafentry_memsize(new_le) + key_storage_size) : 0;
    if (le && new_le) {
        // replaced an existing row
        workdone_this_le = (oldsize > newsize ? oldsize : newsize);  // work done is max of le size before and after message application

    } else {           // we did not just replace a row, so ...
        if (le) {
            // ... we just deleted a row ...
            workdone_this_le = oldsize;
            numrows_delta = -1;
        }
        if (new_le) {
            // ... or we just added a row
            workdone_this_le = newsize;
            numrows_delta = 1;
        }
    }
    if (workdone) {  // test programs may call with NULL
        *workdone += workdone_this_le;
    }

    // now update stat64 statistics
    bn->stat64_delta.numrows += numrows_delta;
    bn->stat64_delta.numbytes += numbytes_delta;
    // the only reason stats_to_update may be null is for tests
    if (stats_to_update) {
        stats_to_update->numrows += numrows_delta;
        stats_to_update->numbytes += numbytes_delta;
    }

}

static const uint32_t setval_tag = 0xee0ccb99; // this was gotten by doing "cat /dev/random|head -c4|od -x" to get a random number. We want to make sure that the user actually passes us the setval_extra_s that we passed in.
struct setval_extra_s {
    uint32_t tag;          // must equal setval_tag; sanity-checks the callback plumbing
    bool did_set_val;      // setval_fun must be invoked at most once per update
    int setval_r;          // any error code that setval_fun wants to return goes here.
    // need arguments for toku_ft_bn_apply_msg_once
    BASEMENTNODE bn;
    MSN msn;               // captured from original message, not currently used
    XIDS xids;
    const DBT *key;
    uint32_t idx;
    uint32_t le_keylen;
    LEAFENTRY le;
    txn_gc_info *gc_info;
    uint64_t * workdone;   // set by toku_ft_bn_apply_msg_once()
    STAT64INFO stats_to_update;
};

/*
 * If new_val == NULL, we send a delete message instead of an insert.
 * This happens here instead of in do_delete() for consistency.
 * setval_fun() is called from handlerton, passing in svextra_v
 * from setval_extra_s input arg to ft->update_fun().
 */
static void setval_fun (const DBT *new_val, void *svextra_v) {
    struct setval_extra_s *CAST_FROM_VOIDP(svextra, svextra_v);
    paranoid_invariant(svextra->tag==setval_tag);
    paranoid_invariant(!svextra->did_set_val);
    svextra->did_set_val = true;

    {
        // can't leave scope until toku_ft_bn_apply_msg_once if
        // this is a delete
        DBT val;
        // Synthesize an insert (new_val given) or delete-any (new_val NULL)
        // message carrying the original msn/xids, and apply it directly.
        FT_MSG_S msg = { FT_NONE, svextra->msn, svextra->xids,
                         .u = { .id = {svextra->key, NULL} } };
        if (new_val) {
            msg.type = FT_INSERT;
            msg.u.id.val = new_val;
        } else {
            msg.type = FT_DELETE_ANY;
            toku_init_dbt(&val);
            msg.u.id.val = &val;
        }
        toku_ft_bn_apply_msg_once(svextra->bn, &msg,
                                  svextra->idx, svextra->le_keylen, svextra->le,
                                  svextra->gc_info,
                                  svextra->workdone, svextra->stats_to_update);
        svextra->setval_r = 0;
    }
}

// We are already past the msn filter (in toku_ft_bn_apply_msg(), which calls do_update()),
// so capturing the msn in the setval_extra_s is not strictly required.  The alternative
// would be to put a dummy msn in the messages created by setval_fun(), but preserving
// the original msn seems cleaner and it preserves accountability at a lower layer.
//
// Returns 0 on success, otherwise the error from the user's update function
// (or the code setval_fun stashed in setval_extra.setval_r).
static int do_update(ft_update_func update_fun, DESCRIPTOR desc, BASEMENTNODE bn, FT_MSG msg, uint32_t idx,
                     LEAFENTRY le,
                     void* keydata,
                     uint32_t keylen,
                     txn_gc_info *gc_info,
                     uint64_t * workdone,
                     STAT64INFO stats_to_update) {
    LEAFENTRY le_for_update;
    DBT key;
    const DBT *keyp;
    const DBT *update_function_extra;
    DBT vdbt;
    const DBT *vdbtp;

    // the location of data depends whether this is a regular or
    // broadcast update
    if (msg->type == FT_UPDATE) {
        // key is passed in with command (should be same as from le)
        // update function extra is passed in with command
        keyp = msg->u.id.key;
        update_function_extra = msg->u.id.val;
    } else {
        invariant(msg->type == FT_UPDATE_BROADCAST_ALL);
        // key is not passed in with broadcast, it comes from le
        // update function extra is passed in with command
        paranoid_invariant(le);  // for broadcast updates, we just hit all leafentries
                                 // so this cannot be null
        paranoid_invariant(keydata);
        paranoid_invariant(keylen);
        paranoid_invariant(msg->u.id.key->size == 0);
        keyp = toku_fill_dbt(&key, keydata, keylen);
        update_function_extra = msg->u.id.val;
    }
    toku_ft_status_note_update(msg->type == FT_UPDATE_BROADCAST_ALL);

    if (le && !le_latest_is_del(le)) {
        // if the latest val exists, use it, and we'll use the leafentry later
        uint32_t vallen;
        void *valp = le_latest_val_and_len(le, &vallen);
        vdbtp = toku_fill_dbt(&vdbt, valp, vallen);
    } else {
        // otherwise, the val and leafentry are both going to be null
        vdbtp = NULL;
    }
    le_for_update = le;

    struct setval_extra_s setval_extra = {setval_tag, false, 0, bn, msg->msn, msg->xids,
                                          keyp, idx, keylen, le_for_update, gc_info,
                                          workdone, stats_to_update};
    // call handlerton's ft->update_fun(), which passes setval_extra to setval_fun()
    FAKE_DB(db, desc);
    int r = update_fun(
        &db,
        keyp,
        vdbtp,
        update_function_extra,
        setval_fun, &setval_extra
        );

    if (r == 0) { r = setval_extra.setval_r; }
    return r;
}
+// Should be renamed as something like "apply_msg_to_basement()." +void +toku_ft_bn_apply_msg ( + ft_compare_func compare_fun, + ft_update_func update_fun, + DESCRIPTOR desc, + BASEMENTNODE bn, + FT_MSG msg, + txn_gc_info *gc_info, + uint64_t *workdone, + STAT64INFO stats_to_update + ) +// Effect: +// Put a msg into a leaf. +// Calculate work done by message on leafnode and add it to caller's workdone counter. +// The leaf could end up "too big" or "too small". The caller must fix that up. +{ + LEAFENTRY storeddata; + void* key = NULL; + uint32_t keylen = 0; + + uint32_t num_klpairs; + int r; + struct toku_msg_leafval_heaviside_extra be = {compare_fun, desc, msg->u.id.key}; + + unsigned int doing_seqinsert = bn->seqinsert; + bn->seqinsert = 0; + + switch (msg->type) { + case FT_INSERT_NO_OVERWRITE: + case FT_INSERT: { + uint32_t idx; + if (doing_seqinsert) { + idx = bn->data_buffer.num_klpairs(); + DBT kdbt; + r = bn->data_buffer.fetch_key_and_len(idx-1, &kdbt.size, &kdbt.data); + if (r != 0) goto fz; + int cmp = toku_msg_leafval_heaviside(kdbt, be); + if (cmp >= 0) goto fz; + r = DB_NOTFOUND; + } else { + fz: + r = bn->data_buffer.find_zero( + be, + &storeddata, + &key, + &keylen, + &idx + ); + } + if (r==DB_NOTFOUND) { + storeddata = 0; + } else { + assert_zero(r); + } + toku_ft_bn_apply_msg_once(bn, msg, idx, keylen, storeddata, gc_info, workdone, stats_to_update); + + // if the insertion point is within a window of the right edge of + // the leaf then it is sequential + // window = min(32, number of leaf entries/16) + { + uint32_t s = bn->data_buffer.num_klpairs(); + uint32_t w = s / 16; + if (w == 0) w = 1; + if (w > 32) w = 32; + + // within the window? 
+ if (s - idx <= w) + bn->seqinsert = doing_seqinsert + 1; + } + break; + } + case FT_DELETE_ANY: + case FT_ABORT_ANY: + case FT_COMMIT_ANY: { + uint32_t idx; + // Apply to all the matches + + r = bn->data_buffer.find_zero( + be, + &storeddata, + &key, + &keylen, + &idx + ); + if (r == DB_NOTFOUND) break; + assert_zero(r); + toku_ft_bn_apply_msg_once(bn, msg, idx, keylen, storeddata, gc_info, workdone, stats_to_update); + + break; + } + case FT_OPTIMIZE_FOR_UPGRADE: + // fall through so that optimize_for_upgrade performs rest of the optimize logic + case FT_COMMIT_BROADCAST_ALL: + case FT_OPTIMIZE: + // Apply to all leafentries + num_klpairs = bn->data_buffer.num_klpairs(); + for (uint32_t idx = 0; idx < num_klpairs; ) { + void* curr_keyp = NULL; + uint32_t curr_keylen = 0; + r = bn->data_buffer.fetch_klpair(idx, &storeddata, &curr_keylen, &curr_keyp); + assert_zero(r); + int deleted = 0; + if (!le_is_clean(storeddata)) { //If already clean, nothing to do. + // message application code needs a key in order to determine how much + // work was done by this message. since this is a broadcast message, + // we have to create a new message whose key is the current le's key. + DBT curr_keydbt; + FT_MSG_S curr_msg = *msg; + curr_msg.u.id.key = toku_fill_dbt(&curr_keydbt, curr_keyp, curr_keylen); + toku_ft_bn_apply_msg_once(bn, &curr_msg, idx, curr_keylen, storeddata, gc_info, workdone, stats_to_update); + // at this point, we cannot trust msg->u.id.key to be valid. + uint32_t new_dmt_size = bn->data_buffer.num_klpairs(); + if (new_dmt_size != num_klpairs) { + paranoid_invariant(new_dmt_size + 1 == num_klpairs); + //Item was deleted. 
+ deleted = 1; + } + } + if (deleted) + num_klpairs--; + else + idx++; + } + paranoid_invariant(bn->data_buffer.num_klpairs() == num_klpairs); + + break; + case FT_COMMIT_BROADCAST_TXN: + case FT_ABORT_BROADCAST_TXN: + // Apply to all leafentries if txn is represented + num_klpairs = bn->data_buffer.num_klpairs(); + for (uint32_t idx = 0; idx < num_klpairs; ) { + void* curr_keyp = NULL; + uint32_t curr_keylen = 0; + r = bn->data_buffer.fetch_klpair(idx, &storeddata, &curr_keylen, &curr_keyp); + assert_zero(r); + int deleted = 0; + if (le_has_xids(storeddata, msg->xids)) { + // message application code needs a key in order to determine how much + // work was done by this message. since this is a broadcast message, + // we have to create a new message whose key is the current le's key. + DBT curr_keydbt; + FT_MSG_S curr_msg = *msg; + curr_msg.u.id.key = toku_fill_dbt(&curr_keydbt, curr_keyp, curr_keylen); + toku_ft_bn_apply_msg_once(bn, &curr_msg, idx, curr_keylen, storeddata, gc_info, workdone, stats_to_update); + uint32_t new_dmt_size = bn->data_buffer.num_klpairs(); + if (new_dmt_size != num_klpairs) { + paranoid_invariant(new_dmt_size + 1 == num_klpairs); + //Item was deleted. 
+ deleted = 1; + } + } + if (deleted) + num_klpairs--; + else + idx++; + } + paranoid_invariant(bn->data_buffer.num_klpairs() == num_klpairs); + + break; + case FT_UPDATE: { + uint32_t idx; + r = bn->data_buffer.find_zero( + be, + &storeddata, + &key, + &keylen, + &idx + ); + if (r==DB_NOTFOUND) { + { + //Point to msg's copy of the key so we don't worry about le being freed + //TODO: 46 MAYBE Get rid of this when le_apply message memory is better handled + key = msg->u.id.key->data; + keylen = msg->u.id.key->size; + } + r = do_update(update_fun, desc, bn, msg, idx, NULL, NULL, 0, gc_info, workdone, stats_to_update); + } else if (r==0) { + r = do_update(update_fun, desc, bn, msg, idx, storeddata, key, keylen, gc_info, workdone, stats_to_update); + } // otherwise, a worse error, just return it + break; + } + case FT_UPDATE_BROADCAST_ALL: { + // apply to all leafentries. + uint32_t idx = 0; + uint32_t num_leafentries_before; + while (idx < (num_leafentries_before = bn->data_buffer.num_klpairs())) { + void* curr_key = nullptr; + uint32_t curr_keylen = 0; + r = bn->data_buffer.fetch_klpair(idx, &storeddata, &curr_keylen, &curr_key); + assert_zero(r); + + //TODO: 46 replace this with something better than cloning key + // TODO: (Zardosht) This may be unnecessary now, due to how the key + // is handled in the bndata. Investigate and determine + char clone_mem[curr_keylen]; // only lasts one loop, alloca would overflow (end of function) + memcpy((void*)clone_mem, curr_key, curr_keylen); + curr_key = (void*)clone_mem; + + // This is broken below. Have a compilation error checked + // in as a reminder + r = do_update(update_fun, desc, bn, msg, idx, storeddata, curr_key, curr_keylen, gc_info, workdone, stats_to_update); + assert_zero(r); + + if (num_leafentries_before == bn->data_buffer.num_klpairs()) { + // we didn't delete something, so increment the index. 
+ idx++; + } + } + break; + } + case FT_NONE: break; // don't do anything + } + + return; +} + +static inline int +key_msn_cmp(const DBT *a, const DBT *b, const MSN amsn, const MSN bmsn, + DESCRIPTOR descriptor, ft_compare_func key_cmp) +{ + FAKE_DB(db, descriptor); + int r = key_cmp(&db, a, b); + if (r == 0) { + if (amsn.msn > bmsn.msn) { + r = +1; + } else if (amsn.msn < bmsn.msn) { + r = -1; + } else { + r = 0; + } + } + return r; +} + +int toku_msg_buffer_key_msn_heaviside(const int32_t &offset, const struct toku_msg_buffer_key_msn_heaviside_extra &extra) { + MSN query_msn; + DBT query_key; + extra.msg_buffer->get_message_key_msn(offset, &query_key, &query_msn); + return key_msn_cmp(&query_key, extra.key, query_msn, extra.msn, + extra.desc, extra.cmp); +} + +int +toku_msg_buffer_key_msn_cmp(const struct toku_msg_buffer_key_msn_cmp_extra &extra, const int32_t &ao, const int32_t &bo) +{ + MSN amsn, bmsn; + DBT akey, bkey; + extra.msg_buffer->get_message_key_msn(ao, &akey, &amsn); + extra.msg_buffer->get_message_key_msn(bo, &bkey, &bmsn); + return key_msn_cmp(&akey, &bkey, amsn, bmsn, + extra.desc, extra.cmp); +} + +// Effect: Enqueue the message represented by the parameters into the +// bnc's buffer, and put it in either the fresh or stale message tree, +// or the broadcast list. 
+static void bnc_insert_msg(NONLEAF_CHILDINFO bnc, FT_MSG msg, bool is_fresh, DESCRIPTOR desc, ft_compare_func cmp) { + int r = 0; + int32_t offset; + bnc->msg_buffer.enqueue(msg, is_fresh, &offset); + enum ft_msg_type type = ft_msg_get_type(msg); + if (ft_msg_type_applies_once(type)) { + DBT key; + toku_fill_dbt(&key, ft_msg_get_key(msg), ft_msg_get_keylen(msg)); + struct toku_msg_buffer_key_msn_heaviside_extra extra = { .desc = desc, .cmp = cmp, .msg_buffer = &bnc->msg_buffer, .key = &key, .msn = msg->msn }; + if (is_fresh) { + r = bnc->fresh_message_tree.insert(offset, extra, nullptr); + assert_zero(r); + } else { + r = bnc->stale_message_tree.insert(offset, extra, nullptr); + assert_zero(r); + } + } else { + invariant(ft_msg_type_applies_all(type) || ft_msg_type_does_nothing(type)); + const uint32_t idx = bnc->broadcast_list.size(); + r = bnc->broadcast_list.insert_at(offset, idx); + assert_zero(r); + } +} + +// This is only exported for tests. +void toku_bnc_insert_msg(NONLEAF_CHILDINFO bnc, const void *key, ITEMLEN keylen, const void *data, ITEMLEN datalen, enum ft_msg_type type, MSN msn, XIDS xids, bool is_fresh, DESCRIPTOR desc, ft_compare_func cmp) +{ + DBT k, v; + FT_MSG_S msg = { + type, msn, xids, .u = { .id = { toku_fill_dbt(&k, key, keylen), toku_fill_dbt(&v, data, datalen) } } + }; + bnc_insert_msg(bnc, &msg, is_fresh, desc, cmp); +} + +// append a msg to a nonleaf node's child buffer +static void ft_append_msg_to_child_buffer(ft_compare_func compare_fun, DESCRIPTOR desc, FTNODE node, + int childnum, FT_MSG msg, bool is_fresh) { + paranoid_invariant(BP_STATE(node,childnum) == PT_AVAIL); + bnc_insert_msg(BNC(node, childnum), msg, is_fresh, desc, compare_fun); + node->dirty = 1; +} + +// This is only exported for tests. 
+void toku_ft_append_to_child_buffer(ft_compare_func compare_fun, DESCRIPTOR desc, FTNODE node, int childnum, enum ft_msg_type type, MSN msn, XIDS xids, bool is_fresh, const DBT *key, const DBT *val) {
+    FT_MSG_S msg = {
+        type, msn, xids, .u = { .id = { key, val } }
+    };
+    ft_append_msg_to_child_buffer(compare_fun, desc, node, childnum, &msg, is_fresh);
+}
+
+static void ft_nonleaf_msg_once_to_child(ft_compare_func compare_fun, DESCRIPTOR desc, FTNODE node, int target_childnum, FT_MSG msg, bool is_fresh, size_t flow_deltas[])
+// Previously we had passive aggressive promotion, but that causes a lot of I/O at the checkpoint. So now we are just putting it in the buffer here.
+// Also we don't worry about the node getting overfull here. It's the caller's problem.
+{
+    unsigned int childnum = (target_childnum >= 0
+                             ? target_childnum
+                             : toku_ftnode_which_child(node, msg->u.id.key, desc, compare_fun));
+    ft_append_msg_to_child_buffer(compare_fun, desc, node, childnum, msg, is_fresh);
+    NONLEAF_CHILDINFO bnc = BNC(node, childnum);
+    bnc->flow[0] += flow_deltas[0];
+    bnc->flow[1] += flow_deltas[1];
+}
+
+static int ft_compare_pivot(DESCRIPTOR desc, ft_compare_func cmp, const DBT *key, const DBT *pivot) {
+    FAKE_DB(db, desc);
+    int r = cmp(&db, key, pivot);
+    return r;
+}
+
+/* Find the leftmost child that may contain the key.
+ * If the key exists it will be in the child whose number
+ * is the return value of this function.
+ */ +int toku_ftnode_which_child(FTNODE node, const DBT *k, + DESCRIPTOR desc, ft_compare_func cmp) { + // a funny case of no pivots + if (node->n_children <= 1) return 0; + + // check the last key to optimize seq insertions + int n = node->n_children-1; + int c = ft_compare_pivot(desc, cmp, k, &node->childkeys[n-1]); + if (c > 0) return n; + + // binary search the pivots + int lo = 0; + int hi = n-1; // skip the last one, we checked it above + int mi; + while (lo < hi) { + mi = (lo + hi) / 2; + c = ft_compare_pivot(desc, cmp, k, &node->childkeys[mi]); + if (c > 0) { + lo = mi+1; + continue; + } + if (c < 0) { + hi = mi; + continue; + } + return mi; + } + return lo; +} + +// Used for HOT. +int +toku_ftnode_hot_next_child(FTNODE node, + const DBT *k, + DESCRIPTOR desc, + ft_compare_func cmp) { + int low = 0; + int hi = node->n_children - 1; + int mi; + while (low < hi) { + mi = (low + hi) / 2; + int r = ft_compare_pivot(desc, cmp, k, &node->childkeys[mi]); + if (r > 0) { + low = mi + 1; + } else if (r < 0) { + hi = mi; + } else { + // if they were exactly equal, then we want the sub-tree under + // the next pivot. + return mi + 1; + } + } + invariant(low == hi); + return low; +} +static void +ft_nonleaf_msg_all(ft_compare_func compare_fun, DESCRIPTOR desc, FTNODE node, FT_MSG msg, bool is_fresh, size_t flow_deltas[]) +// Effect: Put the message into a nonleaf node. We put it into all children, possibly causing the children to become reactive. +// We don't do the splitting and merging. That's up to the caller after doing all the puts it wants to do. +// The re_array[i] gets set to the reactivity of any modified child i. (And there may be several such children.) 
+{ + for (int i = 0; i < node->n_children; i++) { + ft_nonleaf_msg_once_to_child(compare_fun, desc, node, i, msg, is_fresh, flow_deltas); + } +} + +static void +ft_nonleaf_put_msg(ft_compare_func compare_fun, DESCRIPTOR desc, FTNODE node, int target_childnum, FT_MSG msg, bool is_fresh, size_t flow_deltas[]) +// Effect: Put the message into a nonleaf node. We may put it into a child, possibly causing the child to become reactive. +// We don't do the splitting and merging. That's up to the caller after doing all the puts it wants to do. +// The re_array[i] gets set to the reactivity of any modified child i. (And there may be several such children.) +// +{ + + // + // see comments in toku_ft_leaf_apply_msg + // to understand why we handle setting + // node->max_msn_applied_to_node_on_disk here, + // and don't do it in toku_ftnode_put_msg + // + MSN msg_msn = msg->msn; + invariant(msg_msn.msn > node->max_msn_applied_to_node_on_disk.msn); + node->max_msn_applied_to_node_on_disk = msg_msn; + + if (ft_msg_type_applies_once(msg->type)) { + ft_nonleaf_msg_once_to_child(compare_fun, desc, node, target_childnum, msg, is_fresh, flow_deltas); + } else if (ft_msg_type_applies_all(msg->type)) { + ft_nonleaf_msg_all(compare_fun, desc, node, msg, is_fresh, flow_deltas); + } else { + paranoid_invariant(ft_msg_type_does_nothing(msg->type)); + } +} + +// Garbage collect one leaf entry. +static void +ft_basement_node_gc_once(BASEMENTNODE bn, + uint32_t index, + void* keyp, + uint32_t keylen, + LEAFENTRY leaf_entry, + txn_gc_info *gc_info, + STAT64INFO_S * delta) +{ + paranoid_invariant(leaf_entry); + + // Don't run garbage collection on non-mvcc leaf entries. + if (leaf_entry->type != LE_MVCC) { + goto exit; + } + + // Don't run garbage collection if this leafentry decides it's not worth it. + if (!toku_le_worth_running_garbage_collection(leaf_entry, gc_info)) { + goto exit; + } + + LEAFENTRY new_leaf_entry; + new_leaf_entry = NULL; + + // The mempool doesn't free itself. 
When it allocates new memory, + // this pointer will be set to the older memory that must now be + // freed. + void * maybe_free; + maybe_free = NULL; + + // These will represent the number of bytes and rows changed as + // part of the garbage collection. + int64_t numbytes_delta; + int64_t numrows_delta; + toku_le_garbage_collect(leaf_entry, + &bn->data_buffer, + index, + keyp, + keylen, + gc_info, + &new_leaf_entry, + &numbytes_delta); + + numrows_delta = 0; + if (new_leaf_entry) { + numrows_delta = 0; + } else { + numrows_delta = -1; + } + + // If we created a new mempool buffer we must free the + // old/original buffer. + if (maybe_free) { + toku_free(maybe_free); + } + + // Update stats. + bn->stat64_delta.numrows += numrows_delta; + bn->stat64_delta.numbytes += numbytes_delta; + delta->numrows += numrows_delta; + delta->numbytes += numbytes_delta; + +exit: + return; +} + +// Garbage collect all leaf entries for a given basement node. +static void +basement_node_gc_all_les(BASEMENTNODE bn, + txn_gc_info *gc_info, + STAT64INFO_S * delta) +{ + int r = 0; + uint32_t index = 0; + uint32_t num_leafentries_before; + while (index < (num_leafentries_before = bn->data_buffer.num_klpairs())) { + void* keyp = NULL; + uint32_t keylen = 0; + LEAFENTRY leaf_entry; + r = bn->data_buffer.fetch_klpair(index, &leaf_entry, &keylen, &keyp); + assert_zero(r); + ft_basement_node_gc_once( + bn, + index, + keyp, + keylen, + leaf_entry, + gc_info, + delta + ); + // Check if the leaf entry was deleted or not. + if (num_leafentries_before == bn->data_buffer.num_klpairs()) { + ++index; + } + } +} + +// Garbage collect all leaf entires in all basement nodes. +static void +ft_leaf_gc_all_les(FT ft, FTNODE node, txn_gc_info *gc_info) +{ + toku_ftnode_assert_fully_in_memory(node); + paranoid_invariant_zero(node->height); + // Loop through each leaf entry, garbage collecting as we go. + for (int i = 0; i < node->n_children; ++i) { + // Perform the garbage collection. 
+        BASEMENTNODE bn = BLB(node, i);
+        STAT64INFO_S delta;
+        delta.numrows = 0;
+        delta.numbytes = 0;
+        basement_node_gc_all_les(bn, gc_info, &delta);
+        toku_ft_update_stats(&ft->in_memory_stats, delta);
+    }
+}
+
+void toku_ftnode_leaf_run_gc(FT ft, FTNODE node) {
+    TOKULOGGER logger = toku_cachefile_logger(ft->cf);
+    if (logger) {
+        TXN_MANAGER txn_manager = toku_logger_get_txn_manager(logger);
+        txn_manager_state txn_state_for_gc(txn_manager);
+        txn_state_for_gc.init();
+        TXNID oldest_referenced_xid_for_simple_gc = toku_txn_manager_get_oldest_referenced_xid_estimate(txn_manager);
+
+        // Perform full garbage collection.
+        //
+        // - txn_state_for_gc
+        //     a fresh snapshot of the transaction system.
+        // - oldest_referenced_xid_for_simple_gc
+        //     the oldest xid in any live list as of right now - suitable for simple gc
+        // - node->oldest_referenced_xid_known
+        //     the last known oldest referenced xid for this node and any unapplied messages.
+        //     it is a lower bound on the actual oldest referenced xid - but because there
+        //     may be abort messages above us, we need to be careful to only use this value
+        //     for implicit promotion (as opposed to the oldest referenced xid for simple gc)
+        //
+        // The node has its own oldest referenced xid because it must be careful not to implicitly promote
+        // provisional entries for transactions that are no longer live, but may have abort messages
+        // somewhere above us in the tree.
+        txn_gc_info gc_info(&txn_state_for_gc,
+                            oldest_referenced_xid_for_simple_gc,
+                            node->oldest_referenced_xid_known,
+                            true);
+        ft_leaf_gc_all_les(ft, node, &gc_info);
+    }
+}
+
+void
+toku_ftnode_put_msg (
+    ft_compare_func compare_fun,
+    ft_update_func update_fun,
+    DESCRIPTOR desc,
+    FTNODE node,
+    int target_childnum,
+    FT_MSG msg,
+    bool is_fresh,
+    txn_gc_info *gc_info,
+    size_t flow_deltas[],
+    STAT64INFO stats_to_update
+    )
+// Effect: Push message into the subtree rooted at NODE.
+// If NODE is a leaf, then +// put message into leaf, applying it to the leafentries +// If NODE is a nonleaf, then push the message into the message buffer(s) of the relevent child(ren). +// The node may become overfull. That's not our problem. +{ + toku_ftnode_assert_fully_in_memory(node); + // + // see comments in toku_ft_leaf_apply_msg + // to understand why we don't handle setting + // node->max_msn_applied_to_node_on_disk here, + // and instead defer to these functions + // + if (node->height==0) { + toku_ft_leaf_apply_msg(compare_fun, update_fun, desc, node, target_childnum, msg, gc_info, nullptr, stats_to_update); + } else { + ft_nonleaf_put_msg(compare_fun, desc, node, target_childnum, msg, is_fresh, flow_deltas); + } +} + +// Effect: applies the message to the leaf if the appropriate basement node is in memory. +// This function is called during message injection and/or flushing, so the entire +// node MUST be in memory. +void toku_ft_leaf_apply_msg( + ft_compare_func compare_fun, + ft_update_func update_fun, + DESCRIPTOR desc, + FTNODE node, + int target_childnum, // which child to inject to, or -1 if unknown + FT_MSG msg, + txn_gc_info *gc_info, + uint64_t *workdone, + STAT64INFO stats_to_update + ) +{ + VERIFY_NODE(t, node); + toku_ftnode_assert_fully_in_memory(node); + + // + // Because toku_ft_leaf_apply_msg is called with the intent of permanently + // applying a message to a leaf node (meaning the message is permanently applied + // and will be purged from the system after this call, as opposed to + // toku_apply_ancestors_messages_to_node, which applies a message + // for a query, but the message may still reside in the system and + // be reapplied later), we mark the node as dirty and + // take the opportunity to update node->max_msn_applied_to_node_on_disk. 
+    //
+    node->dirty = 1;
+
+    //
+    // we cannot blindly update node->max_msn_applied_to_node_on_disk,
+    // we must check to see if the msn is greater than the one already stored,
+    // because the message may have already been applied earlier (via
+    // toku_apply_ancestors_messages_to_node) to answer a query
+    //
+    // This is why we handle node->max_msn_applied_to_node_on_disk both here
+    // and in ft_nonleaf_put_msg, as opposed to in one location, toku_ftnode_put_msg.
+    //
+    MSN msg_msn = msg->msn;
+    if (msg_msn.msn > node->max_msn_applied_to_node_on_disk.msn) {
+        node->max_msn_applied_to_node_on_disk = msg_msn;
+    }
+
+    if (ft_msg_type_applies_once(msg->type)) {
+        unsigned int childnum = (target_childnum >= 0
+                                 ? target_childnum
+                                 : toku_ftnode_which_child(node, msg->u.id.key, desc, compare_fun));
+        BASEMENTNODE bn = BLB(node, childnum);
+        if (msg->msn.msn > bn->max_msn_applied.msn) {
+            bn->max_msn_applied = msg->msn;
+            toku_ft_bn_apply_msg(compare_fun,
+                                 update_fun,
+                                 desc,
+                                 bn,
+                                 msg,
+                                 gc_info,
+                                 workdone,
+                                 stats_to_update);
+        } else {
+            toku_ft_status_note_msn_discard();
+        }
+    }
+    else if (ft_msg_type_applies_all(msg->type)) {
+        for (int childnum=0; childnum<node->n_children; childnum++) {
+            if (msg->msn.msn > BLB(node, childnum)->max_msn_applied.msn) {
+                BLB(node, childnum)->max_msn_applied = msg->msn;
+                toku_ft_bn_apply_msg(compare_fun,
+                                     update_fun,
+                                     desc,
+                                     BLB(node, childnum),
+                                     msg,
+                                     gc_info,
+                                     workdone,
+                                     stats_to_update);
+            } else {
+                toku_ft_status_note_msn_discard();
+            }
+        }
+    }
+    else if (!ft_msg_type_does_nothing(msg->type)) {
+        invariant(ft_msg_type_does_nothing(msg->type));
+    }
+    VERIFY_NODE(t, node);
+}
+
diff --git a/ft/node.h b/ft/node.h
new file mode 100644
index 00000000000..82155334c4a
--- /dev/null
+++ b/ft/node.h
@@ -0,0 +1,531 @@
+/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
+// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
+
+/*
+COPYING CONDITIONS NOTICE:
+
+  This program is free software; you can redistribute it and/or
modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuDB, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. 
+ + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. + + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#pragma once + +#include "ft/cachetable.h" +#include "ft/bndata.h" +#include "ft/fttypes.h" +#include "ft/msg_buffer.h" + +struct ftnode { + MSN max_msn_applied_to_node_on_disk; // max_msn_applied that will be written to disk + unsigned int flags; + BLOCKNUM thisnodename; // Which block number is this node? + int layout_version; // What version of the data structure? 
+    int layout_version_original; // different (<) from layout_version if upgraded from a previous version (useful for debugging)
+    int layout_version_read_from_disk; // transient, not serialized to disk, (useful for debugging)
+    uint32_t build_id; // build_id (svn rev number) of software that wrote this node to disk
+    int height; /* height is always >= 0. 0 for leaf, >0 for nonleaf. */
+    int dirty;
+    uint32_t fullhash;
+    int n_children; //for internal nodes, if n_children==fanout+1 then the tree needs to be rebalanced.
+                    // for leaf nodes, represents number of basement nodes
+    unsigned int totalchildkeylens;
+    DBT *childkeys; /* Pivot keys. Child 0's keys are <= childkeys[0]. Child 1's keys are <= childkeys[1].
+                       Child 1's keys are > childkeys[0]. */
+
+    // What's the oldest referenced xid that this node knows about? The real oldest
+    // referenced xid might be younger, but this is our best estimate. We use it
+    // as a heuristic to transition provisional mvcc entries from provisional to
+    // committed (from implicitly committed to really committed).
+    //
+    // A better heuristic would be the oldest live txnid, but we use this since it
+    // still works well most of the time, and it's readily available on the inject
+    // code path.
+    TXNID oldest_referenced_xid_known;
+
+    // array of size n_children, consisting of ftnode partitions
+    // each one is associated with a child
+    // for internal nodes, the ith partition corresponds to the ith message buffer
+    // for leaf nodes, the ith partition corresponds to the ith basement node
+    struct ftnode_partition *bp;
+    struct ctpair *ct_pair;
+};
+
+// data of an available partition of a leaf ftnode
+struct ftnode_leaf_basement_node {
+    bn_data data_buffer;
+    unsigned int seqinsert; // number of sequential inserts to this leaf
+    MSN max_msn_applied; // max message sequence number applied
+    bool stale_ancestor_messages_applied;
+    STAT64INFO_S stat64_delta; // change in stat64 counters since basement was last written to disk
+};
+
+enum pt_state { // declare this to be packed so that when used below it will only take 1 byte.
+    PT_INVALID = 0,
+    PT_ON_DISK = 1,
+    PT_COMPRESSED = 2,
+    PT_AVAIL = 3};
+
+enum ftnode_child_tag {
+    BCT_INVALID = 0,
+    BCT_NULL,
+    BCT_SUBBLOCK,
+    BCT_LEAF,
+    BCT_NONLEAF
+};
+
+typedef toku::omt<int32_t> off_omt_t;
+typedef toku::omt<int32_t, int32_t, true> marked_off_omt_t;
+
+// data of an available partition of a nonleaf ftnode
+struct ftnode_nonleaf_childinfo {
+    message_buffer msg_buffer;
+    off_omt_t broadcast_list;
+    marked_off_omt_t fresh_message_tree;
+    off_omt_t stale_message_tree;
+    uint64_t flow[2]; // current and last checkpoint
+};
+
+typedef struct ftnode_child_pointer {
+    union {
+        struct sub_block *subblock;
+        struct ftnode_nonleaf_childinfo *nonleaf;
+        struct ftnode_leaf_basement_node *leaf;
+    } u;
+    enum ftnode_child_tag tag;
+} FTNODE_CHILD_POINTER;
+
+struct ftnode_disk_data {
+    //
+    // stores the offset to the beginning of the partition on disk from the ftnode, and the length, needed to read a partition off of disk
+    // the value is only meaningful if the node is clean.
If the node is dirty, then the value is meaningless + // The START is the distance from the end of the compressed node_info data, to the beginning of the compressed partition + // The SIZE is the size of the compressed partition. + // Rationale: We cannot store the size from the beginning of the node since we don't know how big the header will be. + // However, later when we are doing aligned writes, we won't be able to store the size from the end since we want things to align. + uint32_t start; + uint32_t size; +}; +#define BP_START(node_dd,i) ((node_dd)[i].start) +#define BP_SIZE(node_dd,i) ((node_dd)[i].size) + +// a ftnode partition, associated with a child of a node +struct ftnode_partition { + // the following three variables are used for nonleaf nodes + // for leaf nodes, they are meaningless + BLOCKNUM blocknum; // blocknum of child + + // How many bytes worth of work was performed by messages in each buffer. + uint64_t workdone; + + // + // pointer to the partition. Depending on the state, they may be different things + // if state == PT_INVALID, then the node was just initialized and ptr == NULL + // if state == PT_ON_DISK, then ptr == NULL + // if state == PT_COMPRESSED, then ptr points to a struct sub_block* + // if state == PT_AVAIL, then ptr is: + // a struct ftnode_nonleaf_childinfo for internal nodes, + // a struct ftnode_leaf_basement_node for leaf nodes + // + struct ftnode_child_pointer ptr; + // + // at any time, the partitions may be in one of the following three states (stored in pt_state): + // PT_INVALID - means that the partition was just initialized + // PT_ON_DISK - means that the partition is not in memory and needs to be read from disk. To use, must read off disk and decompress + // PT_COMPRESSED - means that the partition is compressed in memory. To use, must decompress + // PT_AVAIL - means the partition is decompressed and in memory + // + enum pt_state state; // make this an enum to make debugging easier. 
+ + // clock count used to for pe_callback to determine if a node should be evicted or not + // for now, saturating the count at 1 + uint8_t clock_count; +}; + +// +// TODO: Fix all these names +// Organize declarations +// Fix widespread parameter ordering inconsistencies +// +BASEMENTNODE toku_create_empty_bn(void); +BASEMENTNODE toku_create_empty_bn_no_buffer(void); // create a basement node with a null buffer. +NONLEAF_CHILDINFO toku_clone_nl(NONLEAF_CHILDINFO orig_childinfo); +BASEMENTNODE toku_clone_bn(BASEMENTNODE orig_bn); +NONLEAF_CHILDINFO toku_create_empty_nl(void); +void destroy_basement_node (BASEMENTNODE bn); +void destroy_nonleaf_childinfo (NONLEAF_CHILDINFO nl); +void toku_destroy_ftnode_internals(FTNODE node); +void toku_ftnode_free (FTNODE *node); +bool toku_ftnode_fully_in_memory(FTNODE node); +void toku_ftnode_assert_fully_in_memory(FTNODE node); +void toku_evict_bn_from_memory(FTNODE node, int childnum, FT h); +BASEMENTNODE toku_detach_bn(FTNODE node, int childnum); +void toku_ftnode_update_disk_stats(FTNODE ftnode, FT ft, bool for_checkpoint); +void toku_ftnode_clone_partitions(FTNODE node, FTNODE cloned_node); + +void toku_initialize_empty_ftnode(FTNODE node, BLOCKNUM nodename, int height, int num_children, + int layout_version, unsigned int flags); + +int toku_ftnode_which_child(FTNODE node, const DBT *k, + DESCRIPTOR desc, ft_compare_func cmp); + +// +// Field in ftnode_fetch_extra that tells the +// partial fetch callback what piece of the node +// is needed by the ydb +// +enum ftnode_fetch_type { + ftnode_fetch_none=1, // no partitions needed. 
+ ftnode_fetch_subset, // some subset of partitions needed + ftnode_fetch_prefetch, // this is part of a prefetch call + ftnode_fetch_all, // every partition is needed + ftnode_fetch_keymatch, // one child is needed if it holds both keys +}; + +static bool is_valid_ftnode_fetch_type(enum ftnode_fetch_type type) UU(); +static bool is_valid_ftnode_fetch_type(enum ftnode_fetch_type type) { + switch (type) { + case ftnode_fetch_none: + case ftnode_fetch_subset: + case ftnode_fetch_prefetch: + case ftnode_fetch_all: + case ftnode_fetch_keymatch: + return true; + default: + return false; + } +} + +// +// An extra parameter passed to cachetable functions +// That is used in all types of fetch callbacks. +// The contents help the partial fetch and fetch +// callbacks retrieve the pieces of a node necessary +// for the ensuing operation (flush, query, ...) +// +struct ft_search; +struct ftnode_fetch_extra { + enum ftnode_fetch_type type; + // needed for reading a node off disk + FT h; + // used in the case where type == ftnode_fetch_subset + // parameters needed to find out which child needs to be decompressed (so it can be read) + ft_search *search; + DBT range_lock_left_key, range_lock_right_key; + bool left_is_neg_infty, right_is_pos_infty; + // states if we should try to aggressively fetch basement nodes + // that are not specifically needed for current query, + // but may be needed for other cursor operations user is doing + // For example, if we have not disabled prefetching, + // and the user is doing a dictionary wide scan, then + // even though a query may only want one basement node, + // we fetch all basement nodes in a leaf node. 
+ bool disable_prefetching; + // this value will be set during the fetch_callback call by toku_ftnode_fetch_callback or toku_ftnode_pf_req_callback + // thi callbacks need to evaluate this anyway, so we cache it here so the search code does not reevaluate it + int child_to_read; + // when we read internal nodes, we want to read all the data off disk in one I/O + // then we'll treat it as normal and only decompress the needed partitions etc. + + bool read_all_partitions; + // Accounting: How many bytes were read, and how much time did we spend doing I/O? + uint64_t bytes_read; + tokutime_t io_time; + tokutime_t decompress_time; + tokutime_t deserialize_time; +}; +typedef struct ftnode_fetch_extra *FTNODE_FETCH_EXTRA; + +// +// TODO: put the heaviside functions into their respective 'struct .*extra;' namespaces +// +struct toku_msg_buffer_key_msn_heaviside_extra { + DESCRIPTOR desc; + ft_compare_func cmp; + message_buffer *msg_buffer; + const DBT *key; + MSN msn; +}; +int toku_msg_buffer_key_msn_heaviside(const int32_t &v, const struct toku_msg_buffer_key_msn_heaviside_extra &extra); + +struct toku_msg_buffer_key_msn_cmp_extra { + DESCRIPTOR desc; + ft_compare_func cmp; + message_buffer *msg_buffer; +}; +int toku_msg_buffer_key_msn_cmp(const struct toku_msg_buffer_key_msn_cmp_extra &extrap, const int &a, const int &b); + +struct toku_msg_leafval_heaviside_extra { + ft_compare_func compare_fun; + DESCRIPTOR desc; + DBT const * const key; +}; +int toku_msg_leafval_heaviside(DBT const &kdbt, const struct toku_msg_leafval_heaviside_extra &be); + +unsigned int toku_bnc_nbytesinbuf(NONLEAF_CHILDINFO bnc); +int toku_bnc_n_entries(NONLEAF_CHILDINFO bnc); +long toku_bnc_memory_size(NONLEAF_CHILDINFO bnc); +long toku_bnc_memory_used(NONLEAF_CHILDINFO bnc); +void toku_bnc_insert_msg(NONLEAF_CHILDINFO bnc, const void *key, ITEMLEN keylen, const void *data, ITEMLEN datalen, enum ft_msg_type type, MSN msn, XIDS xids, bool is_fresh, DESCRIPTOR desc, ft_compare_func cmp); +void 
toku_bnc_empty(NONLEAF_CHILDINFO bnc); +void toku_bnc_flush_to_child(FT h, NONLEAF_CHILDINFO bnc, FTNODE child, TXNID parent_oldest_referenced_xid_known); +bool toku_bnc_should_promote(FT ft, NONLEAF_CHILDINFO bnc) __attribute__((const, nonnull)); + +bool toku_ftnode_nonleaf_is_gorged(FTNODE node, uint32_t nodesize); +uint32_t toku_ftnode_leaf_num_entries(FTNODE node); +void toku_ftnode_leaf_rebalance(FTNODE node, unsigned int basementnodesize); + +void toku_ftnode_leaf_run_gc(FT ft, FTNODE node); + +enum reactivity { + RE_STABLE, + RE_FUSIBLE, + RE_FISSIBLE +}; + +enum reactivity toku_ftnode_get_reactivity(FT ft, FTNODE node); +enum reactivity toku_ftnode_get_nonleaf_reactivity(FTNODE node, unsigned int fanout); +enum reactivity toku_ftnode_get_leaf_reactivity(FTNODE node, uint32_t nodesize); + +/** + * Finds the next child for HOT to flush to, given that everything up to + * and including k has been flattened. + * + * If k falls between pivots in node, then we return the childnum where k + * lies. + * + * If k is equal to some pivot, then we return the next (to the right) + * childnum. 
+ */ +int toku_ftnode_hot_next_child(FTNODE node, const DBT *k, + DESCRIPTOR desc, ft_compare_func cmp); + +void toku_ftnode_put_msg(ft_compare_func compare_fun, ft_update_func update_fun, + DESCRIPTOR desc, FTNODE node, int target_childnum, + FT_MSG msg, bool is_fresh, txn_gc_info *gc_info, + size_t flow_deltas[], STAT64INFO stats_to_update); + +void toku_ft_bn_apply_msg_once(BASEMENTNODE bn, const FT_MSG msg, uint32_t idx, + uint32_t le_keylen, LEAFENTRY le, txn_gc_info *gc_info, + uint64_t *workdonep, STAT64INFO stats_to_update); + +void toku_ft_bn_apply_msg(ft_compare_func compare_fun, ft_update_func update_fun, + DESCRIPTOR desc, BASEMENTNODE bn, FT_MSG msg, txn_gc_info *gc_info, + uint64_t *workdone, STAT64INFO stats_to_update); + +void toku_ft_leaf_apply_msg(ft_compare_func compare_fun, ft_update_func update_fun, + DESCRIPTOR desc, FTNODE node, int target_childnum, + FT_MSG msg, txn_gc_info *gc_info, + uint64_t *workdone, STAT64INFO stats_to_update); + +CACHETABLE_WRITE_CALLBACK get_write_callbacks_for_node(FT ft); + +// +// Message management for orthopush +// + +struct ancestors { + // This is the root node if next is NULL (since the root has no ancestors) + FTNODE node; + // Which buffer holds messages destined to the node whose ancestors this list represents. + int childnum; + struct ancestors *next; +}; +typedef struct ancestors *ANCESTORS; + +void toku_ft_bnc_move_messages_to_stale(FT ft, NONLEAF_CHILDINFO bnc); + +void toku_move_ftnode_messages_to_stale(FT ft, FTNODE node); + +// TODO: Should ft_handle just be FT? 
+void toku_apply_ancestors_messages_to_node(FT_HANDLE t, FTNODE node, ANCESTORS ancestors, + struct pivot_bounds const *const bounds, + bool *msgs_applied, int child_to_read); + +bool toku_ft_leaf_needs_ancestors_messages(FT ft, FTNODE node, ANCESTORS ancestors, + struct pivot_bounds const *const bounds, + MSN *const max_msn_in_path, int child_to_read); + +void toku_ft_bn_update_max_msn(FTNODE node, MSN max_msn_applied, int child_to_read); + +struct ft_search; +int toku_ft_search_which_child(DESCRIPTOR desc, ft_compare_func cmp, FTNODE node, ft_search *search); + +// +// internal node inline functions +// TODO: Turn the macros into real functions +// + +static inline void set_BNULL(FTNODE node, int i) { + paranoid_invariant(i >= 0); + paranoid_invariant(i < node->n_children); + node->bp[i].ptr.tag = BCT_NULL; +} + +static inline bool is_BNULL (FTNODE node, int i) { + paranoid_invariant(i >= 0); + paranoid_invariant(i < node->n_children); + return node->bp[i].ptr.tag == BCT_NULL; +} + +static inline NONLEAF_CHILDINFO BNC(FTNODE node, int i) { + paranoid_invariant(i >= 0); + paranoid_invariant(i < node->n_children); + FTNODE_CHILD_POINTER p = node->bp[i].ptr; + paranoid_invariant(p.tag==BCT_NONLEAF); + return p.u.nonleaf; +} + +static inline void set_BNC(FTNODE node, int i, NONLEAF_CHILDINFO nl) { + paranoid_invariant(i >= 0); + paranoid_invariant(i < node->n_children); + FTNODE_CHILD_POINTER *p = &node->bp[i].ptr; + p->tag = BCT_NONLEAF; + p->u.nonleaf = nl; +} + +static inline BASEMENTNODE BLB(FTNODE node, int i) { + paranoid_invariant(i >= 0); + // The optimizer really doesn't like it when we compare + // i to n_children as signed integers. So we assert that + // n_children is in fact positive before doing a comparison + // on the values forcibly cast to unsigned ints. 
+ paranoid_invariant(node->n_children > 0); + paranoid_invariant((unsigned) i < (unsigned) node->n_children); + FTNODE_CHILD_POINTER p = node->bp[i].ptr; + paranoid_invariant(p.tag==BCT_LEAF); + return p.u.leaf; +} + +static inline void set_BLB(FTNODE node, int i, BASEMENTNODE bn) { + paranoid_invariant(i >= 0); + paranoid_invariant(i < node->n_children); + FTNODE_CHILD_POINTER *p = &node->bp[i].ptr; + p->tag = BCT_LEAF; + p->u.leaf = bn; +} + +static inline SUB_BLOCK BSB(FTNODE node, int i) { + paranoid_invariant(i >= 0); + paranoid_invariant(i < node->n_children); + FTNODE_CHILD_POINTER p = node->bp[i].ptr; + paranoid_invariant(p.tag==BCT_SUBBLOCK); + return p.u.subblock; +} + +static inline void set_BSB(FTNODE node, int i, SUB_BLOCK sb) { + paranoid_invariant(i >= 0); + paranoid_invariant(i < node->n_children); + FTNODE_CHILD_POINTER *p = &node->bp[i].ptr; + p->tag = BCT_SUBBLOCK; + p->u.subblock = sb; +} + +// ftnode partition macros +// BP stands for ftnode_partition +#define BP_BLOCKNUM(node,i) ((node)->bp[i].blocknum) +#define BP_STATE(node,i) ((node)->bp[i].state) +#define BP_WORKDONE(node, i)((node)->bp[i].workdone) + +// +// macros for managing a node's clock +// Should be managed by ft-ops.c, NOT by serialize/deserialize +// + +// +// BP_TOUCH_CLOCK uses a compare and swap because multiple threads +// that have a read lock on an internal node may try to touch the clock +// simultaneously +// +#define BP_TOUCH_CLOCK(node, i) ((node)->bp[i].clock_count = 1) +#define BP_SWEEP_CLOCK(node, i) ((node)->bp[i].clock_count = 0) +#define BP_SHOULD_EVICT(node, i) ((node)->bp[i].clock_count == 0) +// not crazy about having these two here, one is for the case where we create new +// nodes, such as in splits and creating new roots, and the other is for when +// we are deserializing a node and not all bp's are touched +#define BP_INIT_TOUCHED_CLOCK(node, i) ((node)->bp[i].clock_count = 1) +#define BP_INIT_UNTOUCHED_CLOCK(node, i) ((node)->bp[i].clock_count = 0) + +// 
ftnode leaf basementnode macros, +#define BLB_MAX_MSN_APPLIED(node,i) (BLB(node,i)->max_msn_applied) +#define BLB_MAX_DSN_APPLIED(node,i) (BLB(node,i)->max_dsn_applied) +#define BLB_DATA(node,i) (&(BLB(node,i)->data_buffer)) +#define BLB_NBYTESINDATA(node,i) (BLB_DATA(node,i)->get_disk_size()) +#define BLB_SEQINSERT(node,i) (BLB(node,i)->seqinsert) diff --git a/ft/tests/test-pick-child-to-flush.cc b/ft/tests/test-pick-child-to-flush.cc index 7a96ff154db..fe1762dc980 100644 --- a/ft/tests/test-pick-child-to-flush.cc +++ b/ft/tests/test-pick-child-to-flush.cc @@ -245,7 +245,7 @@ doit (void) { // what we say and flushes the child we pick FTNODE node = NULL; toku_pin_node_with_min_bfe(&node, node_internal, t); - toku_assert_entire_node_in_memory(node); + toku_ftnode_assert_fully_in_memory(node); assert(node->n_children == 2); assert(!node->dirty); assert(toku_bnc_n_entries(node->bp[0].ptr.u.nonleaf) > 0); @@ -268,7 +268,7 @@ doit (void) { assert(num_flushes_called == 1); toku_pin_node_with_min_bfe(&node, node_internal, t); - toku_assert_entire_node_in_memory(node); + toku_ftnode_assert_fully_in_memory(node); assert(node->dirty); assert(node->n_children == 2); // child 0 should have empty buffer because it flushed @@ -287,7 +287,7 @@ doit (void) { toku_pin_node_with_min_bfe(&node, node_internal, t); assert(node->dirty); - toku_assert_entire_node_in_memory(node); + toku_ftnode_assert_fully_in_memory(node); assert(node->n_children == 2); // both buffers should be empty now assert(toku_bnc_n_entries(node->bp[0].ptr.u.nonleaf) == 0); @@ -305,7 +305,7 @@ doit (void) { toku_pin_node_with_min_bfe(&node, node_internal, t); assert(node->dirty); // nothing was flushed, but since we were trying to flush to a leaf, both become dirty - toku_assert_entire_node_in_memory(node); + toku_ftnode_assert_fully_in_memory(node); assert(node->n_children == 2); // both buffers should be empty now assert(toku_bnc_n_entries(node->bp[0].ptr.u.nonleaf) == 0); @@ -326,7 +326,7 @@ doit (void) { // 
use a for loop so to get us down both paths for (int i = 0; i < 2; i++) { toku_pin_node_with_min_bfe(&node, node_root, t); - toku_assert_entire_node_in_memory(node); // entire root is in memory + toku_ftnode_assert_fully_in_memory(node); // entire root is in memory curr_child_to_flush = i; num_flushes_called = 0; toku_ft_flush_some_child(t->ft, node, &fa); @@ -376,7 +376,7 @@ doit (void) { //now let's do the same test as above toku_pin_node_with_min_bfe(&node, node_root, t); - toku_assert_entire_node_in_memory(node); // entire root is in memory + toku_ftnode_assert_fully_in_memory(node); // entire root is in memory curr_child_to_flush = 0; num_flushes_called = 0; toku_ft_flush_some_child(t->ft, node, &fa); diff --git a/ft/tests/test.h b/ft/tests/test.h index bb3440788c6..3d6b049af5c 100644 --- a/ft/tests/test.h +++ b/ft/tests/test.h @@ -102,6 +102,7 @@ PATENT RIGHTS GRANT: #include #include "ft.h" +#include "node.h" #include "block_table.h" #include "log-internal.h" #include "logger.h" diff --git a/ft/tests/test_rightmost_leaf_split_merge.cc b/ft/tests/test_rightmost_leaf_split_merge.cc index f0a99cc4806..49621d79328 100644 --- a/ft/tests/test_rightmost_leaf_split_merge.cc +++ b/ft/tests/test_rightmost_leaf_split_merge.cc @@ -180,7 +180,7 @@ static void test_split_merge(void) { toku_pin_ftnode(ft, rightmost_blocknum_before_merge, toku_cachetable_hash(ft->cf, rightmost_blocknum_before_merge), &bfe, PL_WRITE_EXPENSIVE, &rightmost_leaf, true); - invariant(get_node_reactivity(ft, rightmost_leaf) == RE_FUSIBLE); + invariant(toku_ftnode_get_reactivity(ft, rightmost_leaf) == RE_FUSIBLE); toku_unpin_ftnode(ft, rightmost_leaf); // - merge the rightmost child now that it's fusible diff --git a/ft/tokuftdump.cc b/ft/tokuftdump.cc index 45c4d154087..f2028b24280 100644 --- a/ft/tokuftdump.cc +++ b/ft/tokuftdump.cc @@ -95,6 +95,7 @@ PATENT RIGHTS GRANT: #include "ft.h" #include "fttypes.h" #include "ft-internal.h" +#include "ft/node.h" #include #include #include From 
b6abf2063c12932c42464518f3fddfa80d1504f1 Mon Sep 17 00:00:00 2001 From: John Esmet Date: Sat, 14 Jun 2014 20:40:33 -0400 Subject: [PATCH 020/190] FT-259 Clean up memarena API / code. Use a memarena in the locktree to store each transaction's ranges instead of a hand-rolled buffer. --- ft/ft_node-serialize.cc | 8 +- ft/logformat.cc | 4 +- ft/rbuf.h | 38 +-- ft/rollback-apply.cc | 4 +- ft/rollback.cc | 23 +- ft/rollback.h | 2 +- locktree/locktree.cc | 42 +-- locktree/locktree.h | 2 - locktree/range_buffer.cc | 380 ++++++++++++++-------------- locktree/range_buffer.h | 192 +++++++------- locktree/tests/range_buffer_test.cc | 13 +- src/ydb.cc | 2 +- src/ydb_row_lock.cc | 17 +- util/memarena.cc | 239 ++++++++--------- util/memarena.h | 96 +++++-- util/tests/memarena-test.cc | 230 +++++++++++++++++ 16 files changed, 770 insertions(+), 522 deletions(-) create mode 100644 util/tests/memarena-test.cc diff --git a/ft/ft_node-serialize.cc b/ft/ft_node-serialize.cc index f5733fc7958..330c65d7a6d 100644 --- a/ft/ft_node-serialize.cc +++ b/ft/ft_node-serialize.cc @@ -2548,7 +2548,7 @@ serialize_rollback_log_node_to_buf(ROLLBACK_LOG_NODE log, char *buf, size_t calc wbuf_nocrc_BLOCKNUM(&wb, log->previous); wbuf_nocrc_ulonglong(&wb, log->rollentry_resident_bytecount); //Write down memarena size needed to restore - wbuf_nocrc_ulonglong(&wb, toku_memarena_total_size_in_use(log->rollentry_arena)); + wbuf_nocrc_ulonglong(&wb, log->rollentry_arena.total_size_in_use()); { //Store rollback logs @@ -2712,8 +2712,8 @@ deserialize_rollback_log_from_rbuf (BLOCKNUM blocknum, ROLLBACK_LOG_NODE *log_p, result->rollentry_resident_bytecount = rbuf_ulonglong(rb); size_t arena_initial_size = rbuf_ulonglong(rb); - result->rollentry_arena = toku_memarena_create_presized(arena_initial_size); - if (0) { died1: toku_memarena_destroy(&result->rollentry_arena); goto died0; } + result->rollentry_arena.create(arena_initial_size); + if (0) { died1: result->rollentry_arena.destroy(); goto died0; } //Load 
rollback entries lazy_assert(rb->size > 4); @@ -2725,7 +2725,7 @@ deserialize_rollback_log_from_rbuf (BLOCKNUM blocknum, ROLLBACK_LOG_NODE *log_p, bytevec item_vec; rbuf_literal_bytes(rb, &item_vec, rollback_fsize-4); unsigned char* item_buf = (unsigned char*)item_vec; - r = toku_parse_rollback(item_buf, rollback_fsize-4, &item, result->rollentry_arena); + r = toku_parse_rollback(item_buf, rollback_fsize-4, &item, &result->rollentry_arena); if (r!=0) { r = toku_db_badformat(); goto died1; diff --git a/ft/logformat.cc b/ft/logformat.cc index 8fdbaf45428..bb35ea86c66 100644 --- a/ft/logformat.cc +++ b/ft/logformat.cc @@ -798,7 +798,7 @@ generate_rollbacks (void) { fprintf(cf, " }\n assert(0);\n return 0;\n"); fprintf(cf, "}\n"); - fprintf2(cf, hf, "int toku_parse_rollback(unsigned char *buf, uint32_t n_bytes, struct roll_entry **itemp, MEMARENA ma)"); + fprintf2(cf, hf, "int toku_parse_rollback(unsigned char *buf, uint32_t n_bytes, struct roll_entry **itemp, memarena *ma)"); fprintf(hf, ";\n"); fprintf(cf, " {\n assert(n_bytes>0);\n struct roll_entry *item;\n enum rt_cmd cmd = (enum rt_cmd)(buf[0]);\n size_t mem_needed;\n"); fprintf(cf, " struct rbuf rc = {buf, n_bytes, 1};\n"); @@ -806,7 +806,7 @@ generate_rollbacks (void) { DO_ROLLBACKS(lt, { fprintf(cf, " case RT_%s:\n", lt->name); fprintf(cf, " mem_needed = sizeof(item->u.%s) + __builtin_offsetof(struct roll_entry, u.%s);\n", lt->name, lt->name); - fprintf(cf, " CAST_FROM_VOIDP(item, toku_memarena_malloc(ma, mem_needed));\n"); + fprintf(cf, " CAST_FROM_VOIDP(item, ma->malloc_from_arena(mem_needed));\n"); fprintf(cf, " item->cmd = cmd;\n"); DO_FIELDS(field_type, lt, fprintf(cf, " rbuf_ma_%s(&rc, ma, &item->u.%s.%s);\n", field_type->type, lt->name, field_type->name)); fprintf(cf, " *itemp = item;\n"); diff --git a/ft/rbuf.h b/ft/rbuf.h index 755d0182ff2..83c19e4ceec 100644 --- a/ft/rbuf.h +++ b/ft/rbuf.h @@ -92,13 +92,14 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." 
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#include -#include "toku_assert.h" -#include "fttypes.h" -#include "memory.h" -#include +#include -#include +#include "ft/fttypes.h" +#include "portability/memory.h" +#include "portability/toku_assert.h" +#include "portability/toku_htonl.h" +#include "portability/toku_portability.h" +#include "util/memarena.h" struct rbuf { unsigned char *buf; @@ -122,11 +123,11 @@ static inline unsigned char rbuf_char (struct rbuf *r) { return r->buf[r->ndone++]; } -static inline void rbuf_ma_uint8_t (struct rbuf *r, MEMARENA ma __attribute__((__unused__)), uint8_t *num) { +static inline void rbuf_ma_uint8_t (struct rbuf *r, memarena *ma __attribute__((__unused__)), uint8_t *num) { *num = rbuf_char(r); } -static inline void rbuf_ma_bool (struct rbuf *r, MEMARENA ma __attribute__((__unused__)), bool *b) { +static inline void rbuf_ma_bool (struct rbuf *r, memarena *ma __attribute__((__unused__)), bool *b) { uint8_t n = rbuf_char(r); *b = (n!=0); } @@ -199,15 +200,15 @@ static inline BLOCKNUM rbuf_blocknum (struct rbuf *r) { BLOCKNUM result = make_blocknum(rbuf_longlong(r)); return result; } -static inline void rbuf_ma_BLOCKNUM (struct rbuf *r, MEMARENA ma __attribute__((__unused__)), BLOCKNUM *blocknum) { +static inline void rbuf_ma_BLOCKNUM (struct rbuf *r, memarena *ma __attribute__((__unused__)), BLOCKNUM *blocknum) { *blocknum = rbuf_blocknum(r); } -static inline void rbuf_ma_uint32_t (struct rbuf *r, MEMARENA ma __attribute__((__unused__)), uint32_t *num) { +static inline void rbuf_ma_uint32_t (struct rbuf *r, memarena *ma __attribute__((__unused__)), uint32_t *num) { *num = rbuf_int(r); } -static inline void rbuf_ma_uint64_t (struct rbuf *r, MEMARENA ma 
__attribute__((__unused__)), uint64_t *num) { +static inline void rbuf_ma_uint64_t (struct rbuf *r, memarena *ma __attribute__((__unused__)), uint64_t *num) { *num = rbuf_ulonglong(r); } @@ -221,18 +222,18 @@ static inline void rbuf_TXNID_PAIR (struct rbuf *r, TXNID_PAIR *txnid) { txnid->child_id64 = rbuf_ulonglong(r); } -static inline void rbuf_ma_TXNID (struct rbuf *r, MEMARENA ma __attribute__((__unused__)), TXNID *txnid) { +static inline void rbuf_ma_TXNID (struct rbuf *r, memarena *ma __attribute__((__unused__)), TXNID *txnid) { rbuf_TXNID(r, txnid); } -static inline void rbuf_ma_TXNID_PAIR (struct rbuf *r, MEMARENA ma __attribute__((__unused__)), TXNID_PAIR *txnid) { +static inline void rbuf_ma_TXNID_PAIR (struct rbuf *r, memarena *ma __attribute__((__unused__)), TXNID_PAIR *txnid) { rbuf_TXNID_PAIR(r, txnid); } static inline void rbuf_FILENUM (struct rbuf *r, FILENUM *filenum) { filenum->fileid = rbuf_int(r); } -static inline void rbuf_ma_FILENUM (struct rbuf *r, MEMARENA ma __attribute__((__unused__)), FILENUM *filenum) { +static inline void rbuf_ma_FILENUM (struct rbuf *r, memarena *ma __attribute__((__unused__)), FILENUM *filenum) { rbuf_FILENUM(r, filenum); } @@ -248,9 +249,9 @@ static inline void rbuf_FILENUMS(struct rbuf *r, FILENUMS *filenums) { } // 2954 -static inline void rbuf_ma_FILENUMS (struct rbuf *r, MEMARENA ma __attribute__((__unused__)), FILENUMS *filenums) { +static inline void rbuf_ma_FILENUMS (struct rbuf *r, memarena *ma __attribute__((__unused__)), FILENUMS *filenums) { rbuf_ma_uint32_t(r, ma, &(filenums->num)); - filenums->filenums = (FILENUM *) toku_memarena_malloc(ma, filenums->num * sizeof(FILENUM) ); + filenums->filenums = (FILENUM *) ma->malloc_from_arena(filenums->num * sizeof(FILENUM)); assert(filenums->filenums != NULL); for (uint32_t i=0; i < filenums->num; i++) { rbuf_ma_FILENUM(r, ma, &(filenums->filenums[i])); @@ -267,11 +268,12 @@ static inline void rbuf_BYTESTRING (struct rbuf *r, BYTESTRING *bs) { r->ndone = newndone; } 
-static inline void rbuf_ma_BYTESTRING (struct rbuf *r, MEMARENA ma, BYTESTRING *bs) { +static inline void rbuf_ma_BYTESTRING (struct rbuf *r, memarena *ma, BYTESTRING *bs) { bs->len = rbuf_int(r); uint32_t newndone = r->ndone + bs->len; assert(newndone <= r->size); - bs->data = (char *) toku_memarena_memdup(ma, &r->buf[r->ndone], (size_t)bs->len); + bs->data = (char *) ma->malloc_from_arena(bs->len); assert(bs->data); + memcpy(bs->data, &r->buf[r->ndone], bs->len); r->ndone = newndone; } diff --git a/ft/rollback-apply.cc b/ft/rollback-apply.cc index 1dd3062b33e..d5f0ab3a18f 100644 --- a/ft/rollback-apply.cc +++ b/ft/rollback-apply.cc @@ -258,9 +258,9 @@ int toku_rollback_commit(TOKUTXN txn, LSN lsn) { } child_log->newest_logentry = child_log->oldest_logentry = 0; // Put all the memarena data into the parent. - if (toku_memarena_total_size_in_use(child_log->rollentry_arena) > 0) { + if (child_log->rollentry_arena.total_size_in_use() > 0) { // If there are no bytes to move, then just leave things alone, and let the memory be reclaimed on txn is closed. - toku_memarena_move_buffers(parent_log->rollentry_arena, child_log->rollentry_arena); + child_log->rollentry_arena.move_memory(&parent_log->rollentry_arena); } // each txn tries to give back at most one rollback log node // to the cache. 
All other rollback log nodes for this child diff --git a/ft/rollback.cc b/ft/rollback.cc index 1b1a99d908e..ccb8fbfa286 100644 --- a/ft/rollback.cc +++ b/ft/rollback.cc @@ -120,13 +120,17 @@ toku_find_xid_by_xid (const TXNID &xid, const TXNID &xidfind) { return 0; } +// TODO: fix this name +// toku_rollback_malloc void *toku_malloc_in_rollback(ROLLBACK_LOG_NODE log, size_t size) { - return toku_memarena_malloc(log->rollentry_arena, size); + return log->rollentry_arena.malloc_from_arena(size); } +// TODO: fix this name +// toku_rollback_memdup void *toku_memdup_in_rollback(ROLLBACK_LOG_NODE log, const void *v, size_t len) { - void *r=toku_malloc_in_rollback(log, len); - memcpy(r,v,len); + void *r = toku_malloc_in_rollback(log, len); + memcpy(r, v, len); return r; } @@ -145,8 +149,8 @@ static inline PAIR_ATTR make_rollback_pair_attr(long size) { PAIR_ATTR rollback_memory_size(ROLLBACK_LOG_NODE log) { size_t size = sizeof(*log); - if (log->rollentry_arena) { - size += toku_memarena_total_footprint(log->rollentry_arena); + if (&log->rollentry_arena) { + size += log->rollentry_arena.total_footprint(); } return make_rollback_pair_attr(size); } @@ -175,12 +179,10 @@ void rollback_empty_log_init(ROLLBACK_LOG_NODE log) { log->previous = make_blocknum(0); log->oldest_logentry = NULL; log->newest_logentry = NULL; - log->rollentry_arena = NULL; + log->rollentry_arena.create(0); log->rollentry_resident_bytecount = 0; } - - static void rollback_initialize_for_txn( ROLLBACK_LOG_NODE log, TOKUTXN txn, @@ -192,13 +194,14 @@ static void rollback_initialize_for_txn( log->previous = previous; log->oldest_logentry = NULL; log->newest_logentry = NULL; - log->rollentry_arena = toku_memarena_create(); + log->rollentry_arena.create(1024); log->rollentry_resident_bytecount = 0; log->dirty = true; } +// TODO: fix this name void make_rollback_log_empty(ROLLBACK_LOG_NODE log) { - toku_memarena_destroy(&log->rollentry_arena); + log->rollentry_arena.destroy(); rollback_empty_log_init(log); } 
diff --git a/ft/rollback.h b/ft/rollback.h index b1441a9b17b..6664ddc5667 100644 --- a/ft/rollback.h +++ b/ft/rollback.h @@ -165,7 +165,7 @@ struct rollback_log_node { BLOCKNUM previous; struct roll_entry *oldest_logentry; struct roll_entry *newest_logentry; - MEMARENA rollentry_arena; + struct memarena rollentry_arena; size_t rollentry_resident_bytecount; // How many bytes for the rollentries that are stored in main memory. PAIR ct_pair; }; diff --git a/locktree/locktree.cc b/locktree/locktree.cc index 2deb8c2ad78..164d0cbc0da 100644 --- a/locktree/locktree.cc +++ b/locktree/locktree.cc @@ -258,18 +258,18 @@ void locktree::sto_append(const DBT *left_key, const DBT *right_key) { keyrange range; range.create(left_key, right_key); - buffer_mem = m_sto_buffer.get_num_bytes(); + buffer_mem = m_sto_buffer.total_memory_size(); m_sto_buffer.append(left_key, right_key); - delta = m_sto_buffer.get_num_bytes() - buffer_mem; + delta = m_sto_buffer.total_memory_size() - buffer_mem; if (m_mgr != nullptr) { m_mgr->note_mem_used(delta); } } void locktree::sto_end(void) { - uint64_t num_bytes = m_sto_buffer.get_num_bytes(); + uint64_t mem_size = m_sto_buffer.total_memory_size(); if (m_mgr != nullptr) { - m_mgr->note_mem_released(num_bytes); + m_mgr->note_mem_released(mem_size); } m_sto_buffer.destroy(); m_sto_buffer.create(); @@ -302,9 +302,8 @@ void locktree::sto_migrate_buffer_ranges_to_tree(void *prepared_lkr) { sto_rangetree.create(m_cmp); // insert all of the ranges from the single txnid buffer into a new rangtree - range_buffer::iterator iter; + range_buffer::iterator iter(&m_sto_buffer); range_buffer::iterator::record rec; - iter.create(&m_sto_buffer); while (iter.current(&rec)) { sto_lkr.prepare(&sto_rangetree); int r = acquire_lock_consolidated(&sto_lkr, @@ -575,9 +574,8 @@ void locktree::release_locks(TXNID txnid, const range_buffer *ranges) { // locks are already released, otherwise we need to do it here. 
bool released = sto_try_release(txnid); if (!released) { - range_buffer::iterator iter; + range_buffer::iterator iter(ranges); range_buffer::iterator::record rec; - iter.create(ranges); while (iter.current(&rec)) { const DBT *left_key = rec.get_left_key(); const DBT *right_key = rec.get_right_key(); @@ -647,10 +645,10 @@ struct txnid_range_buffer { TXNID txnid; range_buffer buffer; - static int find_by_txnid(const struct txnid_range_buffer &other_buffer, const TXNID &txnid) { - if (txnid < other_buffer.txnid) { + static int find_by_txnid(struct txnid_range_buffer *const &other_buffer, const TXNID &txnid) { + if (txnid < other_buffer->txnid) { return -1; - } else if (other_buffer.txnid == txnid) { + } else if (other_buffer->txnid == txnid) { return 0; } else { return 1; @@ -666,7 +664,7 @@ struct txnid_range_buffer { // has locks in a random/alternating order, then this does // not work so well. void locktree::escalate(lt_escalate_cb after_escalate_callback, void *after_escalate_callback_extra) { - omt range_buffers; + omt range_buffers; range_buffers.create(); // prepare and acquire a locked keyrange on the entire locktree @@ -716,7 +714,6 @@ void locktree::escalate(lt_escalate_cb after_escalate_callback, void *after_esca // Try to find a range buffer for the current txnid. Create one if it doesn't exist. // Then, append the new escalated range to the buffer. 
uint32_t idx; - struct txnid_range_buffer new_range_buffer; struct txnid_range_buffer *existing_range_buffer; int r = range_buffers.find_zero( current_txnid, @@ -724,9 +721,10 @@ void locktree::escalate(lt_escalate_cb after_escalate_callback, void *after_esca &idx ); if (r == DB_NOTFOUND) { - new_range_buffer.txnid = current_txnid; - new_range_buffer.buffer.create(); - new_range_buffer.buffer.append(escalated_left_key, escalated_right_key); + struct txnid_range_buffer *XMALLOC(new_range_buffer); + new_range_buffer->txnid = current_txnid; + new_range_buffer->buffer.create(); + new_range_buffer->buffer.append(escalated_left_key, escalated_right_key); range_buffers.insert_at(new_range_buffer, idx); } else { invariant_zero(r); @@ -754,9 +752,8 @@ void locktree::escalate(lt_escalate_cb after_escalate_callback, void *after_esca invariant_zero(r); const TXNID current_txnid = current_range_buffer->txnid; - range_buffer::iterator iter; + range_buffer::iterator iter(¤t_range_buffer->buffer); range_buffer::iterator::record rec; - iter.create(¤t_range_buffer->buffer); while (iter.current(&rec)) { keyrange range; range.create(rec.get_left_key(), rec.get_right_key()); @@ -771,6 +768,15 @@ void locktree::escalate(lt_escalate_cb after_escalate_callback, void *after_esca } current_range_buffer->buffer.destroy(); } + + while (range_buffers.size() > 0) { + struct txnid_range_buffer *buffer; + int r = range_buffers.fetch(0, &buffer); + invariant_zero(r); + r = range_buffers.delete_at(0); + invariant_zero(r); + toku_free(buffer); + } range_buffers.destroy(); lkr.release(); diff --git a/locktree/locktree.h b/locktree/locktree.h index 2f8dcef6668..ebd736bc746 100644 --- a/locktree/locktree.h +++ b/locktree/locktree.h @@ -137,7 +137,6 @@ namespace toku { class locktree; class locktree_manager; class lock_request; - class memory_tracker; class concurrent_tree; typedef int (*lt_create_cb)(locktree *lt, void *extra); @@ -246,7 +245,6 @@ namespace toku { // tracks the current number of locks 
and lock memory uint64_t m_max_lock_memory; uint64_t m_current_lock_memory; - memory_tracker *m_mem_tracker; struct lt_counters m_lt_counters; diff --git a/locktree/range_buffer.cc b/locktree/range_buffer.cc index 5fd86a631c9..e33e6e842a2 100644 --- a/locktree/range_buffer.cc +++ b/locktree/range_buffer.cc @@ -97,207 +97,201 @@ PATENT RIGHTS GRANT: namespace toku { -bool range_buffer::record_header::left_is_infinite(void) const { - return left_neg_inf || left_pos_inf; -} - -bool range_buffer::record_header::right_is_infinite(void) const { - return right_neg_inf || right_pos_inf; -} - -void range_buffer::record_header::init(const DBT *left_key, const DBT *right_key) { - left_neg_inf = left_key == toku_dbt_negative_infinity(); - left_pos_inf = left_key == toku_dbt_positive_infinity(); - left_key_size = toku_dbt_is_infinite(left_key) ? 0 : left_key->size; - if (right_key) { - right_neg_inf = right_key == toku_dbt_negative_infinity(); - right_pos_inf = right_key == toku_dbt_positive_infinity(); - right_key_size = toku_dbt_is_infinite(right_key) ? 
0 : right_key->size; - } else { - right_neg_inf = left_neg_inf; - right_pos_inf = left_pos_inf; - right_key_size = 0; - } -} - -const DBT *range_buffer::iterator::record::get_left_key(void) const { - if (m_header.left_neg_inf) { - return toku_dbt_negative_infinity(); - } else if (m_header.left_pos_inf) { - return toku_dbt_positive_infinity(); - } else { - return &m_left_key; - } -} - -const DBT *range_buffer::iterator::record::get_right_key(void) const { - if (m_header.right_neg_inf) { - return toku_dbt_negative_infinity(); - } else if (m_header.right_pos_inf) { - return toku_dbt_positive_infinity(); - } else { - return &m_right_key; - } -} - -size_t range_buffer::iterator::record::size(void) const { - return sizeof(record_header) + m_header.left_key_size + m_header.right_key_size; -} - -void range_buffer::iterator::record::deserialize(const char *buf) { - size_t current = 0; - - // deserialize the header - memcpy(&m_header, buf, sizeof(record_header)); - current += sizeof(record_header); - - // deserialize the left key if necessary - if (!m_header.left_is_infinite()) { - // point the left DBT's buffer into ours - toku_fill_dbt(&m_left_key, buf + current, m_header.left_key_size); - current += m_header.left_key_size; + bool range_buffer::record_header::left_is_infinite(void) const { + return left_neg_inf || left_pos_inf; } - // deserialize the right key if necessary - if (!m_header.right_is_infinite()) { - if (m_header.right_key_size == 0) { - toku_copyref_dbt(&m_right_key, m_left_key); + bool range_buffer::record_header::right_is_infinite(void) const { + return right_neg_inf || right_pos_inf; + } + + void range_buffer::record_header::init(const DBT *left_key, const DBT *right_key) { + left_neg_inf = left_key == toku_dbt_negative_infinity(); + left_pos_inf = left_key == toku_dbt_positive_infinity(); + left_key_size = toku_dbt_is_infinite(left_key) ? 
0 : left_key->size; + if (right_key) { + right_neg_inf = right_key == toku_dbt_negative_infinity(); + right_pos_inf = right_key == toku_dbt_positive_infinity(); + right_key_size = toku_dbt_is_infinite(right_key) ? 0 : right_key->size; } else { - toku_fill_dbt(&m_right_key, buf + current, m_header.right_key_size); + right_neg_inf = left_neg_inf; + right_pos_inf = left_pos_inf; + right_key_size = 0; } } -} - -void range_buffer::iterator::create(const range_buffer *buffer) { - m_buffer = buffer; - m_current_offset = 0; - m_current_size = 0; -} - -bool range_buffer::iterator::current(record *rec) { - if (m_current_offset < m_buffer->m_buf_current) { - rec->deserialize(m_buffer->m_buf + m_current_offset); - m_current_size = rec->size(); - return true; - } else { - return false; - } -} - -// move the iterator to the next record in the buffer -void range_buffer::iterator::next(void) { - invariant(m_current_offset < m_buffer->m_buf_current); - invariant(m_current_size > 0); - - // the next record is m_current_size bytes forward - // now, we don't know how big the current is, set it to 0. - m_current_offset += m_current_size; - m_current_size = 0; -} - -void range_buffer::create(void) { - // allocate buffer space lazily instead of on creation. this way, - // no malloc/free is done if the transaction ends up taking no locks. - m_buf = nullptr; - m_buf_size = 0; - m_buf_current = 0; - m_num_ranges = 0; -} - -void range_buffer::append(const DBT *left_key, const DBT *right_key) { - // if the keys are equal, then only one copy is stored. 
- if (toku_dbt_equals(left_key, right_key)) { - append_point(left_key); - } else { - append_range(left_key, right_key); - } - m_num_ranges++; -} - -bool range_buffer::is_empty(void) const { - return m_buf == nullptr; -} - -uint64_t range_buffer::get_num_bytes(void) const { - return m_buf_current; -} - -int range_buffer::get_num_ranges(void) const { - return m_num_ranges; -} - -void range_buffer::destroy(void) { - if (m_buf) { - toku_free(m_buf); - } -} - -void range_buffer::append_range(const DBT *left_key, const DBT *right_key) { - maybe_grow(sizeof(record_header) + left_key->size + right_key->size); - - record_header h; - h.init(left_key, right_key); - - // serialize the header - memcpy(m_buf + m_buf_current, &h, sizeof(record_header)); - m_buf_current += sizeof(record_header); - - // serialize the left key if necessary - if (!h.left_is_infinite()) { - memcpy(m_buf + m_buf_current, left_key->data, left_key->size); - m_buf_current += left_key->size; - } - - // serialize the right key if necessary - if (!h.right_is_infinite()) { - memcpy(m_buf + m_buf_current, right_key->data, right_key->size); - m_buf_current += right_key->size; - } -} - -void range_buffer::append_point(const DBT *key) { - maybe_grow(sizeof(record_header) + key->size); - - record_header h; - h.init(key, nullptr); - - // serialize the header - memcpy(m_buf + m_buf_current, &h, sizeof(record_header)); - m_buf_current += sizeof(record_header); - - // serialize the key if necessary - if (!h.left_is_infinite()) { - memcpy(m_buf + m_buf_current, key->data, key->size); - m_buf_current += key->size; - } -} - -void range_buffer::maybe_grow(size_t size) { - static const size_t initial_size = 4096; - static const size_t aggressive_growth_threshold = 128 * 1024; - const size_t needed = m_buf_current + size; - if (m_buf_size < needed) { - if (m_buf_size == 0) { - m_buf_size = initial_size; + + const DBT *range_buffer::iterator::record::get_left_key(void) const { + if (_header.left_neg_inf) { + return 
toku_dbt_negative_infinity(); + } else if (_header.left_pos_inf) { + return toku_dbt_positive_infinity(); + } else { + return &_left_key; } - // aggressively grow the range buffer to the threshold, - // but only additivately increase the size after that. - while (m_buf_size < needed && m_buf_size < aggressive_growth_threshold) { - m_buf_size <<= 1; - } - while (m_buf_size < needed) { - m_buf_size += aggressive_growth_threshold; - } - XREALLOC(m_buf, m_buf_size); } -} -size_t range_buffer::get_initial_size(size_t n) const { - size_t r = 4096; - while (r < n) { - r *= 2; + const DBT *range_buffer::iterator::record::get_right_key(void) const { + if (_header.right_neg_inf) { + return toku_dbt_negative_infinity(); + } else if (_header.right_pos_inf) { + return toku_dbt_positive_infinity(); + } else { + return &_right_key; + } + } + + size_t range_buffer::iterator::record::size(void) const { + return sizeof(record_header) + _header.left_key_size + _header.right_key_size; + } + + void range_buffer::iterator::record::deserialize(const char *buf) { + size_t current = 0; + + // deserialize the header + memcpy(&_header, buf, sizeof(record_header)); + current += sizeof(record_header); + + // deserialize the left key if necessary + if (!_header.left_is_infinite()) { + // point the left DBT's buffer into ours + toku_fill_dbt(&_left_key, buf + current, _header.left_key_size); + current += _header.left_key_size; + } + + // deserialize the right key if necessary + if (!_header.right_is_infinite()) { + if (_header.right_key_size == 0) { + toku_copyref_dbt(&_right_key, _left_key); + } else { + toku_fill_dbt(&_right_key, buf + current, _header.right_key_size); + } + } + } + + toku::range_buffer::iterator::iterator() : + _ma_chunk_iterator(nullptr), + _current_chunk_base(nullptr), + _current_chunk_offset(0), _current_chunk_max(0), + _current_rec_size(0) { + } + + toku::range_buffer::iterator::iterator(const range_buffer *buffer) : + _ma_chunk_iterator(&buffer->_arena), + 
_current_chunk_base(nullptr), + _current_chunk_offset(0), _current_chunk_max(0), + _current_rec_size(0) { + reset_current_chunk(); + } + + void range_buffer::iterator::reset_current_chunk() { + _current_chunk_base = _ma_chunk_iterator.current(&_current_chunk_max); + _current_chunk_offset = 0; + } + + bool range_buffer::iterator::current(record *rec) { + if (_current_chunk_offset < _current_chunk_max) { + const char *buf = reinterpret_cast(_current_chunk_base); + rec->deserialize(buf + _current_chunk_offset); + _current_rec_size = rec->size(); + return true; + } else { + return false; + } + } + + // move the iterator to the next record in the buffer + void range_buffer::iterator::next(void) { + invariant(_current_chunk_offset < _current_chunk_max); + invariant(_current_rec_size > 0); + + // the next record is _current_rec_size bytes forward + _current_chunk_offset += _current_rec_size; + // now, we don't know how big the current is, set it to 0. + _current_rec_size = 0; + + if (_current_chunk_offset >= _current_chunk_max) { + // current chunk is exhausted, try moving to the next one + if (_ma_chunk_iterator.more()) { + _ma_chunk_iterator.next(); + reset_current_chunk(); + } + } + } + + void range_buffer::create(void) { + // allocate buffer space lazily instead of on creation. this way, + // no malloc/free is done if the transaction ends up taking no locks. + _arena.create(0); + _num_ranges = 0; + } + + void range_buffer::append(const DBT *left_key, const DBT *right_key) { + // if the keys are equal, then only one copy is stored. 
+ if (toku_dbt_equals(left_key, right_key)) { + invariant(left_key->size <= MAX_KEY_SIZE); + append_point(left_key); + } else { + invariant(left_key->size <= MAX_KEY_SIZE); + invariant(right_key->size <= MAX_KEY_SIZE); + append_range(left_key, right_key); + } + _num_ranges++; + } + + bool range_buffer::is_empty(void) const { + return total_memory_size() == 0; + } + + uint64_t range_buffer::total_memory_size(void) const { + return _arena.total_size_in_use(); + } + + int range_buffer::get_num_ranges(void) const { + return _num_ranges; + } + + void range_buffer::destroy(void) { + _arena.destroy(); + } + + void range_buffer::append_range(const DBT *left_key, const DBT *right_key) { + size_t record_length = sizeof(record_header) + left_key->size + right_key->size; + char *buf = reinterpret_cast(_arena.malloc_from_arena(record_length)); + + record_header h; + h.init(left_key, right_key); + + // serialize the header + memcpy(buf, &h, sizeof(record_header)); + buf += sizeof(record_header); + + // serialize the left key if necessary + if (!h.left_is_infinite()) { + memcpy(buf, left_key->data, left_key->size); + buf += left_key->size; + } + + // serialize the right key if necessary + if (!h.right_is_infinite()) { + memcpy(buf, right_key->data, right_key->size); + } + } + + void range_buffer::append_point(const DBT *key) { + size_t record_length = sizeof(record_header) + key->size; + char *buf = reinterpret_cast(_arena.malloc_from_arena(record_length)); + + record_header h; + h.init(key, nullptr); + + // serialize the header + memcpy(buf, &h, sizeof(record_header)); + buf += sizeof(record_header); + + // serialize the key if necessary + if (!h.left_is_infinite()) { + memcpy(buf, key->data, key->size); + } } - return r; -} } /* namespace toku */ diff --git a/locktree/range_buffer.h b/locktree/range_buffer.h index 22bb5c43463..845d6c98ced 100644 --- a/locktree/range_buffer.h +++ b/locktree/range_buffer.h @@ -91,128 +91,120 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 
Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#include - -#include +#include "ft/ybt.h" +#include "portability/toku_stdint.h" +#include "util/memarena.h" namespace toku { -// a key range buffer represents a set of key ranges that can -// be stored, iterated over, and then destroyed all at once. + // a key range buffer represents a set of key ranges that can + // be stored, iterated over, and then destroyed all at once. + class range_buffer { + private: -class range_buffer { -// Private in spirit: We fail POD asserts when we try to store range_buffers in an omt. -// So make it all public, but don't touch. -public: -//private: + // the key range buffer is a bunch of records in a row. + // each record has the following header, followed by the + // left key and right key data payload, if applicable. + // we limit keys to be 2^16, since we store lengths as 2 bytes. + static const size_t MAX_KEY_SIZE = 1 << 16; - // the key range buffer is a bunch of records in a row. - // each record has the following header, followed by the - // left key and right key data payload, if applicable. 
+ struct record_header { + bool left_neg_inf; + bool left_pos_inf; + bool right_pos_inf; + bool right_neg_inf; + uint16_t left_key_size; + uint16_t right_key_size; - struct record_header { - bool left_neg_inf; - bool left_pos_inf; - bool right_pos_inf; - bool right_neg_inf; - uint32_t left_key_size; - uint32_t right_key_size; + bool left_is_infinite(void) const; - bool left_is_infinite(void) const; + bool right_is_infinite(void) const; - bool right_is_infinite(void) const; - - void init(const DBT *left_key, const DBT *right_key); - }; - static_assert(sizeof(record_header) == 12, "record header format is off"); - -public: - - // the iterator abstracts reading over a buffer of variable length - // records one by one until there are no more left. - - class iterator { + void init(const DBT *left_key, const DBT *right_key); + }; + static_assert(sizeof(record_header) == 8, "record header format is off"); + public: - // a record represents the user-view of a serialized key range. - // it handles positive and negative infinity and the optimized - // point range case, where left and right points share memory. - - class record { + // the iterator abstracts reading over a buffer of variable length + // records one by one until there are no more left. + class iterator { public: - // get a read-only pointer to the left key of this record's range - const DBT *get_left_key(void) const; + iterator(); + iterator(const range_buffer *buffer); - // get a read-only pointer to the right key of this record's range - const DBT *get_right_key(void) const; + // a record represents the user-view of a serialized key range. + // it handles positive and negative infinity and the optimized + // point range case, where left and right points share memory. + class record { + public: + // get a read-only pointer to the left key of this record's range + const DBT *get_left_key(void) const; - // how big is this record? 
this tells us where the next record is - size_t size(void) const; + // get a read-only pointer to the right key of this record's range + const DBT *get_right_key(void) const; - // populate a record header and point our DBT's - // buffers into ours if they are not infinite. - void deserialize(const char *buf); + // how big is this record? this tells us where the next record is + size_t size(void) const; + + // populate a record header and point our DBT's + // buffers into ours if they are not infinite. + void deserialize(const char *buf); + + private: + record_header _header; + DBT _left_key; + DBT _right_key; + }; + + // populate the given record object with the current + // the memory referred to by record is valid for only + // as long as the record exists. + bool current(record *rec); + + // move the iterator to the next record in the buffer + void next(void); private: - record_header m_header; - DBT m_left_key; - DBT m_right_key; + void reset_current_chunk(); + + // the key range buffer we are iterating over, the current + // offset in that buffer, and the size of the current record. + memarena::chunk_iterator _ma_chunk_iterator; + const void *_current_chunk_base; + size_t _current_chunk_offset; + size_t _current_chunk_max; + size_t _current_rec_size; }; - void create(const range_buffer *buffer); + // allocate buffer space lazily instead of on creation. this way, + // no malloc/free is done if the transaction ends up taking no locks. + void create(void); - // populate the given record object with the current - // the memory referred to by record is valid for only - // as long as the record exists. - bool current(record *rec); + // append a left/right key range to the buffer. + // if the keys are equal, then only one copy is stored. + void append(const DBT *left_key, const DBT *right_key); - // move the iterator to the next record in the buffer - void next(void); + // is this range buffer empty? 
+ bool is_empty(void) const; + + // how much memory is being used by this range buffer? + uint64_t total_memory_size(void) const; + + // how many ranges are stored in this range buffer? + int get_num_ranges(void) const; + + void destroy(void); private: - // the key range buffer we are iterating over, the current - // offset in that buffer, and the size of the current record. - const range_buffer *m_buffer; - size_t m_current_offset; - size_t m_current_size; + memarena _arena; + int _num_ranges; + + void append_range(const DBT *left_key, const DBT *right_key); + + // append a point to the buffer. this is the space/time saving + // optimization for key ranges where left == right. + void append_point(const DBT *key); }; - // allocate buffer space lazily instead of on creation. this way, - // no malloc/free is done if the transaction ends up taking no locks. - void create(void); - - // append a left/right key range to the buffer. - // if the keys are equal, then only one copy is stored. - void append(const DBT *left_key, const DBT *right_key); - - // is this range buffer empty? - bool is_empty(void) const; - - // how many bytes are stored in this range buffer? - uint64_t get_num_bytes(void) const; - - // how many ranges are stored in this range buffer? - int get_num_ranges(void) const; - - void destroy(void); - -//private: - char *m_buf; - size_t m_buf_size; - size_t m_buf_current; - int m_num_ranges; - - void append_range(const DBT *left_key, const DBT *right_key); - - // append a point to the buffer. this is the space/time saving - // optimization for key ranges where left == right. - void append_point(const DBT *key); - - void maybe_grow(size_t size); - - // the initial size of the buffer is the next power of 2 - // greater than the first entry we insert into the buffer. 
- size_t get_initial_size(size_t n) const; -}; - } /* namespace toku */ diff --git a/locktree/tests/range_buffer_test.cc b/locktree/tests/range_buffer_test.cc index 38ed2469b69..5df3cc522ee 100644 --- a/locktree/tests/range_buffer_test.cc +++ b/locktree/tests/range_buffer_test.cc @@ -121,9 +121,8 @@ static void test_points(void) { } size_t i = 0; - range_buffer::iterator iter; + range_buffer::iterator iter(&buffer); range_buffer::iterator::record rec; - iter.create(&buffer); while (iter.current(&rec)) { const DBT *expected_point = get_dbt_by_iteration(i); invariant(compare_dbts(nullptr, expected_point, rec.get_left_key()) == 0); @@ -151,9 +150,8 @@ static void test_ranges(void) { } size_t i = 0; - range_buffer::iterator iter; + range_buffer::iterator iter(&buffer); range_buffer::iterator::record rec; - iter.create(&buffer); while (iter.current(&rec)) { const DBT *expected_left = get_dbt_by_iteration(i); const DBT *expected_right = get_dbt_by_iteration(i + 1); @@ -187,9 +185,8 @@ static void test_mixed(void) { } size_t i = 0; - range_buffer::iterator iter; + range_buffer::iterator iter(&buffer); range_buffer::iterator::record rec; - iter.create(&buffer); while (iter.current(&rec)) { const DBT *expected_left = get_dbt_by_iteration(i); const DBT *expected_right = get_dbt_by_iteration(i + 1); @@ -232,10 +229,10 @@ static void test_small_and_large_points(void) { // Append a small dbt, the buf should be able to fit it. buffer.append(&small_dbt, &small_dbt); - invariant(buffer.m_buf_size >= small_dbt.size); + invariant(buffer.total_memory_size() >= small_dbt.size); // Append a large dbt, the buf should be able to fit it. 
buffer.append(&large_dbt, &large_dbt); - invariant(buffer.m_buf_size >= (small_dbt.size + large_dbt.size)); + invariant(buffer.total_memory_size() >= (small_dbt.size + large_dbt.size)); toku_free(small_buf); toku_free(large_buf); diff --git a/src/ydb.cc b/src/ydb.cc index 7149e235821..d164eb4adbc 100644 --- a/src/ydb.cc +++ b/src/ydb.cc @@ -2463,7 +2463,7 @@ struct iter_txn_row_locks_callback_extra { const int r = lt_map->fetch(which_lt, &ranges); invariant_zero(r); current_db = locked_get_db_by_dict_id(env, ranges.lt->get_dict_id()); - iter.create(ranges.buffer); + iter = toku::range_buffer::iterator(ranges.buffer); } DB_ENV *env; diff --git a/src/ydb_row_lock.cc b/src/ydb_row_lock.cc index db5548a00c3..89b436380ea 100644 --- a/src/ydb_row_lock.cc +++ b/src/ydb_row_lock.cc @@ -144,11 +144,11 @@ static void db_txn_note_row_lock(DB *db, DB_TXN *txn, const DBT *left_key, const } // add a new lock range to this txn's row lock buffer - size_t old_num_bytes = ranges.buffer->get_num_bytes(); + size_t old_mem_size = ranges.buffer->total_memory_size(); ranges.buffer->append(left_key, right_key); - size_t new_num_bytes = ranges.buffer->get_num_bytes(); - invariant(new_num_bytes > old_num_bytes); - lt->get_manager()->note_mem_used(new_num_bytes - old_num_bytes); + size_t new_mem_size = ranges.buffer->total_memory_size(); + invariant(new_mem_size > old_mem_size); + lt->get_manager()->note_mem_used(new_mem_size - old_mem_size); toku_mutex_unlock(&db_txn_struct_i(txn)->txn_mutex); } @@ -201,17 +201,16 @@ void toku_db_txn_escalate_callback(TXNID txnid, const toku::locktree *lt, const // // We could theoretically steal the memory from the caller instead of copying // it, but it's simpler to have a callback API that doesn't transfer memory ownership. 
- lt->get_manager()->note_mem_released(ranges.buffer->get_num_bytes()); + lt->get_manager()->note_mem_released(ranges.buffer->total_memory_size()); ranges.buffer->destroy(); ranges.buffer->create(); - toku::range_buffer::iterator iter; + toku::range_buffer::iterator iter(&buffer); toku::range_buffer::iterator::record rec; - iter.create(&buffer); while (iter.current(&rec)) { ranges.buffer->append(rec.get_left_key(), rec.get_right_key()); iter.next(); } - lt->get_manager()->note_mem_used(ranges.buffer->get_num_bytes()); + lt->get_manager()->note_mem_used(ranges.buffer->total_memory_size()); } else { // In rare cases, we may not find the associated locktree, because we are // racing with the transaction trying to add this locktree to the lt map @@ -315,7 +314,7 @@ void toku_db_release_lt_key_ranges(DB_TXN *txn, txn_lt_key_ranges *ranges) { // release all of the locks this txn has ever successfully // acquired and stored in the range buffer for this locktree lt->release_locks(txnid, ranges->buffer); - lt->get_manager()->note_mem_released(ranges->buffer->get_num_bytes()); + lt->get_manager()->note_mem_released(ranges->buffer->total_memory_size()); ranges->buffer->destroy(); toku_free(ranges->buffer); diff --git a/util/memarena.cc b/util/memarena.cc index 773c949e8f4..6fb6eb51cd7 100644 --- a/util/memarena.cc +++ b/util/memarena.cc @@ -89,157 +89,142 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." +#include #include #include #include -struct memarena { - char *buf; - size_t buf_used, buf_size; - size_t size_of_other_bufs; // the buf_size of all the other bufs. 
- size_t footprint_of_other_bufs; // the footprint of all the other bufs. - char **other_bufs; - int n_other_bufs; -}; +void memarena::create(size_t initial_size) { + _current_chunk = arena_chunk(); + _other_chunks = nullptr; + _size_of_other_chunks = 0; + _footprint_of_other_chunks = 0; + _n_other_chunks = 0; -MEMARENA toku_memarena_create_presized (size_t initial_size) { - MEMARENA XMALLOC(result); - result->buf_size = initial_size; - result->buf_used = 0; - result->other_bufs = NULL; - result->size_of_other_bufs = 0; - result->footprint_of_other_bufs = 0; - result->n_other_bufs = 0; - XMALLOC_N(result->buf_size, result->buf); - return result; -} - -MEMARENA toku_memarena_create (void) { - return toku_memarena_create_presized(1024); -} - -void toku_memarena_clear (MEMARENA ma) { - // Free the other bufs. - int i; - for (i=0; in_other_bufs; i++) { - toku_free(ma->other_bufs[i]); - ma->other_bufs[i]=0; + _current_chunk.size = initial_size; + if (_current_chunk.size > 0) { + XMALLOC_N(_current_chunk.size, _current_chunk.buf); } - ma->n_other_bufs=0; - // But reuse the main buffer - ma->buf_used = 0; - ma->size_of_other_bufs = 0; - ma->footprint_of_other_bufs = 0; } -static size_t -round_to_page (size_t size) { - const size_t _PAGE_SIZE = 4096; - const size_t result = _PAGE_SIZE+((size-1)&~(_PAGE_SIZE-1)); - assert(0==(result&(_PAGE_SIZE-1))); // make sure it's aligned - assert(result>=size); // make sure it's not too small - assert(resultbuf_size < ma->buf_used + size) { - // The existing block isn't big enough. - // Add the block to the vector of blocks. 
- if (ma->buf) { - int old_n = ma->n_other_bufs; - REALLOC_N(old_n+1, ma->other_bufs); - assert(ma->other_bufs); - ma->other_bufs[old_n]=ma->buf; - ma->n_other_bufs = old_n+1; - ma->size_of_other_bufs += ma->buf_size; - ma->footprint_of_other_bufs += toku_memory_footprint(ma->buf, ma->buf_used); - } - // Make a new one - { - size_t new_size = 2*ma->buf_size; - if (new_sizebuf); - ma->buf_used = 0; - ma->buf_size = new_size; - } +void memarena::destroy(void) { + if (_current_chunk.buf) { + toku_free(_current_chunk.buf); } - // allocate in the existing block. - char *result=ma->buf+ma->buf_used; - ma->buf_used+=size; - return result; + for (int i = 0; i < _n_other_chunks; i++) { + toku_free(_other_chunks[i].buf); + } + if (_other_chunks) { + toku_free(_other_chunks); + } + _current_chunk = arena_chunk(); + _other_chunks = nullptr; + _n_other_chunks = 0; } -void *toku_memarena_memdup (MEMARENA ma, const void *v, size_t len) { - void *r=toku_memarena_malloc(ma, len); - memcpy(r,v,len); +static size_t round_to_page(size_t size) { + const size_t page_size = 4096; + const size_t r = page_size + ((size - 1) & ~(page_size - 1)); + assert((r & (page_size - 1)) == 0); // make sure it's aligned + assert(r >= size); // make sure it's not too small + assert(r < size + page_size); // make sure we didn't grow by more than a page. return r; } -void toku_memarena_destroy(MEMARENA *map) { - MEMARENA ma=*map; - if (ma->buf) { - toku_free(ma->buf); - ma->buf=0; +static const size_t MEMARENA_MAX_CHUNK_SIZE = 64 * 1024 * 1024; + +void *memarena::malloc_from_arena(size_t size) { + if (_current_chunk.buf == nullptr || _current_chunk.size < _current_chunk.used + size) { + // The existing block isn't big enough. + // Add the block to the vector of blocks. 
+ if (_current_chunk.buf) { + invariant(_current_chunk.size > 0); + int old_n = _n_other_chunks; + XREALLOC_N(old_n + 1, _other_chunks); + _other_chunks[old_n] = _current_chunk; + _n_other_chunks = old_n + 1; + _size_of_other_chunks += _current_chunk.size; + _footprint_of_other_chunks += toku_memory_footprint(_current_chunk.buf, _current_chunk.used); + } + + // Make a new one. Grow the buffer size exponentially until we hit + // the max chunk size, but make it at least `size' bytes so the + // current allocation always fit. + size_t new_size = std::min(MEMARENA_MAX_CHUNK_SIZE, 2 * _current_chunk.size); + if (new_size < size) { + new_size = size; + } + new_size = round_to_page(new_size); // at least size, but round to the next page size + XMALLOC_N(new_size, _current_chunk.buf); + _current_chunk.used = 0; + _current_chunk.size = new_size; } - int i; - for (i=0; in_other_bufs; i++) { - toku_free(ma->other_bufs[i]); + invariant(_current_chunk.buf != nullptr); + + // allocate in the existing block. 
+ char *p = _current_chunk.buf + _current_chunk.used; + _current_chunk.used += size; + return p; +} + +void memarena::move_memory(memarena *dest) { + // Move memory to dest + XREALLOC_N(dest->_n_other_chunks + _n_other_chunks + 1, dest->_other_chunks); + dest->_size_of_other_chunks += _size_of_other_chunks + _current_chunk.size; + dest->_footprint_of_other_chunks += _footprint_of_other_chunks + toku_memory_footprint(_current_chunk.buf, _current_chunk.used); + for (int i = 0; i < _n_other_chunks; i++) { + dest->_other_chunks[dest->_n_other_chunks++] = _other_chunks[i]; } - if (ma->other_bufs) toku_free(ma->other_bufs); - ma->other_bufs=0; - ma->n_other_bufs=0; - toku_free(ma); - *map = 0; + dest->_other_chunks[dest->_n_other_chunks++] = _current_chunk; + + // Clear out this memarena's memory + toku_free(_other_chunks); + _current_chunk = arena_chunk(); + _other_chunks = nullptr; + _size_of_other_chunks = 0; + _footprint_of_other_chunks = 0; + _n_other_chunks = 0; } -void toku_memarena_move_buffers(MEMARENA dest, MEMARENA source) { - int i; - char **other_bufs = dest->other_bufs; - static int move_counter = 0; - move_counter++; - REALLOC_N(dest->n_other_bufs + source->n_other_bufs + 1, other_bufs); +size_t memarena::total_memory_size(void) const { + return sizeof(*this) + + total_size_in_use() + + _n_other_chunks * sizeof(*_other_chunks); +} - dest ->size_of_other_bufs += source->size_of_other_bufs + source->buf_size; - dest ->footprint_of_other_bufs += source->footprint_of_other_bufs + toku_memory_footprint(source->buf, source->buf_used); - source->size_of_other_bufs = 0; - source->footprint_of_other_bufs = 0; +size_t memarena::total_size_in_use(void) const { + return _size_of_other_chunks + _current_chunk.used; +} - assert(other_bufs); - dest->other_bufs = other_bufs; - for (i=0; in_other_bufs; i++) { - dest->other_bufs[dest->n_other_bufs++] = source->other_bufs[i]; +size_t memarena::total_footprint(void) const { + return sizeof(*this) + + 
_footprint_of_other_chunks + + toku_memory_footprint(_current_chunk.buf, _current_chunk.used) + + _n_other_chunks * sizeof(*_other_chunks); +} + +//////////////////////////////////////////////////////////////////////////////// + +const void *memarena::chunk_iterator::current(size_t *used) const { + if (_chunk_idx < 0) { + *used = _ma->_current_chunk.used; + return _ma->_current_chunk.buf; + } else if (_chunk_idx < _ma->_n_other_chunks) { + *used = _ma->_other_chunks[_chunk_idx].used; + return _ma->_other_chunks[_chunk_idx].buf; } - dest->other_bufs[dest->n_other_bufs++] = source->buf; - source->n_other_bufs = 0; - toku_free(source->other_bufs); - source->other_bufs = 0; - source->buf = 0; - source->buf_size = 0; - source->buf_used = 0; - + *used = 0; + return nullptr; } -size_t -toku_memarena_total_memory_size (MEMARENA m) -{ - return (toku_memarena_total_size_in_use(m) + - sizeof(*m) + - m->n_other_bufs * sizeof(*m->other_bufs)); +void memarena::chunk_iterator::next() { + _chunk_idx++; } -size_t -toku_memarena_total_size_in_use (MEMARENA m) -{ - return m->size_of_other_bufs + m->buf_used; -} - -size_t -toku_memarena_total_footprint (MEMARENA m) -{ - return m->footprint_of_other_bufs + toku_memory_footprint(m->buf, m->buf_used) + - sizeof(*m) + - m->n_other_bufs * sizeof(*m->other_bufs); +bool memarena::chunk_iterator::more() const { + if (_chunk_idx < 0) { + return _ma->_current_chunk.buf != nullptr; + } + return _chunk_idx < _ma->_n_other_chunks; } diff --git a/util/memarena.h b/util/memarena.h index 46b901063d6..c9ce8ce93f2 100644 --- a/util/memarena.h +++ b/util/memarena.h @@ -92,43 +92,85 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 
11/760379 and to the patents and/or patent applications resulting from it." -/* We have too many memory management tricks: - * memarena (this code) is for a collection of objects that cannot be moved. - * The pattern is allocate more and more stuff. - * Don't free items as you go. - * Free all the items at once. - * Then reuse the same buffer again. - * Allocated objects never move. - * A memarena (as currently implemented) is not suitable for interprocess memory sharing. No reason it couldn't be made to work though. +/* + * A memarena is used to efficiently store a collection of objects that never move + * The pattern is allocate more and more stuff and free all of the items at once. + * The underlying memory will store 1 or more objects per chunk. Each chunk is + * contiguously laid out in memory but chunks are not necessarily contiguous with + * each other. */ +class memarena { +public: + memarena() : + _current_chunk(arena_chunk()), + _other_chunks(nullptr), + _n_other_chunks(0), + _size_of_other_chunks(0), + _footprint_of_other_chunks(0) { + } -struct memarena; + // Effect: Create a memarena with the specified initial size + void create(size_t initial_size); -typedef struct memarena *MEMARENA; + void destroy(void); -MEMARENA toku_memarena_create_presized (size_t initial_size); -// Effect: Create a memarena with initial size. In case of ENOMEM, aborts. + // Effect: Allocate some memory. The returned value remains valid until the memarena is cleared or closed. + // In case of ENOMEM, aborts. + void *malloc_from_arena(size_t size); -MEMARENA toku_memarena_create (void); -// Effect: Create a memarena with default initial size. In case of ENOMEM, aborts. + // Effect: Move all the memory from this memarena into DEST. + // When SOURCE is closed the memory won't be freed. + // When DEST is closed, the memory will be freed, unless DEST moves its memory to another memarena... 
+ void move_memory(memarena *dest); -void toku_memarena_clear (MEMARENA ma); -// Effect: Reset the internal state so that the allocated memory can be used again. + // Effect: Calculate the amount of memory used by a memory arena. + size_t total_memory_size(void) const; -void* toku_memarena_malloc (MEMARENA ma, size_t size); -// Effect: Allocate some memory. The returned value remains valid until the memarena is cleared or closed. -// In case of ENOMEM, aborts. + // Effect: Calculate the used space of the memory arena (ie: excludes unused space) + size_t total_size_in_use(void) const; -void *toku_memarena_memdup (MEMARENA ma, const void *v, size_t len); + // Effect: Calculate the amount of memory used, according to toku_memory_footprint(), + // which is a more expensive but more accurate count of memory used. + size_t total_footprint(void) const; -void toku_memarena_destroy(MEMARENA *ma); + // iterator over the underlying chunks that store objects in the memarena. + // a chunk is represented by a pointer to const memory and a usable byte count. + class chunk_iterator { + public: + chunk_iterator(const memarena *ma) : + _ma(ma), _chunk_idx(-1) { + } -void toku_memarena_move_buffers(MEMARENA dest, MEMARENA source); -// Effect: Move all the memory from SOURCE into DEST. When SOURCE is closed the memory won't be freed. When DEST is closed, the memory will be freed. (Unless DEST moves its memory to another memarena...) + // returns: base pointer to the current chunk + // *used set to the number of usable bytes + // if more() is false, returns nullptr and *used = 0 + const void *current(size_t *used) const; -size_t toku_memarena_total_memory_size (MEMARENA); -// Effect: Calculate the amount of memory used by a memory arena. 
+ // requires: more() is true + void next(); -size_t toku_memarena_total_size_in_use (MEMARENA); + bool more() const; -size_t toku_memarena_total_footprint (MEMARENA); + private: + // -1 represents the 'initial' chunk in a memarena, ie: ma->_current_chunk + // >= 0 represents the i'th chunk in the ma->_other_chunks array + const memarena *_ma; + int _chunk_idx; + }; + +private: + struct arena_chunk { + arena_chunk() : buf(nullptr), used(0), size(0) { } + char *buf; + size_t used; + size_t size; + }; + + struct arena_chunk _current_chunk; + struct arena_chunk *_other_chunks; + int _n_other_chunks; + size_t _size_of_other_chunks; // the buf_size of all the other chunks. + size_t _footprint_of_other_chunks; // the footprint of all the other chunks. + + friend class memarena_unit_test; +}; diff --git a/util/tests/memarena-test.cc b/util/tests/memarena-test.cc new file mode 100644 index 00000000000..b687a9a4287 --- /dev/null +++ b/util/tests/memarena-test.cc @@ -0,0 +1,230 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: + +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuDB, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. 
+ + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. 
+*/ + +#include + +#include "portability/toku_assert.h" + +#include "util/memarena.h" + +class memarena_unit_test { +private: + static const int magic = 37; + + template + void iterate_chunks(memarena *ma, F &fn) { + for (memarena::chunk_iterator it(ma); it.more(); it.next()) { + size_t used = 0; + const void *buf = it.current(&used); + fn(buf, used); + } + } + + void test_create(size_t size) { + memarena ma; + ma.create(size); + invariant(ma._current_chunk.size == size); + invariant(ma._current_chunk.used == 0); + if (size == 0) { + invariant_null(ma._current_chunk.buf); + } else { + invariant_notnull(ma._current_chunk.buf); + } + + // make sure memory was allocated ok by + // writing to buf and reading it back + memset(ma._current_chunk.buf, magic, size); + for (size_t i = 0; i < size; i++) { + const char *buf = reinterpret_cast(ma._current_chunk.buf); + invariant(buf[i] == magic); + } + ma.destroy(); + } + + void test_malloc(size_t size) { + memarena ma; + ma.create(14); + void *v = ma.malloc_from_arena(size); + invariant_notnull(v); + + // make sure memory was allocated ok by + // writing to buf and reading it back + memset(ma._current_chunk.buf, magic, size); + for (size_t i = 0; i < size; i++) { + const char *c = reinterpret_cast(ma._current_chunk.buf); + invariant(c[i] == magic); + } + ma.destroy(); + } + + static void test_iterate_fn(const void *buf, size_t used) { + for (size_t i = 0; i < used; i++) { + const char *c = reinterpret_cast(buf); + invariant(c[i] == (char) ((intptr_t) &c[i])); + } + } + + void test_iterate(size_t size) { + memarena ma; + ma.create(14); + for (size_t k = 0; k < size / 64; k += 64) { + void *v = ma.malloc_from_arena(64); + for (size_t i = 0; i < 64; i++) { + char *c = reinterpret_cast(v); + c[i] = (char) ((intptr_t) &c[i]); + } + } + size_t rest = size % 64; + if (rest != 0) { + void *v = ma.malloc_from_arena(64); + for (size_t i = 0; i < 64; i++) { + char *c = reinterpret_cast(v); + c[i] = (char) ((intptr_t) &c[i]); + } + } + + 
iterate_chunks(&ma, test_iterate_fn); + ma.destroy(); + } + + void test_move_memory(size_t size) { + memarena ma; + ma.create(14); + for (size_t k = 0; k < size / 64; k += 64) { + void *v = ma.malloc_from_arena(64); + for (size_t i = 0; i < 64; i++) { + char *c = reinterpret_cast(v); + c[i] = (char) ((intptr_t) &c[i]); + } + } + size_t rest = size % 64; + if (rest != 0) { + void *v = ma.malloc_from_arena(64); + for (size_t i = 0; i < 64; i++) { + char *c = reinterpret_cast(v); + c[i] = (char) ((intptr_t) &c[i]); + } + } + + memarena ma2; + ma.move_memory(&ma2); + iterate_chunks(&ma2, test_iterate_fn); + + ma.destroy(); + ma2.destroy(); + } + +public: + void test() { + test_create(0); + test_create(64); + test_create(128 * 1024 * 1024); + test_malloc(0); + test_malloc(63); + test_malloc(64); + test_malloc(64 * 1024 * 1024); + test_malloc((64 * 1024 * 1024) + 1); + test_iterate(0); + test_iterate(63); + test_iterate(128 * 1024); + test_iterate(64 * 1024 * 1024); + test_iterate((64 * 1024 * 1024) + 1); + test_move_memory(0); + test_move_memory(1); + test_move_memory(63); + test_move_memory(65); + test_move_memory(65 * 1024 * 1024); + test_move_memory(101 * 1024 * 1024); + } +}; + +int main(void) { + memarena_unit_test test; + test.test(); + return 0; +} From 291dfdc9d96d5fcffea02a61d7fed9501ee454a2 Mon Sep 17 00:00:00 2001 From: John Esmet Date: Sat, 14 Jun 2014 20:40:34 -0400 Subject: [PATCH 021/190] FT-262 Rename thisnodename or nodename to blocknum --- ft/ft-cachetable-wrappers.cc | 6 ++-- ft/ft-flusher.cc | 26 ++++++++-------- ft/ft-internal.h | 4 +-- ft/ft-node-deserialize.cc | 2 +- ft/ft-ops.cc | 34 ++++++++++----------- ft/ft-test-helpers.cc | 4 +-- ft/ft-verify.cc | 4 +-- ft/ft_node-serialize.cc | 6 ++-- ft/node.cc | 6 ++-- ft/node.h | 4 +-- ft/tests/ft-bfe-query.cc | 2 +- ft/tests/ft-clock-test.cc | 4 +-- ft/tests/ft-serialize-benchmark.cc | 4 +-- ft/tests/ft-serialize-test.cc | 28 ++++++++--------- ft/tests/make-tree.cc | 2 +- ft/tests/mempool-115.cc | 2 +- 
ft/tests/msnfilter.cc | 2 +- ft/tests/test3884.cc | 2 +- ft/tests/test_rightmost_leaf_split_merge.cc | 2 +- ft/tests/verify-bad-msn.cc | 2 +- ft/tests/verify-bad-pivots.cc | 2 +- ft/tests/verify-dup-in-leaf.cc | 2 +- ft/tests/verify-dup-pivots.cc | 2 +- ft/tests/verify-misrouted-msgs.cc | 2 +- ft/tests/verify-unsorted-leaf.cc | 2 +- ft/tests/verify-unsorted-pivots.cc | 2 +- ft/tokuftdump.cc | 2 +- 27 files changed, 80 insertions(+), 80 deletions(-) diff --git a/ft/ft-cachetable-wrappers.cc b/ft/ft-cachetable-wrappers.cc index 14d6e874d1b..8b4bb4bacb4 100644 --- a/ft/ft-cachetable-wrappers.cc +++ b/ft/ft-cachetable-wrappers.cc @@ -409,15 +409,15 @@ void toku_ftnode_swap_pair_values(FTNODE a, FTNODE b) // Effect: Swap the blocknum, fullhash, and PAIR for for a and b // Requires: Both nodes are pinned { - BLOCKNUM tmp_blocknum = a->thisnodename; + BLOCKNUM tmp_blocknum = a->blocknum; uint32_t tmp_fullhash = a->fullhash; PAIR tmp_pair = a->ct_pair; - a->thisnodename = b->thisnodename; + a->blocknum = b->blocknum; a->fullhash = b->fullhash; a->ct_pair = b->ct_pair; - b->thisnodename = tmp_blocknum; + b->blocknum = tmp_blocknum; b->fullhash = tmp_fullhash; b->ct_pair = tmp_pair; diff --git a/ft/ft-flusher.cc b/ft/ft-flusher.cc index 2b51c55f040..e1a455b983c 100644 --- a/ft/ft-flusher.cc +++ b/ft/ft-flusher.cc @@ -606,21 +606,21 @@ handle_split_of_child( memset(&node->bp[childnum+1],0,sizeof(node->bp[0])); node->n_children++; - paranoid_invariant(BP_BLOCKNUM(node, childnum).b==childa->thisnodename.b); // use the same child + paranoid_invariant(BP_BLOCKNUM(node, childnum).b==childa->blocknum.b); // use the same child // We never set the rightmost blocknum to be the root. // Instead, we wait for the root to split and let promotion initialize the rightmost // blocknum to be the first non-root leaf node on the right extreme to recieve an insert. 
invariant(ft->h->root_blocknum.b != ft->rightmost_blocknum.b); - if (childa->thisnodename.b == ft->rightmost_blocknum.b) { + if (childa->blocknum.b == ft->rightmost_blocknum.b) { // The rightmost leaf (a) split into (a) and (b). We want (b) to swap pair values // with (a), now that it is the new rightmost leaf. This keeps the rightmost blocknum // constant, the same the way we keep the root blocknum constant. toku_ftnode_swap_pair_values(childa, childb); - BP_BLOCKNUM(node, childnum) = childa->thisnodename; + BP_BLOCKNUM(node, childnum) = childa->blocknum; } - BP_BLOCKNUM(node, childnum+1) = childb->thisnodename; + BP_BLOCKNUM(node, childnum+1) = childb->blocknum; BP_WORKDONE(node, childnum+1) = 0; BP_STATE(node,childnum+1) = PT_AVAIL; @@ -1120,8 +1120,8 @@ static void bring_node_fully_into_memory(FTNODE node, FT ft) { toku_ftnode_pf_callback, &bfe, ft->cf, - node->thisnodename, - toku_cachetable_hash(ft->cf, node->thisnodename) + node->blocknum, + toku_cachetable_hash(ft->cf, node->blocknum) ); } } @@ -1143,7 +1143,7 @@ flush_this_child( bring_node_fully_into_memory(child, h); toku_ftnode_assert_fully_in_memory(child); paranoid_invariant(node->height>0); - paranoid_invariant(child->thisnodename.b!=0); + paranoid_invariant(child->blocknum.b!=0); // VERIFY_NODE does not work off client thread as of now //VERIFY_NODE(t, child); node->dirty = 1; @@ -1504,13 +1504,13 @@ ft_merge_child( REALLOC_N(node->n_children-1, node->childkeys); // Handle a merge of the rightmost leaf node. 
- if (did_merge && childb->thisnodename.b == h->rightmost_blocknum.b) { - invariant(childb->thisnodename.b != h->h->root_blocknum.b); + if (did_merge && childb->blocknum.b == h->rightmost_blocknum.b) { + invariant(childb->blocknum.b != h->h->root_blocknum.b); toku_ftnode_swap_pair_values(childa, childb); - BP_BLOCKNUM(node, childnuma) = childa->thisnodename; + BP_BLOCKNUM(node, childnuma) = childa->blocknum; } - paranoid_invariant(BP_BLOCKNUM(node, childnuma).b == childa->thisnodename.b); + paranoid_invariant(BP_BLOCKNUM(node, childnuma).b == childa->blocknum.b); childa->dirty = 1; // just to make sure childb->dirty = 1; // just to make sure } else { @@ -1610,7 +1610,7 @@ void toku_ft_flush_some_child(FT ft, FTNODE parent, struct flusher_advice *fa) // the parent before finishing reading in the entire child node. bool may_child_be_reactive = ft_ftnode_may_be_reactive(ft, child); - paranoid_invariant(child->thisnodename.b!=0); + paranoid_invariant(child->blocknum.b!=0); // only do the following work if there is a flush to perform if (toku_bnc_n_entries(BNC(parent, childnum)) > 0 || parent->height == 1) { @@ -1914,7 +1914,7 @@ toku_ftnode_cleaner_callback( void *extraargs) { FTNODE node = (FTNODE) ftnode_pv; - invariant(node->thisnodename.b == blocknum.b); + invariant(node->blocknum.b == blocknum.b); invariant(node->fullhash == fullhash); invariant(node->height > 0); // we should never pick a leaf node (for now at least) FT h = (FT) extraargs; diff --git a/ft/ft-internal.h b/ft/ft-internal.h index 54574f56786..7d1725a7d0a 100644 --- a/ft/ft-internal.h +++ b/ft/ft-internal.h @@ -302,8 +302,8 @@ PAIR_ATTR make_invalid_pair_attr(void); // Cachetable callbacks for ftnodes. 
void toku_ftnode_clone_callback(void* value_data, void** cloned_value_data, long* clone_size, PAIR_ATTR* new_attr, bool for_checkpoint, void* write_extraargs); void toku_ftnode_checkpoint_complete_callback(void *value_data); -void toku_ftnode_flush_callback (CACHEFILE cachefile, int fd, BLOCKNUM nodename, void *ftnode_v, void** UU(disk_data), void *extraargs, PAIR_ATTR size, PAIR_ATTR* new_size, bool write_me, bool keep_me, bool for_checkpoint, bool is_clone); -int toku_ftnode_fetch_callback (CACHEFILE cachefile, PAIR p, int fd, BLOCKNUM nodename, uint32_t fullhash, void **ftnode_pv, void** UU(disk_data), PAIR_ATTR *sizep, int*dirty, void*extraargs); +void toku_ftnode_flush_callback (CACHEFILE cachefile, int fd, BLOCKNUM blocknum, void *ftnode_v, void** UU(disk_data), void *extraargs, PAIR_ATTR size, PAIR_ATTR* new_size, bool write_me, bool keep_me, bool for_checkpoint, bool is_clone); +int toku_ftnode_fetch_callback (CACHEFILE cachefile, PAIR p, int fd, BLOCKNUM blocknum, uint32_t fullhash, void **ftnode_pv, void** UU(disk_data), PAIR_ATTR *sizep, int*dirty, void*extraargs); void toku_ftnode_pe_est_callback(void* ftnode_pv, void* disk_data, long* bytes_freed_estimate, enum partial_eviction_cost *cost, void* write_extraargs); int toku_ftnode_pe_callback(void *ftnode_pv, PAIR_ATTR old_attr, void *extraargs, void (*finalize)(PAIR_ATTR new_attr, void *extra), void *finalize_extra); diff --git a/ft/ft-node-deserialize.cc b/ft/ft-node-deserialize.cc index a5cbfa1511d..500b7960875 100644 --- a/ft/ft-node-deserialize.cc +++ b/ft/ft-node-deserialize.cc @@ -111,7 +111,7 @@ void initialize_ftnode(FTNODE node, BLOCKNUM blocknum) { node->fullhash = 0xDEADBEEF; // Is this 'spoof' ok? 
- node->thisnodename = blocknum; + node->blocknum = blocknum; node->dirty = 0; node->bp = NULL; // Can we use this initialization as a correctness assert in diff --git a/ft/ft-ops.cc b/ft/ft-ops.cc index 7aa9aa6ed74..c699e2c8e9a 100644 --- a/ft/ft-ops.cc +++ b/ft/ft-ops.cc @@ -713,7 +713,7 @@ void toku_ftnode_clone_callback( cloned_node->oldest_referenced_xid_known = node->oldest_referenced_xid_known; cloned_node->max_msn_applied_to_node_on_disk = node->max_msn_applied_to_node_on_disk; cloned_node->flags = node->flags; - cloned_node->thisnodename = node->thisnodename; + cloned_node->blocknum = node->blocknum; cloned_node->layout_version = node->layout_version; cloned_node->layout_version_original = node->layout_version_original; cloned_node->layout_version_read_from_disk = node->layout_version_read_from_disk; @@ -756,7 +756,7 @@ void toku_ftnode_clone_callback( void toku_ftnode_flush_callback( CACHEFILE UU(cachefile), int fd, - BLOCKNUM nodename, + BLOCKNUM blocknum, void *ftnode_v, void** disk_data, void *extraargs, @@ -771,7 +771,7 @@ void toku_ftnode_flush_callback( FT h = (FT) extraargs; FTNODE ftnode = (FTNODE) ftnode_v; FTNODE_DISK_DATA* ndd = (FTNODE_DISK_DATA*)disk_data; - assert(ftnode->thisnodename.b==nodename.b); + assert(ftnode->blocknum.b == blocknum.b); int height = ftnode->height; if (write_me) { toku_ftnode_assert_fully_in_memory(ftnode); @@ -784,7 +784,7 @@ void toku_ftnode_flush_callback( toku_ftnode_update_disk_stats(ftnode, h, for_checkpoint); } } - int r = toku_serialize_ftnode_to(fd, ftnode->thisnodename, ftnode, ndd, !is_clone, h, for_checkpoint); + int r = toku_serialize_ftnode_to(fd, ftnode->blocknum, ftnode, ndd, !is_clone, h, for_checkpoint); assert_zero(r); ftnode->layout_version_read_from_disk = FT_LAYOUT_VERSION; } @@ -835,7 +835,7 @@ toku_ft_status_update_pivot_fetch_reason(struct ftnode_fetch_extra *bfe) } } -int toku_ftnode_fetch_callback (CACHEFILE UU(cachefile), PAIR p, int fd, BLOCKNUM nodename, uint32_t fullhash, +int 
toku_ftnode_fetch_callback (CACHEFILE UU(cachefile), PAIR p, int fd, BLOCKNUM blocknum, uint32_t fullhash, void **ftnode_pv, void** disk_data, PAIR_ATTR *sizep, int *dirtyp, void *extraargs) { assert(extraargs); assert(*ftnode_pv == NULL); @@ -845,7 +845,7 @@ int toku_ftnode_fetch_callback (CACHEFILE UU(cachefile), PAIR p, int fd, BLOCKNU // deserialize the node, must pass the bfe in because we cannot // evaluate what piece of the the node is necessary until we get it at // least partially into memory - int r = toku_deserialize_ftnode_from(fd, nodename, fullhash, node, ndd, bfe); + int r = toku_deserialize_ftnode_from(fd, blocknum, fullhash, node, ndd, bfe); if (r != 0) { if (r == TOKUDB_BAD_CHECKSUM) { fprintf(stderr, @@ -1497,7 +1497,7 @@ ft_init_new_root(FT ft, FTNODE oldroot, FTNODE *newrootp) { FTNODE newroot; - BLOCKNUM old_blocknum = oldroot->thisnodename; + BLOCKNUM old_blocknum = oldroot->blocknum; uint32_t old_fullhash = oldroot->fullhash; int new_height = oldroot->height+1; @@ -1637,7 +1637,7 @@ static void inject_message_in_locked_node( // verify that msn of latest message was captured in root node paranoid_invariant(msg->msn.msn == node->max_msn_applied_to_node_on_disk.msn); - if (node->thisnodename.b == ft->rightmost_blocknum.b) { + if (node->blocknum.b == ft->rightmost_blocknum.b) { if (ft->seqinsert_score < FT_SEQINSERT_SCORE_THRESHOLD) { // we promoted to the rightmost leaf node and the seqinsert score has not yet saturated. toku_sync_fetch_and_add(&ft->seqinsert_score, 1); @@ -1684,7 +1684,7 @@ static bool process_maybe_reactive_child(FT ft, FTNODE parent, FTNODE child, int case RE_FISSIBLE: { // We only have a read lock on the parent. We need to drop both locks, and get write locks. 
- BLOCKNUM parent_blocknum = parent->thisnodename; + BLOCKNUM parent_blocknum = parent->blocknum; uint32_t parent_fullhash = toku_cachetable_hash(ft->cf, parent_blocknum); int parent_height = parent->height; int parent_n_children = parent->n_children; @@ -1738,7 +1738,7 @@ static bool process_maybe_reactive_child(FT ft, FTNODE parent, FTNODE child, int } int parent_height = parent->height; - BLOCKNUM parent_blocknum = parent->thisnodename; + BLOCKNUM parent_blocknum = parent->blocknum; uint32_t parent_fullhash = toku_cachetable_hash(ft->cf, parent_blocknum); toku_unpin_ftnode_read_only(ft, child); toku_unpin_ftnode_read_only(ft, parent); @@ -1876,8 +1876,8 @@ static void push_something_in_subtree( // because promotion would not chose to inject directly into this leaf // otherwise. We explicitly skip the root node because then we don't have // to worry about changing the rightmost blocknum when the root splits. - if (subtree_root->height == 0 && loc == RIGHT_EXTREME && subtree_root->thisnodename.b != ft->h->root_blocknum.b) { - ft_set_or_verify_rightmost_blocknum(ft, subtree_root->thisnodename); + if (subtree_root->height == 0 && loc == RIGHT_EXTREME && subtree_root->blocknum.b != ft->h->root_blocknum.b) { + ft_set_or_verify_rightmost_blocknum(ft, subtree_root->blocknum); } inject_message_in_locked_node(ft, subtree_root, target_childnum, msg, flow_deltas, gc_info); } else { @@ -1967,7 +1967,7 @@ static void push_something_in_subtree( paranoid_invariant_notnull(child); if (!just_did_split_or_merge) { - BLOCKNUM subtree_root_blocknum = subtree_root->thisnodename; + BLOCKNUM subtree_root_blocknum = subtree_root->blocknum; uint32_t subtree_root_fullhash = toku_cachetable_hash(ft->cf, subtree_root_blocknum); const bool did_split_or_merge = process_maybe_reactive_child(ft, subtree_root, child, childnum, loc); if (did_split_or_merge) { @@ -2003,7 +2003,7 @@ static void push_something_in_subtree( { // Right now we have a read lock on subtree_root, but we want // to inject 
into it so we get a write lock instead. - BLOCKNUM subtree_root_blocknum = subtree_root->thisnodename; + BLOCKNUM subtree_root_blocknum = subtree_root->blocknum; uint32_t subtree_root_fullhash = toku_cachetable_hash(ft->cf, subtree_root_blocknum); toku_unpin_ftnode_read_only(ft, subtree_root); switch (depth) { @@ -2318,7 +2318,7 @@ static int ft_maybe_insert_into_rightmost_leaf(FT ft, DBT *key, DBT *val, XIDS m // The rightmost blocknum never chances once it is initialized to something // other than null. Verify that the pinned node has the correct blocknum. - invariant(rightmost_leaf->thisnodename.b == rightmost_blocknum.b); + invariant(rightmost_leaf->blocknum.b == rightmost_blocknum.b); // If the rightmost leaf is reactive, bail out out and let the normal promotion pass // take care of it. This also ensures that if any of our ancestors are reactive, @@ -3460,9 +3460,9 @@ ft_search_node ( ); static int -ftnode_fetch_callback_and_free_bfe(CACHEFILE cf, PAIR p, int fd, BLOCKNUM nodename, uint32_t fullhash, void **ftnode_pv, void** UU(disk_data), PAIR_ATTR *sizep, int *dirtyp, void *extraargs) +ftnode_fetch_callback_and_free_bfe(CACHEFILE cf, PAIR p, int fd, BLOCKNUM blocknum, uint32_t fullhash, void **ftnode_pv, void** UU(disk_data), PAIR_ATTR *sizep, int *dirtyp, void *extraargs) { - int r = toku_ftnode_fetch_callback(cf, p, fd, nodename, fullhash, ftnode_pv, disk_data, sizep, dirtyp, extraargs); + int r = toku_ftnode_fetch_callback(cf, p, fd, blocknum, fullhash, ftnode_pv, disk_data, sizep, dirtyp, extraargs); struct ftnode_fetch_extra *CAST_FROM_VOIDP(ffe, extraargs); destroy_bfe_for_prefetch(ffe); toku_free(ffe); diff --git a/ft/ft-test-helpers.cc b/ft/ft-test-helpers.cc index 68dd5cd408a..43d6e188113 100644 --- a/ft/ft-test-helpers.cc +++ b/ft/ft-test-helpers.cc @@ -134,7 +134,7 @@ int toku_testsetup_leaf(FT_HANDLE ft_handle, BLOCKNUM *blocknum, int n_children, node->totalchildkeylens += keylens[i]; } - *blocknum = node->thisnodename; + *blocknum = 
node->blocknum; toku_unpin_ftnode(ft_handle->ft, node); return 0; } @@ -153,7 +153,7 @@ int toku_testsetup_nonleaf (FT_HANDLE ft_handle, int height, BLOCKNUM *blocknum, toku_memdup_dbt(&node->childkeys[i], keys[i], keylens[i]); node->totalchildkeylens += keylens[i]; } - *blocknum = node->thisnodename; + *blocknum = node->blocknum; toku_unpin_ftnode(ft_handle->ft, node); return 0; } diff --git a/ft/ft-verify.cc b/ft/ft-verify.cc index 62591ba804b..df7c637e0cd 100644 --- a/ft/ft-verify.cc +++ b/ft/ft-verify.cc @@ -397,7 +397,7 @@ toku_verify_ftnode_internal(FT_HANDLE ft_handle, { int result=0; MSN this_msn; - BLOCKNUM blocknum = node->thisnodename; + BLOCKNUM blocknum = node->blocknum; //printf("%s:%d pin %p\n", __FILE__, __LINE__, node_v); toku_ftnode_assert_fully_in_memory(node); @@ -440,7 +440,7 @@ toku_verify_ftnode_internal(FT_HANDLE ft_handle, int r = bnc->msg_buffer.iterate(verify_msg); if (r != 0) { result = r; goto done; } - struct verify_message_tree_extra extra = { .msg_buffer = &bnc->msg_buffer, .broadcast = false, .is_fresh = true, .i = i, .verbose = verbose, .blocknum = node->thisnodename, .keep_going_on_failure = keep_going_on_failure, .messages_have_been_moved = messages_have_been_moved }; + struct verify_message_tree_extra extra = { .msg_buffer = &bnc->msg_buffer, .broadcast = false, .is_fresh = true, .i = i, .verbose = verbose, .blocknum = node->blocknum, .keep_going_on_failure = keep_going_on_failure, .messages_have_been_moved = messages_have_been_moved }; r = bnc->fresh_message_tree.iterate(&extra); if (r != 0) { result = r; goto done; } extra.is_fresh = false; diff --git a/ft/ft_node-serialize.cc b/ft/ft_node-serialize.cc index 330c65d7a6d..e62aac4a291 100644 --- a/ft/ft_node-serialize.cc +++ b/ft/ft_node-serialize.cc @@ -1360,7 +1360,7 @@ setup_partitions_using_bfe(FTNODE node, // // setup memory needed for the node // - //printf("node height %d, blocknum %" PRId64 ", type %d lc %d rc %d\n", node->height, node->thisnodename.b, bfe->type, lc, 
rc); + //printf("node height %d, blocknum %" PRId64 ", type %d lc %d rc %d\n", node->height, node->blocknum.b, bfe->type, lc, rc); for (int i = 0; i < node->n_children; i++) { BP_INIT_UNTOUCHED_CLOCK(node,i); if (data_in_memory) { @@ -1496,7 +1496,7 @@ static FTNODE alloc_ftnode_for_deserialize(uint32_t fullhash, BLOCKNUM blocknum) // Effect: Allocate an FTNODE and fill in the values that are not read from FTNODE XMALLOC(node); node->fullhash = fullhash; - node->thisnodename = blocknum; + node->blocknum = blocknum; node->dirty = 0; node->bp = nullptr; node->oldest_referenced_xid_known = TXNID_NONE; @@ -2346,7 +2346,7 @@ toku_deserialize_bp_from_disk(FTNODE node, FTNODE_DISK_DATA ndd, int childnum, i DISKOFF node_offset, total_node_disk_size; toku_translate_blocknum_to_offset_size( bfe->h->blocktable, - node->thisnodename, + node->blocknum, &node_offset, &total_node_disk_size ); diff --git a/ft/node.cc b/ft/node.cc index 7e9334a1cec..db802227e08 100644 --- a/ft/node.cc +++ b/ft/node.cc @@ -97,13 +97,13 @@ PATENT RIGHTS GRANT: // Effect: Fill in N as an empty ftnode. 
// TODO: Rename toku_ftnode_create -void toku_initialize_empty_ftnode(FTNODE n, BLOCKNUM nodename, int height, int num_children, int layout_version, unsigned int flags) { +void toku_initialize_empty_ftnode(FTNODE n, BLOCKNUM blocknum, int height, int num_children, int layout_version, unsigned int flags) { paranoid_invariant(layout_version != 0); paranoid_invariant(height >= 0); n->max_msn_applied_to_node_on_disk = ZERO_MSN; // correct value for root node, harmless for others n->flags = flags; - n->thisnodename = nodename; + n->blocknum = blocknum; n->layout_version = layout_version; n->layout_version_original = layout_version; n->layout_version_read_from_disk = layout_version; @@ -1126,7 +1126,7 @@ long toku_bnc_memory_used(NONLEAF_CHILDINFO bnc) { static void init_childinfo(FTNODE node, int childnum, FTNODE child) { - BP_BLOCKNUM(node,childnum) = child->thisnodename; + BP_BLOCKNUM(node,childnum) = child->blocknum; BP_STATE(node,childnum) = PT_AVAIL; BP_WORKDONE(node, childnum) = 0; set_BNC(node, childnum, toku_create_empty_nl()); diff --git a/ft/node.h b/ft/node.h index 82155334c4a..30f497c3612 100644 --- a/ft/node.h +++ b/ft/node.h @@ -96,7 +96,7 @@ PATENT RIGHTS GRANT: struct ftnode { MSN max_msn_applied_to_node_on_disk; // max_msn_applied that will be written to disk unsigned int flags; - BLOCKNUM thisnodename; // Which block number is this node? + BLOCKNUM blocknum; // Which block number is this node? int layout_version; // What version of the data structure? 
int layout_version_original; // different (<) from layout_version if upgraded from a previous version (useful for debugging) int layout_version_read_from_disk; // transient, not serialized to disk, (useful for debugging) @@ -240,7 +240,7 @@ BASEMENTNODE toku_detach_bn(FTNODE node, int childnum); void toku_ftnode_update_disk_stats(FTNODE ftnode, FT ft, bool for_checkpoint); void toku_ftnode_clone_partitions(FTNODE node, FTNODE cloned_node); -void toku_initialize_empty_ftnode(FTNODE node, BLOCKNUM nodename, int height, int num_children, +void toku_initialize_empty_ftnode(FTNODE node, BLOCKNUM blocknum, int height, int num_children, int layout_version, unsigned int flags); int toku_ftnode_which_child(FTNODE node, const DBT *k, diff --git a/ft/tests/ft-bfe-query.cc b/ft/tests/ft-bfe-query.cc index 8759732a76e..4b9da2a8270 100644 --- a/ft/tests/ft-bfe-query.cc +++ b/ft/tests/ft-bfe-query.cc @@ -372,7 +372,7 @@ test_prefetching(void) { // source_ft.fd=fd; sn.max_msn_applied_to_node_on_disk.msn = 0; sn.flags = 0x11223344; - sn.thisnodename.b = 20; + sn.blocknum.b = 20; sn.layout_version = FT_LAYOUT_VERSION; sn.layout_version_original = FT_LAYOUT_VERSION; sn.height = 1; diff --git a/ft/tests/ft-clock-test.cc b/ft/tests/ft-clock-test.cc index 6af8e797431..5c0d308cfab 100644 --- a/ft/tests/ft-clock-test.cc +++ b/ft/tests/ft-clock-test.cc @@ -311,7 +311,7 @@ test_serialize_nonleaf(void) { sn.max_msn_applied_to_node_on_disk.msn = 0; char *hello_string; sn.flags = 0x11223344; - sn.thisnodename.b = 20; + sn.blocknum.b = 20; sn.layout_version = FT_LAYOUT_VERSION; sn.layout_version_original = FT_LAYOUT_VERSION; sn.height = 1; @@ -411,7 +411,7 @@ test_serialize_leaf(void) { sn.max_msn_applied_to_node_on_disk.msn = 0; sn.flags = 0x11223344; - sn.thisnodename.b = 20; + sn.blocknum.b = 20; sn.layout_version = FT_LAYOUT_VERSION; sn.layout_version_original = FT_LAYOUT_VERSION; sn.height = 0; diff --git a/ft/tests/ft-serialize-benchmark.cc b/ft/tests/ft-serialize-benchmark.cc index 
285ee64e8a3..6ee9b7f89f9 100644 --- a/ft/tests/ft-serialize-benchmark.cc +++ b/ft/tests/ft-serialize-benchmark.cc @@ -144,7 +144,7 @@ test_serialize_leaf(int valsize, int nelts, double entropy, int ser_runs, int de sn->max_msn_applied_to_node_on_disk.msn = 0; sn->flags = 0x11223344; - sn->thisnodename.b = 20; + sn->blocknum.b = 20; sn->layout_version = FT_LAYOUT_VERSION; sn->layout_version_original = FT_LAYOUT_VERSION; sn->height = 0; @@ -299,7 +299,7 @@ test_serialize_nonleaf(int valsize, int nelts, double entropy, int ser_runs, int // source_ft.fd=fd; sn.max_msn_applied_to_node_on_disk.msn = 0; sn.flags = 0x11223344; - sn.thisnodename.b = 20; + sn.blocknum.b = 20; sn.layout_version = FT_LAYOUT_VERSION; sn.layout_version_original = FT_LAYOUT_VERSION; sn.height = 1; diff --git a/ft/tests/ft-serialize-test.cc b/ft/tests/ft-serialize-test.cc index 95e5f70919a..0c8e29df4dd 100644 --- a/ft/tests/ft-serialize-test.cc +++ b/ft/tests/ft-serialize-test.cc @@ -271,7 +271,7 @@ test_serialize_leaf_check_msn(enum ftnode_verify_type bft, bool do_clone) { sn.max_msn_applied_to_node_on_disk = PRESERIALIZE_MSN_ON_DISK; sn.flags = 0x11223344; - sn.thisnodename.b = 20; + sn.blocknum.b = 20; sn.layout_version = FT_LAYOUT_VERSION; sn.layout_version_original = FT_LAYOUT_VERSION; sn.height = 0; @@ -330,7 +330,7 @@ test_serialize_leaf_check_msn(enum ftnode_verify_type bft, bool do_clone) { setup_dn(bft, fd, ft_h, &dn, &dest_ndd); - assert(dn->thisnodename.b==20); + assert(dn->blocknum.b==20); assert(dn->layout_version ==FT_LAYOUT_VERSION); assert(dn->layout_version_original ==FT_LAYOUT_VERSION); @@ -405,7 +405,7 @@ test_serialize_leaf_with_large_pivots(enum ftnode_verify_type bft, bool do_clone sn.max_msn_applied_to_node_on_disk.msn = 0; sn.flags = 0x11223344; - sn.thisnodename.b = 20; + sn.blocknum.b = 20; sn.layout_version = FT_LAYOUT_VERSION; sn.layout_version_original = FT_LAYOUT_VERSION; sn.height = 0; @@ -471,7 +471,7 @@ test_serialize_leaf_with_large_pivots(enum ftnode_verify_type 
bft, bool do_clone setup_dn(bft, fd, ft_h, &dn, &dest_ndd); - assert(dn->thisnodename.b==20); + assert(dn->blocknum.b==20); assert(dn->layout_version ==FT_LAYOUT_VERSION); assert(dn->layout_version_original ==FT_LAYOUT_VERSION); @@ -546,7 +546,7 @@ test_serialize_leaf_with_many_rows(enum ftnode_verify_type bft, bool do_clone) { sn.max_msn_applied_to_node_on_disk.msn = 0; sn.flags = 0x11223344; - sn.thisnodename.b = 20; + sn.blocknum.b = 20; sn.layout_version = FT_LAYOUT_VERSION; sn.layout_version_original = FT_LAYOUT_VERSION; sn.height = 0; @@ -606,7 +606,7 @@ test_serialize_leaf_with_many_rows(enum ftnode_verify_type bft, bool do_clone) { setup_dn(bft, fd, ft_h, &dn, &dest_ndd); - assert(dn->thisnodename.b==20); + assert(dn->blocknum.b==20); assert(dn->layout_version ==FT_LAYOUT_VERSION); assert(dn->layout_version_original ==FT_LAYOUT_VERSION); @@ -687,7 +687,7 @@ test_serialize_leaf_with_large_rows(enum ftnode_verify_type bft, bool do_clone) sn.max_msn_applied_to_node_on_disk.msn = 0; sn.flags = 0x11223344; - sn.thisnodename.b = 20; + sn.blocknum.b = 20; sn.layout_version = FT_LAYOUT_VERSION; sn.layout_version_original = FT_LAYOUT_VERSION; sn.height = 0; @@ -750,7 +750,7 @@ test_serialize_leaf_with_large_rows(enum ftnode_verify_type bft, bool do_clone) setup_dn(bft, fd, ft_h, &dn, &dest_ndd); - assert(dn->thisnodename.b==20); + assert(dn->blocknum.b==20); assert(dn->layout_version ==FT_LAYOUT_VERSION); assert(dn->layout_version_original ==FT_LAYOUT_VERSION); @@ -831,7 +831,7 @@ test_serialize_leaf_with_empty_basement_nodes(enum ftnode_verify_type bft, bool sn.max_msn_applied_to_node_on_disk.msn = 0; sn.flags = 0x11223344; - sn.thisnodename.b = 20; + sn.blocknum.b = 20; sn.layout_version = FT_LAYOUT_VERSION; sn.layout_version_original = FT_LAYOUT_VERSION; sn.height = 0; @@ -893,7 +893,7 @@ test_serialize_leaf_with_empty_basement_nodes(enum ftnode_verify_type bft, bool setup_dn(bft, fd, ft_h, &dn, &dest_ndd); - assert(dn->thisnodename.b==20); + 
assert(dn->blocknum.b==20); assert(dn->layout_version ==FT_LAYOUT_VERSION); assert(dn->layout_version_original ==FT_LAYOUT_VERSION); @@ -966,7 +966,7 @@ test_serialize_leaf_with_multiple_empty_basement_nodes(enum ftnode_verify_type b sn.max_msn_applied_to_node_on_disk.msn = 0; sn.flags = 0x11223344; - sn.thisnodename.b = 20; + sn.blocknum.b = 20; sn.layout_version = FT_LAYOUT_VERSION; sn.layout_version_original = FT_LAYOUT_VERSION; sn.height = 0; @@ -1022,7 +1022,7 @@ test_serialize_leaf_with_multiple_empty_basement_nodes(enum ftnode_verify_type b setup_dn(bft, fd, ft_h, &dn, &dest_ndd); - assert(dn->thisnodename.b==20); + assert(dn->blocknum.b==20); assert(dn->layout_version ==FT_LAYOUT_VERSION); assert(dn->layout_version_original ==FT_LAYOUT_VERSION); @@ -1076,7 +1076,7 @@ test_serialize_nonleaf(enum ftnode_verify_type bft, bool do_clone) { // source_ft.fd=fd; sn.max_msn_applied_to_node_on_disk.msn = 0; sn.flags = 0x11223344; - sn.thisnodename.b = 20; + sn.blocknum.b = 20; sn.layout_version = FT_LAYOUT_VERSION; sn.layout_version_original = FT_LAYOUT_VERSION; sn.height = 1; @@ -1147,7 +1147,7 @@ test_serialize_nonleaf(enum ftnode_verify_type bft, bool do_clone) { setup_dn(bft, fd, ft_h, &dn, &dest_ndd); - assert(dn->thisnodename.b==20); + assert(dn->blocknum.b==20); assert(dn->layout_version ==FT_LAYOUT_VERSION); assert(dn->layout_version_original ==FT_LAYOUT_VERSION); diff --git a/ft/tests/make-tree.cc b/ft/tests/make-tree.cc index 50351fd7959..7ebfd28b275 100644 --- a/ft/tests/make-tree.cc +++ b/ft/tests/make-tree.cc @@ -222,7 +222,7 @@ test_make_tree(int height, int fanout, int nperleaf, int do_verify) { FTNODE newroot = make_tree(ft, height, fanout, nperleaf, &seq, &minkey, &maxkey); // set the new root to point to the new tree - toku_ft_set_new_root_blocknum(ft->ft, newroot->thisnodename); + toku_ft_set_new_root_blocknum(ft->ft, newroot->blocknum); ft->ft->h->max_msn_in_ft = last_dummymsn(); // capture msn of last message injected into tree diff --git 
a/ft/tests/mempool-115.cc b/ft/tests/mempool-115.cc index 0f44cf39da1..07a97f9e49a 100644 --- a/ft/tests/mempool-115.cc +++ b/ft/tests/mempool-115.cc @@ -149,7 +149,7 @@ public: // just copy this code from a previous test // don't care what it does, just want to get a node up and running sn.flags = 0x11223344; - sn.thisnodename.b = 20; + sn.blocknum.b = 20; sn.layout_version = FT_LAYOUT_VERSION; sn.layout_version_original = FT_LAYOUT_VERSION; sn.height = 0; diff --git a/ft/tests/msnfilter.cc b/ft/tests/msnfilter.cc index e6eb8038792..1ab13f745e7 100644 --- a/ft/tests/msnfilter.cc +++ b/ft/tests/msnfilter.cc @@ -213,7 +213,7 @@ test_msnfilter(int do_verify) { FTNODE newroot = make_node(ft, 0); // set the new root to point to the new tree - toku_ft_set_new_root_blocknum(ft->ft, newroot->thisnodename); + toku_ft_set_new_root_blocknum(ft->ft, newroot->blocknum); // KLUDGE: Unpin the new root so toku_ft_lookup() can pin it. (Pin lock is no longer a recursive // mutex.) Just leaving it unpinned for this test program works because it is the only diff --git a/ft/tests/test3884.cc b/ft/tests/test3884.cc index cb3914529ea..c18a29d0676 100644 --- a/ft/tests/test3884.cc +++ b/ft/tests/test3884.cc @@ -154,7 +154,7 @@ static void setup_ftnode_header(struct ftnode *node) { node->flags = 0x11223344; - node->thisnodename.b = 20; + node->blocknum.b = 20; node->layout_version = FT_LAYOUT_VERSION; node->layout_version_original = FT_LAYOUT_VERSION; node->height = 0; diff --git a/ft/tests/test_rightmost_leaf_split_merge.cc b/ft/tests/test_rightmost_leaf_split_merge.cc index 49621d79328..4394217bfc6 100644 --- a/ft/tests/test_rightmost_leaf_split_merge.cc +++ b/ft/tests/test_rightmost_leaf_split_merge.cc @@ -149,7 +149,7 @@ static void test_split_merge(void) { toku_cachetable_hash(ft->cf, ft->h->root_blocknum), &bfe, PL_WRITE_EXPENSIVE, &root_node, true); // root blocknum should be consistent - invariant(root_node->thisnodename.b == ft->h->root_blocknum.b); + 
invariant(root_node->blocknum.b == ft->h->root_blocknum.b); // root should have split at least once, and it should now be at height 1 invariant(root_node->n_children > 1); invariant(root_node->height == 1); diff --git a/ft/tests/verify-bad-msn.cc b/ft/tests/verify-bad-msn.cc index 99a70b40f37..74a5d07efc8 100644 --- a/ft/tests/verify-bad-msn.cc +++ b/ft/tests/verify-bad-msn.cc @@ -225,7 +225,7 @@ test_make_tree(int height, int fanout, int nperleaf, int do_verify) { FTNODE newroot = make_tree(ft, height, fanout, nperleaf, &seq, &minkey, &maxkey); // set the new root to point to the new tree - toku_ft_set_new_root_blocknum(ft->ft, newroot->thisnodename); + toku_ft_set_new_root_blocknum(ft->ft, newroot->blocknum); // Create bad tree (don't do following): // newroot->max_msn_applied_to_node = last_dummymsn(); // capture msn of last message injected into tree diff --git a/ft/tests/verify-bad-pivots.cc b/ft/tests/verify-bad-pivots.cc index 6d1ebfa85a6..aac0cbd8ed2 100644 --- a/ft/tests/verify-bad-pivots.cc +++ b/ft/tests/verify-bad-pivots.cc @@ -195,7 +195,7 @@ test_make_tree(int height, int fanout, int nperleaf, int do_verify) { FTNODE newroot = make_tree(ft, height, fanout, nperleaf, &seq, &minkey, &maxkey); // discard the old root block - toku_ft_set_new_root_blocknum(ft->ft, newroot->thisnodename); + toku_ft_set_new_root_blocknum(ft->ft, newroot->blocknum); // unpin the new root toku_unpin_ftnode(ft->ft, newroot); diff --git a/ft/tests/verify-dup-in-leaf.cc b/ft/tests/verify-dup-in-leaf.cc index 9806c6063a0..510a3ce1de0 100644 --- a/ft/tests/verify-dup-in-leaf.cc +++ b/ft/tests/verify-dup-in-leaf.cc @@ -155,7 +155,7 @@ test_dup_in_leaf(int do_verify) { populate_leaf(newroot, htonl(2), 2); // set the new root to point to the new tree - toku_ft_set_new_root_blocknum(ft->ft, newroot->thisnodename); + toku_ft_set_new_root_blocknum(ft->ft, newroot->blocknum); // unpin the new root toku_unpin_ftnode(ft->ft, newroot); diff --git a/ft/tests/verify-dup-pivots.cc 
b/ft/tests/verify-dup-pivots.cc index c0766a4d035..e2cb20f105d 100644 --- a/ft/tests/verify-dup-pivots.cc +++ b/ft/tests/verify-dup-pivots.cc @@ -199,7 +199,7 @@ test_make_tree(int height, int fanout, int nperleaf, int do_verify) { // discard the old root block // set the new root to point to the new tree - toku_ft_set_new_root_blocknum(ft->ft, newroot->thisnodename); + toku_ft_set_new_root_blocknum(ft->ft, newroot->blocknum); // unpin the new root toku_unpin_ftnode(ft->ft, newroot); diff --git a/ft/tests/verify-misrouted-msgs.cc b/ft/tests/verify-misrouted-msgs.cc index 556aaa31522..1a6fa852ecb 100644 --- a/ft/tests/verify-misrouted-msgs.cc +++ b/ft/tests/verify-misrouted-msgs.cc @@ -211,7 +211,7 @@ test_make_tree(int height, int fanout, int nperleaf, int do_verify) { // discard the old root block // set the new root to point to the new tree - toku_ft_set_new_root_blocknum(ft->ft, newroot->thisnodename); + toku_ft_set_new_root_blocknum(ft->ft, newroot->blocknum); // unpin the new root toku_unpin_ftnode(ft->ft, newroot); diff --git a/ft/tests/verify-unsorted-leaf.cc b/ft/tests/verify-unsorted-leaf.cc index 6933606afd2..d1178c1d8f0 100644 --- a/ft/tests/verify-unsorted-leaf.cc +++ b/ft/tests/verify-unsorted-leaf.cc @@ -156,7 +156,7 @@ test_dup_in_leaf(int do_verify) { populate_leaf(newroot, htonl(1), 2); // set the new root to point to the new tree - toku_ft_set_new_root_blocknum(ft->ft, newroot->thisnodename); + toku_ft_set_new_root_blocknum(ft->ft, newroot->blocknum); // unpin the new root toku_unpin_ftnode(ft->ft, newroot); diff --git a/ft/tests/verify-unsorted-pivots.cc b/ft/tests/verify-unsorted-pivots.cc index eae84382da3..3367cb9af8d 100644 --- a/ft/tests/verify-unsorted-pivots.cc +++ b/ft/tests/verify-unsorted-pivots.cc @@ -195,7 +195,7 @@ test_make_tree(int height, int fanout, int nperleaf, int do_verify) { FTNODE newroot = make_tree(ft, height, fanout, nperleaf, &seq, &minkey, &maxkey); // discard the old root block - toku_ft_set_new_root_blocknum(ft->ft, 
newroot->thisnodename); + toku_ft_set_new_root_blocknum(ft->ft, newroot->blocknum); // unpin the new root toku_unpin_ftnode(ft->ft, newroot); diff --git a/ft/tokuftdump.cc b/ft/tokuftdump.cc index f2028b24280..04b7df280dc 100644 --- a/ft/tokuftdump.cc +++ b/ft/tokuftdump.cc @@ -239,7 +239,7 @@ static void dump_node(int fd, BLOCKNUM blocknum, FT h) { printf(" disksize =%" PRId64 "\n", disksize); printf(" serialize_size =%u\n", toku_serialize_ftnode_size(n)); printf(" flags =%u\n", n->flags); - printf(" thisnodename=%" PRId64 "\n", n->thisnodename.b); + printf(" blocknum=%" PRId64 "\n", n->blocknum.b); //printf(" log_lsn =%lld\n", n->log_lsn.lsn); // The log_lsn is a memory-only value. printf(" height =%d\n", n->height); printf(" layout_version=%d\n", n->layout_version); From 597dc5b33ed96d91bfd01776948f0b23c99733fa Mon Sep 17 00:00:00 2001 From: John Esmet Date: Sat, 14 Jun 2014 20:40:34 -0400 Subject: [PATCH 022/190] FT-257 Add a abstraction for the pivot keys in an ftnode. --- ft/ft-cachetable-wrappers.cc | 2 +- ft/ft-flusher.cc | 210 ++++++++++------------------- ft/ft-hot-flusher.cc | 2 +- ft/ft-ops.cc | 33 ++--- ft/ft-test-helpers.cc | 22 +-- ft/ft-verify.cc | 14 +- ft/ft_node-serialize.cc | 42 ++---- ft/loader/loader.cc | 10 +- ft/node.cc | 190 +++++++++++++++++++++----- ft/node.h | 75 ++++++++++- ft/tests/ft-bfe-query.cc | 16 +-- ft/tests/ft-clock-test.cc | 27 +--- ft/tests/ft-serialize-benchmark.cc | 24 +--- ft/tests/ft-serialize-test.cc | 136 ++++++------------- ft/tests/mempool-115.cc | 17 +-- ft/tests/orthopush-flush.cc | 10 +- ft/tests/test3884.cc | 57 +++----- ft/tokuftdump.cc | 4 +- ft/ybt.cc | 8 -- ft/ybt.h | 2 - 20 files changed, 425 insertions(+), 476 deletions(-) diff --git a/ft/ft-cachetable-wrappers.cc b/ft/ft-cachetable-wrappers.cc index 8b4bb4bacb4..be78801dd10 100644 --- a/ft/ft-cachetable-wrappers.cc +++ b/ft/ft-cachetable-wrappers.cc @@ -120,7 +120,7 @@ cachetable_put_empty_node_with_dep_nodes( uint32_t* fullhash, //output FTNODE* result) { 
- FTNODE XMALLOC(new_node); + FTNODE XCALLOC(new_node); PAIR dependent_pairs[num_dependent_nodes]; enum cachetable_dirty dependent_dirty_bits[num_dependent_nodes]; for (uint32_t i = 0; i < num_dependent_nodes; i++) { diff --git a/ft/ft-flusher.cc b/ft/ft-flusher.cc index e1a455b983c..bf2debe9ed0 100644 --- a/ft/ft-flusher.cc +++ b/ft/ft-flusher.cc @@ -472,8 +472,7 @@ ct_maybe_merge_child(struct flusher_advice *fa, ctme.is_last_child = false; pivot_to_save = childnum; } - const DBT *pivot = &parent->childkeys[pivot_to_save]; - toku_clone_dbt(&ctme.target_key, *pivot); + toku_clone_dbt(&ctme.target_key, *parent->pivotkeys.get_pivot(pivot_to_save)); // at this point, ctme is properly setup, now we can do the merge struct flusher_advice new_fa; @@ -581,26 +580,23 @@ handle_split_of_child( toku_ftnode_assert_fully_in_memory(childb); NONLEAF_CHILDINFO old_bnc = BNC(node, childnum); paranoid_invariant(toku_bnc_nbytesinbuf(old_bnc)==0); - int cnum; WHEN_NOT_GCOV( - if (toku_ft_debug_mode) { - int i; - printf("%s:%d Child %d splitting on %s\n", __FILE__, __LINE__, childnum, (char*)splitk->data); - printf("%s:%d oldsplitkeys:", __FILE__, __LINE__); - for(i=0; in_children-1; i++) printf(" %s", (char *) node->childkeys[i].data); - printf("\n"); - } - ) + if (toku_ft_debug_mode) { + printf("%s:%d Child %d splitting on %s\n", __FILE__, __LINE__, childnum, (char*)splitk->data); + printf("%s:%d oldsplitkeys:", __FILE__, __LINE__); + for(int i = 0; i < node->n_children - 1; i++) printf(" %s", (char *) node->pivotkeys.get_pivot(i)->data); + printf("\n"); + } + ) node->dirty = 1; XREALLOC_N(node->n_children+1, node->bp); - XREALLOC_N(node->n_children, node->childkeys); // Slide the children over. 
// suppose n_children is 10 and childnum is 5, meaning node->childnum[5] just got split // this moves node->bp[6] through node->bp[9] over to // node->bp[7] through node->bp[10] - for (cnum=node->n_children; cnum>childnum+1; cnum--) { + for (int cnum=node->n_children; cnum>childnum+1; cnum--) { node->bp[cnum] = node->bp[cnum-1]; } memset(&node->bp[childnum+1],0,sizeof(node->bp[0])); @@ -633,24 +629,16 @@ handle_split_of_child( } set_BNC(node, childnum+1, new_bnc); - // Slide the keys over - { - for (cnum=node->n_children-2; cnum>childnum; cnum--) { - toku_copy_dbt(&node->childkeys[cnum], node->childkeys[cnum-1]); - } - //if (logger) assert((t->flags&TOKU_DB_DUPSORT)==0); // the setpivot is wrong for TOKU_DB_DUPSORT, so recovery will be broken. - toku_copy_dbt(&node->childkeys[childnum], *splitk); - node->totalchildkeylens += splitk->size; - } + // Insert the new split key , sliding the other keys over + node->pivotkeys.insert_at(splitk, childnum); WHEN_NOT_GCOV( - if (toku_ft_debug_mode) { - int i; - printf("%s:%d splitkeys:", __FILE__, __LINE__); - for(i=0; in_children-2; i++) printf(" %s", (char*)node->childkeys[i].data); - printf("\n"); - } - ) + if (toku_ft_debug_mode) { + printf("%s:%d splitkeys:", __FILE__, __LINE__); + for (int i = 0; i < node->n_children - 2; i++) printf(" %s", (char *) node->pivotkeys.get_pivot(i)->data); + printf("\n"); + } + ) /* Keep pushing to the children, but not if the children would require a pushdown */ toku_ftnode_assert_fully_in_memory(node); @@ -899,7 +887,6 @@ ftleaf_split( } else { B = *nodeb; - REALLOC_N(num_children_in_b-1, B->childkeys); REALLOC_N(num_children_in_b, B->bp); B->n_children = num_children_in_b; for (int i = 0; i < num_children_in_b; i++) { @@ -951,20 +938,10 @@ ftleaf_split( // the child index in the original node that corresponds to the // first node in the right node of the split - int base_index = num_left_bns - (split_on_boundary ? 
0 : 1); - // make pivots in B - for (int i=0; i < num_children_in_b-1; i++) { - toku_copy_dbt(&B->childkeys[i], node->childkeys[i+base_index]); - B->totalchildkeylens += node->childkeys[i+base_index].size; - node->totalchildkeylens -= node->childkeys[i+base_index].size; - toku_init_dbt(&node->childkeys[i+base_index]); - } - if (split_on_boundary && num_left_bns < node->n_children) { - if (splitk) { - toku_copy_dbt(splitk, node->childkeys[num_left_bns - 1]); - } else { - toku_destroy_dbt(&node->childkeys[num_left_bns - 1]); - } + int split_idx = num_left_bns - (split_on_boundary ? 0 : 1); + node->pivotkeys.split_at(split_idx, &B->pivotkeys); + if (split_on_boundary && num_left_bns < node->n_children && splitk) { + toku_copyref_dbt(splitk, *node->pivotkeys.get_pivot(num_left_bns - 1)); } else if (splitk) { bn_data* bd = BLB_DATA(node, num_left_bns - 1); uint32_t keylen; @@ -976,7 +953,6 @@ ftleaf_split( node->n_children = num_children_in_node; REALLOC_N(num_children_in_node, node->bp); - REALLOC_N(num_children_in_node-1, node->childkeys); } ftnode_finalize_split(node, B, max_msn_applied_to_node); @@ -1009,9 +985,7 @@ ft_nonleaf_split( /* The first n_children_in_a go into node a. * That means that the first n_children_in_a-1 keys go into node a. * The splitter key is key number n_children_in_a */ - int i; - - for (i=n_children_in_a; ibp[targchild] = node->bp[i]; memset(&node->bp[i], 0, sizeof(node->bp[0])); - - // Delete a child, removing the preceeding pivot key. 
The child number must be > 0 - { - paranoid_invariant(i>0); - if (i>n_children_in_a) { - toku_copy_dbt(&B->childkeys[targchild-1], node->childkeys[i-1]); - B->totalchildkeylens += node->childkeys[i-1].size; - node->totalchildkeylens -= node->childkeys[i-1].size; - toku_init_dbt(&node->childkeys[i-1]); - } - } } - node->n_children=n_children_in_a; + // the split key for our parent is the rightmost pivot key in node + node->pivotkeys.split_at(n_children_in_a, &B->pivotkeys); + toku_clone_dbt(splitk, *node->pivotkeys.get_pivot(n_children_in_a - 1)); + node->pivotkeys.delete_at(n_children_in_a - 1); - toku_copy_dbt(splitk, node->childkeys[n_children_in_a-1]); - node->totalchildkeylens -= node->childkeys[n_children_in_a-1].size; - - REALLOC_N(n_children_in_a, node->bp); - REALLOC_N(n_children_in_a-1, node->childkeys); + node->n_children = n_children_in_a; + REALLOC_N(node->n_children, node->bp); } ftnode_finalize_split(node, B, max_msn_applied_to_node); @@ -1109,6 +1072,8 @@ ft_split_child( toku_unpin_ftnode(h, nodea); toku_unpin_ftnode(h, nodeb); } + + toku_destroy_dbt(&splitk); } static void bring_node_fully_into_memory(FTNODE node, FT ft) { @@ -1186,52 +1151,47 @@ merge_leaf_nodes(FTNODE a, FTNODE b) // of a gets eliminated because we do not have a pivot to store for it (because it has no elements) const bool a_has_tail = a_last_bd->num_klpairs() > 0; - // move each basement node from b to a - // move the pivots, adding one of what used to be max(a) - // move the estimates int num_children = a->n_children + b->n_children; if (!a_has_tail) { - uint lastchild = a->n_children-1; + int lastchild = a->n_children - 1; BASEMENTNODE bn = BLB(a, lastchild); - { - // verify that last basement in a is empty, then destroy mempool - size_t used_space = a_last_bd->get_disk_size(); - invariant_zero(used_space); - } + + // verify that last basement in a is empty, then destroy mempool + size_t used_space = a_last_bd->get_disk_size(); + invariant_zero(used_space); 
destroy_basement_node(bn); - set_BNULL(a, a->n_children-1); + set_BNULL(a, lastchild); num_children--; - } - - //realloc pivots and basement nodes in a - REALLOC_N(num_children, a->bp); - REALLOC_N(num_children-1, a->childkeys); - - // fill in pivot for what used to be max of node 'a', if it is needed - if (a_has_tail) { + if (lastchild < a->pivotkeys.num_pivots()) { + a->pivotkeys.delete_at(lastchild); + } + } else { + // fill in pivot for what used to be max of node 'a', if it is needed uint32_t keylen; void *key; - int rr = a_last_bd->fetch_key_and_len(a_last_bd->num_klpairs() - 1, &keylen, &key); - invariant_zero(rr); - toku_memdup_dbt(&a->childkeys[a->n_children-1], key, keylen); - a->totalchildkeylens += keylen; + int r = a_last_bd->fetch_key_and_len(a_last_bd->num_klpairs() - 1, &keylen, &key); + invariant_zero(r); + DBT pivotkey; + toku_fill_dbt(&pivotkey, key, keylen); + a->pivotkeys.replace_at(&pivotkey, a->n_children - 1); } + // realloc basement nodes in `a' + REALLOC_N(num_children, a->bp); + + // move each basement node from b to a uint32_t offset = a_has_tail ? 
a->n_children : a->n_children - 1; for (int i = 0; i < b->n_children; i++) { - a->bp[i+offset] = b->bp[i]; - memset(&b->bp[i],0,sizeof(b->bp[0])); - if (i < (b->n_children-1)) { - toku_copy_dbt(&a->childkeys[i+offset], b->childkeys[i]); - toku_init_dbt(&b->childkeys[i]); - } + a->bp[i + offset] = b->bp[i]; + memset(&b->bp[i], 0, sizeof(b->bp[0])); } - a->totalchildkeylens += b->totalchildkeylens; - a->n_children = num_children; + + // append b's pivots to a's pivots + a->pivotkeys.append(b->pivotkeys); // now that all the data has been moved from b to a, we can destroy the data in b - // b can remain untouched, as it will be destroyed later - b->totalchildkeylens = 0; + a->n_children = num_children; + b->pivotkeys.destroy(); b->n_children = 0; } @@ -1255,7 +1215,7 @@ static void maybe_merge_pinned_leaf_nodes( FTNODE a, FTNODE b, - DBT *parent_splitk, + const DBT *parent_splitk, bool *did_merge, bool *did_rebalance, DBT *splitk, @@ -1279,7 +1239,6 @@ maybe_merge_pinned_leaf_nodes( return; } // one is less than 1/4 of a node, and together they are more than 3/4 of a node. - toku_destroy_dbt(parent_splitk); // We don't need the parent_splitk any more. If we need a splitk (if we don't merge) we'll malloc a new one. *did_rebalance = true; balance_leaf_nodes(a, b, splitk); } else { @@ -1287,7 +1246,6 @@ maybe_merge_pinned_leaf_nodes( *did_merge = true; *did_rebalance = false; toku_init_dbt(splitk); - toku_destroy_dbt(parent_splitk); // if we are merging, the splitk gets freed. 
merge_leaf_nodes(a, b); } } @@ -1303,26 +1261,18 @@ maybe_merge_pinned_nonleaf_nodes( { toku_ftnode_assert_fully_in_memory(a); toku_ftnode_assert_fully_in_memory(b); - paranoid_invariant(parent_splitk->data); + invariant_notnull(parent_splitk->data); + int old_n_children = a->n_children; int new_n_children = old_n_children + b->n_children; + XREALLOC_N(new_n_children, a->bp); - memcpy(a->bp + old_n_children, - b->bp, - b->n_children*sizeof(b->bp[0])); - memset(b->bp,0,b->n_children*sizeof(b->bp[0])); + memcpy(a->bp + old_n_children, b->bp, b->n_children * sizeof(b->bp[0])); + memset(b->bp, 0, b->n_children * sizeof(b->bp[0])); - XREALLOC_N(new_n_children-1, a->childkeys); - toku_copy_dbt(&a->childkeys[old_n_children-1], *parent_splitk); - a->totalchildkeylens += parent_splitk->size; - for (int i = 0; i < b->n_children - 1; ++i) { - toku_copy_dbt(&a->childkeys[old_n_children + i], b->childkeys[i]); - a->totalchildkeylens += b->childkeys[i].size; - toku_init_dbt(&b->childkeys[i]); - } + a->pivotkeys.insert_at(parent_splitk, old_n_children - 1); + a->pivotkeys.append(b->pivotkeys); a->n_children = new_n_children; - - b->totalchildkeylens = 0; b->n_children = 0; a->dirty = 1; @@ -1338,7 +1288,7 @@ maybe_merge_pinned_nonleaf_nodes( static void maybe_merge_pinned_nodes( FTNODE parent, - DBT *parent_splitk, + const DBT *parent_splitk, FTNODE a, FTNODE b, bool *did_merge, @@ -1466,26 +1416,14 @@ ft_merge_child( { DBT splitk; toku_init_dbt(&splitk); - DBT *old_split_key = &node->childkeys[childnuma]; - unsigned int deleted_size = old_split_key->size; - maybe_merge_pinned_nodes(node, &node->childkeys[childnuma], childa, childb, &did_merge, &did_rebalance, &splitk, h->h->nodesize); - if (childa->height>0) { - for (int i=0; i+1n_children; i++) { - paranoid_invariant(childa->childkeys[i].data); - } - } + const DBT *old_split_key = node->pivotkeys.get_pivot(childnuma); + maybe_merge_pinned_nodes(node, old_split_key, childa, childb, &did_merge, &did_rebalance, &splitk, 
h->h->nodesize); //toku_verify_estimates(t,childa); // the tree did react if a merge (did_merge) or rebalance (new spkit key) occurred *did_react = (bool)(did_merge || did_rebalance); - if (did_merge) { - paranoid_invariant(!splitk.data); - } else { - paranoid_invariant(splitk.data); - } - - node->totalchildkeylens -= deleted_size; // The key was free()'d inside the maybe_merge_pinned_nodes. if (did_merge) { + invariant_null(splitk.data); NONLEAF_CHILDINFO remaining_bnc = BNC(node, childnuma); NONLEAF_CHILDINFO merged_bnc = BNC(node, childnumb); for (unsigned int i = 0; i < (sizeof remaining_bnc->flow) / (sizeof remaining_bnc->flow[0]); ++i) { @@ -1498,10 +1436,7 @@ ft_merge_child( &node->bp[childnumb+1], (node->n_children-childnumb)*sizeof(node->bp[0])); REALLOC_N(node->n_children, node->bp); - memmove(&node->childkeys[childnuma], - &node->childkeys[childnuma+1], - (node->n_children-childnumb)*sizeof(node->childkeys[0])); - REALLOC_N(node->n_children-1, node->childkeys); + node->pivotkeys.delete_at(childnuma); // Handle a merge of the rightmost leaf node. if (did_merge && childb->blocknum.b == h->rightmost_blocknum.b) { @@ -1519,10 +1454,11 @@ ft_merge_child( // pretty far down the tree) // If we didn't merge the nodes, then we need the correct pivot. - toku_copy_dbt(&node->childkeys[childnuma], splitk); - node->totalchildkeylens += node->childkeys[childnuma].size; + invariant_notnull(splitk.data); + node->pivotkeys.replace_at(&splitk, childnuma); node->dirty = 1; } + toku_destroy_dbt(&splitk); } // // now we possibly flush the children diff --git a/ft/ft-hot-flusher.cc b/ft/ft-hot-flusher.cc index fa0b031e2f1..3b7b25352a6 100644 --- a/ft/ft-hot-flusher.cc +++ b/ft/ft-hot-flusher.cc @@ -202,7 +202,7 @@ hot_update_flusher_keys(FTNODE parent, // child node. 
if (childnum < (parent->n_children - 1)) { toku_destroy_dbt(&flusher->max_current_key); - toku_clone_dbt(&flusher->max_current_key, parent->childkeys[childnum]); + toku_clone_dbt(&flusher->max_current_key, *parent->pivotkeys.get_pivot(childnum)); } } diff --git a/ft/ft-ops.cc b/ft/ft-ops.cc index c699e2c8e9a..d567c72c389 100644 --- a/ft/ft-ops.cc +++ b/ft/ft-ops.cc @@ -448,7 +448,7 @@ const DBT *prepivotkey (FTNODE node, int childnum, const DBT * const lower_bound if (childnum==0) return lower_bound_exclusive; else { - return &node->childkeys[childnum-1]; + return node->pivotkeys.get_pivot(childnum - 1); } } @@ -456,7 +456,7 @@ const DBT *postpivotkey (FTNODE node, int childnum, const DBT * const upper_boun if (childnum+1 == node->n_children) return upper_bound_inclusive; else { - return &node->childkeys[childnum]; + return node->pivotkeys.get_pivot(childnum); } } @@ -512,8 +512,7 @@ ftnode_memory_size (FTNODE node) int n_children = node->n_children; retval += sizeof(*node); retval += (n_children)*(sizeof(node->bp[0])); - retval += (n_children > 0 ? n_children-1 : 0)*(sizeof(node->childkeys[0])); - retval += node->totalchildkeylens; + retval += node->pivotkeys.total_size(); // now calculate the sizes of the partitions for (int i = 0; i < n_children; i++) { @@ -722,14 +721,10 @@ void toku_ftnode_clone_callback( cloned_node->dirty = node->dirty; cloned_node->fullhash = node->fullhash; cloned_node->n_children = node->n_children; - cloned_node->totalchildkeylens = node->totalchildkeylens; - XMALLOC_N(node->n_children-1, cloned_node->childkeys); XMALLOC_N(node->n_children, cloned_node->bp); // clone pivots - for (int i = 0; i < node->n_children-1; i++) { - toku_clone_dbt(&cloned_node->childkeys[i], node->childkeys[i]); - } + cloned_node->pivotkeys.create_from_pivot_keys(node->pivotkeys); if (node->height > 0) { // need to move messages here so that we don't serialize stale // messages to the fresh tree - ft verify code complains otherwise. 
@@ -3632,7 +3627,7 @@ ft_search_child(FT_HANDLE ft_handle, FTNODE node, int childnum, ft_search *searc static inline int search_which_child_cmp_with_bound(DB *db, ft_compare_func cmp, FTNODE node, int childnum, ft_search *search, DBT *dbt) { - return cmp(db, toku_copy_dbt(dbt, node->childkeys[childnum]), &search->pivot_bound); + return cmp(db, toku_copyref_dbt(dbt, *node->pivotkeys.get_pivot(childnum)), &search->pivot_bound); } int @@ -3652,7 +3647,7 @@ toku_ft_search_which_child( int mi; while (lo < hi) { mi = (lo + hi) / 2; - toku_copy_dbt(&pivotkey, node->childkeys[mi]); + toku_copyref_dbt(&pivotkey, *node->pivotkeys.get_pivot(mi)); // search->compare is really strange, and only works well with a // linear search, it makes binary search a pita. // @@ -3692,7 +3687,7 @@ toku_ft_search_which_child( // searching right to left, same argument as just above // (but we had to pass lo - 1 because the pivot between lo // and the thing just less than it is at that position in - // the childkeys array) + // the pivot keys array) lo--; } } @@ -3709,7 +3704,7 @@ maybe_search_save_bound( int p = (search->direction == FT_SEARCH_LEFT) ? child_searched : child_searched - 1; if (p >= 0 && p < node->n_children-1) { toku_destroy_dbt(&search->pivot_bound); - toku_clone_dbt(&search->pivot_bound, node->childkeys[p]); + toku_clone_dbt(&search->pivot_bound, *node->pivotkeys.get_pivot(p)); } } @@ -4344,7 +4339,7 @@ static int get_key_after_bytes_in_subtree(FT_HANDLE ft_h, FT ft, FTNODE node, UN } else { *skipped += child_subtree_bytes; if (*skipped >= skip_len && i < node->n_children - 1) { - callback(&node->childkeys[i], *skipped, cb_extra); + callback(node->pivotkeys.get_pivot(i), *skipped, cb_extra); r = 0; } // Otherwise, r is still DB_NOTFOUND. 
If this is the last @@ -4473,7 +4468,7 @@ toku_dump_ftnode (FILE *file, FT_HANDLE ft_handle, BLOCKNUM blocknum, int depth, int i; for (i=0; i+1< node->n_children; i++) { fprintf(file, "%*spivotkey %d =", depth+1, "", i); - toku_print_BYTESTRING(file, node->childkeys[i].size, (char *) node->childkeys[i].data); + toku_print_BYTESTRING(file, node->pivotkeys.get_pivot(i)->size, (char *) node->pivotkeys.get_pivot(i)->data); fprintf(file, "\n"); } for (i=0; i< node->n_children; i++) { @@ -4515,12 +4510,12 @@ toku_dump_ftnode (FILE *file, FT_HANDLE ft_handle, BLOCKNUM blocknum, int depth, for (i=0; in_children; i++) { fprintf(file, "%*schild %d\n", depth, "", i); if (i>0) { - char *CAST_FROM_VOIDP(key, node->childkeys[i-1].data); - fprintf(file, "%*spivot %d len=%u %u\n", depth+1, "", i-1, node->childkeys[i-1].size, (unsigned)toku_dtoh32(*(int*)key)); + char *CAST_FROM_VOIDP(key, node->pivotkeys.get_pivot(i - 1)->data); + fprintf(file, "%*spivot %d len=%u %u\n", depth+1, "", i-1, node->pivotkeys.get_pivot(i - 1)->size, (unsigned)toku_dtoh32(*(int*)key)); } toku_dump_ftnode(file, ft_handle, BP_BLOCKNUM(node, i), depth+4, - (i==0) ? lorange : &node->childkeys[i-1], - (i==node->n_children-1) ? hirange : &node->childkeys[i]); + (i==0) ? lorange : node->pivotkeys.get_pivot(i - 1), + (i==node->n_children-1) ? 
hirange : node->pivotkeys.get_pivot(i)); } } } diff --git a/ft/ft-test-helpers.cc b/ft/ft-test-helpers.cc index 43d6e188113..ca28adb36f9 100644 --- a/ft/ft-test-helpers.cc +++ b/ft/ft-test-helpers.cc @@ -124,15 +124,15 @@ int toku_testsetup_leaf(FT_HANDLE ft_handle, BLOCKNUM *blocknum, int n_children, FTNODE node; assert(testsetup_initialized); toku_create_new_ftnode(ft_handle, &node, 0, n_children); - int i; - for (i=0; ichildkeys[i], keys[i], keylens[i]); - node->totalchildkeylens += keylens[i]; + DBT *XMALLOC_N(n_children - 1, pivotkeys); + for (int i = 0; i + 1 < n_children; i++) { + toku_memdup_dbt(&pivotkeys[i], keys[i], keylens[i]); } + node->pivotkeys.create_from_dbts(pivotkeys, n_children - 1); *blocknum = node->blocknum; toku_unpin_ftnode(ft_handle->ft, node); @@ -144,15 +144,15 @@ int toku_testsetup_nonleaf (FT_HANDLE ft_handle, int height, BLOCKNUM *blocknum, FTNODE node; assert(testsetup_initialized); toku_create_new_ftnode(ft_handle, &node, height, n_children); - int i; - for (i=0; ichildkeys[i], keys[i], keylens[i]); - node->totalchildkeylens += keylens[i]; + DBT *XMALLOC_N(n_children - 1, pivotkeys); + for (int i = 0; i + 1 < n_children; i++) { + toku_memdup_dbt(&pivotkeys[i], keys[i], keylens[i]); } + node->pivotkeys.create_from_dbts(pivotkeys, n_children - 1); *blocknum = node->blocknum; toku_unpin_ftnode(ft_handle->ft, node); return 0; diff --git a/ft/ft-verify.cc b/ft/ft-verify.cc index df7c637e0cd..2a6d9fcbb6e 100644 --- a/ft/ft-verify.cc +++ b/ft/ft-verify.cc @@ -411,24 +411,24 @@ toku_verify_ftnode_internal(FT_HANDLE ft_handle, } // Verify that all the pivot keys are in order. 
for (int i = 0; i < node->n_children-2; i++) { - int compare = compare_pairs(ft_handle, &node->childkeys[i], &node->childkeys[i+1]); + int compare = compare_pairs(ft_handle, node->pivotkeys.get_pivot(i), node->pivotkeys.get_pivot(i + 1)); VERIFY_ASSERTION(compare < 0, i, "Value is >= the next value"); } // Verify that all the pivot keys are lesser_pivot < pivot <= greatereq_pivot for (int i = 0; i < node->n_children-1; i++) { if (lesser_pivot) { - int compare = compare_pairs(ft_handle, lesser_pivot, &node->childkeys[i]); + int compare = compare_pairs(ft_handle, lesser_pivot, node->pivotkeys.get_pivot(i)); VERIFY_ASSERTION(compare < 0, i, "Pivot is >= the lower-bound pivot"); } if (greatereq_pivot) { - int compare = compare_pairs(ft_handle, greatereq_pivot, &node->childkeys[i]); + int compare = compare_pairs(ft_handle, greatereq_pivot, node->pivotkeys.get_pivot(i)); VERIFY_ASSERTION(compare >= 0, i, "Pivot is < the upper-bound pivot"); } } for (int i = 0; i < node->n_children; i++) { - const DBT *curr_less_pivot = (i==0) ? lesser_pivot : &node->childkeys[i-1]; - const DBT *curr_geq_pivot = (i==node->n_children-1) ? greatereq_pivot : &node->childkeys[i]; + const DBT *curr_less_pivot = (i==0) ? lesser_pivot : node->pivotkeys.get_pivot(i - 1); + const DBT *curr_geq_pivot = (i==node->n_children-1) ? greatereq_pivot : node->pivotkeys.get_pivot(i); if (node->height > 0) { NONLEAF_CHILDINFO bnc = BNC(node, i); // Verify that messages in the buffers are in the right place. @@ -537,8 +537,8 @@ toku_verify_ftnode (FT_HANDLE ft_handle, : parentmsn_with_messages), messages_exist_above || toku_bnc_n_entries(BNC(node, i)) > 0, child_node, node->height-1, - (i==0) ? lesser_pivot : &node->childkeys[i-1], - (i==node->n_children-1) ? greatereq_pivot : &node->childkeys[i], + (i==0) ? lesser_pivot : node->pivotkeys.get_pivot(i - 1), + (i==node->n_children-1) ? 
greatereq_pivot : node->pivotkeys.get_pivot(i), progress_callback, progress_extra, recurse, verbose, keep_going_on_failure); if (r) { diff --git a/ft/ft_node-serialize.cc b/ft/ft_node-serialize.cc index e62aac4a291..bf3780cfa42 100644 --- a/ft/ft_node-serialize.cc +++ b/ft/ft_node-serialize.cc @@ -483,7 +483,7 @@ serialize_ftnode_info_size(FTNODE node) retval += 4; // flags retval += 4; // height; retval += 8; // oldest_referenced_xid_known - retval += node->totalchildkeylens; // total length of pivots + retval += node->pivotkeys.total_size(); retval += (node->n_children-1)*4; // encode length of each pivot if (node->height > 0) { retval += node->n_children*8; // child blocknum's @@ -507,11 +507,8 @@ static void serialize_ftnode_info(FTNODE node, wbuf_nocrc_uint(&wb, node->flags); wbuf_nocrc_int (&wb, node->height); wbuf_TXNID(&wb, node->oldest_referenced_xid_known); + node->pivotkeys.serialize_to_wbuf(&wb); - // pivot information - for (int i = 0; i < node->n_children-1; i++) { - wbuf_nocrc_bytes(&wb, node->childkeys[i].data, node->childkeys[i].size); - } // child blocks, only for internal nodes if (node->height > 0) { for (int i = 0; i < node->n_children; i++) { @@ -1261,20 +1258,10 @@ deserialize_ftnode_info( // n_children is now in the header, nd the allocatio of the node->bp is in deserialize_ftnode_from_rbuf. 
// now the pivots - node->totalchildkeylens = 0; if (node->n_children > 1) { - XMALLOC_N(node->n_children - 1, node->childkeys); - for (int i=0; i < node->n_children-1; i++) { - bytevec childkeyptr; - unsigned int cklen; - rbuf_bytes(&rb, &childkeyptr, &cklen); - toku_memdup_dbt(&node->childkeys[i], childkeyptr, cklen); - node->totalchildkeylens += cklen; - } - } - else { - node->childkeys = NULL; - node->totalchildkeylens = 0; + node->pivotkeys.deserialize_from_rbuf(&rb, node->n_children - 1); + } else { + node->pivotkeys.create_empty(); } // if this is an internal node, unpack the block nums, and fill in necessary fields @@ -1725,18 +1712,8 @@ deserialize_and_upgrade_internal_node(FTNODE node, } } - node->childkeys = NULL; - node->totalchildkeylens = 0; - // I. Allocate keys based on number of children. - XMALLOC_N(node->n_children - 1, node->childkeys); - // II. Copy keys from buffer to allocated keys in ftnode. - for (int i = 0; i < node->n_children - 1; ++i) { - bytevec childkeyptr; - unsigned int cklen; - rbuf_bytes(rb, &childkeyptr, &cklen); // 17. child key pointers - toku_memdup_dbt(&node->childkeys[i], childkeyptr, cklen); - node->totalchildkeylens += cklen; - } + // Pivot keys + node->pivotkeys.deserialize_from_rbuf(rb, node->n_children - 1); // Create space for the child node buffers (a.k.a. partitions). XMALLOC_N(node->n_children, node->bp); @@ -1932,10 +1909,7 @@ deserialize_and_upgrade_leaf_node(FTNODE node, // basement node. node->n_children = 1; XMALLOC_N(node->n_children, node->bp); - // This is a malloc(0), but we need to do it in order to get a pointer - // we can free() later. - XMALLOC_N(node->n_children - 1, node->childkeys); - node->totalchildkeylens = 0; + node->pivotkeys.create_empty(); // Create one basement node to contain all the leaf entries by // setting up the single partition and updating the bfe. 
diff --git a/ft/loader/loader.cc b/ft/loader/loader.cc index c030e595fb7..37d08b30c5a 100644 --- a/ft/loader/loader.cc +++ b/ft/loader/loader.cc @@ -3168,11 +3168,7 @@ static void write_nonleaf_node (FTLOADER bl, struct dbout *out, int64_t blocknum FTNODE XMALLOC(node); toku_initialize_empty_ftnode(node, make_blocknum(blocknum_of_new_node), height, n_children, FT_LAYOUT_VERSION, 0); - node->totalchildkeylens = 0; - for (int i=0; ichildkeys[i], pivots[i]); - node->totalchildkeylens += pivots[i].size; - } + node->pivotkeys.create_from_dbts(pivots, n_children - 1); assert(node->bp); for (int i=0; ichildkeys[i].data); } for (int i=0; ibp); - toku_free(node->childkeys); + node->pivotkeys.destroy(); toku_free(node); toku_free(ndd); toku_free(subtree_info); diff --git a/ft/node.cc b/ft/node.cc index db802227e08..229e0e0bfe1 100644 --- a/ft/node.cc +++ b/ft/node.cc @@ -92,9 +92,145 @@ PATENT RIGHTS GRANT: #include "ft/ft.h" #include "ft/ft-internal.h" #include "ft/node.h" +#include "ft/rbuf.h" +#include "ft/wbuf.h" #include "util/scoped_malloc.h" #include "util/sort.h" +void ftnode_pivot_keys::create_empty() { + _num_pivots = 0; + _total_size = 0; + _keys = nullptr; +} + +void ftnode_pivot_keys::create_from_dbts(const DBT *keys, int n) { + _num_pivots = n; + _total_size = 0; + XMALLOC_N(_num_pivots, _keys); + for (int i = 0; i < _num_pivots; i++) { + size_t size = keys[i].size; + toku_memdup_dbt(&_keys[i], keys[i].data, size); + _total_size += size; + } +} + +// effect: create pivot keys as a clone of an existing set of pivotkeys +void ftnode_pivot_keys::create_from_pivot_keys(const ftnode_pivot_keys &pivotkeys) { + create_from_dbts(pivotkeys._keys, pivotkeys._num_pivots); +} + +void ftnode_pivot_keys::destroy() { + if (_keys != nullptr) { + for (int i = 0; i < _num_pivots; i++) { + toku_destroy_dbt(&_keys[i]); + } + toku_free(_keys); + } + _keys = nullptr; + _num_pivots = 0; + _total_size = 0; +} + +void ftnode_pivot_keys::deserialize_from_rbuf(struct rbuf *rb, int n) { + 
XMALLOC_N(n, _keys); + _num_pivots = n; + _total_size = 0; + for (int i = 0; i < _num_pivots; i++) { + bytevec pivotkeyptr; + uint32_t size; + rbuf_bytes(rb, &pivotkeyptr, &size); + toku_memdup_dbt(&_keys[i], pivotkeyptr, size); + _total_size += size; + } +} + +const DBT *ftnode_pivot_keys::get_pivot(int i) const { + paranoid_invariant(i < _num_pivots); + return &_keys[i]; +} + +void ftnode_pivot_keys::_add_key(const DBT *key, int i) { + toku_clone_dbt(&_keys[i], *key); + _total_size += _keys[i].size; +} + +void ftnode_pivot_keys::_destroy_key(int i) { + invariant(_total_size >= _keys[i].size); + _total_size -= _keys[i].size; + toku_destroy_dbt(&_keys[i]); +} + +void ftnode_pivot_keys::insert_at(const DBT *key, int i) { + invariant(i <= _num_pivots); // it's ok to insert at the end, so we check <= n + + // make space for a new pivot, slide existing keys to the right + REALLOC_N(_num_pivots + 1, _keys); + memmove(&_keys[i + 1], &_keys[i], (_num_pivots - i) * sizeof(DBT)); + + _num_pivots++; + _add_key(key, i); +} + +void ftnode_pivot_keys::append(const ftnode_pivot_keys &pivotkeys) { + REALLOC_N(_num_pivots + pivotkeys._num_pivots, _keys); + for (int i = 0; i < pivotkeys._num_pivots; i++) { + const DBT *key = &pivotkeys._keys[i]; + toku_memdup_dbt(&_keys[_num_pivots + i], key->data, key->size); + } + _num_pivots += pivotkeys._num_pivots; + _total_size += pivotkeys._total_size; +} + +void ftnode_pivot_keys::replace_at(const DBT *key, int i) { + if (i < _num_pivots) { + _destroy_key(i); + _add_key(key, i); + } else { + invariant(i == _num_pivots); // appending to the end is ok + insert_at(key, i); + } +} + +void ftnode_pivot_keys::delete_at(int i) { + invariant(i < _num_pivots); + _destroy_key(i); + + // slide over existing keys + memmove(&_keys[i], &_keys[i + 1], (_num_pivots - 1 - i) * sizeof(DBT)); + + // shrink down to the new size + _num_pivots--; + REALLOC_N(_num_pivots, _keys); +} + +void ftnode_pivot_keys::split_at(int i, ftnode_pivot_keys *other) { + if (i < 
_num_pivots) { + other->create_from_dbts(&_keys[i], _num_pivots - i); + + // destroy everything greater + for (int k = i; k < _num_pivots; k++) { + _destroy_key(k); + } + + _num_pivots = i; + REALLOC_N(_num_pivots, _keys); + } +} + +int ftnode_pivot_keys::num_pivots() const { + return _num_pivots; +} + +size_t ftnode_pivot_keys::total_size() const { + return _total_size; +} + +void ftnode_pivot_keys::serialize_to_wbuf(struct wbuf *wb) const { + for (int i = 0; i < _num_pivots; i++) { + wbuf_nocrc_bytes(wb, _keys[i].data, _keys[i].size); + } +} + // Effect: Fill in N as an empty ftnode. // TODO: Rename toku_ftnode_create void toku_initialize_empty_ftnode(FTNODE n, BLOCKNUM blocknum, int height, int num_children, int layout_version, unsigned int flags) { @@ -108,14 +244,12 @@ void toku_initialize_empty_ftnode(FTNODE n, BLOCKNUM blocknum, int height, int n n->layout_version_original = layout_version; n->layout_version_read_from_disk = layout_version; n->height = height; - n->totalchildkeylens = 0; - n->childkeys = 0; + n->pivotkeys.create_empty(); n->bp = 0; n->n_children = num_children; n->oldest_referenced_xid_known = TXNID_NONE; if (num_children > 0) { - XMALLOC_N(num_children-1, n->childkeys); XMALLOC_N(num_children, n->bp); for (int i = 0; i < num_children; i++) { BP_BLOCKNUM(n,i).b=0; @@ -140,13 +274,8 @@ void toku_initialize_empty_ftnode(FTNODE n, BLOCKNUM blocknum, int height, int n // this is common functionality for toku_ftnode_free and rebalance_ftnode_leaf // MUST NOT do anything besides free the structures that have been allocated void toku_destroy_ftnode_internals(FTNODE node) { - for (int i=0; in_children-1; i++) { - toku_destroy_dbt(&node->childkeys[i]); - } - toku_free(node->childkeys); - node->childkeys = NULL; - - for (int i=0; i < node->n_children; i++) { + node->pivotkeys.destroy(); + for (int i = 0; i < node->n_children; i++) { if (BP_STATE(node,i) == PT_AVAIL) { if (node->height > 0) { destroy_nonleaf_childinfo(BNC(node,i)); @@ -947,9 +1076,7 @@ 
void toku_ftnode_leaf_rebalance(FTNODE node, unsigned int basementnodesize) { // now reallocate pieces and start filling them in invariant(num_children > 0); - node->totalchildkeylens = 0; - XCALLOC_N(num_pivots, node->childkeys); // allocate pointers to pivot structs node->n_children = num_children; XCALLOC_N(num_children, node->bp); // allocate pointers to basements (bp) for (int i = 0; i < num_children; i++) { @@ -959,12 +1086,14 @@ void toku_ftnode_leaf_rebalance(FTNODE node, unsigned int basementnodesize) { // now we start to fill in the data // first the pivots + toku::scoped_malloc pivotkeys_buf(num_pivots * sizeof(DBT)); + DBT *pivotkeys = reinterpret_cast(pivotkeys_buf.get()); for (int i = 0; i < num_pivots; i++) { - uint32_t keylen = key_sizes[new_pivots[i]]; + uint32_t size = key_sizes[new_pivots[i]]; const void *key = key_pointers[new_pivots[i]]; - toku_memdup_dbt(&node->childkeys[i], key, keylen); - node->totalchildkeylens += keylen; + toku_fill_dbt(&pivotkeys[i], key, size); } + node->pivotkeys.create_from_dbts(pivotkeys, num_pivots); uint32_t baseindex_this_bn = 0; // now the basement nodes @@ -1124,31 +1253,18 @@ long toku_bnc_memory_used(NONLEAF_CHILDINFO bnc) { // Message application // -static void -init_childinfo(FTNODE node, int childnum, FTNODE child) { +// Used only by test programs: append a child node to a parent node +void toku_ft_nonleaf_append_child(FTNODE node, FTNODE child, const DBT *pivotkey) { + int childnum = node->n_children; + node->n_children++; + REALLOC_N(node->n_children, node->bp); BP_BLOCKNUM(node,childnum) = child->blocknum; BP_STATE(node,childnum) = PT_AVAIL; BP_WORKDONE(node, childnum) = 0; set_BNC(node, childnum, toku_create_empty_nl()); -} - -static void -init_childkey(FTNODE node, int childnum, const DBT *pivotkey) { - toku_clone_dbt(&node->childkeys[childnum], *pivotkey); - node->totalchildkeylens += pivotkey->size; -} - -// Used only by test programs: append a child node to a parent node -void 
-toku_ft_nonleaf_append_child(FTNODE node, FTNODE child, const DBT *pivotkey) { - int childnum = node->n_children; - node->n_children++; - XREALLOC_N(node->n_children, node->bp); - init_childinfo(node, childnum, child); - XREALLOC_N(node->n_children-1, node->childkeys); if (pivotkey) { invariant(childnum > 0); - init_childkey(node, childnum-1, pivotkey); + node->pivotkeys.insert_at(pivotkey, childnum - 1); } node->dirty = 1; } @@ -1681,7 +1797,7 @@ int toku_ftnode_which_child(FTNODE node, const DBT *k, // check the last key to optimize seq insertions int n = node->n_children-1; - int c = ft_compare_pivot(desc, cmp, k, &node->childkeys[n-1]); + int c = ft_compare_pivot(desc, cmp, k, node->pivotkeys.get_pivot(n - 1)); if (c > 0) return n; // binary search the pivots @@ -1690,7 +1806,7 @@ int toku_ftnode_which_child(FTNODE node, const DBT *k, int mi; while (lo < hi) { mi = (lo + hi) / 2; - c = ft_compare_pivot(desc, cmp, k, &node->childkeys[mi]); + c = ft_compare_pivot(desc, cmp, k, node->pivotkeys.get_pivot(mi)); if (c > 0) { lo = mi+1; continue; @@ -1715,7 +1831,7 @@ toku_ftnode_hot_next_child(FTNODE node, int mi; while (low < hi) { mi = (low + hi) / 2; - int r = ft_compare_pivot(desc, cmp, k, &node->childkeys[mi]); + int r = ft_compare_pivot(desc, cmp, k, node->pivotkeys.get_pivot(mi)); if (r > 0) { low = mi + 1; } else if (r < 0) { diff --git a/ft/node.h b/ft/node.h index 30f497c3612..640995308d4 100644 --- a/ft/node.h +++ b/ft/node.h @@ -93,6 +93,68 @@ PATENT RIGHTS GRANT: #include "ft/fttypes.h" #include "ft/msg_buffer.h" +/* Pivot keys. + * Child 0's keys are <= pivotkeys[0]. + * Child 1's keys are <= pivotkeys[1]. + * Child 1's keys are > pivotkeys[0]. 
+ * etc + */ +class ftnode_pivot_keys { +public: + // effect: create an empty set of pivot keys + void create_empty(); + + // effect: create pivot keys by copying the given DBT array + void create_from_dbts(const DBT *keys, int num_pivots); + + // effect: create pivot keys as a clone of an existing set of pivotkeys + void create_from_pivot_keys(const ftnode_pivot_keys &pivotkeys); + + void destroy(); + + // effect: deserialize pivot keys previously serialized by serialize_to_wbuf() + void deserialize_from_rbuf(struct rbuf *rb, int num_pivots); + + // returns: unowned DBT representing the i'th pivot key + const DBT *get_pivot(int i) const; + + // effect: insert a pivot into the i'th position, shifting others to the right + void insert_at(const DBT *key, int i); + + // effect: append pivotkeys to the end of our own pivot keys + void append(const ftnode_pivot_keys &pivotkeys); + + // effect: replace the pivot at the i'th position + void replace_at(const DBT *key, int i); + + // effect: removes the i'th pivot key, shifting others to the left + void delete_at(int i); + + // effect: split the pivot keys, removing all pivots at position greater + // than or equal to `i' and storing them in *other + // requires: *other is empty (size == 0) + void split_at(int i, ftnode_pivot_keys *other); + + int num_pivots() const; + + // return: the sum of the keys sizes of each pivot + size_t total_size() const; + + // effect: serialize pivot keys to a wbuf + // requires: wbuf has at least ftnode_pivot_keys::total_size() bytes available + void serialize_to_wbuf(struct wbuf *wb) const; + +private: + // adds/destroys keys at a certain index, maintaining _total_size, but not _num_pivots + void _add_key(const DBT *key, int i); + void _destroy_key(int i); + + DBT *_keys; + int _num_pivots; + size_t _total_size; +}; + +// TODO: class me up struct ftnode { MSN max_msn_applied_to_node_on_disk; // max_msn_applied that will be written to disk unsigned int flags; @@ -104,11 +166,11 @@ struct 
ftnode { int height; /* height is always >= 0. 0 for leaf, >0 for nonleaf. */ int dirty; uint32_t fullhash; - int n_children; //for internal nodes, if n_children==fanout+1 then the tree needs to be rebalanced. - // for leaf nodes, represents number of basement nodes - unsigned int totalchildkeylens; - DBT *childkeys; /* Pivot keys. Child 0's keys are <= childkeys[0]. Child 1's keys are <= childkeys[1]. - Child 1's keys are > childkeys[0]. */ + + // for internal nodes, if n_children==fanout+1 then the tree needs to be rebalanced. + // for leaf nodes, represents number of basement nodes + int n_children; + ftnode_pivot_keys pivotkeys; // What's the oldest referenced xid that this node knows about? The real oldest // referenced xid might be younger, but this is our best estimate. We use it @@ -243,8 +305,7 @@ void toku_ftnode_clone_partitions(FTNODE node, FTNODE cloned_node); void toku_initialize_empty_ftnode(FTNODE node, BLOCKNUM blocknum, int height, int num_children, int layout_version, unsigned int flags); -int toku_ftnode_which_child(FTNODE node, const DBT *k, - DESCRIPTOR desc, ft_compare_func cmp); +int toku_ftnode_which_child(FTNODE node, const DBT *k, DESCRIPTOR desc, ft_compare_func cmp); // // Field in ftnode_fetch_extra that tells the diff --git a/ft/tests/ft-bfe-query.cc b/ft/tests/ft-bfe-query.cc index 4b9da2a8270..00b93345762 100644 --- a/ft/tests/ft-bfe-query.cc +++ b/ft/tests/ft-bfe-query.cc @@ -384,10 +384,10 @@ test_prefetching(void) { uint64_t key2 = 200; MALLOC_N(sn.n_children, sn.bp); - MALLOC_N(sn.n_children-1, sn.childkeys); - toku_memdup_dbt(&sn.childkeys[0], &key1, sizeof(key1)); - toku_memdup_dbt(&sn.childkeys[1], &key2, sizeof(key2)); - sn.totalchildkeylens = sizeof(key1) + sizeof(key2); + DBT pivotkeys[2]; + toku_fill_dbt(&pivotkeys[0], &key1, sizeof(key1)); + toku_fill_dbt(&pivotkeys[1], &key2, sizeof(key2)); + sn.pivotkeys.create_from_dbts(pivotkeys, 2); BP_BLOCKNUM(&sn, 0).b = 30; BP_BLOCKNUM(&sn, 1).b = 35; BP_BLOCKNUM(&sn, 2).b = 40; 
@@ -449,13 +449,7 @@ test_prefetching(void) { test_prefetch_read(fd, ft, ft_h); test_subset_read(fd, ft, ft_h); - toku_free(sn.childkeys[0].data); - toku_free(sn.childkeys[1].data); - destroy_nonleaf_childinfo(BNC(&sn, 0)); - destroy_nonleaf_childinfo(BNC(&sn, 1)); - destroy_nonleaf_childinfo(BNC(&sn, 2)); - toku_free(sn.bp); - toku_free(sn.childkeys); + toku_destroy_ftnode_internals(&sn); toku_block_free(ft_h->blocktable, BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); toku_blocktable_destroy(&ft_h->blocktable); diff --git a/ft/tests/ft-clock-test.cc b/ft/tests/ft-clock-test.cc index 5c0d308cfab..1590c9e9019 100644 --- a/ft/tests/ft-clock-test.cc +++ b/ft/tests/ft-clock-test.cc @@ -309,7 +309,6 @@ test_serialize_nonleaf(void) { // source_ft.fd=fd; sn.max_msn_applied_to_node_on_disk.msn = 0; - char *hello_string; sn.flags = 0x11223344; sn.blocknum.b = 20; sn.layout_version = FT_LAYOUT_VERSION; @@ -318,11 +317,9 @@ test_serialize_nonleaf(void) { sn.n_children = 2; sn.dirty = 1; sn.oldest_referenced_xid_known = TXNID_NONE; - hello_string = toku_strdup("hello"); MALLOC_N(2, sn.bp); - MALLOC_N(1, sn.childkeys); - toku_fill_dbt(&sn.childkeys[0], hello_string, 6); - sn.totalchildkeylens = 6; + DBT pivotkey; + sn.pivotkeys.create_from_dbts(toku_fill_dbt(&pivotkey, "hello", 6), 1); BP_BLOCKNUM(&sn, 0).b = 30; BP_BLOCKNUM(&sn, 1).b = 35; BP_STATE(&sn,0) = PT_AVAIL; @@ -384,11 +381,7 @@ test_serialize_nonleaf(void) { test1(fd, ft_h, &dn); test2(fd, ft_h, &dn); - toku_free(hello_string); - destroy_nonleaf_childinfo(BNC(&sn, 0)); - destroy_nonleaf_childinfo(BNC(&sn, 1)); - toku_free(sn.bp); - toku_free(sn.childkeys); + toku_destroy_ftnode_internals(&sn); toku_free(ndd); toku_block_free(ft_h->blocktable, BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); @@ -419,9 +412,8 @@ test_serialize_leaf(void) { sn.dirty = 1; sn.oldest_referenced_xid_known = TXNID_NONE; MALLOC_N(sn.n_children, sn.bp); - MALLOC_N(1, sn.childkeys); - toku_memdup_dbt(&sn.childkeys[0], "b", 2); - sn.totalchildkeylens = 2; + DBT 
pivotkey; + sn.pivotkeys.create_from_dbts(toku_fill_dbt(&pivotkey, "b", 2), 1); BP_STATE(&sn,0) = PT_AVAIL; BP_STATE(&sn,1) = PT_AVAIL; set_BLB(&sn, 0, toku_create_empty_bn()); @@ -468,14 +460,7 @@ test_serialize_leaf(void) { test1(fd, ft_h, &dn); test3_leaf(fd, ft_h,&dn); - for (int i = 0; i < sn.n_children-1; ++i) { - toku_free(sn.childkeys[i].data); - } - for (int i = 0; i < sn.n_children; i++) { - destroy_basement_node(BLB(&sn, i)); - } - toku_free(sn.bp); - toku_free(sn.childkeys); + toku_destroy_ftnode_internals(&sn); toku_block_free(ft_h->blocktable, BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); toku_blocktable_destroy(&ft_h->blocktable); diff --git a/ft/tests/ft-serialize-benchmark.cc b/ft/tests/ft-serialize-benchmark.cc index 6ee9b7f89f9..5f7266533d3 100644 --- a/ft/tests/ft-serialize-benchmark.cc +++ b/ft/tests/ft-serialize-benchmark.cc @@ -152,8 +152,7 @@ test_serialize_leaf(int valsize, int nelts, double entropy, int ser_runs, int de sn->dirty = 1; sn->oldest_referenced_xid_known = TXNID_NONE; MALLOC_N(sn->n_children, sn->bp); - MALLOC_N(sn->n_children-1, sn->childkeys); - sn->totalchildkeylens = 0; + sn->pivotkeys.create_empty(); for (int i = 0; i < sn->n_children; ++i) { BP_STATE(sn,i) = PT_AVAIL; set_BLB(sn, i, toku_create_empty_bn()); @@ -181,8 +180,8 @@ test_serialize_leaf(int valsize, int nelts, double entropy, int ser_runs, int de ); } if (ck < 7) { - toku_memdup_dbt(&sn->childkeys[ck], &k, sizeof k); - sn->totalchildkeylens += sizeof k; + DBT pivotkey; + sn->pivotkeys.insert_at(toku_fill_dbt(&pivotkey, &k, sizeof(k)), ck); } } @@ -307,8 +306,7 @@ test_serialize_nonleaf(int valsize, int nelts, double entropy, int ser_runs, int sn.dirty = 1; sn.oldest_referenced_xid_known = TXNID_NONE; MALLOC_N(sn.n_children, sn.bp); - MALLOC_N(sn.n_children-1, sn.childkeys); - sn.totalchildkeylens = 0; + sn.pivotkeys.create_empty(); for (int i = 0; i < sn.n_children; ++i) { BP_BLOCKNUM(&sn, i).b = 30 + (i*5); BP_STATE(&sn,i) = PT_AVAIL; @@ -337,8 +335,8 @@ 
test_serialize_nonleaf(int valsize, int nelts, double entropy, int ser_runs, int toku_bnc_insert_msg(bnc, &k, sizeof k, buf, valsize, FT_NONE, next_dummymsn(), xids_123, true, NULL, long_key_cmp); } if (ck < 7) { - toku_memdup_dbt(&sn.childkeys[ck], &k, sizeof k); - sn.totalchildkeylens += sizeof k; + DBT pivotkey; + sn.pivotkeys.insert_at(toku_fill_dbt(&pivotkey, &k, sizeof(k)), ck); } } @@ -408,15 +406,7 @@ test_serialize_nonleaf(int valsize, int nelts, double entropy, int ser_runs, int ); toku_ftnode_free(&dn); - - for (int i = 0; i < sn.n_children-1; ++i) { - toku_free(sn.childkeys[i].data); - } - for (int i = 0; i < sn.n_children; ++i) { - destroy_nonleaf_childinfo(BNC(&sn, i)); - } - toku_free(sn.bp); - toku_free(sn.childkeys); + toku_destroy_ftnode_internals(&sn); toku_block_free(ft_h->blocktable, BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); toku_blocktable_destroy(&ft_h->blocktable); diff --git a/ft/tests/ft-serialize-test.cc b/ft/tests/ft-serialize-test.cc index 0c8e29df4dd..5e2e93f09b3 100644 --- a/ft/tests/ft-serialize-test.cc +++ b/ft/tests/ft-serialize-test.cc @@ -279,9 +279,8 @@ test_serialize_leaf_check_msn(enum ftnode_verify_type bft, bool do_clone) { sn.dirty = 1; sn.oldest_referenced_xid_known = TXNID_NONE; MALLOC_N(sn.n_children, sn.bp); - MALLOC_N(1, sn.childkeys); - toku_memdup_dbt(&sn.childkeys[0], "b", 2); - sn.totalchildkeylens = 2; + DBT pivotkey; + sn.pivotkeys.create_from_dbts(toku_fill_dbt(&pivotkey, "b", 2), 1); BP_STATE(&sn,0) = PT_AVAIL; BP_STATE(&sn,1) = PT_AVAIL; set_BLB(&sn, 0, toku_create_empty_bn()); @@ -346,7 +345,6 @@ test_serialize_leaf_check_msn(enum ftnode_verify_type bft, bool do_clone) { elts[1].init("b", "bval"); elts[2].init("x", "xval"); const uint32_t npartitions = dn->n_children; - assert(dn->totalchildkeylens==(2*(npartitions-1))); uint32_t last_i = 0; for (uint32_t bn = 0; bn < npartitions; ++bn) { assert(BLB_MAX_MSN_APPLIED(dn, bn).msn == POSTSERIALIZE_MSN_ON_DISK.msn); @@ -363,7 +361,7 @@ 
test_serialize_leaf_check_msn(enum ftnode_verify_type bft, bool do_clone) { assert(leafentry_memsize(curr_le) == leafentry_memsize(elts[last_i].le)); assert(memcmp(curr_le, elts[last_i].le, leafentry_memsize(curr_le)) == 0); if (bn < npartitions-1) { - assert(strcmp((char*)dn->childkeys[bn].data, elts[last_i].keyp) <= 0); + assert(strcmp((char*)dn->pivotkeys.get_pivot(bn)->data, elts[last_i].keyp) <= 0); } // TODO for later, get a key comparison here as well last_i++; @@ -372,16 +370,9 @@ test_serialize_leaf_check_msn(enum ftnode_verify_type bft, bool do_clone) { } assert(last_i == 3); } - toku_ftnode_free(&dn); - for (int i = 0; i < sn.n_children-1; ++i) { - toku_free(sn.childkeys[i].data); - } - for (int i = 0; i < sn.n_children; i++) { - destroy_basement_node(BLB(&sn, i)); - } - toku_free(sn.bp); - toku_free(sn.childkeys); + toku_ftnode_free(&dn); + toku_destroy_ftnode_internals(&sn); toku_block_free(ft_h->blocktable, BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); toku_blocktable_destroy(&ft_h->blocktable); @@ -414,8 +405,7 @@ test_serialize_leaf_with_large_pivots(enum ftnode_verify_type bft, bool do_clone sn.oldest_referenced_xid_known = TXNID_NONE; MALLOC_N(sn.n_children, sn.bp); - MALLOC_N(sn.n_children-1, sn.childkeys); - sn.totalchildkeylens = (sn.n_children-1)*sizeof(int); + sn.pivotkeys.create_empty(); for (int i = 0; i < sn.n_children; ++i) { BP_STATE(&sn,i) = PT_AVAIL; set_BLB(&sn, i, toku_create_empty_bn()); @@ -430,7 +420,8 @@ test_serialize_leaf_with_large_pivots(enum ftnode_verify_type bft, bool do_clone uint32_t keylen; void* curr_key; BLB_DATA(&sn, i)->fetch_key_and_len(0, &keylen, &curr_key); - toku_memdup_dbt(&sn.childkeys[i], curr_key, keylen); + DBT pivotkey; + sn.pivotkeys.insert_at(toku_fill_dbt(&pivotkey, curr_key, keylen), i); } } @@ -489,7 +480,6 @@ test_serialize_leaf_with_large_pivots(enum ftnode_verify_type bft, bool do_clone } } const uint32_t npartitions = dn->n_children; - assert(dn->totalchildkeylens==(keylens*(npartitions-1))); uint32_t 
last_i = 0; for (uint32_t bn = 0; bn < npartitions; ++bn) { assert(dest_ndd[bn].start > 0); @@ -506,7 +496,7 @@ test_serialize_leaf_with_large_pivots(enum ftnode_verify_type bft, bool do_clone assert(leafentry_memsize(curr_le) == leafentry_memsize(les[last_i].le)); assert(memcmp(curr_le, les[last_i].le, leafentry_memsize(curr_le)) == 0); if (bn < npartitions-1) { - assert(strcmp((char*)dn->childkeys[bn].data, les[last_i].keyp) <= 0); + assert(strcmp((char*)dn->pivotkeys.get_pivot(bn)->data, les[last_i].keyp) <= 0); } // TODO for later, get a key comparison here as well last_i++; @@ -517,14 +507,7 @@ test_serialize_leaf_with_large_pivots(enum ftnode_verify_type bft, bool do_clone } toku_ftnode_free(&dn); - for (int i = 0; i < sn.n_children-1; ++i) { - toku_free(sn.childkeys[i].data); - } - toku_free(sn.childkeys); - for (int i = 0; i < sn.n_children; i++) { - destroy_basement_node(BLB(&sn, i)); - } - toku_free(sn.bp); + toku_destroy_ftnode_internals(&sn); toku_block_free(ft_h->blocktable, BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); toku_blocktable_destroy(&ft_h->blocktable); @@ -555,8 +538,7 @@ test_serialize_leaf_with_many_rows(enum ftnode_verify_type bft, bool do_clone) { sn.oldest_referenced_xid_known = TXNID_NONE; XMALLOC_N(sn.n_children, sn.bp); - XMALLOC_N(sn.n_children-1, sn.childkeys); - sn.totalchildkeylens = (sn.n_children-1)*sizeof(int); + sn.pivotkeys.create_empty(); for (int i = 0; i < sn.n_children; ++i) { BP_STATE(&sn,i) = PT_AVAIL; set_BLB(&sn, i, toku_create_empty_bn()); @@ -621,7 +603,6 @@ test_serialize_leaf_with_many_rows(enum ftnode_verify_type bft, bool do_clone) { } } const uint32_t npartitions = dn->n_children; - assert(dn->totalchildkeylens==(sizeof(int)*(npartitions-1))); uint32_t last_i = 0; for (uint32_t bn = 0; bn < npartitions; ++bn) { assert(dest_ndd[bn].start > 0); @@ -638,7 +619,7 @@ test_serialize_leaf_with_many_rows(enum ftnode_verify_type bft, bool do_clone) { assert(leafentry_memsize(curr_le) == leafentry_memsize(les[last_i].le)); 
assert(memcmp(curr_le, les[last_i].le, leafentry_memsize(curr_le)) == 0); if (bn < npartitions-1) { - uint32_t *CAST_FROM_VOIDP(pivot, dn->childkeys[bn].data); + uint32_t *CAST_FROM_VOIDP(pivot, dn->pivotkeys.get_pivot(bn)->data); void* tmp = les[last_i].keyp; uint32_t *CAST_FROM_VOIDP(item, tmp); assert(*pivot >= *item); @@ -654,14 +635,7 @@ test_serialize_leaf_with_many_rows(enum ftnode_verify_type bft, bool do_clone) { } toku_ftnode_free(&dn); - for (int i = 0; i < sn.n_children-1; ++i) { - toku_free(sn.childkeys[i].data); - } - for (int i = 0; i < sn.n_children; i++) { - destroy_basement_node(BLB(&sn, i)); - } - toku_free(sn.bp); - toku_free(sn.childkeys); + toku_destroy_ftnode_internals(&sn); toku_block_free(ft_h->blocktable, BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); toku_blocktable_destroy(&ft_h->blocktable); @@ -696,8 +670,7 @@ test_serialize_leaf_with_large_rows(enum ftnode_verify_type bft, bool do_clone) sn.oldest_referenced_xid_known = TXNID_NONE; MALLOC_N(sn.n_children, sn.bp); - MALLOC_N(sn.n_children-1, sn.childkeys); - sn.totalchildkeylens = (sn.n_children-1)*8; + sn.pivotkeys.create_empty(); for (int i = 0; i < sn.n_children; ++i) { BP_STATE(&sn,i) = PT_AVAIL; set_BLB(&sn, i, toku_create_empty_bn()); @@ -771,7 +744,6 @@ test_serialize_leaf_with_large_rows(enum ftnode_verify_type bft, bool do_clone) } const uint32_t npartitions = dn->n_children; assert(npartitions == nrows); - assert(dn->totalchildkeylens==(key_size*(npartitions-1))); uint32_t last_i = 0; for (uint32_t bn = 0; bn < npartitions; ++bn) { assert(dest_ndd[bn].start > 0); @@ -788,7 +760,7 @@ test_serialize_leaf_with_large_rows(enum ftnode_verify_type bft, bool do_clone) assert(leafentry_memsize(curr_le) == leafentry_memsize(les[last_i].le)); assert(memcmp(curr_le, les[last_i].le, leafentry_memsize(curr_le)) == 0); if (bn < npartitions-1) { - assert(strcmp((char*)dn->childkeys[bn].data, (char*)(les[last_i].keyp)) <= 0); + assert(strcmp((char*)dn->pivotkeys.get_pivot(bn)->data, 
(char*)(les[last_i].keyp)) <= 0); } // TODO for later, get a key comparison here as well last_i++; @@ -800,14 +772,7 @@ test_serialize_leaf_with_large_rows(enum ftnode_verify_type bft, bool do_clone) } toku_ftnode_free(&dn); - for (int i = 0; i < sn.n_children-1; ++i) { - toku_free(sn.childkeys[i].data); - } - for (int i = 0; i < sn.n_children; i++) { - destroy_basement_node(BLB(&sn, i)); - } - toku_free(sn.bp); - toku_free(sn.childkeys); + toku_destroy_ftnode_internals(&sn); toku_block_free(ft_h->blocktable, BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); toku_blocktable_destroy(&ft_h->blocktable); @@ -839,14 +804,14 @@ test_serialize_leaf_with_empty_basement_nodes(enum ftnode_verify_type bft, bool sn.dirty = 1; sn.oldest_referenced_xid_known = TXNID_NONE; MALLOC_N(sn.n_children, sn.bp); - MALLOC_N(sn.n_children-1, sn.childkeys); - toku_memdup_dbt(&sn.childkeys[0], "A", 2); - toku_memdup_dbt(&sn.childkeys[1], "a", 2); - toku_memdup_dbt(&sn.childkeys[2], "a", 2); - toku_memdup_dbt(&sn.childkeys[3], "b", 2); - toku_memdup_dbt(&sn.childkeys[4], "b", 2); - toku_memdup_dbt(&sn.childkeys[5], "x", 2); - sn.totalchildkeylens = (sn.n_children-1)*2; + DBT pivotkeys[6]; + toku_fill_dbt(&pivotkeys[0], "A", 2); + toku_fill_dbt(&pivotkeys[1], "a", 2); + toku_fill_dbt(&pivotkeys[2], "a", 2); + toku_fill_dbt(&pivotkeys[3], "b", 2); + toku_fill_dbt(&pivotkeys[4], "b", 2); + toku_fill_dbt(&pivotkeys[5], "x", 2); + sn.pivotkeys.create_from_dbts(pivotkeys, 6); for (int i = 0; i < sn.n_children; ++i) { BP_STATE(&sn,i) = PT_AVAIL; set_BLB(&sn, i, toku_create_empty_bn()); @@ -909,7 +874,6 @@ test_serialize_leaf_with_empty_basement_nodes(enum ftnode_verify_type bft, bool elts[1].init("b", "bval"); elts[2].init("x", "xval"); const uint32_t npartitions = dn->n_children; - assert(dn->totalchildkeylens==(2*(npartitions-1))); uint32_t last_i = 0; for (uint32_t bn = 0; bn < npartitions; ++bn) { assert(dest_ndd[bn].start > 0); @@ -925,7 +889,7 @@ test_serialize_leaf_with_empty_basement_nodes(enum 
ftnode_verify_type bft, bool assert(leafentry_memsize(curr_le) == leafentry_memsize(elts[last_i].le)); assert(memcmp(curr_le, elts[last_i].le, leafentry_memsize(curr_le)) == 0); if (bn < npartitions-1) { - assert(strcmp((char*)dn->childkeys[bn].data, (char*)(elts[last_i].keyp)) <= 0); + assert(strcmp((char*)dn->pivotkeys.get_pivot(bn)->data, (char*)(elts[last_i].keyp)) <= 0); } // TODO for later, get a key comparison here as well last_i++; @@ -934,16 +898,9 @@ test_serialize_leaf_with_empty_basement_nodes(enum ftnode_verify_type bft, bool } assert(last_i == 3); } - toku_ftnode_free(&dn); - for (int i = 0; i < sn.n_children-1; ++i) { - toku_free(sn.childkeys[i].data); - } - for (int i = 0; i < sn.n_children; i++) { - destroy_basement_node(BLB(&sn, i)); - } - toku_free(sn.bp); - toku_free(sn.childkeys); + toku_ftnode_free(&dn); + toku_destroy_ftnode_internals(&sn); toku_block_free(ft_h->blocktable, BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); toku_blocktable_destroy(&ft_h->blocktable); @@ -974,11 +931,11 @@ test_serialize_leaf_with_multiple_empty_basement_nodes(enum ftnode_verify_type b sn.dirty = 1; sn.oldest_referenced_xid_known = TXNID_NONE; MALLOC_N(sn.n_children, sn.bp); - MALLOC_N(sn.n_children-1, sn.childkeys); - toku_memdup_dbt(&sn.childkeys[0], "A", 2); - toku_memdup_dbt(&sn.childkeys[1], "A", 2); - toku_memdup_dbt(&sn.childkeys[2], "A", 2); - sn.totalchildkeylens = (sn.n_children-1)*2; + DBT pivotkeys[3]; + toku_fill_dbt(&pivotkeys[0], "A", 2); + toku_fill_dbt(&pivotkeys[1], "A", 2); + toku_fill_dbt(&pivotkeys[2], "A", 2); + sn.pivotkeys.create_from_dbts(pivotkeys, 3); for (int i = 0; i < sn.n_children; ++i) { BP_STATE(&sn,i) = PT_AVAIL; set_BLB(&sn, i, toku_create_empty_bn()); @@ -1031,7 +988,6 @@ test_serialize_leaf_with_multiple_empty_basement_nodes(enum ftnode_verify_type b assert(dn->n_children == 1); { const uint32_t npartitions = dn->n_children; - assert(dn->totalchildkeylens==(2*(npartitions-1))); for (uint32_t i = 0; i < npartitions; ++i) { 
assert(dest_ndd[i].start > 0); assert(dest_ndd[i].size > 0); @@ -1041,16 +997,9 @@ test_serialize_leaf_with_multiple_empty_basement_nodes(enum ftnode_verify_type b assert(BLB_DATA(dn, i)->num_klpairs() == 0); } } + toku_ftnode_free(&dn); - - for (int i = 0; i < sn.n_children-1; ++i) { - toku_free(sn.childkeys[i].data); - } - for (int i = 0; i < sn.n_children; i++) { - destroy_basement_node(BLB(&sn, i)); - } - toku_free(sn.bp); - toku_free(sn.childkeys); + toku_destroy_ftnode_internals(&sn); toku_block_free(ft_h->blocktable, BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); toku_blocktable_destroy(&ft_h->blocktable); @@ -1084,9 +1033,8 @@ test_serialize_nonleaf(enum ftnode_verify_type bft, bool do_clone) { sn.dirty = 1; sn.oldest_referenced_xid_known = TXNID_NONE; MALLOC_N(2, sn.bp); - MALLOC_N(1, sn.childkeys); - toku_memdup_dbt(&sn.childkeys[0], "hello", 6); - sn.totalchildkeylens = 6; + DBT pivotkey; + sn.pivotkeys.create_from_dbts(toku_fill_dbt(&pivotkey, "hello", 6), 1); BP_BLOCKNUM(&sn, 0).b = 30; BP_BLOCKNUM(&sn, 1).b = 35; BP_STATE(&sn,0) = PT_AVAIL; @@ -1154,9 +1102,8 @@ test_serialize_nonleaf(enum ftnode_verify_type bft, bool do_clone) { assert(dn->layout_version_read_from_disk ==FT_LAYOUT_VERSION); assert(dn->height == 1); assert(dn->n_children==2); - assert(strcmp((char*)dn->childkeys[0].data, "hello")==0); - assert(dn->childkeys[0].size==6); - assert(dn->totalchildkeylens==6); + assert(strcmp((char*)dn->pivotkeys.get_pivot(0)->data, "hello")==0); + assert(dn->pivotkeys.get_pivot(0)->size==6); assert(BP_BLOCKNUM(dn,0).b==30); assert(BP_BLOCKNUM(dn,1).b==35); @@ -1169,12 +1116,7 @@ test_serialize_nonleaf(enum ftnode_verify_type bft, bool do_clone) { assert(src_msg_buffer2->equals(dest_msg_buffer2)); toku_ftnode_free(&dn); - - toku_free(sn.childkeys[0].data); - destroy_nonleaf_childinfo(BNC(&sn, 0)); - destroy_nonleaf_childinfo(BNC(&sn, 1)); - toku_free(sn.bp); - toku_free(sn.childkeys); + toku_destroy_ftnode_internals(&sn); toku_block_free(ft_h->blocktable, 
BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); toku_blocktable_destroy(&ft_h->blocktable); diff --git a/ft/tests/mempool-115.cc b/ft/tests/mempool-115.cc index 07a97f9e49a..4ecb624763f 100644 --- a/ft/tests/mempool-115.cc +++ b/ft/tests/mempool-115.cc @@ -157,9 +157,8 @@ public: sn.dirty = 1; sn.oldest_referenced_xid_known = TXNID_NONE; MALLOC_N(sn.n_children, sn.bp); - MALLOC_N(1, sn.childkeys); - toku_memdup_dbt(&sn.childkeys[0], "b", 2); - sn.totalchildkeylens = 2; + DBT pivotkey; + sn.pivotkeys.create_from_dbts(toku_fill_dbt(&pivotkey, "b", 2), 1); BP_STATE(&sn,0) = PT_AVAIL; BP_STATE(&sn,1) = PT_AVAIL; set_BLB(&sn, 0, toku_create_empty_bn()); @@ -168,8 +167,6 @@ public: le_add_to_bn(BLB_DATA(&sn, 0), 1, "b", 2, "bval", 5); le_add_to_bn(BLB_DATA(&sn, 1), 0, "x", 2, "xval", 5); - - // now this is the test. If I keep getting space for overwrite // like crazy, it should expose the bug bn_data* bnd = BLB_DATA(&sn, 0); @@ -187,15 +184,7 @@ public: // on. It may be that some algorithm has changed. 
assert(new_size < 5*old_size); - - for (int i = 0; i < sn.n_children-1; ++i) { - toku_free(sn.childkeys[i].data); - } - for (int i = 0; i < sn.n_children; i++) { - destroy_basement_node(BLB(&sn, i)); - } - toku_free(sn.bp); - toku_free(sn.childkeys); + toku_destroy_ftnode_internals(&sn); } }; diff --git a/ft/tests/orthopush-flush.cc b/ft/tests/orthopush-flush.cc index 83dce2a08ce..139de67bf91 100644 --- a/ft/tests/orthopush-flush.cc +++ b/ft/tests/orthopush-flush.cc @@ -527,7 +527,7 @@ flush_to_internal_multiple(FT_HANDLE t) { set_BNC(child, i, child_bncs[i]); BP_STATE(child, i) = PT_AVAIL; if (i < 7) { - toku_clone_dbt(&child->childkeys[i], *childkeys[i]->u.id.key); + child->pivotkeys.insert_at(childkeys[i]->u.id.key, i); } } @@ -717,7 +717,7 @@ flush_to_leaf(FT_HANDLE t, bool make_leaf_up_to_date, bool use_flush) { int num_parent_messages = i; for (i = 0; i < 7; ++i) { - toku_clone_dbt(&child->childkeys[i], childkeys[i]); + child->pivotkeys.insert_at(&childkeys[i], i); } if (make_leaf_up_to_date) { @@ -942,7 +942,7 @@ flush_to_leaf_with_keyrange(FT_HANDLE t, bool make_leaf_up_to_date) { int num_parent_messages = i; for (i = 0; i < 7; ++i) { - toku_clone_dbt(&child->childkeys[i], childkeys[i]); + child->pivotkeys.insert_at(&childkeys[i], i); } if (make_leaf_up_to_date) { @@ -1148,8 +1148,8 @@ compare_apply_and_flush(FT_HANDLE t, bool make_leaf_up_to_date) { int num_parent_messages = i; for (i = 0; i < 7; ++i) { - toku_clone_dbt(&child1->childkeys[i], child1keys[i]); - toku_clone_dbt(&child2->childkeys[i], child2keys[i]); + child1->pivotkeys.insert_at(&child1keys[i], i); + child2->pivotkeys.insert_at(&child2keys[i], i); } if (make_leaf_up_to_date) { diff --git a/ft/tests/test3884.cc b/ft/tests/test3884.cc index c18a29d0676..e076731141f 100644 --- a/ft/tests/test3884.cc +++ b/ft/tests/test3884.cc @@ -159,7 +159,6 @@ setup_ftnode_header(struct ftnode *node) node->layout_version_original = FT_LAYOUT_VERSION; node->height = 0; node->dirty = 1; - node->totalchildkeylens 
= 0; node->oldest_referenced_xid_known = TXNID_NONE; } @@ -169,12 +168,12 @@ setup_ftnode_partitions(struct ftnode *node, int n_children, const MSN msn, size node->n_children = n_children; node->max_msn_applied_to_node_on_disk = msn; MALLOC_N(node->n_children, node->bp); - MALLOC_N(node->n_children - 1, node->childkeys); for (int bn = 0; bn < node->n_children; ++bn) { BP_STATE(node, bn) = PT_AVAIL; set_BLB(node, bn, toku_create_empty_bn()); BLB_MAX_MSN_APPLIED(node, bn) = msn; } + node->pivotkeys.create_empty(); } static void @@ -210,8 +209,8 @@ test_split_on_boundary(void) insert_dummy_value(&sn, bn, k, i); } if (bn < sn.n_children - 1) { - toku_memdup_dbt(&sn.childkeys[bn], &k, sizeof k); - sn.totalchildkeylens += (sizeof k); + DBT pivotkey; + sn.pivotkeys.insert_at(toku_fill_dbt(&pivotkey, &k, sizeof(k)), bn); } } @@ -233,10 +232,7 @@ test_split_on_boundary(void) r = toku_close_ft_handle_nolsn(ft, NULL); assert(r == 0); toku_cachetable_close(&ct); - if (splitk.data) { - toku_free(splitk.data); - } - + toku_destroy_dbt(&splitk); toku_destroy_ftnode_internals(&sn); } @@ -270,8 +266,8 @@ test_split_with_everything_on_the_left(void) k = bn * eltsperbn + i; big_val_size += insert_dummy_value(&sn, bn, k, i); } - toku_memdup_dbt(&sn.childkeys[bn], &k, sizeof k); - sn.totalchildkeylens += (sizeof k); + DBT pivotkey; + sn.pivotkeys.insert_at(toku_fill_dbt(&pivotkey, &k, sizeof(k)), bn); } else { k = bn * eltsperbn; // we want this to be as big as the rest of our data and a @@ -300,10 +296,7 @@ test_split_with_everything_on_the_left(void) r = toku_close_ft_handle_nolsn(ft, NULL); assert(r == 0); toku_cachetable_close(&ct); - if (splitk.data) { - toku_free(splitk.data); - } - + toku_destroy_dbt(&splitk); toku_destroy_ftnode_internals(&sn); } @@ -339,8 +332,8 @@ test_split_on_boundary_of_last_node(void) k = bn * eltsperbn + i; big_val_size += insert_dummy_value(&sn, bn, k, i); } - toku_memdup_dbt(&sn.childkeys[bn], &k, sizeof k); - sn.totalchildkeylens += (sizeof k); + DBT 
pivotkey; + sn.pivotkeys.insert_at(toku_fill_dbt(&pivotkey, &k, sizeof(k)), bn); } else { k = bn * eltsperbn; // we want this to be slightly smaller than all the rest of @@ -372,10 +365,7 @@ test_split_on_boundary_of_last_node(void) r = toku_close_ft_handle_nolsn(ft, NULL); assert(r == 0); toku_cachetable_close(&ct); - if (splitk.data) { - toku_free(splitk.data); - } - + toku_destroy_dbt(&splitk); toku_destroy_ftnode_internals(&sn); } @@ -405,8 +395,8 @@ test_split_at_begin(void) totalbytes += insert_dummy_value(&sn, bn, k, i-1); } if (bn < sn.n_children - 1) { - toku_memdup_dbt(&sn.childkeys[bn], &k, sizeof k); - sn.totalchildkeylens += (sizeof k); + DBT pivotkey; + sn.pivotkeys.insert_at(toku_fill_dbt(&pivotkey, &k, sizeof(k)), bn); } } { // now add the first element @@ -436,10 +426,7 @@ test_split_at_begin(void) r = toku_close_ft_handle_nolsn(ft, NULL); assert(r == 0); toku_cachetable_close(&ct); - if (splitk.data) { - toku_free(splitk.data); - } - + toku_destroy_dbt(&splitk); toku_destroy_ftnode_internals(&sn); } @@ -476,8 +463,8 @@ test_split_at_end(void) } } if (bn < sn.n_children - 1) { - toku_memdup_dbt(&sn.childkeys[bn], &k, sizeof k); - sn.totalchildkeylens += (sizeof k); + DBT pivotkey; + sn.pivotkeys.insert_at(toku_fill_dbt(&pivotkey, &k, sizeof(k)), bn); } } @@ -496,10 +483,7 @@ test_split_at_end(void) r = toku_close_ft_handle_nolsn(ft, NULL); assert(r == 0); toku_cachetable_close(&ct); - if (splitk.data) { - toku_free(splitk.data); - } - + toku_destroy_dbt(&splitk); toku_destroy_ftnode_internals(&sn); } @@ -530,8 +514,8 @@ test_split_odd_nodes(void) insert_dummy_value(&sn, bn, k, i); } if (bn < sn.n_children - 1) { - toku_memdup_dbt(&sn.childkeys[bn], &k, sizeof k); - sn.totalchildkeylens += (sizeof k); + DBT pivotkey; + sn.pivotkeys.insert_at(toku_fill_dbt(&pivotkey, &k, sizeof(k)), bn); } } @@ -553,10 +537,7 @@ test_split_odd_nodes(void) r = toku_close_ft_handle_nolsn(ft, NULL); assert(r == 0); toku_cachetable_close(&ct); - if (splitk.data) { - 
toku_free(splitk.data); - } - + toku_destroy_dbt(&splitk); toku_destroy_ftnode_internals(&sn); } diff --git a/ft/tokuftdump.cc b/ft/tokuftdump.cc index 04b7df280dc..d49046e8c8f 100644 --- a/ft/tokuftdump.cc +++ b/ft/tokuftdump.cc @@ -254,11 +254,11 @@ static void dump_node(int fd, BLOCKNUM blocknum, FT h) { ); printf(" n_children=%d\n", n->n_children); - printf(" total_childkeylens=%u\n", n->totalchildkeylens); + printf(" pivotkeys.total_size()=%u\n", (unsigned) n->pivotkeys.total_size()); printf(" pivots:\n"); for (int i=0; in_children-1; i++) { - const DBT *piv = &n->childkeys[i]; + const DBT *piv = n->pivotkeys.get_pivot(i); printf(" pivot %2d:", i); if (n->flags) printf(" flags=%x ", n->flags); diff --git a/ft/ybt.cc b/ft/ybt.cc index 68fd3c178ed..42cfecd236c 100644 --- a/ft/ybt.cc +++ b/ft/ybt.cc @@ -202,14 +202,6 @@ DBT *toku_copyref_dbt(DBT *dst, const DBT src) { return dst; } -DBT *toku_copy_dbt(DBT *dst, const DBT &src) { - dst->flags = src.flags; - dst->ulen = src.ulen; - dst->size = src.size; - dst->data = src.data; - return dst; -} - DBT *toku_clone_dbt(DBT *dst, const DBT &src) { return toku_memdup_dbt(dst, src.data, src.size); } diff --git a/ft/ybt.h b/ft/ybt.h index 4ddffbafc00..84293f94b9c 100644 --- a/ft/ybt.h +++ b/ft/ybt.h @@ -112,8 +112,6 @@ DBT *toku_memdup_dbt(DBT *dbt, const void *k, size_t len); DBT *toku_copyref_dbt(DBT *dst, const DBT src); -DBT *toku_copy_dbt(DBT *dst, const DBT &src); - DBT *toku_clone_dbt(DBT *dst, const DBT &src); int toku_dbt_set(ITEMLEN len, bytevec val, DBT *d, struct simple_dbt *sdbt); From 0a18b89ad6f070ebb521223725c16afbdc3663c1 Mon Sep 17 00:00:00 2001 From: John Esmet Date: Sat, 14 Jun 2014 21:04:43 -0400 Subject: [PATCH 023/190] FT-259 Placate the build slaves with their exotic toolchain. 
--- util/tests/memarena-test.cc | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/util/tests/memarena-test.cc b/util/tests/memarena-test.cc index b687a9a4287..46a55d58847 100644 --- a/util/tests/memarena-test.cc +++ b/util/tests/memarena-test.cc @@ -118,7 +118,9 @@ private: // make sure memory was allocated ok by // writing to buf and reading it back - memset(ma._current_chunk.buf, magic, size); + if (size > 0) { + memset(ma._current_chunk.buf, magic, size); + } for (size_t i = 0; i < size; i++) { const char *buf = reinterpret_cast(ma._current_chunk.buf); invariant(buf[i] == magic); @@ -134,7 +136,9 @@ private: // make sure memory was allocated ok by // writing to buf and reading it back - memset(ma._current_chunk.buf, magic, size); + if (size > 0) { + memset(ma._current_chunk.buf, magic, size); + } for (size_t i = 0; i < size; i++) { const char *c = reinterpret_cast(ma._current_chunk.buf); invariant(c[i] == magic); From 9d85fa498ca9f1ceaa5d39def11f4d9430e3966d Mon Sep 17 00:00:00 2001 From: John Esmet Date: Sun, 15 Jun 2014 09:25:28 -0400 Subject: [PATCH 024/190] fix leak in ft-test-helpers.cc --- ft/ft-test-helpers.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/ft/ft-test-helpers.cc b/ft/ft-test-helpers.cc index ca28adb36f9..b46893ff636 100644 --- a/ft/ft-test-helpers.cc +++ b/ft/ft-test-helpers.cc @@ -133,6 +133,7 @@ int toku_testsetup_leaf(FT_HANDLE ft_handle, BLOCKNUM *blocknum, int n_children, toku_memdup_dbt(&pivotkeys[i], keys[i], keylens[i]); } node->pivotkeys.create_from_dbts(pivotkeys, n_children - 1); + toku_free(pivotkeys); *blocknum = node->blocknum; toku_unpin_ftnode(ft_handle->ft, node); From 7fa10b52c2bc062963d4017b0c98478a683e9838 Mon Sep 17 00:00:00 2001 From: John Esmet Date: Sun, 15 Jun 2014 12:30:29 -0400 Subject: [PATCH 025/190] Fix the osx build --- ft/ft-ops.h | 2 +- ft/rollback.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ft/ft-ops.h b/ft/ft-ops.h index 2d62394b5ee..e6f19967b2d 100644 
--- a/ft/ft-ops.h +++ b/ft/ft-ops.h @@ -232,7 +232,7 @@ void toku_ft_maybe_delete (FT_HANDLE ft_h, DBT *k, TOKUTXN txn, bool oplsn_valid TXNID toku_ft_get_oldest_referenced_xid_estimate(FT_HANDLE ft_h); TXN_MANAGER toku_ft_get_txn_manager(FT_HANDLE ft_h); -class txn_gc_info; +struct txn_gc_info; void toku_ft_send_insert(FT_HANDLE ft_h, DBT *key, DBT *val, XIDS xids, enum ft_msg_type type, txn_gc_info *gc_info); void toku_ft_send_delete(FT_HANDLE ft_h, DBT *key, XIDS xids, txn_gc_info *gc_info); void toku_ft_send_commit_any(FT_HANDLE ft_h, DBT *key, XIDS xids, txn_gc_info *gc_info); diff --git a/ft/rollback.h b/ft/rollback.h index 6664ddc5667..eb19ea12ff7 100644 --- a/ft/rollback.h +++ b/ft/rollback.h @@ -165,7 +165,7 @@ struct rollback_log_node { BLOCKNUM previous; struct roll_entry *oldest_logentry; struct roll_entry *newest_logentry; - struct memarena rollentry_arena; + memarena rollentry_arena; size_t rollentry_resident_bytecount; // How many bytes for the rollentries that are stored in main memory. 
PAIR ct_pair; }; From 971a1509616118aacc7cf407b6951b19e3a69d87 Mon Sep 17 00:00:00 2001 From: John Esmet Date: Sun, 15 Jun 2014 13:09:53 -0400 Subject: [PATCH 026/190] Avoid shadowing an existing variable --- ft/tests/orthopush-flush.cc | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/ft/tests/orthopush-flush.cc b/ft/tests/orthopush-flush.cc index 139de67bf91..1ca3869eb64 100644 --- a/ft/tests/orthopush-flush.cc +++ b/ft/tests/orthopush-flush.cc @@ -406,29 +406,29 @@ flush_to_internal(FT_HANDLE t) { MSN msn = msg->msn; enum ft_msg_type type = ft_msg_get_type(msg); XIDS xids = ft_msg_get_xids(msg); - for (int i = 0; i < num_parent_messages; ++i) { - if (dummy_cmp(NULL, &keydbt, parent_messages[i]->u.id.key) == 0 && - msn.msn == parent_messages[i]->msn.msn) { - assert(parent_messages_present[i] == 0); + for (int k = 0; k < num_parent_messages; ++k) { + if (dummy_cmp(NULL, &keydbt, parent_messages[k]->u.id.key) == 0 && + msn.msn == parent_messages[k]->msn.msn) { + assert(parent_messages_present[k] == 0); assert(found == 0); - assert(dummy_cmp(NULL, &valdbt, parent_messages[i]->u.id.val) == 0); - assert(type == parent_messages[i]->type); - assert(xids_get_innermost_xid(xids) == xids_get_innermost_xid(parent_messages[i]->xids)); - assert(parent_messages_is_fresh[i] == is_fresh); - parent_messages_present[i]++; + assert(dummy_cmp(NULL, &valdbt, parent_messages[k]->u.id.val) == 0); + assert(type == parent_messages[k]->type); + assert(xids_get_innermost_xid(xids) == xids_get_innermost_xid(parent_messages[k]->xids)); + assert(parent_messages_is_fresh[k] == is_fresh); + parent_messages_present[k]++; found++; } } - for (int i = 0; i < num_child_messages; ++i) { - if (dummy_cmp(NULL, &keydbt, child_messages[i]->u.id.key) == 0 && - msn.msn == child_messages[i]->msn.msn) { - assert(child_messages_present[i] == 0); + for (int k = 0; k < num_child_messages; ++k) { + if (dummy_cmp(NULL, &keydbt, child_messages[k]->u.id.key) == 
0 && + msn.msn == child_messages[k]->msn.msn) { + assert(child_messages_present[k] == 0); assert(found == 0); - assert(dummy_cmp(NULL, &valdbt, child_messages[i]->u.id.val) == 0); - assert(type == child_messages[i]->type); - assert(xids_get_innermost_xid(xids) == xids_get_innermost_xid(child_messages[i]->xids)); - assert(child_messages_is_fresh[i] == is_fresh); - child_messages_present[i]++; + assert(dummy_cmp(NULL, &valdbt, child_messages[k]->u.id.val) == 0); + assert(type == child_messages[k]->type); + assert(xids_get_innermost_xid(xids) == xids_get_innermost_xid(child_messages[k]->xids)); + assert(child_messages_is_fresh[k] == is_fresh); + child_messages_present[k]++; found++; } } From 32005e366bd4b3365daeb4e626e7ddfb6989b3c1 Mon Sep 17 00:00:00 2001 From: John Esmet Date: Sun, 15 Jun 2014 13:15:43 -0400 Subject: [PATCH 027/190] Fix an invalid free in fifo-test --- ft/tests/fifo-test.cc | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/ft/tests/fifo-test.cc b/ft/tests/fifo-test.cc index 96e48d82674..84e4cee9e99 100644 --- a/ft/tests/fifo-test.cc +++ b/ft/tests/fifo-test.cc @@ -146,31 +146,34 @@ test_enqueue(int n) { } struct checkit_fn { - char *thekey; - int thekeylen; - char *theval; - int thevallen; MSN startmsn; int verbose; int i; - checkit_fn(char *tk, int tkl, char *tv, int tvl, MSN smsn, bool v) - : thekey(tk), thekeylen(tkl), theval(tv), thevallen(tvl), startmsn(smsn), verbose(v), i(0) { + checkit_fn(MSN smsn, bool v) + : startmsn(smsn), verbose(v), i(0) { } int operator()(FT_MSG msg, bool UU(is_fresh)) { + char *thekey = nullptr; + int thekeylen = 0; + char *theval = nullptr; + int thevallen = 0; + buildkey(i); + buildval(i); + MSN msn = msg->msn; enum ft_msg_type type = ft_msg_get_type(msg); if (verbose) printf("checkit %d %d %" PRIu64 "\n", i, type, msn.msn); assert(msn.msn == startmsn.msn + i); - buildkey(i); - buildval(i); assert((int) ft_msg_get_keylen(msg) == thekeylen); assert(memcmp(ft_msg_get_key(msg), 
thekey, ft_msg_get_keylen(msg)) == 0); assert((int) ft_msg_get_vallen(msg) == thevallen); assert(memcmp(ft_msg_get_val(msg), theval, ft_msg_get_vallen(msg)) == 0); assert(i % 256 == (int)type); assert((TXNID)i==xids_get_innermost_xid(ft_msg_get_xids(msg))); + toku_free(thekey); + toku_free(theval); i += 1; return 0; } - } checkit(thekey, thekeylen, theval, thevallen, startmsn, verbose); + } checkit(startmsn, verbose); msg_buffer.iterate(checkit); assert(checkit.i == n); From 45b85c4b1abcd728a960eae5a2f1037a19a4e636 Mon Sep 17 00:00:00 2001 From: John Esmet Date: Sun, 15 Jun 2014 14:44:34 -0400 Subject: [PATCH 028/190] Refactor this test to be more agreeable with clang --- ft/tests/fifo-test.cc | 61 ++++++++++++++++++++----------------------- 1 file changed, 28 insertions(+), 33 deletions(-) diff --git a/ft/tests/fifo-test.cc b/ft/tests/fifo-test.cc index 84e4cee9e99..9281f4db00b 100644 --- a/ft/tests/fifo-test.cc +++ b/ft/tests/fifo-test.cc @@ -100,38 +100,36 @@ test_create (void) { msg_buffer.destroy(); } +static char *buildkey(size_t len) { + char *XMALLOC_N(len, k); + memset(k, 0, len); + return k; +} + +static char *buildval(size_t len) { + char *XMALLOC_N(len, v); + memset(v, ~len, len); + return v; +} + static void test_enqueue(int n) { - int r; - message_buffer msg_buffer; MSN startmsn = ZERO_MSN; + message_buffer msg_buffer; msg_buffer.create(); - char *thekey = 0; int thekeylen; - char *theval = 0; int thevallen; - - // this was a function but icc cant handle it -#define buildkey(len) { \ - thekeylen = len+1; \ - XREALLOC_N(thekeylen, thekey); \ - memset(thekey, len, thekeylen); \ - } - -#define buildval(len) { \ - thevallen = len+2; \ - XREALLOC_N(thevallen, theval); \ - memset(theval, ~len, thevallen); \ - } for (int i=0; imsn; enum ft_msg_type type = ft_msg_get_type(msg); @@ -168,18 +166,15 @@ test_enqueue(int n) { assert((int) ft_msg_get_vallen(msg) == thevallen); assert(memcmp(ft_msg_get_val(msg), theval, ft_msg_get_vallen(msg)) == 0); assert(i % 256 
== (int)type); assert((TXNID)i==xids_get_innermost_xid(ft_msg_get_xids(msg))); + i += 1; toku_free(thekey); toku_free(theval); - i += 1; return 0; } } checkit(startmsn, verbose); msg_buffer.iterate(checkit); assert(checkit.i == n); - if (thekey) toku_free(thekey); - if (theval) toku_free(theval); - msg_buffer.destroy(); } From 6e2c421f10fd257fe51001d1980c26bd378890a9 Mon Sep 17 00:00:00 2001 From: John Esmet Date: Sun, 15 Jun 2014 14:57:36 -0400 Subject: [PATCH 029/190] FT-253 Remove remaining brt / brt_header era names --- CTestCustom.cmake | 1 - ft/block_allocator.h | 2 +- ft/block_table.h | 14 +- ft/ft-cachetable-wrappers.cc | 22 ++-- ft/ft-cachetable-wrappers.h | 10 +- ft/ft-flusher-internal.h | 10 +- ft/ft-flusher.cc | 182 +++++++++++++------------- ft/ft-flusher.h | 6 +- ft/ft-hot-flusher.cc | 14 +- ft/ft-internal.h | 14 +- ft/ft-ops.cc | 51 ++++---- ft/ft-ops.h | 2 +- ft/ft.cc | 126 +++++++++--------- ft/ft.h | 20 +-- ft/ft_node-serialize.cc | 98 +++++++------- ft/loader/loader.cc | 20 +-- ft/node.cc | 4 +- ft/node.h | 6 +- ft/roll.cc | 23 ++-- ft/rollback-ct-callbacks.h | 4 +- ft/rollback.h | 2 +- ft/tests/ft-test-cursor-2.cc | 2 +- ft/tests/test3884.cc | 8 +- ft/tokuftdump.cc | 34 ++--- ft/txn.cc | 2 +- src/tests/hot-optimize-table-tests.cc | 4 +- 26 files changed, 339 insertions(+), 342 deletions(-) diff --git a/CTestCustom.cmake b/CTestCustom.cmake index 9861d8e20a2..1785acabe88 100644 --- a/CTestCustom.cmake +++ b/CTestCustom.cmake @@ -3,7 +3,6 @@ cmake_policy(SET CMP0012 NEW) ## these tests shouldn't run with valgrind list(APPEND CTEST_CUSTOM_MEMCHECK_IGNORE ft/bnc-insert-benchmark - ft/brt-serialize-benchmark ft/ft_loader-test-extractor-1 ft/ft_loader-test-extractor-2 ft/ft_loader-test-extractor-3 diff --git a/ft/block_allocator.h b/ft/block_allocator.h index adc9b5369ac..b86bf578fa7 100644 --- a/ft/block_allocator.h +++ b/ft/block_allocator.h @@ -145,7 +145,7 @@ void block_allocator_alloc_block_at (BLOCK_ALLOCATOR ba, uint64_t size, uint64_t // 
Effect: Allocate a block of the specified size at a particular offset. // Aborts if anything goes wrong. // The performance of this function may be as bad as Theta(N), where N is the number of blocks currently in use. -// Usage note: To allocate several blocks (e.g., when opening a BRT), use block_allocator_alloc_blocks_at(). +// Usage note: To allocate several blocks (e.g., when opening a FT), use block_allocator_alloc_blocks_at(). // Requires: The resulting block may not overlap any other allocated block. // And the offset must be a multiple of the block alignment. // Parameters: diff --git a/ft/block_table.h b/ft/block_table.h index 9fbf4f3dcf1..42b52c3e60b 100644 --- a/ft/block_table.h +++ b/ft/block_table.h @@ -109,8 +109,8 @@ void toku_blocktable_create_new(BLOCK_TABLE *btp); int toku_blocktable_create_from_buffer(int fd, BLOCK_TABLE *btp, DISKOFF location_on_disk, DISKOFF size_on_disk, unsigned char *translation_buffer); void toku_blocktable_destroy(BLOCK_TABLE *btp); -void toku_ft_lock(FT h); -void toku_ft_unlock(FT h); +void toku_ft_lock(FT ft); +void toku_ft_unlock(FT ft); void toku_block_translation_note_start_checkpoint_unlocked(BLOCK_TABLE bt); void toku_block_translation_note_end_checkpoint(BLOCK_TABLE bt, int fd); @@ -118,15 +118,15 @@ void toku_block_translation_note_skipped_checkpoint(BLOCK_TABLE bt); void toku_maybe_truncate_file_on_open(BLOCK_TABLE bt, int fd); //Blocknums -void toku_allocate_blocknum(BLOCK_TABLE bt, BLOCKNUM *res, FT h); -void toku_allocate_blocknum_unlocked(BLOCK_TABLE bt, BLOCKNUM *res, FT h); -void toku_free_blocknum(BLOCK_TABLE bt, BLOCKNUM *b, FT h, bool for_checkpoint); +void toku_allocate_blocknum(BLOCK_TABLE bt, BLOCKNUM *res, FT ft); +void toku_allocate_blocknum_unlocked(BLOCK_TABLE bt, BLOCKNUM *res, FT ft); +void toku_free_blocknum(BLOCK_TABLE bt, BLOCKNUM *b, FT ft, bool for_checkpoint); void toku_verify_blocknum_allocated(BLOCK_TABLE bt, BLOCKNUM b); void toku_block_verify_no_data_blocks_except_root(BLOCK_TABLE bt, 
BLOCKNUM root); void toku_free_unused_blocknums(BLOCK_TABLE bt, BLOCKNUM root); void toku_block_verify_no_free_blocknums(BLOCK_TABLE bt); -void toku_realloc_descriptor_on_disk(BLOCK_TABLE bt, DISKOFF size, DISKOFF *offset, FT h, int fd); -void toku_realloc_descriptor_on_disk_unlocked(BLOCK_TABLE bt, DISKOFF size, DISKOFF *offset, FT h); +void toku_realloc_descriptor_on_disk(BLOCK_TABLE bt, DISKOFF size, DISKOFF *offset, FT ft, int fd); +void toku_realloc_descriptor_on_disk_unlocked(BLOCK_TABLE bt, DISKOFF size, DISKOFF *offset, FT ft); void toku_get_descriptor_offset_size(BLOCK_TABLE bt, DISKOFF *offset, DISKOFF *size); //Blocks and Blocknums diff --git a/ft/ft-cachetable-wrappers.cc b/ft/ft-cachetable-wrappers.cc index be78801dd10..f7834fad991 100644 --- a/ft/ft-cachetable-wrappers.cc +++ b/ft/ft-cachetable-wrappers.cc @@ -104,16 +104,16 @@ ftnode_get_key_and_fullhash( uint32_t* fullhash, void* extra) { - FT h = (FT) extra; + FT ft = (FT) extra; BLOCKNUM name; - toku_allocate_blocknum(h->blocktable, &name, h); + toku_allocate_blocknum(ft->blocktable, &name, ft); *cachekey = name; - *fullhash = toku_cachetable_hash(h->cf, name); + *fullhash = toku_cachetable_hash(ft->cf, name); } void cachetable_put_empty_node_with_dep_nodes( - FT h, + FT ft, uint32_t num_dependent_nodes, FTNODE* dependent_nodes, BLOCKNUM* name, //output @@ -129,12 +129,12 @@ cachetable_put_empty_node_with_dep_nodes( } toku_cachetable_put_with_dep_pairs( - h->cf, + ft->cf, ftnode_get_key_and_fullhash, new_node, make_pair_attr(sizeof(FTNODE)), - get_write_callbacks_for_node(h), - h, + get_write_callbacks_for_node(ft), + ft, num_dependent_nodes, dependent_pairs, dependent_dirty_bits, @@ -319,7 +319,7 @@ exit: void toku_pin_ftnode_with_dep_nodes( - FT h, + FT ft, BLOCKNUM blocknum, uint32_t fullhash, FTNODE_FETCH_EXTRA bfe, @@ -338,12 +338,12 @@ toku_pin_ftnode_with_dep_nodes( } int r = toku_cachetable_get_and_pin_with_dep_pairs( - h->cf, + ft->cf, blocknum, fullhash, &node_v, NULL, - 
get_write_callbacks_for_node(h), + get_write_callbacks_for_node(ft), toku_ftnode_fetch_callback, toku_ftnode_pf_req_callback, toku_ftnode_pf_callback, @@ -356,7 +356,7 @@ toku_pin_ftnode_with_dep_nodes( invariant_zero(r); FTNODE node = (FTNODE) node_v; if (lock_type != PL_READ && node->height > 0 && move_messages) { - toku_move_ftnode_messages_to_stale(h, node); + toku_move_ftnode_messages_to_stale(ft, node); } *node_p = node; } diff --git a/ft/ft-cachetable-wrappers.h b/ft/ft-cachetable-wrappers.h index 12e55cfea23..a25575f3712 100644 --- a/ft/ft-cachetable-wrappers.h +++ b/ft/ft-cachetable-wrappers.h @@ -103,7 +103,7 @@ PATENT RIGHTS GRANT: */ void cachetable_put_empty_node_with_dep_nodes( - FT h, + FT ft, uint32_t num_dependent_nodes, FTNODE* dependent_nodes, BLOCKNUM* name, //output @@ -118,7 +118,7 @@ cachetable_put_empty_node_with_dep_nodes( */ void create_new_ftnode_with_dep_nodes( - FT h, + FT ft, FTNODE *result, int height, int n_children, @@ -156,7 +156,7 @@ toku_pin_ftnode_for_query( // Pins an ftnode without dependent pairs void toku_pin_ftnode( - FT h, + FT ft, BLOCKNUM blocknum, uint32_t fullhash, FTNODE_FETCH_EXTRA bfe, @@ -168,7 +168,7 @@ void toku_pin_ftnode( // Pins an ftnode with dependent pairs // Unlike toku_pin_ftnode_for_query, this function blocks until the node is pinned. void toku_pin_ftnode_with_dep_nodes( - FT h, + FT ft, BLOCKNUM blocknum, uint32_t fullhash, FTNODE_FETCH_EXTRA bfe, @@ -188,7 +188,7 @@ int toku_maybe_pin_ftnode_clean(FT ft, BLOCKNUM blocknum, uint32_t fullhash, pai /** * Effect: Unpin an ftnode. 
*/ -void toku_unpin_ftnode(FT h, FTNODE node); +void toku_unpin_ftnode(FT ft, FTNODE node); void toku_unpin_ftnode_read_only(FT ft, FTNODE node); // Effect: Swaps pair values of two pinned nodes diff --git a/ft/ft-flusher-internal.h b/ft/ft-flusher-internal.h index d2aeea3c4ff..b3568fe95c2 100644 --- a/ft/ft-flusher-internal.h +++ b/ft/ft-flusher-internal.h @@ -115,7 +115,7 @@ typedef struct flusher_advice FLUSHER_ADVICE; * Cleaner thread merging leaf nodes: follow down to a key * Hot optimize table: follow down to the right of a key */ -typedef int (*FA_PICK_CHILD)(FT h, FTNODE parent, void* extra); +typedef int (*FA_PICK_CHILD)(FT ft, FTNODE parent, void* extra); /** * Decide whether to call `toku_ft_flush_some_child` on the child if it is @@ -139,7 +139,7 @@ typedef bool (*FA_SHOULD_RECURSIVELY_FLUSH)(FTNODE child, void* extra); * Hot optimize table: just do the merge */ typedef void (*FA_MAYBE_MERGE_CHILD)(struct flusher_advice *fa, - FT h, + FT ft, FTNODE parent, int childnum, FTNODE child, @@ -172,7 +172,7 @@ typedef void (*FA_UPDATE_STATUS)(FTNODE child, int dirtied, void* extra); * by `ft_split_child`. If -1 is returned, `ft_split_child` defaults to * the old behavior. 
*/ -typedef int (*FA_PICK_CHILD_AFTER_SPLIT)(FT h, +typedef int (*FA_PICK_CHILD_AFTER_SPLIT)(FT ft, FTNODE node, int childnuma, int childnumb, @@ -223,14 +223,14 @@ dont_destroy_basement_nodes(void* extra); void default_merge_child(struct flusher_advice *fa, - FT h, + FT ft, FTNODE parent, int childnum, FTNODE child, void* extra); int -default_pick_child_after_split(FT h, +default_pick_child_after_split(FT ft, FTNODE parent, int childnuma, int childnumb, diff --git a/ft/ft-flusher.cc b/ft/ft-flusher.cc index bf2debe9ed0..b2142611ef1 100644 --- a/ft/ft-flusher.cc +++ b/ft/ft-flusher.cc @@ -236,7 +236,7 @@ update_flush_status(FTNODE child, int cascades) { } static void -maybe_destroy_child_blbs(FTNODE node, FTNODE child, FT h) +maybe_destroy_child_blbs(FTNODE node, FTNODE child, FT ft) { // If the node is already fully in memory, as in upgrade, we don't // need to destroy the basement nodes because they are all equally @@ -248,7 +248,7 @@ maybe_destroy_child_blbs(FTNODE node, FTNODE child, FT h) if (BP_STATE(child, i) == PT_AVAIL && node->max_msn_applied_to_node_on_disk.msn < BLB_MAX_MSN_APPLIED(child, i).msn) { - toku_evict_bn_from_memory(child, i, h); + toku_evict_bn_from_memory(child, i, ft); } } } @@ -256,14 +256,14 @@ maybe_destroy_child_blbs(FTNODE node, FTNODE child, FT h) static void ft_merge_child( - FT h, + FT ft, FTNODE node, int childnum_to_merge, bool *did_react, struct flusher_advice *fa); static int -pick_heaviest_child(FT UU(h), +pick_heaviest_child(FT UU(ft), FTNODE parent, void* UU(extra)) { @@ -312,7 +312,7 @@ recurse_if_child_is_gorged(FTNODE child, void* extra) } int -default_pick_child_after_split(FT UU(h), +default_pick_child_after_split(FT UU(ft), FTNODE UU(parent), int UU(childnuma), int UU(childnumb), @@ -323,7 +323,7 @@ default_pick_child_after_split(FT UU(h), void default_merge_child(struct flusher_advice *fa, - FT h, + FT ft, FTNODE parent, int childnum, FTNODE child, @@ -335,13 +335,13 @@ default_merge_child(struct flusher_advice *fa, // 
we are just going to unpin child and // let ft_merge_child pin it again // - toku_unpin_ftnode(h, child); + toku_unpin_ftnode(ft, child); // // // it is responsibility of ft_merge_child to unlock parent // bool did_react; - ft_merge_child(h, parent, childnum, &did_react, fa); + ft_merge_child(ft, parent, childnum, &did_react, fa); } void @@ -398,7 +398,7 @@ struct ctm_extra { }; static int -ctm_pick_child(FT h, +ctm_pick_child(FT ft, FTNODE parent, void* extra) { @@ -411,8 +411,8 @@ ctm_pick_child(FT h, childnum = toku_ftnode_which_child( parent, &ctme->target_key, - &h->cmp_descriptor, - h->compare_fun); + &ft->cmp_descriptor, + ft->compare_fun); } return childnum; } @@ -429,7 +429,7 @@ ctm_update_status( static void ctm_maybe_merge_child(struct flusher_advice *fa, - FT h, + FT ft, FTNODE parent, int childnum, FTNODE child, @@ -438,19 +438,19 @@ ctm_maybe_merge_child(struct flusher_advice *fa, if (child->height == 0) { (void) toku_sync_fetch_and_add(&STATUS_VALUE(FT_FLUSHER_CLEANER_NUM_LEAF_MERGES_COMPLETED), 1); } - default_merge_child(fa, h, parent, childnum, child, extra); + default_merge_child(fa, ft, parent, childnum, child, extra); } static void ct_maybe_merge_child(struct flusher_advice *fa, - FT h, + FT ft, FTNODE parent, int childnum, FTNODE child, void* extra) { if (child->height > 0) { - default_merge_child(fa, h, parent, childnum, child, extra); + default_merge_child(fa, ft, parent, childnum, child, extra); } else { struct ctm_extra ctme; @@ -486,24 +486,24 @@ ct_maybe_merge_child(struct flusher_advice *fa, default_pick_child_after_split, &ctme); - toku_unpin_ftnode(h, parent); - toku_unpin_ftnode(h, child); + toku_unpin_ftnode(ft, parent); + toku_unpin_ftnode(ft, child); FTNODE root_node = NULL; { uint32_t fullhash; CACHEKEY root; - toku_calculate_root_offset_pointer(h, &root, &fullhash); + toku_calculate_root_offset_pointer(ft, &root, &fullhash); struct ftnode_fetch_extra bfe; - fill_bfe_for_full_read(&bfe, h); - toku_pin_ftnode(h, root, fullhash, 
&bfe, PL_WRITE_EXPENSIVE, &root_node, true); + fill_bfe_for_full_read(&bfe, ft); + toku_pin_ftnode(ft, root, fullhash, &bfe, PL_WRITE_EXPENSIVE, &root_node, true); toku_ftnode_assert_fully_in_memory(root_node); } (void) toku_sync_fetch_and_add(&STATUS_VALUE(FT_FLUSHER_CLEANER_NUM_LEAF_MERGES_STARTED), 1); (void) toku_sync_fetch_and_add(&STATUS_VALUE(FT_FLUSHER_CLEANER_NUM_LEAF_MERGES_RUNNING), 1); - toku_ft_flush_some_child(h, root_node, &new_fa); + toku_ft_flush_some_child(ft, root_node, &new_fa); (void) toku_sync_fetch_and_sub(&STATUS_VALUE(FT_FLUSHER_CLEANER_NUM_LEAF_MERGES_RUNNING), 1); @@ -776,7 +776,7 @@ static void ftnode_finalize_split(FTNODE node, FTNODE B, MSN max_msn_applied_to_ void ftleaf_split( - FT h, + FT ft, FTNODE node, FTNODE *nodea, FTNODE *nodeb, @@ -825,7 +825,7 @@ ftleaf_split( // So, we must call this before evaluating // those two values cachetable_put_empty_node_with_dep_nodes( - h, + ft, num_dependent_nodes, dependent_nodes, &name, @@ -881,8 +881,8 @@ ftleaf_split( name, 0, num_children_in_b, - h->h->layout_version, - h->h->flags); + ft->h->layout_version, + ft->h->flags); B->fullhash = fullhash; } else { @@ -962,7 +962,7 @@ ftleaf_split( void ft_nonleaf_split( - FT h, + FT ft, FTNODE node, FTNODE *nodea, FTNODE *nodeb, @@ -980,7 +980,7 @@ ft_nonleaf_split( FTNODE B; paranoid_invariant(node->height>0); paranoid_invariant(node->n_children>=2); // Otherwise, how do we split? We need at least two children to split. */ - create_new_ftnode_with_dep_nodes(h, &B, node->height, n_children_in_b, num_dependent_nodes, dependent_nodes); + create_new_ftnode_with_dep_nodes(ft, &B, node->height, n_children_in_b, num_dependent_nodes, dependent_nodes); { /* The first n_children_in_a go into node a. * That means that the first n_children_in_a-1 keys go into node a. 
@@ -1023,7 +1023,7 @@ ft_nonleaf_split( // static void ft_split_child( - FT h, + FT ft, FTNODE node, int childnum, FTNODE child, @@ -1042,12 +1042,12 @@ ft_split_child( dep_nodes[0] = node; dep_nodes[1] = child; if (child->height==0) { - ftleaf_split(h, child, &nodea, &nodeb, &splitk, true, split_mode, 2, dep_nodes); + ftleaf_split(ft, child, &nodea, &nodeb, &splitk, true, split_mode, 2, dep_nodes); } else { - ft_nonleaf_split(h, child, &nodea, &nodeb, &splitk, 2, dep_nodes); + ft_nonleaf_split(ft, child, &nodea, &nodeb, &splitk, 2, dep_nodes); } // printf("%s:%d child did split\n", __FILE__, __LINE__); - handle_split_of_child (h, node, childnum, nodea, nodeb, &splitk); + handle_split_of_child (ft, node, childnum, nodea, nodeb, &splitk); // for test call_flusher_thread_callback(flt_flush_during_split); @@ -1056,21 +1056,21 @@ ft_split_child( // now we need to unlock node, // and possibly continue // flushing one of the children - int picked_child = fa->pick_child_after_split(h, node, childnum, childnum + 1, fa->extra); - toku_unpin_ftnode(h, node); + int picked_child = fa->pick_child_after_split(ft, node, childnum, childnum + 1, fa->extra); + toku_unpin_ftnode(ft, node); if (picked_child == childnum || (picked_child < 0 && nodea->height > 0 && fa->should_recursively_flush(nodea, fa->extra))) { - toku_unpin_ftnode(h, nodeb); - toku_ft_flush_some_child(h, nodea, fa); + toku_unpin_ftnode(ft, nodeb); + toku_ft_flush_some_child(ft, nodea, fa); } else if (picked_child == childnum + 1 || (picked_child < 0 && nodeb->height > 0 && fa->should_recursively_flush(nodeb, fa->extra))) { - toku_unpin_ftnode(h, nodea); - toku_ft_flush_some_child(h, nodeb, fa); + toku_unpin_ftnode(ft, nodea); + toku_ft_flush_some_child(ft, nodeb, fa); } else { - toku_unpin_ftnode(h, nodea); - toku_unpin_ftnode(h, nodeb); + toku_unpin_ftnode(ft, nodea); + toku_unpin_ftnode(ft, nodeb); } toku_destroy_dbt(&splitk); @@ -1093,7 +1093,7 @@ static void bring_node_fully_into_memory(FTNODE node, FT ft) { 
static void flush_this_child( - FT h, + FT ft, FTNODE node, FTNODE child, int childnum, @@ -1103,9 +1103,9 @@ flush_this_child( update_flush_status(child, 0); toku_ftnode_assert_fully_in_memory(node); if (fa->should_destroy_basement_nodes(fa)) { - maybe_destroy_child_blbs(node, child, h); + maybe_destroy_child_blbs(node, child, ft); } - bring_node_fully_into_memory(child, h); + bring_node_fully_into_memory(child, ft); toku_ftnode_assert_fully_in_memory(child); paranoid_invariant(node->height>0); paranoid_invariant(child->blocknum.b!=0); @@ -1120,7 +1120,7 @@ flush_this_child( // now we have a bnc to flush to the child. pass down the parent's // oldest known referenced xid as we flush down to the child. - toku_bnc_flush_to_child(h, bnc, child, node->oldest_referenced_xid_known); + toku_bnc_flush_to_child(ft, bnc, child, node->oldest_referenced_xid_known); destroy_nonleaf_childinfo(bnc); } @@ -1344,8 +1344,8 @@ static void merge_remove_key_callback( bool for_checkpoint, void *extra) { - FT h = (FT) extra; - toku_free_blocknum(h->blocktable, bp, h, for_checkpoint); + FT ft = (FT) extra; + toku_free_blocknum(ft->blocktable, bp, ft, for_checkpoint); } // @@ -1354,7 +1354,7 @@ static void merge_remove_key_callback( // static void ft_merge_child( - FT h, + FT ft, FTNODE node, int childnum_to_merge, bool *did_react, @@ -1385,10 +1385,10 @@ ft_merge_child( FTNODE childa, childb; { - uint32_t childfullhash = compute_child_fullhash(h->cf, node, childnuma); + uint32_t childfullhash = compute_child_fullhash(ft->cf, node, childnuma); struct ftnode_fetch_extra bfe; - fill_bfe_for_full_read(&bfe, h); - toku_pin_ftnode_with_dep_nodes(h, BP_BLOCKNUM(node, childnuma), childfullhash, &bfe, PL_WRITE_EXPENSIVE, 1, &node, &childa, true); + fill_bfe_for_full_read(&bfe, ft); + toku_pin_ftnode_with_dep_nodes(ft, BP_BLOCKNUM(node, childnuma), childfullhash, &bfe, PL_WRITE_EXPENSIVE, 1, &node, &childa, true); } // for test 
call_flusher_thread_callback(flt_flush_before_pin_second_node_for_merge); @@ -1396,17 +1396,17 @@ ft_merge_child( FTNODE dep_nodes[2]; dep_nodes[0] = node; dep_nodes[1] = childa; - uint32_t childfullhash = compute_child_fullhash(h->cf, node, childnumb); + uint32_t childfullhash = compute_child_fullhash(ft->cf, node, childnumb); struct ftnode_fetch_extra bfe; - fill_bfe_for_full_read(&bfe, h); - toku_pin_ftnode_with_dep_nodes(h, BP_BLOCKNUM(node, childnumb), childfullhash, &bfe, PL_WRITE_EXPENSIVE, 2, dep_nodes, &childb, true); + fill_bfe_for_full_read(&bfe, ft); + toku_pin_ftnode_with_dep_nodes(ft, BP_BLOCKNUM(node, childnumb), childfullhash, &bfe, PL_WRITE_EXPENSIVE, 2, dep_nodes, &childb, true); } if (toku_bnc_n_entries(BNC(node,childnuma))>0) { - flush_this_child(h, node, childa, childnuma, fa); + flush_this_child(ft, node, childa, childnuma, fa); } if (toku_bnc_n_entries(BNC(node,childnumb))>0) { - flush_this_child(h, node, childb, childnumb, fa); + flush_this_child(ft, node, childb, childnumb, fa); } // now we have both children pinned in main memory, and cachetable locked, @@ -1417,7 +1417,7 @@ ft_merge_child( DBT splitk; toku_init_dbt(&splitk); const DBT *old_split_key = node->pivotkeys.get_pivot(childnuma); - maybe_merge_pinned_nodes(node, old_split_key, childa, childb, &did_merge, &did_rebalance, &splitk, h->h->nodesize); + maybe_merge_pinned_nodes(node, old_split_key, childa, childb, &did_merge, &did_rebalance, &splitk, ft->h->nodesize); //toku_verify_estimates(t,childa); // the tree did react if a merge (did_merge) or rebalance (new spkit key) occurred *did_react = (bool)(did_merge || did_rebalance); @@ -1439,8 +1439,8 @@ ft_merge_child( node->pivotkeys.delete_at(childnuma); // Handle a merge of the rightmost leaf node. 
- if (did_merge && childb->blocknum.b == h->rightmost_blocknum.b) { - invariant(childb->blocknum.b != h->h->root_blocknum.b); + if (did_merge && childb->blocknum.b == ft->rightmost_blocknum.b) { + invariant(childb->blocknum.b != ft->h->root_blocknum.b); toku_ftnode_swap_pair_values(childa, childb); BP_BLOCKNUM(node, childnuma) = childa->blocknum; } @@ -1469,10 +1469,10 @@ ft_merge_child( // merge_remove_key_callback will free the blocknum int rrb = toku_cachetable_unpin_and_remove( - h->cf, + ft->cf, childb->ct_pair, merge_remove_key_callback, - h + ft ); assert_zero(rrb); @@ -1481,7 +1481,7 @@ ft_merge_child( // unlock the parent paranoid_invariant(node->dirty); - toku_unpin_ftnode(h, node); + toku_unpin_ftnode(ft, node); } else { // for test @@ -1489,14 +1489,14 @@ ft_merge_child( // unlock the parent paranoid_invariant(node->dirty); - toku_unpin_ftnode(h, node); - toku_unpin_ftnode(h, childb); + toku_unpin_ftnode(ft, node); + toku_unpin_ftnode(ft, childb); } if (childa->height > 0 && fa->should_recursively_flush(childa, fa->extra)) { - toku_ft_flush_some_child(h, childa, fa); + toku_ft_flush_some_child(ft, childa, fa); } else { - toku_unpin_ftnode(h, childa); + toku_unpin_ftnode(ft, childa); } } @@ -1853,8 +1853,8 @@ toku_ftnode_cleaner_callback( invariant(node->blocknum.b == blocknum.b); invariant(node->fullhash == fullhash); invariant(node->height > 0); // we should never pick a leaf node (for now at least) - FT h = (FT) extraargs; - bring_node_fully_into_memory(node, h); + FT ft = (FT) extraargs; + bring_node_fully_into_memory(node, ft); int childnum = find_heaviest_child(node); update_cleaner_status(node, childnum); @@ -1862,16 +1862,16 @@ toku_ftnode_cleaner_callback( if (toku_bnc_nbytesinbuf(BNC(node, childnum)) > 0) { struct flusher_advice fa; struct flush_status_update_extra fste; - ct_flusher_advice_init(&fa, &fste, h->h->nodesize); - toku_ft_flush_some_child(h, node, &fa); + ct_flusher_advice_init(&fa, &fste, ft->h->nodesize); + 
toku_ft_flush_some_child(ft, node, &fa); } else { - toku_unpin_ftnode(h, node); + toku_unpin_ftnode(ft, node); } return 0; } struct flusher_extra { - FT h; + FT ft; FTNODE node; NONLEAF_CHILDINFO bnc; TXNID parent_oldest_referenced_xid_known; @@ -1896,12 +1896,12 @@ static void flush_node_fun(void *fe_v) // destroyed its basement nodes if necessary, so we now need to either // read them back in, or just do the regular partial fetch. If we // don't, that means fe->node is a parent, so we need to do this anyway. - bring_node_fully_into_memory(fe->node,fe->h); + bring_node_fully_into_memory(fe->node,fe->ft); fe->node->dirty = 1; struct flusher_advice fa; struct flush_status_update_extra fste; - flt_flusher_advice_init(&fa, &fste, fe->h->h->nodesize); + flt_flusher_advice_init(&fa, &fste, fe->ft->h->nodesize); if (fe->bnc) { // In this case, we have a bnc to flush to a node @@ -1910,7 +1910,7 @@ static void flush_node_fun(void *fe_v) call_flusher_thread_callback(flt_flush_before_applying_inbox); toku_bnc_flush_to_child( - fe->h, + fe->ft, fe->bnc, fe->node, fe->parent_oldest_referenced_xid_known @@ -1921,11 +1921,11 @@ static void flush_node_fun(void *fe_v) // If so, call toku_ft_flush_some_child on the node (because this flush intends to // pass a meaningful oldest referenced xid for simple garbage collection), and it is the // responsibility of the flush to unlock the node. otherwise, we unlock it here. - if (fe->node->height > 0 && toku_ftnode_nonleaf_is_gorged(fe->node, fe->h->h->nodesize)) { - toku_ft_flush_some_child(fe->h, fe->node, &fa); + if (fe->node->height > 0 && toku_ftnode_nonleaf_is_gorged(fe->node, fe->ft->h->nodesize)) { + toku_ft_flush_some_child(fe->ft, fe->node, &fa); } else { - toku_unpin_ftnode(fe->h,fe->node); + toku_unpin_ftnode(fe->ft,fe->node); } } else { @@ -1933,25 +1933,25 @@ static void flush_node_fun(void *fe_v) // bnc, which means we are tasked with flushing some // buffer in the node. 
// It is the responsibility of flush some child to unlock the node - toku_ft_flush_some_child(fe->h, fe->node, &fa); + toku_ft_flush_some_child(fe->ft, fe->node, &fa); } - remove_background_job_from_cf(fe->h->cf); + remove_background_job_from_cf(fe->ft->cf); toku_free(fe); } static void place_node_and_bnc_on_background_thread( - FT h, + FT ft, FTNODE node, NONLEAF_CHILDINFO bnc, TXNID parent_oldest_referenced_xid_known) { struct flusher_extra *XMALLOC(fe); - fe->h = h; + fe->ft = ft; fe->node = node; fe->bnc = bnc; fe->parent_oldest_referenced_xid_known = parent_oldest_referenced_xid_known; - cachefile_kibbutz_enq(h->cf, flush_node_fun, fe); + cachefile_kibbutz_enq(ft->cf, flush_node_fun, fe); } // @@ -1967,7 +1967,7 @@ place_node_and_bnc_on_background_thread( // child needs to be split/merged), then we place the parent on the background thread. // The parent will be unlocked on the background thread // -void toku_ft_flush_node_on_background_thread(FT h, FTNODE parent) +void toku_ft_flush_node_on_background_thread(FT ft, FTNODE parent) { toku::context flush_ctx(CTX_FLUSH); TXNID parent_oldest_referenced_xid_known = parent->oldest_referenced_xid_known; @@ -1981,24 +1981,24 @@ void toku_ft_flush_node_on_background_thread(FT h, FTNODE parent) // see if we can pin the child // FTNODE child; - uint32_t childfullhash = compute_child_fullhash(h->cf, parent, childnum); - int r = toku_maybe_pin_ftnode_clean(h, BP_BLOCKNUM(parent, childnum), childfullhash, PL_WRITE_EXPENSIVE, &child); + uint32_t childfullhash = compute_child_fullhash(ft->cf, parent, childnum); + int r = toku_maybe_pin_ftnode_clean(ft, BP_BLOCKNUM(parent, childnum), childfullhash, PL_WRITE_EXPENSIVE, &child); if (r != 0) { // In this case, we could not lock the child, so just place the parent on the background thread // In the callback, we will use toku_ft_flush_some_child, which checks to // see if we should blow away the old basement nodes. 
- place_node_and_bnc_on_background_thread(h, parent, NULL, parent_oldest_referenced_xid_known); + place_node_and_bnc_on_background_thread(ft, parent, NULL, parent_oldest_referenced_xid_known); } else { // // successfully locked child // - bool may_child_be_reactive = ft_ftnode_may_be_reactive(h, child); + bool may_child_be_reactive = ft_ftnode_may_be_reactive(ft, child); if (!may_child_be_reactive) { // We're going to unpin the parent, so before we do, we must // check to see if we need to blow away the basement nodes to // keep the MSN invariants intact. - maybe_destroy_child_blbs(parent, child, h); + maybe_destroy_child_blbs(parent, child, ft); // // can detach buffer and unpin root here @@ -2016,17 +2016,17 @@ void toku_ft_flush_node_on_background_thread(FT h, FTNODE parent) // so, because we know for sure the child is not // reactive, we can unpin the parent // - place_node_and_bnc_on_background_thread(h, child, bnc, parent_oldest_referenced_xid_known); - toku_unpin_ftnode(h, parent); + place_node_and_bnc_on_background_thread(ft, child, bnc, parent_oldest_referenced_xid_known); + toku_unpin_ftnode(ft, parent); } else { // because the child may be reactive, we need to // put parent on background thread. // As a result, we unlock the child here. - toku_unpin_ftnode(h, child); + toku_unpin_ftnode(ft, child); // Again, we'll have the parent on the background thread, so // we don't need to destroy the basement nodes yet. - place_node_and_bnc_on_background_thread(h, parent, NULL, parent_oldest_referenced_xid_known); + place_node_and_bnc_on_background_thread(ft, parent, NULL, parent_oldest_referenced_xid_known); } } } diff --git a/ft/ft-flusher.h b/ft/ft-flusher.h index 4ee76e2a3c8..43fc5c297da 100644 --- a/ft/ft-flusher.h +++ b/ft/ft-flusher.h @@ -163,7 +163,7 @@ enum split_mode { // Given pinned node and pinned child, split child into two // and update node with information about its new child. 
void toku_ft_split_child( - FT h, + FT ft, FTNODE node, int childnum, FTNODE child, @@ -189,7 +189,7 @@ void toku_ft_merge_child( // TODO: Rename toku_ft_leaf_split void ftleaf_split( - FT h, + FT ft, FTNODE node, FTNODE *nodea, FTNODE *nodeb, @@ -212,7 +212,7 @@ ftleaf_split( void // TODO: Rename toku_ft_nonleaf_split ft_nonleaf_split( - FT h, + FT ft, FTNODE node, FTNODE *nodea, FTNODE *nodeb, diff --git a/ft/ft-hot-flusher.cc b/ft/ft-hot-flusher.cc index 3b7b25352a6..95a28fba6ee 100644 --- a/ft/ft-hot-flusher.cc +++ b/ft/ft-hot-flusher.cc @@ -169,7 +169,7 @@ hot_set_start_key(struct hot_flusher_extra *flusher, const DBT* start) } static int -hot_just_pick_child(FT h, +hot_just_pick_child(FT ft, FTNODE parent, struct hot_flusher_extra *flusher) { @@ -186,8 +186,8 @@ hot_just_pick_child(FT h, // Find the pivot boundary. childnum = toku_ftnode_hot_next_child(parent, &flusher->highest_pivot_key, - &h->cmp_descriptor, - h->compare_fun); + &ft->cmp_descriptor, + ft->compare_fun); } return childnum; @@ -209,12 +209,12 @@ hot_update_flusher_keys(FTNODE parent, // Picks which child toku_ft_flush_some_child will use for flushing and // recursion. static int -hot_pick_child(FT h, +hot_pick_child(FT ft, FTNODE parent, void *extra) { struct hot_flusher_extra *flusher = (struct hot_flusher_extra *) extra; - int childnum = hot_just_pick_child(h, parent, flusher); + int childnum = hot_just_pick_child(ft, parent, flusher); // Now we determine the percentage of the tree flushed so far. @@ -244,14 +244,14 @@ hot_update_status(FTNODE UU(child), // one to flush into. This gives it a chance to do that, and update the // keys it maintains. 
static int -hot_pick_child_after_split(FT h, +hot_pick_child_after_split(FT ft, FTNODE parent, int childnuma, int childnumb, void *extra) { struct hot_flusher_extra *flusher = (struct hot_flusher_extra *) extra; - int childnum = hot_just_pick_child(h, parent, flusher); + int childnum = hot_just_pick_child(ft, parent, flusher); assert(childnum == childnuma || childnum == childnumb); hot_update_flusher_keys(parent, childnum, flusher); if (parent->height == 1) { diff --git a/ft/ft-internal.h b/ft/ft-internal.h index 7d1725a7d0a..2656a28a418 100644 --- a/ft/ft-internal.h +++ b/ft/ft-internal.h @@ -324,11 +324,11 @@ int toku_serialize_ftnode_to_memory (FTNODE node, /*out*/ size_t *n_bytes_to_write, /*out*/ size_t *n_uncompressed_bytes, /*out*/ char **bytes_to_write); -int toku_serialize_ftnode_to(int fd, BLOCKNUM, FTNODE node, FTNODE_DISK_DATA* ndd, bool do_rebalancing, FT h, bool for_checkpoint); +int toku_serialize_ftnode_to(int fd, BLOCKNUM, FTNODE node, FTNODE_DISK_DATA* ndd, bool do_rebalancing, FT ft, bool for_checkpoint); int toku_serialize_rollback_log_to (int fd, ROLLBACK_LOG_NODE log, SERIALIZED_ROLLBACK_LOG_NODE serialized_log, bool is_serialized, - FT h, bool for_checkpoint); + FT ft, bool for_checkpoint); void toku_serialize_rollback_log_to_memory_uncompressed(ROLLBACK_LOG_NODE log, SERIALIZED_ROLLBACK_LOG_NODE serialized); -int toku_deserialize_rollback_log_from (int fd, BLOCKNUM blocknum, ROLLBACK_LOG_NODE *logp, FT h); +int toku_deserialize_rollback_log_from (int fd, BLOCKNUM blocknum, ROLLBACK_LOG_NODE *logp, FT ft); int toku_deserialize_bp_from_disk(FTNODE node, FTNODE_DISK_DATA ndd, int childnum, int fd, struct ftnode_fetch_extra* bfe); int toku_deserialize_bp_from_compressed(FTNODE node, int childnum, struct ftnode_fetch_extra *bfe); int toku_deserialize_ftnode_from (int fd, BLOCKNUM off, uint32_t /*fullhash*/, FTNODE *ftnode, FTNODE_DISK_DATA* ndd, struct ftnode_fetch_extra* bfe); @@ -353,7 +353,7 @@ deserialize_ft_versioned(int fd, struct rbuf *rb, 
FT *ft, uint32_t version); void read_block_from_fd_into_rbuf( int fd, BLOCKNUM blocknum, - FT h, + FT ft, struct rbuf *rb ); @@ -492,7 +492,7 @@ int toku_testsetup_insert_to_leaf (FT_HANDLE ft_h, BLOCKNUM, const char *key, in int toku_testsetup_insert_to_nonleaf (FT_HANDLE ft_h, BLOCKNUM, enum ft_msg_type, const char *key, int keylen, const char *val, int vallen); void toku_pin_node_with_min_bfe(FTNODE* node, BLOCKNUM b, FT_HANDLE t); -void toku_ft_root_put_msg(FT h, FT_MSG msg, txn_gc_info *gc_info); +void toku_ft_root_put_msg(FT ft, FT_MSG msg, txn_gc_info *gc_info); void toku_get_node_for_verify( @@ -667,8 +667,8 @@ void toku_ft_get_status(FT_STATUS); void toku_flusher_thread_set_callback(void (*callback_f)(int, void*), void* extra); -int toku_upgrade_subtree_estimates_to_stat64info(int fd, FT h) __attribute__((nonnull)); -int toku_upgrade_msn_from_root_to_header(int fd, FT h) __attribute__((nonnull)); +int toku_upgrade_subtree_estimates_to_stat64info(int fd, FT ft) __attribute__((nonnull)); +int toku_upgrade_msn_from_root_to_header(int fd, FT ft) __attribute__((nonnull)); // A callback function is invoked with the key, and the data. // The pointers (to the bytevecs) must not be modified. The data must be copied out before the callback function returns. 
diff --git a/ft/ft-ops.cc b/ft/ft-ops.cc index d567c72c389..006fc231928 100644 --- a/ft/ft-ops.cc +++ b/ft/ft-ops.cc @@ -600,7 +600,7 @@ toku_bfe_leftmost_child_wanted(struct ftnode_fetch_extra *bfe, FTNODE node) } else if (bfe->range_lock_left_key.data == nullptr) { return -1; } else { - return toku_ftnode_which_child(node, &bfe->range_lock_left_key, &bfe->h->cmp_descriptor, bfe->h->compare_fun); + return toku_ftnode_which_child(node, &bfe->range_lock_left_key, &bfe->ft->cmp_descriptor, bfe->ft->compare_fun); } } @@ -613,7 +613,7 @@ toku_bfe_rightmost_child_wanted(struct ftnode_fetch_extra *bfe, FTNODE node) } else if (bfe->range_lock_right_key.data == nullptr) { return -1; } else { - return toku_ftnode_which_child(node, &bfe->range_lock_right_key, &bfe->h->cmp_descriptor, bfe->h->compare_fun); + return toku_ftnode_which_child(node, &bfe->range_lock_right_key, &bfe->ft->cmp_descriptor, bfe->ft->compare_fun); } } @@ -763,7 +763,7 @@ void toku_ftnode_flush_callback( bool is_clone ) { - FT h = (FT) extraargs; + FT ft = (FT) extraargs; FTNODE ftnode = (FTNODE) ftnode_v; FTNODE_DISK_DATA* ndd = (FTNODE_DISK_DATA*)disk_data; assert(ftnode->blocknum.b == blocknum.b); @@ -772,14 +772,14 @@ void toku_ftnode_flush_callback( toku_ftnode_assert_fully_in_memory(ftnode); if (height > 0 && !is_clone) { // cloned nodes already had their stale messages moved, see toku_ftnode_clone_callback() - toku_move_ftnode_messages_to_stale(h, ftnode); + toku_move_ftnode_messages_to_stale(ft, ftnode); } else if (height == 0) { - toku_ftnode_leaf_run_gc(h, ftnode); + toku_ftnode_leaf_run_gc(ft, ftnode); if (!is_clone) { - toku_ftnode_update_disk_stats(ftnode, h, for_checkpoint); + toku_ftnode_update_disk_stats(ftnode, ft, for_checkpoint); } } - int r = toku_serialize_ftnode_to(fd, ftnode->blocknum, ftnode, ndd, !is_clone, h, for_checkpoint); + int r = toku_serialize_ftnode_to(fd, ftnode->blocknum, ftnode, ndd, !is_clone, ft, for_checkpoint); assert_zero(r); ftnode->layout_version_read_from_disk 
= FT_LAYOUT_VERSION; } @@ -800,7 +800,7 @@ void toku_ftnode_flush_callback( for (int i = 0; i < ftnode->n_children; i++) { if (BP_STATE(ftnode,i) == PT_AVAIL) { BASEMENTNODE bn = BLB(ftnode, i); - toku_ft_decrease_stats(&h->in_memory_stats, bn->stat64_delta); + toku_ft_decrease_stats(&ft->in_memory_stats, bn->stat64_delta); } } } @@ -1125,11 +1125,11 @@ bool toku_ftnode_pf_req_callback(void* ftnode_pv, void* read_extraargs) { // we can possibly require is a single basement node // we find out what basement node the query cares about // and check if it is available - paranoid_invariant(bfe->h->compare_fun); + paranoid_invariant(bfe->ft->compare_fun); paranoid_invariant(bfe->search); bfe->child_to_read = toku_ft_search_which_child( - &bfe->h->cmp_descriptor, - bfe->h->compare_fun, + &bfe->ft->cmp_descriptor, + bfe->ft->compare_fun, node, bfe->search ); @@ -1154,7 +1154,7 @@ bool toku_ftnode_pf_req_callback(void* ftnode_pv, void* read_extraargs) { // we can possibly require is a single basement node // we find out what basement node the query cares about // and check if it is available - paranoid_invariant(bfe->h->compare_fun); + paranoid_invariant(bfe->ft->compare_fun); if (node->height == 0) { int left_child = toku_bfe_leftmost_child_wanted(bfe, node); int right_child = toku_bfe_rightmost_child_wanted(bfe, node); @@ -1342,7 +1342,7 @@ int toku_ftnode_pf_callback(void* ftnode_pv, void* disk_data, void* read_extraar if (r == TOKUDB_BAD_CHECKSUM) { fprintf(stderr, "Checksum failure while reading node partition in file %s.\n", - toku_cachefile_fname_in_env(bfe->h->cf)); + toku_cachefile_fname_in_env(bfe->ft->cf)); } else { fprintf(stderr, "Error while reading node partition %d\n", @@ -1363,9 +1363,9 @@ int toku_msg_leafval_heaviside(DBT const &kdbt, const struct toku_msg_leafval_he return be.compare_fun(&db, &kdbt, key); } -void fill_bfe_for_full_read(struct ftnode_fetch_extra *bfe, FT h) { +void fill_bfe_for_full_read(struct ftnode_fetch_extra *bfe, FT ft) { bfe->type 
= ftnode_fetch_all; - bfe->h = h; + bfe->ft = ft; bfe->search = nullptr; toku_init_dbt(&bfe->range_lock_left_key); toku_init_dbt(&bfe->range_lock_right_key); @@ -1380,12 +1380,12 @@ void fill_bfe_for_full_read(struct ftnode_fetch_extra *bfe, FT h) { bfe->decompress_time = 0; } -void fill_bfe_for_keymatch(struct ftnode_fetch_extra *bfe, FT h, +void fill_bfe_for_keymatch(struct ftnode_fetch_extra *bfe, FT ft, const DBT *left, const DBT *right, bool disable_prefetching, bool read_all_partitions) { - paranoid_invariant(h->h->type == FT_CURRENT); + paranoid_invariant(ft->h->type == FT_CURRENT); bfe->type = ftnode_fetch_keymatch; - bfe->h = h; + bfe->ft = ft; bfe->search = nullptr; toku_init_dbt(&bfe->range_lock_left_key); toku_init_dbt(&bfe->range_lock_right_key); @@ -1407,13 +1407,13 @@ void fill_bfe_for_keymatch(struct ftnode_fetch_extra *bfe, FT h, bfe->decompress_time = 0; } -void fill_bfe_for_subset_read(struct ftnode_fetch_extra *bfe, FT h, ft_search *search, +void fill_bfe_for_subset_read(struct ftnode_fetch_extra *bfe, FT ft, ft_search *search, const DBT *left, const DBT *right, bool left_is_neg_infty, bool right_is_pos_infty, bool disable_prefetching, bool read_all_partitions) { - paranoid_invariant(h->h->type == FT_CURRENT); + paranoid_invariant(ft->h->type == FT_CURRENT); bfe->type = ftnode_fetch_subset; - bfe->h = h; + bfe->ft = ft; bfe->search = search; toku_init_dbt(&bfe->range_lock_left_key); toku_init_dbt(&bfe->range_lock_right_key); @@ -1437,7 +1437,7 @@ void fill_bfe_for_subset_read(struct ftnode_fetch_extra *bfe, FT h, ft_search *s void fill_bfe_for_min_read(struct ftnode_fetch_extra *bfe, FT ft) { paranoid_invariant(ft->h->type == FT_CURRENT); bfe->type = ftnode_fetch_none; - bfe->h = ft; + bfe->ft = ft; bfe->search = nullptr; toku_init_dbt(&bfe->range_lock_left_key); toku_init_dbt(&bfe->range_lock_right_key); @@ -1455,7 +1455,7 @@ void fill_bfe_for_min_read(struct ftnode_fetch_extra *bfe, FT ft) { void fill_bfe_for_prefetch(struct ftnode_fetch_extra 
*bfe, FT ft, struct ft_cursor *cursor) { paranoid_invariant(ft->h->type == FT_CURRENT); bfe->type = ftnode_fetch_prefetch; - bfe->h = ft; + bfe->ft = ft; bfe->search = nullptr; toku_init_dbt(&bfe->range_lock_left_key); toku_init_dbt(&bfe->range_lock_right_key); @@ -3175,9 +3175,8 @@ toku_ft_handle_open_with_dict_id( DICTIONARY_ID toku_ft_get_dictionary_id(FT_HANDLE ft_handle) { - FT h = ft_handle->ft; - DICTIONARY_ID dict_id = h->dict_id; - return dict_id; + FT ft = ft_handle->ft; + return ft->dict_id; } void toku_ft_set_flags(FT_HANDLE ft_handle, unsigned int flags) { diff --git a/ft/ft-ops.h b/ft/ft-ops.h index e6f19967b2d..e64cbb35dee 100644 --- a/ft/ft-ops.h +++ b/ft/ft-ops.h @@ -184,7 +184,7 @@ void toku_ft_handle_close_recovery(FT_HANDLE ft_handle, LSN oplsn); int toku_ft_handle_open_with_dict_id( - FT_HANDLE t, + FT_HANDLE ft_h, const char *fname_in_env, int is_create, int only_create, diff --git a/ft/ft.cc b/ft/ft.cc index 80b59293cbc..da8b0524ccf 100644 --- a/ft/ft.cc +++ b/ft/ft.cc @@ -472,19 +472,19 @@ int toku_read_ft_and_store_in_cachefile (FT_HANDLE ft_handle, CACHEFILE cf, LSN // max_acceptable_lsn is the latest acceptable checkpointed version of the file. 
{ { - FT h; - if ((h = (FT) toku_cachefile_get_userdata(cf))!=0) { - *header = h; - assert(ft_handle->options.update_fun == h->update_fun); - assert(ft_handle->options.compare_fun == h->compare_fun); + FT ft; + if ((ft = (FT) toku_cachefile_get_userdata(cf))!=0) { + *header = ft; + assert(ft_handle->options.update_fun == ft->update_fun); + assert(ft_handle->options.compare_fun == ft->compare_fun); return 0; } } - FT h = nullptr; + FT ft = nullptr; int r; { int fd = toku_cachefile_get_fd(cf); - r = toku_deserialize_ft_from(fd, max_acceptable_lsn, &h); + r = toku_deserialize_ft_from(fd, max_acceptable_lsn, &ft); if (r == TOKUDB_BAD_CHECKSUM) { fprintf(stderr, "Checksum failure while reading header in file %s.\n", toku_cachefile_fname_in_env(cf)); assert(false); // make absolutely sure we crash before doing anything else @@ -492,12 +492,12 @@ int toku_read_ft_and_store_in_cachefile (FT_HANDLE ft_handle, CACHEFILE cf, LSN } if (r!=0) return r; // GCC 4.8 seems to get confused by the gotos in the deserialize code and think h is maybe uninitialized. - invariant_notnull(h); - h->cf = cf; - h->compare_fun = ft_handle->options.compare_fun; - h->update_fun = ft_handle->options.update_fun; + invariant_notnull(ft); + ft->cf = cf; + ft->compare_fun = ft_handle->options.compare_fun; + ft->update_fun = ft_handle->options.update_fun; toku_cachefile_set_userdata(cf, - (void*)h, + reinterpret_cast(ft), ft_log_fassociate_during_checkpoint, ft_close, ft_free, @@ -506,7 +506,7 @@ int toku_read_ft_and_store_in_cachefile (FT_HANDLE ft_handle, CACHEFILE cf, LSN ft_end_checkpoint, ft_note_pin_by_checkpoint, ft_note_unpin_by_checkpoint); - *header = h; + *header = ft; return 0; } @@ -548,12 +548,12 @@ void toku_ft_evict_from_memory(FT ft, bool oplsn_valid, LSN oplsn) { } // Verifies there exists exactly one ft handle and returns it. 
-FT_HANDLE toku_ft_get_only_existing_ft_handle(FT h) { +FT_HANDLE toku_ft_get_only_existing_ft_handle(FT ft) { FT_HANDLE ft_handle_ret = NULL; - toku_ft_grab_reflock(h); - assert(toku_list_num_elements_est(&h->live_ft_handles) == 1); - ft_handle_ret = toku_list_struct(toku_list_head(&h->live_ft_handles), struct ft_handle, live_ft_handle_link); - toku_ft_release_reflock(h); + toku_ft_grab_reflock(ft); + assert(toku_list_num_elements_est(&ft->live_ft_handles) == 1); + ft_handle_ret = toku_list_struct(toku_list_head(&ft->live_ft_handles), struct ft_handle, live_ft_handle_link); + toku_ft_release_reflock(ft); return ft_handle_ret; } @@ -628,27 +628,27 @@ toku_ft_init(FT ft, // Open an ft for use by redirect. The new ft must have the same dict_id as the old_ft passed in. (FILENUM is assigned by the ft_handle_open() function.) static int -ft_handle_open_for_redirect(FT_HANDLE *new_ftp, const char *fname_in_env, TOKUTXN txn, FT old_h) { - FT_HANDLE t; - assert(old_h->dict_id.dictid != DICTIONARY_ID_NONE.dictid); - toku_ft_handle_create(&t); - toku_ft_set_bt_compare(t, old_h->compare_fun); - toku_ft_set_update(t, old_h->update_fun); - toku_ft_handle_set_nodesize(t, old_h->h->nodesize); - toku_ft_handle_set_basementnodesize(t, old_h->h->basementnodesize); - toku_ft_handle_set_compression_method(t, old_h->h->compression_method); - toku_ft_handle_set_fanout(t, old_h->h->fanout); - CACHETABLE ct = toku_cachefile_get_cachetable(old_h->cf); - int r = toku_ft_handle_open_with_dict_id(t, fname_in_env, 0, 0, ct, txn, old_h->dict_id); +ft_handle_open_for_redirect(FT_HANDLE *new_ftp, const char *fname_in_env, TOKUTXN txn, FT old_ft) { + FT_HANDLE ft_handle; + assert(old_ft->dict_id.dictid != DICTIONARY_ID_NONE.dictid); + toku_ft_handle_create(&ft_handle); + toku_ft_set_bt_compare(ft_handle, old_ft->compare_fun); + toku_ft_set_update(ft_handle, old_ft->update_fun); + toku_ft_handle_set_nodesize(ft_handle, old_ft->h->nodesize); + toku_ft_handle_set_basementnodesize(ft_handle, 
old_ft->h->basementnodesize); + toku_ft_handle_set_compression_method(ft_handle, old_ft->h->compression_method); + toku_ft_handle_set_fanout(ft_handle, old_ft->h->fanout); + CACHETABLE ct = toku_cachefile_get_cachetable(old_ft->cf); + int r = toku_ft_handle_open_with_dict_id(ft_handle, fname_in_env, 0, 0, ct, txn, old_ft->dict_id); if (r != 0) { goto cleanup; } - assert(t->ft->dict_id.dictid == old_h->dict_id.dictid); - *new_ftp = t; + assert(ft_handle->ft->dict_id.dictid == old_ft->dict_id.dictid); + *new_ftp = ft_handle; cleanup: if (r != 0) { - toku_ft_handle_close(t); + toku_ft_handle_close(ft_handle); } return r; } @@ -656,81 +656,81 @@ ft_handle_open_for_redirect(FT_HANDLE *new_ftp, const char *fname_in_env, TOKUTX // This function performs most of the work to redirect a dictionary to different file. // It is called for redirect and to abort a redirect. (This function is almost its own inverse.) static int -dictionary_redirect_internal(const char *dst_fname_in_env, FT src_h, TOKUTXN txn, FT *dst_hp) { +dictionary_redirect_internal(const char *dst_fname_in_env, FT src_ft, TOKUTXN txn, FT *dst_ftp) { int r; - FILENUM src_filenum = toku_cachefile_filenum(src_h->cf); + FILENUM src_filenum = toku_cachefile_filenum(src_ft->cf); FILENUM dst_filenum = FILENUM_NONE; - FT dst_h = NULL; + FT dst_ft = NULL; struct toku_list *list; // open a dummy ft based off of // dst_fname_in_env to get the header // then we will change all the ft's to have - // their headers point to dst_h instead of src_h + // their headers point to dst_ft instead of src_ft FT_HANDLE tmp_dst_ft = NULL; - r = ft_handle_open_for_redirect(&tmp_dst_ft, dst_fname_in_env, txn, src_h); + r = ft_handle_open_for_redirect(&tmp_dst_ft, dst_fname_in_env, txn, src_ft); if (r != 0) { goto cleanup; } - dst_h = tmp_dst_ft->ft; + dst_ft = tmp_dst_ft->ft; // some sanity checks on dst_filenum - dst_filenum = toku_cachefile_filenum(dst_h->cf); + dst_filenum = toku_cachefile_filenum(dst_ft->cf); 
assert(dst_filenum.fileid!=FILENUM_NONE.fileid); assert(dst_filenum.fileid!=src_filenum.fileid); //Cannot be same file. - // for each live ft_handle, ft_handle->ft is currently src_h + // for each live ft_handle, ft_handle->ft is currently src_ft // we want to change it to dummy_dst - toku_ft_grab_reflock(src_h); - while (!toku_list_empty(&src_h->live_ft_handles)) { - list = src_h->live_ft_handles.next; + toku_ft_grab_reflock(src_ft); + while (!toku_list_empty(&src_ft->live_ft_handles)) { + list = src_ft->live_ft_handles.next; FT_HANDLE src_handle = NULL; src_handle = toku_list_struct(list, struct ft_handle, live_ft_handle_link); toku_list_remove(&src_handle->live_ft_handle_link); - toku_ft_note_ft_handle_open(dst_h, src_handle); + toku_ft_note_ft_handle_open(dst_ft, src_handle); if (src_handle->redirect_callback) { src_handle->redirect_callback(src_handle, src_handle->redirect_callback_extra); } } - assert(dst_h); - // making sure that we are not leaking src_h - assert(toku_ft_needed_unlocked(src_h)); - toku_ft_release_reflock(src_h); + assert(dst_ft); + // making sure that we are not leaking src_ft + assert(toku_ft_needed_unlocked(src_ft)); + toku_ft_release_reflock(src_ft); toku_ft_handle_close(tmp_dst_ft); - *dst_hp = dst_h; + *dst_ftp = dst_ft; cleanup: return r; } -//This is the 'abort redirect' function. The redirect of old_h to new_h was done -//and now must be undone, so here we redirect new_h back to old_h. +//This is the 'abort redirect' function. The redirect of old_ft to new_ft was done +//and now must be undone, so here we redirect new_ft back to old_ft. 
int -toku_dictionary_redirect_abort(FT old_h, FT new_h, TOKUTXN txn) { - char *old_fname_in_env = toku_cachefile_fname_in_env(old_h->cf); +toku_dictionary_redirect_abort(FT old_ft, FT new_ft, TOKUTXN txn) { + char *old_fname_in_env = toku_cachefile_fname_in_env(old_ft->cf); int r; { - FILENUM old_filenum = toku_cachefile_filenum(old_h->cf); - FILENUM new_filenum = toku_cachefile_filenum(new_h->cf); + FILENUM old_filenum = toku_cachefile_filenum(old_ft->cf); + FILENUM new_filenum = toku_cachefile_filenum(new_ft->cf); assert(old_filenum.fileid!=new_filenum.fileid); //Cannot be same file. //No living fts in old header. - toku_ft_grab_reflock(old_h); - assert(toku_list_empty(&old_h->live_ft_handles)); - toku_ft_release_reflock(old_h); + toku_ft_grab_reflock(old_ft); + assert(toku_list_empty(&old_ft->live_ft_handles)); + toku_ft_release_reflock(old_ft); } - FT dst_h; - // redirect back from new_h to old_h - r = dictionary_redirect_internal(old_fname_in_env, new_h, txn, &dst_h); + FT dst_ft; + // redirect back from new_ft to old_ft + r = dictionary_redirect_internal(old_fname_in_env, new_ft, txn, &dst_ft); if (r == 0) { - assert(dst_h == old_h); + assert(dst_ft == old_ft); } return r; } diff --git a/ft/ft.h b/ft/ft.h index 8ef74644cf3..1cf1c1292f4 100644 --- a/ft/ft.h +++ b/ft/ft.h @@ -110,7 +110,7 @@ void toku_ft_grab_reflock(FT ft); void toku_ft_release_reflock(FT ft); void toku_ft_create(FT *ftp, FT_OPTIONS options, CACHEFILE cf, TOKUTXN txn); -void toku_ft_free (FT h); +void toku_ft_free (FT ft); int toku_read_ft_and_store_in_cachefile (FT_HANDLE ft_h, CACHEFILE cf, LSN max_acceptable_lsn, FT *header); void toku_ft_note_ft_handle_open(FT ft, FT_HANDLE live); @@ -122,7 +122,7 @@ bool toku_ft_has_one_reference_unlocked(FT ft); // will have to read in the ft in a new cachefile and new FT object. 
void toku_ft_evict_from_memory(FT ft, bool oplsn_valid, LSN oplsn); -FT_HANDLE toku_ft_get_only_existing_ft_handle(FT h); +FT_HANDLE toku_ft_get_only_existing_ft_handle(FT ft); void toku_ft_note_hot_begin(FT_HANDLE ft_h); void toku_ft_note_hot_complete(FT_HANDLE ft_h, bool success, MSN msn_at_start_of_hot); @@ -141,18 +141,18 @@ toku_ft_init( int toku_dictionary_redirect_abort(FT old_h, FT new_h, TOKUTXN txn) __attribute__ ((warn_unused_result)); int toku_dictionary_redirect (const char *dst_fname_in_env, FT_HANDLE old_ft, TOKUTXN txn); -void toku_reset_root_xid_that_created(FT h, TXNID new_root_xid_that_created); +void toku_reset_root_xid_that_created(FT ft, TXNID new_root_xid_that_created); // Reset the root_xid_that_created field to the given value. // This redefines which xid created the dictionary. -void toku_ft_add_txn_ref(FT h); -void toku_ft_remove_txn_ref(FT h); +void toku_ft_add_txn_ref(FT ft); +void toku_ft_remove_txn_ref(FT ft); -void toku_calculate_root_offset_pointer ( FT h, CACHEKEY* root_key, uint32_t *roothash); -void toku_ft_set_new_root_blocknum(FT h, CACHEKEY new_root_key); -LSN toku_ft_checkpoint_lsn(FT h) __attribute__ ((warn_unused_result)); -void toku_ft_stat64 (FT h, struct ftstat64_s *s); -void toku_ft_get_fractal_tree_info64 (FT h, struct ftinfo64 *s); +void toku_calculate_root_offset_pointer (FT ft, CACHEKEY* root_key, uint32_t *roothash); +void toku_ft_set_new_root_blocknum(FT ft, CACHEKEY new_root_key); +LSN toku_ft_checkpoint_lsn(FT ft) __attribute__ ((warn_unused_result)); +void toku_ft_stat64 (FT ft, struct ftstat64_s *s); +void toku_ft_get_fractal_tree_info64 (FT ft, struct ftinfo64 *s); int toku_ft_iterate_fractal_tree_block_map(FT ft, int (*iter)(uint64_t,int64_t,int64_t,int64_t,int64_t,void*), void *iter_extra); // unconditionally set the descriptor for an open FT. 
can't do this when diff --git a/ft/ft_node-serialize.cc b/ft/ft_node-serialize.cc index bf3780cfa42..12491707e95 100644 --- a/ft/ft_node-serialize.cc +++ b/ft/ft_node-serialize.cc @@ -819,7 +819,7 @@ int toku_serialize_ftnode_to_memory(FTNODE node, } int -toku_serialize_ftnode_to (int fd, BLOCKNUM blocknum, FTNODE node, FTNODE_DISK_DATA* ndd, bool do_rebalancing, FT h, bool for_checkpoint) { +toku_serialize_ftnode_to (int fd, BLOCKNUM blocknum, FTNODE node, FTNODE_DISK_DATA* ndd, bool do_rebalancing, FT ft, bool for_checkpoint) { size_t n_to_write; size_t n_uncompressed_bytes; @@ -841,8 +841,8 @@ toku_serialize_ftnode_to (int fd, BLOCKNUM blocknum, FTNODE node, FTNODE_DISK_DA int r = toku_serialize_ftnode_to_memory( node, ndd, - h->h->basementnodesize, - h->h->compression_method, + ft->h->basementnodesize, + ft->h->compression_method, do_rebalancing, false, // in_parallel &n_to_write, @@ -857,8 +857,8 @@ toku_serialize_ftnode_to (int fd, BLOCKNUM blocknum, FTNODE node, FTNODE_DISK_DA invariant(blocknum.b>=0); DISKOFF offset; - toku_blocknum_realloc_on_disk(h->blocktable, blocknum, n_to_write, &offset, - h, fd, for_checkpoint); //dirties h + toku_blocknum_realloc_on_disk(ft->blocktable, blocknum, n_to_write, &offset, + ft, fd, for_checkpoint); //dirties h tokutime_t t0 = toku_time_now(); toku_os_full_pwrite(fd, compressed_buf, n_to_write, offset); @@ -1105,13 +1105,13 @@ void destroy_nonleaf_childinfo (NONLEAF_CHILDINFO nl) void read_block_from_fd_into_rbuf( int fd, BLOCKNUM blocknum, - FT h, + FT ft, struct rbuf *rb ) { // get the file offset and block size for the block DISKOFF offset, size; - toku_translate_blocknum_to_offset_size(h->blocktable, blocknum, &offset, &size); + toku_translate_blocknum_to_offset_size(ft->blocktable, blocknum, &offset, &size); DISKOFF size_aligned = roundup_to_multiple(512, size); uint8_t *XMALLOC_N_ALIGNED(512, size_aligned, raw_block); rbuf_init(rb, raw_block, size); @@ -1305,8 +1305,8 @@ update_bfe_using_ftnode(FTNODE node, struct 
ftnode_fetch_extra *bfe) // we find out what basement node the query cares about // and check if it is available bfe->child_to_read = toku_ft_search_which_child( - &bfe->h->cmp_descriptor, - bfe->h->compare_fun, + &bfe->ft->cmp_descriptor, + bfe->ft->compare_fun, node, bfe->search ); @@ -1316,7 +1316,7 @@ update_bfe_using_ftnode(FTNODE node, struct ftnode_fetch_extra *bfe) // we can possibly require is a single basement node // we find out what basement node the query cares about // and check if it is available - paranoid_invariant(bfe->h->compare_fun); + paranoid_invariant(bfe->ft->compare_fun); if (node->height == 0) { int left_child = toku_bfe_leftmost_child_wanted(bfe, node); int right_child = toku_bfe_rightmost_child_wanted(bfe, node); @@ -1772,7 +1772,7 @@ deserialize_and_upgrade_internal_node(FTNODE node, int nfresh = 0; int nbroadcast_offsets = 0; - if (bfe->h->compare_fun) { + if (bfe->ft->compare_fun) { XMALLOC_N(n_in_this_buffer, fresh_offsets); // We skip 'stale' offsets for upgraded nodes. XMALLOC_N(n_in_this_buffer, broadcast_offsets); @@ -1782,7 +1782,7 @@ deserialize_and_upgrade_internal_node(FTNODE node, // of messages in the buffer. MSN lowest; uint64_t amount = n_in_this_buffer; - lowest.msn = toku_sync_sub_and_fetch(&bfe->h->h->highest_unused_msn_for_upgrade.msn, amount); + lowest.msn = toku_sync_sub_and_fetch(&bfe->ft->h->highest_unused_msn_for_upgrade.msn, amount); if (highest_msn.msn == 0) { highest_msn.msn = lowest.msn + n_in_this_buffer; } @@ -1800,7 +1800,7 @@ deserialize_and_upgrade_internal_node(FTNODE node, // can we factor this out? 
int32_t *dest; - if (bfe->h->compare_fun) { + if (bfe->ft->compare_fun) { if (ft_msg_type_applies_once(type)) { dest = &fresh_offsets[nfresh]; nfresh++; @@ -1827,9 +1827,9 @@ deserialize_and_upgrade_internal_node(FTNODE node, xids_destroy(&xids); } - if (bfe->h->compare_fun) { - struct toku_msg_buffer_key_msn_cmp_extra extra = { .desc = &bfe->h->cmp_descriptor, - .cmp = bfe->h->compare_fun, + if (bfe->ft->compare_fun) { + struct toku_msg_buffer_key_msn_cmp_extra extra = { .desc = &bfe->ft->cmp_descriptor, + .cmp = bfe->ft->compare_fun, .msg_buffer = &bnc->msg_buffer }; typedef toku::sort key_msn_sort; r = key_msn_sort::mergesort_r(fresh_offsets, nfresh, extra); @@ -1859,7 +1859,7 @@ deserialize_and_upgrade_internal_node(FTNODE node, actual_xsum); fprintf(stderr, "Checksum failure while reading node in file %s.\n", - toku_cachefile_fname_in_env(bfe->h->cf)); + toku_cachefile_fname_in_env(bfe->ft->cf)); fflush(stderr); return toku_db_badformat(); } @@ -1915,7 +1915,7 @@ deserialize_and_upgrade_leaf_node(FTNODE node, // setting up the single partition and updating the bfe. update_bfe_using_ftnode(node, bfe); struct ftnode_fetch_extra temp_bfe; - fill_bfe_for_full_read(&temp_bfe, bfe->h); + fill_bfe_for_full_read(&temp_bfe, bfe->ft); setup_partitions_using_bfe(node, &temp_bfe, true); // 11. Deserialize the partition maps, though they are not used in the @@ -1980,7 +1980,7 @@ deserialize_and_upgrade_leaf_node(FTNODE node, // Whatever this is must be less than the MSNs of every message above // it, so it's ok to take it here. 
- bn->max_msn_applied = bfe->h->h->highest_unused_msn_for_upgrade; + bn->max_msn_applied = bfe->ft->h->highest_unused_msn_for_upgrade; bn->stale_ancestor_messages_applied = false; node->max_msn_applied_to_node_on_disk = bn->max_msn_applied; @@ -1996,7 +1996,7 @@ deserialize_and_upgrade_leaf_node(FTNODE node, actual_xsum); fprintf(stderr, "Checksum failure while reading node in file %s.\n", - toku_cachefile_fname_in_env(bfe->h->cf)); + toku_cachefile_fname_in_env(bfe->ft->cf)); fflush(stderr); return toku_db_badformat(); } @@ -2014,7 +2014,7 @@ deserialize_and_upgrade_leaf_node(FTNODE node, static int read_and_decompress_block_from_fd_into_rbuf(int fd, BLOCKNUM blocknum, DISKOFF offset, DISKOFF size, - FT h, + FT ft, struct rbuf *rb, /* out */ int *layout_version_p); @@ -2037,7 +2037,7 @@ deserialize_and_upgrade_ftnode(FTNODE node, // we read the different sub-sections. // get the file offset and block size for the block DISKOFF offset, size; - toku_translate_blocknum_to_offset_size(bfe->h->blocktable, + toku_translate_blocknum_to_offset_size(bfe->ft->blocktable, blocknum, &offset, &size); @@ -2046,7 +2046,7 @@ deserialize_and_upgrade_ftnode(FTNODE node, blocknum, offset, size, - bfe->h, + bfe->ft, &rb, &version); if (r != 0) { @@ -2259,7 +2259,7 @@ deserialize_ftnode_from_rbuf( // case where we read and decompress the partition tokutime_t partition_decompress_time; r = decompress_and_deserialize_worker(curr_rbuf, curr_sb, node, i, - &bfe->h->cmp_descriptor, bfe->h->compare_fun, &partition_decompress_time); + &bfe->ft->cmp_descriptor, bfe->ft->compare_fun, &partition_decompress_time); decompress_time += partition_decompress_time; if (r != 0) { goto cleanup; @@ -2319,7 +2319,7 @@ toku_deserialize_bp_from_disk(FTNODE node, FTNODE_DISK_DATA ndd, int childnum, i // get the file offset and block size for the block DISKOFF node_offset, total_node_disk_size; toku_translate_blocknum_to_offset_size( - bfe->h->blocktable, + bfe->ft->blocktable, node->blocknum, &node_offset, 
&total_node_disk_size @@ -2365,7 +2365,7 @@ toku_deserialize_bp_from_disk(FTNODE node, FTNODE_DISK_DATA ndd, int childnum, i // deserialize tokutime_t t2 = toku_time_now(); - r = deserialize_ftnode_partition(&curr_sb, node, childnum, &bfe->h->cmp_descriptor, bfe->h->compare_fun); + r = deserialize_ftnode_partition(&curr_sb, node, childnum, &bfe->ft->cmp_descriptor, bfe->ft->compare_fun); tokutime_t t3 = toku_time_now(); @@ -2409,7 +2409,7 @@ toku_deserialize_bp_from_compressed(FTNODE node, int childnum, struct ftnode_fet tokutime_t t1 = toku_time_now(); - r = deserialize_ftnode_partition(curr_sb, node, childnum, &bfe->h->cmp_descriptor, bfe->h->compare_fun); + r = deserialize_ftnode_partition(curr_sb, node, childnum, &bfe->ft->cmp_descriptor, bfe->ft->compare_fun); tokutime_t t2 = toku_time_now(); @@ -2436,7 +2436,7 @@ deserialize_ftnode_from_fd(int fd, struct rbuf rb = RBUF_INITIALIZER; tokutime_t t0 = toku_time_now(); - read_block_from_fd_into_rbuf(fd, blocknum, bfe->h, &rb); + read_block_from_fd_into_rbuf(fd, blocknum, bfe->ft, &rb); tokutime_t t1 = toku_time_now(); // Decompress and deserialize the ftnode. 
Time statistics @@ -2469,7 +2469,7 @@ toku_deserialize_ftnode_from (int fd, // each function below takes the appropriate io/decompression/deserialize statistics if (!bfe->read_all_partitions) { - read_ftnode_header_from_fd_into_rbuf_if_small_enough(fd, blocknum, bfe->h, &rb, bfe); + read_ftnode_header_from_fd_into_rbuf_if_small_enough(fd, blocknum, bfe->ft, &rb, bfe); r = deserialize_ftnode_header_from_rbuf_if_small_enough(ftnode, ndd, blocknum, fullhash, bfe, &rb, fd); } else { // force us to do it the old way @@ -2618,7 +2618,7 @@ toku_serialize_rollback_log_to_memory_uncompressed(ROLLBACK_LOG_NODE log, SERIAL int toku_serialize_rollback_log_to (int fd, ROLLBACK_LOG_NODE log, SERIALIZED_ROLLBACK_LOG_NODE serialized_log, bool is_serialized, - FT h, bool for_checkpoint) { + FT ft, bool for_checkpoint) { size_t n_to_write; char *compressed_buf; struct serialized_rollback_log_node serialized_local; @@ -2635,13 +2635,13 @@ toku_serialize_rollback_log_to (int fd, ROLLBACK_LOG_NODE log, SERIALIZED_ROLLBA //Compress and malloc buffer to write serialize_uncompressed_block_to_memory(serialized_log->data, serialized_log->n_sub_blocks, serialized_log->sub_block, - h->h->compression_method, &n_to_write, &compressed_buf); + ft->h->compression_method, &n_to_write, &compressed_buf); { lazy_assert(blocknum.b>=0); DISKOFF offset; - toku_blocknum_realloc_on_disk(h->blocktable, blocknum, n_to_write, &offset, - h, fd, for_checkpoint); //dirties h + toku_blocknum_realloc_on_disk(ft->blocktable, blocknum, n_to_write, &offset, + ft, fd, for_checkpoint); //dirties h toku_os_full_pwrite(fd, compressed_buf, n_to_write, offset); } toku_free(compressed_buf); @@ -2844,7 +2844,7 @@ decompress_from_raw_block_into_rbuf_versioned(uint32_t version, uint8_t *raw_blo static int read_and_decompress_block_from_fd_into_rbuf(int fd, BLOCKNUM blocknum, DISKOFF offset, DISKOFF size, - FT h, + FT ft, struct rbuf *rb, /* out */ int *layout_version_p) { int r = 0; @@ -2883,7 +2883,7 @@ 
read_and_decompress_block_from_fd_into_rbuf(int fd, BLOCKNUM blocknum, if (r == TOKUDB_BAD_CHECKSUM) { fprintf(stderr, "Checksum failure while reading raw block in file %s.\n", - toku_cachefile_fname_in_env(h->cf)); + toku_cachefile_fname_in_env(ft->cf)); abort(); } else { r = toku_db_badformat(); @@ -2905,14 +2905,14 @@ cleanup: // Read rollback log node from file into struct. Perform version upgrade if necessary. int -toku_deserialize_rollback_log_from (int fd, BLOCKNUM blocknum, ROLLBACK_LOG_NODE *logp, FT h) { +toku_deserialize_rollback_log_from (int fd, BLOCKNUM blocknum, ROLLBACK_LOG_NODE *logp, FT ft) { int layout_version = 0; int r; struct rbuf rb = {.buf = NULL, .size = 0, .ndone = 0}; // get the file offset and block size for the block DISKOFF offset, size; - toku_translate_blocknum_to_offset_size(h->blocktable, blocknum, &offset, &size); + toku_translate_blocknum_to_offset_size(ft->blocktable, blocknum, &offset, &size); // if the size is 0, then the blocknum is unused if (size == 0) { // blocknum is unused, just create an empty one and get out @@ -2924,7 +2924,7 @@ toku_deserialize_rollback_log_from (int fd, BLOCKNUM blocknum, ROLLBACK_LOG_NODE goto cleanup; } - r = read_and_decompress_block_from_fd_into_rbuf(fd, blocknum, offset, size, h, &rb, &layout_version); + r = read_and_decompress_block_from_fd_into_rbuf(fd, blocknum, offset, size, ft, &rb, &layout_version); if (r!=0) goto cleanup; { @@ -2943,19 +2943,19 @@ cleanup: } int -toku_upgrade_subtree_estimates_to_stat64info(int fd, FT h) +toku_upgrade_subtree_estimates_to_stat64info(int fd, FT ft) { int r = 0; // 15 was the last version with subtree estimates - invariant(h->layout_version_read_from_disk <= FT_LAYOUT_VERSION_15); + invariant(ft->layout_version_read_from_disk <= FT_LAYOUT_VERSION_15); FTNODE unused_node = NULL; FTNODE_DISK_DATA unused_ndd = NULL; struct ftnode_fetch_extra bfe; - fill_bfe_for_min_read(&bfe, h); - r = deserialize_ftnode_from_fd(fd, h->h->root_blocknum, 0, &unused_node, 
&unused_ndd, - &bfe, &h->h->on_disk_stats); - h->in_memory_stats = h->h->on_disk_stats; + fill_bfe_for_min_read(&bfe, ft); + r = deserialize_ftnode_from_fd(fd, ft->h->root_blocknum, 0, &unused_node, &unused_ndd, + &bfe, &ft->h->on_disk_stats); + ft->in_memory_stats = ft->h->on_disk_stats; if (unused_node) { toku_ftnode_free(&unused_node); @@ -2967,22 +2967,22 @@ toku_upgrade_subtree_estimates_to_stat64info(int fd, FT h) } int -toku_upgrade_msn_from_root_to_header(int fd, FT h) +toku_upgrade_msn_from_root_to_header(int fd, FT ft) { int r; // 21 was the first version with max_msn_in_ft in the header - invariant(h->layout_version_read_from_disk <= FT_LAYOUT_VERSION_20); + invariant(ft->layout_version_read_from_disk <= FT_LAYOUT_VERSION_20); FTNODE node; FTNODE_DISK_DATA ndd; struct ftnode_fetch_extra bfe; - fill_bfe_for_min_read(&bfe, h); - r = deserialize_ftnode_from_fd(fd, h->h->root_blocknum, 0, &node, &ndd, &bfe, nullptr); + fill_bfe_for_min_read(&bfe, ft); + r = deserialize_ftnode_from_fd(fd, ft->h->root_blocknum, 0, &node, &ndd, &bfe, nullptr); if (r != 0) { goto exit; } - h->h->max_msn_in_ft = node->max_msn_applied_to_node_on_disk; + ft->h->max_msn_in_ft = node->max_msn_applied_to_node_on_disk; toku_ftnode_free(&node); toku_free(ndd); exit: diff --git a/ft/loader/loader.cc b/ft/loader/loader.cc index 37d08b30c5a..75272172a46 100644 --- a/ft/loader/loader.cc +++ b/ft/loader/loader.cc @@ -642,7 +642,7 @@ int toku_ft_loader_internal_init (/* out */ FTLOADER *blp, return 0; } -int toku_ft_loader_open (/* out */ FTLOADER *blp, +int toku_ft_loader_open (FTLOADER *blp, /* out */ CACHETABLE cachetable, generate_row_for_put_func g, DB *src_db, @@ -656,9 +656,9 @@ int toku_ft_loader_open (/* out */ FTLOADER *blp, uint64_t reserve_memory_size, bool compress_intermediates, bool allow_puts) { -// Effect: called by DB_ENV->create_loader to create a brt loader. +// Effect: called by DB_ENV->create_loader to create an ft loader. 
// Arguments: -// blp Return the brt loader here. +// blp Return a ft loader ("bulk loader") here. // g The function for generating a row // src_db The source database. Needed by g. May be NULL if that's ok with g. // N The number of dbs to create. @@ -2220,16 +2220,16 @@ struct dbout { int64_t n_translations_limit; struct translation *translation; toku_mutex_t mutex; - FT h; + FT ft; }; -static inline void dbout_init(struct dbout *out, FT h) { +static inline void dbout_init(struct dbout *out, FT ft) { out->fd = -1; out->current_off = 0; out->n_translations = out->n_translations_limit = 0; out->translation = NULL; toku_mutex_init(&out->mutex, NULL); - out->h = h; + out->ft = ft; } static inline void dbout_destroy(struct dbout *out) { @@ -2615,7 +2615,7 @@ static int toku_loader_write_ft_from_q (FTLOADER bl, { invariant(sts.n_subtrees==1); - out.h->h->root_blocknum = make_blocknum(sts.subtrees[0].block); + out.ft->h->root_blocknum = make_blocknum(sts.subtrees[0].block); toku_free(sts.subtrees); sts.subtrees = NULL; // write the descriptor @@ -3037,7 +3037,7 @@ static int write_translation_table (struct dbout *out, long long *off_of_transla static int write_header (struct dbout *out, long long translation_location_on_disk, long long translation_size_on_disk) { int result = 0; - size_t size = toku_serialize_ft_size(out->h->h); + size_t size = toku_serialize_ft_size(out->ft->h); size_t alloced_size = roundup_to_multiple(512, size); struct wbuf wbuf; char *MALLOC_N_ALIGNED(512, alloced_size, buf); @@ -3045,8 +3045,8 @@ write_header (struct dbout *out, long long translation_location_on_disk, long lo result = get_error_errno(); } else { wbuf_init(&wbuf, buf, size); - out->h->h->on_disk_stats = out->h->in_memory_stats; - toku_serialize_ft_to_wbuf(&wbuf, out->h->h, translation_location_on_disk, translation_size_on_disk); + out->ft->h->on_disk_stats = out->ft->in_memory_stats; + toku_serialize_ft_to_wbuf(&wbuf, out->ft->h, translation_location_on_disk, 
translation_size_on_disk); for (size_t i=size; idirty); BASEMENTNODE bn = BLB(node, childnum); - toku_ft_decrease_stats(&h->in_memory_stats, bn->stat64_delta); + toku_ft_decrease_stats(&ft->in_memory_stats, bn->stat64_delta); destroy_basement_node(bn); set_BNULL(node, childnum); BP_STATE(node, childnum) = PT_ON_DISK; diff --git a/ft/node.h b/ft/node.h index 640995308d4..0ef842d9dd0 100644 --- a/ft/node.h +++ b/ft/node.h @@ -297,7 +297,7 @@ void toku_destroy_ftnode_internals(FTNODE node); void toku_ftnode_free (FTNODE *node); bool toku_ftnode_fully_in_memory(FTNODE node); void toku_ftnode_assert_fully_in_memory(FTNODE node); -void toku_evict_bn_from_memory(FTNODE node, int childnum, FT h); +void toku_evict_bn_from_memory(FTNODE node, int childnum, FT ft); BASEMENTNODE toku_detach_bn(FTNODE node, int childnum); void toku_ftnode_update_disk_stats(FTNODE ftnode, FT ft, bool for_checkpoint); void toku_ftnode_clone_partitions(FTNODE node, FTNODE cloned_node); @@ -345,7 +345,7 @@ struct ft_search; struct ftnode_fetch_extra { enum ftnode_fetch_type type; // needed for reading a node off disk - FT h; + FT ft; // used in the case where type == ftnode_fetch_subset // parameters needed to find out which child needs to be decompressed (so it can be read) ft_search *search; @@ -406,7 +406,7 @@ long toku_bnc_memory_size(NONLEAF_CHILDINFO bnc); long toku_bnc_memory_used(NONLEAF_CHILDINFO bnc); void toku_bnc_insert_msg(NONLEAF_CHILDINFO bnc, const void *key, ITEMLEN keylen, const void *data, ITEMLEN datalen, enum ft_msg_type type, MSN msn, XIDS xids, bool is_fresh, DESCRIPTOR desc, ft_compare_func cmp); void toku_bnc_empty(NONLEAF_CHILDINFO bnc); -void toku_bnc_flush_to_child(FT h, NONLEAF_CHILDINFO bnc, FTNODE child, TXNID parent_oldest_referenced_xid_known); +void toku_bnc_flush_to_child(FT ft, NONLEAF_CHILDINFO bnc, FTNODE child, TXNID parent_oldest_referenced_xid_known); bool toku_bnc_should_promote(FT ft, NONLEAF_CHILDINFO bnc) __attribute__((const, nonnull)); bool 
toku_ftnode_nonleaf_is_gorged(FTNODE node, uint32_t nodesize); diff --git a/ft/roll.cc b/ft/roll.cc index 2c8e1d9307c..23514a339d7 100644 --- a/ft/roll.cc +++ b/ft/roll.cc @@ -220,9 +220,9 @@ done: return 0; } -int find_ft_from_filenum (const FT &h, const FILENUM &filenum); -int find_ft_from_filenum (const FT &h, const FILENUM &filenum) { - FILENUM thisfnum = toku_cachefile_filenum(h->cf); +int find_ft_from_filenum (const FT &ft, const FILENUM &filenum); +int find_ft_from_filenum (const FT &ft, const FILENUM &filenum) { + FILENUM thisfnum = toku_cachefile_filenum(ft->cf); if (thisfnum.fileidfilenum.fileid) return +1; return 0; @@ -236,9 +236,8 @@ static int do_insertion (enum ft_msg_type type, FILENUM filenum, BYTESTRING key, bool reset_root_xid_that_created) { int r = 0; //printf("%s:%d committing insert %s %s\n", __FILE__, __LINE__, key.data, data.data); - FT h; - h = NULL; - r = txn->open_fts.find_zero(filenum, &h, NULL); + FT ft = nullptr; + r = txn->open_fts.find_zero(filenum, &ft, NULL); if (r == DB_NOTFOUND) { assert(txn->for_recovery); r = 0; @@ -247,7 +246,7 @@ static int do_insertion (enum ft_msg_type type, FILENUM filenum, BYTESTRING key, assert(r==0); if (oplsn.lsn != 0) { // if we are executing the recovery algorithm - LSN treelsn = toku_ft_checkpoint_lsn(h); + LSN treelsn = toku_ft_checkpoint_lsn(ft); if (oplsn.lsn <= treelsn.lsn) { // if operation was already applied to tree ... r = 0; // ... do not apply it again. 
goto done; @@ -275,10 +274,10 @@ static int do_insertion (enum ft_msg_type type, FILENUM filenum, BYTESTRING key, // no messages above us, we can implicitly promote uxrs based on this xid oldest_referenced_xid_estimate, !txn->for_recovery); - toku_ft_root_put_msg(h, &ftmsg, &gc_info); + toku_ft_root_put_msg(ft, &ftmsg, &gc_info); if (reset_root_xid_that_created) { TXNID new_root_xid_that_created = xids_get_outermost_xid(xids); - toku_reset_root_xid_that_created(h, new_root_xid_that_created); + toku_reset_root_xid_that_created(ft, new_root_xid_that_created); } } done: @@ -579,15 +578,15 @@ toku_rollback_dictionary_redirect (FILENUM old_filenum, CACHEFILE new_cf = NULL; r = toku_cachefile_of_filenum(txn->logger->ct, new_filenum, &new_cf); assert(r == 0); - FT CAST_FROM_VOIDP(new_h, toku_cachefile_get_userdata(new_cf)); + FT CAST_FROM_VOIDP(new_ft, toku_cachefile_get_userdata(new_cf)); CACHEFILE old_cf = NULL; r = toku_cachefile_of_filenum(txn->logger->ct, old_filenum, &old_cf); assert(r == 0); - FT CAST_FROM_VOIDP(old_h, toku_cachefile_get_userdata(old_cf)); + FT CAST_FROM_VOIDP(old_ft, toku_cachefile_get_userdata(old_cf)); //Redirect back from new to old. 
- r = toku_dictionary_redirect_abort(old_h, new_h, txn); + r = toku_dictionary_redirect_abort(old_ft, new_ft, txn); assert(r==0); } return r; diff --git a/ft/rollback-ct-callbacks.h b/ft/rollback-ct-callbacks.h index d8494c8a9bd..35a90613423 100644 --- a/ft/rollback-ct-callbacks.h +++ b/ft/rollback-ct-callbacks.h @@ -123,7 +123,7 @@ int toku_rollback_cleaner_callback ( void* UU(extraargs) ); -static inline CACHETABLE_WRITE_CALLBACK get_write_callbacks_for_rollback_log(FT h) { +static inline CACHETABLE_WRITE_CALLBACK get_write_callbacks_for_rollback_log(FT ft) { CACHETABLE_WRITE_CALLBACK wc; wc.flush_callback = toku_rollback_flush_callback; wc.pe_est_callback = toku_rollback_pe_est_callback; @@ -131,6 +131,6 @@ static inline CACHETABLE_WRITE_CALLBACK get_write_callbacks_for_rollback_log(FT wc.cleaner_callback = toku_rollback_cleaner_callback; wc.clone_callback = toku_rollback_clone_callback; wc.checkpoint_complete_callback = nullptr; - wc.write_extraargs = h; + wc.write_extraargs = ft; return wc; } diff --git a/ft/rollback.h b/ft/rollback.h index eb19ea12ff7..4e68308473b 100644 --- a/ft/rollback.h +++ b/ft/rollback.h @@ -137,7 +137,7 @@ void *toku_memdup_in_rollback(ROLLBACK_LOG_NODE log, const void *v, size_t len); // if necessary. 
void toku_maybe_spill_rollbacks(TOKUTXN txn, ROLLBACK_LOG_NODE log); -void toku_txn_maybe_note_ft (TOKUTXN txn, FT h); +void toku_txn_maybe_note_ft (TOKUTXN txn, FT ft); int toku_logger_txn_rollback_stats(TOKUTXN txn, struct txn_stat *txn_stat); int toku_find_xid_by_xid (const TXNID &xid, const TXNID &xidfind); diff --git a/ft/tests/ft-test-cursor-2.cc b/ft/tests/ft-test-cursor-2.cc index 96c4d2085ec..fc4ef53ff7a 100644 --- a/ft/tests/ft-test-cursor-2.cc +++ b/ft/tests/ft-test-cursor-2.cc @@ -106,7 +106,7 @@ save_data (ITEMLEN UU(keylen), bytevec UU(key), ITEMLEN vallen, bytevec val, voi // Verify that different cursors return different data items when a DBT is initialized to all zeros (no flags) -// Note: The BRT test used to implement DBTs with per-cursor allocated space, but there isn't any such thing any more +// Note: The ft test used to implement DBTs with per-cursor allocated space, but there isn't any such thing any more // so this test is a little bit obsolete. static void test_multiple_ft_cursor_dbts(int n) { if (verbose) printf("test_multiple_ft_cursors:%d\n", n); diff --git a/ft/tests/test3884.cc b/ft/tests/test3884.cc index e076731141f..c5ffc152f4c 100644 --- a/ft/tests/test3884.cc +++ b/ft/tests/test3884.cc @@ -185,7 +185,7 @@ verify_basement_node_msns(FTNODE node, MSN expected) } // -// Maximum node size according to the BRT: 1024 (expected node size after split) +// Maximum node size according to the FT: 1024 (expected node size after split) // Maximum basement node size: 256 // Actual node size before split: 2048 // Actual basement node size before split: 256 @@ -237,7 +237,7 @@ test_split_on_boundary(void) } // -// Maximum node size according to the BRT: 1024 (expected node size after split) +// Maximum node size according to the FT: 1024 (expected node size after split) // Maximum basement node size: 256 (except the last) // Actual node size before split: 4095 // Actual basement node size before split: 256 (except the last, of size 2K) @@ -302,7 
+302,7 @@ test_split_with_everything_on_the_left(void) // -// Maximum node size according to the BRT: 1024 (expected node size after split) +// Maximum node size according to the FT: 1024 (expected node size after split) // Maximum basement node size: 256 (except the last) // Actual node size before split: 4095 // Actual basement node size before split: 256 (except the last, of size 2K) @@ -487,7 +487,7 @@ test_split_at_end(void) toku_destroy_ftnode_internals(&sn); } -// Maximum node size according to the BRT: 1024 (expected node size after split) +// Maximum node size according to the FT: 1024 (expected node size after split) // Maximum basement node size: 256 // Actual node size before split: 2048 // Actual basement node size before split: 256 diff --git a/ft/tokuftdump.cc b/ft/tokuftdump.cc index d49046e8c8f..e0a5be754e1 100644 --- a/ft/tokuftdump.cc +++ b/ft/tokuftdump.cc @@ -224,17 +224,17 @@ static int print_le(const void* key, const uint32_t keylen, const LEAFENTRY &le, return 0; } -static void dump_node(int fd, BLOCKNUM blocknum, FT h) { +static void dump_node(int fd, BLOCKNUM blocknum, FT ft) { FTNODE n; struct ftnode_fetch_extra bfe; FTNODE_DISK_DATA ndd = NULL; - fill_bfe_for_full_read(&bfe, h); + fill_bfe_for_full_read(&bfe, ft); int r = toku_deserialize_ftnode_from (fd, blocknum, 0 /*pass zero for hash, it doesn't matter*/, &n, &ndd, &bfe); assert_zero(r); assert(n!=0); printf("ftnode\n"); DISKOFF disksize, diskoffset; - toku_translate_blocknum_to_offset_size(h->blocktable, blocknum, &diskoffset, &disksize); + toku_translate_blocknum_to_offset_size(ft->blocktable, blocknum, &diskoffset, &disksize); printf(" diskoffset =%" PRId64 "\n", diskoffset); printf(" disksize =%" PRId64 "\n", disksize); printf(" serialize_size =%u\n", toku_serialize_ftnode_size(n)); @@ -331,14 +331,14 @@ ok: toku_free(ndd); } -static void dump_block_translation(FT h, uint64_t offset) { - toku_blocknum_dump_translation(h->blocktable, make_blocknum(offset)); +static void 
dump_block_translation(FT ft, uint64_t offset) { + toku_blocknum_dump_translation(ft->blocktable, make_blocknum(offset)); } -static void dump_fragmentation(int UU(f), FT h, int tsv) { +static void dump_fragmentation(int UU(f), FT ft, int tsv) { int64_t used_space; int64_t total_space; - toku_blocktable_internal_fragmentation(h->blocktable, &total_space, &used_space); + toku_blocktable_internal_fragmentation(ft->blocktable, &total_space, &used_space); int64_t fragsizes = total_space - used_space; if (tsv) { @@ -354,7 +354,7 @@ static void dump_fragmentation(int UU(f), FT h, int tsv) { typedef struct { int fd; - FT h; + FT ft; uint64_t blocksizes; uint64_t leafsizes; uint64_t leafblocks; @@ -365,7 +365,7 @@ static int nodesizes_helper(BLOCKNUM b, int64_t size, int64_t UU(address), void FTNODE n; FTNODE_DISK_DATA ndd = NULL; struct ftnode_fetch_extra bfe; - fill_bfe_for_full_read(&bfe, info->h); + fill_bfe_for_full_read(&bfe, info->ft); int r = toku_deserialize_ftnode_from(info->fd, b, 0 /*pass zero for hash, it doesn't matter*/, &n, &ndd, &bfe); if (r==0) { info->blocksizes += size; @@ -379,12 +379,12 @@ static int nodesizes_helper(BLOCKNUM b, int64_t size, int64_t UU(address), void return 0; } -static void dump_nodesizes(int fd, FT h) { +static void dump_nodesizes(int fd, FT ft) { frag_help_extra info; memset(&info, 0, sizeof(info)); info.fd = fd; - info.h = h; - toku_blocktable_iterate(h->blocktable, TRANSLATION_CHECKPOINTED, + info.ft = ft; + toku_blocktable_iterate(ft->blocktable, TRANSLATION_CHECKPOINTED, nodesizes_helper, &info, true, true); printf("leafblocks\t%" PRIu64 "\n", info.leafblocks); printf("blocksizes\t%" PRIu64 "\n", info.blocksizes); @@ -402,12 +402,12 @@ static void dump_garbage_stats(int fd, FT ft) { typedef struct __dump_node_extra { int fd; - FT h; + FT ft; } dump_node_extra; static int dump_node_wrapper(BLOCKNUM b, int64_t UU(size), int64_t UU(address), void *extra) { dump_node_extra *CAST_FROM_VOIDP(info, extra); - dump_node(info->fd, b, 
info->h); + dump_node(info->fd, b, info->ft); return 0; } @@ -472,9 +472,9 @@ static void verify_block(unsigned char *cp, uint64_t file_offset, uint64_t size) printf("offset %u expected %" PRIu64 "\n", offset, size); } -static void dump_block(int fd, BLOCKNUM blocknum, FT h) { +static void dump_block(int fd, BLOCKNUM blocknum, FT ft) { DISKOFF offset, size; - toku_translate_blocknum_to_offset_size(h->blocktable, blocknum, &offset, &size); + toku_translate_blocknum_to_offset_size(ft->blocktable, blocknum, &offset, &size); printf("%" PRId64 " at %" PRId64 " size %" PRId64 "\n", blocknum.b, offset, size); unsigned char *CAST_FROM_VOIDP(vp, toku_malloc(size)); @@ -698,7 +698,7 @@ int main (int argc, const char *const argv[]) { struct __dump_node_extra info; info.fd = fd; - info.h = ft; + info.ft = ft; toku_blocktable_iterate(ft->blocktable, TRANSLATION_CHECKPOINTED, dump_node_wrapper, &info, true, true); } diff --git a/ft/txn.cc b/ft/txn.cc index c19985c81a5..0451bf1f2d5 100644 --- a/ft/txn.cc +++ b/ft/txn.cc @@ -618,7 +618,7 @@ int remove_txn (const FT &h, const uint32_t UU(idx), TOKUTXN const UU(txn)) return 0; } -// for every BRT in txn, remove it. +// for every ft in txn, remove it. static void note_txn_closing (TOKUTXN txn) { txn->open_fts.iterate(txn); } diff --git a/src/tests/hot-optimize-table-tests.cc b/src/tests/hot-optimize-table-tests.cc index 6a00afa4a51..886d23366e5 100644 --- a/src/tests/hot-optimize-table-tests.cc +++ b/src/tests/hot-optimize-table-tests.cc @@ -103,7 +103,7 @@ const int envflags = DB_INIT_MPOOL | DB_ENV* env; unsigned int leaf_hits; -// Custom Update Function for our test BRT. +// Custom Update Function for our test FT. 
static int update_func(DB* UU(db), const DBT* key, @@ -266,7 +266,7 @@ test_main(int argc, char * const argv[]) default_parse_args(argc, argv); hot_test_setup(); - // Create and Open the Database/BRT + // Create and Open the Database/FT DB *db = NULL; const unsigned int BIG = 4000000; const unsigned int SMALL = 10; From b33e859c6a6a28574d69fc5e9ae5fc6d3f070587 Mon Sep 17 00:00:00 2001 From: John Esmet Date: Sun, 15 Jun 2014 23:01:58 -0400 Subject: [PATCH 030/190] FT-273 Use a comparator object for fractal tree key comparisons when possible, untangling the dependency between parameters 'ft->compare_fun' and 'ft->cmp_descriptor' in a sensible way. It is now much less necessary to create a fake db (good) and some long parameter lists are now shorter (it's a start) --- ft/comparator.h | 65 +++++--- ft/cursor.cc | 8 +- ft/ft-flusher.cc | 12 +- ft/ft-hot-flusher.cc | 8 +- ft/ft-internal.h | 23 +-- ft/ft-ops.cc | 75 ++++----- ft/ft-ops.h | 2 +- ft/ft-serialize.cc | 12 +- ft/ft-test-helpers.cc | 12 +- ft/ft-verify.cc | 22 +-- ft/ft.cc | 75 +++++---- ft/ft.h | 4 +- ft/ft_node-serialize.cc | 125 +++++++-------- ft/le-cursor.cc | 14 +- ft/node.cc | 144 +++++++----------- ft/node.h | 42 ++--- ft/tests/bnc-insert-benchmark.cc | 7 +- ft/tests/comparator-test.cc | 20 ++- ft/tests/ft-bfe-query.cc | 4 +- ft/tests/ft-clock-test.cc | 16 +- ft/tests/ft-serialize-benchmark.cc | 12 +- ft/tests/ft-serialize-test.cc | 14 +- ft/tests/make-tree.cc | 2 +- ft/tests/msnfilter.cc | 8 +- ft/tests/orthopush-flush.cc | 109 ++++++------- ft/tests/verify-bad-msn.cc | 2 +- ft/tests/verify-misrouted-msgs.cc | 2 +- ft/valgrind.suppressions | 13 ++ locktree/concurrent_tree.cc | 2 +- locktree/concurrent_tree.h | 2 +- locktree/keyrange.cc | 18 +-- locktree/keyrange.h | 6 +- locktree/locktree.cc | 19 +-- locktree/locktree.h | 5 +- .../concurrent_tree_lkr_acquire_release.cc | 10 +- .../concurrent_tree_lkr_insert_remove.cc | 15 +- ...concurrent_tree_lkr_insert_serial_large.cc | 1 + 
.../tests/concurrent_tree_lkr_remove_all.cc | 2 + locktree/tests/locktree_misc.cc | 4 +- locktree/tests/locktree_overlapping_relock.cc | 6 +- .../locktree_single_txnid_optimization.cc | 6 +- locktree/treenode.cc | 18 +-- locktree/treenode.h | 8 +- 43 files changed, 465 insertions(+), 509 deletions(-) diff --git a/ft/comparator.h b/ft/comparator.h index 6b78dcfc69d..b21d0b9d845 100644 --- a/ft/comparator.h +++ b/ft/comparator.h @@ -95,36 +95,51 @@ PATENT RIGHTS GRANT: #include #include +#include namespace toku { -// a comparator object encapsulates the data necessary for -// comparing two keys in a fractal tree. it further understands -// that points may be positive or negative infinity. + // a comparator object encapsulates the data necessary for + // comparing two keys in a fractal tree. it further understands + // that points may be positive or negative infinity. -class comparator { -public: - void set_descriptor(DESCRIPTOR desc) { - m_fake_db.cmp_descriptor = desc; - } - - void create(ft_compare_func cmp, DESCRIPTOR desc) { - m_cmp = cmp; - memset(&m_fake_db, 0, sizeof(m_fake_db)); - m_fake_db.cmp_descriptor = desc; - } - - int compare(const DBT *a, const DBT *b) { - if (toku_dbt_is_infinite(a) || toku_dbt_is_infinite(b)) { - return toku_dbt_infinite_compare(a, b); - } else { - return m_cmp(&m_fake_db, a, b); + class comparator { + public: + void create(ft_compare_func cmp, DESCRIPTOR desc) { + _cmp = cmp; + XCALLOC(_fake_db); + _fake_db->cmp_descriptor = desc; } - } -private: - struct __toku_db m_fake_db; - ft_compare_func m_cmp; -}; + void destroy() { + toku_free(_fake_db); + } + + const DESCRIPTOR_S *get_descriptor() const { + return _fake_db->cmp_descriptor; + } + + ft_compare_func get_compare_func() const { + return _cmp; + } + + void set_descriptor(DESCRIPTOR desc) { + _fake_db->cmp_descriptor = desc; + } + + int operator()(const DBT *a, const DBT *b) const { + // TODO: add an unlikely() compiler note for this branch + if (toku_dbt_is_infinite(a) || 
toku_dbt_is_infinite(b)) { + return toku_dbt_infinite_compare(a, b); + } else { + // yikes, const sadness here + return _cmp(const_cast(_fake_db), a, b); + } + } + + private: + DB *_fake_db; + ft_compare_func _cmp; + }; } /* namespace toku */ diff --git a/ft/cursor.cc b/ft/cursor.cc index 154bbaa9dff..bacb650a47c 100644 --- a/ft/cursor.cc +++ b/ft/cursor.cc @@ -213,8 +213,7 @@ static int ft_cursor_search(FT_CURSOR cursor, ft_search *search, } static inline int compare_k_x(FT_HANDLE ft_handle, const DBT *k, const DBT *x) { - FAKE_DB(db, &ft_handle->ft->cmp_descriptor); - return ft_handle->ft->compare_fun(&db, k, x); + return ft_handle->ft->cmp(k, x); } int toku_ft_cursor_compare_one(const ft_search &UU(search), const DBT *UU(x)) { @@ -290,11 +289,10 @@ int toku_ft_cursor_last(FT_CURSOR cursor, FT_GET_CALLBACK_FUNCTION getf, void *g int toku_ft_cursor_check_restricted_range(FT_CURSOR c, bytevec key, ITEMLEN keylen) { if (c->out_of_range_error) { FT ft = c->ft_handle->ft; - FAKE_DB(db, &ft->cmp_descriptor); DBT found_key; toku_fill_dbt(&found_key, key, keylen); - if ((!c->left_is_neg_infty && c->direction <= 0 && ft->compare_fun(&db, &found_key, &c->range_lock_left_key) < 0) || - (!c->right_is_pos_infty && c->direction >= 0 && ft->compare_fun(&db, &found_key, &c->range_lock_right_key) > 0)) { + if ((!c->left_is_neg_infty && c->direction <= 0 && ft->cmp(&found_key, &c->range_lock_left_key) < 0) || + (!c->right_is_pos_infty && c->direction >= 0 && ft->cmp(&found_key, &c->range_lock_right_key) > 0)) { invariant(c->out_of_range_error); return c->out_of_range_error; } diff --git a/ft/ft-flusher.cc b/ft/ft-flusher.cc index b2142611ef1..cfc3485f3db 100644 --- a/ft/ft-flusher.cc +++ b/ft/ft-flusher.cc @@ -406,13 +406,8 @@ ctm_pick_child(FT ft, int childnum; if (parent->height == 1 && ctme->is_last_child) { childnum = parent->n_children - 1; - } - else { - childnum = toku_ftnode_which_child( - parent, - &ctme->target_key, - &ft->cmp_descriptor, - ft->compare_fun); + } else { + 
childnum = toku_ftnode_which_child(parent, &ctme->target_key, ft->cmp); } return childnum; } @@ -1703,9 +1698,8 @@ void toku_bnc_flush_to_child(FT ft, NONLEAF_CHILDINFO bnc, FTNODE child, TXNID p flow_deltas[1] = memsize_in_buffer; } toku_ftnode_put_msg( - ft->compare_fun, + ft->cmp, ft->update_fun, - &ft->cmp_descriptor, child, -1, msg, diff --git a/ft/ft-hot-flusher.cc b/ft/ft-hot-flusher.cc index 95a28fba6ee..5e891f3ad17 100644 --- a/ft/ft-hot-flusher.cc +++ b/ft/ft-hot-flusher.cc @@ -184,10 +184,7 @@ hot_just_pick_child(FT ft, childnum = 0; } else { // Find the pivot boundary. - childnum = toku_ftnode_hot_next_child(parent, - &flusher->highest_pivot_key, - &ft->cmp_descriptor, - ft->compare_fun); + childnum = toku_ftnode_hot_next_child(parent, &flusher->highest_pivot_key, ft->cmp); } return childnum; @@ -386,8 +383,7 @@ toku_ft_hot_optimize(FT_HANDLE ft_handle, DBT* left, DBT* right, else if (right) { // if we have flushed past the bounds set for us, // set rightmost_leaf_seen so we exit - FAKE_DB(db, &ft_handle->ft->cmp_descriptor); - int cmp = ft_handle->ft->compare_fun(&db, &flusher.max_current_key, right); + int cmp = ft_handle->ft->cmp(&flusher.max_current_key, right); if (cmp > 0) { flusher.rightmost_leaf_seen = 1; } diff --git a/ft/ft-internal.h b/ft/ft-internal.h index 2656a28a418..7bcf7d5acad 100644 --- a/ft/ft-internal.h +++ b/ft/ft-internal.h @@ -111,9 +111,9 @@ PATENT RIGHTS GRANT: #include "leafentry.h" #include "block_table.h" #include "compress.h" -#include #include #include "ft/bndata.h" +#include "ft/comparator.h" #include "ft/rollback.h" #include "ft/msg_buffer.h" @@ -213,15 +213,18 @@ struct ft { CACHEFILE cf; // unique id for dictionary DICTIONARY_ID dict_id; - ft_compare_func compare_fun; - ft_update_func update_fun; // protected by locktree DESCRIPTOR_S descriptor; - // protected by locktree and user. User - // makes sure this is only changed - // when no activity on tree + + // protected by locktree and user. 
+ // User makes sure this is only changed when no activity on tree DESCRIPTOR_S cmp_descriptor; + // contains a pointer to cmp_descriptor (above) - their lifetimes are bound + toku::comparator cmp; + + // the update function always utilizes the cmp_descriptor, not the regular one + ft_update_func update_fun; // These are not read-only: @@ -272,7 +275,7 @@ typedef struct ft *FT; // descriptor. We don't bother setting any other fields because // the comparison function doesn't need it, and we would like to // reduce the CPU work done per comparison. -#define FAKE_DB(db, desc) struct __toku_db db; do { db.cmp_descriptor = desc; } while (0) +#define FAKE_DB(db, desc) struct __toku_db db; do { db.cmp_descriptor = const_cast(desc); } while (0) struct ft_options { unsigned int nodesize; @@ -390,14 +393,14 @@ void toku_serialize_ft_to_wbuf ( DISKOFF translation_size_on_disk ); int toku_deserialize_ft_from (int fd, LSN max_acceptable_lsn, FT *ft); -void toku_serialize_descriptor_contents_to_fd(int fd, const DESCRIPTOR desc, DISKOFF offset); -void toku_serialize_descriptor_contents_to_wbuf(struct wbuf *wb, const DESCRIPTOR desc); +void toku_serialize_descriptor_contents_to_fd(int fd, DESCRIPTOR desc, DISKOFF offset); +void toku_serialize_descriptor_contents_to_wbuf(struct wbuf *wb, DESCRIPTOR desc); // append a child node to a parent node void toku_ft_nonleaf_append_child(FTNODE node, FTNODE child, const DBT *pivotkey); // append a message to a nonleaf node child buffer -void toku_ft_append_to_child_buffer(ft_compare_func compare_fun, DESCRIPTOR desc, FTNODE node, int childnum, enum ft_msg_type type, MSN msn, XIDS xids, bool is_fresh, const DBT *key, const DBT *val); +void toku_ft_append_to_child_buffer(const toku::comparator &cmp, FTNODE node, int childnum, enum ft_msg_type type, MSN msn, XIDS xids, bool is_fresh, const DBT *key, const DBT *val); STAT64INFO_S toku_get_and_clear_basement_stats(FTNODE leafnode); diff --git a/ft/ft-ops.cc b/ft/ft-ops.cc index 
006fc231928..64818ea6925 100644 --- a/ft/ft-ops.cc +++ b/ft/ft-ops.cc @@ -600,7 +600,7 @@ toku_bfe_leftmost_child_wanted(struct ftnode_fetch_extra *bfe, FTNODE node) } else if (bfe->range_lock_left_key.data == nullptr) { return -1; } else { - return toku_ftnode_which_child(node, &bfe->range_lock_left_key, &bfe->ft->cmp_descriptor, bfe->ft->compare_fun); + return toku_ftnode_which_child(node, &bfe->range_lock_left_key, bfe->ft->cmp); } } @@ -613,7 +613,7 @@ toku_bfe_rightmost_child_wanted(struct ftnode_fetch_extra *bfe, FTNODE node) } else if (bfe->range_lock_right_key.data == nullptr) { return -1; } else { - return toku_ftnode_which_child(node, &bfe->range_lock_right_key, &bfe->ft->cmp_descriptor, bfe->ft->compare_fun); + return toku_ftnode_which_child(node, &bfe->range_lock_right_key, bfe->ft->cmp); } } @@ -625,7 +625,7 @@ ft_cursor_rightmost_child_wanted(FT_CURSOR cursor, FT_HANDLE ft_handle, FTNODE n } else if (cursor->range_lock_right_key.data == nullptr) { return -1; } else { - return toku_ftnode_which_child(node, &cursor->range_lock_right_key, &ft_handle->ft->cmp_descriptor, ft_handle->ft->compare_fun); + return toku_ftnode_which_child(node, &cursor->range_lock_right_key, ft_handle->ft->cmp); } } @@ -1125,11 +1125,9 @@ bool toku_ftnode_pf_req_callback(void* ftnode_pv, void* read_extraargs) { // we can possibly require is a single basement node // we find out what basement node the query cares about // and check if it is available - paranoid_invariant(bfe->ft->compare_fun); paranoid_invariant(bfe->search); bfe->child_to_read = toku_ft_search_which_child( - &bfe->ft->cmp_descriptor, - bfe->ft->compare_fun, + bfe->ft->cmp, node, bfe->search ); @@ -1154,7 +1152,6 @@ bool toku_ftnode_pf_req_callback(void* ftnode_pv, void* read_extraargs) { // we can possibly require is a single basement node // we find out what basement node the query cares about // and check if it is available - paranoid_invariant(bfe->ft->compare_fun); if (node->height == 0) { int left_child = 
toku_bfe_leftmost_child_wanted(bfe, node); int right_child = toku_bfe_rightmost_child_wanted(bfe, node); @@ -1358,9 +1355,7 @@ int toku_ftnode_pf_callback(void* ftnode_pv, void* disk_data, void* read_extraar } int toku_msg_leafval_heaviside(DBT const &kdbt, const struct toku_msg_leafval_heaviside_extra &be) { - FAKE_DB(db, be.desc); - DBT const *const key = be.key; - return be.compare_fun(&db, &kdbt, key); + return be.cmp(&kdbt, be.key); } void fill_bfe_for_full_read(struct ftnode_fetch_extra *bfe, FT ft) { @@ -1597,9 +1592,8 @@ static void inject_message_in_locked_node( paranoid_invariant(msg->msn.msn > node->max_msn_applied_to_node_on_disk.msn); STAT64INFO_S stats_delta = {0,0}; toku_ftnode_put_msg( - ft->compare_fun, + ft->cmp, ft->update_fun, - &ft->cmp_descriptor, node, childnum, msg, @@ -1884,7 +1878,7 @@ static void push_something_in_subtree( paranoid_invariant(ft_msg_type_applies_once(msg->type)); childnum = (target_childnum >= 0 ? target_childnum - : toku_ftnode_which_child(subtree_root, msg->u.id.key, &ft->cmp_descriptor, ft->compare_fun)); + : toku_ftnode_which_child(subtree_root, msg->u.id.key, ft->cmp)); bnc = BNC(subtree_root, childnum); if (toku_bnc_n_entries(bnc) > 0) { @@ -2131,7 +2125,7 @@ void toku_ft_root_put_msg( } else { // The root's height 1. We may be eligible for promotion here. // On the extremes, we want to promote, in the middle, we don't. - int childnum = toku_ftnode_which_child(node, msg->u.id.key, &ft->cmp_descriptor, ft->compare_fun); + int childnum = toku_ftnode_which_child(node, msg->u.id.key, ft->cmp); if (childnum == 0 || childnum == node->n_children - 1) { // On the extremes, promote. We know which childnum we're going to, so pass that down too. push_something_in_subtree(ft, node, childnum, msg, flow_deltas, gc_info, 0, LEFT_EXTREME | RIGHT_EXTREME, false); @@ -2144,11 +2138,11 @@ void toku_ft_root_put_msg( } } +// TODO: Remove me, I'm boring. 
static int ft_compare_keys(FT ft, const DBT *a, const DBT *b) // Effect: Compare two keys using the given fractal tree's comparator/descriptor { - FAKE_DB(db, &ft->cmp_descriptor); - return ft->compare_fun(&db, a, b); + return ft->cmp(a, b); } static LEAFENTRY bn_get_le_and_key(BASEMENTNODE bn, int idx, DBT *key) @@ -2241,9 +2235,9 @@ static int ft_leaf_get_relative_key_pos(FT ft, FTNODE leaf, const DBT *key, bool if (nondeleted_key_found != nullptr) { // The caller wants to know if a nondeleted key can be found. LEAFENTRY target_le; - int childnum = toku_ftnode_which_child(leaf, key, &ft->cmp_descriptor, ft->compare_fun); + int childnum = toku_ftnode_which_child(leaf, key, ft->cmp); BASEMENTNODE bn = BLB(leaf, childnum); - struct toku_msg_leafval_heaviside_extra extra = { ft->compare_fun, &ft->cmp_descriptor, key }; + struct toku_msg_leafval_heaviside_extra extra(ft->cmp, key); int r = bn->data_buffer.find_zero( extra, &target_le, @@ -2943,7 +2937,7 @@ toku_ft_handle_inherit_options(FT_HANDLE t, FT ft) { .compression_method = ft->h->compression_method, .fanout = ft->h->fanout, .flags = ft->h->flags, - .compare_fun = ft->compare_fun, + .compare_fun = ft->cmp.get_compare_func(), .update_fun = ft->update_fun }; t->options = options; @@ -3309,10 +3303,9 @@ static bool search_continue(ft_search *search, void *key, uint32_t key_len) { bool result = true; if (search->direction == FT_SEARCH_LEFT && search->k_bound) { FT_HANDLE CAST_FROM_VOIDP(ft_handle, search->context); - FAKE_DB(db, &ft_handle->ft->cmp_descriptor); DBT this_key = { .data = key, .size = key_len }; // search continues if this key <= key bound - result = (ft_handle->ft->compare_fun(&db, &this_key, search->k_bound) <= 0); + result = (ft_handle->ft->cmp(&this_key, search->k_bound) <= 0); } return result; } @@ -3624,19 +3617,13 @@ ft_search_child(FT_HANDLE ft_handle, FTNODE node, int childnum, ft_search *searc } static inline int -search_which_child_cmp_with_bound(DB *db, ft_compare_func cmp, FTNODE node, int 
childnum, ft_search *search, DBT *dbt) -{ - return cmp(db, toku_copyref_dbt(dbt, *node->pivotkeys.get_pivot(childnum)), &search->pivot_bound); +search_which_child_cmp_with_bound(const toku::comparator &cmp, FTNODE node, int childnum, + ft_search *search, DBT *dbt) { + return cmp(toku_copyref_dbt(dbt, *node->pivotkeys.get_pivot(childnum)), &search->pivot_bound); } int -toku_ft_search_which_child( - DESCRIPTOR desc, - ft_compare_func cmp, - FTNODE node, - ft_search *search - ) -{ +toku_ft_search_which_child(const toku::comparator &cmp, FTNODE node, ft_search *search) { if (node->n_children <= 1) return 0; DBT pivotkey; @@ -3671,10 +3658,9 @@ toku_ft_search_which_child( // ready to return something, if the pivot is bounded, we have to move // over a bit to get away from what we've already searched if (search->pivot_bound.data != nullptr) { - FAKE_DB(db, desc); if (search->direction == FT_SEARCH_LEFT) { while (lo < node->n_children - 1 && - search_which_child_cmp_with_bound(&db, cmp, node, lo, search, &pivotkey) <= 0) { + search_which_child_cmp_with_bound(cmp, node, lo, search, &pivotkey) <= 0) { // searching left to right, if the comparison says the // current pivot (lo) is left of or equal to our bound, // don't search that child again @@ -3682,7 +3668,7 @@ toku_ft_search_which_child( } } else { while (lo > 0 && - search_which_child_cmp_with_bound(&db, cmp, node, lo - 1, search, &pivotkey) >= 0) { + search_which_child_cmp_with_bound(cmp, node, lo - 1, search, &pivotkey) >= 0) { // searching right to left, same argument as just above // (but we had to pass lo - 1 because the pivot between lo // and the thing just less than it is at that position in @@ -3716,8 +3702,7 @@ static bool search_try_again(FTNODE node, int child_to_search, ft_search *search // if there is a search bound and the bound is within the search pivot then continue the search if (search->k_bound) { FT_HANDLE CAST_FROM_VOIDP(ft_handle, search->context); - FAKE_DB(db, &ft_handle->ft->cmp_descriptor); - 
try_again = (ft_handle->ft->compare_fun(&db, search->k_bound, &search->pivot_bound) > 0); + try_again = (ft_handle->ft->cmp(search->k_bound, &search->pivot_bound) > 0); } } } else if (search->direction == FT_SEARCH_RIGHT) { @@ -3990,17 +3975,14 @@ int toku_ft_cursor_delete(FT_CURSOR cursor, int flags, TOKUTXN txn) { /* ********************* keyrange ************************ */ - struct keyrange_compare_s { FT ft; const DBT *key; }; -static int -keyrange_compare (DBT const &kdbt, const struct keyrange_compare_s &s) { - // TODO: maybe put a const fake_db in the header - FAKE_DB(db, &s.ft->cmp_descriptor); - return s.ft->compare_fun(&db, &kdbt, s.key); +// TODO: Remove me, I'm boring +static int keyrange_compare(DBT const &kdbt, const struct keyrange_compare_s &s) { + return s.ft->cmp(&kdbt, s.key); } static void @@ -4070,10 +4052,10 @@ toku_ft_keysrange_internal (FT_HANDLE ft_handle, FTNODE node, { int r = 0; // if KEY is NULL then use the leftmost key. - int left_child_number = key_left ? toku_ftnode_which_child (node, key_left, &ft_handle->ft->cmp_descriptor, ft_handle->ft->compare_fun) : 0; + int left_child_number = key_left ? toku_ftnode_which_child (node, key_left, ft_handle->ft->cmp) : 0; int right_child_number = node->n_children; // Sentinel that does not equal left_child_number. if (may_find_right) { - right_child_number = key_right ? toku_ftnode_which_child (node, key_right, &ft_handle->ft->cmp_descriptor, ft_handle->ft->compare_fun) : node->n_children - 1; + right_child_number = key_right ? 
toku_ftnode_which_child (node, key_right, ft_handle->ft->cmp) : node->n_children - 1; } uint64_t rows_per_child = estimated_num_rows / node->n_children; @@ -4322,7 +4304,7 @@ static int get_key_after_bytes_in_child(FT_HANDLE ft_h, FT ft, FTNODE node, UNLO static int get_key_after_bytes_in_subtree(FT_HANDLE ft_h, FT ft, FTNODE node, UNLOCKERS unlockers, ANCESTORS ancestors, PIVOT_BOUNDS bounds, FTNODE_FETCH_EXTRA bfe, ft_search *search, uint64_t subtree_bytes, const DBT *start_key, uint64_t skip_len, void (*callback)(const DBT *, uint64_t, void *), void *cb_extra, uint64_t *skipped) { int r; - int childnum = toku_ft_search_which_child(&ft->cmp_descriptor, ft->compare_fun, node, search); + int childnum = toku_ft_search_which_child(ft->cmp, node, search); const uint64_t child_subtree_bytes = subtree_bytes / node->n_children; if (node->height == 0) { r = DB_NOTFOUND; @@ -4752,8 +4734,7 @@ int toku_keycompare (bytevec key1, ITEMLEN key1len, bytevec key2, ITEMLEN key2le return 0; } -int -toku_builtin_compare_fun (DB *db __attribute__((__unused__)), const DBT *a, const DBT*b) { +int toku_builtin_compare_fun (DB *db __attribute__((__unused__)), const DBT *a, const DBT*b) { return toku_keycompare(a->data, a->size, b->data, b->size); } diff --git a/ft/ft-ops.h b/ft/ft-ops.h index e64cbb35dee..0c94f1ca7c7 100644 --- a/ft/ft-ops.h +++ b/ft/ft-ops.h @@ -110,7 +110,7 @@ int toku_open_ft_handle (const char *fname, int is_create, FT_HANDLE *, int node // ANY operations. 
to update the cmp descriptor after any operations have already happened, all handles // and transactions must close and reopen before the change, then you can update the cmp descriptor void toku_ft_change_descriptor(FT_HANDLE t, const DBT* old_descriptor, const DBT* new_descriptor, bool do_log, TOKUTXN txn, bool update_cmp_descriptor); -uint32_t toku_serialize_descriptor_size(const DESCRIPTOR desc); +uint32_t toku_serialize_descriptor_size(DESCRIPTOR desc); void toku_ft_handle_create(FT_HANDLE *ft); void toku_ft_set_flags(FT_HANDLE, unsigned int flags); diff --git a/ft/ft-serialize.cc b/ft/ft-serialize.cc index 1879561f20a..5f6015d27ec 100644 --- a/ft/ft-serialize.cc +++ b/ft/ft-serialize.cc @@ -95,7 +95,7 @@ PATENT RIGHTS GRANT: // not version-sensitive because we only serialize a descriptor using the current layout_version uint32_t -toku_serialize_descriptor_size(const DESCRIPTOR desc) { +toku_serialize_descriptor_size(DESCRIPTOR desc) { //Checksum NOT included in this. Checksum only exists in header's version. uint32_t size = 4; // four bytes for size of descriptor size += desc->dbt.size; @@ -103,7 +103,7 @@ toku_serialize_descriptor_size(const DESCRIPTOR desc) { } static uint32_t -deserialize_descriptor_size(const DESCRIPTOR desc, int layout_version) { +deserialize_descriptor_size(DESCRIPTOR desc, int layout_version) { //Checksum NOT included in this. Checksum only exists in header's version. uint32_t size = 4; // four bytes for size of descriptor if (layout_version == FT_LAYOUT_VERSION_13) @@ -112,8 +112,7 @@ deserialize_descriptor_size(const DESCRIPTOR desc, int layout_version) { return size; } -void -toku_serialize_descriptor_contents_to_wbuf(struct wbuf *wb, const DESCRIPTOR desc) { +void toku_serialize_descriptor_contents_to_wbuf(struct wbuf *wb, DESCRIPTOR desc) { wbuf_bytes(wb, desc->dbt.data, desc->dbt.size); } @@ -121,7 +120,7 @@ toku_serialize_descriptor_contents_to_wbuf(struct wbuf *wb, const DESCRIPTOR des //descriptor. 
//Descriptors are NOT written during the header checkpoint process. void -toku_serialize_descriptor_contents_to_fd(int fd, const DESCRIPTOR desc, DISKOFF offset) { +toku_serialize_descriptor_contents_to_fd(int fd, DESCRIPTOR desc, DISKOFF offset) { // make the checksum int64_t size = toku_serialize_descriptor_size(desc)+4; //4 for checksum int64_t size_aligned = roundup_to_multiple(512, size); @@ -437,7 +436,8 @@ int deserialize_ft_versioned(int fd, struct rbuf *rb, FT *ftp, uint32_t version) if (r != 0) { goto exit; } - // copy descriptor to cmp_descriptor for #4541 + // initialize for svn #4541 + // TODO: use real dbt function ft->cmp_descriptor.dbt.size = ft->descriptor.dbt.size; ft->cmp_descriptor.dbt.data = toku_xmemdup(ft->descriptor.dbt.data, ft->descriptor.dbt.size); // Version 13 descriptors had an extra 4 bytes that we don't read diff --git a/ft/ft-test-helpers.cc b/ft/ft-test-helpers.cc index b46893ff636..31afe6e96df 100644 --- a/ft/ft-test-helpers.cc +++ b/ft/ft-test-helpers.cc @@ -225,9 +225,8 @@ int toku_testsetup_insert_to_leaf (FT_HANDLE ft_handle, BLOCKNUM blocknum, const static size_t zero_flow_deltas[] = { 0, 0 }; txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, true); toku_ftnode_put_msg( - ft_handle->ft->compare_fun, + ft_handle->ft->cmp, ft_handle->ft->update_fun, - &ft_handle->ft->cmp_descriptor, node, -1, &msg, @@ -293,13 +292,14 @@ int toku_testsetup_insert_to_nonleaf (FT_HANDLE ft_handle, BLOCKNUM blocknum, en assert(node->height>0); DBT k; - int childnum = toku_ftnode_which_child(node, - toku_fill_dbt(&k, key, keylen), - &ft_handle->ft->cmp_descriptor, ft_handle->ft->compare_fun); + int childnum = toku_ftnode_which_child(node, toku_fill_dbt(&k, key, keylen), ft_handle->ft->cmp); XIDS xids_0 = xids_get_root_xids(); MSN msn = next_dummymsn(); - toku_bnc_insert_msg(BNC(node, childnum), key, keylen, val, vallen, msgtype, msn, xids_0, true, NULL, testhelper_string_key_cmp); + toku::comparator cmp; + cmp.create(testhelper_string_key_cmp, 
nullptr); + toku_bnc_insert_msg(BNC(node, childnum), key, keylen, val, vallen, msgtype, msn, xids_0, true, cmp); + cmp.destroy(); // Hack to get the test working. The problem is that this test // is directly queueing something in a FIFO instead of // using ft APIs. diff --git a/ft/ft-verify.cc b/ft/ft-verify.cc index 2a6d9fcbb6e..a776446d9ff 100644 --- a/ft/ft-verify.cc +++ b/ft/ft-verify.cc @@ -104,17 +104,13 @@ PATENT RIGHTS GRANT: static int compare_pairs (FT_HANDLE ft_handle, const DBT *a, const DBT *b) { - FAKE_DB(db, &ft_handle->ft->cmp_descriptor); - int cmp = ft_handle->ft->compare_fun(&db, a, b); - return cmp; + return ft_handle->ft->cmp(a, b); } static int compare_pair_to_key (FT_HANDLE ft_handle, const DBT *a, bytevec key, ITEMLEN keylen) { DBT y; - FAKE_DB(db, &ft_handle->ft->cmp_descriptor); - int cmp = ft_handle->ft->compare_fun(&db, a, toku_fill_dbt(&y, key, keylen)); - return cmp; + return ft_handle->ft->cmp(a, toku_fill_dbt(&y, key, keylen)); } static int @@ -256,11 +252,7 @@ verify_sorted_by_key_msn(FT_HANDLE ft_handle, message_buffer *msg_buffer, const int r = mt.fetch(i, &offset); assert_zero(r); if (i > 0) { - struct toku_msg_buffer_key_msn_cmp_extra extra; - ZERO_STRUCT(extra); - extra.desc = &ft_handle->ft->cmp_descriptor; - extra.cmp = ft_handle->ft->compare_fun; - extra.msg_buffer = msg_buffer; + struct toku_msg_buffer_key_msn_cmp_extra extra(ft_handle->ft->cmp, msg_buffer); if (toku_msg_buffer_key_msn_cmp(extra, last_offset, offset) >= 0) { result = TOKUDB_NEEDS_REPAIR; break; @@ -274,13 +266,7 @@ verify_sorted_by_key_msn(FT_HANDLE ft_handle, message_buffer *msg_buffer, const template static int count_eq_key_msn(FT_HANDLE ft_handle, message_buffer *msg_buffer, const count_omt_t &mt, const DBT *key, MSN msn) { - struct toku_msg_buffer_key_msn_heaviside_extra extra; - ZERO_STRUCT(extra); - extra.desc = &ft_handle->ft->cmp_descriptor; - extra.cmp = ft_handle->ft->compare_fun; - extra.msg_buffer = msg_buffer; - extra.key = key; - extra.msn = 
msn; + struct toku_msg_buffer_key_msn_heaviside_extra extra(ft_handle->ft->cmp, msg_buffer, key, msn); int r = mt.template find_zero(extra, nullptr, nullptr); int count; if (r == 0) { diff --git a/ft/ft.cc b/ft/ft.cc index da8b0524ccf..5110edc66cc 100644 --- a/ft/ft.cc +++ b/ft/ft.cc @@ -119,7 +119,10 @@ ft_destroy(FT ft) { //cannot destroy since it is still in use by CURRENT assert(ft->h->type == FT_CURRENT); toku_blocktable_destroy(&ft->blocktable); + ft->cmp.destroy(); + // TODO: use real dbt function if (ft->descriptor.dbt.data) toku_free(ft->descriptor.dbt.data); + // TODO: use real dbt function if (ft->cmp_descriptor.dbt.data) toku_free(ft->cmp_descriptor.dbt.data); toku_ft_destroy_reflock(ft); toku_free(ft->h); @@ -384,7 +387,7 @@ static void ft_init(FT ft, FT_OPTIONS options, CACHEFILE cf) { toku_list_init(&ft->live_ft_handles); - ft->compare_fun = options->compare_fun; + ft->cmp.create(options->compare_fun, &ft->descriptor); ft->update_fun = options->update_fun; if (ft->cf != NULL) { @@ -449,9 +452,6 @@ void toku_ft_create(FT *ftp, FT_OPTIONS options, CACHEFILE cf, TOKUTXN txn) { invariant(ftp); FT XCALLOC(ft); - memset(&ft->descriptor, 0, sizeof(ft->descriptor)); - memset(&ft->cmp_descriptor, 0, sizeof(ft->cmp_descriptor)); - ft->h = ft_header_create(options, make_blocknum(0), (txn ? txn->txnid.parent_id64: TXNID_NONE)); toku_ft_init_reflock(ft); @@ -471,31 +471,27 @@ int toku_read_ft_and_store_in_cachefile (FT_HANDLE ft_handle, CACHEFILE cf, LSN // If the cachefile has not been initialized, then don't modify anything. // max_acceptable_lsn is the latest acceptable checkpointed version of the file. 
{ - { - FT ft; - if ((ft = (FT) toku_cachefile_get_userdata(cf))!=0) { - *header = ft; - assert(ft_handle->options.update_fun == ft->update_fun); - assert(ft_handle->options.compare_fun == ft->compare_fun); - return 0; - } - } FT ft = nullptr; - int r; - { - int fd = toku_cachefile_get_fd(cf); - r = toku_deserialize_ft_from(fd, max_acceptable_lsn, &ft); - if (r == TOKUDB_BAD_CHECKSUM) { - fprintf(stderr, "Checksum failure while reading header in file %s.\n", toku_cachefile_fname_in_env(cf)); - assert(false); // make absolutely sure we crash before doing anything else - } + if ((ft = (FT) toku_cachefile_get_userdata(cf)) != nullptr) { + *header = ft; + assert(ft_handle->options.update_fun == ft->update_fun); + return 0; } - if (r!=0) return r; - // GCC 4.8 seems to get confused by the gotos in the deserialize code and think h is maybe uninitialized. + + int fd = toku_cachefile_get_fd(cf); + int r = toku_deserialize_ft_from(fd, max_acceptable_lsn, &ft); + if (r == TOKUDB_BAD_CHECKSUM) { + fprintf(stderr, "Checksum failure while reading header in file %s.\n", toku_cachefile_fname_in_env(cf)); + assert(false); // make absolutely sure we crash before doing anything else + } else if (r != 0) { + return r; + } + invariant_notnull(ft); - ft->cf = cf; - ft->compare_fun = ft_handle->options.compare_fun; + // intuitively, the comparator points to the FT's cmp descriptor + ft->cmp.create(ft_handle->options.compare_fun, &ft->cmp_descriptor); ft->update_fun = ft_handle->options.update_fun; + ft->cf = cf; toku_cachefile_set_userdata(cf, reinterpret_cast(ft), ft_log_fassociate_during_checkpoint, @@ -632,7 +628,7 @@ ft_handle_open_for_redirect(FT_HANDLE *new_ftp, const char *fname_in_env, TOKUTX FT_HANDLE ft_handle; assert(old_ft->dict_id.dictid != DICTIONARY_ID_NONE.dictid); toku_ft_handle_create(&ft_handle); - toku_ft_set_bt_compare(ft_handle, old_ft->compare_fun); + toku_ft_set_bt_compare(ft_handle, old_ft->cmp.get_compare_func()); toku_ft_set_update(ft_handle, 
old_ft->update_fun); toku_ft_handle_set_nodesize(ft_handle, old_ft->h->nodesize); toku_ft_handle_set_basementnodesize(ft_handle, old_ft->h->basementnodesize); @@ -890,7 +886,7 @@ int toku_ft_iterate_fractal_tree_block_map(FT ft, int (*iter)(uint64_t,int64_t,i } void -toku_ft_update_descriptor(FT ft, DESCRIPTOR d) +toku_ft_update_descriptor(FT ft, DESCRIPTOR desc) // Effect: Changes the descriptor in a tree (log the change, make sure it makes it to disk eventually). // requires: the ft is fully user-opened with a valid cachefile. // descriptor updates cannot happen in parallel for an FT @@ -898,7 +894,7 @@ toku_ft_update_descriptor(FT ft, DESCRIPTOR d) { assert(ft->cf); int fd = toku_cachefile_get_fd(ft->cf); - toku_ft_update_descriptor_with_fd(ft, d, fd); + toku_ft_update_descriptor_with_fd(ft, desc, fd); } // upadate the descriptor for an ft and serialize it using @@ -907,27 +903,30 @@ toku_ft_update_descriptor(FT ft, DESCRIPTOR d) // update a descriptor before the ft is fully opened and has // a valid cachefile. 
void -toku_ft_update_descriptor_with_fd(FT ft, DESCRIPTOR d, int fd) { +toku_ft_update_descriptor_with_fd(FT ft, DESCRIPTOR desc, int fd) { // the checksum is four bytes, so that's where the magic number comes from // make space for the new descriptor and write it out to disk DISKOFF offset, size; - size = toku_serialize_descriptor_size(d) + 4; + size = toku_serialize_descriptor_size(desc) + 4; toku_realloc_descriptor_on_disk(ft->blocktable, size, &offset, ft, fd); - toku_serialize_descriptor_contents_to_fd(fd, d, offset); + toku_serialize_descriptor_contents_to_fd(fd, desc, offset); // cleanup the old descriptor and set the in-memory descriptor to the new one + // TODO: use real dbt function if (ft->descriptor.dbt.data) { toku_free(ft->descriptor.dbt.data); } - ft->descriptor.dbt.size = d->dbt.size; - ft->descriptor.dbt.data = toku_memdup(d->dbt.data, d->dbt.size); + // TODO: use real dbt function + ft->descriptor.dbt.size = desc->dbt.size; + ft->descriptor.dbt.data = toku_memdup(desc->dbt.data, desc->dbt.size); } -void -toku_ft_update_cmp_descriptor(FT ft) { +void toku_ft_update_cmp_descriptor(FT ft) { + // TODO: use real dbt function if (ft->cmp_descriptor.dbt.data != NULL) { toku_free(ft->cmp_descriptor.dbt.data); } + // TODO: use real dbt function ft->cmp_descriptor.dbt.size = ft->descriptor.dbt.size; ft->cmp_descriptor.dbt.data = toku_xmemdup( ft->descriptor.dbt.data, @@ -935,13 +934,11 @@ toku_ft_update_cmp_descriptor(FT ft) { ); } -DESCRIPTOR -toku_ft_get_descriptor(FT_HANDLE ft_handle) { +DESCRIPTOR toku_ft_get_descriptor(FT_HANDLE ft_handle) { return &ft_handle->ft->descriptor; } -DESCRIPTOR -toku_ft_get_cmp_descriptor(FT_HANDLE ft_handle) { +DESCRIPTOR toku_ft_get_cmp_descriptor(FT_HANDLE ft_handle) { return &ft_handle->ft->cmp_descriptor; } diff --git a/ft/ft.h b/ft/ft.h index 1cf1c1292f4..a5a8ab9d5b8 100644 --- a/ft/ft.h +++ b/ft/ft.h @@ -159,11 +159,11 @@ int toku_ft_iterate_fractal_tree_block_map(FT ft, int (*iter)(uint64_t,int64_t,i // any operation 
has already occurred on the ft. // see toku_ft_change_descriptor(), which is the transactional version // used by the ydb layer. it better describes the client contract. -void toku_ft_update_descriptor(FT ft, DESCRIPTOR d); +void toku_ft_update_descriptor(FT ft, DESCRIPTOR desc); // use this version if the FT is not fully user-opened with a valid cachefile. // this is a clean hack to get deserialization code to update a descriptor // while the FT and cf are in the process of opening, for upgrade purposes -void toku_ft_update_descriptor_with_fd(FT ft, DESCRIPTOR d, int fd); +void toku_ft_update_descriptor_with_fd(FT ft, DESCRIPTOR desc, int fd); void toku_ft_update_cmp_descriptor(FT ft); // get the descriptor for a ft. safe to read as long as clients honor the diff --git a/ft/ft_node-serialize.cc b/ft/ft_node-serialize.cc index 12491707e95..58a73d6b12b 100644 --- a/ft/ft_node-serialize.cc +++ b/ft/ft_node-serialize.cc @@ -873,19 +873,16 @@ toku_serialize_ftnode_to (int fd, BLOCKNUM blocknum, FTNODE node, FTNODE_DISK_DA } static void -deserialize_child_buffer_v26(NONLEAF_CHILDINFO bnc, struct rbuf *rbuf, - DESCRIPTOR desc, ft_compare_func cmp) { +deserialize_child_buffer_v26(NONLEAF_CHILDINFO bnc, struct rbuf *rbuf, const toku::comparator &cmp) { int r; int n_in_this_buffer = rbuf_int(rbuf); int32_t *fresh_offsets = NULL, *stale_offsets = NULL; int32_t *broadcast_offsets = NULL; int nfresh = 0, nstale = 0; int nbroadcast_offsets = 0; - if (cmp) { - XMALLOC_N(n_in_this_buffer, stale_offsets); - XMALLOC_N(n_in_this_buffer, fresh_offsets); - XMALLOC_N(n_in_this_buffer, broadcast_offsets); - } + XMALLOC_N(n_in_this_buffer, stale_offsets); + XMALLOC_N(n_in_this_buffer, fresh_offsets); + XMALLOC_N(n_in_this_buffer, broadcast_offsets); bnc->msg_buffer.resize(rbuf->size + 64); for (int i = 0; i < n_in_this_buffer; i++) { bytevec key; ITEMLEN keylen; @@ -900,23 +897,19 @@ deserialize_child_buffer_v26(NONLEAF_CHILDINFO bnc, struct rbuf *rbuf, rbuf_bytes(rbuf, &key, &keylen); 
/* Returns a pointer into the rbuf. */ rbuf_bytes(rbuf, &val, &vallen); int32_t *dest; - if (cmp) { - if (ft_msg_type_applies_once(type)) { - if (is_fresh) { - dest = &fresh_offsets[nfresh]; - nfresh++; - } else { - dest = &stale_offsets[nstale]; - nstale++; - } - } else if (ft_msg_type_applies_all(type) || ft_msg_type_does_nothing(type)) { - dest = &broadcast_offsets[nbroadcast_offsets]; - nbroadcast_offsets++; + if (ft_msg_type_applies_once(type)) { + if (is_fresh) { + dest = &fresh_offsets[nfresh]; + nfresh++; } else { - abort(); + dest = &stale_offsets[nstale]; + nstale++; } + } else if (ft_msg_type_applies_all(type) || ft_msg_type_does_nothing(type)) { + dest = &broadcast_offsets[nbroadcast_offsets]; + nbroadcast_offsets++; } else { - dest = NULL; + abort(); } // TODO: Function to parse stuff out of an rbuf into an FT_MSG DBT k, v; @@ -929,19 +922,17 @@ deserialize_child_buffer_v26(NONLEAF_CHILDINFO bnc, struct rbuf *rbuf, } invariant(rbuf->ndone == rbuf->size); - if (cmp) { - struct toku_msg_buffer_key_msn_cmp_extra extra = { .desc = desc, .cmp = cmp, .msg_buffer = &bnc->msg_buffer }; - r = toku::sort::mergesort_r(fresh_offsets, nfresh, extra); - assert_zero(r); - bnc->fresh_message_tree.destroy(); - bnc->fresh_message_tree.create_steal_sorted_array(&fresh_offsets, nfresh, n_in_this_buffer); - r = toku::sort::mergesort_r(stale_offsets, nstale, extra); - assert_zero(r); - bnc->stale_message_tree.destroy(); - bnc->stale_message_tree.create_steal_sorted_array(&stale_offsets, nstale, n_in_this_buffer); - bnc->broadcast_list.destroy(); - bnc->broadcast_list.create_steal_sorted_array(&broadcast_offsets, nbroadcast_offsets, n_in_this_buffer); - } + struct toku_msg_buffer_key_msn_cmp_extra extra = { .cmp = cmp, .msg_buffer = &bnc->msg_buffer }; + r = toku::sort::mergesort_r(fresh_offsets, nfresh, extra); + assert_zero(r); + bnc->fresh_message_tree.destroy(); + bnc->fresh_message_tree.create_steal_sorted_array(&fresh_offsets, nfresh, n_in_this_buffer); + r = 
toku::sort::mergesort_r(stale_offsets, nstale, extra); + assert_zero(r); + bnc->stale_message_tree.destroy(); + bnc->stale_message_tree.create_steal_sorted_array(&stale_offsets, nstale, n_in_this_buffer); + bnc->broadcast_list.destroy(); + bnc->broadcast_list.create_steal_sorted_array(&broadcast_offsets, nbroadcast_offsets, n_in_this_buffer); } // effect: deserialize a single message from rbuf and enqueue the result into the given message buffer @@ -1305,8 +1296,7 @@ update_bfe_using_ftnode(FTNODE node, struct ftnode_fetch_extra *bfe) // we find out what basement node the query cares about // and check if it is available bfe->child_to_read = toku_ft_search_which_child( - &bfe->ft->cmp_descriptor, - bfe->ft->compare_fun, + bfe->ft->cmp, node, bfe->search ); @@ -1316,7 +1306,6 @@ update_bfe_using_ftnode(FTNODE node, struct ftnode_fetch_extra *bfe) // we can possibly require is a single basement node // we find out what basement node the query cares about // and check if it is available - paranoid_invariant(bfe->ft->compare_fun); if (node->height == 0) { int left_child = toku_bfe_leftmost_child_wanted(bfe, node); int right_child = toku_bfe_rightmost_child_wanted(bfe, node); @@ -1398,8 +1387,7 @@ deserialize_ftnode_partition( struct sub_block *sb, FTNODE node, int childnum, // which partition to deserialize - DESCRIPTOR desc, - ft_compare_func cmp + const toku::comparator &cmp ) { int r = 0; @@ -1421,7 +1409,7 @@ deserialize_ftnode_partition( NONLEAF_CHILDINFO bnc = BNC(node, childnum); if (node->layout_version_read_from_disk <= FT_LAYOUT_VERSION_26) { // Layout version <= 26 did not serialize sorted message trees to disk. 
- deserialize_child_buffer_v26(bnc, &rb, desc, cmp); + deserialize_child_buffer_v26(bnc, &rb, cmp); } else { deserialize_child_buffer(bnc, &rb); } @@ -1444,7 +1432,7 @@ exit: static int decompress_and_deserialize_worker(struct rbuf curr_rbuf, struct sub_block curr_sb, FTNODE node, int child, - DESCRIPTOR desc, ft_compare_func cmp, tokutime_t *decompress_time) + const toku::comparator &cmp, tokutime_t *decompress_time) { int r = 0; tokutime_t t0 = toku_time_now(); @@ -1452,7 +1440,7 @@ decompress_and_deserialize_worker(struct rbuf curr_rbuf, struct sub_block curr_s tokutime_t t1 = toku_time_now(); if (r == 0) { // at this point, sb->uncompressed_ptr stores the serialized node partition - r = deserialize_ftnode_partition(&curr_sb, node, child, desc, cmp); + r = deserialize_ftnode_partition(&curr_sb, node, child, cmp); } *decompress_time = t1 - t0; @@ -1772,11 +1760,9 @@ deserialize_and_upgrade_internal_node(FTNODE node, int nfresh = 0; int nbroadcast_offsets = 0; - if (bfe->ft->compare_fun) { - XMALLOC_N(n_in_this_buffer, fresh_offsets); - // We skip 'stale' offsets for upgraded nodes. - XMALLOC_N(n_in_this_buffer, broadcast_offsets); - } + // We skip 'stale' offsets for upgraded nodes. + XMALLOC_N(n_in_this_buffer, fresh_offsets); + XMALLOC_N(n_in_this_buffer, broadcast_offsets); // Atomically decrement the header's MSN count by the number // of messages in the buffer. @@ -1800,18 +1786,14 @@ deserialize_and_upgrade_internal_node(FTNODE node, // can we factor this out? 
int32_t *dest; - if (bfe->ft->compare_fun) { - if (ft_msg_type_applies_once(type)) { - dest = &fresh_offsets[nfresh]; - nfresh++; - } else if (ft_msg_type_applies_all(type) || ft_msg_type_does_nothing(type)) { - dest = &broadcast_offsets[nbroadcast_offsets]; - nbroadcast_offsets++; - } else { - abort(); - } + if (ft_msg_type_applies_once(type)) { + dest = &fresh_offsets[nfresh]; + nfresh++; + } else if (ft_msg_type_applies_all(type) || ft_msg_type_does_nothing(type)) { + dest = &broadcast_offsets[nbroadcast_offsets]; + nbroadcast_offsets++; } else { - dest = NULL; + abort(); } // Increment our MSN, the last message should have the @@ -1827,18 +1809,15 @@ deserialize_and_upgrade_internal_node(FTNODE node, xids_destroy(&xids); } - if (bfe->ft->compare_fun) { - struct toku_msg_buffer_key_msn_cmp_extra extra = { .desc = &bfe->ft->cmp_descriptor, - .cmp = bfe->ft->compare_fun, - .msg_buffer = &bnc->msg_buffer }; - typedef toku::sort key_msn_sort; - r = key_msn_sort::mergesort_r(fresh_offsets, nfresh, extra); - assert_zero(r); - bnc->fresh_message_tree.destroy(); - bnc->fresh_message_tree.create_steal_sorted_array(&fresh_offsets, nfresh, n_in_this_buffer); - bnc->broadcast_list.destroy(); - bnc->broadcast_list.create_steal_sorted_array(&broadcast_offsets, nbroadcast_offsets, n_in_this_buffer); - } + struct toku_msg_buffer_key_msn_cmp_extra extra = { .cmp = bfe->ft->cmp, + .msg_buffer = &bnc->msg_buffer }; + typedef toku::sort key_msn_sort; + r = key_msn_sort::mergesort_r(fresh_offsets, nfresh, extra); + assert_zero(r); + bnc->fresh_message_tree.destroy(); + bnc->fresh_message_tree.create_steal_sorted_array(&fresh_offsets, nfresh, n_in_this_buffer); + bnc->broadcast_list.destroy(); + bnc->broadcast_list.create_steal_sorted_array(&broadcast_offsets, nbroadcast_offsets, n_in_this_buffer); } // Assign the highest msn from our upgrade message buffers @@ -2259,7 +2238,7 @@ deserialize_ftnode_from_rbuf( // case where we read and decompress the partition tokutime_t 
partition_decompress_time; r = decompress_and_deserialize_worker(curr_rbuf, curr_sb, node, i, - &bfe->ft->cmp_descriptor, bfe->ft->compare_fun, &partition_decompress_time); + bfe->ft->cmp, &partition_decompress_time); decompress_time += partition_decompress_time; if (r != 0) { goto cleanup; @@ -2365,7 +2344,7 @@ toku_deserialize_bp_from_disk(FTNODE node, FTNODE_DISK_DATA ndd, int childnum, i // deserialize tokutime_t t2 = toku_time_now(); - r = deserialize_ftnode_partition(&curr_sb, node, childnum, &bfe->ft->cmp_descriptor, bfe->ft->compare_fun); + r = deserialize_ftnode_partition(&curr_sb, node, childnum, bfe->ft->cmp); tokutime_t t3 = toku_time_now(); @@ -2409,7 +2388,7 @@ toku_deserialize_bp_from_compressed(FTNODE node, int childnum, struct ftnode_fet tokutime_t t1 = toku_time_now(); - r = deserialize_ftnode_partition(curr_sb, node, childnum, &bfe->ft->cmp_descriptor, bfe->ft->compare_fun); + r = deserialize_ftnode_partition(curr_sb, node, childnum, bfe->ft->cmp); tokutime_t t2 = toku_time_now(); diff --git a/ft/le-cursor.cc b/ft/le-cursor.cc index 3eb73f1345b..1b908064127 100644 --- a/ft/le-cursor.cc +++ b/ft/le-cursor.cc @@ -101,10 +101,6 @@ PATENT RIGHTS GRANT: // A LE_CURSOR is good for scanning a FT from beginning to end. Useful for hot indexing. struct le_cursor { - // TODO: remove DBs from the ft layer comparison function - // so this is never necessary - // use a fake db for comparisons. - struct __toku_db fake_db; FT_CURSOR ft_cursor; bool neg_infinity; // true when the le cursor is positioned at -infinity (initial setting) bool pos_infinity; // true when the le cursor is positioned at +infinity (when _next returns DB_NOTFOUND) @@ -124,8 +120,6 @@ toku_le_cursor_create(LE_CURSOR *le_cursor_result, FT_HANDLE ft_handle, TOKUTXN toku_ft_cursor_set_leaf_mode(le_cursor->ft_cursor); le_cursor->neg_infinity = false; le_cursor->pos_infinity = true; - // zero out the fake DB. this is a rare operation so it's not too slow. 
- memset(&le_cursor->fake_db, 0, sizeof(le_cursor->fake_db)); } } @@ -170,13 +164,9 @@ toku_le_cursor_is_key_greater_or_equal(LE_CURSOR le_cursor, const DBT *key) { } else if (le_cursor->pos_infinity) { result = false; // all keys are less than +infinity } else { - // get the comparison function and descriptor from the cursor's ft - FT_HANDLE ft_handle = le_cursor->ft_cursor->ft_handle; - ft_compare_func keycompare = toku_ft_get_bt_compare(ft_handle); - le_cursor->fake_db.cmp_descriptor = toku_ft_get_cmp_descriptor(ft_handle); + FT ft = le_cursor->ft_cursor->ft_handle->ft; // get the current position from the cursor and compare it to the given key. - DBT *cursor_key = &le_cursor->ft_cursor->key; - int r = keycompare(&le_cursor->fake_db, cursor_key, key); + int r = ft->cmp(&le_cursor->ft_cursor->key, key); if (r <= 0) { result = true; // key is right of the cursor key } else { diff --git a/ft/node.cc b/ft/node.cc index 57ee43458cf..76265029626 100644 --- a/ft/node.cc +++ b/ft/node.cc @@ -408,9 +408,8 @@ do_bn_apply_msg(FT_HANDLE ft_handle, BASEMENTNODE bn, message_buffer *msg_buffer // node's msn until the end. 
if (msg.msn.msn > bn->max_msn_applied.msn) { toku_ft_bn_apply_msg( - ft_handle->ft->compare_fun, + ft_handle->ft->cmp, ft_handle->ft->update_fun, - &ft_handle->ft->cmp_descriptor, bn, &msg, gc_info, @@ -463,8 +462,7 @@ int iterate_do_bn_apply_msg(const int32_t &offset, const uint32_t UU(idx), struc template static void find_bounds_within_message_tree( - DESCRIPTOR desc, /// used for cmp - ft_compare_func cmp, /// used to compare keys + const toku::comparator &cmp, const find_bounds_omt_t &message_tree, /// tree holding message buffer offsets, in which we want to look for indices message_buffer *msg_buffer, /// message buffer in which messages are found struct pivot_bounds const * const bounds, /// key bounds within the basement node we're applying messages to @@ -480,13 +478,7 @@ find_bounds_within_message_tree( // message (with any msn) with the key lower_bound_exclusive. // This will be a message we want to try applying, so it is the // "lower bound inclusive" within the message_tree. - struct toku_msg_buffer_key_msn_heaviside_extra lbi_extra; - ZERO_STRUCT(lbi_extra); - lbi_extra.desc = desc; - lbi_extra.cmp = cmp; - lbi_extra.msg_buffer = msg_buffer; - lbi_extra.key = bounds->lower_bound_exclusive; - lbi_extra.msn = MAX_MSN; + struct toku_msg_buffer_key_msn_heaviside_extra lbi_extra(cmp, msg_buffer, bounds->lower_bound_exclusive, MAX_MSN); int32_t found_lb; r = message_tree.template find(lbi_extra, +1, &found_lb, lbi); if (r == DB_NOTFOUND) { @@ -505,8 +497,7 @@ find_bounds_within_message_tree( const int32_t offset = found_lb; DBT found_lbidbt; msg_buffer->get_message_key_msn(offset, &found_lbidbt, nullptr); - FAKE_DB(db, desc); - int c = cmp(&db, &found_lbidbt, ubi); + int c = cmp(&found_lbidbt, ubi); // These DBTs really are both inclusive bounds, so we need // strict inequality in order to determine that there's // nothing between them. 
If they're equal, then we actually @@ -528,13 +519,7 @@ find_bounds_within_message_tree( // the first thing bigger than the upper_bound_inclusive key. // This is therefore the smallest thing we don't want to apply, // and omt::iterate_on_range will not examine it. - struct toku_msg_buffer_key_msn_heaviside_extra ube_extra; - ZERO_STRUCT(ube_extra); - ube_extra.desc = desc; - ube_extra.cmp = cmp; - ube_extra.msg_buffer = msg_buffer; - ube_extra.key = bounds->upper_bound_inclusive; - ube_extra.msn = MAX_MSN; + struct toku_msg_buffer_key_msn_heaviside_extra ube_extra(cmp, msg_buffer, bounds->upper_bound_inclusive, MAX_MSN); r = message_tree.template find(ube_extra, +1, nullptr, ube); if (r == DB_NOTFOUND) { // Couldn't find anything in the buffer bigger than our key, @@ -577,13 +562,13 @@ bnc_apply_messages_to_basement_node( uint32_t stale_lbi, stale_ube; if (!bn->stale_ancestor_messages_applied) { - find_bounds_within_message_tree(&t->ft->cmp_descriptor, t->ft->compare_fun, bnc->stale_message_tree, &bnc->msg_buffer, bounds, &stale_lbi, &stale_ube); + find_bounds_within_message_tree(t->ft->cmp, bnc->stale_message_tree, &bnc->msg_buffer, bounds, &stale_lbi, &stale_ube); } else { stale_lbi = 0; stale_ube = 0; } uint32_t fresh_lbi, fresh_ube; - find_bounds_within_message_tree(&t->ft->cmp_descriptor, t->ft->compare_fun, bnc->fresh_message_tree, &bnc->msg_buffer, bounds, &fresh_lbi, &fresh_ube); + find_bounds_within_message_tree(t->ft->cmp, bnc->fresh_message_tree, &bnc->msg_buffer, bounds, &fresh_lbi, &fresh_ube); // We now know where all the messages we must apply are, so one of the // following 4 cases will do the application, depending on which of @@ -775,8 +760,7 @@ static bool bn_needs_ancestors_messages( } if (!bn->stale_ancestor_messages_applied) { uint32_t stale_lbi, stale_ube; - find_bounds_within_message_tree(&ft->cmp_descriptor, - ft->compare_fun, + find_bounds_within_message_tree(ft->cmp, bnc->stale_message_tree, &bnc->msg_buffer, &curr_bounds, @@ -788,8 
+772,7 @@ static bool bn_needs_ancestors_messages( } } uint32_t fresh_lbi, fresh_ube; - find_bounds_within_message_tree(&ft->cmp_descriptor, - ft->compare_fun, + find_bounds_within_message_tree(ft->cmp, bnc->fresh_message_tree, &bnc->msg_buffer, &curr_bounds, @@ -905,7 +888,7 @@ int copy_to_stale(const int32_t &offset, const uint32_t UU(idx), struct copy_to_ MSN msn; DBT key; extra->bnc->msg_buffer.get_message_key_msn(offset, &key, &msn); - struct toku_msg_buffer_key_msn_heaviside_extra heaviside_extra = { .desc = &extra->ft->cmp_descriptor, .cmp = extra->ft->compare_fun, .msg_buffer = &extra->bnc->msg_buffer, .key = &key, .msn = msn }; + struct toku_msg_buffer_key_msn_heaviside_extra heaviside_extra(extra->ft->cmp, &extra->bnc->msg_buffer, &key, msn); int r = extra->bnc->stale_message_tree.insert(offset, heaviside_extra, nullptr); invariant_zero(r); return 0; @@ -1397,7 +1380,7 @@ static void setval_fun (const DBT *new_val, void *svextra_v) { // so capturing the msn in the setval_extra_s is not strictly required. The alternative // would be to put a dummy msn in the messages created by setval_fun(), but preserving // the original msn seems cleaner and it preserves accountability at a lower layer. -static int do_update(ft_update_func update_fun, DESCRIPTOR desc, BASEMENTNODE bn, FT_MSG msg, uint32_t idx, +static int do_update(ft_update_func update_fun, const DESCRIPTOR_S *desc, BASEMENTNODE bn, FT_MSG msg, uint32_t idx, LEAFENTRY le, void* keydata, uint32_t keylen, @@ -1463,9 +1446,8 @@ static int do_update(ft_update_func update_fun, DESCRIPTOR desc, BASEMENTNODE bn // Should be renamed as something like "apply_msg_to_basement()." 
void toku_ft_bn_apply_msg ( - ft_compare_func compare_fun, + const toku::comparator &cmp, ft_update_func update_fun, - DESCRIPTOR desc, BASEMENTNODE bn, FT_MSG msg, txn_gc_info *gc_info, @@ -1483,7 +1465,7 @@ toku_ft_bn_apply_msg ( uint32_t num_klpairs; int r; - struct toku_msg_leafval_heaviside_extra be = {compare_fun, desc, msg->u.id.key}; + struct toku_msg_leafval_heaviside_extra be(cmp, msg->u.id.key); unsigned int doing_seqinsert = bn->seqinsert; bn->seqinsert = 0; @@ -1497,8 +1479,8 @@ toku_ft_bn_apply_msg ( DBT kdbt; r = bn->data_buffer.fetch_key_and_len(idx-1, &kdbt.size, &kdbt.data); if (r != 0) goto fz; - int cmp = toku_msg_leafval_heaviside(kdbt, be); - if (cmp >= 0) goto fz; + int c = toku_msg_leafval_heaviside(kdbt, be); + if (c >= 0) goto fz; r = DB_NOTFOUND; } else { fz: @@ -1636,9 +1618,9 @@ toku_ft_bn_apply_msg ( key = msg->u.id.key->data; keylen = msg->u.id.key->size; } - r = do_update(update_fun, desc, bn, msg, idx, NULL, NULL, 0, gc_info, workdone, stats_to_update); + r = do_update(update_fun, cmp.get_descriptor(), bn, msg, idx, NULL, NULL, 0, gc_info, workdone, stats_to_update); } else if (r==0) { - r = do_update(update_fun, desc, bn, msg, idx, storeddata, key, keylen, gc_info, workdone, stats_to_update); + r = do_update(update_fun, cmp.get_descriptor(), bn, msg, idx, storeddata, key, keylen, gc_info, workdone, stats_to_update); } // otherwise, a worse error, just return it break; } @@ -1661,7 +1643,7 @@ toku_ft_bn_apply_msg ( // This is broken below. 
Have a compilation error checked // in as a reminder - r = do_update(update_fun, desc, bn, msg, idx, storeddata, curr_key, curr_keylen, gc_info, workdone, stats_to_update); + r = do_update(update_fun, cmp.get_descriptor(), bn, msg, idx, storeddata, curr_key, curr_keylen, gc_info, workdone, stats_to_update); assert_zero(r); if (num_leafentries_before == bn->data_buffer.num_klpairs()) { @@ -1678,11 +1660,8 @@ toku_ft_bn_apply_msg ( } static inline int -key_msn_cmp(const DBT *a, const DBT *b, const MSN amsn, const MSN bmsn, - DESCRIPTOR descriptor, ft_compare_func key_cmp) -{ - FAKE_DB(db, descriptor); - int r = key_cmp(&db, a, b); +key_msn_cmp(const DBT *a, const DBT *b, const MSN amsn, const MSN bmsn, const toku::comparator &cmp) { + int r = cmp(a, b); if (r == 0) { if (amsn.msn > bmsn.msn) { r = +1; @@ -1699,25 +1678,21 @@ int toku_msg_buffer_key_msn_heaviside(const int32_t &offset, const struct toku_m MSN query_msn; DBT query_key; extra.msg_buffer->get_message_key_msn(offset, &query_key, &query_msn); - return key_msn_cmp(&query_key, extra.key, query_msn, extra.msn, - extra.desc, extra.cmp); + return key_msn_cmp(&query_key, extra.key, query_msn, extra.msn, extra.cmp); } -int -toku_msg_buffer_key_msn_cmp(const struct toku_msg_buffer_key_msn_cmp_extra &extra, const int32_t &ao, const int32_t &bo) -{ +int toku_msg_buffer_key_msn_cmp(const struct toku_msg_buffer_key_msn_cmp_extra &extra, const int32_t &ao, const int32_t &bo) { MSN amsn, bmsn; DBT akey, bkey; extra.msg_buffer->get_message_key_msn(ao, &akey, &amsn); extra.msg_buffer->get_message_key_msn(bo, &bkey, &bmsn); - return key_msn_cmp(&akey, &bkey, amsn, bmsn, - extra.desc, extra.cmp); + return key_msn_cmp(&akey, &bkey, amsn, bmsn, extra.cmp); } // Effect: Enqueue the message represented by the parameters into the // bnc's buffer, and put it in either the fresh or stale message tree, // or the broadcast list. 
-static void bnc_insert_msg(NONLEAF_CHILDINFO bnc, FT_MSG msg, bool is_fresh, DESCRIPTOR desc, ft_compare_func cmp) { +static void bnc_insert_msg(NONLEAF_CHILDINFO bnc, FT_MSG msg, bool is_fresh, const toku::comparator &cmp) { int r = 0; int32_t offset; bnc->msg_buffer.enqueue(msg, is_fresh, &offset); @@ -1725,7 +1700,7 @@ static void bnc_insert_msg(NONLEAF_CHILDINFO bnc, FT_MSG msg, bool is_fresh, DES if (ft_msg_type_applies_once(type)) { DBT key; toku_fill_dbt(&key, ft_msg_get_key(msg), ft_msg_get_keylen(msg)); - struct toku_msg_buffer_key_msn_heaviside_extra extra = { .desc = desc, .cmp = cmp, .msg_buffer = &bnc->msg_buffer, .key = &key, .msn = msg->msn }; + struct toku_msg_buffer_key_msn_heaviside_extra extra(cmp, &bnc->msg_buffer, &key, msg->msn); if (is_fresh) { r = bnc->fresh_message_tree.insert(offset, extra, nullptr); assert_zero(r); @@ -1742,62 +1717,60 @@ static void bnc_insert_msg(NONLEAF_CHILDINFO bnc, FT_MSG msg, bool is_fresh, DES } // This is only exported for tests. -void toku_bnc_insert_msg(NONLEAF_CHILDINFO bnc, const void *key, ITEMLEN keylen, const void *data, ITEMLEN datalen, enum ft_msg_type type, MSN msn, XIDS xids, bool is_fresh, DESCRIPTOR desc, ft_compare_func cmp) +void toku_bnc_insert_msg(NONLEAF_CHILDINFO bnc, const void *key, ITEMLEN keylen, const void *data, ITEMLEN datalen, enum ft_msg_type type, MSN msn, XIDS xids, bool is_fresh, const toku::comparator &cmp) { DBT k, v; FT_MSG_S msg = { type, msn, xids, .u = { .id = { toku_fill_dbt(&k, key, keylen), toku_fill_dbt(&v, data, datalen) } } }; - bnc_insert_msg(bnc, &msg, is_fresh, desc, cmp); + bnc_insert_msg(bnc, &msg, is_fresh, cmp); } // append a msg to a nonleaf node's child buffer -static void ft_append_msg_to_child_buffer(ft_compare_func compare_fun, DESCRIPTOR desc, FTNODE node, +static void ft_append_msg_to_child_buffer(const toku::comparator &cmp, FTNODE node, int childnum, FT_MSG msg, bool is_fresh) { paranoid_invariant(BP_STATE(node,childnum) == PT_AVAIL); - 
bnc_insert_msg(BNC(node, childnum), msg, is_fresh, desc, compare_fun); + bnc_insert_msg(BNC(node, childnum), msg, is_fresh, cmp); node->dirty = 1; } // This is only exported for tests. -void toku_ft_append_to_child_buffer(ft_compare_func compare_fun, DESCRIPTOR desc, FTNODE node, int childnum, enum ft_msg_type type, MSN msn, XIDS xids, bool is_fresh, const DBT *key, const DBT *val) { +void toku_ft_append_to_child_buffer(const toku::comparator &cmp, FTNODE node, int childnum, enum ft_msg_type type, MSN msn, XIDS xids, bool is_fresh, const DBT *key, const DBT *val) { FT_MSG_S msg = { type, msn, xids, .u = { .id = { key, val } } }; - ft_append_msg_to_child_buffer(compare_fun, desc, node, childnum, &msg, is_fresh); + ft_append_msg_to_child_buffer(cmp, node, childnum, &msg, is_fresh); } -static void ft_nonleaf_msg_once_to_child(ft_compare_func compare_fun, DESCRIPTOR desc, FTNODE node, int target_childnum, FT_MSG msg, bool is_fresh, size_t flow_deltas[]) +static void ft_nonleaf_msg_once_to_child(const toku::comparator &cmp, FTNODE node, int target_childnum, FT_MSG msg, bool is_fresh, size_t flow_deltas[]) // Previously we had passive aggressive promotion, but that causes a lot of I/O a the checkpoint. So now we are just putting it in the buffer here. // Also we don't worry about the node getting overfull here. It's the caller's problem. { unsigned int childnum = (target_childnum >= 0 ? 
target_childnum - : toku_ftnode_which_child(node, msg->u.id.key, desc, compare_fun)); - ft_append_msg_to_child_buffer(compare_fun, desc, node, childnum, msg, is_fresh); + : toku_ftnode_which_child(node, msg->u.id.key, cmp)); + ft_append_msg_to_child_buffer(cmp, node, childnum, msg, is_fresh); NONLEAF_CHILDINFO bnc = BNC(node, childnum); bnc->flow[0] += flow_deltas[0]; bnc->flow[1] += flow_deltas[1]; } -static int ft_compare_pivot(DESCRIPTOR desc, ft_compare_func cmp, const DBT *key, const DBT *pivot) { - FAKE_DB(db, desc); - int r = cmp(&db, key, pivot); - return r; +// TODO: Remove me, I'm boring. +static int ft_compare_pivot(const toku::comparator &cmp, const DBT *key, const DBT *pivot) { + return cmp(key, pivot); } /* Find the leftmost child that may contain the key. * If the key exists it will be in the child whose number * is the return value of this function. */ -int toku_ftnode_which_child(FTNODE node, const DBT *k, - DESCRIPTOR desc, ft_compare_func cmp) { +int toku_ftnode_which_child(FTNODE node, const DBT *k, const toku::comparator &cmp) { // a funny case of no pivots if (node->n_children <= 1) return 0; // check the last key to optimize seq insertions int n = node->n_children-1; - int c = ft_compare_pivot(desc, cmp, k, node->pivotkeys.get_pivot(n - 1)); + int c = ft_compare_pivot(cmp, k, node->pivotkeys.get_pivot(n - 1)); if (c > 0) return n; // binary search the pivots @@ -1806,7 +1779,7 @@ int toku_ftnode_which_child(FTNODE node, const DBT *k, int mi; while (lo < hi) { mi = (lo + hi) / 2; - c = ft_compare_pivot(desc, cmp, k, node->pivotkeys.get_pivot(mi)); + c = ft_compare_pivot(cmp, k, node->pivotkeys.get_pivot(mi)); if (c > 0) { lo = mi+1; continue; @@ -1821,17 +1794,13 @@ int toku_ftnode_which_child(FTNODE node, const DBT *k, } // Used for HOT. 
-int -toku_ftnode_hot_next_child(FTNODE node, - const DBT *k, - DESCRIPTOR desc, - ft_compare_func cmp) { +int toku_ftnode_hot_next_child(FTNODE node, const DBT *k, const toku::comparator &cmp) { int low = 0; int hi = node->n_children - 1; int mi; while (low < hi) { mi = (low + hi) / 2; - int r = ft_compare_pivot(desc, cmp, k, node->pivotkeys.get_pivot(mi)); + int r = ft_compare_pivot(cmp, k, node->pivotkeys.get_pivot(mi)); if (r > 0) { low = mi + 1; } else if (r < 0) { @@ -1845,19 +1814,20 @@ toku_ftnode_hot_next_child(FTNODE node, invariant(low == hi); return low; } + static void -ft_nonleaf_msg_all(ft_compare_func compare_fun, DESCRIPTOR desc, FTNODE node, FT_MSG msg, bool is_fresh, size_t flow_deltas[]) +ft_nonleaf_msg_all(const toku::comparator &cmp, FTNODE node, FT_MSG msg, bool is_fresh, size_t flow_deltas[]) // Effect: Put the message into a nonleaf node. We put it into all children, possibly causing the children to become reactive. // We don't do the splitting and merging. That's up to the caller after doing all the puts it wants to do. // The re_array[i] gets set to the reactivity of any modified child i. (And there may be several such children.) { for (int i = 0; i < node->n_children; i++) { - ft_nonleaf_msg_once_to_child(compare_fun, desc, node, i, msg, is_fresh, flow_deltas); + ft_nonleaf_msg_once_to_child(cmp, node, i, msg, is_fresh, flow_deltas); } } static void -ft_nonleaf_put_msg(ft_compare_func compare_fun, DESCRIPTOR desc, FTNODE node, int target_childnum, FT_MSG msg, bool is_fresh, size_t flow_deltas[]) +ft_nonleaf_put_msg(const toku::comparator &cmp, FTNODE node, int target_childnum, FT_MSG msg, bool is_fresh, size_t flow_deltas[]) // Effect: Put the message into a nonleaf node. We may put it into a child, possibly causing the child to become reactive. // We don't do the splitting and merging. That's up to the caller after doing all the puts it wants to do. // The re_array[i] gets set to the reactivity of any modified child i. 
(And there may be several such children.) @@ -1875,9 +1845,9 @@ ft_nonleaf_put_msg(ft_compare_func compare_fun, DESCRIPTOR desc, FTNODE node, in node->max_msn_applied_to_node_on_disk = msg_msn; if (ft_msg_type_applies_once(msg->type)) { - ft_nonleaf_msg_once_to_child(compare_fun, desc, node, target_childnum, msg, is_fresh, flow_deltas); + ft_nonleaf_msg_once_to_child(cmp, node, target_childnum, msg, is_fresh, flow_deltas); } else if (ft_msg_type_applies_all(msg->type)) { - ft_nonleaf_msg_all(compare_fun, desc, node, msg, is_fresh, flow_deltas); + ft_nonleaf_msg_all(cmp, node, msg, is_fresh, flow_deltas); } else { paranoid_invariant(ft_msg_type_does_nothing(msg->type)); } @@ -2032,9 +2002,8 @@ void toku_ftnode_leaf_run_gc(FT ft, FTNODE node) { void toku_ftnode_put_msg ( - ft_compare_func compare_fun, + const toku::comparator &cmp, ft_update_func update_fun, - DESCRIPTOR desc, FTNODE node, int target_childnum, FT_MSG msg, @@ -2057,9 +2026,9 @@ toku_ftnode_put_msg ( // and instead defer to these functions // if (node->height==0) { - toku_ft_leaf_apply_msg(compare_fun, update_fun, desc, node, target_childnum, msg, gc_info, nullptr, stats_to_update); + toku_ft_leaf_apply_msg(cmp, update_fun, node, target_childnum, msg, gc_info, nullptr, stats_to_update); } else { - ft_nonleaf_put_msg(compare_fun, desc, node, target_childnum, msg, is_fresh, flow_deltas); + ft_nonleaf_put_msg(cmp, node, target_childnum, msg, is_fresh, flow_deltas); } } @@ -2067,9 +2036,8 @@ toku_ftnode_put_msg ( // This function is called during message injection and/or flushing, so the entire // node MUST be in memory. void toku_ft_leaf_apply_msg( - ft_compare_func compare_fun, + const toku::comparator &cmp, ft_update_func update_fun, - DESCRIPTOR desc, FTNODE node, int target_childnum, // which child to inject to, or -1 if unknown FT_MSG msg, @@ -2109,13 +2077,12 @@ void toku_ft_leaf_apply_msg( if (ft_msg_type_applies_once(msg->type)) { unsigned int childnum = (target_childnum >= 0 ? 
target_childnum - : toku_ftnode_which_child(node, msg->u.id.key, desc, compare_fun)); + : toku_ftnode_which_child(node, msg->u.id.key, cmp)); BASEMENTNODE bn = BLB(node, childnum); if (msg->msn.msn > bn->max_msn_applied.msn) { bn->max_msn_applied = msg->msn; - toku_ft_bn_apply_msg(compare_fun, + toku_ft_bn_apply_msg(cmp, update_fun, - desc, bn, msg, gc_info, @@ -2129,9 +2096,8 @@ void toku_ft_leaf_apply_msg( for (int childnum=0; childnumn_children; childnum++) { if (msg->msn.msn > BLB(node, childnum)->max_msn_applied.msn) { BLB(node, childnum)->max_msn_applied = msg->msn; - toku_ft_bn_apply_msg(compare_fun, + toku_ft_bn_apply_msg(cmp, update_fun, - desc, BLB(node, childnum), msg, gc_info, diff --git a/ft/node.h b/ft/node.h index 0ef842d9dd0..6300d92e86e 100644 --- a/ft/node.h +++ b/ft/node.h @@ -88,6 +88,7 @@ PATENT RIGHTS GRANT: #pragma once +#include "ft/comparator.h" #include "ft/cachetable.h" #include "ft/bndata.h" #include "ft/fttypes.h" @@ -305,7 +306,7 @@ void toku_ftnode_clone_partitions(FTNODE node, FTNODE cloned_node); void toku_initialize_empty_ftnode(FTNODE node, BLOCKNUM blocknum, int height, int num_children, int layout_version, unsigned int flags); -int toku_ftnode_which_child(FTNODE node, const DBT *k, DESCRIPTOR desc, ft_compare_func cmp); +int toku_ftnode_which_child(FTNODE node, const DBT *k, const toku::comparator &cmp); // // Field in ftnode_fetch_extra that tells the @@ -378,25 +379,31 @@ typedef struct ftnode_fetch_extra *FTNODE_FETCH_EXTRA; // TODO: put the heaviside functions into their respective 'struct .*extra;' namespaces // struct toku_msg_buffer_key_msn_heaviside_extra { - DESCRIPTOR desc; - ft_compare_func cmp; + const toku::comparator &cmp; message_buffer *msg_buffer; const DBT *key; MSN msn; + toku_msg_buffer_key_msn_heaviside_extra(const toku::comparator &c, message_buffer *mb, const DBT *k, MSN m) : + cmp(c), msg_buffer(mb), key(k), msn(m) { + } }; int toku_msg_buffer_key_msn_heaviside(const int32_t &v, const struct 
toku_msg_buffer_key_msn_heaviside_extra &extra); struct toku_msg_buffer_key_msn_cmp_extra { - DESCRIPTOR desc; - ft_compare_func cmp; + const toku::comparator &cmp; message_buffer *msg_buffer; + toku_msg_buffer_key_msn_cmp_extra(const toku::comparator &c, message_buffer *mb) : + cmp(c), msg_buffer(mb) { + } }; int toku_msg_buffer_key_msn_cmp(const struct toku_msg_buffer_key_msn_cmp_extra &extrap, const int &a, const int &b); struct toku_msg_leafval_heaviside_extra { - ft_compare_func compare_fun; - DESCRIPTOR desc; - DBT const * const key; + const toku::comparator &cmp; + DBT const *const key; + toku_msg_leafval_heaviside_extra(const toku::comparator &c, const DBT *k) : + cmp(c), key(k) { + } }; int toku_msg_leafval_heaviside(DBT const &kdbt, const struct toku_msg_leafval_heaviside_extra &be); @@ -404,7 +411,7 @@ unsigned int toku_bnc_nbytesinbuf(NONLEAF_CHILDINFO bnc); int toku_bnc_n_entries(NONLEAF_CHILDINFO bnc); long toku_bnc_memory_size(NONLEAF_CHILDINFO bnc); long toku_bnc_memory_used(NONLEAF_CHILDINFO bnc); -void toku_bnc_insert_msg(NONLEAF_CHILDINFO bnc, const void *key, ITEMLEN keylen, const void *data, ITEMLEN datalen, enum ft_msg_type type, MSN msn, XIDS xids, bool is_fresh, DESCRIPTOR desc, ft_compare_func cmp); +void toku_bnc_insert_msg(NONLEAF_CHILDINFO bnc, const void *key, ITEMLEN keylen, const void *data, ITEMLEN datalen, enum ft_msg_type type, MSN msn, XIDS xids, bool is_fresh, const toku::comparator &cmp); void toku_bnc_empty(NONLEAF_CHILDINFO bnc); void toku_bnc_flush_to_child(FT ft, NONLEAF_CHILDINFO bnc, FTNODE child, TXNID parent_oldest_referenced_xid_known); bool toku_bnc_should_promote(FT ft, NONLEAF_CHILDINFO bnc) __attribute__((const, nonnull)); @@ -435,11 +442,10 @@ enum reactivity toku_ftnode_get_leaf_reactivity(FTNODE node, uint32_t nodesize); * If k is equal to some pivot, then we return the next (to the right) * childnum. 
*/ -int toku_ftnode_hot_next_child(FTNODE node, const DBT *k, - DESCRIPTOR desc, ft_compare_func cmp); +int toku_ftnode_hot_next_child(FTNODE node, const DBT *k, const toku::comparator &cmp); -void toku_ftnode_put_msg(ft_compare_func compare_fun, ft_update_func update_fun, - DESCRIPTOR desc, FTNODE node, int target_childnum, +void toku_ftnode_put_msg(const toku::comparator &cmp, ft_update_func update_fun, + FTNODE node, int target_childnum, FT_MSG msg, bool is_fresh, txn_gc_info *gc_info, size_t flow_deltas[], STAT64INFO stats_to_update); @@ -447,12 +453,12 @@ void toku_ft_bn_apply_msg_once(BASEMENTNODE bn, const FT_MSG msg, uint32_t idx, uint32_t le_keylen, LEAFENTRY le, txn_gc_info *gc_info, uint64_t *workdonep, STAT64INFO stats_to_update); -void toku_ft_bn_apply_msg(ft_compare_func compare_fun, ft_update_func update_fun, - DESCRIPTOR desc, BASEMENTNODE bn, FT_MSG msg, txn_gc_info *gc_info, +void toku_ft_bn_apply_msg(const toku::comparator &cmp, ft_update_func update_fun, + BASEMENTNODE bn, FT_MSG msg, txn_gc_info *gc_info, uint64_t *workdone, STAT64INFO stats_to_update); -void toku_ft_leaf_apply_msg(ft_compare_func compare_fun, ft_update_func update_fun, - DESCRIPTOR desc, FTNODE node, int target_childnum, +void toku_ft_leaf_apply_msg(const toku::comparator &cmp, ft_update_func update_fun, + FTNODE node, int target_childnum, FT_MSG msg, txn_gc_info *gc_info, uint64_t *workdone, STAT64INFO stats_to_update); @@ -487,7 +493,7 @@ bool toku_ft_leaf_needs_ancestors_messages(FT ft, FTNODE node, ANCESTORS ancesto void toku_ft_bn_update_max_msn(FTNODE node, MSN max_msn_applied, int child_to_read); struct ft_search; -int toku_ft_search_which_child(DESCRIPTOR desc, ft_compare_func cmp, FTNODE node, ft_search *search); +int toku_ft_search_which_child(const toku::comparator &cmp, FTNODE node, ft_search *search); // // internal node inline functions diff --git a/ft/tests/bnc-insert-benchmark.cc b/ft/tests/bnc-insert-benchmark.cc index 253a216e675..a1313440cbb 100644 --- 
a/ft/tests/bnc-insert-benchmark.cc +++ b/ft/tests/bnc-insert-benchmark.cc @@ -137,6 +137,9 @@ run_test(unsigned long eltsize, unsigned long nodesize, unsigned long repeat) struct timeval t[2]; gettimeofday(&t[0], NULL); + toku::comparator cmp; + cmp.create(long_key_cmp, nullptr); + for (unsigned int i = 0; i < repeat; ++i) { bnc = toku_create_empty_nl(); for (; toku_bnc_nbytesinbuf(bnc) <= nodesize; ++cur) { @@ -144,7 +147,7 @@ run_test(unsigned long eltsize, unsigned long nodesize, unsigned long repeat) &keys[cur % 1024], sizeof keys[cur % 1024], vals[cur % 1024], eltsize - (sizeof keys[cur % 1024]), FT_NONE, next_dummymsn(), xids_123, true, - NULL, long_key_cmp); assert_zero(r); + cmp); assert_zero(r); } nbytesinserted += toku_bnc_nbytesinbuf(bnc); destroy_nonleaf_childinfo(bnc); @@ -157,6 +160,8 @@ run_test(unsigned long eltsize, unsigned long nodesize, unsigned long repeat) long long unsigned eltrate = (long) (cur / dt); printf("%0.03lf MB/sec\n", mbrate); printf("%llu elts/sec\n", eltrate); + + cmp.destroy(); } int diff --git a/ft/tests/comparator-test.cc b/ft/tests/comparator-test.cc index ad09ad0c3ab..359115886cb 100644 --- a/ft/tests/comparator-test.cc +++ b/ft/tests/comparator-test.cc @@ -112,14 +112,16 @@ static void test_desc(void) { // create with d1, make sure it gets used cmp.create(magic_compare, &d1); expected_desc = &d1; - c = cmp.compare(&dbt_a, &dbt_b); + c = cmp(&dbt_a, &dbt_b); invariant(c == MAGIC); // set desc to d2, make sure it gets used cmp.set_descriptor(&d2); expected_desc = &d2; - c = cmp.compare(&dbt_a, &dbt_b); + c = cmp(&dbt_a, &dbt_b); invariant(c == MAGIC); + + cmp.destroy(); } static int dont_compare_me_bro(DB *db, const DBT *a, const DBT *b) { @@ -137,20 +139,22 @@ static void test_infinity(void) { // should never be called and thus the dbt never actually read. 
DBT arbitrary_dbt; - c = cmp.compare(&arbitrary_dbt, toku_dbt_positive_infinity()); + c = cmp(&arbitrary_dbt, toku_dbt_positive_infinity()); invariant(c < 0); - c = cmp.compare(toku_dbt_negative_infinity(), &arbitrary_dbt); + c = cmp(toku_dbt_negative_infinity(), &arbitrary_dbt); invariant(c < 0); - c = cmp.compare(toku_dbt_positive_infinity(), &arbitrary_dbt); + c = cmp(toku_dbt_positive_infinity(), &arbitrary_dbt); invariant(c > 0); - c = cmp.compare(&arbitrary_dbt, toku_dbt_negative_infinity()); + c = cmp(&arbitrary_dbt, toku_dbt_negative_infinity()); invariant(c > 0); - c = cmp.compare(toku_dbt_negative_infinity(), toku_dbt_negative_infinity()); + c = cmp(toku_dbt_negative_infinity(), toku_dbt_negative_infinity()); invariant(c == 0); - c = cmp.compare(toku_dbt_positive_infinity(), toku_dbt_positive_infinity()); + c = cmp(toku_dbt_positive_infinity(), toku_dbt_positive_infinity()); invariant(c == 0); + + cmp.destroy(); } int main(void) { diff --git a/ft/tests/ft-bfe-query.cc b/ft/tests/ft-bfe-query.cc index 00b93345762..4e9be6750a7 100644 --- a/ft/tests/ft-bfe-query.cc +++ b/ft/tests/ft-bfe-query.cc @@ -103,7 +103,6 @@ int64_key_cmp (DB *db UU(), const DBT *a, const DBT *b) { static void test_prefetch_read(int fd, FT_HANDLE UU(ft), FT ft_h) { int r; - ft_h->compare_fun = int64_key_cmp; FT_CURSOR XMALLOC(cursor); FTNODE dn = NULL; PAIR_ATTR attr; @@ -250,7 +249,6 @@ test_prefetch_read(int fd, FT_HANDLE UU(ft), FT ft_h) { static void test_subset_read(int fd, FT_HANDLE UU(ft), FT ft_h) { int r; - ft_h->compare_fun = int64_key_cmp; FT_CURSOR XMALLOC(cursor); FTNODE dn = NULL; FTNODE_DISK_DATA ndd = NULL; @@ -422,6 +420,7 @@ test_prefetching(void) { 128*1024, TOKU_DEFAULT_COMPRESSION_METHOD, 16); + ft_h->cmp.create(int64_key_cmp, nullptr); ft->ft = ft_h; toku_blocktable_create_new(&ft_h->blocktable); { int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); } @@ -453,6 +452,7 @@ test_prefetching(void) { toku_block_free(ft_h->blocktable, 
BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); toku_blocktable_destroy(&ft_h->blocktable); + ft_h->cmp.destroy(); toku_free(ft_h->h); toku_free(ft_h); toku_free(ft); diff --git a/ft/tests/ft-clock-test.cc b/ft/tests/ft-clock-test.cc index 1590c9e9019..2dbed9b5fa9 100644 --- a/ft/tests/ft-clock-test.cc +++ b/ft/tests/ft-clock-test.cc @@ -147,7 +147,6 @@ static void test1(int fd, FT ft_h, FTNODE *dn) { int r; struct ftnode_fetch_extra bfe_all; - ft_h->compare_fun = string_key_cmp; fill_bfe_for_full_read(&bfe_all, ft_h); FTNODE_DISK_DATA ndd = NULL; r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, dn, &ndd, &bfe_all); @@ -226,7 +225,6 @@ test2(int fd, FT ft_h, FTNODE *dn) { memset(&right, 0, sizeof(right)); ft_search search; - ft_h->compare_fun = string_key_cmp; fill_bfe_for_subset_read( &bfe_subset, ft_h, @@ -279,7 +277,6 @@ test3_leaf(int fd, FT ft_h, FTNODE *dn) { memset(&left, 0, sizeof(left)); memset(&right, 0, sizeof(right)); - ft_h->compare_fun = string_key_cmp; fill_bfe_for_min_read( &bfe_min, ft_h @@ -335,13 +332,18 @@ test_serialize_nonleaf(void) { r = xids_create_child(xids_123, &xids_234, (TXNID)234); CKERR(r); - toku_bnc_insert_msg(BNC(&sn, 0), "a", 2, "aval", 5, FT_NONE, next_dummymsn(), xids_0, true, NULL, string_key_cmp); - toku_bnc_insert_msg(BNC(&sn, 0), "b", 2, "bval", 5, FT_NONE, next_dummymsn(), xids_123, false, NULL, string_key_cmp); - toku_bnc_insert_msg(BNC(&sn, 1), "x", 2, "xval", 5, FT_NONE, next_dummymsn(), xids_234, true, NULL, string_key_cmp); + toku::comparator cmp; + cmp.create(string_key_cmp, nullptr); + + toku_bnc_insert_msg(BNC(&sn, 0), "a", 2, "aval", 5, FT_NONE, next_dummymsn(), xids_0, true, cmp); + toku_bnc_insert_msg(BNC(&sn, 0), "b", 2, "bval", 5, FT_NONE, next_dummymsn(), xids_123, false, cmp); + toku_bnc_insert_msg(BNC(&sn, 1), "x", 2, "xval", 5, FT_NONE, next_dummymsn(), xids_234, true, cmp); + //Cleanup: xids_destroy(&xids_0); xids_destroy(&xids_123); xids_destroy(&xids_234); + cmp.destroy(); 
FT_HANDLE XMALLOC(ft); FT XCALLOC(ft_h); @@ -353,6 +355,7 @@ test_serialize_nonleaf(void) { 128*1024, TOKU_DEFAULT_COMPRESSION_METHOD, 16); + ft_h->cmp.create(string_key_cmp, nullptr); ft->ft = ft_h; toku_blocktable_create_new(&ft_h->blocktable); @@ -387,6 +390,7 @@ test_serialize_nonleaf(void) { toku_block_free(ft_h->blocktable, BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); toku_blocktable_destroy(&ft_h->blocktable); toku_free(ft_h->h); + ft_h->cmp.destroy(); toku_free(ft_h); toku_free(ft); diff --git a/ft/tests/ft-serialize-benchmark.cc b/ft/tests/ft-serialize-benchmark.cc index 5f7266533d3..dd0405353c0 100644 --- a/ft/tests/ft-serialize-benchmark.cc +++ b/ft/tests/ft-serialize-benchmark.cc @@ -195,9 +195,9 @@ test_serialize_leaf(int valsize, int nelts, double entropy, int ser_runs, int de 128*1024, TOKU_DEFAULT_COMPRESSION_METHOD, 16); + ft_h->cmp.create(long_key_cmp, nullptr); ft->ft = ft_h; - ft_h->compare_fun = long_key_cmp; toku_blocktable_create_new(&ft_h->blocktable); { int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); } //Want to use block #20 @@ -279,6 +279,7 @@ test_serialize_leaf(int valsize, int nelts, double entropy, int ser_runs, int de toku_block_free(ft_h->blocktable, BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); toku_blocktable_destroy(&ft_h->blocktable); + ft_h->cmp.destroy(); toku_free(ft_h->h); toku_free(ft_h); toku_free(ft); @@ -317,6 +318,8 @@ test_serialize_nonleaf(int valsize, int nelts, double entropy, int ser_runs, int XIDS xids_123; r = xids_create_child(xids_0, &xids_123, (TXNID)123); CKERR(r); + toku::comparator cmp; + cmp.create(long_key_cmp, nullptr); int nperchild = nelts / 8; for (int ck = 0; ck < sn.n_children; ++ck) { long k; @@ -332,7 +335,7 @@ test_serialize_nonleaf(int valsize, int nelts, double entropy, int ser_runs, int } memset(&buf[c], 0, valsize - c); - toku_bnc_insert_msg(bnc, &k, sizeof k, buf, valsize, FT_NONE, next_dummymsn(), xids_123, true, NULL, long_key_cmp); + toku_bnc_insert_msg(bnc, &k, sizeof k, buf, valsize, FT_NONE, 
next_dummymsn(), xids_123, true, cmp); } if (ck < 7) { DBT pivotkey; @@ -343,6 +346,7 @@ test_serialize_nonleaf(int valsize, int nelts, double entropy, int ser_runs, int //Cleanup: xids_destroy(&xids_0); xids_destroy(&xids_123); + cmp.destroy(); FT_HANDLE XMALLOC(ft); FT XCALLOC(ft_h); @@ -354,9 +358,9 @@ test_serialize_nonleaf(int valsize, int nelts, double entropy, int ser_runs, int 128*1024, TOKU_DEFAULT_COMPRESSION_METHOD, 16); + ft_h->cmp.create(long_key_cmp, nullptr); ft->ft = ft_h; - ft_h->compare_fun = long_key_cmp; toku_blocktable_create_new(&ft_h->blocktable); { int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); } //Want to use block #20 @@ -411,10 +415,12 @@ test_serialize_nonleaf(int valsize, int nelts, double entropy, int ser_runs, int toku_block_free(ft_h->blocktable, BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); toku_blocktable_destroy(&ft_h->blocktable); toku_free(ft_h->h); + ft_h->cmp.destroy(); toku_free(ft_h); toku_free(ft); toku_free(ndd); toku_free(ndd2); + cmp.destroy(); r = close(fd); assert(r != -1); } diff --git a/ft/tests/ft-serialize-test.cc b/ft/tests/ft-serialize-test.cc index 5e2e93f09b3..06daebc6232 100644 --- a/ft/tests/ft-serialize-test.cc +++ b/ft/tests/ft-serialize-test.cc @@ -164,7 +164,6 @@ string_key_cmp(DB *UU(e), const DBT *a, const DBT *b) static void setup_dn(enum ftnode_verify_type bft, int fd, FT ft_h, FTNODE *dn, FTNODE_DISK_DATA* ndd) { int r; - ft_h->compare_fun = string_key_cmp; if (bft == read_all) { struct ftnode_fetch_extra bfe; fill_bfe_for_full_read(&bfe, ft_h); @@ -1050,13 +1049,18 @@ test_serialize_nonleaf(enum ftnode_verify_type bft, bool do_clone) { r = xids_create_child(xids_123, &xids_234, (TXNID)234); CKERR(r); - toku_bnc_insert_msg(BNC(&sn, 0), "a", 2, "aval", 5, FT_NONE, next_dummymsn(), xids_0, true, NULL, string_key_cmp); - toku_bnc_insert_msg(BNC(&sn, 0), "b", 2, "bval", 5, FT_NONE, next_dummymsn(), xids_123, false, NULL, string_key_cmp); - toku_bnc_insert_msg(BNC(&sn, 1), "x", 2, "xval", 5, FT_NONE, 
next_dummymsn(), xids_234, true, NULL, string_key_cmp); + toku::comparator cmp; + cmp.create(string_key_cmp, nullptr); + + toku_bnc_insert_msg(BNC(&sn, 0), "a", 2, "aval", 5, FT_NONE, next_dummymsn(), xids_0, true, cmp); + toku_bnc_insert_msg(BNC(&sn, 0), "b", 2, "bval", 5, FT_NONE, next_dummymsn(), xids_123, false, cmp); + toku_bnc_insert_msg(BNC(&sn, 1), "x", 2, "xval", 5, FT_NONE, next_dummymsn(), xids_234, true, cmp); + //Cleanup: xids_destroy(&xids_0); xids_destroy(&xids_123); xids_destroy(&xids_234); + cmp.destroy(); FT_HANDLE XMALLOC(ft); FT XCALLOC(ft_h); @@ -1068,6 +1072,7 @@ test_serialize_nonleaf(enum ftnode_verify_type bft, bool do_clone) { 128*1024, TOKU_DEFAULT_COMPRESSION_METHOD, 16); + ft_h->cmp.create(string_key_cmp, nullptr); ft->ft = ft_h; toku_blocktable_create_new(&ft_h->blocktable); @@ -1125,6 +1130,7 @@ test_serialize_nonleaf(enum ftnode_verify_type bft, bool do_clone) { toku_free(ft); toku_free(src_ndd); toku_free(dest_ndd); + cmp.destroy(); r = close(fd); assert(r != -1); } diff --git a/ft/tests/make-tree.cc b/ft/tests/make-tree.cc index 7ebfd28b275..e48d4592800 100644 --- a/ft/tests/make-tree.cc +++ b/ft/tests/make-tree.cc @@ -152,7 +152,7 @@ insert_into_child_buffer(FT_HANDLE ft, FTNODE node, int childnum, int minkey, in unsigned int key = htonl(val); DBT thekey; toku_fill_dbt(&thekey, &key, sizeof key); DBT theval; toku_fill_dbt(&theval, &val, sizeof val); - toku_ft_append_to_child_buffer(ft->ft->compare_fun, NULL, node, childnum, FT_INSERT, msn, xids_get_root_xids(), true, &thekey, &theval); + toku_ft_append_to_child_buffer(ft->ft->cmp, node, childnum, FT_INSERT, msn, xids_get_root_xids(), true, &thekey, &theval); node->max_msn_applied_to_node_on_disk = msn; } } diff --git a/ft/tests/msnfilter.cc b/ft/tests/msnfilter.cc index 1ab13f745e7..29fb40c5d8f 100644 --- a/ft/tests/msnfilter.cc +++ b/ft/tests/msnfilter.cc @@ -134,7 +134,7 @@ append_leaf(FT_HANDLE ft, FTNODE leafnode, void *key, uint32_t keylen, void *val FT_MSG_S msg = { 
FT_INSERT, msn, xids_get_root_xids(), .u={.id = { &thekey, &theval }} }; txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, false); - toku_ft_leaf_apply_msg(ft->ft->compare_fun, ft->ft->update_fun, &ft->ft->cmp_descriptor, leafnode, -1, &msg, &gc_info, nullptr, nullptr); + toku_ft_leaf_apply_msg(ft->ft->cmp, ft->ft->update_fun, leafnode, -1, &msg, &gc_info, nullptr, nullptr); { int r = toku_ft_lookup(ft, &thekey, lookup_checkf, &pair); assert(r==0); @@ -142,7 +142,7 @@ append_leaf(FT_HANDLE ft, FTNODE leafnode, void *key, uint32_t keylen, void *val } FT_MSG_S badmsg = { FT_INSERT, msn, xids_get_root_xids(), .u={.id = { &thekey, &badval }} }; - toku_ft_leaf_apply_msg(ft->ft->compare_fun, ft->ft->update_fun, &ft->ft->cmp_descriptor, leafnode, -1, &badmsg, &gc_info, nullptr, nullptr); + toku_ft_leaf_apply_msg(ft->ft->cmp, ft->ft->update_fun, leafnode, -1, &badmsg, &gc_info, nullptr, nullptr); // message should be rejected for duplicate msn, row should still have original val { @@ -155,7 +155,7 @@ append_leaf(FT_HANDLE ft, FTNODE leafnode, void *key, uint32_t keylen, void *val msn = next_dummymsn(); ft->ft->h->max_msn_in_ft = msn; FT_MSG_S msg2 = { FT_INSERT, msn, xids_get_root_xids(), .u={.id = { &thekey, &val2 }} }; - toku_ft_leaf_apply_msg(ft->ft->compare_fun, ft->ft->update_fun, &ft->ft->cmp_descriptor, leafnode, -1, &msg2, &gc_info, nullptr, nullptr); + toku_ft_leaf_apply_msg(ft->ft->cmp, ft->ft->update_fun, leafnode, -1, &msg2, &gc_info, nullptr, nullptr); // message should be accepted, val should have new value { @@ -167,7 +167,7 @@ append_leaf(FT_HANDLE ft, FTNODE leafnode, void *key, uint32_t keylen, void *val // now verify that message with lesser (older) msn is rejected msn.msn = msn.msn - 10; FT_MSG_S msg3 = { FT_INSERT, msn, xids_get_root_xids(), .u={.id = { &thekey, &badval } }}; - toku_ft_leaf_apply_msg(ft->ft->compare_fun, ft->ft->update_fun, &ft->ft->cmp_descriptor, leafnode, -1, &msg3, &gc_info, nullptr, nullptr); + 
toku_ft_leaf_apply_msg(ft->ft->cmp, ft->ft->update_fun, leafnode, -1, &msg3, &gc_info, nullptr, nullptr); // message should be rejected, val should still have value in pair2 { diff --git a/ft/tests/orthopush-flush.cc b/ft/tests/orthopush-flush.cc index 1ca3869eb64..cc6af928a97 100644 --- a/ft/tests/orthopush-flush.cc +++ b/ft/tests/orthopush-flush.cc @@ -97,22 +97,7 @@ static TOKUTXN const null_txn = 0; static DB * const null_db = 0; static const char *fname = TOKU_TEST_FILENAME; static txn_gc_info non_mvcc_gc_info(nullptr, TXNID_NONE, TXNID_NONE, false); - -static int dummy_cmp(DB *db __attribute__((unused)), - const DBT *a, const DBT *b) { - int c; - if (a->size > b->size) { - c = memcmp(a->data, b->data, b->size); - } else if (a->size < b->size) { - c = memcmp(a->data, b->data, a->size); - } else { - return memcmp(a->data, b->data, a->size); - } - if (c == 0) { - c = a->size - b->size; - } - return c; -} +static toku::comparator dummy_cmp; // generate size random bytes into dest static void @@ -176,7 +161,7 @@ insert_random_message(NONLEAF_CHILDINFO bnc, FT_MSG_S **save, bool *is_fresh_out toku_bnc_insert_msg(bnc, key, keylen + (sizeof pfx), val, vallen, FT_INSERT, msn, xids, is_fresh, - NULL, dummy_cmp); + dummy_cmp); } // generate a random message with xids and a key starting with pfx, insert @@ -219,7 +204,7 @@ insert_random_message_to_bn( *keyp = toku_xmemdup(keydbt->data, keydbt->size); int64_t numbytes; toku_le_apply_msg(&msg, NULL, NULL, 0, keydbt->size, &non_mvcc_gc_info, save, &numbytes); - toku_ft_bn_apply_msg(t->ft->compare_fun, t->ft->update_fun, NULL, blb, &msg, &non_mvcc_gc_info, NULL, NULL); + toku_ft_bn_apply_msg(t->ft->cmp, t->ft->update_fun, blb, &msg, &non_mvcc_gc_info, NULL, NULL); if (msn.msn > blb->max_msn_applied.msn) { blb->max_msn_applied = msn; } @@ -269,11 +254,11 @@ insert_same_message_to_bns( *keyp = toku_xmemdup(keydbt->data, keydbt->size); int64_t numbytes; toku_le_apply_msg(&msg, NULL, NULL, 0, keydbt->size, &non_mvcc_gc_info, 
save, &numbytes); - toku_ft_bn_apply_msg(t->ft->compare_fun, t->ft->update_fun, NULL, blb1, &msg, &non_mvcc_gc_info, NULL, NULL); + toku_ft_bn_apply_msg(t->ft->cmp, t->ft->update_fun, blb1, &msg, &non_mvcc_gc_info, NULL, NULL); if (msn.msn > blb1->max_msn_applied.msn) { blb1->max_msn_applied = msn; } - toku_ft_bn_apply_msg(t->ft->compare_fun, t->ft->update_fun, NULL, blb2, &msg, &non_mvcc_gc_info, NULL, NULL); + toku_ft_bn_apply_msg(t->ft->cmp, t->ft->update_fun, blb2, &msg, &non_mvcc_gc_info, NULL, NULL); if (msn.msn > blb2->max_msn_applied.msn) { blb2->max_msn_applied = msn; } @@ -329,7 +314,7 @@ insert_random_update_message(NONLEAF_CHILDINFO bnc, FT_MSG_S **save, bool is_fre toku_bnc_insert_msg(bnc, key, keylen + (sizeof pfx), update_extra, sizeof *update_extra, FT_UPDATE, msn, xids, is_fresh, - NULL, dummy_cmp); + dummy_cmp); if (msn.msn > max_msn->msn) { *max_msn = msn; } @@ -407,11 +392,11 @@ flush_to_internal(FT_HANDLE t) { enum ft_msg_type type = ft_msg_get_type(msg); XIDS xids = ft_msg_get_xids(msg); for (int k = 0; k < num_parent_messages; ++k) { - if (dummy_cmp(NULL, &keydbt, parent_messages[k]->u.id.key) == 0 && + if (dummy_cmp(&keydbt, parent_messages[k]->u.id.key) == 0 && msn.msn == parent_messages[k]->msn.msn) { assert(parent_messages_present[k] == 0); assert(found == 0); - assert(dummy_cmp(NULL, &valdbt, parent_messages[k]->u.id.val) == 0); + assert(dummy_cmp(&valdbt, parent_messages[k]->u.id.val) == 0); assert(type == parent_messages[k]->type); assert(xids_get_innermost_xid(xids) == xids_get_innermost_xid(parent_messages[k]->xids)); assert(parent_messages_is_fresh[k] == is_fresh); @@ -420,11 +405,11 @@ flush_to_internal(FT_HANDLE t) { } } for (int k = 0; k < num_child_messages; ++k) { - if (dummy_cmp(NULL, &keydbt, child_messages[k]->u.id.key) == 0 && + if (dummy_cmp(&keydbt, child_messages[k]->u.id.key) == 0 && msn.msn == child_messages[k]->msn.msn) { assert(child_messages_present[k] == 0); assert(found == 0); - assert(dummy_cmp(NULL, &valdbt, 
child_messages[k]->u.id.val) == 0); + assert(dummy_cmp(&valdbt, child_messages[k]->u.id.val) == 0); assert(type == child_messages[k]->type); assert(xids_get_innermost_xid(xids) == xids_get_innermost_xid(child_messages[k]->xids)); assert(child_messages_is_fresh[k] == is_fresh); @@ -506,7 +491,7 @@ flush_to_internal_multiple(FT_HANDLE t) { insert_random_message(child_bncs[i%8], &child_messages[i], &child_messages_is_fresh[i], xids_123, i%8); total_size += toku_bnc_memory_used(child_bncs[i%8]); if (i % 8 < 7) { - if (childkeys[i%8] == NULL || dummy_cmp(NULL, child_messages[i]->u.id.key, childkeys[i%8]->u.id.key) > 0) { + if (childkeys[i%8] == NULL || dummy_cmp(child_messages[i]->u.id.key, childkeys[i%8]->u.id.key) > 0) { childkeys[i%8] = child_messages[i]; } } @@ -567,11 +552,11 @@ flush_to_internal_multiple(FT_HANDLE t) { enum ft_msg_type type = ft_msg_get_type(msg); XIDS xids = ft_msg_get_xids(msg); for (int i = 0; i < num_parent_messages; ++i) { - if (dummy_cmp(NULL, &keydbt, parent_messages[i]->u.id.key) == 0 && + if (dummy_cmp(&keydbt, parent_messages[i]->u.id.key) == 0 && msn.msn == parent_messages[i]->msn.msn) { assert(parent_messages_present[i] == 0); assert(found == 0); - assert(dummy_cmp(NULL, &valdbt, parent_messages[i]->u.id.val) == 0); + assert(dummy_cmp(&valdbt, parent_messages[i]->u.id.val) == 0); assert(type == parent_messages[i]->type); assert(xids_get_innermost_xid(xids) == xids_get_innermost_xid(parent_messages[i]->xids)); assert(parent_messages_is_fresh[i] == is_fresh); @@ -580,11 +565,11 @@ flush_to_internal_multiple(FT_HANDLE t) { } } for (int i = 0; i < num_child_messages; ++i) { - if (dummy_cmp(NULL, &keydbt, child_messages[i]->u.id.key) == 0 && + if (dummy_cmp(&keydbt, child_messages[i]->u.id.key) == 0 && msn.msn == child_messages[i]->msn.msn) { assert(child_messages_present[i] == 0); assert(found == 0); - assert(dummy_cmp(NULL, &valdbt, child_messages[i]->u.id.val) == 0); + assert(dummy_cmp(&valdbt, child_messages[i]->u.id.val) == 0); 
assert(type == child_messages[i]->type); assert(xids_get_innermost_xid(xids) == xids_get_innermost_xid(child_messages[i]->xids)); assert(child_messages_is_fresh[i] == is_fresh); @@ -691,7 +676,7 @@ flush_to_leaf(FT_HANDLE t, bool make_leaf_up_to_date, bool use_flush) { total_size += child_blbs[i%8]->data_buffer.get_memory_size(); if (i % 8 < 7) { DBT keydbt; - if (childkeys[i%8].size == 0 || dummy_cmp(NULL, toku_fill_dbt(&keydbt, key_pointers[i], keylens[i]), &childkeys[i%8]) > 0) { + if (childkeys[i%8].size == 0 || dummy_cmp(toku_fill_dbt(&keydbt, key_pointers[i], keylens[i]), &childkeys[i%8]) > 0) { toku_fill_dbt(&childkeys[i%8], key_pointers[i], keylens[i]); } } @@ -701,7 +686,7 @@ flush_to_leaf(FT_HANDLE t, bool make_leaf_up_to_date, bool use_flush) { for (i = 0; i < num_child_messages; ++i) { DBT keydbt; if (i % 8 < 7) { - assert(dummy_cmp(NULL, toku_fill_dbt(&keydbt, key_pointers[i], keylens[i]), &childkeys[i%8]) <= 0); + assert(dummy_cmp(toku_fill_dbt(&keydbt, key_pointers[i], keylens[i]), &childkeys[i%8]) <= 0); } } @@ -723,7 +708,7 @@ flush_to_leaf(FT_HANDLE t, bool make_leaf_up_to_date, bool use_flush) { if (make_leaf_up_to_date) { for (i = 0; i < num_parent_messages; ++i) { if (!parent_messages_is_fresh[i]) { - toku_ft_leaf_apply_msg(t->ft->compare_fun, t->ft->update_fun, &t->ft->descriptor, child, -1, parent_messages[i], &non_mvcc_gc_info, NULL, NULL); + toku_ft_leaf_apply_msg(t->ft->cmp, t->ft->update_fun, child, -1, parent_messages[i], &non_mvcc_gc_info, NULL, NULL); } } for (i = 0; i < 8; ++i) { @@ -803,10 +788,10 @@ flush_to_leaf(FT_HANDLE t, bool make_leaf_up_to_date, bool use_flush) { } int found = 0; for (i = num_parent_messages - 1; i >= 0; --i) { - if (dummy_cmp(NULL, &keydbt, parent_messages[i]->u.id.key) == 0) { + if (dummy_cmp(&keydbt, parent_messages[i]->u.id.key) == 0) { if (found == 0) { struct orthopush_flush_update_fun_extra *CAST_FROM_VOIDP(e, parent_messages[i]->u.id.val->data); - assert(dummy_cmp(NULL, &valdbt, &e->new_val) == 0); + 
assert(dummy_cmp(&valdbt, &e->new_val) == 0); found++; } assert(parent_messages_present[i] == 0); @@ -822,9 +807,9 @@ flush_to_leaf(FT_HANDLE t, bool make_leaf_up_to_date, bool use_flush) { toku_fill_dbt(&childkeydbt, key_pointers[i], keylens[i]); toku_fill_dbt(&childvaldbt, valp, vallen); } - if (dummy_cmp(NULL, &keydbt, &childkeydbt) == 0) { + if (dummy_cmp(&keydbt, &childkeydbt) == 0) { if (found == 0) { - assert(dummy_cmp(NULL, &valdbt, &childvaldbt) == 0); + assert(dummy_cmp(&valdbt, &childvaldbt) == 0); found++; } assert(child_messages_present[i] == 0); @@ -919,7 +904,7 @@ flush_to_leaf_with_keyrange(FT_HANDLE t, bool make_leaf_up_to_date) { insert_random_message_to_bn(t, child_blbs[i%8], &key_pointers[i], &keylens[i], &child_messages[i], xids_123, i%8); total_size += child_blbs[i%8]->data_buffer.get_memory_size(); DBT keydbt; - if (childkeys[i%8].size == 0 || dummy_cmp(NULL, toku_fill_dbt(&keydbt, key_pointers[i], keylens[i]), &childkeys[i%8]) > 0) { + if (childkeys[i%8].size == 0 || dummy_cmp(toku_fill_dbt(&keydbt, key_pointers[i], keylens[i]), &childkeys[i%8]) > 0) { toku_fill_dbt(&childkeys[i%8], key_pointers[i], keylens[i]); } } @@ -927,7 +912,7 @@ flush_to_leaf_with_keyrange(FT_HANDLE t, bool make_leaf_up_to_date) { for (i = 0; i < num_child_messages; ++i) { DBT keydbt; - assert(dummy_cmp(NULL, toku_fill_dbt(&keydbt, key_pointers[i], keylens[i]), &childkeys[i%8]) <= 0); + assert(dummy_cmp(toku_fill_dbt(&keydbt, key_pointers[i], keylens[i]), &childkeys[i%8]) <= 0); } { @@ -947,9 +932,9 @@ flush_to_leaf_with_keyrange(FT_HANDLE t, bool make_leaf_up_to_date) { if (make_leaf_up_to_date) { for (i = 0; i < num_parent_messages; ++i) { - if (dummy_cmp(NULL, parent_messages[i]->u.id.key, &childkeys[7]) <= 0 && + if (dummy_cmp(parent_messages[i]->u.id.key, &childkeys[7]) <= 0 && !parent_messages_is_fresh[i]) { - toku_ft_leaf_apply_msg(t->ft->compare_fun, t->ft->update_fun, &t->ft->descriptor, child, -1, parent_messages[i], &non_mvcc_gc_info, NULL, NULL); + 
toku_ft_leaf_apply_msg(t->ft->cmp, t->ft->update_fun, child, -1, parent_messages[i], &non_mvcc_gc_info, NULL, NULL); } } for (i = 0; i < 8; ++i) { @@ -963,7 +948,7 @@ flush_to_leaf_with_keyrange(FT_HANDLE t, bool make_leaf_up_to_date) { for (i = 0; i < num_parent_messages; ++i) { if (make_leaf_up_to_date && - dummy_cmp(NULL, parent_messages[i]->u.id.key, &childkeys[7]) <= 0 && + dummy_cmp(parent_messages[i]->u.id.key, &childkeys[7]) <= 0 && !parent_messages_is_fresh[i]) { assert(parent_messages_applied[i] == 1); } else { @@ -999,9 +984,9 @@ flush_to_leaf_with_keyrange(FT_HANDLE t, bool make_leaf_up_to_date) { DBT keydbt; toku_fill_dbt(&keydbt, ft_msg_get_key(msg), ft_msg_get_keylen(msg)); MSN msn = msg->msn; - if (dummy_cmp(NULL, &keydbt, &childkeys[7]) > 0) { + if (dummy_cmp(&keydbt, &childkeys[7]) > 0) { for (int i = 0; i < num_parent_messages; ++i) { - if (dummy_cmp(NULL, &keydbt, parent_messages[i]->u.id.key) == 0 && + if (dummy_cmp(&keydbt, parent_messages[i]->u.id.key) == 0 && msn.msn == parent_messages[i]->msn.msn) { assert(is_fresh == parent_messages_is_fresh[i]); break; @@ -1024,7 +1009,7 @@ flush_to_leaf_with_keyrange(FT_HANDLE t, bool make_leaf_up_to_date) { assert(total_messages <= num_parent_messages + num_child_messages); for (i = 0; i < num_parent_messages; ++i) { - if (dummy_cmp(NULL, parent_messages[i]->u.id.key, &childkeys[7]) <= 0) { + if (dummy_cmp(parent_messages[i]->u.id.key, &childkeys[7]) <= 0) { assert(parent_messages_applied[i] == 1); } else { assert(parent_messages_applied[i] == 0); @@ -1120,7 +1105,7 @@ compare_apply_and_flush(FT_HANDLE t, bool make_leaf_up_to_date) { total_size += child1_blbs[i%8]->data_buffer.get_memory_size(); if (i % 8 < 7) { DBT keydbt; - if (child1keys[i%8].size == 0 || dummy_cmp(NULL, toku_fill_dbt(&keydbt, key_pointers[i], keylens[i]), &child1keys[i%8]) > 0) { + if (child1keys[i%8].size == 0 || dummy_cmp(toku_fill_dbt(&keydbt, key_pointers[i], keylens[i]), &child1keys[i%8]) > 0) { toku_fill_dbt(&child1keys[i%8], 
key_pointers[i], keylens[i]); toku_fill_dbt(&child2keys[i%8], key_pointers[i], keylens[i]); } @@ -1131,8 +1116,8 @@ compare_apply_and_flush(FT_HANDLE t, bool make_leaf_up_to_date) { for (i = 0; i < num_child_messages; ++i) { DBT keydbt; if (i % 8 < 7) { - assert(dummy_cmp(NULL, toku_fill_dbt(&keydbt, key_pointers[i], keylens[i]), &child1keys[i%8]) <= 0); - assert(dummy_cmp(NULL, toku_fill_dbt(&keydbt, key_pointers[i], keylens[i]), &child2keys[i%8]) <= 0); + assert(dummy_cmp(toku_fill_dbt(&keydbt, key_pointers[i], keylens[i]), &child1keys[i%8]) <= 0); + assert(dummy_cmp(toku_fill_dbt(&keydbt, key_pointers[i], keylens[i]), &child2keys[i%8]) <= 0); } } @@ -1155,8 +1140,8 @@ compare_apply_and_flush(FT_HANDLE t, bool make_leaf_up_to_date) { if (make_leaf_up_to_date) { for (i = 0; i < num_parent_messages; ++i) { if (!parent_messages_is_fresh[i]) { - toku_ft_leaf_apply_msg(t->ft->compare_fun, t->ft->update_fun, &t->ft->descriptor, child1, -1, parent_messages[i], &non_mvcc_gc_info, NULL, NULL); - toku_ft_leaf_apply_msg(t->ft->compare_fun, t->ft->update_fun, &t->ft->descriptor, child2, -1, parent_messages[i], &non_mvcc_gc_info, NULL, NULL); + toku_ft_leaf_apply_msg(t->ft->cmp, t->ft->update_fun, child1, -1, parent_messages[i], &non_mvcc_gc_info, NULL, NULL); + toku_ft_leaf_apply_msg(t->ft->cmp, t->ft->update_fun, child2, -1, parent_messages[i], &non_mvcc_gc_info, NULL, NULL); } } for (i = 0; i < 8; ++i) { @@ -1222,8 +1207,8 @@ compare_apply_and_flush(FT_HANDLE t, bool make_leaf_up_to_date) { toku_fill_dbt(&key2dbt, keyp, keylen); toku_fill_dbt(&val2dbt, valp, vallen); } - assert(dummy_cmp(NULL, &key1dbt, &key2dbt) == 0); - assert(dummy_cmp(NULL, &val1dbt, &val2dbt) == 0); + assert(dummy_cmp(&key1dbt, &key2dbt) == 0); + assert(dummy_cmp(&val1dbt, &val2dbt) == 0); } } @@ -1271,10 +1256,28 @@ parse_args(int argc, const char *argv[]) { } } +static int cmp_fn(DB *db __attribute__((unused)), + const DBT *a, const DBT *b) { + int c; + if (a->size > b->size) { + c = memcmp(a->data, 
b->data, b->size); + } else if (a->size < b->size) { + c = memcmp(a->data, b->data, a->size); + } else { + return memcmp(a->data, b->data, a->size); + } + if (c == 0) { + c = a->size - b->size; + } + return c; +} + int test_main (int argc, const char *argv[]) { parse_args(argc, argv); + dummy_cmp.create(cmp_fn, nullptr); + initialize_dummymsn(); int r; CACHETABLE ct; @@ -1308,5 +1311,7 @@ test_main (int argc, const char *argv[]) { r = toku_close_ft_handle_nolsn(t, 0); assert(r==0); toku_cachetable_close(&ct); + dummy_cmp.destroy(); + return 0; } diff --git a/ft/tests/verify-bad-msn.cc b/ft/tests/verify-bad-msn.cc index 74a5d07efc8..07e37d43cf9 100644 --- a/ft/tests/verify-bad-msn.cc +++ b/ft/tests/verify-bad-msn.cc @@ -156,7 +156,7 @@ insert_into_child_buffer(FT_HANDLE ft, FTNODE node, int childnum, int minkey, in unsigned int key = htonl(val); DBT thekey; toku_fill_dbt(&thekey, &key, sizeof key); DBT theval; toku_fill_dbt(&theval, &val, sizeof val); - toku_ft_append_to_child_buffer(ft->ft->compare_fun, NULL, node, childnum, FT_INSERT, msn, xids_get_root_xids(), true, &thekey, &theval); + toku_ft_append_to_child_buffer(ft->ft->cmp, node, childnum, FT_INSERT, msn, xids_get_root_xids(), true, &thekey, &theval); // Create bad tree (don't do following): // node->max_msn_applied_to_node = msn; diff --git a/ft/tests/verify-misrouted-msgs.cc b/ft/tests/verify-misrouted-msgs.cc index 1a6fa852ecb..6b7555a89c6 100644 --- a/ft/tests/verify-misrouted-msgs.cc +++ b/ft/tests/verify-misrouted-msgs.cc @@ -144,7 +144,7 @@ insert_into_child_buffer(FT_HANDLE ft, FTNODE node, int childnum, int minkey, in DBT thekey; toku_fill_dbt(&thekey, &key, sizeof key); DBT theval; toku_fill_dbt(&theval, &val, sizeof val); MSN msn = next_dummymsn(); - toku_ft_append_to_child_buffer(ft->ft->compare_fun, NULL, node, childnum, FT_INSERT, msn, xids_get_root_xids(), true, &thekey, &theval); + toku_ft_append_to_child_buffer(ft->ft->cmp, node, childnum, FT_INSERT, msn, xids_get_root_xids(), true, 
&thekey, &theval); } } diff --git a/ft/valgrind.suppressions b/ft/valgrind.suppressions index b1ee1662079..d8b9b09bd1f 100644 --- a/ft/valgrind.suppressions +++ b/ft/valgrind.suppressions @@ -281,3 +281,16 @@ fun:_dl_start obj:/lib/x86_64-linux-gnu/ld-2.17.so } +{ + + Memcheck:Leak + match-leak-kinds: reachable + fun:calloc + obj:/usr/lib/libdl-2.19.so + fun:dlsym + fun:_Z19toku_memory_startupv + fun:call_init.part.0 + fun:_dl_init + obj:/usr/lib/ld-2.19.so +} + diff --git a/locktree/concurrent_tree.cc b/locktree/concurrent_tree.cc index 37fa8eee0cb..b7366d51dbc 100644 --- a/locktree/concurrent_tree.cc +++ b/locktree/concurrent_tree.cc @@ -91,7 +91,7 @@ PATENT RIGHTS GRANT: #include -void concurrent_tree::create(comparator *cmp) { +void concurrent_tree::create(const comparator *cmp) { // start with an empty root node. we do this instead of // setting m_root to null so there's always a root to lock m_root.create_root(cmp); diff --git a/locktree/concurrent_tree.h b/locktree/concurrent_tree.h index 4a6b10bdcfc..58683a10ee8 100644 --- a/locktree/concurrent_tree.h +++ b/locktree/concurrent_tree.h @@ -173,7 +173,7 @@ public: }; // effect: initialize the tree to an empty state - void create(comparator *cmp); + void create(const comparator *cmp); // effect: destroy the tree. // requires: tree is empty diff --git a/locktree/keyrange.cc b/locktree/keyrange.cc index 0bf9790196c..197e250aeb6 100644 --- a/locktree/keyrange.cc +++ b/locktree/keyrange.cc @@ -129,13 +129,13 @@ void keyrange::create_copy(const keyrange &range) { // extend this keyrange by choosing the leftmost and rightmost // endpoints between this range and the given. replaced keys // in this range are freed and inherited keys are copied. 
-void keyrange::extend(comparator *cmp, const keyrange &range) { +void keyrange::extend(const comparator &cmp, const keyrange &range) { const DBT *range_left = range.get_left_key(); const DBT *range_right = range.get_right_key(); - if (cmp->compare(range_left, get_left_key()) < 0) { + if (cmp(range_left, get_left_key()) < 0) { replace_left_key(range_left); } - if (cmp->compare(range_right, get_right_key()) > 0) { + if (cmp(range_right, get_right_key()) > 0) { replace_right_key(range_right); } } @@ -152,20 +152,20 @@ uint64_t keyrange::get_memory_size(void) const { } // compare ranges. -keyrange::comparison keyrange::compare(comparator *cmp, const keyrange &range) const { - if (cmp->compare(get_right_key(), range.get_left_key()) < 0) { +keyrange::comparison keyrange::compare(const comparator &cmp, const keyrange &range) const { + if (cmp(get_right_key(), range.get_left_key()) < 0) { return comparison::LESS_THAN; - } else if (cmp->compare(get_left_key(), range.get_right_key()) > 0) { + } else if (cmp(get_left_key(), range.get_right_key()) > 0) { return comparison::GREATER_THAN; - } else if (cmp->compare(get_left_key(), range.get_left_key()) == 0 && - cmp->compare(get_right_key(), range.get_right_key()) == 0) { + } else if (cmp(get_left_key(), range.get_left_key()) == 0 && + cmp(get_right_key(), range.get_right_key()) == 0) { return comparison::EQUALS; } else { return comparison::OVERLAPS; } } -bool keyrange::overlaps(comparator *cmp, const keyrange &range) const { +bool keyrange::overlaps(const comparator &cmp, const keyrange &range) const { // equality is a stronger form of overlapping. // so two ranges "overlap" if they're either equal or just overlapping. 
comparison c = compare(cmp, range); diff --git a/locktree/keyrange.h b/locktree/keyrange.h index 9843703c79e..c086fc1cd21 100644 --- a/locktree/keyrange.h +++ b/locktree/keyrange.h @@ -117,7 +117,7 @@ public: // effect: extends the keyrange by choosing the leftmost and rightmost // endpoints from this range and the given range. // replaced keys in this range are freed, new keys are copied. - void extend(comparator *cmp, const keyrange &range); + void extend(const comparator &cmp, const keyrange &range); // returns: the amount of memory this keyrange takes. does not account // for point optimizations or malloc overhead. @@ -143,10 +143,10 @@ public: // EQUALS if given range has the same left and right endpoints // OVERLAPS if at least one of the given range's endpoints falls // between this range's endpoints - comparison compare(comparator *cmp, const keyrange &range) const; + comparison compare(const comparator &cmp, const keyrange &range) const; // returns: true if the range and the given range are equal or overlapping - bool overlaps(comparator *cmp, const keyrange &range) const; + bool overlaps(const comparator &cmp, const keyrange &range) const; // returns: a keyrange representing -inf, +inf static keyrange get_infinite_range(void); diff --git a/locktree/locktree.cc b/locktree/locktree.cc index 164d0cbc0da..bda355a5238 100644 --- a/locktree/locktree.cc +++ b/locktree/locktree.cc @@ -121,14 +121,12 @@ void locktree::create(locktree_manager *mgr, DICTIONARY_ID dict_id, m_mgr = mgr; m_dict_id = dict_id; - // the only reason m_cmp is malloc'd here is to prevent gdb from printing - // out an entire DB struct every time you inspect a locktree. 
- XCALLOC(m_cmp); - m_cmp->create(cmp, desc); + m_cmp.create(cmp, desc); m_reference_count = 1; m_userdata = nullptr; + XCALLOC(m_rangetree); - m_rangetree->create(m_cmp); + m_rangetree->create(&m_cmp); m_sto_txnid = TXNID_NONE; m_sto_buffer.create(); @@ -155,11 +153,10 @@ void locktree::create(locktree_manager *mgr, DICTIONARY_ID dict_id, void locktree::destroy(void) { invariant(m_reference_count == 0); + m_cmp.destroy(); m_rangetree->destroy(); - toku_free(m_cmp); toku_free(m_rangetree); m_sto_buffer.destroy(); - m_lock_request_info.pending_lock_requests.destroy(); } @@ -299,7 +296,7 @@ void locktree::sto_migrate_buffer_ranges_to_tree(void *prepared_lkr) { concurrent_tree sto_rangetree; concurrent_tree::locked_keyrange sto_lkr; - sto_rangetree.create(m_cmp); + sto_rangetree.create(&m_cmp); // insert all of the ranges from the single txnid buffer into a new rangtree range_buffer::iterator iter(&m_sto_buffer); @@ -438,7 +435,7 @@ int locktree::try_acquire_lock(bool is_write_request, txnid_set *conflicts, bool big_txn) { // All ranges in the locktree must have left endpoints <= right endpoints. // Range comparisons rely on this fact, so we make a paranoid invariant here. - paranoid_invariant(m_cmp->compare(left_key, right_key) <= 0); + paranoid_invariant(m_cmp(left_key, right_key) <= 0); int r = m_mgr == nullptr ? 0 : m_mgr->check_current_lock_constraints(big_txn); if (r == 0) { @@ -581,7 +578,7 @@ void locktree::release_locks(TXNID txnid, const range_buffer *ranges) { const DBT *right_key = rec.get_right_key(); // All ranges in the locktree must have left endpoints <= right endpoints. // Range comparisons rely on this fact, so we make a paranoid invariant here. 
- paranoid_invariant(m_cmp->compare(left_key, right_key) <= 0); + paranoid_invariant(m_cmp(left_key, right_key) <= 0); remove_overlapping_locks_for_txnid(txnid, left_key, right_key); iter.next(); } @@ -795,7 +792,7 @@ struct lt_lock_request_info *locktree::get_lock_request_info(void) { } void locktree::set_descriptor(DESCRIPTOR desc) { - m_cmp->set_descriptor(desc); + m_cmp.set_descriptor(desc); } locktree_manager *locktree::get_manager(void) const { diff --git a/locktree/locktree.h b/locktree/locktree.h index ebd736bc746..666af3bfd6a 100644 --- a/locktree/locktree.h +++ b/locktree/locktree.h @@ -323,8 +323,7 @@ namespace toku { // - Destroy the manager. class locktree { public: - // effect: Creates a locktree that uses the given memory tracker - // to report memory usage and honor memory constraints. + // effect: Creates a locktree void create(locktree_manager *mgr, DICTIONARY_ID dict_id, DESCRIPTOR desc, ft_compare_func cmp); @@ -399,7 +398,7 @@ namespace toku { // is valid for as long as the locktree. this is currently // implemented by opening an ft_handle for this locktree and // storing it as userdata below. - comparator *m_cmp; + comparator m_cmp; concurrent_tree *m_rangetree; diff --git a/locktree/tests/concurrent_tree_lkr_acquire_release.cc b/locktree/tests/concurrent_tree_lkr_acquire_release.cc index ecf683ed8f8..6ae972d2321 100644 --- a/locktree/tests/concurrent_tree_lkr_acquire_release.cc +++ b/locktree/tests/concurrent_tree_lkr_acquire_release.cc @@ -126,19 +126,19 @@ void concurrent_tree_unit_test::test_lkr_acquire_release(void) { // if the subtree root does not overlap then one of its children // must exist and have an overlapping range. 
- if (!lkr.m_subtree->m_range.overlaps(&cmp, range)) { + if (!lkr.m_subtree->m_range.overlaps(cmp, range)) { treenode *left = lkr.m_subtree->m_left_child.ptr; treenode *right = lkr.m_subtree->m_right_child.ptr; if (left != nullptr) { // left exists, so if it does not overlap then the right must - if (!left->m_range.overlaps(&cmp, range)) { + if (!left->m_range.overlaps(cmp, range)) { invariant_notnull(right); - invariant(right->m_range.overlaps(&cmp, range)); + invariant(right->m_range.overlaps(cmp, range)); } } else { // no left child, so the right must exist and be overlapping invariant_notnull(right); - invariant(right->m_range.overlaps(&cmp, range)); + invariant(right->m_range.overlaps(cmp, range)); } } @@ -160,6 +160,8 @@ void concurrent_tree_unit_test::test_lkr_acquire_release(void) { lkr.release(); tree.destroy(); } + + cmp.destroy(); } } /* namespace toku */ diff --git a/locktree/tests/concurrent_tree_lkr_insert_remove.cc b/locktree/tests/concurrent_tree_lkr_insert_remove.cc index ae71cda4526..275abbb3baa 100644 --- a/locktree/tests/concurrent_tree_lkr_insert_remove.cc +++ b/locktree/tests/concurrent_tree_lkr_insert_remove.cc @@ -117,17 +117,17 @@ static void verify_unique_keys(void) { } static uint64_t check_for_range_and_count(concurrent_tree::locked_keyrange *lkr, - comparator *cmp, const keyrange &range, bool range_should_exist) { + const comparator &cmp, const keyrange &range, bool range_should_exist) { struct check_fn_obj { - comparator *cmp; + const comparator *cmp; uint64_t count; keyrange target_range; bool target_range_found; bool fn(const keyrange &query_range, TXNID txnid) { (void) txnid; - if (query_range.compare(cmp, target_range) == keyrange::comparison::EQUALS) { + if (query_range.compare(*cmp, target_range) == keyrange::comparison::EQUALS) { invariant(!target_range_found); target_range_found = true; } @@ -135,7 +135,7 @@ static uint64_t check_for_range_and_count(concurrent_tree::locked_keyrange *lkr, return true; } } check_fn; - 
check_fn.cmp = cmp; + check_fn.cmp = &cmp; check_fn.count = 0; check_fn.target_range = range; check_fn.target_range_found = false; @@ -174,14 +174,14 @@ void concurrent_tree_unit_test::test_lkr_insert_remove(void) { // insert an element. it should exist and the // count should be correct. lkr.insert(range, i); - n = check_for_range_and_count(&lkr, &cmp, range, true); + n = check_for_range_and_count(&lkr, cmp, range, true); if (i >= cap) { invariant(n == cap + 1); // remove an element previously inserted. it should // no longer exist and the count should be correct. range.create(get_ith_key_from_set(i - cap), get_ith_key_from_set(i - cap)); lkr.remove(range); - n = check_for_range_and_count(&lkr, &cmp, range, false); + n = check_for_range_and_count(&lkr, cmp, range, false); invariant(n == cap); } else { invariant(n == i + 1); @@ -193,12 +193,13 @@ void concurrent_tree_unit_test::test_lkr_insert_remove(void) { keyrange range; range.create(get_ith_key_from_set(num_keys - i - 1), get_ith_key_from_set(num_keys - i - 1)); lkr.remove(range); - n = check_for_range_and_count(&lkr, &cmp, range, false); + n = check_for_range_and_count(&lkr, cmp, range, false); invariant(n == (cap - i - 1)); } lkr.release(); tree.destroy(); + cmp.destroy(); } } /* namespace toku */ diff --git a/locktree/tests/concurrent_tree_lkr_insert_serial_large.cc b/locktree/tests/concurrent_tree_lkr_insert_serial_large.cc index 5f0f81dc275..1165bff9151 100644 --- a/locktree/tests/concurrent_tree_lkr_insert_serial_large.cc +++ b/locktree/tests/concurrent_tree_lkr_insert_serial_large.cc @@ -136,6 +136,7 @@ void concurrent_tree_unit_test::test_lkr_insert_serial_large(void) { lkr.release(); tree.destroy(); + cmp.destroy(); } } /* namespace toku */ diff --git a/locktree/tests/concurrent_tree_lkr_remove_all.cc b/locktree/tests/concurrent_tree_lkr_remove_all.cc index c7d5f4d3204..0f7b045ded9 100644 --- a/locktree/tests/concurrent_tree_lkr_remove_all.cc +++ b/locktree/tests/concurrent_tree_lkr_remove_all.cc @@ 
-132,6 +132,8 @@ void concurrent_tree_unit_test::test_lkr_remove_all(void) { lkr.release(); tree.destroy(); } + + cmp.destroy(); } } /* namespace toku */ diff --git a/locktree/tests/locktree_misc.cc b/locktree/tests/locktree_misc.cc index 72906cca983..a87716d862b 100644 --- a/locktree/tests/locktree_misc.cc +++ b/locktree/tests/locktree_misc.cc @@ -128,11 +128,11 @@ void locktree_unit_test::test_misc(void) { // descriptor when we set the locktree's descriptor lt.set_descriptor(&d1); expected_descriptor = &d1; - r = lt.m_cmp->compare(&dbt_a, &dbt_b); + r = lt.m_cmp(&dbt_a, &dbt_b); invariant(r == expected_comparison_magic); lt.set_descriptor(&d2); expected_descriptor = &d2; - r = lt.m_cmp->compare(&dbt_a, &dbt_b); + r = lt.m_cmp(&dbt_a, &dbt_b); invariant(r == expected_comparison_magic); lt.release_reference(); diff --git a/locktree/tests/locktree_overlapping_relock.cc b/locktree/tests/locktree_overlapping_relock.cc index 6b412f214b4..d8212541c96 100644 --- a/locktree/tests/locktree_overlapping_relock.cc +++ b/locktree/tests/locktree_overlapping_relock.cc @@ -143,7 +143,7 @@ void locktree_unit_test::test_overlapping_relock(void) { bool saw_the_other; TXNID expected_txnid; keyrange *expected_range; - comparator *cmp; + const comparator *cmp; bool fn(const keyrange &range, TXNID txnid) { if (txnid == the_other_txnid) { invariant(!saw_the_other); @@ -151,12 +151,12 @@ void locktree_unit_test::test_overlapping_relock(void) { return true; } invariant(txnid == expected_txnid); - keyrange::comparison c = range.compare(cmp, *expected_range); + keyrange::comparison c = range.compare(*cmp, *expected_range); invariant(c == keyrange::comparison::EQUALS); return true; } } verify_fn; - verify_fn.cmp = lt.m_cmp; + verify_fn.cmp = <.m_cmp; #define do_verify() \ do { verify_fn.saw_the_other = false; locktree_iterate(<, &verify_fn); } while (0) diff --git a/locktree/tests/locktree_single_txnid_optimization.cc b/locktree/tests/locktree_single_txnid_optimization.cc index 
9da0eff51ce..b0e8e72efdc 100644 --- a/locktree/tests/locktree_single_txnid_optimization.cc +++ b/locktree/tests/locktree_single_txnid_optimization.cc @@ -149,15 +149,15 @@ void locktree_unit_test::test_single_txnid_optimization(void) { struct verify_fn_obj { TXNID expected_txnid; keyrange *expected_range; - comparator *cmp; + const comparator *cmp; bool fn(const keyrange &range, TXNID txnid) { invariant(txnid == expected_txnid); - keyrange::comparison c = range.compare(cmp, *expected_range); + keyrange::comparison c = range.compare(*cmp, *expected_range); invariant(c == keyrange::comparison::EQUALS); return true; } } verify_fn; - verify_fn.cmp = lt.m_cmp; + verify_fn.cmp = <.m_cmp; keyrange range; range.create(one, one); diff --git a/locktree/treenode.cc b/locktree/treenode.cc index 0e8953ce895..836e16ce0f5 100644 --- a/locktree/treenode.cc +++ b/locktree/treenode.cc @@ -99,7 +99,7 @@ void treenode::mutex_unlock(void) { toku_mutex_unlock(&m_mutex); } -void treenode::init(comparator *cmp) { +void treenode::init(const comparator *cmp) { m_txnid = TXNID_NONE; m_is_root = false; m_is_empty = true; @@ -117,7 +117,7 @@ void treenode::init(comparator *cmp) { m_right_child.set(nullptr); } -void treenode::create_root(comparator *cmp) { +void treenode::create_root(const comparator *cmp) { init(cmp); m_is_root = true; } @@ -145,10 +145,10 @@ bool treenode::is_empty(void) { } bool treenode::range_overlaps(const keyrange &range) { - return m_range.overlaps(m_cmp, range); + return m_range.overlaps(*m_cmp, range); } -treenode *treenode::alloc(comparator *cmp, const keyrange &range, TXNID txnid) { +treenode *treenode::alloc(const comparator *cmp, const keyrange &range, TXNID txnid) { treenode *XCALLOC(node); node->init(cmp); node->set_range_and_txnid(range, txnid); @@ -190,7 +190,7 @@ treenode *treenode::find_node_with_overlapping_child(const keyrange &range, // determine which child to look at based on a comparison. if we were // given a comparison hint, use that. 
otherwise, compare them now. - keyrange::comparison c = cmp_hint ? *cmp_hint : range.compare(m_cmp, m_range); + keyrange::comparison c = cmp_hint ? *cmp_hint : range.compare(*m_cmp, m_range); treenode *child; if (c == keyrange::comparison::LESS_THAN) { @@ -209,7 +209,7 @@ treenode *treenode::find_node_with_overlapping_child(const keyrange &range, if (child == nullptr) { return this; } else { - c = range.compare(m_cmp, child->m_range); + c = range.compare(*m_cmp, child->m_range); if (c == keyrange::comparison::EQUALS || c == keyrange::comparison::OVERLAPS) { child->mutex_unlock(); return this; @@ -225,7 +225,7 @@ treenode *treenode::find_node_with_overlapping_child(const keyrange &range, template void treenode::traverse_overlaps(const keyrange &range, F *function) { - keyrange::comparison c = range.compare(m_cmp, m_range); + keyrange::comparison c = range.compare(*m_cmp, m_range); if (c == keyrange::comparison::EQUALS) { // Doesn't matter if fn wants to keep going, there // is nothing left, so return. @@ -264,7 +264,7 @@ void treenode::traverse_overlaps(const keyrange &range, F *function) { void treenode::insert(const keyrange &range, TXNID txnid) { // choose a child to check. if that child is null, then insert the new node there. // otherwise recur down that child's subtree - keyrange::comparison c = range.compare(m_cmp, m_range); + keyrange::comparison c = range.compare(*m_cmp, m_range); if (c == keyrange::comparison::LESS_THAN) { treenode *left_child = lock_and_rebalance_left(); if (left_child == nullptr) { @@ -382,7 +382,7 @@ treenode *treenode::remove(const keyrange &range) { // if the range is equal to this node's range, then just remove // the root of this subtree. otherwise search down the tree // in either the left or right children. 
- keyrange::comparison c = range.compare(m_cmp, m_range); + keyrange::comparison c = range.compare(*m_cmp, m_range); switch (c) { case keyrange::comparison::EQUALS: return remove_root_of_subtree(); diff --git a/locktree/treenode.h b/locktree/treenode.h index 24bd4ed49e1..d79c70a9dae 100644 --- a/locktree/treenode.h +++ b/locktree/treenode.h @@ -123,7 +123,7 @@ public: // - node may be unlocked if no other thread has visibility // effect: create the root node - void create_root(comparator *cmp); + void create_root(const comparator *cmp); // effect: destroys the root node void destroy_root(void); @@ -210,7 +210,7 @@ private: child_ptr m_right_child; // comparator for ranges - comparator *m_cmp; + const comparator *m_cmp; // marked for the root node. the root node is never free()'d // when removed, but instead marked as empty. @@ -220,7 +220,7 @@ private: bool m_is_empty; // effect: initializes an empty node with the given comparator - void init(comparator *cmp); + void init(const comparator *cmp); // requires: *parent is initialized to something meaningful. 
// requires: subtree is non-empty @@ -267,7 +267,7 @@ private: treenode *maybe_rebalance(void); // returns: allocated treenode populated with a copy of the range and txnid - static treenode *alloc(comparator *cmp, const keyrange &range, TXNID txnid); + static treenode *alloc(const comparator *cmp, const keyrange &range, TXNID txnid); // requires: node is a locked root node, or an unlocked non-root node static void free(treenode *node); From ac4e71ef40ae44c5b0ddb211fe5e1ebff15222bb Mon Sep 17 00:00:00 2001 From: John Esmet Date: Sun, 15 Jun 2014 23:13:54 -0400 Subject: [PATCH 031/190] Really fix the test helper memory leak this time --- ft/ft-test-helpers.cc | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/ft/ft-test-helpers.cc b/ft/ft-test-helpers.cc index 31afe6e96df..1da34c48d94 100644 --- a/ft/ft-test-helpers.cc +++ b/ft/ft-test-helpers.cc @@ -133,6 +133,9 @@ int toku_testsetup_leaf(FT_HANDLE ft_handle, BLOCKNUM *blocknum, int n_children, toku_memdup_dbt(&pivotkeys[i], keys[i], keylens[i]); } node->pivotkeys.create_from_dbts(pivotkeys, n_children - 1); + for (int i = 0; i + 1 < n_children; i++) { + toku_destroy_dbt(&pivotkeys[i]); + } toku_free(pivotkeys); *blocknum = node->blocknum; @@ -154,6 +157,11 @@ int toku_testsetup_nonleaf (FT_HANDLE ft_handle, int height, BLOCKNUM *blocknum, toku_memdup_dbt(&pivotkeys[i], keys[i], keylens[i]); } node->pivotkeys.create_from_dbts(pivotkeys, n_children - 1); + for (int i = 0; i + 1 < n_children; i++) { + toku_destroy_dbt(&pivotkeys[i]); + } + toku_free(pivotkeys); + *blocknum = node->blocknum; toku_unpin_ftnode(ft_handle->ft, node); return 0; From 97d77b9e66ad4358f9428d95505ca16eeb54152a Mon Sep 17 00:00:00 2001 From: John Esmet Date: Sun, 15 Jun 2014 23:58:29 -0400 Subject: [PATCH 032/190] FT-273 Fixup a bug and a few tests --- ft/ft.cc | 3 ++- ft/tests/ft-serialize-benchmark.cc | 1 - ft/tests/ft-serialize-test.cc | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/ft/ft.cc b/ft/ft.cc index 
5110edc66cc..8376f7ea230 100644 --- a/ft/ft.cc +++ b/ft/ft.cc @@ -387,7 +387,8 @@ static void ft_init(FT ft, FT_OPTIONS options, CACHEFILE cf) { toku_list_init(&ft->live_ft_handles); - ft->cmp.create(options->compare_fun, &ft->descriptor); + // intuitively, the comparator points to the FT's cmp descriptor + ft->cmp.create(options->compare_fun, &ft->cmp_descriptor); ft->update_fun = options->update_fun; if (ft->cf != NULL) { diff --git a/ft/tests/ft-serialize-benchmark.cc b/ft/tests/ft-serialize-benchmark.cc index dd0405353c0..937d15be30d 100644 --- a/ft/tests/ft-serialize-benchmark.cc +++ b/ft/tests/ft-serialize-benchmark.cc @@ -420,7 +420,6 @@ test_serialize_nonleaf(int valsize, int nelts, double entropy, int ser_runs, int toku_free(ft); toku_free(ndd); toku_free(ndd2); - cmp.destroy(); r = close(fd); assert(r != -1); } diff --git a/ft/tests/ft-serialize-test.cc b/ft/tests/ft-serialize-test.cc index 06daebc6232..5ff14e87ed7 100644 --- a/ft/tests/ft-serialize-test.cc +++ b/ft/tests/ft-serialize-test.cc @@ -1125,12 +1125,12 @@ test_serialize_nonleaf(enum ftnode_verify_type bft, bool do_clone) { toku_block_free(ft_h->blocktable, BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); toku_blocktable_destroy(&ft_h->blocktable); + ft_h->cmp.destroy(); toku_free(ft_h->h); toku_free(ft_h); toku_free(ft); toku_free(src_ndd); toku_free(dest_ndd); - cmp.destroy(); r = close(fd); assert(r != -1); } From 3ab781a1d1831a87e6955593e55d40b77ffd14c5 Mon Sep 17 00:00:00 2001 From: Leif Walsh Date: Mon, 16 Jun 2014 08:35:57 -0400 Subject: [PATCH 033/190] FT-273 fix compilation on osx --- ft/ft_node-serialize.cc | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/ft/ft_node-serialize.cc b/ft/ft_node-serialize.cc index 58a73d6b12b..74d672304f9 100644 --- a/ft/ft_node-serialize.cc +++ b/ft/ft_node-serialize.cc @@ -922,7 +922,7 @@ deserialize_child_buffer_v26(NONLEAF_CHILDINFO bnc, struct rbuf *rbuf, const tok } invariant(rbuf->ndone == rbuf->size); - struct 
toku_msg_buffer_key_msn_cmp_extra extra = { .cmp = cmp, .msg_buffer = &bnc->msg_buffer }; + struct toku_msg_buffer_key_msn_cmp_extra extra(cmp, &bnc->msg_buffer); r = toku::sort::mergesort_r(fresh_offsets, nfresh, extra); assert_zero(r); bnc->fresh_message_tree.destroy(); @@ -1809,8 +1809,7 @@ deserialize_and_upgrade_internal_node(FTNODE node, xids_destroy(&xids); } - struct toku_msg_buffer_key_msn_cmp_extra extra = { .cmp = bfe->ft->cmp, - .msg_buffer = &bnc->msg_buffer }; + struct toku_msg_buffer_key_msn_cmp_extra extra(bfe->ft->cmp, &bnc->msg_buffer); typedef toku::sort key_msn_sort; r = key_msn_sort::mergesort_r(fresh_offsets, nfresh, extra); assert_zero(r); From bdd607af6acc5c90154fd3c43b5702462698ae46 Mon Sep 17 00:00:00 2001 From: Leif Walsh Date: Mon, 16 Jun 2014 09:06:01 -0400 Subject: [PATCH 034/190] fixed a bunch of clang warnings --- ft/tests/ft-test-cursor.cc | 1 - ft/tests/ft-test.cc | 1 - ft/tests/ft-test0.cc | 1 - ft/tests/ft-test1.cc | 1 - ft/tests/ft-test2.cc | 1 - ft/tests/ft-test3.cc | 1 - ft/tests/ft-test4.cc | 1 - ft/tests/ft-test5.cc | 1 - ft/tests/keyrange.cc | 1 - ft/tests/le-cursor-provdel.cc | 2 - ft/tests/le-cursor-right.cc | 1 - ft/tests/le-cursor-walk.cc | 1 - ft/tests/orthopush-flush.cc | 47 +++++++++----------- ft/tests/test-checkpoint-during-flush.cc | 1 - ft/tests/test-checkpoint-during-merge.cc | 1 - ft/tests/test-checkpoint-during-rebalance.cc | 1 - ft/tests/test-checkpoint-during-split.cc | 1 - ft/tests/test-del-inorder.cc | 1 - ft/tests/test-dirty-flushes-on-cleaner.cc | 1 - ft/tests/test-dump-ft.cc | 1 - ft/tests/test-flushes-on-cleaner.cc | 1 - ft/tests/test-ft-overflow.cc | 1 - ft/tests/test-hot-with-bounds.cc | 1 - ft/tests/test-inc-split.cc | 1 - ft/tests/test-merges-on-cleaner.cc | 1 - ft/tests/test-pick-child-to-flush.cc | 1 - ft/tests/test3681.cc | 1 - ft/tests/test3856.cc | 1 - ft/tests/test3884.cc | 1 - ft/tests/test4115.cc | 1 - ft/tests/test4244.cc | 1 - ft/tests/test_logcursor.cc | 1 - 
ft/tests/upgrade_test_simple.cc | 1 - src/tests/dump-env.cc | 1 - src/tests/hotindexer-bw.cc | 1 - src/tests/recover-2483.cc | 2 - src/tests/recover-test2.cc | 1 - src/tests/recover-test3.cc | 1 - src/tests/recovery_fileops_stress.cc | 1 - 39 files changed, 22 insertions(+), 65 deletions(-) diff --git a/ft/tests/ft-test-cursor.cc b/ft/tests/ft-test-cursor.cc index 3807db28f04..fa200705e1e 100644 --- a/ft/tests/ft-test-cursor.cc +++ b/ft/tests/ft-test-cursor.cc @@ -94,7 +94,6 @@ PATENT RIGHTS GRANT: static const char *fname = TOKU_TEST_FILENAME; static TOKUTXN const null_txn = 0; -static DB * const null_db = 0; static int test_cursor_debug = 0; diff --git a/ft/tests/ft-test.cc b/ft/tests/ft-test.cc index 862a18243e9..f826e20967f 100644 --- a/ft/tests/ft-test.cc +++ b/ft/tests/ft-test.cc @@ -92,7 +92,6 @@ PATENT RIGHTS GRANT: #include "test.h" static TOKUTXN const null_txn = 0; -static DB * const null_db = 0; static const char *fname = TOKU_TEST_FILENAME; diff --git a/ft/tests/ft-test0.cc b/ft/tests/ft-test0.cc index e2edde5145d..3e03b808d9f 100644 --- a/ft/tests/ft-test0.cc +++ b/ft/tests/ft-test0.cc @@ -92,7 +92,6 @@ PATENT RIGHTS GRANT: #include "test.h" static TOKUTXN const null_txn = 0; -static DB * const null_db = 0; static void test0 (void) { FT_HANDLE t; diff --git a/ft/tests/ft-test1.cc b/ft/tests/ft-test1.cc index 6d70bf287ff..cbf082e9fd2 100644 --- a/ft/tests/ft-test1.cc +++ b/ft/tests/ft-test1.cc @@ -92,7 +92,6 @@ PATENT RIGHTS GRANT: #include "test.h" static TOKUTXN const null_txn = 0; -static DB * const null_db = 0; static void test1 (void) { FT_HANDLE t; diff --git a/ft/tests/ft-test2.cc b/ft/tests/ft-test2.cc index 6efac8c1a41..eb80122fd7c 100644 --- a/ft/tests/ft-test2.cc +++ b/ft/tests/ft-test2.cc @@ -92,7 +92,6 @@ PATENT RIGHTS GRANT: #include "test.h" static TOKUTXN const null_txn = 0; -static DB * const null_db = 0; static void test2 (int limit) { FT_HANDLE t; diff --git a/ft/tests/ft-test3.cc b/ft/tests/ft-test3.cc index 3fb81660e0d..761c1cae643 
100644 --- a/ft/tests/ft-test3.cc +++ b/ft/tests/ft-test3.cc @@ -97,7 +97,6 @@ static const char *fname = TOKU_TEST_FILENAME; static const enum toku_compression_method compression_method = TOKU_DEFAULT_COMPRESSION_METHOD; static TOKUTXN const null_txn = 0; -static DB * const null_db = 0; static void test3 (int nodesize, int basementnodesize, int count) { FT_HANDLE t; diff --git a/ft/tests/ft-test4.cc b/ft/tests/ft-test4.cc index 2f5e861204a..d41fa5a697c 100644 --- a/ft/tests/ft-test4.cc +++ b/ft/tests/ft-test4.cc @@ -95,7 +95,6 @@ PATENT RIGHTS GRANT: static const char *fname = TOKU_TEST_FILENAME; static TOKUTXN const null_txn = 0; -static DB * const null_db = 0; static void test4 (int nodesize, int count) { FT_HANDLE t; diff --git a/ft/tests/ft-test5.cc b/ft/tests/ft-test5.cc index dc2fe43c7e6..1615873d17d 100644 --- a/ft/tests/ft-test5.cc +++ b/ft/tests/ft-test5.cc @@ -92,7 +92,6 @@ PATENT RIGHTS GRANT: #include "test.h" static TOKUTXN const null_txn = 0; -static DB * const null_db = 0; static void test5 (void) { int r; diff --git a/ft/tests/keyrange.cc b/ft/tests/keyrange.cc index 7fad706c377..8274da108ce 100644 --- a/ft/tests/keyrange.cc +++ b/ft/tests/keyrange.cc @@ -96,7 +96,6 @@ PATENT RIGHTS GRANT: #include static TOKUTXN const null_txn = 0; -static DB * const null_db = 0; static const char *fname = TOKU_TEST_FILENAME; static CACHETABLE ct; diff --git a/ft/tests/le-cursor-provdel.cc b/ft/tests/le-cursor-provdel.cc index d22b0f130e6..5331bf8e3d3 100644 --- a/ft/tests/le-cursor-provdel.cc +++ b/ft/tests/le-cursor-provdel.cc @@ -95,8 +95,6 @@ PATENT RIGHTS GRANT: #include "le-cursor.h" #include "test.h" -static TOKUTXN const null_txn = 0; -static DB * const null_db = 0; static int get_next_callback(ITEMLEN keylen, bytevec key, ITEMLEN vallen UU(), bytevec val UU(), void *extra, bool lock_only) { diff --git a/ft/tests/le-cursor-right.cc b/ft/tests/le-cursor-right.cc index d20ba9b4594..82dcfd4ff03 100644 --- a/ft/tests/le-cursor-right.cc +++ 
b/ft/tests/le-cursor-right.cc @@ -99,7 +99,6 @@ PATENT RIGHTS GRANT: #include "test.h" static TOKUTXN const null_txn = 0; -static DB * const null_db = 0; static int get_next_callback(ITEMLEN keylen, bytevec key, ITEMLEN vallen UU(), bytevec val UU(), void *extra, bool lock_only) { diff --git a/ft/tests/le-cursor-walk.cc b/ft/tests/le-cursor-walk.cc index e382e8d57ab..c2644b50e64 100644 --- a/ft/tests/le-cursor-walk.cc +++ b/ft/tests/le-cursor-walk.cc @@ -97,7 +97,6 @@ PATENT RIGHTS GRANT: #include static TOKUTXN const null_txn = 0; -static DB * const null_db = 0; static int get_next_callback(ITEMLEN keylen, bytevec key, ITEMLEN vallen UU(), bytevec val UU(), void *extra, bool lock_only) { diff --git a/ft/tests/orthopush-flush.cc b/ft/tests/orthopush-flush.cc index cc6af928a97..3e3d7a560e6 100644 --- a/ft/tests/orthopush-flush.cc +++ b/ft/tests/orthopush-flush.cc @@ -94,7 +94,6 @@ PATENT RIGHTS GRANT: #include "ule.h" static TOKUTXN const null_txn = 0; -static DB * const null_db = 0; static const char *fname = TOKU_TEST_FILENAME; static txn_gc_info non_mvcc_gc_info(nullptr, TXNID_NONE, TXNID_NONE, false); static toku::comparator dummy_cmp; @@ -320,8 +319,6 @@ insert_random_update_message(NONLEAF_CHILDINFO bnc, FT_MSG_S **save, bool is_fre } } -const int M = 1024 * 1024; - // flush from one internal node to another, where both only have one // buffer static void @@ -551,29 +548,29 @@ flush_to_internal_multiple(FT_HANDLE t) { MSN msn = msg->msn; enum ft_msg_type type = ft_msg_get_type(msg); XIDS xids = ft_msg_get_xids(msg); - for (int i = 0; i < num_parent_messages; ++i) { - if (dummy_cmp(&keydbt, parent_messages[i]->u.id.key) == 0 && - msn.msn == parent_messages[i]->msn.msn) { - assert(parent_messages_present[i] == 0); + for (int _i = 0; _i < num_parent_messages; ++_i) { + if (dummy_cmp(&keydbt, parent_messages[_i]->u.id.key) == 0 && + msn.msn == parent_messages[_i]->msn.msn) { + assert(parent_messages_present[_i] == 0); assert(found == 0); - 
assert(dummy_cmp(&valdbt, parent_messages[i]->u.id.val) == 0); - assert(type == parent_messages[i]->type); - assert(xids_get_innermost_xid(xids) == xids_get_innermost_xid(parent_messages[i]->xids)); - assert(parent_messages_is_fresh[i] == is_fresh); - parent_messages_present[i]++; + assert(dummy_cmp(&valdbt, parent_messages[_i]->u.id.val) == 0); + assert(type == parent_messages[_i]->type); + assert(xids_get_innermost_xid(xids) == xids_get_innermost_xid(parent_messages[_i]->xids)); + assert(parent_messages_is_fresh[_i] == is_fresh); + parent_messages_present[_i]++; found++; } } - for (int i = 0; i < num_child_messages; ++i) { - if (dummy_cmp(&keydbt, child_messages[i]->u.id.key) == 0 && - msn.msn == child_messages[i]->msn.msn) { - assert(child_messages_present[i] == 0); + for (int _i = 0; _i < num_child_messages; ++_i) { + if (dummy_cmp(&keydbt, child_messages[_i]->u.id.key) == 0 && + msn.msn == child_messages[_i]->msn.msn) { + assert(child_messages_present[_i] == 0); assert(found == 0); - assert(dummy_cmp(&valdbt, child_messages[i]->u.id.val) == 0); - assert(type == child_messages[i]->type); - assert(xids_get_innermost_xid(xids) == xids_get_innermost_xid(child_messages[i]->xids)); - assert(child_messages_is_fresh[i] == is_fresh); - child_messages_present[i]++; + assert(dummy_cmp(&valdbt, child_messages[_i]->u.id.val) == 0); + assert(type == child_messages[_i]->type); + assert(xids_get_innermost_xid(xids) == xids_get_innermost_xid(child_messages[_i]->xids)); + assert(child_messages_is_fresh[_i] == is_fresh); + child_messages_present[_i]++; found++; } } @@ -985,10 +982,10 @@ flush_to_leaf_with_keyrange(FT_HANDLE t, bool make_leaf_up_to_date) { toku_fill_dbt(&keydbt, ft_msg_get_key(msg), ft_msg_get_keylen(msg)); MSN msn = msg->msn; if (dummy_cmp(&keydbt, &childkeys[7]) > 0) { - for (int i = 0; i < num_parent_messages; ++i) { - if (dummy_cmp(&keydbt, parent_messages[i]->u.id.key) == 0 && - msn.msn == parent_messages[i]->msn.msn) { - assert(is_fresh == 
parent_messages_is_fresh[i]); + for (int _i = 0; _i < num_parent_messages; ++_i) { + if (dummy_cmp(&keydbt, parent_messages[_i]->u.id.key) == 0 && + msn.msn == parent_messages[_i]->msn.msn) { + assert(is_fresh == parent_messages_is_fresh[_i]); break; } } diff --git a/ft/tests/test-checkpoint-during-flush.cc b/ft/tests/test-checkpoint-during-flush.cc index ac04682398e..9a42bed40a2 100644 --- a/ft/tests/test-checkpoint-during-flush.cc +++ b/ft/tests/test-checkpoint-during-flush.cc @@ -99,7 +99,6 @@ PATENT RIGHTS GRANT: #include "checkpoint.h" static TOKUTXN const null_txn = 0; -static DB * const null_db = 0; enum { NODESIZE = 1024, KSIZE=NODESIZE-100, TOKU_PSIZE=20 }; diff --git a/ft/tests/test-checkpoint-during-merge.cc b/ft/tests/test-checkpoint-during-merge.cc index 652763781ad..35a3ab79459 100644 --- a/ft/tests/test-checkpoint-during-merge.cc +++ b/ft/tests/test-checkpoint-during-merge.cc @@ -99,7 +99,6 @@ PATENT RIGHTS GRANT: #include "checkpoint.h" static TOKUTXN const null_txn = 0; -static DB * const null_db = 0; enum { NODESIZE = 1024, KSIZE=NODESIZE-100, TOKU_PSIZE=20 }; diff --git a/ft/tests/test-checkpoint-during-rebalance.cc b/ft/tests/test-checkpoint-during-rebalance.cc index ce24ba6889c..03add082f4d 100644 --- a/ft/tests/test-checkpoint-during-rebalance.cc +++ b/ft/tests/test-checkpoint-during-rebalance.cc @@ -99,7 +99,6 @@ PATENT RIGHTS GRANT: #include "checkpoint.h" static TOKUTXN const null_txn = 0; -static DB * const null_db = 0; enum { NODESIZE = 1024, KSIZE=NODESIZE-100, TOKU_PSIZE=20 }; diff --git a/ft/tests/test-checkpoint-during-split.cc b/ft/tests/test-checkpoint-during-split.cc index 315a097eaca..2ab749858e5 100644 --- a/ft/tests/test-checkpoint-during-split.cc +++ b/ft/tests/test-checkpoint-during-split.cc @@ -99,7 +99,6 @@ PATENT RIGHTS GRANT: #include "checkpoint.h" static TOKUTXN const null_txn = 0; -static DB * const null_db = 0; enum { NODESIZE = 1024, KSIZE=NODESIZE-100, TOKU_PSIZE=20 }; diff --git a/ft/tests/test-del-inorder.cc 
b/ft/tests/test-del-inorder.cc index eca4c1d0f9a..c95801ef430 100644 --- a/ft/tests/test-del-inorder.cc +++ b/ft/tests/test-del-inorder.cc @@ -95,7 +95,6 @@ PATENT RIGHTS GRANT: static TOKUTXN const null_txn = 0; -static DB * const null_db = 0; enum { NODESIZE = 1024, KSIZE=NODESIZE-100, TOKU_PSIZE=20 }; diff --git a/ft/tests/test-dirty-flushes-on-cleaner.cc b/ft/tests/test-dirty-flushes-on-cleaner.cc index ea369be7799..332b86917fe 100644 --- a/ft/tests/test-dirty-flushes-on-cleaner.cc +++ b/ft/tests/test-dirty-flushes-on-cleaner.cc @@ -98,7 +98,6 @@ PATENT RIGHTS GRANT: static TOKUTXN const null_txn = 0; -static DB * const null_db = 0; enum { NODESIZE = 1024, KSIZE=NODESIZE-100, TOKU_PSIZE=20 }; diff --git a/ft/tests/test-dump-ft.cc b/ft/tests/test-dump-ft.cc index 47f625d3731..28007290a52 100644 --- a/ft/tests/test-dump-ft.cc +++ b/ft/tests/test-dump-ft.cc @@ -94,7 +94,6 @@ PATENT RIGHTS GRANT: #include "test.h" static TOKUTXN const null_txn = 0; -static DB * const null_db = 0; int test_main(int argc, const char *argv[]) { diff --git a/ft/tests/test-flushes-on-cleaner.cc b/ft/tests/test-flushes-on-cleaner.cc index fe5d9b38f9d..b7f0edf7bb6 100644 --- a/ft/tests/test-flushes-on-cleaner.cc +++ b/ft/tests/test-flushes-on-cleaner.cc @@ -98,7 +98,6 @@ PATENT RIGHTS GRANT: static TOKUTXN const null_txn = 0; -static DB * const null_db = 0; enum { NODESIZE = 1024, KSIZE=NODESIZE-100, TOKU_PSIZE=20 }; diff --git a/ft/tests/test-ft-overflow.cc b/ft/tests/test-ft-overflow.cc index c6afed36d04..c1c3f9b2f9d 100644 --- a/ft/tests/test-ft-overflow.cc +++ b/ft/tests/test-ft-overflow.cc @@ -97,7 +97,6 @@ PATENT RIGHTS GRANT: static const char *fname = TOKU_TEST_FILENAME; static TOKUTXN const null_txn = 0; -static DB * const null_db = 0; static void test_overflow (void) { diff --git a/ft/tests/test-hot-with-bounds.cc b/ft/tests/test-hot-with-bounds.cc index bd18d297b9b..af191232b79 100644 --- a/ft/tests/test-hot-with-bounds.cc +++ b/ft/tests/test-hot-with-bounds.cc @@ -99,7 +99,6 
@@ PATENT RIGHTS GRANT: #include "checkpoint.h" static TOKUTXN const null_txn = 0; -static DB * const null_db = 0; enum { NODESIZE = 1024, KSIZE=NODESIZE-100, TOKU_PSIZE=20 }; diff --git a/ft/tests/test-inc-split.cc b/ft/tests/test-inc-split.cc index 40533509e33..5430e456835 100644 --- a/ft/tests/test-inc-split.cc +++ b/ft/tests/test-inc-split.cc @@ -120,7 +120,6 @@ PATENT RIGHTS GRANT: static TOKUTXN const null_txn = 0; -static DB * const null_db = 0; enum { NODESIZE = 1024, KSIZE=NODESIZE-100, TOKU_PSIZE=20 }; diff --git a/ft/tests/test-merges-on-cleaner.cc b/ft/tests/test-merges-on-cleaner.cc index 1093de08221..d4780db5f44 100644 --- a/ft/tests/test-merges-on-cleaner.cc +++ b/ft/tests/test-merges-on-cleaner.cc @@ -97,7 +97,6 @@ PATENT RIGHTS GRANT: #include "checkpoint.h" static TOKUTXN const null_txn = 0; -static DB * const null_db = 0; enum { NODESIZE = 1024, KSIZE=NODESIZE-100, TOKU_PSIZE=20 }; diff --git a/ft/tests/test-pick-child-to-flush.cc b/ft/tests/test-pick-child-to-flush.cc index fe1762dc980..6203c9085c8 100644 --- a/ft/tests/test-pick-child-to-flush.cc +++ b/ft/tests/test-pick-child-to-flush.cc @@ -100,7 +100,6 @@ PATENT RIGHTS GRANT: #include "checkpoint.h" static TOKUTXN const null_txn = 0; -static DB * const null_db = 0; enum { NODESIZE = 1024, KSIZE=NODESIZE-100, TOKU_PSIZE=20 }; diff --git a/ft/tests/test3681.cc b/ft/tests/test3681.cc index 4307dfa741a..2367c3d07ce 100644 --- a/ft/tests/test3681.cc +++ b/ft/tests/test3681.cc @@ -101,7 +101,6 @@ PATENT RIGHTS GRANT: CACHETABLE ct; FT_HANDLE t; -static DB * const null_db = 0; static TOKUTXN const null_txn = 0; volatile bool done = false; diff --git a/ft/tests/test3856.cc b/ft/tests/test3856.cc index 2353e871b83..8ead29a32ea 100644 --- a/ft/tests/test3856.cc +++ b/ft/tests/test3856.cc @@ -99,7 +99,6 @@ PATENT RIGHTS GRANT: static const char *fname = TOKU_TEST_FILENAME; static TOKUTXN const null_txn = 0; -static DB * const null_db = 0; static int const nodesize = 1<<12, basementnodesize = 1<<9; 
static const enum toku_compression_method compression_method = TOKU_DEFAULT_COMPRESSION_METHOD; static int const count = 1000; diff --git a/ft/tests/test3884.cc b/ft/tests/test3884.cc index c5ffc152f4c..602e89c57ca 100644 --- a/ft/tests/test3884.cc +++ b/ft/tests/test3884.cc @@ -111,7 +111,6 @@ static const int vallen = 64 - sizeof(long) - (sizeof(((LEAFENTRY)NULL)->type) #define dummy_msn_3884 ((MSN) { (uint64_t) 3884 * MIN_MSN.msn }) static TOKUTXN const null_txn = 0; -static DB * const null_db = 0; static const char *fname = TOKU_TEST_FILENAME; static void diff --git a/ft/tests/test4115.cc b/ft/tests/test4115.cc index 631af3cf03b..457adcdac15 100644 --- a/ft/tests/test4115.cc +++ b/ft/tests/test4115.cc @@ -96,7 +96,6 @@ PATENT RIGHTS GRANT: #include static TOKUTXN const null_txn = 0; -static DB * const null_db = 0; const char *fname = TOKU_TEST_FILENAME; CACHETABLE ct; diff --git a/ft/tests/test4244.cc b/ft/tests/test4244.cc index c258666b1c5..e21b86fec9b 100644 --- a/ft/tests/test4244.cc +++ b/ft/tests/test4244.cc @@ -96,7 +96,6 @@ PATENT RIGHTS GRANT: #include static TOKUTXN const null_txn = 0; -static DB * const null_db = 0; enum { NODESIZE = 1024, KSIZE=NODESIZE-100, TOKU_PSIZE=20 }; diff --git a/ft/tests/test_logcursor.cc b/ft/tests/test_logcursor.cc index 7b3f46e3d38..51b1fcc1e64 100644 --- a/ft/tests/test_logcursor.cc +++ b/ft/tests/test_logcursor.cc @@ -105,7 +105,6 @@ const char LOGDIR[100] = "./dir.test_logcursor"; const int FSYNC = 1; const int NO_FSYNC = 0; -const int envflags = DB_INIT_MPOOL|DB_CREATE|DB_THREAD |DB_INIT_LOCK|DB_INIT_LOG|DB_INIT_TXN; const char *namea="a.db"; const char *nameb="b.db"; const char *a="a"; diff --git a/ft/tests/upgrade_test_simple.cc b/ft/tests/upgrade_test_simple.cc index e9c9d6cb9c7..619f8492b59 100644 --- a/ft/tests/upgrade_test_simple.cc +++ b/ft/tests/upgrade_test_simple.cc @@ -100,7 +100,6 @@ PATENT RIGHTS GRANT: #include "checkpoint.h" static TOKUTXN const null_txn = NULL; -static DB * const null_db = NULL; 
static int noop_getf(ITEMLEN UU(keylen), bytevec UU(key), ITEMLEN UU(vallen), bytevec UU(val), void *extra, bool UU(lock_only)) diff --git a/src/tests/dump-env.cc b/src/tests/dump-env.cc index 8348c25f2bd..edbfc7d02cb 100644 --- a/src/tests/dump-env.cc +++ b/src/tests/dump-env.cc @@ -95,7 +95,6 @@ static DB_ENV *env; static DB *db; DB_TXN *txn; -const int num_insert = 25000; static void setup (void) { diff --git a/src/tests/hotindexer-bw.cc b/src/tests/hotindexer-bw.cc index fa53a4062e8..7d06cce6ca0 100644 --- a/src/tests/hotindexer-bw.cc +++ b/src/tests/hotindexer-bw.cc @@ -103,7 +103,6 @@ static int num_rows; static const int FORWARD = 0; static const int BACKWARD = 1; typedef int Direction; -static const int TXN_NONE = 0; static const int TXN_CREATE = 1; static const int TXN_END = 2; typedef int TxnWork; diff --git a/src/tests/recover-2483.cc b/src/tests/recover-2483.cc index 0950a304075..e2244534e2e 100644 --- a/src/tests/recover-2483.cc +++ b/src/tests/recover-2483.cc @@ -94,8 +94,6 @@ PATENT RIGHTS GRANT: #include "test.h" -const int envflags = DB_INIT_MPOOL|DB_CREATE|DB_THREAD |DB_INIT_LOCK|DB_INIT_LOG|DB_INIT_TXN|DB_PRIVATE; - DB_ENV *env; DB_TXN *tid; DB *db; diff --git a/src/tests/recover-test2.cc b/src/tests/recover-test2.cc index 524c197c625..9faeedc8d5d 100644 --- a/src/tests/recover-test2.cc +++ b/src/tests/recover-test2.cc @@ -94,7 +94,6 @@ PATENT RIGHTS GRANT: #include "test.h" -const int envflags = DB_INIT_MPOOL|DB_CREATE|DB_THREAD |DB_INIT_LOCK|DB_INIT_LOG|DB_INIT_TXN|DB_PRIVATE; const char *namea="a.db"; DB_ENV *env; diff --git a/src/tests/recover-test3.cc b/src/tests/recover-test3.cc index a3de519172d..7dcc191fc25 100644 --- a/src/tests/recover-test3.cc +++ b/src/tests/recover-test3.cc @@ -94,7 +94,6 @@ PATENT RIGHTS GRANT: #include "test.h" -const int envflags = DB_INIT_MPOOL|DB_CREATE|DB_THREAD |DB_INIT_LOCK|DB_INIT_LOG|DB_INIT_TXN|DB_PRIVATE; const char *namea="a.db"; DB_ENV *env; diff --git a/src/tests/recovery_fileops_stress.cc 
b/src/tests/recovery_fileops_stress.cc index 5546ad53f5d..acbb59bd05b 100644 --- a/src/tests/recovery_fileops_stress.cc +++ b/src/tests/recovery_fileops_stress.cc @@ -104,7 +104,6 @@ DB** db_array; DB* states; static const int percent_do_op = 20; static const int percent_do_abort = 25; -static const int commit_abort_ratio = 3; static const int start_crashing_iter = 10; // iterations_per_crash_in_recovery should be an odd number; static const int iterations_per_crash_in_recovery = 7; From 9ace31b0295a683b177f77e30751e48fb37b1a3f Mon Sep 17 00:00:00 2001 From: John Esmet Date: Mon, 16 Jun 2014 10:51:43 -0400 Subject: [PATCH 035/190] FT-273 Handle deserialization cases where we don't yet have a comparison function --- ft/comparator.h | 4 ++ ft/ft_node-serialize.cc | 125 ++++++++++++++++++++++++---------------- 2 files changed, 80 insertions(+), 49 deletions(-) diff --git a/ft/comparator.h b/ft/comparator.h index b21d0b9d845..555e260df6a 100644 --- a/ft/comparator.h +++ b/ft/comparator.h @@ -127,6 +127,10 @@ namespace toku { _fake_db->cmp_descriptor = desc; } + bool valid() const { + return _cmp != nullptr; + } + int operator()(const DBT *a, const DBT *b) const { // TODO: add an unlikely() compiler note for this branch if (toku_dbt_is_infinite(a) || toku_dbt_is_infinite(b)) { diff --git a/ft/ft_node-serialize.cc b/ft/ft_node-serialize.cc index 74d672304f9..04e890f0c88 100644 --- a/ft/ft_node-serialize.cc +++ b/ft/ft_node-serialize.cc @@ -876,13 +876,23 @@ static void deserialize_child_buffer_v26(NONLEAF_CHILDINFO bnc, struct rbuf *rbuf, const toku::comparator &cmp) { int r; int n_in_this_buffer = rbuf_int(rbuf); - int32_t *fresh_offsets = NULL, *stale_offsets = NULL; - int32_t *broadcast_offsets = NULL; + int32_t *fresh_offsets = nullptr, *stale_offsets = nullptr; + int32_t *broadcast_offsets = nullptr; int nfresh = 0, nstale = 0; int nbroadcast_offsets = 0; - XMALLOC_N(n_in_this_buffer, stale_offsets); - XMALLOC_N(n_in_this_buffer, fresh_offsets); - 
XMALLOC_N(n_in_this_buffer, broadcast_offsets); + + // Only sort buffers if we have a valid comparison function. In certain scenarios, + // like deserialie_ft_versioned() or tokuftdump, we'll need to deserialize ftnodes + // for simple inspection and don't actually require that the message buffers are + // properly sorted. This is very ugly, but correct. + const bool sort_buffers = cmp.valid(); + + if (sort_buffers) { + XMALLOC_N(n_in_this_buffer, stale_offsets); + XMALLOC_N(n_in_this_buffer, fresh_offsets); + XMALLOC_N(n_in_this_buffer, broadcast_offsets); + } + bnc->msg_buffer.resize(rbuf->size + 64); for (int i = 0; i < n_in_this_buffer; i++) { bytevec key; ITEMLEN keylen; @@ -896,21 +906,24 @@ deserialize_child_buffer_v26(NONLEAF_CHILDINFO bnc, struct rbuf *rbuf, const tok xids_create_from_buffer(rbuf, &xids); rbuf_bytes(rbuf, &key, &keylen); /* Returns a pointer into the rbuf. */ rbuf_bytes(rbuf, &val, &vallen); - int32_t *dest; - if (ft_msg_type_applies_once(type)) { - if (is_fresh) { - dest = &fresh_offsets[nfresh]; - nfresh++; + int32_t *dest = nullptr; + if (sort_buffers) { + if (ft_msg_type_applies_once(type)) { + if (is_fresh) { + dest = &fresh_offsets[nfresh]; + nfresh++; + } else { + dest = &stale_offsets[nstale]; + nstale++; + } + } else if (ft_msg_type_applies_all(type) || ft_msg_type_does_nothing(type)) { + dest = &broadcast_offsets[nbroadcast_offsets]; + nbroadcast_offsets++; } else { - dest = &stale_offsets[nstale]; - nstale++; + abort(); } - } else if (ft_msg_type_applies_all(type) || ft_msg_type_does_nothing(type)) { - dest = &broadcast_offsets[nbroadcast_offsets]; - nbroadcast_offsets++; - } else { - abort(); } + // TODO: Function to parse stuff out of an rbuf into an FT_MSG DBT k, v; FT_MSG_S msg = { @@ -922,17 +935,19 @@ deserialize_child_buffer_v26(NONLEAF_CHILDINFO bnc, struct rbuf *rbuf, const tok } invariant(rbuf->ndone == rbuf->size); - struct toku_msg_buffer_key_msn_cmp_extra extra(cmp, &bnc->msg_buffer); - r = 
toku::sort::mergesort_r(fresh_offsets, nfresh, extra); - assert_zero(r); - bnc->fresh_message_tree.destroy(); - bnc->fresh_message_tree.create_steal_sorted_array(&fresh_offsets, nfresh, n_in_this_buffer); - r = toku::sort::mergesort_r(stale_offsets, nstale, extra); - assert_zero(r); - bnc->stale_message_tree.destroy(); - bnc->stale_message_tree.create_steal_sorted_array(&stale_offsets, nstale, n_in_this_buffer); - bnc->broadcast_list.destroy(); - bnc->broadcast_list.create_steal_sorted_array(&broadcast_offsets, nbroadcast_offsets, n_in_this_buffer); + if (sort_buffers) { + struct toku_msg_buffer_key_msn_cmp_extra extra(cmp, &bnc->msg_buffer); + r = toku::sort::mergesort_r(fresh_offsets, nfresh, extra); + assert_zero(r); + bnc->fresh_message_tree.destroy(); + bnc->fresh_message_tree.create_steal_sorted_array(&fresh_offsets, nfresh, n_in_this_buffer); + r = toku::sort::mergesort_r(stale_offsets, nstale, extra); + assert_zero(r); + bnc->stale_message_tree.destroy(); + bnc->stale_message_tree.create_steal_sorted_array(&stale_offsets, nstale, n_in_this_buffer); + bnc->broadcast_list.destroy(); + bnc->broadcast_list.create_steal_sorted_array(&broadcast_offsets, nbroadcast_offsets, n_in_this_buffer); + } } // effect: deserialize a single message from rbuf and enqueue the result into the given message buffer @@ -1750,19 +1765,27 @@ deserialize_and_upgrade_internal_node(FTNODE node, MSN highest_msn; highest_msn.msn = 0; + // Only sort buffers if we have a valid comparison function. In certain scenarios, + // like deserialie_ft_versioned() or tokuftdump, we'll need to deserialize ftnodes + // for simple inspection and don't actually require that the message buffers are + // properly sorted. This is very ugly, but correct. + const bool sort_buffers = bfe->ft->cmp.valid(); + // Deserialize de-compressed buffers. for (int i = 0; i < node->n_children; ++i) { NONLEAF_CHILDINFO bnc = BNC(node, i); int n_in_this_buffer = rbuf_int(rb); // 22. 
node count - int32_t *fresh_offsets = NULL; - int32_t *broadcast_offsets = NULL; + int32_t *fresh_offsets = nullptr; + int32_t *broadcast_offsets = nullptr; int nfresh = 0; int nbroadcast_offsets = 0; // We skip 'stale' offsets for upgraded nodes. - XMALLOC_N(n_in_this_buffer, fresh_offsets); - XMALLOC_N(n_in_this_buffer, broadcast_offsets); + if (sort_buffers) { + XMALLOC_N(n_in_this_buffer, fresh_offsets); + XMALLOC_N(n_in_this_buffer, broadcast_offsets); + } // Atomically decrement the header's MSN count by the number // of messages in the buffer. @@ -1785,15 +1808,17 @@ deserialize_and_upgrade_internal_node(FTNODE node, rbuf_bytes(rb, &val, &vallen); // 26. value // can we factor this out? - int32_t *dest; - if (ft_msg_type_applies_once(type)) { - dest = &fresh_offsets[nfresh]; - nfresh++; - } else if (ft_msg_type_applies_all(type) || ft_msg_type_does_nothing(type)) { - dest = &broadcast_offsets[nbroadcast_offsets]; - nbroadcast_offsets++; - } else { - abort(); + int32_t *dest = nullptr; + if (sort_buffers) { + if (ft_msg_type_applies_once(type)) { + dest = &fresh_offsets[nfresh]; + nfresh++; + } else if (ft_msg_type_applies_all(type) || ft_msg_type_does_nothing(type)) { + dest = &broadcast_offsets[nbroadcast_offsets]; + nbroadcast_offsets++; + } else { + abort(); + } } // Increment our MSN, the last message should have the @@ -1809,14 +1834,16 @@ deserialize_and_upgrade_internal_node(FTNODE node, xids_destroy(&xids); } - struct toku_msg_buffer_key_msn_cmp_extra extra(bfe->ft->cmp, &bnc->msg_buffer); - typedef toku::sort key_msn_sort; - r = key_msn_sort::mergesort_r(fresh_offsets, nfresh, extra); - assert_zero(r); - bnc->fresh_message_tree.destroy(); - bnc->fresh_message_tree.create_steal_sorted_array(&fresh_offsets, nfresh, n_in_this_buffer); - bnc->broadcast_list.destroy(); - bnc->broadcast_list.create_steal_sorted_array(&broadcast_offsets, nbroadcast_offsets, n_in_this_buffer); + if (sort_buffers) { + struct toku_msg_buffer_key_msn_cmp_extra 
extra(bfe->ft->cmp, &bnc->msg_buffer); + typedef toku::sort key_msn_sort; + r = key_msn_sort::mergesort_r(fresh_offsets, nfresh, extra); + assert_zero(r); + bnc->fresh_message_tree.destroy(); + bnc->fresh_message_tree.create_steal_sorted_array(&fresh_offsets, nfresh, n_in_this_buffer); + bnc->broadcast_list.destroy(); + bnc->broadcast_list.create_steal_sorted_array(&broadcast_offsets, nbroadcast_offsets, n_in_this_buffer); + } } // Assign the highest msn from our upgrade message buffers From d42d5fc8168c197d5752d9d886fd9d62bc69e4de Mon Sep 17 00:00:00 2001 From: John Esmet Date: Mon, 16 Jun 2014 11:04:35 -0400 Subject: [PATCH 036/190] FT-275 Change toku::sort to return void --- ft/ft_node-serialize.cc | 15 +++++---------- ft/node.cc | 3 +-- 2 files changed, 6 insertions(+), 12 deletions(-) diff --git a/ft/ft_node-serialize.cc b/ft/ft_node-serialize.cc index 04e890f0c88..660c7b479df 100644 --- a/ft/ft_node-serialize.cc +++ b/ft/ft_node-serialize.cc @@ -874,7 +874,6 @@ toku_serialize_ftnode_to (int fd, BLOCKNUM blocknum, FTNODE node, FTNODE_DISK_DA static void deserialize_child_buffer_v26(NONLEAF_CHILDINFO bnc, struct rbuf *rbuf, const toku::comparator &cmp) { - int r; int n_in_this_buffer = rbuf_int(rbuf); int32_t *fresh_offsets = nullptr, *stale_offsets = nullptr; int32_t *broadcast_offsets = nullptr; @@ -937,12 +936,10 @@ deserialize_child_buffer_v26(NONLEAF_CHILDINFO bnc, struct rbuf *rbuf, const tok if (sort_buffers) { struct toku_msg_buffer_key_msn_cmp_extra extra(cmp, &bnc->msg_buffer); - r = toku::sort::mergesort_r(fresh_offsets, nfresh, extra); - assert_zero(r); + toku::sort::mergesort_r(fresh_offsets, nfresh, extra); bnc->fresh_message_tree.destroy(); bnc->fresh_message_tree.create_steal_sorted_array(&fresh_offsets, nfresh, n_in_this_buffer); - r = toku::sort::mergesort_r(stale_offsets, nstale, extra); - assert_zero(r); + toku::sort::mergesort_r(stale_offsets, nstale, extra); bnc->stale_message_tree.destroy(); 
bnc->stale_message_tree.create_steal_sorted_array(&stale_offsets, nstale, n_in_this_buffer); bnc->broadcast_list.destroy(); @@ -1688,10 +1685,9 @@ deserialize_and_upgrade_internal_node(FTNODE node, struct ftnode_fetch_extra* bfe, STAT64INFO info) { - int r = 0; int version = node->layout_version_read_from_disk; - if(version == FT_LAST_LAYOUT_VERSION_WITH_FINGERPRINT) { + if (version == FT_LAST_LAYOUT_VERSION_WITH_FINGERPRINT) { (void) rbuf_int(rb); // 10. fingerprint } @@ -1837,8 +1833,7 @@ deserialize_and_upgrade_internal_node(FTNODE node, if (sort_buffers) { struct toku_msg_buffer_key_msn_cmp_extra extra(bfe->ft->cmp, &bnc->msg_buffer); typedef toku::sort key_msn_sort; - r = key_msn_sort::mergesort_r(fresh_offsets, nfresh, extra); - assert_zero(r); + key_msn_sort::mergesort_r(fresh_offsets, nfresh, extra); bnc->fresh_message_tree.destroy(); bnc->fresh_message_tree.create_steal_sorted_array(&fresh_offsets, nfresh, n_in_this_buffer); bnc->broadcast_list.destroy(); @@ -1870,7 +1865,7 @@ deserialize_and_upgrade_internal_node(FTNODE node, } } - return r; + return 0; } // This function takes a deserialized version 13 or 14 buffer and diff --git a/ft/node.cc b/ft/node.cc index 76265029626..f7b427afb1c 100644 --- a/ft/node.cc +++ b/ft/node.cc @@ -601,8 +601,7 @@ bnc_apply_messages_to_basement_node( invariant(sfo_extra.i == buffer_size); // Sort by MSN. - r = toku::sort::mergesort_r(offsets, buffer_size, bnc->msg_buffer); - assert_zero(r); + toku::sort::mergesort_r(offsets, buffer_size, bnc->msg_buffer); // Apply the messages in MSN order. for (int i = 0; i < buffer_size; ++i) { From 80c444ec984641bf620dc3334ab4484a89963b56 Mon Sep 17 00:00:00 2001 From: John Esmet Date: Mon, 16 Jun 2014 11:28:13 -0400 Subject: [PATCH 037/190] FT-274 Use ybt functions for descriptor memory management, which is safer and more readable. 
--- ft/ft-serialize.cc | 26 ++++++++------------------ ft/ft.cc | 28 +++++++--------------------- src/ydb_db.cc | 24 +++++++----------------- 3 files changed, 22 insertions(+), 56 deletions(-) diff --git a/ft/ft-serialize.cc b/ft/ft-serialize.cc index 5f6015d27ec..c9a8c2ffbc5 100644 --- a/ft/ft-serialize.cc +++ b/ft/ft-serialize.cc @@ -153,15 +153,7 @@ deserialize_descriptor_from_rbuf(struct rbuf *rb, DESCRIPTOR desc, int layout_ve uint32_t size; bytevec data; rbuf_bytes(rb, &data, &size); - bytevec data_copy = data; - if (size > 0) { - data_copy = toku_memdup(data, size); //Cannot keep the reference from rbuf. Must copy. - lazy_assert(data_copy); - } else { - lazy_assert(size==0); - data_copy = NULL; - } - toku_fill_dbt(&desc->dbt, data_copy, size); + toku_memdup_dbt(&desc->dbt, data, size); } static int @@ -194,12 +186,10 @@ deserialize_descriptor_from(int fd, BLOCK_TABLE bt, DESCRIPTOR desc, int layout_ goto exit; } } - { - struct rbuf rb = {.buf = dbuf, .size = (unsigned int) size, .ndone = 0}; - //Not temporary; must have a toku_memdup'd copy. - deserialize_descriptor_from_rbuf(&rb, desc, layout_version); - } - lazy_assert(deserialize_descriptor_size(desc, layout_version)+4 == size); + + struct rbuf rb = { .buf = dbuf, .size = (unsigned int) size, .ndone = 0 }; + deserialize_descriptor_from_rbuf(&rb, desc, layout_version); + lazy_assert(deserialize_descriptor_size(desc, layout_version) + 4 == size); toku_free(dbuf); } } @@ -436,10 +426,10 @@ int deserialize_ft_versioned(int fd, struct rbuf *rb, FT *ftp, uint32_t version) if (r != 0) { goto exit; } + // initialize for svn #4541 - // TODO: use real dbt function - ft->cmp_descriptor.dbt.size = ft->descriptor.dbt.size; - ft->cmp_descriptor.dbt.data = toku_xmemdup(ft->descriptor.dbt.data, ft->descriptor.dbt.size); + toku_clone_dbt(&ft->cmp_descriptor.dbt, ft->descriptor.dbt); + // Version 13 descriptors had an extra 4 bytes that we don't read // anymore. 
Since the header is going to think it's the current // version if it gets written out, we need to write the descriptor in diff --git a/ft/ft.cc b/ft/ft.cc index 8376f7ea230..e05307018e6 100644 --- a/ft/ft.cc +++ b/ft/ft.cc @@ -120,10 +120,8 @@ ft_destroy(FT ft) { assert(ft->h->type == FT_CURRENT); toku_blocktable_destroy(&ft->blocktable); ft->cmp.destroy(); - // TODO: use real dbt function - if (ft->descriptor.dbt.data) toku_free(ft->descriptor.dbt.data); - // TODO: use real dbt function - if (ft->cmp_descriptor.dbt.data) toku_free(ft->cmp_descriptor.dbt.data); + toku_destroy_dbt(&ft->descriptor.dbt); + toku_destroy_dbt(&ft->cmp_descriptor.dbt); toku_ft_destroy_reflock(ft); toku_free(ft->h); } @@ -913,26 +911,14 @@ toku_ft_update_descriptor_with_fd(FT ft, DESCRIPTOR desc, int fd) { toku_serialize_descriptor_contents_to_fd(fd, desc, offset); // cleanup the old descriptor and set the in-memory descriptor to the new one - // TODO: use real dbt function - if (ft->descriptor.dbt.data) { - toku_free(ft->descriptor.dbt.data); - } - // TODO: use real dbt function - ft->descriptor.dbt.size = desc->dbt.size; - ft->descriptor.dbt.data = toku_memdup(desc->dbt.data, desc->dbt.size); + toku_destroy_dbt(&ft->descriptor.dbt); + toku_clone_dbt(&ft->descriptor.dbt, desc->dbt); } void toku_ft_update_cmp_descriptor(FT ft) { - // TODO: use real dbt function - if (ft->cmp_descriptor.dbt.data != NULL) { - toku_free(ft->cmp_descriptor.dbt.data); - } - // TODO: use real dbt function - ft->cmp_descriptor.dbt.size = ft->descriptor.dbt.size; - ft->cmp_descriptor.dbt.data = toku_xmemdup( - ft->descriptor.dbt.data, - ft->descriptor.dbt.size - ); + // cleanup the old cmp descriptor and clone it as the in-memory descriptor + toku_destroy_dbt(&ft->cmp_descriptor.dbt); + toku_clone_dbt(&ft->cmp_descriptor.dbt, ft->descriptor.dbt); } DESCRIPTOR toku_ft_get_descriptor(FT_HANDLE ft_handle) { diff --git a/src/ydb_db.cc b/src/ydb_db.cc index 87ad9189f5c..2852283bfce 100644 --- a/src/ydb_db.cc +++ 
b/src/ydb_db.cc @@ -565,11 +565,12 @@ toku_db_change_descriptor(DB *db, DB_TXN* txn, const DBT* descriptor, uint32_t f HANDLE_DB_ILLEGAL_WORKING_PARENT_TXN(db, txn); int r = 0; TOKUTXN ttxn = txn ? db_txn_struct_i(txn)->tokutxn : NULL; - DBT old_descriptor; bool is_db_hot_index = ((flags & DB_IS_HOT_INDEX) != 0); bool update_cmp_descriptor = ((flags & DB_UPDATE_CMP_DESCRIPTOR) != 0); - toku_init_dbt(&old_descriptor); + DBT old_descriptor_dbt; + toku_init_dbt(&old_descriptor_dbt); + if (!db_opened(db) || !descriptor || (descriptor->size>0 && !descriptor->data)){ r = EINVAL; goto cleanup; @@ -582,23 +583,12 @@ toku_db_change_descriptor(DB *db, DB_TXN* txn, const DBT* descriptor, uint32_t f if (r != 0) { goto cleanup; } } - // TODO: use toku_clone_dbt(&old-descriptor, db->descriptor); - old_descriptor.size = db->descriptor->dbt.size; - old_descriptor.data = toku_memdup(db->descriptor->dbt.data, db->descriptor->dbt.size); - - toku_ft_change_descriptor( - db->i->ft_handle, - &old_descriptor, - descriptor, - true, - ttxn, - update_cmp_descriptor - ); + toku_clone_dbt(&old_descriptor_dbt, db->descriptor->dbt); + toku_ft_change_descriptor(db->i->ft_handle, &old_descriptor_dbt, descriptor, + true, ttxn, update_cmp_descriptor); cleanup: - if (old_descriptor.data) { - toku_free(old_descriptor.data); - } + toku_destroy_dbt(&old_descriptor_dbt); return r; } From 6c073ffff0206bb03089749ecf82a156f3a3715f Mon Sep 17 00:00:00 2001 From: John Esmet Date: Mon, 16 Jun 2014 14:14:43 -0400 Subject: [PATCH 038/190] FT-271 Move block allocator code into a class. 
--- ft/block_allocator.cc | 388 ++++++++++++------------- ft/block_allocator.h | 233 ++++++++------- ft/block_table.cc | 92 +++--- ft/ft-cachetable-wrappers.cc | 1 + ft/ft-flusher.cc | 7 +- ft/ft-internal.h | 6 +- ft/ft-ops.cc | 31 +- ft/ft-serialize.cc | 19 +- ft/ft-verify.cc | 9 +- ft/ft.cc | 5 +- ft/ft_node-serialize.cc | 1 + ft/ftverify.cc | 2 +- ft/loader/loader.cc | 23 +- ft/logger.cc | 11 +- ft/rollback-ct-callbacks.cc | 15 +- ft/rollback.cc | 20 +- ft/tests/block_allocator_test.cc | 73 +++-- ft/tests/ft-bfe-query.cc | 6 +- ft/tests/ft-clock-test.cc | 12 +- ft/tests/ft-serialize-benchmark.cc | 12 +- ft/tests/ft-serialize-test.cc | 42 +-- ft/tests/test_block_allocator_merge.cc | 26 +- ft/tokuftdump.cc | 12 +- 23 files changed, 508 insertions(+), 538 deletions(-) diff --git a/ft/block_allocator.cc b/ft/block_allocator.cc index a16df353760..f595950bc52 100644 --- a/ft/block_allocator.cc +++ b/ft/block_allocator.cc @@ -89,109 +89,69 @@ PATENT RIGHTS GRANT: #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." #ident "$Id$" -#include "block_allocator.h" -#include -#include -#include -#include -#include +#include +#include + +#include "portability/memory.h" +#include "portability/toku_assert.h" +#include "portability/toku_stdint.h" +#include "portability/toku_stdlib.h" + +#include "ft/block_allocator.h" // Here's a very simple implementation. // It's not very fast at allocating or freeing. // Previous implementation used next_fit, but now use first_fit since we are moving blocks around to reduce file size. 
-struct block_allocator { - uint64_t reserve_at_beginning; // How much to reserve at the beginning - uint64_t alignment; // Block alignment - uint64_t n_blocks; // How many blocks - uint64_t blocks_array_size; // How big is the blocks_array. Must be >= n_blocks. - struct block_allocator_blockpair *blocks_array; // These blocks are sorted by address. - uint64_t n_bytes_in_use; // including the reserve_at_beginning -}; - -void -block_allocator_validate (BLOCK_ALLOCATOR ba) { - uint64_t i; - uint64_t n_bytes_in_use = ba->reserve_at_beginning; - for (i=0; in_blocks; i++) { - n_bytes_in_use += ba->blocks_array[i].size; - if (i>0) { - assert(ba->blocks_array[i].offset > ba->blocks_array[i-1].offset); - assert(ba->blocks_array[i].offset >= ba->blocks_array[i-1].offset + ba->blocks_array[i-1].size ); - } - } - assert(n_bytes_in_use == ba->n_bytes_in_use); -} - #if 0 -#define VALIDATE(b) block_allocator_validate(b) +#define VALIDATE() validate() #else -#define VALIDATE(b) ((void)0) +#define VALIDATE() #endif -#if 0 -void -block_allocator_print (BLOCK_ALLOCATOR ba) { - uint64_t i; - for (i=0; in_blocks; i++) { - printf("%" PRId64 ":%" PRId64 " ", ba->blocks_array[i].offset, ba->blocks_array[i].size); - } - printf("\n"); - VALIDATE(ba); -} -#endif +void block_allocator::create(uint64_t reserve_at_beginning, uint64_t alignment) { + // the alignment must be at least 512 and aligned with 512 to work with direct I/O + assert(alignment >= 512 && (alignment % 512) == 0); -void -create_block_allocator (BLOCK_ALLOCATOR *ba, uint64_t reserve_at_beginning, uint64_t alignment) { - assert(alignment>=512 && 0==(alignment%512)); // the alignment must be at least 512 and aligned with 512 to make DIRECT_IO happy. 
- BLOCK_ALLOCATOR XMALLOC(result); - result->reserve_at_beginning = reserve_at_beginning; - result->alignment = alignment; - result->n_blocks = 0; - result->blocks_array_size = 1; - XMALLOC_N(result->blocks_array_size, result->blocks_array); - result->n_bytes_in_use = reserve_at_beginning; - *ba = result; - VALIDATE(result); + _reserve_at_beginning = reserve_at_beginning; + _alignment = alignment; + _n_blocks = 0; + _blocks_array_size = 1; + XMALLOC_N(_blocks_array_size, _blocks_array); + _n_bytes_in_use = reserve_at_beginning; + + VALIDATE(); } -void -destroy_block_allocator (BLOCK_ALLOCATOR *bap) { - BLOCK_ALLOCATOR ba = *bap; - *bap = 0; - toku_free(ba->blocks_array); - toku_free(ba); +void block_allocator::destroy() { + toku_free(_blocks_array); } -static void -grow_blocks_array_by (BLOCK_ALLOCATOR ba, uint64_t n_to_add) { - if (ba->n_blocks + n_to_add > ba->blocks_array_size) { - uint64_t new_size = ba->n_blocks + n_to_add; - uint64_t at_least = ba->blocks_array_size * 2; +void block_allocator::grow_blocks_array_by(uint64_t n_to_add) { + if (_n_blocks + n_to_add > _blocks_array_size) { + uint64_t new_size = _n_blocks + n_to_add; + uint64_t at_least = _blocks_array_size * 2; if (at_least > new_size) { new_size = at_least; } - ba->blocks_array_size = new_size; - XREALLOC_N(ba->blocks_array_size, ba->blocks_array); + _blocks_array_size = new_size; + XREALLOC_N(_blocks_array_size, _blocks_array); } } - -static void -grow_blocks_array (BLOCK_ALLOCATOR ba) { - grow_blocks_array_by(ba, 1); +void block_allocator::grow_blocks_array() { + grow_blocks_array_by(1); } -void -block_allocator_merge_blockpairs_into (uint64_t d, struct block_allocator_blockpair dst[/*d*/], - uint64_t s, const struct block_allocator_blockpair src[/*s*/]) +void block_allocator::merge_blockpairs_into(uint64_t d, struct blockpair dst[], + uint64_t s, const struct blockpair src[]) { uint64_t tail = d+s; - while (d>0 && s>0) { - struct block_allocator_blockpair *dp = &dst[d-1]; - struct 
block_allocator_blockpair const *sp = &src[s-1]; - struct block_allocator_blockpair *tp = &dst[tail-1]; - assert(tail>0); + while (d > 0 && s > 0) { + struct blockpair *dp = &dst[d - 1]; + struct blockpair const *sp = &src[s - 1]; + struct blockpair *tp = &dst[tail - 1]; + assert(tail > 0); if (dp->offset > sp->offset) { *tp = *dp; d--; @@ -202,139 +162,143 @@ block_allocator_merge_blockpairs_into (uint64_t d, struct block_allocator_ tail--; } } - while (d>0) { - struct block_allocator_blockpair *dp = &dst[d-1]; - struct block_allocator_blockpair *tp = &dst[tail-1]; + while (d > 0) { + struct blockpair *dp = &dst[d - 1]; + struct blockpair *tp = &dst[tail - 1]; *tp = *dp; d--; tail--; } - while (s>0) { - struct block_allocator_blockpair const *sp = &src[s-1]; - struct block_allocator_blockpair *tp = &dst[tail-1]; + while (s > 0) { + struct blockpair const *sp = &src[s - 1]; + struct blockpair *tp = &dst[tail - 1]; *tp = *sp; s--; tail--; } } -static int -compare_blockpairs (const void *av, const void *bv) { - const struct block_allocator_blockpair *a = (const struct block_allocator_blockpair *) av; - const struct block_allocator_blockpair *b = (const struct block_allocator_blockpair *) bv; - if (a->offset < b->offset) return -1; - if (a->offset > b->offset) return +1; - return 0; -} - -void -block_allocator_alloc_blocks_at (BLOCK_ALLOCATOR ba, uint64_t n_blocks, struct block_allocator_blockpair pairs[/*n_blocks*/]) -// See the documentation in block_allocator.h -{ - VALIDATE(ba); - qsort(pairs, n_blocks, sizeof(*pairs), compare_blockpairs); - for (uint64_t i=0; i= ba->reserve_at_beginning); - assert(pairs[i].offset%ba->alignment == 0); - ba->n_bytes_in_use += pairs[i].size; - invariant(pairs[i].size > 0); //Allocator does not support size 0 blocks. See block_allocator_free_block. 
+int block_allocator::compare_blockpairs(const void *av, const void *bv) { + const struct blockpair *a = (const struct blockpair *) av; + const struct blockpair *b = (const struct blockpair *) bv; + if (a->offset < b->offset) { + return -1; + } else if (a->offset > b->offset) { + return 1; + } else { + return 0; } - grow_blocks_array_by(ba, n_blocks); - block_allocator_merge_blockpairs_into(ba->n_blocks, ba->blocks_array, - n_blocks, pairs); - ba->n_blocks += n_blocks; - VALIDATE(ba); } -void -block_allocator_alloc_block_at (BLOCK_ALLOCATOR ba, uint64_t size, uint64_t offset) { - struct block_allocator_blockpair p = {.offset = offset, .size=size}; +// See the documentation in block_allocator.h +void block_allocator::alloc_blocks_at(uint64_t n_blocks, struct blockpair pairs[]) { + VALIDATE(); + qsort(pairs, n_blocks, sizeof(*pairs), compare_blockpairs); + for (uint64_t i = 0; i < n_blocks; i++) { + assert(pairs[i].offset >= _reserve_at_beginning); + assert(pairs[i].offset % _alignment == 0); + _n_bytes_in_use += pairs[i].size; + // Allocator does not support size 0 blocks. See block_allocator_free_block. + invariant(pairs[i].size > 0); + } + grow_blocks_array_by(n_blocks); + merge_blockpairs_into(_n_blocks, _blocks_array, n_blocks, pairs); + _n_blocks += n_blocks; + VALIDATE(); +} + +void block_allocator::alloc_block_at(uint64_t size, uint64_t offset) { + struct blockpair p(offset, size); + // Just do a linear search for the block. // This data structure is a sorted array (no gaps or anything), so the search isn't really making this any slower than the insertion. // To speed up the insertion when opening a file, we provide the block_allocator_alloc_blocks_at function. - block_allocator_alloc_blocks_at(ba, 1, &p); + alloc_blocks_at(1, &p); } -static inline uint64_t -align (uint64_t value, BLOCK_ALLOCATOR ba) // Effect: align a value by rounding up. 
-{ - return ((value+ba->alignment-1)/ba->alignment)*ba->alignment; +static inline uint64_t align(uint64_t value, uint64_t ba_alignment) { + return ((value + ba_alignment - 1) / ba_alignment) * ba_alignment; } -void block_allocator_alloc_block(BLOCK_ALLOCATOR ba, uint64_t size, uint64_t *offset) // Effect: Allocate a block. The resulting block must be aligned on the ba->alignment (which to make direct_io happy must be a positive multiple of 512). -{ - invariant(size > 0); //Allocator does not support size 0 blocks. See block_allocator_free_block. - grow_blocks_array(ba); - ba->n_bytes_in_use += size; - if (ba->n_blocks==0) { - assert(ba->n_bytes_in_use == ba->reserve_at_beginning + size); // we know exactly how many are in use - ba->blocks_array[0].offset = align(ba->reserve_at_beginning, ba); - ba->blocks_array[0].size = size; - *offset = ba->blocks_array[0].offset; - ba->n_blocks++; +void block_allocator::alloc_block(uint64_t size, uint64_t *offset) { + // Allocator does not support size 0 blocks. See block_allocator_free_block. + invariant(size > 0); + + grow_blocks_array(); + _n_bytes_in_use += size; + if (_n_blocks == 0) { + assert(_n_bytes_in_use == _reserve_at_beginning + size); // we know exactly how many are in use + _blocks_array[0].offset = align(_reserve_at_beginning, _alignment); + _blocks_array[0].size = size; + *offset = _blocks_array[0].offset; + _n_blocks++; return; } + // Implement first fit. { - uint64_t end_of_reserve = align(ba->reserve_at_beginning, ba); - if (end_of_reserve + size <= ba->blocks_array[0].offset ) { + uint64_t end_of_reserve = align(_reserve_at_beginning, _alignment); + if (end_of_reserve + size <= _blocks_array[0].offset ) { // Check to see if the space immediately after the reserve is big enough to hold the new block. 
- struct block_allocator_blockpair *bp = &ba->blocks_array[0]; - memmove(bp+1, bp, (ba->n_blocks)*sizeof(*bp)); + struct blockpair *bp = &_blocks_array[0]; + memmove(bp + 1, bp, _n_blocks * sizeof(*bp)); bp[0].offset = end_of_reserve; - bp[0].size = size; - ba->n_blocks++; + bp[0].size = size; + _n_blocks++; *offset = end_of_reserve; - VALIDATE(ba); + VALIDATE(); return; } } - for (uint64_t blocknum = 0; blocknum +1 < ba->n_blocks; blocknum ++) { + + for (uint64_t blocknum = 0; blocknum + 1 < _n_blocks; blocknum++) { // Consider the space after blocknum - struct block_allocator_blockpair *bp = &ba->blocks_array[blocknum]; + struct blockpair *bp = &_blocks_array[blocknum]; uint64_t this_offset = bp[0].offset; uint64_t this_size = bp[0].size; - uint64_t answer_offset = align(this_offset + this_size, ba); - if (answer_offset + size > bp[1].offset) continue; // The block we want doesn't fit after this block. + uint64_t answer_offset = align(this_offset + this_size, _alignment); + if (answer_offset + size > bp[1].offset) { + continue; // The block we want doesn't fit after this block. + } + // It fits, so allocate it here. - memmove(bp+2, bp+1, (ba->n_blocks - blocknum -1)*sizeof(*bp)); + memmove(bp + 2, bp + 1, (_n_blocks - blocknum - 1) * sizeof(*bp)); bp[1].offset = answer_offset; - bp[1].size = size; - ba->n_blocks++; + bp[1].size = size; + _n_blocks++; *offset = answer_offset; - VALIDATE(ba); + VALIDATE(); return; } + // It didn't fit anywhere, so fit it on the end. 
- assert(ba->n_blocks < ba->blocks_array_size); - struct block_allocator_blockpair *bp = &ba->blocks_array[ba->n_blocks]; - uint64_t answer_offset = align(bp[-1].offset+bp[-1].size, ba); + assert(_n_blocks < _blocks_array_size); + struct blockpair *bp = &_blocks_array[_n_blocks]; + uint64_t answer_offset = align(bp[-1].offset + bp[-1].size, _alignment); bp->offset = answer_offset; - bp->size = size; - ba->n_blocks++; + bp->size = size; + _n_blocks++; *offset = answer_offset; - VALIDATE(ba); + VALIDATE(); } -static int64_t -find_block (BLOCK_ALLOCATOR ba, uint64_t offset) // Find the index in the blocks array that has a particular offset. Requires that the block exist. // Use binary search so it runs fast. -{ - VALIDATE(ba); - if (ba->n_blocks==1) { - assert(ba->blocks_array[0].offset == offset); +int64_t block_allocator::find_block(uint64_t offset) { + VALIDATE(); + if (_n_blocks == 1) { + assert(_blocks_array[0].offset == offset); return 0; } + uint64_t lo = 0; - uint64_t hi = ba->n_blocks; + uint64_t hi = _n_blocks; while (1) { assert(loblocks_array[mid].offset; - //printf("lo=%" PRId64 " hi=%" PRId64 " mid=%" PRId64 " thisoff=%" PRId64 " offset=%" PRId64 "\n", lo, hi, mid, thisoff, offset); + uint64_t thisoff = _blocks_array[mid].offset; if (thisoff < offset) { lo = mid+1; } else if (thisoff > offset) { @@ -350,69 +314,64 @@ find_block (BLOCK_ALLOCATOR ba, uint64_t offset) // a 0-sized block can share offset with a non-zero sized block. // The non-zero sized block is not exchangable with a zero sized block (or vice versa), // so inserting 0-sized blocks can cause corruption here. -void -block_allocator_free_block (BLOCK_ALLOCATOR ba, uint64_t offset) { - VALIDATE(ba); - int64_t bn = find_block(ba, offset); - assert(bn>=0); // we require that there is a block with that offset. Might as well abort if no such block exists. 
- ba->n_bytes_in_use -= ba->blocks_array[bn].size; - memmove(&ba->blocks_array[bn], &ba->blocks_array[bn+1], (ba->n_blocks-bn-1) * sizeof(struct block_allocator_blockpair)); - ba->n_blocks--; - VALIDATE(ba); +void block_allocator::free_block(uint64_t offset) { + VALIDATE(); + int64_t bn = find_block(offset); + assert(bn >= 0); // we require that there is a block with that offset. + _n_bytes_in_use -= _blocks_array[bn].size; + memmove(&_blocks_array[bn], &_blocks_array[bn +1 ], + (_n_blocks - bn - 1) * sizeof(struct blockpair)); + _n_blocks--; + VALIDATE(); } -uint64_t -block_allocator_block_size (BLOCK_ALLOCATOR ba, uint64_t offset) { - int64_t bn = find_block(ba, offset); - assert(bn>=0); // we require that there is a block with that offset. Might as well abort if no such block exists. - return ba->blocks_array[bn].size; +uint64_t block_allocator::block_size(uint64_t offset) { + int64_t bn = find_block(offset); + assert(bn >=0); // we require that there is a block with that offset. + return _blocks_array[bn].size; } -uint64_t -block_allocator_allocated_limit (BLOCK_ALLOCATOR ba) { - if (ba->n_blocks==0) return ba->reserve_at_beginning; - else { - struct block_allocator_blockpair *last = &ba->blocks_array[ba->n_blocks-1]; +uint64_t block_allocator::allocated_limit() const { + if (_n_blocks == 0) { + return _reserve_at_beginning; + } else { + struct blockpair *last = &_blocks_array[_n_blocks - 1]; return last->offset + last->size; } } -int -block_allocator_get_nth_block_in_layout_order (BLOCK_ALLOCATOR ba, uint64_t b, uint64_t *offset, uint64_t *size) // Effect: Consider the blocks in sorted order. The reserved block at the beginning is number 0. The next one is number 1 and so forth. // Return the offset and size of the block with that number. // Return 0 if there is a block that big, return nonzero if b is too big. 
-{ - if (b==0) { - *offset=0; - *size =ba->reserve_at_beginning; +int block_allocator::get_nth_block_in_layout_order(uint64_t b, uint64_t *offset, uint64_t *size) { + if (b ==0 ) { + *offset = 0; + *size = _reserve_at_beginning; return 0; - } else if (b > ba->n_blocks) { + } else if (b > _n_blocks) { return -1; } else { - *offset=ba->blocks_array[b-1].offset; - *size =ba->blocks_array[b-1].size; + *offset =_blocks_array[b - 1].offset; + *size =_blocks_array[b - 1].size; return 0; } } -void -block_allocator_get_unused_statistics(BLOCK_ALLOCATOR ba, TOKU_DB_FRAGMENTATION report) { - //Requires: report->file_size_bytes is filled in - //Requires: report->data_bytes is filled in - //Requires: report->checkpoint_bytes_additional is filled in +// Requires: report->file_size_bytes is filled in +// Requires: report->data_bytes is filled in +// Requires: report->checkpoint_bytes_additional is filled in +void block_allocator::get_unused_statistics(TOKU_DB_FRAGMENTATION report) { + assert(_n_bytes_in_use == report->data_bytes + report->checkpoint_bytes_additional); - assert(ba->n_bytes_in_use == report->data_bytes + report->checkpoint_bytes_additional); - - report->unused_bytes = 0; - report->unused_blocks = 0; + report->unused_bytes = 0; + report->unused_blocks = 0; report->largest_unused_block = 0; - if (ba->n_blocks > 0) { + if (_n_blocks > 0) { //Deal with space before block 0 and after reserve: { - struct block_allocator_blockpair *bp = &ba->blocks_array[0]; - assert(bp->offset >= align(ba->reserve_at_beginning, ba)); - uint64_t free_space = bp->offset - align(ba->reserve_at_beginning, ba); + struct blockpair *bp = &_blocks_array[0]; + assert(bp->offset >= align(_reserve_at_beginning, _alignment)); + uint64_t free_space = bp->offset - align(_reserve_at_beginning, _alignment); if (free_space > 0) { report->unused_bytes += free_space; report->unused_blocks++; @@ -423,12 +382,12 @@ block_allocator_get_unused_statistics(BLOCK_ALLOCATOR ba, TOKU_DB_FRAGMENTATION } //Deal with 
space between blocks: - for (uint64_t blocknum = 0; blocknum +1 < ba->n_blocks; blocknum ++) { + for (uint64_t blocknum = 0; blocknum +1 < _n_blocks; blocknum ++) { // Consider the space after blocknum - struct block_allocator_blockpair *bp = &ba->blocks_array[blocknum]; + struct blockpair *bp = &_blocks_array[blocknum]; uint64_t this_offset = bp[0].offset; uint64_t this_size = bp[0].size; - uint64_t end_of_this_block = align(this_offset+this_size, ba); + uint64_t end_of_this_block = align(this_offset+this_size, _alignment); uint64_t next_offset = bp[1].offset; uint64_t free_space = next_offset - end_of_this_block; if (free_space > 0) { @@ -442,10 +401,10 @@ block_allocator_get_unused_statistics(BLOCK_ALLOCATOR ba, TOKU_DB_FRAGMENTATION //Deal with space after last block { - struct block_allocator_blockpair *bp = &ba->blocks_array[ba->n_blocks-1]; + struct blockpair *bp = &_blocks_array[_n_blocks-1]; uint64_t this_offset = bp[0].offset; uint64_t this_size = bp[0].size; - uint64_t end_of_this_block = align(this_offset+this_size, ba); + uint64_t end_of_this_block = align(this_offset+this_size, _alignment); if (end_of_this_block < report->file_size_bytes) { uint64_t free_space = report->file_size_bytes - end_of_this_block; assert(free_space > 0); @@ -456,10 +415,9 @@ block_allocator_get_unused_statistics(BLOCK_ALLOCATOR ba, TOKU_DB_FRAGMENTATION } } } - } - else { - //No blocks. Just the reserve. - uint64_t end_of_this_block = align(ba->reserve_at_beginning, ba); + } else { + // No blocks. Just the reserve. 
+ uint64_t end_of_this_block = align(_reserve_at_beginning, _alignment); if (end_of_this_block < report->file_size_bytes) { uint64_t free_space = report->file_size_bytes - end_of_this_block; assert(free_space > 0); @@ -471,3 +429,15 @@ block_allocator_get_unused_statistics(BLOCK_ALLOCATOR ba, TOKU_DB_FRAGMENTATION } } } + +void block_allocator::validate() const { + uint64_t n_bytes_in_use = _reserve_at_beginning; + for (uint64_t i = 0; i < _n_blocks; i++) { + n_bytes_in_use += _blocks_array[i].size; + if (i > 0) { + assert(_blocks_array[i].offset > _blocks_array[i - 1].offset); + assert(_blocks_array[i].offset >= _blocks_array[i - 1].offset + _blocks_array[i - 1].size ); + } + } + assert(n_bytes_in_use == _n_bytes_in_use); +} diff --git a/ft/block_allocator.h b/ft/block_allocator.h index b86bf578fa7..5c1bc75a504 100644 --- a/ft/block_allocator.h +++ b/ft/block_allocator.h @@ -92,133 +92,146 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#include "fttypes.h" +#include - -#define BLOCK_ALLOCATOR_ALIGNMENT 4096 -// How much must be reserved at the beginning for the block? -// The actual header is 8+4+4+8+8_4+8+ the length of the db names + 1 pointer for each root. -// So 4096 should be enough. -#define BLOCK_ALLOCATOR_HEADER_RESERVE 4096 -#if (BLOCK_ALLOCATOR_HEADER_RESERVE % BLOCK_ALLOCATOR_ALIGNMENT) != 0 -#error -#endif +#include "portability/toku_stdint.h" // Block allocator. -// Overview: A block allocator manages the allocation of variable-sized blocks. +// +// A block allocator manages the allocation of variable-sized blocks. // The translation of block numbers to addresses is handled elsewhere. 
// The allocation of block numbers is handled elsewhere. - -// We can create a block allocator. +// // When creating a block allocator we also specify a certain-sized -// block at the beginning that is preallocated (and cannot be allocated -// or freed) - +// block at the beginning that is preallocated (and cannot be allocated or freed) +// // We can allocate blocks of a particular size at a particular location. // We can allocate blocks of a particular size at a location chosen by the allocator. // We can free blocks. // We can determine the size of a block. +class block_allocator { +public: + static const size_t BLOCK_ALLOCATOR_ALIGNMENT = 4096; -#define BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE (2*BLOCK_ALLOCATOR_HEADER_RESERVE) + // How much must be reserved at the beginning for the block? + // The actual header is 8+4+4+8+8_4+8+ the length of the db names + 1 pointer for each root. + // So 4096 should be enough. + static const size_t BLOCK_ALLOCATOR_HEADER_RESERVE = 4096; + + static_assert(BLOCK_ALLOCATOR_HEADER_RESERVE % BLOCK_ALLOCATOR_ALIGNMENT == 0, + "block allocator header must have proper alignment"); -typedef struct block_allocator *BLOCK_ALLOCATOR; + static const size_t BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE = BLOCK_ALLOCATOR_HEADER_RESERVE * 2; -void create_block_allocator (BLOCK_ALLOCATOR * ba, uint64_t reserve_at_beginning, uint64_t alignment); -// Effect: Create a block allocator, in which the first RESERVE_AT_BEGINNING bytes are not put into a block. -// All blocks be start on a multiple of ALIGNMENT. -// Aborts if we run out of memory. -// Parameters -// ba (OUT): Result stored here. -// reserve_at_beginning (IN) Size of reserved block at beginning. This size does not have to be aligned. -// alignment (IN) Block alignment. + // Effect: Create a block allocator, in which the first RESERVE_AT_BEGINNING bytes are not put into a block. + // All blocks be start on a multiple of ALIGNMENT. + // Aborts if we run out of memory. 
+ // Parameters + // reserve_at_beginning (IN) Size of reserved block at beginning. This size does not have to be aligned. + // alignment (IN) Block alignment. + void create(uint64_t reserve_at_beginning, uint64_t alignment); -void destroy_block_allocator (BLOCK_ALLOCATOR *ba); -// Effect: Destroy a block allocator at *ba. -// Also, set *ba=NULL. -// Rationale: If there was only one copy of the pointer, this kills that copy too. -// Paramaters: -// ba (IN/OUT): + // Effect: Destroy this block allocator + void destroy(); + // Effect: Allocate a block of the specified size at a particular offset. + // Aborts if anything goes wrong. + // The performance of this function may be as bad as Theta(N), where N is the number of blocks currently in use. + // Usage note: To allocate several blocks (e.g., when opening a FT), use block_allocator_alloc_blocks_at(). + // Requires: The resulting block may not overlap any other allocated block. + // And the offset must be a multiple of the block alignment. + // Parameters: + // size (IN): The size of the block. + // offset (IN): The location of the block. + void alloc_block_at(uint64_t size, uint64_t offset); -void block_allocator_alloc_block_at (BLOCK_ALLOCATOR ba, uint64_t size, uint64_t offset); -// Effect: Allocate a block of the specified size at a particular offset. -// Aborts if anything goes wrong. -// The performance of this function may be as bad as Theta(N), where N is the number of blocks currently in use. -// Usage note: To allocate several blocks (e.g., when opening a FT), use block_allocator_alloc_blocks_at(). -// Requires: The resulting block may not overlap any other allocated block. -// And the offset must be a multiple of the block alignment. -// Parameters: -// ba (IN/OUT): The block allocator. (Modifies ba.) -// size (IN): The size of the block. -// offset (IN): The location of the block. 
+ struct blockpair { + uint64_t offset; + uint64_t size; + blockpair(uint64_t o, uint64_t s) : + offset(o), size(s) { + } + }; + // Effect: Take pairs in any order, and add them all, as if we did block_allocator_alloc_block() on each pair. + // This should run in time O(N + M log M) where N is the number of blocks in ba, and M is the number of new blocks. + // Modifies: pairs (sorts them). + void alloc_blocks_at(uint64_t n_blocks, blockpair *pairs); -struct block_allocator_blockpair { - uint64_t offset; - uint64_t size; + // Effect: Allocate a block of the specified size at an address chosen by the allocator. + // Aborts if anything goes wrong. + // The block address will be a multiple of the alignment. + // Parameters: + // ba (IN/OUT): The block allocator. (Modifies ba.) + // size (IN): The size of the block. (The size does not have to be aligned.) + // offset (OUT): The location of the block. + void alloc_block(uint64_t size, uint64_t *offset); + + // Effect: Free the block at offset. + // Requires: There must be a block currently allocated at that offset. + // Parameters: + // ba (IN/OUT): The block allocator. (Modifies ba.) + // offset (IN): The offset of the block. + void free_block(uint64_t offset); + + // Effect: Return the size of the block that starts at offset. + // Requires: There must be a block currently allocated at that offset. + // Parameters: + // ba (IN/OUT): The block allocator. (Modifies ba.) + // offset (IN): The offset of the block. + uint64_t block_size(uint64_t offset); + + // Effect: Check to see if the block allocator is OK. This may take a long time. + // Usage Hints: Probably only use this for unit tests. + // TODO: Private? + void validate() const; + + // Effect: Return the unallocated block address of "infinite" size. + // That is, return the smallest address that is above all the allocated blocks. + uint64_t allocated_limit() const; + + // Effect: Consider the blocks in sorted order. The reserved block at the beginning is number 0. 
The next one is number 1 and so forth. + // Return the offset and size of the block with that number. + // Return 0 if there is a block that big, return nonzero if b is too big. + // Rationale: This is probably useful only for tests. + int get_nth_block_in_layout_order(uint64_t b, uint64_t *offset, uint64_t *size); + + // Effect: Fill in report to indicate how the file is used. + // Requires: + // report->file_size_bytes is filled in + // report->data_bytes is filled in + // report->checkpoint_bytes_additional is filled in + void get_unused_statistics(TOKU_DB_FRAGMENTATION report); + + // Effect: Merge dst[d] and src[s] into dst[d+s], merging in place. + // Initially dst and src hold sorted arrays (sorted by increasing offset). + // Finally dst contains all d+s elements sorted in order. + // Requires: + // dst and src are sorted. + // dst must be large enough (sizeof(dst) >= d && sizeof(src) >= s) + // No blocks may overlap. + // Rationale: This is exposed so it can be tested by a glass box tester. + static void merge_blockpairs_into(uint64_t d, struct blockpair dst[], + uint64_t s, const struct blockpair src[]); + +private: + void grow_blocks_array_by(uint64_t n_to_add); + void grow_blocks_array(); + int64_t find_block(uint64_t offset); + + static int compare_blockpairs(const void *av, const void *bv); + + // How much to reserve at the beginning + uint64_t _reserve_at_beginning; + // Block alignment + uint64_t _alignment; + // How many blocks + uint64_t _n_blocks; + // How big is the blocks_array. Must be >= n_blocks. + uint64_t _blocks_array_size; + // These blocks are sorted by address. + struct blockpair *_blocks_array; + // Including the reserve_at_beginning + uint64_t _n_bytes_in_use; }; -void block_allocator_alloc_blocks_at (BLOCK_ALLOCATOR ba, uint64_t n_blocks, struct block_allocator_blockpair *pairs); -// Effect: Take pairs in any order, and add them all, as if we did block_allocator_alloc_block() on each pair. 
-// This should run in time O(N + M log M) where N is the number of blocks in ba, and M is the number of new blocks. -// Modifies: pairs (sorts them). - -void block_allocator_alloc_block (BLOCK_ALLOCATOR ba, uint64_t size, uint64_t *offset); -// Effect: Allocate a block of the specified size at an address chosen by the allocator. -// Aborts if anything goes wrong. -// The block address will be a multiple of the alignment. -// Parameters: -// ba (IN/OUT): The block allocator. (Modifies ba.) -// size (IN): The size of the block. (The size does not have to be aligned.) -// offset (OUT): The location of the block. - -void block_allocator_free_block (BLOCK_ALLOCATOR ba, uint64_t offset); -// Effect: Free the block at offset. -// Requires: There must be a block currently allocated at that offset. -// Parameters: -// ba (IN/OUT): The block allocator. (Modifies ba.) -// offset (IN): The offset of the block. - - -uint64_t block_allocator_block_size (BLOCK_ALLOCATOR ba, uint64_t offset); -// Effect: Return the size of the block that starts at offset. -// Requires: There must be a block currently allocated at that offset. -// Parameters: -// ba (IN/OUT): The block allocator. (Modifies ba.) -// offset (IN): The offset of the block. - -void block_allocator_validate (BLOCK_ALLOCATOR ba); -// Effect: Check to see if the block allocator is OK. This may take a long time. -// Usage Hints: Probably only use this for unit tests. - -void block_allocator_print (BLOCK_ALLOCATOR ba); -// Effect: Print information about the block allocator. -// Rationale: This is probably useful only for debugging. - -uint64_t block_allocator_allocated_limit (BLOCK_ALLOCATOR ba); -// Effect: Return the unallocated block address of "infinite" size. -// That is, return the smallest address that is above all the allocated blocks. - -int block_allocator_get_nth_block_in_layout_order (BLOCK_ALLOCATOR ba, uint64_t b, uint64_t *offset, uint64_t *size); -// Effect: Consider the blocks in sorted order. 
The reserved block at the beginning is number 0. The next one is number 1 and so forth. -// Return the offset and size of the block with that number. -// Return 0 if there is a block that big, return nonzero if b is too big. -// Rationale: This is probably useful only for tests. - -void block_allocator_get_unused_statistics(BLOCK_ALLOCATOR ba, TOKU_DB_FRAGMENTATION report); -// Effect: Fill in report to indicate how the file is used. -// Requires: -// report->file_size_bytes is filled in -// report->data_bytes is filled in -// report->checkpoint_bytes_additional is filled in - -void block_allocator_merge_blockpairs_into (uint64_t d, struct block_allocator_blockpair dst[/*d*/], - uint64_t s, const struct block_allocator_blockpair src[/*s*/]); -// Effect: Merge dst[d] and src[s] into dst[d+s], merging in place. -// Initially dst and src hold sorted arrays (sorted by increasing offset). -// Finally dst contains all d+s elements sorted in order. -// Requires: -// dst and src are sorted. -// dst must be large enough. -// No blocks may overlap. -// Rationale: This is exposed so it can be tested by a glass box tester. Otherwise it would be static (file-scope) function inside block_allocator.c diff --git a/ft/block_table.cc b/ft/block_table.cc index 725aa9ec7d9..f6e7d543408 100644 --- a/ft/block_table.cc +++ b/ft/block_table.cc @@ -89,20 +89,21 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
-#include -#include "ft-internal.h" // ugly but pragmatic, need access to dirty bits while holding translation lock -#include "fttypes.h" -#include "block_table.h" -#include "memory.h" -#include "toku_assert.h" -#include -#include "block_allocator.h" -#include "rbuf.h" -#include "wbuf.h" -#include +#include "portability/toku_portability.h" +#include "portability/memory.h" +#include "portability/toku_assert.h" +#include "portability/toku_pthread.h" +#include "ft/block_allocator.h" +#include "ft/block_table.h" +#include "ft/ft-internal.h" // ugly but pragmatic, need access to dirty bits while holding translation lock +#include "ft/fttypes.h" // TODO: reorganize this dependency #include "ft/ft-ops.h" // for toku_maybe_truncate_file +#include "ft/rbuf.h" +#include "ft/wbuf.h" + +#include "util/nb_mutex.h" //When the translation (btt) is stored on disk: // In Header: @@ -157,8 +158,8 @@ struct block_table { struct translation checkpointed; // the translation for the data that shall remain inviolate on disk until the next checkpoint finishes, after which any blocks used only in this translation can be freed. // The in-memory data structure for block allocation. There is no on-disk data structure for block allocation. - // Note: This is *allocation* not *translation*. The block_allocator is unaware of which blocks are used for which translation, but simply allocates and deallocates blocks. - BLOCK_ALLOCATOR block_allocator; + // Note: This is *allocation* not *translation*. The bt_block_allocator is unaware of which blocks are used for which translation, but simply allocates and deallocates blocks. 
+ block_allocator bt_block_allocator; toku_mutex_t mutex; struct nb_mutex safe_file_size_lock; bool checkpoint_skipped; @@ -189,7 +190,7 @@ ft_set_dirty(FT ft, bool for_checkpoint){ static void maybe_truncate_file(BLOCK_TABLE bt, int fd, uint64_t size_needed_before) { toku_mutex_assert_locked(&bt->mutex); - uint64_t new_size_needed = block_allocator_allocated_limit(bt->block_allocator); + uint64_t new_size_needed = bt->bt_block_allocator.allocated_limit(); //Save a call to toku_os_get_file_size (kernel call) if unlikely to be useful. if (new_size_needed < size_needed_before && new_size_needed < bt->safe_file_size) { nb_mutex_lock(&bt->safe_file_size_lock, &bt->mutex); @@ -308,10 +309,6 @@ toku_block_translation_note_start_checkpoint_unlocked (BLOCK_TABLE bt) { bt->checkpoint_skipped = false; } -//#define PRNTF(str, b, siz, ad, bt) printf("%s[%d] %s %" PRId64 " %" PRId64 " %" PRId64 "\n", __FUNCTION__, __LINE__, str, b, siz, ad); fflush(stdout); if (bt) block_allocator_validate(((BLOCK_TABLE)(bt))->block_allocator); -//Debugging function -#define PRNTF(str, b, siz, ad, bt) - void toku_block_translation_note_skipped_checkpoint (BLOCK_TABLE bt) { //Purpose, alert block translation that the checkpoint was skipped, e.x. 
for a non-dirty header lock_for_blocktable(bt); @@ -334,7 +331,7 @@ void toku_block_translation_note_end_checkpoint (BLOCK_TABLE bt, int fd) { // Free unused blocks lock_for_blocktable(bt); - uint64_t allocated_limit_at_start = block_allocator_allocated_limit(bt->block_allocator); + uint64_t allocated_limit_at_start = bt->bt_block_allocator.allocated_limit(); paranoid_invariant_notnull(bt->inprogress.block_translation); if (bt->checkpoint_skipped) { toku_free(bt->inprogress.block_translation); @@ -354,8 +351,7 @@ toku_block_translation_note_end_checkpoint (BLOCK_TABLE bt, int fd) { struct block_translation_pair *pair = &t->block_translation[i]; if (pair->size > 0 && !translation_prevents_freeing(&bt->inprogress, make_blocknum(i), pair)) { assert(!translation_prevents_freeing(&bt->current, make_blocknum(i), pair)); - PRNTF("free", i, pair->size, pair->u.diskoff, bt); - block_allocator_free_block(bt->block_allocator, pair->u.diskoff); + bt->bt_block_allocator.free_block(pair->u.diskoff); } } toku_free(bt->checkpointed.block_translation); @@ -434,8 +430,7 @@ toku_ft_unlock (FT ft) { void toku_block_free(BLOCK_TABLE bt, uint64_t offset) { lock_for_blocktable(bt); -PRNTF("freeSOMETHINGunknown", 0L, 0L, offset, bt); - block_allocator_free_block(bt->block_allocator, offset); + bt->bt_block_allocator.free_block(offset); unlock_for_blocktable(bt); } @@ -463,14 +458,12 @@ blocknum_realloc_on_disk_internal (BLOCK_TABLE bt, BLOCKNUM b, DISKOFF size, DIS struct translation *t = &bt->current; struct block_translation_pair old_pair = t->block_translation[b.b]; -PRNTF("old", b.b, old_pair.size, old_pair.u.diskoff, bt); //Free the old block if it is not still in use by the checkpoint in progress or the previous checkpoint bool cannot_free = (bool) ((!for_checkpoint && translation_prevents_freeing(&bt->inprogress, b, &old_pair)) || translation_prevents_freeing(&bt->checkpointed, b, &old_pair)); if (!cannot_free && old_pair.u.diskoff!=diskoff_unused) { -PRNTF("Freed", b.b, 
old_pair.size, old_pair.u.diskoff, bt); - block_allocator_free_block(bt->block_allocator, old_pair.u.diskoff); + bt->bt_block_allocator.free_block(old_pair.u.diskoff); } uint64_t allocator_offset = diskoff_unused; @@ -478,12 +471,11 @@ PRNTF("Freed", b.b, old_pair.size, old_pair.u.diskoff, bt); if (size > 0) { // Allocate a new block if the size is greater than 0, // if the size is just 0, offset will be set to diskoff_unused - block_allocator_alloc_block(bt->block_allocator, size, &allocator_offset); + bt->bt_block_allocator.alloc_block(size, &allocator_offset); } t->block_translation[b.b].u.diskoff = allocator_offset; *offset = allocator_offset; -PRNTF("New", b.b, t->block_translation[b.b].size, t->block_translation[b.b].u.diskoff, bt); //Update inprogress btt if appropriate (if called because Pending bit is set). if (for_checkpoint) { paranoid_invariant(b.b < bt->inprogress.length_of_array); @@ -544,8 +536,7 @@ static void blocknum_alloc_translation_on_disk_unlocked(BLOCK_TABLE bt) //Allocate a new block int64_t size = calculate_size_on_disk(t); uint64_t offset; - block_allocator_alloc_block(bt->block_allocator, size, &offset); -PRNTF("blokAllokator", 1L, size, offset, bt); + bt->bt_block_allocator.alloc_block(size, &offset); t->block_translation[b.b].u.diskoff = offset; t->block_translation[b.b].size = size; } @@ -668,7 +659,6 @@ free_blocknum_in_translation(struct translation *t, BLOCKNUM b) verify_valid_freeable_blocknum(t, b); paranoid_invariant(t->block_translation[b.b].size != size_is_free); - PRNTF("free_blocknum", b.b, t->block_translation[b.b].size, t->block_translation[b.b].u.diskoff, bt); t->block_translation[b.b].size = size_is_free; t->block_translation[b.b].u.next_free_blocknum = t->blocknum_freelist_head; t->blocknum_freelist_head = b; @@ -697,8 +687,7 @@ free_blocknum_unlocked(BLOCK_TABLE bt, BLOCKNUM *bp, FT ft, bool for_checkpoint) (translation_prevents_freeing(&bt->inprogress, b, &old_pair) || translation_prevents_freeing(&bt->checkpointed, b, 
&old_pair)); if (!cannot_free) { -PRNTF("free_blocknum_free", b.b, old_pair.size, old_pair.u.diskoff, bt); - block_allocator_free_block(bt->block_allocator, old_pair.u.diskoff); + bt->bt_block_allocator.free_block(old_pair.u.diskoff); } } else { @@ -859,7 +848,7 @@ toku_blocktable_destroy(BLOCK_TABLE *btp) { if (bt->inprogress.block_translation) toku_free(bt->inprogress.block_translation); if (bt->checkpointed.block_translation) toku_free(bt->checkpointed.block_translation); - destroy_block_allocator(&bt->block_allocator); + bt->bt_block_allocator.destroy(); blocktable_lock_destroy(bt); nb_mutex_destroy(&bt->safe_file_size_lock); toku_free(bt); @@ -874,20 +863,18 @@ blocktable_create_internal (void) { nb_mutex_init(&bt->safe_file_size_lock); //There are two headers, so we reserve space for two. - uint64_t reserve_per_header = BLOCK_ALLOCATOR_HEADER_RESERVE; + uint64_t reserve_per_header = block_allocator::BLOCK_ALLOCATOR_HEADER_RESERVE; //Must reserve in multiples of BLOCK_ALLOCATOR_ALIGNMENT //Round up the per-header usage if necessary. //We want each header aligned. 
- uint64_t remainder = BLOCK_ALLOCATOR_HEADER_RESERVE % BLOCK_ALLOCATOR_ALIGNMENT; - if (remainder!=0) { - reserve_per_header += BLOCK_ALLOCATOR_ALIGNMENT; + uint64_t remainder = block_allocator::BLOCK_ALLOCATOR_HEADER_RESERVE % block_allocator::BLOCK_ALLOCATOR_ALIGNMENT; + if (remainder != 0) { + reserve_per_header += block_allocator::BLOCK_ALLOCATOR_ALIGNMENT; reserve_per_header -= remainder; } - assert(2*reserve_per_header == BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); - create_block_allocator(&bt->block_allocator, - BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE, - BLOCK_ALLOCATOR_ALIGNMENT); + assert(2 * reserve_per_header == block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + bt->bt_block_allocator.create(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE, block_allocator::BLOCK_ALLOCATOR_ALIGNMENT); return bt; } @@ -942,7 +929,6 @@ translation_deserialize_from_buffer(struct translation *t, // destination int for (i=0; i < t->length_of_array; i++) { t->block_translation[i].u.diskoff = rbuf_diskoff(&rt); t->block_translation[i].size = rbuf_diskoff(&rt); -PRNTF("ReadIn", i, t->block_translation[i].size, t->block_translation[i].u.diskoff, NULL); } assert(calculate_size_on_disk(t) == (int64_t)size_on_disk); assert(t->block_translation[RESERVED_BLOCKNUM_TRANSLATION].size == (int64_t)size_on_disk); @@ -952,24 +938,22 @@ exit: } // We just initialized a translation, inform block allocator to reserve space for each blocknum in use. -static void -blocktable_note_translation (BLOCK_ALLOCATOR allocator, struct translation *t) { +static void blocktable_note_translation(block_allocator *ba, struct translation *t) { //This is where the space for them will be reserved (in addition to normal blocks). //See RESERVED_BLOCKNUMS // Previously this added blocks one at a time. Now we make an array and pass it in so it can be sorted and merged. See #3218. 
- struct block_allocator_blockpair *XMALLOC_N(t->smallest_never_used_blocknum.b, pairs); + struct block_allocator::blockpair *XMALLOC_N(t->smallest_never_used_blocknum.b, pairs); uint64_t n_pairs = 0; for (int64_t i=0; ismallest_never_used_blocknum.b; i++) { struct block_translation_pair pair = t->block_translation[i]; if (pair.size > 0) { paranoid_invariant(pair.u.diskoff != diskoff_unused); int cur_pair = n_pairs++; - pairs[cur_pair] = (struct block_allocator_blockpair) { .offset = (uint64_t) pair.u.diskoff, - .size = (uint64_t) pair.size }; + pairs[cur_pair] = block_allocator::blockpair(pair.u.diskoff, pair.size); } } - block_allocator_alloc_blocks_at(allocator, n_pairs, pairs); + ba->alloc_blocks_at(n_pairs, pairs); toku_free(pairs); } @@ -989,7 +973,7 @@ toku_blocktable_create_from_buffer(int fd, if (r != 0) { goto exit; } - blocktable_note_translation(bt->block_allocator, &bt->checkpointed); + blocktable_note_translation(&bt->bt_block_allocator, &bt->checkpointed); // we just filled in checkpointed, now copy it to current. copy_translation(&bt->current, &bt->checkpointed, TRANSLATION_CURRENT); @@ -1009,7 +993,7 @@ void toku_blocktable_create_new(BLOCK_TABLE *btp) { BLOCK_TABLE bt = blocktable_create_internal(); translation_default(&bt->checkpointed); // create default btt (empty except for reserved blocknums) - blocktable_note_translation(bt->block_allocator, &bt->checkpointed); + blocktable_note_translation(&bt->bt_block_allocator, &bt->checkpointed); // we just created a default checkpointed, now copy it to current. copy_translation(&bt->current, &bt->checkpointed, TRANSLATION_CURRENT); @@ -1103,9 +1087,9 @@ toku_block_table_get_fragmentation_unlocked(BLOCK_TABLE bt, TOKU_DB_FRAGMENTATIO //Requires: report->file_size_bytes is already filled in. //Count the headers. 
- report->data_bytes = BLOCK_ALLOCATOR_HEADER_RESERVE; + report->data_bytes = block_allocator::BLOCK_ALLOCATOR_HEADER_RESERVE; report->data_blocks = 1; - report->checkpoint_bytes_additional = BLOCK_ALLOCATOR_HEADER_RESERVE; + report->checkpoint_bytes_additional = block_allocator::BLOCK_ALLOCATOR_HEADER_RESERVE; report->checkpoint_blocks_additional = 1; struct translation *current = &bt->current; @@ -1145,7 +1129,7 @@ toku_block_table_get_fragmentation_unlocked(BLOCK_TABLE bt, TOKU_DB_FRAGMENTATIO } } - block_allocator_get_unused_statistics(bt->block_allocator, report); + bt->bt_block_allocator.get_unused_statistics(report); } void diff --git a/ft/ft-cachetable-wrappers.cc b/ft/ft-cachetable-wrappers.cc index f7834fad991..3e8800ad0fc 100644 --- a/ft/ft-cachetable-wrappers.cc +++ b/ft/ft-cachetable-wrappers.cc @@ -89,6 +89,7 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." +#include "ft/block_table.h" #include "ft/fttypes.h" #include "ft/ft-cachetable-wrappers.h" #include "ft/ft-flusher.h" diff --git a/ft/ft-flusher.cc b/ft/ft-flusher.cc index cfc3485f3db..34d2e5d8c63 100644 --- a/ft/ft-flusher.cc +++ b/ft/ft-flusher.cc @@ -89,6 +89,7 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
+#include "ft/block_table.h" #include "ft/ft.h" #include "ft/ft-cachetable-wrappers.h" #include "ft/ft-internal.h" @@ -1334,11 +1335,7 @@ maybe_merge_pinned_nodes( } } -static void merge_remove_key_callback( - BLOCKNUM *bp, - bool for_checkpoint, - void *extra) -{ +static void merge_remove_key_callback(BLOCKNUM *bp, bool for_checkpoint, void *extra) { FT ft = (FT) extra; toku_free_blocknum(ft->blocktable, bp, ft, for_checkpoint); } diff --git a/ft/ft-internal.h b/ft/ft-internal.h index 7bcf7d5acad..7f264056d65 100644 --- a/ft/ft-internal.h +++ b/ft/ft-internal.h @@ -109,7 +109,6 @@ PATENT RIGHTS GRANT: #include "toku_list.h" #include #include "leafentry.h" -#include "block_table.h" #include "compress.h" #include #include "ft/bndata.h" @@ -117,6 +116,7 @@ PATENT RIGHTS GRANT: #include "ft/rollback.h" #include "ft/msg_buffer.h" +struct block_table; struct ft_search; enum { KEY_VALUE_OVERHEAD = 8 }; /* Must store the two lengths. */ @@ -229,7 +229,7 @@ struct ft { // These are not read-only: // protected by blocktable lock - BLOCK_TABLE blocktable; + struct block_table *blocktable; // protected by atomic builtins STAT64INFO_S in_memory_stats; @@ -385,7 +385,7 @@ unsigned int toku_serialize_ftnode_size(FTNODE node); /* How much space will it void toku_verify_or_set_counts(FTNODE); size_t toku_serialize_ft_size (FT_HEADER h); -void toku_serialize_ft_to (int fd, FT_HEADER h, BLOCK_TABLE blocktable, CACHEFILE cf); +void toku_serialize_ft_to (int fd, FT_HEADER h, struct block_table *blocktable, CACHEFILE cf); void toku_serialize_ft_to_wbuf ( struct wbuf *wbuf, FT_HEADER h, diff --git a/ft/ft-ops.cc b/ft/ft-ops.cc index 64818ea6925..4bc15ee22c4 100644 --- a/ft/ft-ops.cc +++ b/ft/ft-ops.cc @@ -200,21 +200,22 @@ basement nodes, bulk fetch, and partial fetch: */ -#include "checkpoint.h" -#include "cursor.h" -#include "ft.h" -#include "ft-cachetable-wrappers.h" -#include "ft-flusher.h" -#include "ft-internal.h" -#include "node.h" -#include "ft_layout_version.h" -#include 
"log-internal.h" -#include "sub_block.h" -#include "txn_manager.h" -#include "leafentry.h" -#include "xids.h" -#include "ft_msg.h" -#include "ule.h" +#include "ft/block_table.h" +#include "ft/checkpoint.h" +#include "ft/cursor.h" +#include "ft/ft.h" +#include "ft/ft-cachetable-wrappers.h" +#include "ft/ft-flusher.h" +#include "ft/ft-internal.h" +#include "ft/ft_layout_version.h" +#include "ft/ft_msg.h" +#include "ft/leafentry.h" +#include "ft/log-internal.h" +#include "ft/node.h" +#include "ft/sub_block.h" +#include "ft/txn_manager.h" +#include "ft/ule.h" +#include "ft/xids.h" #include diff --git a/ft/ft-serialize.cc b/ft/ft-serialize.cc index c9a8c2ffbc5..39d260deb7f 100644 --- a/ft/ft-serialize.cc +++ b/ft/ft-serialize.cc @@ -89,9 +89,10 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#include "compress.h" -#include "ft.h" -#include "ft-internal.h" +#include "ft/block_table.h" +#include "ft/compress.h" +#include "ft/ft.h" +#include "ft/ft-internal.h" // not version-sensitive because we only serialize a descriptor using the current layout_version uint32_t @@ -509,7 +510,7 @@ serialize_ft_min_size (uint32_t version) { abort(); } - lazy_assert(size <= BLOCK_ALLOCATOR_HEADER_RESERVE); + lazy_assert(size <= block_allocator::BLOCK_ALLOCATOR_HEADER_RESERVE); return size; } @@ -586,7 +587,7 @@ int deserialize_ft_from_fd_into_rbuf(int fd, //If too big, it is corrupt. We would probably notice during checksum //but may have to do a multi-gigabyte malloc+read to find out. //If its too small reading rbuf would crash, so verify. 
- if (size > BLOCK_ALLOCATOR_HEADER_RESERVE || size < min_header_size) { + if (size > block_allocator::BLOCK_ALLOCATOR_HEADER_RESERVE || size < min_header_size) { r = TOKUDB_DICTIONARY_NO_HEADER; goto exit; } @@ -675,7 +676,7 @@ toku_deserialize_ft_from(int fd, h0_acceptable = true; } - toku_off_t header_1_off = BLOCK_ALLOCATOR_HEADER_RESERVE; + toku_off_t header_1_off = block_allocator::BLOCK_ALLOCATOR_HEADER_RESERVE; r1 = deserialize_ft_from_fd_into_rbuf(fd, header_1_off, &rb_1, &checkpoint_count_1, &checkpoint_lsn_1, &version_1); if (r1 == 0 && checkpoint_lsn_1.lsn <= max_acceptable_lsn.lsn) { h1_acceptable = true; @@ -754,7 +755,7 @@ exit: size_t toku_serialize_ft_size (FT_HEADER h) { size_t size = serialize_ft_min_size(h->layout_version); //There is no dynamic data. - lazy_assert(size <= BLOCK_ALLOCATOR_HEADER_RESERVE); + lazy_assert(size <= block_allocator::BLOCK_ALLOCATOR_HEADER_RESERVE); return size; } @@ -816,7 +817,7 @@ void toku_serialize_ft_to (int fd, FT_HEADER h, BLOCK_TABLE blocktable, CACHEFIL struct wbuf w_main; size_t size_main = toku_serialize_ft_size(h); size_t size_main_aligned = roundup_to_multiple(512, size_main); - assert(size_main_alignedcheckpoint_count & 0x1) ? 0 : BLOCK_ALLOCATOR_HEADER_RESERVE; + main_offset = (h->checkpoint_count & 0x1) ? 
0 : block_allocator::BLOCK_ALLOCATOR_HEADER_RESERVE; toku_os_full_pwrite(fd, w_main.buf, size_main_aligned, main_offset); toku_free(w_main.buf); toku_free(w_translation.buf); diff --git a/ft/ft-verify.cc b/ft/ft-verify.cc index a776446d9ff..056ae18bd17 100644 --- a/ft/ft-verify.cc +++ b/ft/ft-verify.cc @@ -97,10 +97,11 @@ PATENT RIGHTS GRANT: * For each nonleaf node: All the messages have keys that are between the associated pivot keys ( left_pivot_key < message <= right_pivot_key) */ -#include "ft-cachetable-wrappers.h" -#include "ft-internal.h" -#include "ft.h" -#include "node.h" +#include "ft/block_table.h" +#include "ft/ft.h" +#include "ft/ft-cachetable-wrappers.h" +#include "ft/ft-internal.h" +#include "ft/node.h" static int compare_pairs (FT_HANDLE ft_handle, const DBT *a, const DBT *b) { diff --git a/ft/ft.cc b/ft/ft.cc index e05307018e6..e60e57d547c 100644 --- a/ft/ft.cc +++ b/ft/ft.cc @@ -89,6 +89,7 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
+#include "ft/block_table.h" #include "ft/ft.h" #include "ft/ft-cachetable-wrappers.h" #include "ft/ft-internal.h" @@ -107,10 +108,10 @@ toku_reset_root_xid_that_created(FT ft, TXNID new_root_xid_that_created) { // hold lock around setting and clearing of dirty bit // (see cooperative use of dirty bit in ft_begin_checkpoint()) - toku_ft_lock (ft); + toku_ft_lock(ft); ft->h->root_xid_that_created = new_root_xid_that_created; ft->h->dirty = 1; - toku_ft_unlock (ft); + toku_ft_unlock(ft); } static void diff --git a/ft/ft_node-serialize.cc b/ft/ft_node-serialize.cc index 660c7b479df..ba808685a87 100644 --- a/ft/ft_node-serialize.cc +++ b/ft/ft_node-serialize.cc @@ -89,6 +89,7 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
+#include "ft/block_table.h" #include "ft/cachetable.h" #include "ft/compress.h" #include "ft/ft.h" diff --git a/ft/ftverify.cc b/ft/ftverify.cc index d82d4ae1240..cd72de387c8 100644 --- a/ft/ftverify.cc +++ b/ft/ftverify.cc @@ -200,7 +200,7 @@ deserialize_headers(int fd, struct ft **h1p, struct ft **h2p) } } { - toku_off_t header_1_off = BLOCK_ALLOCATOR_HEADER_RESERVE; + toku_off_t header_1_off = block_allocator::BLOCK_ALLOCATOR_HEADER_RESERVE; r1 = deserialize_ft_from_fd_into_rbuf( fd, header_1_off, diff --git a/ft/loader/loader.cc b/ft/loader/loader.cc index 75272172a46..b56cfd8e5f6 100644 --- a/ft/loader/loader.cc +++ b/ft/loader/loader.cc @@ -100,18 +100,19 @@ PATENT RIGHTS GRANT: #include #include -#include +#include "ft/block_table.h" +#include "ft/ft.h" +#include "ft/ft-internal.h" +#include "ft/leafentry.h" +#include "ft/loader/loader-internal.h" +#include "ft/loader/pqueue.h" +#include "ft/loader/dbufio.h" +#include "ft/log-internal.h" +#include "ft/node.h" +#include "ft/sub_block.h" +#include "ft/sub_block_map.h" -#include "loader/loader-internal.h" -#include "ft-internal.h" -#include "sub_block.h" -#include "sub_block_map.h" -#include "loader/pqueue.h" -#include "loader/dbufio.h" -#include "leafentry.h" -#include "log-internal.h" -#include "ft.h" -#include "node.h" +#include "util/x1764.h" static size_t (*os_fwrite_fun)(const void *,size_t,size_t,FILE*)=NULL; void ft_loader_set_os_fwrite (size_t (*fwrite_fun)(const void*,size_t,size_t,FILE*)) { diff --git a/ft/logger.cc b/ft/logger.cc index 004c200cc62..9900e7d6f6a 100644 --- a/ft/logger.cc +++ b/ft/logger.cc @@ -94,12 +94,13 @@ PATENT RIGHTS GRANT: #include #include -#include "ft.h" -#include "log-internal.h" -#include "txn_manager.h" -#include "rollback_log_node_cache.h" +#include "ft/block_table.h" +#include "ft/ft.h" +#include "ft/log-internal.h" +#include "ft/txn_manager.h" +#include "ft/rollback_log_node_cache.h" -#include +#include "util/status.h" static const int 
log_format_version=TOKU_LOG_VERSION; diff --git a/ft/rollback-ct-callbacks.cc b/ft/rollback-ct-callbacks.cc index 7083e17bd0d..997387eac2b 100644 --- a/ft/rollback-ct-callbacks.cc +++ b/ft/rollback-ct-callbacks.cc @@ -89,15 +89,16 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#include -#include +#include "portability/memory.h" +#include "portability/toku_portability.h" -#include "ft-internal.h" -#include "fttypes.h" -#include "rollback.h" -#include "rollback-ct-callbacks.h" +#include "ft/block_table.h" +#include "ft/ft-internal.h" +#include "ft/fttypes.h" +#include "ft/rollback.h" +#include "ft/rollback-ct-callbacks.h" -#include +#include "util/memarena.h" // Address used as a sentinel. Otherwise unused. 
static struct serialized_rollback_log_node cloned_rollback; diff --git a/ft/rollback.cc b/ft/rollback.cc index ccb8fbfa286..0648246d4b3 100644 --- a/ft/rollback.cc +++ b/ft/rollback.cc @@ -91,25 +91,21 @@ PATENT RIGHTS GRANT: #include -#include "ft.h" -#include "log-internal.h" -#include "rollback-ct-callbacks.h" +#include "ft/block_table.h" +#include "ft/ft.h" +#include "ft/log-internal.h" +#include "ft/rollback-ct-callbacks.h" static void rollback_unpin_remove_callback(CACHEKEY* cachekey, bool for_checkpoint, void* extra) { - FT CAST_FROM_VOIDP(h, extra); - toku_free_blocknum( - h->blocktable, - cachekey, - h, - for_checkpoint - ); + FT CAST_FROM_VOIDP(ft, extra); + toku_free_blocknum(ft->blocktable, cachekey, ft, for_checkpoint); } void toku_rollback_log_unpin_and_remove(TOKUTXN txn, ROLLBACK_LOG_NODE log) { int r; CACHEFILE cf = txn->logger->rollback_cachefile; - FT CAST_FROM_VOIDP(h, toku_cachefile_get_userdata(cf)); - r = toku_cachetable_unpin_and_remove (cf, log->ct_pair, rollback_unpin_remove_callback, h); + FT CAST_FROM_VOIDP(ft, toku_cachefile_get_userdata(cf)); + r = toku_cachetable_unpin_and_remove (cf, log->ct_pair, rollback_unpin_remove_callback, ft); assert(r == 0); } diff --git a/ft/tests/block_allocator_test.cc b/ft/tests/block_allocator_test.cc index ef6f1fcdc97..9b02061ae47 100644 --- a/ft/tests/block_allocator_test.cc +++ b/ft/tests/block_allocator_test.cc @@ -91,42 +91,40 @@ PATENT RIGHTS GRANT: #include "test.h" -static void ba_alloc_at (BLOCK_ALLOCATOR ba, uint64_t size, uint64_t offset) { - block_allocator_validate(ba); - block_allocator_alloc_block_at(ba, size*512, offset*512); - block_allocator_validate(ba); +static void ba_alloc_at(block_allocator *ba, uint64_t size, uint64_t offset) { + ba->validate(); + ba->alloc_block_at(size * 512, offset * 512); + ba->validate(); } -static void ba_alloc (BLOCK_ALLOCATOR ba, uint64_t size, uint64_t *answer) { - block_allocator_validate(ba); +static void ba_alloc(block_allocator *ba, uint64_t size, 
uint64_t *answer) { + ba->validate(); uint64_t actual_answer; - block_allocator_alloc_block(ba, 512*size, &actual_answer); - block_allocator_validate(ba); + ba->alloc_block(512 * size, &actual_answer); + ba->validate(); + assert(actual_answer%512==0); *answer = actual_answer/512; } -static void ba_free (BLOCK_ALLOCATOR ba, uint64_t offset) { - block_allocator_validate(ba); - block_allocator_free_block(ba, offset*512); - block_allocator_validate(ba); +static void ba_free(block_allocator *ba, uint64_t offset) { + ba->validate(); + ba->free_block(offset * 512); + ba->validate(); } -static void -ba_check_l (BLOCK_ALLOCATOR ba, uint64_t blocknum_in_layout_order, uint64_t expected_offset, uint64_t expected_size) -{ +static void ba_check_l(block_allocator *ba, uint64_t blocknum_in_layout_order, + uint64_t expected_offset, uint64_t expected_size) { uint64_t actual_offset, actual_size; - int r = block_allocator_get_nth_block_in_layout_order(ba, blocknum_in_layout_order, &actual_offset, &actual_size); + int r = ba->get_nth_block_in_layout_order(blocknum_in_layout_order, &actual_offset, &actual_size); assert(r==0); assert(expected_offset*512 == actual_offset); assert(expected_size *512 == actual_size); } -static void -ba_check_none (BLOCK_ALLOCATOR ba, uint64_t blocknum_in_layout_order) -{ +static void ba_check_none(block_allocator *ba, uint64_t blocknum_in_layout_order) { uint64_t actual_offset, actual_size; - int r = block_allocator_get_nth_block_in_layout_order(ba, blocknum_in_layout_order, &actual_offset, &actual_size); + int r = ba->get_nth_block_in_layout_order(blocknum_in_layout_order, &actual_offset, &actual_size); assert(r==-1); } @@ -134,12 +132,13 @@ ba_check_none (BLOCK_ALLOCATOR ba, uint64_t blocknum_in_layout_order) // Simple block allocator test static void test_ba0 (void) { - BLOCK_ALLOCATOR ba; + block_allocator allocator; + block_allocator *ba = &allocator; uint64_t b0, b1; - create_block_allocator(&ba, 100*512, 1*512); - 
assert(block_allocator_allocated_limit(ba)==100*512); + ba->create(100*512, 1*512); + assert(ba->allocated_limit()==100*512); ba_alloc_at(ba, 50, 100); - assert(block_allocator_allocated_limit(ba)==150*512); + assert(ba->allocated_limit()==150*512); ba_alloc_at(ba, 25, 150); ba_alloc (ba, 10, &b0); ba_check_l (ba, 0, 0, 100); @@ -154,9 +153,9 @@ test_ba0 (void) { assert(b0==160); ba_alloc(ba, 10, &b0); ba_alloc(ba, 113, &b1); - assert(113*512==block_allocator_block_size(ba, b1 *512)); - assert(10 *512==block_allocator_block_size(ba, b0 *512)); - assert(50 *512==block_allocator_block_size(ba, 100*512)); + assert(113*512==ba->block_size(b1 *512)); + assert(10 *512==ba->block_size(b0 *512)); + assert(50 *512==ba->block_size(100*512)); uint64_t b2, b3, b4, b5, b6, b7; ba_alloc(ba, 100, &b2); @@ -183,15 +182,15 @@ test_ba0 (void) { ba_free(ba, b4); ba_alloc(ba, 100, &b4); - destroy_block_allocator(&ba); - assert(ba==0); + ba->destroy(); } // Manually to get coverage of all the code in the block allocator. static void test_ba1 (int n_initial) { - BLOCK_ALLOCATOR ba; - create_block_allocator(&ba, 0*512, 1*512); + block_allocator allocator; + block_allocator *ba = &allocator; + ba->create(0*512, 1*512); int i; int n_blocks=0; uint64_t blocks[1000]; @@ -213,19 +212,19 @@ test_ba1 (int n_initial) { } } - destroy_block_allocator(&ba); - assert(ba==0); + ba->destroy(); } // Check to see if it is first fit or best fit. 
static void test_ba2 (void) { - BLOCK_ALLOCATOR ba; + block_allocator allocator; + block_allocator *ba = &allocator; uint64_t b[6]; enum { BSIZE = 1024 }; - create_block_allocator(&ba, 100*512, BSIZE*512); - assert(block_allocator_allocated_limit(ba)==100*512); + ba->create(100*512, BSIZE*512); + assert(ba->allocated_limit()==100*512); ba_check_l (ba, 0, 0, 100); ba_check_none (ba, 1); @@ -344,7 +343,7 @@ test_ba2 (void) ba_alloc(ba, 100, &b11); assert(b11==5*BSIZE); - destroy_block_allocator(&ba); + ba->destroy(); } int diff --git a/ft/tests/ft-bfe-query.cc b/ft/tests/ft-bfe-query.cc index 4e9be6750a7..33a146c79ba 100644 --- a/ft/tests/ft-bfe-query.cc +++ b/ft/tests/ft-bfe-query.cc @@ -435,10 +435,10 @@ test_prefetching(void) { DISKOFF offset; DISKOFF size; toku_blocknum_realloc_on_disk(ft_h->blocktable, b, 100, &offset, ft_h, fd, false); - assert(offset==BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + assert(offset==block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); toku_translate_blocknum_to_offset_size(ft_h->blocktable, b, &offset, &size); - assert(offset == BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + assert(offset == block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); assert(size == 100); } FTNODE_DISK_DATA ndd = NULL; @@ -450,7 +450,7 @@ test_prefetching(void) { toku_destroy_ftnode_internals(&sn); - toku_block_free(ft_h->blocktable, BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + toku_block_free(ft_h->blocktable, block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); toku_blocktable_destroy(&ft_h->blocktable); ft_h->cmp.destroy(); toku_free(ft_h->h); diff --git a/ft/tests/ft-clock-test.cc b/ft/tests/ft-clock-test.cc index 2dbed9b5fa9..a9cec44dacb 100644 --- a/ft/tests/ft-clock-test.cc +++ b/ft/tests/ft-clock-test.cc @@ -371,10 +371,10 @@ test_serialize_nonleaf(void) { DISKOFF offset; DISKOFF size; toku_blocknum_realloc_on_disk(ft_h->blocktable, b, 100, &offset, ft_h, fd, false); - assert(offset==BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + 
assert(offset==block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); toku_translate_blocknum_to_offset_size(ft_h->blocktable, b, &offset, &size); - assert(offset == BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + assert(offset == block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); assert(size == 100); } FTNODE_DISK_DATA ndd = NULL; @@ -387,7 +387,7 @@ test_serialize_nonleaf(void) { toku_destroy_ftnode_internals(&sn); toku_free(ndd); - toku_block_free(ft_h->blocktable, BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + toku_block_free(ft_h->blocktable, block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); toku_blocktable_destroy(&ft_h->blocktable); toku_free(ft_h->h); ft_h->cmp.destroy(); @@ -451,10 +451,10 @@ test_serialize_leaf(void) { DISKOFF offset; DISKOFF size; toku_blocknum_realloc_on_disk(ft_h->blocktable, b, 100, &offset, ft_h, fd, false); - assert(offset==BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + assert(offset==block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); toku_translate_blocknum_to_offset_size(ft_h->blocktable, b, &offset, &size); - assert(offset == BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + assert(offset == block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); assert(size == 100); } FTNODE_DISK_DATA ndd = NULL; @@ -466,7 +466,7 @@ test_serialize_leaf(void) { toku_destroy_ftnode_internals(&sn); - toku_block_free(ft_h->blocktable, BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + toku_block_free(ft_h->blocktable, block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); toku_blocktable_destroy(&ft_h->blocktable); toku_free(ft_h->h); toku_free(ft_h); diff --git a/ft/tests/ft-serialize-benchmark.cc b/ft/tests/ft-serialize-benchmark.cc index 937d15be30d..33e2e34347e 100644 --- a/ft/tests/ft-serialize-benchmark.cc +++ b/ft/tests/ft-serialize-benchmark.cc @@ -211,10 +211,10 @@ test_serialize_leaf(int valsize, int nelts, double entropy, int ser_runs, int de DISKOFF offset; DISKOFF size; toku_blocknum_realloc_on_disk(ft_h->blocktable, b, 100, &offset, ft_h, fd, false); - 
assert(offset==BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + assert(offset==block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); toku_translate_blocknum_to_offset_size(ft_h->blocktable, b, &offset, &size); - assert(offset == BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + assert(offset == block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); assert(size == 100); } @@ -277,7 +277,7 @@ test_serialize_leaf(int valsize, int nelts, double entropy, int ser_runs, int de toku_ftnode_free(&sn); - toku_block_free(ft_h->blocktable, BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + toku_block_free(ft_h->blocktable, block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); toku_blocktable_destroy(&ft_h->blocktable); ft_h->cmp.destroy(); toku_free(ft_h->h); @@ -374,10 +374,10 @@ test_serialize_nonleaf(int valsize, int nelts, double entropy, int ser_runs, int DISKOFF offset; DISKOFF size; toku_blocknum_realloc_on_disk(ft_h->blocktable, b, 100, &offset, ft_h, fd, false); - assert(offset==BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + assert(offset==block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); toku_translate_blocknum_to_offset_size(ft_h->blocktable, b, &offset, &size); - assert(offset == BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + assert(offset == block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); assert(size == 100); } @@ -412,7 +412,7 @@ test_serialize_nonleaf(int valsize, int nelts, double entropy, int ser_runs, int toku_ftnode_free(&dn); toku_destroy_ftnode_internals(&sn); - toku_block_free(ft_h->blocktable, BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + toku_block_free(ft_h->blocktable, block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); toku_blocktable_destroy(&ft_h->blocktable); toku_free(ft_h->h); ft_h->cmp.destroy(); diff --git a/ft/tests/ft-serialize-test.cc b/ft/tests/ft-serialize-test.cc index 5ff14e87ed7..1a2ba6564da 100644 --- a/ft/tests/ft-serialize-test.cc +++ b/ft/tests/ft-serialize-test.cc @@ -315,10 +315,10 @@ test_serialize_leaf_check_msn(enum ftnode_verify_type bft, bool 
do_clone) { DISKOFF offset; DISKOFF size; toku_blocknum_realloc_on_disk(ft_h->blocktable, b, 100, &offset, ft_h, fd, false); - assert(offset==BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + assert(offset==block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); toku_translate_blocknum_to_offset_size(ft_h->blocktable, b, &offset, &size); - assert(offset == BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + assert(offset == block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); assert(size == 100); } FTNODE_DISK_DATA src_ndd = NULL; @@ -373,7 +373,7 @@ test_serialize_leaf_check_msn(enum ftnode_verify_type bft, bool do_clone) { toku_ftnode_free(&dn); toku_destroy_ftnode_internals(&sn); - toku_block_free(ft_h->blocktable, BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + toku_block_free(ft_h->blocktable, block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); toku_blocktable_destroy(&ft_h->blocktable); toku_free(ft_h->h); toku_free(ft_h); @@ -448,10 +448,10 @@ test_serialize_leaf_with_large_pivots(enum ftnode_verify_type bft, bool do_clone DISKOFF offset; DISKOFF size; toku_blocknum_realloc_on_disk(ft_h->blocktable, b, 100, &offset, ft_h, fd, false); - assert(offset==BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + assert(offset==block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); toku_translate_blocknum_to_offset_size(ft_h->blocktable, b, &offset, &size); - assert(offset == BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + assert(offset == block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); assert(size == 100); } FTNODE_DISK_DATA src_ndd = NULL; @@ -508,7 +508,7 @@ test_serialize_leaf_with_large_pivots(enum ftnode_verify_type bft, bool do_clone toku_ftnode_free(&dn); toku_destroy_ftnode_internals(&sn); - toku_block_free(ft_h->blocktable, BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + toku_block_free(ft_h->blocktable, block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); toku_blocktable_destroy(&ft_h->blocktable); toku_free(ft_h->h); toku_free(ft_h); @@ -574,10 +574,10 @@ 
test_serialize_leaf_with_many_rows(enum ftnode_verify_type bft, bool do_clone) { DISKOFF offset; DISKOFF size; toku_blocknum_realloc_on_disk(ft_h->blocktable, b, 100, &offset, ft_h, fd, false); - assert(offset==BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + assert(offset==block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); toku_translate_blocknum_to_offset_size(ft_h->blocktable, b, &offset, &size); - assert(offset == BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + assert(offset == block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); assert(size == 100); } @@ -636,7 +636,7 @@ test_serialize_leaf_with_many_rows(enum ftnode_verify_type bft, bool do_clone) { toku_ftnode_free(&dn); toku_destroy_ftnode_internals(&sn); - toku_block_free(ft_h->blocktable, BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + toku_block_free(ft_h->blocktable, block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); toku_blocktable_destroy(&ft_h->blocktable); toku_free(ft_h->h); toku_free(ft_h); @@ -709,10 +709,10 @@ test_serialize_leaf_with_large_rows(enum ftnode_verify_type bft, bool do_clone) DISKOFF offset; DISKOFF size; toku_blocknum_realloc_on_disk(ft_h->blocktable, b, 100, &offset, ft_h, fd, false); - assert(offset==BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + assert(offset==block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); toku_translate_blocknum_to_offset_size(ft_h->blocktable, b, &offset, &size); - assert(offset == BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + assert(offset == block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); assert(size == 100); } @@ -773,7 +773,7 @@ test_serialize_leaf_with_large_rows(enum ftnode_verify_type bft, bool do_clone) toku_ftnode_free(&dn); toku_destroy_ftnode_internals(&sn); - toku_block_free(ft_h->blocktable, BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + toku_block_free(ft_h->blocktable, block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); toku_blocktable_destroy(&ft_h->blocktable); toku_free(ft_h->h); toku_free(ft_h); @@ -845,10 +845,10 @@ 
test_serialize_leaf_with_empty_basement_nodes(enum ftnode_verify_type bft, bool DISKOFF offset; DISKOFF size; toku_blocknum_realloc_on_disk(ft_h->blocktable, b, 100, &offset, ft_h, fd, false); - assert(offset==BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + assert(offset==block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); toku_translate_blocknum_to_offset_size(ft_h->blocktable, b, &offset, &size); - assert(offset == BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + assert(offset == block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); assert(size == 100); } FTNODE_DISK_DATA src_ndd = NULL; @@ -901,7 +901,7 @@ test_serialize_leaf_with_empty_basement_nodes(enum ftnode_verify_type bft, bool toku_ftnode_free(&dn); toku_destroy_ftnode_internals(&sn); - toku_block_free(ft_h->blocktable, BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + toku_block_free(ft_h->blocktable, block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); toku_blocktable_destroy(&ft_h->blocktable); toku_free(ft_h->h); toku_free(ft_h); @@ -965,10 +965,10 @@ test_serialize_leaf_with_multiple_empty_basement_nodes(enum ftnode_verify_type b DISKOFF offset; DISKOFF size; toku_blocknum_realloc_on_disk(ft_h->blocktable, b, 100, &offset, ft_h, fd, false); - assert(offset==BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + assert(offset==block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); toku_translate_blocknum_to_offset_size(ft_h->blocktable, b, &offset, &size); - assert(offset == BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + assert(offset == block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); assert(size == 100); } @@ -1000,7 +1000,7 @@ test_serialize_leaf_with_multiple_empty_basement_nodes(enum ftnode_verify_type b toku_ftnode_free(&dn); toku_destroy_ftnode_internals(&sn); - toku_block_free(ft_h->blocktable, BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + toku_block_free(ft_h->blocktable, block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); toku_blocktable_destroy(&ft_h->blocktable); toku_free(ft_h->h); toku_free(ft_h); @@ -1088,10 
+1088,10 @@ test_serialize_nonleaf(enum ftnode_verify_type bft, bool do_clone) { DISKOFF offset; DISKOFF size; toku_blocknum_realloc_on_disk(ft_h->blocktable, b, 100, &offset, ft_h, fd, false); - assert(offset==BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + assert(offset==block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); toku_translate_blocknum_to_offset_size(ft_h->blocktable, b, &offset, &size); - assert(offset == BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + assert(offset == block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); assert(size == 100); } FTNODE_DISK_DATA src_ndd = NULL; @@ -1123,7 +1123,7 @@ test_serialize_nonleaf(enum ftnode_verify_type bft, bool do_clone) { toku_ftnode_free(&dn); toku_destroy_ftnode_internals(&sn); - toku_block_free(ft_h->blocktable, BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + toku_block_free(ft_h->blocktable, block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); toku_blocktable_destroy(&ft_h->blocktable); ft_h->cmp.destroy(); toku_free(ft_h->h); diff --git a/ft/tests/test_block_allocator_merge.cc b/ft/tests/test_block_allocator_merge.cc index af66c7408bf..796a09f398b 100644 --- a/ft/tests/test_block_allocator_merge.cc +++ b/ft/tests/test_block_allocator_merge.cc @@ -95,7 +95,7 @@ PATENT RIGHTS GRANT: int verbose = 0; static void -print_array (uint64_t n, const struct block_allocator_blockpair a[/*n*/]) { +print_array (uint64_t n, const struct block_allocator::blockpair a[/*n*/]) { printf("{"); for (uint64_t i=0; ioffset < b->offset) return -1; if (a->offset > b->offset) return +1; return 0; } static void -test_merge (uint64_t an, const struct block_allocator_blockpair a[/*an*/], - uint64_t bn, const struct block_allocator_blockpair b[/*bn*/]) { +test_merge (uint64_t an, const struct block_allocator::blockpair a[/*an*/], + uint64_t bn, const struct block_allocator::blockpair b[/*bn*/]) { if (verbose>1) { printf("a:"); print_array(an, a); } if (verbose>1) { printf("b:"); print_array(bn, b); } - struct block_allocator_blockpair 
*MALLOC_N(an+bn, q); - struct block_allocator_blockpair *MALLOC_N(an+bn, m); + struct block_allocator::blockpair *MALLOC_N(an+bn, q); + struct block_allocator::blockpair *MALLOC_N(an+bn, m); if (q==0 || m==0) { fprintf(stderr, "malloc failed, continuing\n"); goto malloc_failed; @@ -131,7 +131,7 @@ test_merge (uint64_t an, const struct block_allocator_blockpair a[/*an*/], qsort(q, an+bn, sizeof(*q), compare_blockpairs); if (verbose>1) { printf("q:"); print_array(an+bn, q); } if (verbose) printf("merge\n"); - block_allocator_merge_blockpairs_into(an, m, bn, b); + block_allocator::merge_blockpairs_into(an, m, bn, b); if (verbose) printf("compare\n"); if (verbose>1) { printf("m:"); print_array(an+bn, m); } for (uint64_t i=0; i #include #include @@ -103,6 +98,13 @@ PATENT RIGHTS GRANT: #include #include +#include "ft/block_table.h" +#include "ft/cachetable.h" +#include "ft/ft.h" +#include "ft/fttypes.h" +#include "ft/ft-internal.h" +#include "ft/node.h" + static int do_dump_data = 1; static int do_interactive = 0; static int do_header = 0; From 4031870d8288e19037a71245d8f767f33872944a Mon Sep 17 00:00:00 2001 From: John Esmet Date: Mon, 16 Jun 2014 15:25:48 -0400 Subject: [PATCH 039/190] FT-271 Fix mixed signed/unsigned comparison which breaks some versions of gcc --- ft/tests/ft-bfe-query.cc | 4 ++-- ft/tests/ft-clock-test.cc | 4 ++-- ft/tests/ft-serialize-benchmark.cc | 4 ++-- ft/tests/ft-serialize-test.cc | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/ft/tests/ft-bfe-query.cc b/ft/tests/ft-bfe-query.cc index 33a146c79ba..0b1f0f4d394 100644 --- a/ft/tests/ft-bfe-query.cc +++ b/ft/tests/ft-bfe-query.cc @@ -435,10 +435,10 @@ test_prefetching(void) { DISKOFF offset; DISKOFF size; toku_blocknum_realloc_on_disk(ft_h->blocktable, b, 100, &offset, ft_h, fd, false); - assert(offset==block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); 
toku_translate_blocknum_to_offset_size(ft_h->blocktable, b, &offset, &size); - assert(offset == block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); assert(size == 100); } FTNODE_DISK_DATA ndd = NULL; diff --git a/ft/tests/ft-clock-test.cc b/ft/tests/ft-clock-test.cc index a9cec44dacb..c6b7109d6a9 100644 --- a/ft/tests/ft-clock-test.cc +++ b/ft/tests/ft-clock-test.cc @@ -451,10 +451,10 @@ test_serialize_leaf(void) { DISKOFF offset; DISKOFF size; toku_blocknum_realloc_on_disk(ft_h->blocktable, b, 100, &offset, ft_h, fd, false); - assert(offset==block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); toku_translate_blocknum_to_offset_size(ft_h->blocktable, b, &offset, &size); - assert(offset == block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); assert(size == 100); } FTNODE_DISK_DATA ndd = NULL; diff --git a/ft/tests/ft-serialize-benchmark.cc b/ft/tests/ft-serialize-benchmark.cc index 33e2e34347e..d7de58d199e 100644 --- a/ft/tests/ft-serialize-benchmark.cc +++ b/ft/tests/ft-serialize-benchmark.cc @@ -374,10 +374,10 @@ test_serialize_nonleaf(int valsize, int nelts, double entropy, int ser_runs, int DISKOFF offset; DISKOFF size; toku_blocknum_realloc_on_disk(ft_h->blocktable, b, 100, &offset, ft_h, fd, false); - assert(offset==block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); toku_translate_blocknum_to_offset_size(ft_h->blocktable, b, &offset, &size); - assert(offset == block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); assert(size == 100); } diff --git a/ft/tests/ft-serialize-test.cc b/ft/tests/ft-serialize-test.cc index 
1a2ba6564da..75df57ee341 100644 --- a/ft/tests/ft-serialize-test.cc +++ b/ft/tests/ft-serialize-test.cc @@ -315,10 +315,10 @@ test_serialize_leaf_check_msn(enum ftnode_verify_type bft, bool do_clone) { DISKOFF offset; DISKOFF size; toku_blocknum_realloc_on_disk(ft_h->blocktable, b, 100, &offset, ft_h, fd, false); - assert(offset==block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); toku_translate_blocknum_to_offset_size(ft_h->blocktable, b, &offset, &size); - assert(offset == block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); assert(size == 100); } FTNODE_DISK_DATA src_ndd = NULL; From 211027e536fa08d45127a876e0f3a105b87566e2 Mon Sep 17 00:00:00 2001 From: John Esmet Date: Tue, 17 Jun 2014 10:56:41 -0400 Subject: [PATCH 040/190] FT-271 Fix more signed/unsigned comparisons --- ft/tests/ft-clock-test.cc | 4 ++-- ft/tests/ft-serialize-benchmark.cc | 4 ++-- ft/tests/ft-serialize-test.cc | 24 ++++++++++++------------ 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/ft/tests/ft-clock-test.cc b/ft/tests/ft-clock-test.cc index c6b7109d6a9..ceadd6aaedb 100644 --- a/ft/tests/ft-clock-test.cc +++ b/ft/tests/ft-clock-test.cc @@ -371,10 +371,10 @@ test_serialize_nonleaf(void) { DISKOFF offset; DISKOFF size; toku_blocknum_realloc_on_disk(ft_h->blocktable, b, 100, &offset, ft_h, fd, false); - assert(offset==block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); toku_translate_blocknum_to_offset_size(ft_h->blocktable, b, &offset, &size); - assert(offset == block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); assert(size == 100); } FTNODE_DISK_DATA ndd = NULL; diff --git a/ft/tests/ft-serialize-benchmark.cc 
b/ft/tests/ft-serialize-benchmark.cc index d7de58d199e..342fae76d81 100644 --- a/ft/tests/ft-serialize-benchmark.cc +++ b/ft/tests/ft-serialize-benchmark.cc @@ -211,10 +211,10 @@ test_serialize_leaf(int valsize, int nelts, double entropy, int ser_runs, int de DISKOFF offset; DISKOFF size; toku_blocknum_realloc_on_disk(ft_h->blocktable, b, 100, &offset, ft_h, fd, false); - assert(offset==block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); toku_translate_blocknum_to_offset_size(ft_h->blocktable, b, &offset, &size); - assert(offset == block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); assert(size == 100); } diff --git a/ft/tests/ft-serialize-test.cc b/ft/tests/ft-serialize-test.cc index 75df57ee341..11d8a2989f0 100644 --- a/ft/tests/ft-serialize-test.cc +++ b/ft/tests/ft-serialize-test.cc @@ -448,10 +448,10 @@ test_serialize_leaf_with_large_pivots(enum ftnode_verify_type bft, bool do_clone DISKOFF offset; DISKOFF size; toku_blocknum_realloc_on_disk(ft_h->blocktable, b, 100, &offset, ft_h, fd, false); - assert(offset==block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); toku_translate_blocknum_to_offset_size(ft_h->blocktable, b, &offset, &size); - assert(offset == block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); assert(size == 100); } FTNODE_DISK_DATA src_ndd = NULL; @@ -574,10 +574,10 @@ test_serialize_leaf_with_many_rows(enum ftnode_verify_type bft, bool do_clone) { DISKOFF offset; DISKOFF size; toku_blocknum_realloc_on_disk(ft_h->blocktable, b, 100, &offset, ft_h, fd, false); - assert(offset==block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + 
assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); toku_translate_blocknum_to_offset_size(ft_h->blocktable, b, &offset, &size); - assert(offset == block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); assert(size == 100); } @@ -709,10 +709,10 @@ test_serialize_leaf_with_large_rows(enum ftnode_verify_type bft, bool do_clone) DISKOFF offset; DISKOFF size; toku_blocknum_realloc_on_disk(ft_h->blocktable, b, 100, &offset, ft_h, fd, false); - assert(offset==block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); toku_translate_blocknum_to_offset_size(ft_h->blocktable, b, &offset, &size); - assert(offset == block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); assert(size == 100); } @@ -845,10 +845,10 @@ test_serialize_leaf_with_empty_basement_nodes(enum ftnode_verify_type bft, bool DISKOFF offset; DISKOFF size; toku_blocknum_realloc_on_disk(ft_h->blocktable, b, 100, &offset, ft_h, fd, false); - assert(offset==block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); toku_translate_blocknum_to_offset_size(ft_h->blocktable, b, &offset, &size); - assert(offset == block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); assert(size == 100); } FTNODE_DISK_DATA src_ndd = NULL; @@ -965,10 +965,10 @@ test_serialize_leaf_with_multiple_empty_basement_nodes(enum ftnode_verify_type b DISKOFF offset; DISKOFF size; toku_blocknum_realloc_on_disk(ft_h->blocktable, b, 100, &offset, ft_h, fd, false); - assert(offset==block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + 
assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); toku_translate_blocknum_to_offset_size(ft_h->blocktable, b, &offset, &size); - assert(offset == block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); assert(size == 100); } @@ -1088,10 +1088,10 @@ test_serialize_nonleaf(enum ftnode_verify_type bft, bool do_clone) { DISKOFF offset; DISKOFF size; toku_blocknum_realloc_on_disk(ft_h->blocktable, b, 100, &offset, ft_h, fd, false); - assert(offset==block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); toku_translate_blocknum_to_offset_size(ft_h->blocktable, b, &offset, &size); - assert(offset == block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); assert(size == 100); } FTNODE_DISK_DATA src_ndd = NULL; From f2df37d620c6a26025069beedd3293ef8493b07e Mon Sep 17 00:00:00 2001 From: John Esmet Date: Wed, 18 Jun 2014 22:23:13 -0400 Subject: [PATCH 041/190] FT-93 Add a class for pivot bounds, remove the assumption that pivot keys must come from a DBT stored in the ftnode by adding ftnode_pivot_keys::fill_dbt() --- ft/ft-cachetable-wrappers.cc | 2 +- ft/ft-cachetable-wrappers.h | 2 +- ft/ft-flusher.cc | 14 +- ft/ft-hot-flusher.cc | 2 +- ft/ft-internal.h | 27 ++- ft/ft-ops.cc | 118 ++++++----- ft/ft-verify.cc | 21 +- ft/node.cc | 358 +++++++++++++++++++++++++++------- ft/node.h | 69 +++++-- ft/tests/ft-serialize-test.cc | 14 +- ft/tests/orthopush-flush.cc | 15 +- ft/tokuftdump.cc | 4 +- ft/ybt.cc | 6 + ft/ybt.h | 3 + 14 files changed, 478 insertions(+), 177 deletions(-) diff --git a/ft/ft-cachetable-wrappers.cc b/ft/ft-cachetable-wrappers.cc index 3e8800ad0fc..105a2f03dc6 100644 --- a/ft/ft-cachetable-wrappers.cc +++ b/ft/ft-cachetable-wrappers.cc @@ -209,7 +209,7 @@ toku_pin_ftnode_for_query( 
uint32_t fullhash, UNLOCKERS unlockers, ANCESTORS ancestors, - const PIVOT_BOUNDS bounds, + const pivot_bounds &bounds, FTNODE_FETCH_EXTRA bfe, bool apply_ancestor_messages, // this bool is probably temporary, for #3972, once we know how range query estimates work, will revisit this FTNODE *node_p, diff --git a/ft/ft-cachetable-wrappers.h b/ft/ft-cachetable-wrappers.h index a25575f3712..044195c7fde 100644 --- a/ft/ft-cachetable-wrappers.h +++ b/ft/ft-cachetable-wrappers.h @@ -147,7 +147,7 @@ toku_pin_ftnode_for_query( uint32_t fullhash, UNLOCKERS unlockers, ANCESTORS ancestors, - const PIVOT_BOUNDS pbounds, + const pivot_bounds &bounds, FTNODE_FETCH_EXTRA bfe, bool apply_ancestor_messages, // this bool is probably temporary, for #3972, once we know how range query estimates work, will revisit this FTNODE *node_p, diff --git a/ft/ft-flusher.cc b/ft/ft-flusher.cc index 34d2e5d8c63..8b0ff8c8546 100644 --- a/ft/ft-flusher.cc +++ b/ft/ft-flusher.cc @@ -468,7 +468,7 @@ ct_maybe_merge_child(struct flusher_advice *fa, ctme.is_last_child = false; pivot_to_save = childnum; } - toku_clone_dbt(&ctme.target_key, *parent->pivotkeys.get_pivot(pivot_to_save)); + toku_clone_dbt(&ctme.target_key, parent->pivotkeys.get_pivot(pivot_to_save)); // at this point, ctme is properly setup, now we can do the merge struct flusher_advice new_fa; @@ -580,7 +580,7 @@ handle_split_of_child( if (toku_ft_debug_mode) { printf("%s:%d Child %d splitting on %s\n", __FILE__, __LINE__, childnum, (char*)splitk->data); printf("%s:%d oldsplitkeys:", __FILE__, __LINE__); - for(int i = 0; i < node->n_children - 1; i++) printf(" %s", (char *) node->pivotkeys.get_pivot(i)->data); + for(int i = 0; i < node->n_children - 1; i++) printf(" %s", (char *) node->pivotkeys.get_pivot(i).data); printf("\n"); } ) @@ -631,7 +631,7 @@ handle_split_of_child( WHEN_NOT_GCOV( if (toku_ft_debug_mode) { printf("%s:%d splitkeys:", __FILE__, __LINE__); - for (int i = 0; i < node->n_children - 2; i++) printf(" %s", (char *) 
node->pivotkeys.get_pivot(i)->data); + for (int i = 0; i < node->n_children - 2; i++) printf(" %s", (char *) node->pivotkeys.get_pivot(i).data); printf("\n"); } ) @@ -937,7 +937,7 @@ ftleaf_split( int split_idx = num_left_bns - (split_on_boundary ? 0 : 1); node->pivotkeys.split_at(split_idx, &B->pivotkeys); if (split_on_boundary && num_left_bns < node->n_children && splitk) { - toku_copyref_dbt(splitk, *node->pivotkeys.get_pivot(num_left_bns - 1)); + toku_copyref_dbt(splitk, node->pivotkeys.get_pivot(num_left_bns - 1)); } else if (splitk) { bn_data* bd = BLB_DATA(node, num_left_bns - 1); uint32_t keylen; @@ -997,7 +997,7 @@ ft_nonleaf_split( // the split key for our parent is the rightmost pivot key in node node->pivotkeys.split_at(n_children_in_a, &B->pivotkeys); - toku_clone_dbt(splitk, *node->pivotkeys.get_pivot(n_children_in_a - 1)); + toku_clone_dbt(splitk, node->pivotkeys.get_pivot(n_children_in_a - 1)); node->pivotkeys.delete_at(n_children_in_a - 1); node->n_children = n_children_in_a; @@ -1408,8 +1408,8 @@ ft_merge_child( { DBT splitk; toku_init_dbt(&splitk); - const DBT *old_split_key = node->pivotkeys.get_pivot(childnuma); - maybe_merge_pinned_nodes(node, old_split_key, childa, childb, &did_merge, &did_rebalance, &splitk, ft->h->nodesize); + const DBT old_split_key = node->pivotkeys.get_pivot(childnuma); + maybe_merge_pinned_nodes(node, &old_split_key, childa, childb, &did_merge, &did_rebalance, &splitk, ft->h->nodesize); //toku_verify_estimates(t,childa); // the tree did react if a merge (did_merge) or rebalance (new spkit key) occurred *did_react = (bool)(did_merge || did_rebalance); diff --git a/ft/ft-hot-flusher.cc b/ft/ft-hot-flusher.cc index 5e891f3ad17..aa695185838 100644 --- a/ft/ft-hot-flusher.cc +++ b/ft/ft-hot-flusher.cc @@ -199,7 +199,7 @@ hot_update_flusher_keys(FTNODE parent, // child node. 
if (childnum < (parent->n_children - 1)) { toku_destroy_dbt(&flusher->max_current_key); - toku_clone_dbt(&flusher->max_current_key, *parent->pivotkeys.get_pivot(childnum)); + toku_clone_dbt(&flusher->max_current_key, parent->pivotkeys.get_pivot(childnum)); } } diff --git a/ft/ft-internal.h b/ft/ft-internal.h index 7f264056d65..63bcc1f1280 100644 --- a/ft/ft-internal.h +++ b/ft/ft-internal.h @@ -461,15 +461,26 @@ void fill_bfe_for_prefetch(struct ftnode_fetch_extra *bfe, FT ft, struct ft_curs void destroy_bfe_for_prefetch(struct ftnode_fetch_extra *bfe); -struct pivot_bounds { - const DBT * const lower_bound_exclusive; - const DBT * const upper_bound_inclusive; // NULL to indicate negative or positive infinity (which are in practice exclusive since there are now transfinite keys in messages). -}; -typedef struct pivot_bounds const * const PIVOT_BOUNDS; +class pivot_bounds { +public: + pivot_bounds(const DBT &lbe_dbt, const DBT &ubi_dbt); -const DBT *prepivotkey (FTNODE node, int childnum, const DBT * const lower_bound_exclusive); -const DBT *postpivotkey (FTNODE node, int childnum, const DBT * const upper_bound_inclusive); -struct pivot_bounds next_pivot_keys (FTNODE node, int childnum, struct pivot_bounds const * const old_pb); + pivot_bounds next_bounds(FTNODE node, int childnum) const; + + const DBT *lbe() const; + const DBT *ubi() const; + + static pivot_bounds infinite_bounds(); + +private: + DBT _prepivotkey(FTNODE node, int childnum, const DBT &lbe_dbt) const; + DBT _postpivotkey(FTNODE node, int childnum, const DBT &ubi_dbt) const; + + // if toku_dbt_is_empty() is true for either bound, then it represents + // negative or positive infinity (which are exclusive in practice) + const DBT _lower_bound_exclusive; + const DBT _upper_bound_inclusive; +}; bool toku_bfe_wants_child_available (struct ftnode_fetch_extra* bfe, int childnum); diff --git a/ft/ft-ops.cc b/ft/ft-ops.cc index 4bc15ee22c4..1fd343a2b00 100644 --- a/ft/ft-ops.cc +++ b/ft/ft-ops.cc @@ -445,28 
+445,55 @@ uint32_t compute_child_fullhash (CACHEFILE cf, FTNODE node, int childnum) { return toku_cachetable_hash(cf, BP_BLOCKNUM(node, childnum)); } -const DBT *prepivotkey (FTNODE node, int childnum, const DBT * const lower_bound_exclusive) { - if (childnum==0) - return lower_bound_exclusive; - else { +// +// pivot bounds +// TODO: move me to ft/node.cc? +// + +pivot_bounds::pivot_bounds(const DBT &lbe_dbt, const DBT &ubi_dbt) : + _lower_bound_exclusive(lbe_dbt), _upper_bound_inclusive(ubi_dbt) { +} + +pivot_bounds pivot_bounds::infinite_bounds() { + DBT dbt; + toku_init_dbt(&dbt); + + // infinity is represented by an empty dbt + invariant(toku_dbt_is_empty(&dbt)); + return pivot_bounds(dbt, dbt); +} + +const DBT *pivot_bounds::lbe() const { + return &_lower_bound_exclusive; +} + +const DBT *pivot_bounds::ubi() const { + return &_upper_bound_inclusive; +} + +DBT pivot_bounds::_prepivotkey(FTNODE node, int childnum, const DBT &lbe_dbt) const { + if (childnum == 0) { + return lbe_dbt; + } else { return node->pivotkeys.get_pivot(childnum - 1); } } -const DBT *postpivotkey (FTNODE node, int childnum, const DBT * const upper_bound_inclusive) { - if (childnum+1 == node->n_children) - return upper_bound_inclusive; - else { +DBT pivot_bounds::_postpivotkey(FTNODE node, int childnum, const DBT &ubi_dbt) const { + if (childnum + 1 == node->n_children) { + return ubi_dbt; + } else { return node->pivotkeys.get_pivot(childnum); } } -struct pivot_bounds next_pivot_keys (FTNODE node, int childnum, struct pivot_bounds const * const old_pb) { - struct pivot_bounds pb = {.lower_bound_exclusive = prepivotkey(node, childnum, old_pb->lower_bound_exclusive), - .upper_bound_inclusive = postpivotkey(node, childnum, old_pb->upper_bound_inclusive)}; - return pb; +pivot_bounds pivot_bounds::next_bounds(FTNODE node, int childnum) const { + return pivot_bounds(_prepivotkey(node, childnum, _lower_bound_exclusive), + _postpivotkey(node, childnum, _upper_bound_inclusive)); } 
+//////////////////////////////////////////////////////////////////////////////// + static long get_avail_internal_node_partition_size(FTNODE node, int i) { paranoid_invariant(node->height > 0); return toku_bnc_memory_size(BNC(node, i)); @@ -3443,7 +3470,7 @@ ft_search_node ( FT_CURSOR ftcursor, UNLOCKERS unlockers, ANCESTORS, - struct pivot_bounds const * const bounds, + const pivot_bounds &bounds, bool can_bulk_fetch ); @@ -3540,7 +3567,7 @@ unlock_ftnode_fun (void *v) { /* search in a node's child */ static int ft_search_child(FT_HANDLE ft_handle, FTNODE node, int childnum, ft_search *search, FT_GET_CALLBACK_FUNCTION getf, void *getf_v, bool *doprefetch, FT_CURSOR ftcursor, UNLOCKERS unlockers, - ANCESTORS ancestors, struct pivot_bounds const * const bounds, bool can_bulk_fetch) + ANCESTORS ancestors, const pivot_bounds &bounds, bool can_bulk_fetch) // Effect: Search in a node's child. Searches are read-only now (at least as far as the hardcopy is concerned). { struct ancestors next_ancestors = {node, childnum, ancestors}; @@ -3620,7 +3647,7 @@ ft_search_child(FT_HANDLE ft_handle, FTNODE node, int childnum, ft_search *searc static inline int search_which_child_cmp_with_bound(const toku::comparator &cmp, FTNODE node, int childnum, ft_search *search, DBT *dbt) { - return cmp(toku_copyref_dbt(dbt, *node->pivotkeys.get_pivot(childnum)), &search->pivot_bound); + return cmp(toku_copyref_dbt(dbt, node->pivotkeys.get_pivot(childnum)), &search->pivot_bound); } int @@ -3634,7 +3661,7 @@ toku_ft_search_which_child(const toku::comparator &cmp, FTNODE node, ft_search * int mi; while (lo < hi) { mi = (lo + hi) / 2; - toku_copyref_dbt(&pivotkey, *node->pivotkeys.get_pivot(mi)); + node->pivotkeys.fill_pivot(mi, &pivotkey); // search->compare is really strange, and only works well with a // linear search, it makes binary search a pita. // @@ -3690,7 +3717,7 @@ maybe_search_save_bound( int p = (search->direction == FT_SEARCH_LEFT) ? 
child_searched : child_searched - 1; if (p >= 0 && p < node->n_children-1) { toku_destroy_dbt(&search->pivot_bound); - toku_clone_dbt(&search->pivot_bound, *node->pivotkeys.get_pivot(p)); + toku_clone_dbt(&search->pivot_bound, node->pivotkeys.get_pivot(p)); } } @@ -3725,7 +3752,7 @@ ft_search_node( FT_CURSOR ftcursor, UNLOCKERS unlockers, ANCESTORS ancestors, - struct pivot_bounds const * const bounds, + const pivot_bounds &bounds, bool can_bulk_fetch ) { @@ -3737,7 +3764,7 @@ ft_search_node( // At this point, we must have the necessary partition available to continue the search // assert(BP_STATE(node,child_to_search) == PT_AVAIL); - const struct pivot_bounds next_bounds = next_pivot_keys(node, child_to_search, bounds); + const pivot_bounds next_bounds = bounds.next_bounds(node, child_to_search); if (node->height > 0) { r = ft_search_child( ft_handle, @@ -3750,7 +3777,7 @@ ft_search_node( ftcursor, unlockers, ancestors, - &next_bounds, + next_bounds, can_bulk_fetch ); } @@ -3779,12 +3806,8 @@ ft_search_node( // we have a new pivotkey if (node->height == 0) { // when we run off the end of a basement, try to lock the range up to the pivot. solves #3529 - const DBT *pivot = nullptr; - if (search->direction == FT_SEARCH_LEFT) { - pivot = next_bounds.upper_bound_inclusive; // left -> right - } else { - pivot = next_bounds.lower_bound_exclusive; // right -> left - } + const DBT *pivot = search->direction == FT_SEARCH_LEFT ? next_bounds.ubi() : // left -> right + next_bounds.lbe(); // right -> left if (pivot != nullptr) { int rr = getf(pivot->size, pivot->data, 0, nullptr, getf_v, true); if (rr != 0) { @@ -3812,11 +3835,6 @@ ft_search_node( return r; } -static const struct pivot_bounds infinite_bounds = { - .lower_bound_exclusive = nullptr, - .upper_bound_inclusive = nullptr, -}; - int toku_ft_search(FT_HANDLE ft_handle, ft_search *search, FT_GET_CALLBACK_FUNCTION getf, void *getf_v, FT_CURSOR ftcursor, bool can_bulk_fetch) // Effect: Perform a search. 
Associate cursor with a leaf if possible. // All searches are performed through this function. @@ -3894,7 +3912,7 @@ try_again: { bool doprefetch = false; //static int counter = 0; counter++; - r = ft_search_node(ft_handle, node, search, bfe.child_to_read, getf, getf_v, &doprefetch, ftcursor, &unlockers, (ANCESTORS)NULL, &infinite_bounds, can_bulk_fetch); + r = ft_search_node(ft_handle, node, search, bfe.child_to_read, getf, getf_v, &doprefetch, ftcursor, &unlockers, (ANCESTORS)NULL, pivot_bounds::infinite_bounds(), can_bulk_fetch); if (r==TOKUDB_TRY_AGAIN) { // there are two cases where we get TOKUDB_TRY_AGAIN // case 1 is when some later call to toku_pin_ftnode returned @@ -4048,7 +4066,7 @@ toku_ft_keysrange_internal (FT_HANDLE ft_handle, FTNODE node, uint64_t estimated_num_rows, struct ftnode_fetch_extra *min_bfe, // set up to read a minimal read. struct ftnode_fetch_extra *match_bfe, // set up to read a basement node iff both keys in it - struct unlockers *unlockers, ANCESTORS ancestors, struct pivot_bounds const * const bounds) + struct unlockers *unlockers, ANCESTORS ancestors, const pivot_bounds &bounds) // Implementation note: Assign values to less, equal, and greater, and then on the way out (returning up the stack) we add more values in. 
{ int r = 0; @@ -4096,11 +4114,11 @@ toku_ft_keysrange_internal (FT_HANDLE ft_handle, FTNODE node, struct unlock_ftnode_extra unlock_extra = {ft_handle,childnode,false}; struct unlockers next_unlockers = {true, unlock_ftnode_fun, (void*)&unlock_extra, unlockers}; - const struct pivot_bounds next_bounds = next_pivot_keys(node, left_child_number, bounds); + const struct pivot_bounds next_bounds = bounds.next_bounds(node, left_child_number); r = toku_ft_keysrange_internal(ft_handle, childnode, key_left, key_right, child_may_find_right, less, equal_left, middle, equal_right, greater, single_basement_node, - rows_per_child, min_bfe, match_bfe, &next_unlockers, &next_ancestors, &next_bounds); + rows_per_child, min_bfe, match_bfe, &next_unlockers, &next_ancestors, next_bounds); if (r != TOKUDB_TRY_AGAIN) { assert_zero(r); @@ -4179,7 +4197,7 @@ try_again: r = toku_ft_keysrange_internal (ft_handle, node, key_left, key_right, true, &less, &equal_left, &middle, &equal_right, &greater, &single_basement_node, numrows, - &min_bfe, &match_bfe, &unlockers, (ANCESTORS)NULL, &infinite_bounds); + &min_bfe, &match_bfe, &unlockers, (ANCESTORS)NULL, pivot_bounds::infinite_bounds()); assert(r == 0 || r == TOKUDB_TRY_AGAIN); if (r == TOKUDB_TRY_AGAIN) { assert(!unlockers.locked); @@ -4195,7 +4213,7 @@ try_again: r = toku_ft_keysrange_internal (ft_handle, node, key_right, nullptr, false, &less2, &equal_left2, &middle2, &equal_right2, &greater2, &ignore, numrows, - &min_bfe, &match_bfe, &unlockers, (ANCESTORS)nullptr, &infinite_bounds); + &min_bfe, &match_bfe, &unlockers, (ANCESTORS)nullptr, pivot_bounds::infinite_bounds()); assert(r == 0 || r == TOKUDB_TRY_AGAIN); if (r == TOKUDB_TRY_AGAIN) { assert(!unlockers.locked); @@ -4282,9 +4300,9 @@ static int get_key_after_bytes_in_basementnode(FT ft, BASEMENTNODE bn, const DBT return r; } -static int get_key_after_bytes_in_subtree(FT_HANDLE ft_h, FT ft, FTNODE node, UNLOCKERS unlockers, ANCESTORS ancestors, PIVOT_BOUNDS bounds, FTNODE_FETCH_EXTRA 
bfe, ft_search *search, uint64_t subtree_bytes, const DBT *start_key, uint64_t skip_len, void (*callback)(const DBT *, uint64_t, void *), void *cb_extra, uint64_t *skipped); +static int get_key_after_bytes_in_subtree(FT_HANDLE ft_h, FT ft, FTNODE node, UNLOCKERS unlockers, ANCESTORS ancestors, const pivot_bounds &bounds, FTNODE_FETCH_EXTRA bfe, ft_search *search, uint64_t subtree_bytes, const DBT *start_key, uint64_t skip_len, void (*callback)(const DBT *, uint64_t, void *), void *cb_extra, uint64_t *skipped); -static int get_key_after_bytes_in_child(FT_HANDLE ft_h, FT ft, FTNODE node, UNLOCKERS unlockers, ANCESTORS ancestors, PIVOT_BOUNDS bounds, FTNODE_FETCH_EXTRA bfe, ft_search *search, int childnum, uint64_t subtree_bytes, const DBT *start_key, uint64_t skip_len, void (*callback)(const DBT *, uint64_t, void *), void *cb_extra, uint64_t *skipped) { +static int get_key_after_bytes_in_child(FT_HANDLE ft_h, FT ft, FTNODE node, UNLOCKERS unlockers, ANCESTORS ancestors, const pivot_bounds &bounds, FTNODE_FETCH_EXTRA bfe, ft_search *search, int childnum, uint64_t subtree_bytes, const DBT *start_key, uint64_t skip_len, void (*callback)(const DBT *, uint64_t, void *), void *cb_extra, uint64_t *skipped) { int r; struct ancestors next_ancestors = {node, childnum, ancestors}; BLOCKNUM childblocknum = BP_BLOCKNUM(node, childnum); @@ -4299,11 +4317,11 @@ static int get_key_after_bytes_in_child(FT_HANDLE ft_h, FT ft, FTNODE node, UNLO assert_zero(r); struct unlock_ftnode_extra unlock_extra = {ft_h, child, false}; struct unlockers next_unlockers = {true, unlock_ftnode_fun, (void *) &unlock_extra, unlockers}; - const struct pivot_bounds next_bounds = next_pivot_keys(node, childnum, bounds); - return get_key_after_bytes_in_subtree(ft_h, ft, child, &next_unlockers, &next_ancestors, &next_bounds, bfe, search, subtree_bytes, start_key, skip_len, callback, cb_extra, skipped); + const pivot_bounds next_bounds = bounds.next_bounds(node, childnum); + return 
get_key_after_bytes_in_subtree(ft_h, ft, child, &next_unlockers, &next_ancestors, next_bounds, bfe, search, subtree_bytes, start_key, skip_len, callback, cb_extra, skipped); } -static int get_key_after_bytes_in_subtree(FT_HANDLE ft_h, FT ft, FTNODE node, UNLOCKERS unlockers, ANCESTORS ancestors, PIVOT_BOUNDS bounds, FTNODE_FETCH_EXTRA bfe, ft_search *search, uint64_t subtree_bytes, const DBT *start_key, uint64_t skip_len, void (*callback)(const DBT *, uint64_t, void *), void *cb_extra, uint64_t *skipped) { +static int get_key_after_bytes_in_subtree(FT_HANDLE ft_h, FT ft, FTNODE node, UNLOCKERS unlockers, ANCESTORS ancestors, const pivot_bounds &bounds, FTNODE_FETCH_EXTRA bfe, ft_search *search, uint64_t subtree_bytes, const DBT *start_key, uint64_t skip_len, void (*callback)(const DBT *, uint64_t, void *), void *cb_extra, uint64_t *skipped) { int r; int childnum = toku_ft_search_which_child(ft->cmp, node, search); const uint64_t child_subtree_bytes = subtree_bytes / node->n_children; @@ -4321,7 +4339,8 @@ static int get_key_after_bytes_in_subtree(FT_HANDLE ft_h, FT ft, FTNODE node, UN } else { *skipped += child_subtree_bytes; if (*skipped >= skip_len && i < node->n_children - 1) { - callback(node->pivotkeys.get_pivot(i), *skipped, cb_extra); + DBT pivot; + callback(node->pivotkeys.fill_pivot(i, &pivot), *skipped, cb_extra); r = 0; } // Otherwise, r is still DB_NOTFOUND. 
If this is the last @@ -4389,7 +4408,7 @@ int toku_ft_get_key_after_bytes(FT_HANDLE ft_h, const DBT *start_key, uint64_t s numbytes = 0; } uint64_t skipped = 0; - r = get_key_after_bytes_in_subtree(ft_h, ft, root, &unlockers, nullptr, &infinite_bounds, &bfe, &search, (uint64_t) numbytes, start_key, skip_len, callback, cb_extra, &skipped); + r = get_key_after_bytes_in_subtree(ft_h, ft, root, &unlockers, nullptr, pivot_bounds::infinite_bounds(), &bfe, &search, (uint64_t) numbytes, start_key, skip_len, callback, cb_extra, &skipped); assert(!unlockers.locked); if (r != TOKUDB_TRY_AGAIN) { if (r == DB_NOTFOUND) { @@ -4450,7 +4469,7 @@ toku_dump_ftnode (FILE *file, FT_HANDLE ft_handle, BLOCKNUM blocknum, int depth, int i; for (i=0; i+1< node->n_children; i++) { fprintf(file, "%*spivotkey %d =", depth+1, "", i); - toku_print_BYTESTRING(file, node->pivotkeys.get_pivot(i)->size, (char *) node->pivotkeys.get_pivot(i)->data); + toku_print_BYTESTRING(file, node->pivotkeys.get_pivot(i).size, (char *) node->pivotkeys.get_pivot(i).data); fprintf(file, "\n"); } for (i=0; i< node->n_children; i++) { @@ -4492,12 +4511,13 @@ toku_dump_ftnode (FILE *file, FT_HANDLE ft_handle, BLOCKNUM blocknum, int depth, for (i=0; in_children; i++) { fprintf(file, "%*schild %d\n", depth, "", i); if (i>0) { - char *CAST_FROM_VOIDP(key, node->pivotkeys.get_pivot(i - 1)->data); - fprintf(file, "%*spivot %d len=%u %u\n", depth+1, "", i-1, node->pivotkeys.get_pivot(i - 1)->size, (unsigned)toku_dtoh32(*(int*)key)); + char *CAST_FROM_VOIDP(key, node->pivotkeys.get_pivot(i - 1).data); + fprintf(file, "%*spivot %d len=%u %u\n", depth+1, "", i-1, node->pivotkeys.get_pivot(i - 1).size, (unsigned)toku_dtoh32(*(int*)key)); } + DBT x, y; toku_dump_ftnode(file, ft_handle, BP_BLOCKNUM(node, i), depth+4, - (i==0) ? lorange : node->pivotkeys.get_pivot(i - 1), - (i==node->n_children-1) ? hirange : node->pivotkeys.get_pivot(i)); + (i==0) ? lorange : node->pivotkeys.fill_pivot(i - 1, &x), + (i==node->n_children-1) ? 
hirange : node->pivotkeys.fill_pivot(i, &y)); } } } diff --git a/ft/ft-verify.cc b/ft/ft-verify.cc index 056ae18bd17..ba78f11f421 100644 --- a/ft/ft-verify.cc +++ b/ft/ft-verify.cc @@ -158,7 +158,8 @@ get_ith_key_dbt (BASEMENTNODE bn, int i) { #define VERIFY_ASSERTION(predicate, i, string) ({ \ if(!(predicate)) { \ - if (verbose) { \ + (void) verbose; \ + if (true) { \ fprintf(stderr, "%s:%d: Looking at child %d of block %" PRId64 ": %s\n", __FILE__, __LINE__, i, blocknum.b, string); \ } \ result = TOKUDB_NEEDS_REPAIR; \ @@ -398,24 +399,27 @@ toku_verify_ftnode_internal(FT_HANDLE ft_handle, } // Verify that all the pivot keys are in order. for (int i = 0; i < node->n_children-2; i++) { - int compare = compare_pairs(ft_handle, node->pivotkeys.get_pivot(i), node->pivotkeys.get_pivot(i + 1)); + DBT x, y; + int compare = compare_pairs(ft_handle, node->pivotkeys.fill_pivot(i, &x), node->pivotkeys.fill_pivot(i + 1, &y)); VERIFY_ASSERTION(compare < 0, i, "Value is >= the next value"); } // Verify that all the pivot keys are lesser_pivot < pivot <= greatereq_pivot for (int i = 0; i < node->n_children-1; i++) { + DBT x; if (lesser_pivot) { - int compare = compare_pairs(ft_handle, lesser_pivot, node->pivotkeys.get_pivot(i)); + int compare = compare_pairs(ft_handle, lesser_pivot, node->pivotkeys.fill_pivot(i, &x)); VERIFY_ASSERTION(compare < 0, i, "Pivot is >= the lower-bound pivot"); } if (greatereq_pivot) { - int compare = compare_pairs(ft_handle, greatereq_pivot, node->pivotkeys.get_pivot(i)); + int compare = compare_pairs(ft_handle, greatereq_pivot, node->pivotkeys.fill_pivot(i, &x)); VERIFY_ASSERTION(compare >= 0, i, "Pivot is < the upper-bound pivot"); } } for (int i = 0; i < node->n_children; i++) { - const DBT *curr_less_pivot = (i==0) ? lesser_pivot : node->pivotkeys.get_pivot(i - 1); - const DBT *curr_geq_pivot = (i==node->n_children-1) ? greatereq_pivot : node->pivotkeys.get_pivot(i); + DBT x, y; + const DBT *curr_less_pivot = (i==0) ? 
lesser_pivot : node->pivotkeys.fill_pivot(i - 1, &x); + const DBT *curr_geq_pivot = (i==node->n_children-1) ? greatereq_pivot : node->pivotkeys.fill_pivot(i, &y); if (node->height > 0) { NONLEAF_CHILDINFO bnc = BNC(node, i); // Verify that messages in the buffers are in the right place. @@ -518,14 +522,15 @@ toku_verify_ftnode (FT_HANDLE ft_handle, for (int i = 0; i < node->n_children; i++) { FTNODE child_node; toku_get_node_for_verify(BP_BLOCKNUM(node, i), ft_handle, &child_node); + DBT x, y; int r = toku_verify_ftnode(ft_handle, rootmsn, (toku_bnc_n_entries(BNC(node, i)) > 0 ? this_msn : parentmsn_with_messages), messages_exist_above || toku_bnc_n_entries(BNC(node, i)) > 0, child_node, node->height-1, - (i==0) ? lesser_pivot : node->pivotkeys.get_pivot(i - 1), - (i==node->n_children-1) ? greatereq_pivot : node->pivotkeys.get_pivot(i), + (i==0) ? lesser_pivot : node->pivotkeys.fill_pivot(i - 1, &x), + (i==node->n_children-1) ? greatereq_pivot : node->pivotkeys.fill_pivot(i, &y), progress_callback, progress_extra, recurse, verbose, keep_going_on_failure); if (r) { diff --git a/ft/node.cc b/ft/node.cc index f7b427afb1c..7201420e1b8 100644 --- a/ft/node.cc +++ b/ft/node.cc @@ -100,137 +100,350 @@ PATENT RIGHTS GRANT: void ftnode_pivot_keys::create_empty() { _num_pivots = 0; _total_size = 0; - _keys = nullptr; + _fixed_keys = nullptr; + _fixed_keylen = 0; + _dbt_keys = nullptr; } void ftnode_pivot_keys::create_from_dbts(const DBT *keys, int n) { + create_empty(); _num_pivots = n; - _total_size = 0; - XMALLOC_N(_num_pivots, _keys); - for (int i = 0; i < _num_pivots; i++) { - size_t size = keys[i].size; - toku_memdup_dbt(&_keys[i], keys[i].data, size); - _total_size += size; + + // see if every key has the same length + bool keys_same_size = true; + for (int i = 1; i < _num_pivots; i++) { + if (keys[i].size != keys[i - 1].size) { + keys_same_size = false; + break; + } } + + if (keys_same_size && _num_pivots > 0) { + // if so, store pivots in a tightly packed array of 
fixed length keys + _fixed_keylen = keys[0].size; + _total_size = _fixed_keylen * _num_pivots; + XMALLOC_N(_total_size, _fixed_keys); + for (int i = 0; i < _num_pivots; i++) { + invariant(keys[i].size == _fixed_keylen); + memcpy(_fixed_key(i), keys[i].data, _fixed_keylen); + } + } else { + // otherwise we'll just store the pivots in an array of dbts + XMALLOC_N(_num_pivots, _dbt_keys); + for (int i = 0; i < _num_pivots; i++) { + size_t size = keys[i].size; + toku_memdup_dbt(&_dbt_keys[i], keys[i].data, size); + _total_size += size; + } + } +} + +void ftnode_pivot_keys::_create_from_fixed_keys(const char *fixedkeys, size_t fixed_keylen, int n) { + create_empty(); + _num_pivots = n; + _fixed_keylen = fixed_keylen; + _total_size = _fixed_keylen * _num_pivots; + XMEMDUP_N(_fixed_keys, fixedkeys, _total_size); } // effect: create pivot keys as a clone of an existing set of pivotkeys void ftnode_pivot_keys::create_from_pivot_keys(const ftnode_pivot_keys &pivotkeys) { - create_from_dbts(pivotkeys._keys, pivotkeys._num_pivots); + if (pivotkeys._fixed_format()) { + _create_from_fixed_keys(pivotkeys._fixed_keys, pivotkeys._fixed_keylen, pivotkeys._num_pivots); + } else { + create_from_dbts(pivotkeys._dbt_keys, pivotkeys._num_pivots); + } } void ftnode_pivot_keys::destroy() { - if (_keys != nullptr) { + if (_dbt_keys != nullptr) { for (int i = 0; i < _num_pivots; i++) { - toku_destroy_dbt(&_keys[i]); + toku_destroy_dbt(&_dbt_keys[i]); } - toku_free(_keys); + toku_free(_dbt_keys); + _dbt_keys = nullptr; } - _keys = nullptr; + if (_fixed_keys != nullptr) { + toku_free(_fixed_keys); + _fixed_keys = nullptr; + } + _fixed_keylen = 0; _num_pivots = 0; _total_size = 0; } +void ftnode_pivot_keys::_convert_to_fixed_format() { + invariant(!_fixed_format()); + + // convert to a tightly packed array of fixed length keys + _fixed_keylen = _dbt_keys[0].size; + _total_size = _fixed_keylen * _num_pivots; + XMALLOC_N(_total_size, _fixed_keys); + for (int i = 0; i < _num_pivots; i++) { + 
invariant(_dbt_keys[i].size == _fixed_keylen); + memcpy(_fixed_key(i), _dbt_keys[i].data, _fixed_keylen); + } + + // destroy the dbt array format + for (int i = 0; i < _num_pivots; i++) { + toku_destroy_dbt(&_dbt_keys[i]); + } + toku_free(_dbt_keys); + _dbt_keys = nullptr; + + invariant(_fixed_format()); +} + +void ftnode_pivot_keys::_convert_to_dbt_format() { + invariant(_fixed_format()); + + // convert to an aray of dbts + XREALLOC_N(_num_pivots, _dbt_keys); + for (int i = 0; i < _num_pivots; i++) { + toku_memdup_dbt(&_dbt_keys[i], _fixed_key(i), _fixed_keylen); + } + + // destroy the fixed key format + toku_free(_fixed_keys); + _fixed_keys = nullptr; + _fixed_keylen = 0; + + invariant(!_fixed_format()); +} + void ftnode_pivot_keys::deserialize_from_rbuf(struct rbuf *rb, int n) { - XMALLOC_N(n, _keys); _num_pivots = n; _total_size = 0; + _fixed_keys = nullptr; + _fixed_keylen = 0; + _dbt_keys = nullptr; + + XMALLOC_N(_num_pivots, _dbt_keys); + bool keys_same_size = true; for (int i = 0; i < _num_pivots; i++) { bytevec pivotkeyptr; uint32_t size; rbuf_bytes(rb, &pivotkeyptr, &size); - toku_memdup_dbt(&_keys[i], pivotkeyptr, size); + toku_memdup_dbt(&_dbt_keys[i], pivotkeyptr, size); _total_size += size; + if (i > 0 && keys_same_size && _dbt_keys[i].size != _dbt_keys[i - 1].size) { + // not all keys are the same size, we'll stick to the dbt array format + keys_same_size = false; + } + } + + if (keys_same_size && _num_pivots > 0) { + _convert_to_fixed_format(); } } -const DBT *ftnode_pivot_keys::get_pivot(int i) const { +DBT ftnode_pivot_keys::get_pivot(int i) const { paranoid_invariant(i < _num_pivots); - return &_keys[i]; + if (_fixed_format()) { + paranoid_invariant(i * _fixed_keylen < _total_size); + DBT dbt; + toku_fill_dbt(&dbt, _fixed_key(i), _fixed_keylen); + return dbt; + } else { + return _dbt_keys[i]; + } } -void ftnode_pivot_keys::_add_key(const DBT *key, int i) { - toku_clone_dbt(&_keys[i], *key); - _total_size += _keys[i].size; +DBT 
*ftnode_pivot_keys::fill_pivot(int i, DBT *dbt) const { + paranoid_invariant(i < _num_pivots); + if (_fixed_format()) { + toku_fill_dbt(dbt, _fixed_key(i), _fixed_keylen); + } else { + toku_copyref_dbt(dbt, _dbt_keys[i]); + } + return dbt; } -void ftnode_pivot_keys::_destroy_key(int i) { - invariant(_total_size >= _keys[i].size); - _total_size -= _keys[i].size; - toku_destroy_dbt(&_keys[i]); +void ftnode_pivot_keys::_add_key_dbt(const DBT *key, int i) { + toku_clone_dbt(&_dbt_keys[i], *key); + _total_size += _dbt_keys[i].size; +} + +void ftnode_pivot_keys::_destroy_key_dbt(int i) { + invariant(_total_size >= _dbt_keys[i].size); + _total_size -= _dbt_keys[i].size; + toku_destroy_dbt(&_dbt_keys[i]); +} + +void ftnode_pivot_keys::_insert_at_dbt(const DBT *key, int i) { + // make space for a new pivot, slide existing keys to the right + REALLOC_N(_num_pivots + 1, _dbt_keys); + memmove(&_dbt_keys[i + 1], &_dbt_keys[i], (_num_pivots - i) * sizeof(DBT)); + _add_key_dbt(key, i); +} + +void ftnode_pivot_keys::_insert_at_fixed(const DBT *key, int i) { + REALLOC_N((_num_pivots + 1) * _fixed_keylen, _fixed_keys); + memmove(_fixed_key(i + 1), _fixed_key(i), (_num_pivots - i) * _fixed_keylen); + memcpy(_fixed_key(i), key->data, _fixed_keylen); + _total_size += _fixed_keylen; } void ftnode_pivot_keys::insert_at(const DBT *key, int i) { invariant(i <= _num_pivots); // it's ok to insert at the end, so we check <= n - // make space for a new pivot, slide existing keys to the right - REALLOC_N(_num_pivots + 1, _keys); - memmove(&_keys[i + 1], &_keys[i], (_num_pivots - i) * sizeof(DBT)); + // if the new key doesn't have the same size, we can't be in fixed format + if (_fixed_format() && key->size != _fixed_keylen) { + _convert_to_dbt_format(); + } + if (_fixed_format()) { + _insert_at_fixed(key, i); + } else { + _insert_at_dbt(key, i); + } _num_pivots++; - _add_key(key, i); + + invariant(total_size() > 0); +} + +void ftnode_pivot_keys::_append_dbt(const ftnode_pivot_keys &pivotkeys) { 
+ REALLOC_N(_num_pivots + pivotkeys._num_pivots, _dbt_keys); + bool other_fixed = pivotkeys._fixed_format(); + for (int i = 0; i < pivotkeys._num_pivots; i++) { + toku_memdup_dbt(&_dbt_keys[_num_pivots + i], + other_fixed ? pivotkeys._fixed_key(i) : + pivotkeys._dbt_keys[i].data, + other_fixed ? pivotkeys._fixed_keylen : + pivotkeys._dbt_keys[i].size); + } +} + +void ftnode_pivot_keys::_append_fixed(const ftnode_pivot_keys &pivotkeys) { + if (pivotkeys._fixed_format() && pivotkeys._fixed_keylen == _fixed_keylen) { + // other pivotkeys have the same fixed keylen + REALLOC_N((_num_pivots + pivotkeys._num_pivots) * _fixed_keylen, _fixed_keys); + memcpy(_fixed_key(_num_pivots), pivotkeys._fixed_keys, pivotkeys._total_size); + } else { + // must convert to dbt format, other pivotkeys have different length'd keys + _convert_to_dbt_format(); + _append_dbt(pivotkeys); + } } void ftnode_pivot_keys::append(const ftnode_pivot_keys &pivotkeys) { - REALLOC_N(_num_pivots + pivotkeys._num_pivots, _keys); - for (int i = 0; i < pivotkeys._num_pivots; i++) { - const DBT *key = &pivotkeys._keys[i]; - toku_memdup_dbt(&_keys[_num_pivots + i], key->data, key->size); + if (_fixed_format()) { + _append_fixed(pivotkeys); + } else { + _append_dbt(pivotkeys); } _num_pivots += pivotkeys._num_pivots; _total_size += pivotkeys._total_size; } +void ftnode_pivot_keys::_replace_at_dbt(const DBT *key, int i) { + _destroy_key_dbt(i); + _add_key_dbt(key, i); +} + +void ftnode_pivot_keys::_replace_at_fixed(const DBT *key, int i) { + if (key->size == _fixed_keylen) { + memcpy(_fixed_key(i), key->data, _fixed_keylen); + } else { + // must convert to dbt format, replacement key has different length + _convert_to_dbt_format(); + _replace_at_dbt(key, i); + } +} + void ftnode_pivot_keys::replace_at(const DBT *key, int i) { if (i < _num_pivots) { - _destroy_key(i); - _add_key(key, i); + if (_fixed_format()) { + _replace_at_fixed(key, i); + } else { + _replace_at_dbt(key, i); + } } else { invariant(i == 
_num_pivots); // appending to the end is ok insert_at(key, i); } + invariant(total_size() > 0); +} + +void ftnode_pivot_keys::_delete_at_fixed(int i) { + memmove(_fixed_key(i), _fixed_key(i + 1), (_num_pivots - 1 - i) * _fixed_keylen); + _total_size -= _fixed_keylen; +} + +void ftnode_pivot_keys::_delete_at_dbt(int i) { + // slide over existing keys, then shrink down to size + _destroy_key_dbt(i); + memmove(&_dbt_keys[i], &_dbt_keys[i + 1], (_num_pivots - 1 - i) * sizeof(DBT)); + REALLOC_N(_num_pivots - 1, _dbt_keys); } void ftnode_pivot_keys::delete_at(int i) { invariant(i < _num_pivots); - _destroy_key(i); - // slide over existing keys - memmove(&_keys[i], &_keys[i + 1], (_num_pivots - 1 - i) * sizeof(DBT)); + if (_fixed_format()) { + _delete_at_fixed(i); + } else { + _delete_at_dbt(i); + } - // shrink down to the new size _num_pivots--; - REALLOC_N(_num_pivots, _keys); +} + +void ftnode_pivot_keys::_split_at_fixed(int i, ftnode_pivot_keys *other) { + // recreate the other set of pivots from index >= i + other->_create_from_fixed_keys(_fixed_key(i), _fixed_keylen, _num_pivots - i); + + // shrink down to size + _total_size = i * _fixed_keylen; + REALLOC_N(_total_size, _fixed_keys); +} + +void ftnode_pivot_keys::_split_at_dbt(int i, ftnode_pivot_keys *other) { + // recreate the other set of pivots from index >= i + other->create_from_dbts(&_dbt_keys[i], _num_pivots - i); + + // destroy everything greater, shrink down to size + for (int k = i; k < _num_pivots; k++) { + _destroy_key_dbt(k); + } + REALLOC_N(i, _dbt_keys); } void ftnode_pivot_keys::split_at(int i, ftnode_pivot_keys *other) { if (i < _num_pivots) { - other->create_from_dbts(&_keys[i], _num_pivots - i); - - // destroy everything greater - for (int k = i; k < _num_pivots; k++) { - _destroy_key(k); + if (_fixed_format()) { + _split_at_fixed(i, other); + } else { + _split_at_dbt(i, other); } - _num_pivots = i; - REALLOC_N(_num_pivots, _keys); } } +void ftnode_pivot_keys::serialize_to_wbuf(struct wbuf *wb) 
const { + bool fixed = _fixed_format(); + size_t written = 0; + for (int i = 0; i < _num_pivots; i++) { + size_t size = fixed ? _fixed_keylen : _dbt_keys[i].size; + invariant(size); + wbuf_nocrc_bytes(wb, fixed ? _fixed_key(i) : _dbt_keys[i].data, size); + written += size; + } + invariant(written == _total_size); +} + int ftnode_pivot_keys::num_pivots() const { + // if we have fixed size keys, the number of pivots should be consistent + paranoid_invariant(_fixed_keys == nullptr || (_total_size == _fixed_keylen * _num_pivots)); return _num_pivots; } size_t ftnode_pivot_keys::total_size() const { + // if we have fixed size keys, the total size should be consistent + paranoid_invariant(_fixed_keys == nullptr || (_total_size == _fixed_keylen * _num_pivots)); return _total_size; } -void ftnode_pivot_keys::serialize_to_wbuf(struct wbuf *wb) const { - for (int i = 0; i < _num_pivots; i++) { - wbuf_nocrc_bytes(wb, _keys[i].data, _keys[i].size); - } -} - // Effect: Fill in N as an empty ftnode. // TODO: Rename toku_ftnode_create void toku_initialize_empty_ftnode(FTNODE n, BLOCKNUM blocknum, int height, int num_children, int layout_version, unsigned int flags) { @@ -465,20 +678,20 @@ find_bounds_within_message_tree( const toku::comparator &cmp, const find_bounds_omt_t &message_tree, /// tree holding message buffer offsets, in which we want to look for indices message_buffer *msg_buffer, /// message buffer in which messages are found - struct pivot_bounds const * const bounds, /// key bounds within the basement node we're applying messages to + const pivot_bounds &bounds, /// key bounds within the basement node we're applying messages to uint32_t *lbi, /// (output) "lower bound inclusive" (index into message_tree) uint32_t *ube /// (output) "upper bound exclusive" (index into message_tree) ) { int r = 0; - if (bounds->lower_bound_exclusive) { + if (!toku_dbt_is_empty(bounds.lbe())) { // By setting msn to MAX_MSN and by using direction of +1, we will // get the first message 
greater than (in (key, msn) order) any // message (with any msn) with the key lower_bound_exclusive. // This will be a message we want to try applying, so it is the // "lower bound inclusive" within the message_tree. - struct toku_msg_buffer_key_msn_heaviside_extra lbi_extra(cmp, msg_buffer, bounds->lower_bound_exclusive, MAX_MSN); + struct toku_msg_buffer_key_msn_heaviside_extra lbi_extra(cmp, msg_buffer, bounds.lbe(), MAX_MSN); int32_t found_lb; r = message_tree.template find(lbi_extra, +1, &found_lb, lbi); if (r == DB_NOTFOUND) { @@ -489,11 +702,11 @@ find_bounds_within_message_tree( *ube = 0; return; } - if (bounds->upper_bound_inclusive) { + if (!toku_dbt_is_empty(bounds.ubi())) { // Check if what we found for lbi is greater than the upper // bound inclusive that we have. If so, there are no relevant // messages between these bounds. - const DBT *ubi = bounds->upper_bound_inclusive; + const DBT *ubi = bounds.ubi(); const int32_t offset = found_lb; DBT found_lbidbt; msg_buffer->get_message_key_msn(offset, &found_lbidbt, nullptr); @@ -514,12 +727,12 @@ find_bounds_within_message_tree( // the first message in the OMT. *lbi = 0; } - if (bounds->upper_bound_inclusive) { + if (!toku_dbt_is_empty(bounds.ubi())) { // Again, we use an msn of MAX_MSN and a direction of +1 to get // the first thing bigger than the upper_bound_inclusive key. // This is therefore the smallest thing we don't want to apply, // and omt::iterate_on_range will not examine it. 
- struct toku_msg_buffer_key_msn_heaviside_extra ube_extra(cmp, msg_buffer, bounds->upper_bound_inclusive, MAX_MSN); + struct toku_msg_buffer_key_msn_heaviside_extra ube_extra(cmp, msg_buffer, bounds.ubi(), MAX_MSN); r = message_tree.template find(ube_extra, +1, nullptr, ube); if (r == DB_NOTFOUND) { // Couldn't find anything in the buffer bigger than our key, @@ -547,7 +760,7 @@ bnc_apply_messages_to_basement_node( BASEMENTNODE bn, // where to apply messages FTNODE ancestor, // the ancestor node where we can find messages to apply int childnum, // which child buffer of ancestor contains messages we want - struct pivot_bounds const * const bounds, // contains pivot key bounds of this basement node + const pivot_bounds &bounds, // contains pivot key bounds of this basement node txn_gc_info *gc_info, bool* msgs_applied ) @@ -641,13 +854,13 @@ apply_ancestors_messages_to_bn( FTNODE node, int childnum, ANCESTORS ancestors, - struct pivot_bounds const * const bounds, + const pivot_bounds &bounds, txn_gc_info *gc_info, bool* msgs_applied ) { BASEMENTNODE curr_bn = BLB(node, childnum); - struct pivot_bounds curr_bounds = next_pivot_keys(node, childnum, bounds); + const pivot_bounds curr_bounds = bounds.next_bounds(node, childnum); for (ANCESTORS curr_ancestors = ancestors; curr_ancestors; curr_ancestors = curr_ancestors->next) { if (curr_ancestors->node->max_msn_applied_to_node_on_disk.msn > curr_bn->max_msn_applied.msn) { paranoid_invariant(BP_STATE(curr_ancestors->node, curr_ancestors->childnum) == PT_AVAIL); @@ -656,7 +869,7 @@ apply_ancestors_messages_to_bn( curr_bn, curr_ancestors->node, curr_ancestors->childnum, - &curr_bounds, + curr_bounds, gc_info, msgs_applied ); @@ -678,7 +891,7 @@ toku_apply_ancestors_messages_to_node ( FT_HANDLE t, FTNODE node, ANCESTORS ancestors, - struct pivot_bounds const * const bounds, + const pivot_bounds &bounds, bool* msgs_applied, int child_to_read ) @@ -741,13 +954,13 @@ static bool bn_needs_ancestors_messages( FT ft, FTNODE node, 
int childnum, - struct pivot_bounds const * const bounds, + const pivot_bounds &bounds, ANCESTORS ancestors, MSN* max_msn_applied ) { BASEMENTNODE bn = BLB(node, childnum); - struct pivot_bounds curr_bounds = next_pivot_keys(node, childnum, bounds); + const pivot_bounds curr_bounds = bounds.next_bounds(node, childnum); bool needs_ancestors_messages = false; for (ANCESTORS curr_ancestors = ancestors; curr_ancestors; curr_ancestors = curr_ancestors->next) { if (curr_ancestors->node->max_msn_applied_to_node_on_disk.msn > bn->max_msn_applied.msn) { @@ -762,7 +975,7 @@ static bool bn_needs_ancestors_messages( find_bounds_within_message_tree(ft->cmp, bnc->stale_message_tree, &bnc->msg_buffer, - &curr_bounds, + curr_bounds, &stale_lbi, &stale_ube); if (stale_lbi < stale_ube) { @@ -774,7 +987,7 @@ static bool bn_needs_ancestors_messages( find_bounds_within_message_tree(ft->cmp, bnc->fresh_message_tree, &bnc->msg_buffer, - &curr_bounds, + curr_bounds, &fresh_lbi, &fresh_ube); if (fresh_lbi < fresh_ube) { @@ -794,7 +1007,7 @@ bool toku_ft_leaf_needs_ancestors_messages( FT ft, FTNODE node, ANCESTORS ancestors, - struct pivot_bounds const * const bounds, + const pivot_bounds &bounds, MSN *const max_msn_in_path, int child_to_read ) @@ -1767,9 +1980,11 @@ int toku_ftnode_which_child(FTNODE node, const DBT *k, const toku::comparator &c // a funny case of no pivots if (node->n_children <= 1) return 0; + DBT pivot; + // check the last key to optimize seq insertions int n = node->n_children-1; - int c = ft_compare_pivot(cmp, k, node->pivotkeys.get_pivot(n - 1)); + int c = ft_compare_pivot(cmp, k, node->pivotkeys.fill_pivot(n - 1, &pivot)); if (c > 0) return n; // binary search the pivots @@ -1778,7 +1993,7 @@ int toku_ftnode_which_child(FTNODE node, const DBT *k, const toku::comparator &c int mi; while (lo < hi) { mi = (lo + hi) / 2; - c = ft_compare_pivot(cmp, k, node->pivotkeys.get_pivot(mi)); + c = ft_compare_pivot(cmp, k, node->pivotkeys.fill_pivot(mi, &pivot)); if (c > 0) { lo 
= mi+1; continue; @@ -1794,12 +2009,13 @@ int toku_ftnode_which_child(FTNODE node, const DBT *k, const toku::comparator &c // Used for HOT. int toku_ftnode_hot_next_child(FTNODE node, const DBT *k, const toku::comparator &cmp) { + DBT pivot; int low = 0; int hi = node->n_children - 1; int mi; while (low < hi) { mi = (low + hi) / 2; - int r = ft_compare_pivot(cmp, k, node->pivotkeys.get_pivot(mi)); + int r = ft_compare_pivot(cmp, k, node->pivotkeys.fill_pivot(mi, &pivot)); if (r > 0) { low = mi + 1; } else if (r < 0) { diff --git a/ft/node.h b/ft/node.h index 6300d92e86e..9fbc18cb896 100644 --- a/ft/node.h +++ b/ft/node.h @@ -106,7 +106,7 @@ public: void create_empty(); // effect: create pivot keys by copying the given DBT array - void create_from_dbts(const DBT *keys, int num_pivots); + void create_from_dbts(const DBT *keys, int n); // effect: create pivot keys as a clone of an existing set of pivotkeys void create_from_pivot_keys(const ftnode_pivot_keys &pivotkeys); @@ -114,10 +114,14 @@ public: void destroy(); // effect: deserialize pivot keys previously serialized by serialize_to_wbuf() - void deserialize_from_rbuf(struct rbuf *rb, int num_pivots); + void deserialize_from_rbuf(struct rbuf *rb, int n); // returns: unowned DBT representing the i'th pivot key - const DBT *get_pivot(int i) const; + DBT get_pivot(int i) const; + + // effect: fills a DBT with the i'th pivot key + // returns: the given dbt + DBT *fill_pivot(int i, DBT *dbt) const; // effect: insert a pivot into the i'th position, shifting others to the right void insert_at(const DBT *key, int i); @@ -136,21 +140,59 @@ public: // requires: *other is empty (size == 0) void split_at(int i, ftnode_pivot_keys *other); + // effect: serialize pivot keys to a wbuf + // requires: wbuf has at least ftnode_pivot_keys::total_size() bytes available + void serialize_to_wbuf(struct wbuf *wb) const; + int num_pivots() const; // return: the sum of the keys sizes of each pivot size_t total_size() const; - // effect: 
serialize pivot keys to a wbuf - // requires: wbuf has at least ftnode_pivot_keys::total_size() bytes available - void serialize_to_wbuf(struct wbuf *wb) const; - private: - // adds/destroys keys at a certain index, maintaining _total_size, but not _num_pivots - void _add_key(const DBT *key, int i); - void _destroy_key(int i); + // effect: create pivot keys, in fixed key format, by copying the given key array + void _create_from_fixed_keys(const char *fixedkeys, size_t fixed_keylen, int n); + + char *_fixed_key(int i) const { + return &_fixed_keys[i * _fixed_keylen]; + } + + bool _fixed_format() const { + return _fixed_keys != nullptr; + } + + void sanity_check() const; + + void _insert_at_dbt(const DBT *key, int i); + void _append_dbt(const ftnode_pivot_keys &pivotkeys); + void _replace_at_dbt(const DBT *key, int i); + void _delete_at_dbt(int i); + void _split_at_dbt(int i, ftnode_pivot_keys *other); + + void _insert_at_fixed(const DBT *key, int i); + void _append_fixed(const ftnode_pivot_keys &pivotkeys); + void _replace_at_fixed(const DBT *key, int i); + void _delete_at_fixed(int i); + void _split_at_fixed(int i, ftnode_pivot_keys *other); + + // adds/destroys keys at a certain index (in dbt format), + // maintaining _total_size, but not _num_pivots + void _add_key_dbt(const DBT *key, int i); + void _destroy_key_dbt(int i); + + // conversions to and from packed key array format + void _convert_to_dbt_format(); + void _convert_to_fixed_format(); + + // If every key is _fixed_keylen long, then _fixed_key is a + // packed array of keys.. + char *_fixed_keys; + size_t _fixed_keylen; + + // ..otherwise _fixed_keys is null and we store an array of dbts, + // each representing a key. this is simpler but less cache-efficient. 
+ DBT *_dbt_keys; - DBT *_keys; int _num_pivots; size_t _total_size; }; @@ -482,12 +524,13 @@ void toku_ft_bnc_move_messages_to_stale(FT ft, NONLEAF_CHILDINFO bnc); void toku_move_ftnode_messages_to_stale(FT ft, FTNODE node); // TODO: Should ft_handle just be FT? +class pivot_bounds; void toku_apply_ancestors_messages_to_node(FT_HANDLE t, FTNODE node, ANCESTORS ancestors, - struct pivot_bounds const *const bounds, + const pivot_bounds &bounds, bool *msgs_applied, int child_to_read); bool toku_ft_leaf_needs_ancestors_messages(FT ft, FTNODE node, ANCESTORS ancestors, - struct pivot_bounds const *const bounds, + const pivot_bounds &bounds, MSN *const max_msn_in_path, int child_to_read); void toku_ft_bn_update_max_msn(FTNODE node, MSN max_msn_applied, int child_to_read); diff --git a/ft/tests/ft-serialize-test.cc b/ft/tests/ft-serialize-test.cc index 11d8a2989f0..de85e6c609b 100644 --- a/ft/tests/ft-serialize-test.cc +++ b/ft/tests/ft-serialize-test.cc @@ -360,7 +360,7 @@ test_serialize_leaf_check_msn(enum ftnode_verify_type bft, bool do_clone) { assert(leafentry_memsize(curr_le) == leafentry_memsize(elts[last_i].le)); assert(memcmp(curr_le, elts[last_i].le, leafentry_memsize(curr_le)) == 0); if (bn < npartitions-1) { - assert(strcmp((char*)dn->pivotkeys.get_pivot(bn)->data, elts[last_i].keyp) <= 0); + assert(strcmp((char*)dn->pivotkeys.get_pivot(bn).data, elts[last_i].keyp) <= 0); } // TODO for later, get a key comparison here as well last_i++; @@ -495,7 +495,7 @@ test_serialize_leaf_with_large_pivots(enum ftnode_verify_type bft, bool do_clone assert(leafentry_memsize(curr_le) == leafentry_memsize(les[last_i].le)); assert(memcmp(curr_le, les[last_i].le, leafentry_memsize(curr_le)) == 0); if (bn < npartitions-1) { - assert(strcmp((char*)dn->pivotkeys.get_pivot(bn)->data, les[last_i].keyp) <= 0); + assert(strcmp((char*)dn->pivotkeys.get_pivot(bn).data, les[last_i].keyp) <= 0); } // TODO for later, get a key comparison here as well last_i++; @@ -618,7 +618,7 @@ 
test_serialize_leaf_with_many_rows(enum ftnode_verify_type bft, bool do_clone) { assert(leafentry_memsize(curr_le) == leafentry_memsize(les[last_i].le)); assert(memcmp(curr_le, les[last_i].le, leafentry_memsize(curr_le)) == 0); if (bn < npartitions-1) { - uint32_t *CAST_FROM_VOIDP(pivot, dn->pivotkeys.get_pivot(bn)->data); + uint32_t *CAST_FROM_VOIDP(pivot, dn->pivotkeys.get_pivot(bn).data); void* tmp = les[last_i].keyp; uint32_t *CAST_FROM_VOIDP(item, tmp); assert(*pivot >= *item); @@ -759,7 +759,7 @@ test_serialize_leaf_with_large_rows(enum ftnode_verify_type bft, bool do_clone) assert(leafentry_memsize(curr_le) == leafentry_memsize(les[last_i].le)); assert(memcmp(curr_le, les[last_i].le, leafentry_memsize(curr_le)) == 0); if (bn < npartitions-1) { - assert(strcmp((char*)dn->pivotkeys.get_pivot(bn)->data, (char*)(les[last_i].keyp)) <= 0); + assert(strcmp((char*)dn->pivotkeys.get_pivot(bn).data, (char*)(les[last_i].keyp)) <= 0); } // TODO for later, get a key comparison here as well last_i++; @@ -888,7 +888,7 @@ test_serialize_leaf_with_empty_basement_nodes(enum ftnode_verify_type bft, bool assert(leafentry_memsize(curr_le) == leafentry_memsize(elts[last_i].le)); assert(memcmp(curr_le, elts[last_i].le, leafentry_memsize(curr_le)) == 0); if (bn < npartitions-1) { - assert(strcmp((char*)dn->pivotkeys.get_pivot(bn)->data, (char*)(elts[last_i].keyp)) <= 0); + assert(strcmp((char*)dn->pivotkeys.get_pivot(bn).data, (char*)(elts[last_i].keyp)) <= 0); } // TODO for later, get a key comparison here as well last_i++; @@ -1107,8 +1107,8 @@ test_serialize_nonleaf(enum ftnode_verify_type bft, bool do_clone) { assert(dn->layout_version_read_from_disk ==FT_LAYOUT_VERSION); assert(dn->height == 1); assert(dn->n_children==2); - assert(strcmp((char*)dn->pivotkeys.get_pivot(0)->data, "hello")==0); - assert(dn->pivotkeys.get_pivot(0)->size==6); + assert(strcmp((char*)dn->pivotkeys.get_pivot(0).data, "hello")==0); + assert(dn->pivotkeys.get_pivot(0).size==6); 
assert(BP_BLOCKNUM(dn,0).b==30); assert(BP_BLOCKNUM(dn,1).b==35); diff --git a/ft/tests/orthopush-flush.cc b/ft/tests/orthopush-flush.cc index 3e3d7a560e6..8ac3278c41e 100644 --- a/ft/tests/orthopush-flush.cc +++ b/ft/tests/orthopush-flush.cc @@ -737,9 +737,8 @@ flush_to_leaf(FT_HANDLE t, bool make_leaf_up_to_date, bool use_flush) { BP_STATE(parentnode, 0) = PT_AVAIL; parentnode->max_msn_applied_to_node_on_disk = max_parent_msn; struct ancestors ancestors = { .node = parentnode, .childnum = 0, .next = NULL }; - const struct pivot_bounds infinite_bounds = { .lower_bound_exclusive = NULL, .upper_bound_inclusive = NULL }; bool msgs_applied; - toku_apply_ancestors_messages_to_node(t, child, &ancestors, &infinite_bounds, &msgs_applied, -1); + toku_apply_ancestors_messages_to_node(t, child, &ancestors, pivot_bounds::infinite_bounds(), &msgs_applied, -1); struct checkit_fn { int operator()(FT_MSG UU(msg), bool is_fresh) { @@ -962,12 +961,11 @@ flush_to_leaf_with_keyrange(FT_HANDLE t, bool make_leaf_up_to_date) { parentnode->max_msn_applied_to_node_on_disk = max_parent_msn; struct ancestors ancestors = { .node = parentnode, .childnum = 0, .next = NULL }; DBT lbe, ubi; - const struct pivot_bounds bounds = { - .lower_bound_exclusive = toku_init_dbt(&lbe), - .upper_bound_inclusive = toku_clone_dbt(&ubi, childkeys[7]) - }; + toku_init_dbt(&lbe); + toku_clone_dbt(&ubi, childkeys[7]); + const pivot_bounds bounds(lbe, ubi); bool msgs_applied; - toku_apply_ancestors_messages_to_node(t, child, &ancestors, &bounds, &msgs_applied, -1); + toku_apply_ancestors_messages_to_node(t, child, &ancestors, bounds, &msgs_applied, -1); struct checkit_fn { DBT *childkeys; @@ -1162,9 +1160,8 @@ compare_apply_and_flush(FT_HANDLE t, bool make_leaf_up_to_date) { BP_STATE(parentnode, 0) = PT_AVAIL; parentnode->max_msn_applied_to_node_on_disk = max_parent_msn; struct ancestors ancestors = { .node = parentnode, .childnum = 0, .next = NULL }; - const struct pivot_bounds infinite_bounds = { 
.lower_bound_exclusive = NULL, .upper_bound_inclusive = NULL }; bool msgs_applied; - toku_apply_ancestors_messages_to_node(t, child2, &ancestors, &infinite_bounds, &msgs_applied, -1); + toku_apply_ancestors_messages_to_node(t, child2, &ancestors, pivot_bounds::infinite_bounds(), &msgs_applied, -1); struct checkit_fn { int operator()(FT_MSG UU(msg), bool is_fresh) { diff --git a/ft/tokuftdump.cc b/ft/tokuftdump.cc index d489130114b..a366e5de116 100644 --- a/ft/tokuftdump.cc +++ b/ft/tokuftdump.cc @@ -260,11 +260,11 @@ static void dump_node(int fd, BLOCKNUM blocknum, FT ft) { printf(" pivots:\n"); for (int i=0; in_children-1; i++) { - const DBT *piv = n->pivotkeys.get_pivot(i); + const DBT piv = n->pivotkeys.get_pivot(i); printf(" pivot %2d:", i); if (n->flags) printf(" flags=%x ", n->flags); - print_item(piv->data, piv->size); + print_item(piv.data, piv.size); printf("\n"); } printf(" children:\n"); diff --git a/ft/ybt.cc b/ft/ybt.cc index 42cfecd236c..a86a019caf9 100644 --- a/ft/ybt.cc +++ b/ft/ybt.cc @@ -317,6 +317,12 @@ bool toku_dbt_is_infinite(const DBT *dbt) { return dbt == toku_dbt_positive_infinity() || dbt == toku_dbt_negative_infinity(); } +bool toku_dbt_is_empty(const DBT *dbt) { + // can't have a null data field with a non-zero size + paranoid_invariant(dbt->data != nullptr || dbt->size == 0); + return dbt->data == nullptr; +} + int toku_dbt_infinite_compare(const DBT *a, const DBT *b) { if (a == b) { return 0; diff --git a/ft/ybt.h b/ft/ybt.h index 84293f94b9c..dd90e00fa0a 100644 --- a/ft/ybt.h +++ b/ft/ybt.h @@ -129,6 +129,9 @@ const DBT *toku_dbt_negative_infinity(void); // returns: true if the given dbt is either positive or negative infinity bool toku_dbt_is_infinite(const DBT *dbt); +// returns: true if the given dbt has no data (ie: dbt->data == nullptr) +bool toku_dbt_is_empty(const DBT *dbt); + // effect: compares two potentially infinity-valued dbts // requires: at least one is infinite (assert otherwise) int toku_dbt_infinite_compare(const DBT 
*a, const DBT *b); From 8b63c61afc5088ab0261713c743cb8b6a4b13cfe Mon Sep 17 00:00:00 2001 From: John Esmet Date: Wed, 18 Jun 2014 22:23:22 -0400 Subject: [PATCH 042/190] FT-93 Move pivotkey code to its own file, ft/pivotkeys.cc --- ft/CMakeLists.txt | 1 + ft/node.cc | 347 ------------------------------------ ft/pivotkeys.cc | 445 ++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 446 insertions(+), 347 deletions(-) create mode 100644 ft/pivotkeys.cc diff --git a/ft/CMakeLists.txt b/ft/CMakeLists.txt index b0916ef8de0..01db81c43b9 100644 --- a/ft/CMakeLists.txt +++ b/ft/CMakeLists.txt @@ -54,6 +54,7 @@ set(FT_SOURCES log_upgrade msg_buffer node + pivotkeys quicklz recover rollback diff --git a/ft/node.cc b/ft/node.cc index 7201420e1b8..d00c4085d54 100644 --- a/ft/node.cc +++ b/ft/node.cc @@ -97,353 +97,6 @@ PATENT RIGHTS GRANT: #include "util/scoped_malloc.h" #include "util/sort.h" -void ftnode_pivot_keys::create_empty() { - _num_pivots = 0; - _total_size = 0; - _fixed_keys = nullptr; - _fixed_keylen = 0; - _dbt_keys = nullptr; -} - -void ftnode_pivot_keys::create_from_dbts(const DBT *keys, int n) { - create_empty(); - _num_pivots = n; - - // see if every key has the same length - bool keys_same_size = true; - for (int i = 1; i < _num_pivots; i++) { - if (keys[i].size != keys[i - 1].size) { - keys_same_size = false; - break; - } - } - - if (keys_same_size && _num_pivots > 0) { - // if so, store pivots in a tightly packed array of fixed length keys - _fixed_keylen = keys[0].size; - _total_size = _fixed_keylen * _num_pivots; - XMALLOC_N(_total_size, _fixed_keys); - for (int i = 0; i < _num_pivots; i++) { - invariant(keys[i].size == _fixed_keylen); - memcpy(_fixed_key(i), keys[i].data, _fixed_keylen); - } - } else { - // otherwise we'll just store the pivots in an array of dbts - XMALLOC_N(_num_pivots, _dbt_keys); - for (int i = 0; i < _num_pivots; i++) { - size_t size = keys[i].size; - toku_memdup_dbt(&_dbt_keys[i], keys[i].data, size); - _total_size += 
size; - } - } -} - -void ftnode_pivot_keys::_create_from_fixed_keys(const char *fixedkeys, size_t fixed_keylen, int n) { - create_empty(); - _num_pivots = n; - _fixed_keylen = fixed_keylen; - _total_size = _fixed_keylen * _num_pivots; - XMEMDUP_N(_fixed_keys, fixedkeys, _total_size); -} - -// effect: create pivot keys as a clone of an existing set of pivotkeys -void ftnode_pivot_keys::create_from_pivot_keys(const ftnode_pivot_keys &pivotkeys) { - if (pivotkeys._fixed_format()) { - _create_from_fixed_keys(pivotkeys._fixed_keys, pivotkeys._fixed_keylen, pivotkeys._num_pivots); - } else { - create_from_dbts(pivotkeys._dbt_keys, pivotkeys._num_pivots); - } -} - -void ftnode_pivot_keys::destroy() { - if (_dbt_keys != nullptr) { - for (int i = 0; i < _num_pivots; i++) { - toku_destroy_dbt(&_dbt_keys[i]); - } - toku_free(_dbt_keys); - _dbt_keys = nullptr; - } - if (_fixed_keys != nullptr) { - toku_free(_fixed_keys); - _fixed_keys = nullptr; - } - _fixed_keylen = 0; - _num_pivots = 0; - _total_size = 0; -} - -void ftnode_pivot_keys::_convert_to_fixed_format() { - invariant(!_fixed_format()); - - // convert to a tightly packed array of fixed length keys - _fixed_keylen = _dbt_keys[0].size; - _total_size = _fixed_keylen * _num_pivots; - XMALLOC_N(_total_size, _fixed_keys); - for (int i = 0; i < _num_pivots; i++) { - invariant(_dbt_keys[i].size == _fixed_keylen); - memcpy(_fixed_key(i), _dbt_keys[i].data, _fixed_keylen); - } - - // destroy the dbt array format - for (int i = 0; i < _num_pivots; i++) { - toku_destroy_dbt(&_dbt_keys[i]); - } - toku_free(_dbt_keys); - _dbt_keys = nullptr; - - invariant(_fixed_format()); -} - -void ftnode_pivot_keys::_convert_to_dbt_format() { - invariant(_fixed_format()); - - // convert to an aray of dbts - XREALLOC_N(_num_pivots, _dbt_keys); - for (int i = 0; i < _num_pivots; i++) { - toku_memdup_dbt(&_dbt_keys[i], _fixed_key(i), _fixed_keylen); - } - - // destroy the fixed key format - toku_free(_fixed_keys); - _fixed_keys = nullptr; - 
_fixed_keylen = 0; - - invariant(!_fixed_format()); -} - -void ftnode_pivot_keys::deserialize_from_rbuf(struct rbuf *rb, int n) { - _num_pivots = n; - _total_size = 0; - _fixed_keys = nullptr; - _fixed_keylen = 0; - _dbt_keys = nullptr; - - XMALLOC_N(_num_pivots, _dbt_keys); - bool keys_same_size = true; - for (int i = 0; i < _num_pivots; i++) { - bytevec pivotkeyptr; - uint32_t size; - rbuf_bytes(rb, &pivotkeyptr, &size); - toku_memdup_dbt(&_dbt_keys[i], pivotkeyptr, size); - _total_size += size; - if (i > 0 && keys_same_size && _dbt_keys[i].size != _dbt_keys[i - 1].size) { - // not all keys are the same size, we'll stick to the dbt array format - keys_same_size = false; - } - } - - if (keys_same_size && _num_pivots > 0) { - _convert_to_fixed_format(); - } -} - -DBT ftnode_pivot_keys::get_pivot(int i) const { - paranoid_invariant(i < _num_pivots); - if (_fixed_format()) { - paranoid_invariant(i * _fixed_keylen < _total_size); - DBT dbt; - toku_fill_dbt(&dbt, _fixed_key(i), _fixed_keylen); - return dbt; - } else { - return _dbt_keys[i]; - } -} - -DBT *ftnode_pivot_keys::fill_pivot(int i, DBT *dbt) const { - paranoid_invariant(i < _num_pivots); - if (_fixed_format()) { - toku_fill_dbt(dbt, _fixed_key(i), _fixed_keylen); - } else { - toku_copyref_dbt(dbt, _dbt_keys[i]); - } - return dbt; -} - -void ftnode_pivot_keys::_add_key_dbt(const DBT *key, int i) { - toku_clone_dbt(&_dbt_keys[i], *key); - _total_size += _dbt_keys[i].size; -} - -void ftnode_pivot_keys::_destroy_key_dbt(int i) { - invariant(_total_size >= _dbt_keys[i].size); - _total_size -= _dbt_keys[i].size; - toku_destroy_dbt(&_dbt_keys[i]); -} - -void ftnode_pivot_keys::_insert_at_dbt(const DBT *key, int i) { - // make space for a new pivot, slide existing keys to the right - REALLOC_N(_num_pivots + 1, _dbt_keys); - memmove(&_dbt_keys[i + 1], &_dbt_keys[i], (_num_pivots - i) * sizeof(DBT)); - _add_key_dbt(key, i); -} - -void ftnode_pivot_keys::_insert_at_fixed(const DBT *key, int i) { - REALLOC_N((_num_pivots 
+ 1) * _fixed_keylen, _fixed_keys); - memmove(_fixed_key(i + 1), _fixed_key(i), (_num_pivots - i) * _fixed_keylen); - memcpy(_fixed_key(i), key->data, _fixed_keylen); - _total_size += _fixed_keylen; -} - -void ftnode_pivot_keys::insert_at(const DBT *key, int i) { - invariant(i <= _num_pivots); // it's ok to insert at the end, so we check <= n - - // if the new key doesn't have the same size, we can't be in fixed format - if (_fixed_format() && key->size != _fixed_keylen) { - _convert_to_dbt_format(); - } - - if (_fixed_format()) { - _insert_at_fixed(key, i); - } else { - _insert_at_dbt(key, i); - } - _num_pivots++; - - invariant(total_size() > 0); -} - -void ftnode_pivot_keys::_append_dbt(const ftnode_pivot_keys &pivotkeys) { - REALLOC_N(_num_pivots + pivotkeys._num_pivots, _dbt_keys); - bool other_fixed = pivotkeys._fixed_format(); - for (int i = 0; i < pivotkeys._num_pivots; i++) { - toku_memdup_dbt(&_dbt_keys[_num_pivots + i], - other_fixed ? pivotkeys._fixed_key(i) : - pivotkeys._dbt_keys[i].data, - other_fixed ? 
pivotkeys._fixed_keylen : - pivotkeys._dbt_keys[i].size); - } -} - -void ftnode_pivot_keys::_append_fixed(const ftnode_pivot_keys &pivotkeys) { - if (pivotkeys._fixed_format() && pivotkeys._fixed_keylen == _fixed_keylen) { - // other pivotkeys have the same fixed keylen - REALLOC_N((_num_pivots + pivotkeys._num_pivots) * _fixed_keylen, _fixed_keys); - memcpy(_fixed_key(_num_pivots), pivotkeys._fixed_keys, pivotkeys._total_size); - } else { - // must convert to dbt format, other pivotkeys have different length'd keys - _convert_to_dbt_format(); - _append_dbt(pivotkeys); - } -} - -void ftnode_pivot_keys::append(const ftnode_pivot_keys &pivotkeys) { - if (_fixed_format()) { - _append_fixed(pivotkeys); - } else { - _append_dbt(pivotkeys); - } - _num_pivots += pivotkeys._num_pivots; - _total_size += pivotkeys._total_size; -} - -void ftnode_pivot_keys::_replace_at_dbt(const DBT *key, int i) { - _destroy_key_dbt(i); - _add_key_dbt(key, i); -} - -void ftnode_pivot_keys::_replace_at_fixed(const DBT *key, int i) { - if (key->size == _fixed_keylen) { - memcpy(_fixed_key(i), key->data, _fixed_keylen); - } else { - // must convert to dbt format, replacement key has different length - _convert_to_dbt_format(); - _replace_at_dbt(key, i); - } -} - -void ftnode_pivot_keys::replace_at(const DBT *key, int i) { - if (i < _num_pivots) { - if (_fixed_format()) { - _replace_at_fixed(key, i); - } else { - _replace_at_dbt(key, i); - } - } else { - invariant(i == _num_pivots); // appending to the end is ok - insert_at(key, i); - } - invariant(total_size() > 0); -} - -void ftnode_pivot_keys::_delete_at_fixed(int i) { - memmove(_fixed_key(i), _fixed_key(i + 1), (_num_pivots - 1 - i) * _fixed_keylen); - _total_size -= _fixed_keylen; -} - -void ftnode_pivot_keys::_delete_at_dbt(int i) { - // slide over existing keys, then shrink down to size - _destroy_key_dbt(i); - memmove(&_dbt_keys[i], &_dbt_keys[i + 1], (_num_pivots - 1 - i) * sizeof(DBT)); - REALLOC_N(_num_pivots - 1, _dbt_keys); -} - 
-void ftnode_pivot_keys::delete_at(int i) { - invariant(i < _num_pivots); - - if (_fixed_format()) { - _delete_at_fixed(i); - } else { - _delete_at_dbt(i); - } - - _num_pivots--; -} - -void ftnode_pivot_keys::_split_at_fixed(int i, ftnode_pivot_keys *other) { - // recreate the other set of pivots from index >= i - other->_create_from_fixed_keys(_fixed_key(i), _fixed_keylen, _num_pivots - i); - - // shrink down to size - _total_size = i * _fixed_keylen; - REALLOC_N(_total_size, _fixed_keys); -} - -void ftnode_pivot_keys::_split_at_dbt(int i, ftnode_pivot_keys *other) { - // recreate the other set of pivots from index >= i - other->create_from_dbts(&_dbt_keys[i], _num_pivots - i); - - // destroy everything greater, shrink down to size - for (int k = i; k < _num_pivots; k++) { - _destroy_key_dbt(k); - } - REALLOC_N(i, _dbt_keys); -} - -void ftnode_pivot_keys::split_at(int i, ftnode_pivot_keys *other) { - if (i < _num_pivots) { - if (_fixed_format()) { - _split_at_fixed(i, other); - } else { - _split_at_dbt(i, other); - } - _num_pivots = i; - } -} - -void ftnode_pivot_keys::serialize_to_wbuf(struct wbuf *wb) const { - bool fixed = _fixed_format(); - size_t written = 0; - for (int i = 0; i < _num_pivots; i++) { - size_t size = fixed ? _fixed_keylen : _dbt_keys[i].size; - invariant(size); - wbuf_nocrc_bytes(wb, fixed ? _fixed_key(i) : _dbt_keys[i].data, size); - written += size; - } - invariant(written == _total_size); -} - -int ftnode_pivot_keys::num_pivots() const { - // if we have fixed size keys, the number of pivots should be consistent - paranoid_invariant(_fixed_keys == nullptr || (_total_size == _fixed_keylen * _num_pivots)); - return _num_pivots; -} - -size_t ftnode_pivot_keys::total_size() const { - // if we have fixed size keys, the total size should be consistent - paranoid_invariant(_fixed_keys == nullptr || (_total_size == _fixed_keylen * _num_pivots)); - return _total_size; -} - // Effect: Fill in N as an empty ftnode. 
// TODO: Rename toku_ftnode_create void toku_initialize_empty_ftnode(FTNODE n, BLOCKNUM blocknum, int height, int num_children, int layout_version, unsigned int flags) { diff --git a/ft/pivotkeys.cc b/ft/pivotkeys.cc new file mode 100644 index 00000000000..6cad37106da --- /dev/null +++ b/ft/pivotkeys.cc @@ -0,0 +1,445 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: + +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuDB, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. 
+ +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. + + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. 
If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." +#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
+ +#include + +#include "portability/memory.h" + +#include "ft/node.h" +#include "ft/rbuf.h" +#include "ft/wbuf.h" + +void ftnode_pivot_keys::create_empty() { + _num_pivots = 0; + _total_size = 0; + _fixed_keys = nullptr; + _fixed_keylen = 0; + _dbt_keys = nullptr; +} + +void ftnode_pivot_keys::create_from_dbts(const DBT *keys, int n) { + create_empty(); + _num_pivots = n; + + // see if every key has the same length + bool keys_same_size = true; + for (int i = 1; i < _num_pivots; i++) { + if (keys[i].size != keys[i - 1].size) { + keys_same_size = false; + break; + } + } + + if (keys_same_size && _num_pivots > 0) { + // if so, store pivots in a tightly packed array of fixed length keys + _fixed_keylen = keys[0].size; + _total_size = _fixed_keylen * _num_pivots; + XMALLOC_N(_total_size, _fixed_keys); + for (int i = 0; i < _num_pivots; i++) { + invariant(keys[i].size == _fixed_keylen); + memcpy(_fixed_key(i), keys[i].data, _fixed_keylen); + } + } else { + // otherwise we'll just store the pivots in an array of dbts + XMALLOC_N(_num_pivots, _dbt_keys); + for (int i = 0; i < _num_pivots; i++) { + size_t size = keys[i].size; + toku_memdup_dbt(&_dbt_keys[i], keys[i].data, size); + _total_size += size; + } + } +} + +void ftnode_pivot_keys::_create_from_fixed_keys(const char *fixedkeys, size_t fixed_keylen, int n) { + create_empty(); + _num_pivots = n; + _fixed_keylen = fixed_keylen; + _total_size = _fixed_keylen * _num_pivots; + XMEMDUP_N(_fixed_keys, fixedkeys, _total_size); +} + +// effect: create pivot keys as a clone of an existing set of pivotkeys +void ftnode_pivot_keys::create_from_pivot_keys(const ftnode_pivot_keys &pivotkeys) { + if (pivotkeys._fixed_format()) { + _create_from_fixed_keys(pivotkeys._fixed_keys, pivotkeys._fixed_keylen, pivotkeys._num_pivots); + } else { + create_from_dbts(pivotkeys._dbt_keys, pivotkeys._num_pivots); + } +} + +void ftnode_pivot_keys::destroy() { + if (_dbt_keys != nullptr) { + for (int i = 0; i < _num_pivots; i++) { + 
toku_destroy_dbt(&_dbt_keys[i]); + } + toku_free(_dbt_keys); + _dbt_keys = nullptr; + } + if (_fixed_keys != nullptr) { + toku_free(_fixed_keys); + _fixed_keys = nullptr; + } + _fixed_keylen = 0; + _num_pivots = 0; + _total_size = 0; +} + +void ftnode_pivot_keys::_convert_to_fixed_format() { + invariant(!_fixed_format()); + + // convert to a tightly packed array of fixed length keys + _fixed_keylen = _dbt_keys[0].size; + _total_size = _fixed_keylen * _num_pivots; + XMALLOC_N(_total_size, _fixed_keys); + for (int i = 0; i < _num_pivots; i++) { + invariant(_dbt_keys[i].size == _fixed_keylen); + memcpy(_fixed_key(i), _dbt_keys[i].data, _fixed_keylen); + } + + // destroy the dbt array format + for (int i = 0; i < _num_pivots; i++) { + toku_destroy_dbt(&_dbt_keys[i]); + } + toku_free(_dbt_keys); + _dbt_keys = nullptr; + + invariant(_fixed_format()); +} + +void ftnode_pivot_keys::_convert_to_dbt_format() { + invariant(_fixed_format()); + + // convert to an aray of dbts + XREALLOC_N(_num_pivots, _dbt_keys); + for (int i = 0; i < _num_pivots; i++) { + toku_memdup_dbt(&_dbt_keys[i], _fixed_key(i), _fixed_keylen); + } + + // destroy the fixed key format + toku_free(_fixed_keys); + _fixed_keys = nullptr; + _fixed_keylen = 0; + + invariant(!_fixed_format()); +} + +void ftnode_pivot_keys::deserialize_from_rbuf(struct rbuf *rb, int n) { + _num_pivots = n; + _total_size = 0; + _fixed_keys = nullptr; + _fixed_keylen = 0; + _dbt_keys = nullptr; + + XMALLOC_N(_num_pivots, _dbt_keys); + bool keys_same_size = true; + for (int i = 0; i < _num_pivots; i++) { + bytevec pivotkeyptr; + uint32_t size; + rbuf_bytes(rb, &pivotkeyptr, &size); + toku_memdup_dbt(&_dbt_keys[i], pivotkeyptr, size); + _total_size += size; + if (i > 0 && keys_same_size && _dbt_keys[i].size != _dbt_keys[i - 1].size) { + // not all keys are the same size, we'll stick to the dbt array format + keys_same_size = false; + } + } + + if (keys_same_size && _num_pivots > 0) { + _convert_to_fixed_format(); + } +} + +DBT 
ftnode_pivot_keys::get_pivot(int i) const { + paranoid_invariant(i < _num_pivots); + if (_fixed_format()) { + paranoid_invariant(i * _fixed_keylen < _total_size); + DBT dbt; + toku_fill_dbt(&dbt, _fixed_key(i), _fixed_keylen); + return dbt; + } else { + return _dbt_keys[i]; + } +} + +DBT *ftnode_pivot_keys::fill_pivot(int i, DBT *dbt) const { + paranoid_invariant(i < _num_pivots); + if (_fixed_format()) { + toku_fill_dbt(dbt, _fixed_key(i), _fixed_keylen); + } else { + toku_copyref_dbt(dbt, _dbt_keys[i]); + } + return dbt; +} + +void ftnode_pivot_keys::_add_key_dbt(const DBT *key, int i) { + toku_clone_dbt(&_dbt_keys[i], *key); + _total_size += _dbt_keys[i].size; +} + +void ftnode_pivot_keys::_destroy_key_dbt(int i) { + invariant(_total_size >= _dbt_keys[i].size); + _total_size -= _dbt_keys[i].size; + toku_destroy_dbt(&_dbt_keys[i]); +} + +void ftnode_pivot_keys::_insert_at_dbt(const DBT *key, int i) { + // make space for a new pivot, slide existing keys to the right + REALLOC_N(_num_pivots + 1, _dbt_keys); + memmove(&_dbt_keys[i + 1], &_dbt_keys[i], (_num_pivots - i) * sizeof(DBT)); + _add_key_dbt(key, i); +} + +void ftnode_pivot_keys::_insert_at_fixed(const DBT *key, int i) { + REALLOC_N((_num_pivots + 1) * _fixed_keylen, _fixed_keys); + memmove(_fixed_key(i + 1), _fixed_key(i), (_num_pivots - i) * _fixed_keylen); + memcpy(_fixed_key(i), key->data, _fixed_keylen); + _total_size += _fixed_keylen; +} + +void ftnode_pivot_keys::insert_at(const DBT *key, int i) { + invariant(i <= _num_pivots); // it's ok to insert at the end, so we check <= n + + // if the new key doesn't have the same size, we can't be in fixed format + if (_fixed_format() && key->size != _fixed_keylen) { + _convert_to_dbt_format(); + } + + if (_fixed_format()) { + _insert_at_fixed(key, i); + } else { + _insert_at_dbt(key, i); + } + _num_pivots++; + + invariant(total_size() > 0); +} + +void ftnode_pivot_keys::_append_dbt(const ftnode_pivot_keys &pivotkeys) { + REALLOC_N(_num_pivots + 
pivotkeys._num_pivots, _dbt_keys); + bool other_fixed = pivotkeys._fixed_format(); + for (int i = 0; i < pivotkeys._num_pivots; i++) { + toku_memdup_dbt(&_dbt_keys[_num_pivots + i], + other_fixed ? pivotkeys._fixed_key(i) : + pivotkeys._dbt_keys[i].data, + other_fixed ? pivotkeys._fixed_keylen : + pivotkeys._dbt_keys[i].size); + } +} + +void ftnode_pivot_keys::_append_fixed(const ftnode_pivot_keys &pivotkeys) { + if (pivotkeys._fixed_format() && pivotkeys._fixed_keylen == _fixed_keylen) { + // other pivotkeys have the same fixed keylen + REALLOC_N((_num_pivots + pivotkeys._num_pivots) * _fixed_keylen, _fixed_keys); + memcpy(_fixed_key(_num_pivots), pivotkeys._fixed_keys, pivotkeys._total_size); + } else { + // must convert to dbt format, other pivotkeys have different length'd keys + _convert_to_dbt_format(); + _append_dbt(pivotkeys); + } +} + +void ftnode_pivot_keys::append(const ftnode_pivot_keys &pivotkeys) { + if (_fixed_format()) { + _append_fixed(pivotkeys); + } else { + _append_dbt(pivotkeys); + } + _num_pivots += pivotkeys._num_pivots; + _total_size += pivotkeys._total_size; +} + +void ftnode_pivot_keys::_replace_at_dbt(const DBT *key, int i) { + _destroy_key_dbt(i); + _add_key_dbt(key, i); +} + +void ftnode_pivot_keys::_replace_at_fixed(const DBT *key, int i) { + if (key->size == _fixed_keylen) { + memcpy(_fixed_key(i), key->data, _fixed_keylen); + } else { + // must convert to dbt format, replacement key has different length + _convert_to_dbt_format(); + _replace_at_dbt(key, i); + } +} + +void ftnode_pivot_keys::replace_at(const DBT *key, int i) { + if (i < _num_pivots) { + if (_fixed_format()) { + _replace_at_fixed(key, i); + } else { + _replace_at_dbt(key, i); + } + } else { + invariant(i == _num_pivots); // appending to the end is ok + insert_at(key, i); + } + invariant(total_size() > 0); +} + +void ftnode_pivot_keys::_delete_at_fixed(int i) { + memmove(_fixed_key(i), _fixed_key(i + 1), (_num_pivots - 1 - i) * _fixed_keylen); + _total_size -= 
_fixed_keylen; +} + +void ftnode_pivot_keys::_delete_at_dbt(int i) { + // slide over existing keys, then shrink down to size + _destroy_key_dbt(i); + memmove(&_dbt_keys[i], &_dbt_keys[i + 1], (_num_pivots - 1 - i) * sizeof(DBT)); + REALLOC_N(_num_pivots - 1, _dbt_keys); +} + +void ftnode_pivot_keys::delete_at(int i) { + invariant(i < _num_pivots); + + if (_fixed_format()) { + _delete_at_fixed(i); + } else { + _delete_at_dbt(i); + } + + _num_pivots--; +} + +void ftnode_pivot_keys::_split_at_fixed(int i, ftnode_pivot_keys *other) { + // recreate the other set of pivots from index >= i + other->_create_from_fixed_keys(_fixed_key(i), _fixed_keylen, _num_pivots - i); + + // shrink down to size + _total_size = i * _fixed_keylen; + REALLOC_N(_total_size, _fixed_keys); +} + +void ftnode_pivot_keys::_split_at_dbt(int i, ftnode_pivot_keys *other) { + // recreate the other set of pivots from index >= i + other->create_from_dbts(&_dbt_keys[i], _num_pivots - i); + + // destroy everything greater, shrink down to size + for (int k = i; k < _num_pivots; k++) { + _destroy_key_dbt(k); + } + REALLOC_N(i, _dbt_keys); +} + +void ftnode_pivot_keys::split_at(int i, ftnode_pivot_keys *other) { + if (i < _num_pivots) { + if (_fixed_format()) { + _split_at_fixed(i, other); + } else { + _split_at_dbt(i, other); + } + _num_pivots = i; + } +} + +void ftnode_pivot_keys::serialize_to_wbuf(struct wbuf *wb) const { + bool fixed = _fixed_format(); + size_t written = 0; + for (int i = 0; i < _num_pivots; i++) { + size_t size = fixed ? _fixed_keylen : _dbt_keys[i].size; + invariant(size); + wbuf_nocrc_bytes(wb, fixed ? 
_fixed_key(i) : _dbt_keys[i].data, size); + written += size; + } + invariant(written == _total_size); +} + +int ftnode_pivot_keys::num_pivots() const { + // if we have fixed size keys, the number of pivots should be consistent + paranoid_invariant(_fixed_keys == nullptr || (_total_size == _fixed_keylen * _num_pivots)); + return _num_pivots; +} + +size_t ftnode_pivot_keys::total_size() const { + // if we have fixed size keys, the total size should be consistent + paranoid_invariant(_fixed_keys == nullptr || (_total_size == _fixed_keylen * _num_pivots)); + return _total_size; +} From 42377605b8e4dbeca953517faad7402814c7d40d Mon Sep 17 00:00:00 2001 From: John Esmet Date: Thu, 19 Jun 2014 10:56:46 -0400 Subject: [PATCH 043/190] FT-281 Add support for big-endian keys that can be compared using the builtin key comparison function --- src/tests/threaded_stress_test_helpers.h | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/src/tests/threaded_stress_test_helpers.h b/src/tests/threaded_stress_test_helpers.h index 2a841d1a16d..d587fbdf608 100644 --- a/src/tests/threaded_stress_test_helpers.h +++ b/src/tests/threaded_stress_test_helpers.h @@ -208,6 +208,7 @@ struct cli_args { bool nocrashstatus; // do not print engine status upon crash bool prelock_updates; // update threads perform serial updates on a prelocked range bool disperse_keys; // spread the keys out during a load (by reversing the bits in the loop index) to make a wide tree we can spread out random inserts into + bool memcmp_keys; // pack keys big endian and use the builtin key comparison function in the fractal tree bool direct_io; // use direct I/O const char *print_engine_status; // print engine status rows matching a simple regex "a|b|c", matching strings where a or b or c is a subtring. 
}; @@ -832,12 +833,13 @@ fill_key_buf(int64_t key, uint8_t *data, struct cli_args *args) { } invariant(key >= 0); if (args->key_size == sizeof(int)) { - const int key32 = key; + const int key32 = args->memcmp_keys ? toku_htonl(key) : key; memcpy(data, &key32, sizeof(key32)); } else { invariant(args->key_size >= sizeof(key)); - memcpy(data, &key, sizeof(key)); - memset(data + sizeof(key), 0, args->key_size - sizeof(key)); + const int64_t key64 = args->memcmp_keys ? toku_htonl(key) : key; + memcpy(data, &key64, sizeof(key64)); + memset(data + sizeof(key64), 0, args->key_size - sizeof(key64)); } } @@ -1965,7 +1967,9 @@ static int create_tables(DB_ENV **env_res, DB **db_res, int num_DBs, db_env_set_num_bucket_mutexes(env_args.num_bucket_mutexes); r = db_env_create(&env, 0); assert(r == 0); r = env->set_redzone(env, 0); CKERR(r); - r = env->set_default_bt_compare(env, bt_compare); CKERR(r); + if (!cli_args->memcmp_keys) { + r = env->set_default_bt_compare(env, bt_compare); CKERR(r); + } r = env->set_lk_max_memory(env, env_args.lk_max_memory); CKERR(r); r = env->set_cachesize(env, env_args.cachetable_size / (1 << 30), env_args.cachetable_size % (1 << 30), 1); CKERR(r); r = env->set_lg_bsize(env, env_args.rollback_node_size); CKERR(r); @@ -2163,7 +2167,9 @@ static int open_tables(DB_ENV **env_res, DB **db_res, int num_DBs, db_env_set_num_bucket_mutexes(env_args.num_bucket_mutexes); r = db_env_create(&env, 0); assert(r == 0); r = env->set_redzone(env, 0); CKERR(r); - r = env->set_default_bt_compare(env, bt_compare); CKERR(r); + if (!cli_args->memcmp_keys) { + r = env->set_default_bt_compare(env, bt_compare); CKERR(r); + } r = env->set_lk_max_memory(env, env_args.lk_max_memory); CKERR(r); env->set_update(env, env_args.update_function); r = env->set_cachesize(env, env_args.cachetable_size / (1 << 30), env_args.cachetable_size % (1 << 30), 1); CKERR(r); @@ -2281,6 +2287,7 @@ static struct cli_args UU() get_default_args(void) { .nocrashstatus = false, .prelock_updates = false, 
.disperse_keys = false, + .memcmp_keys = false, .direct_io = false, }; DEFAULT_ARGS.env_args.envdir = TOKU_TEST_FILENAME; @@ -2668,6 +2675,7 @@ static inline void parse_stress_test_args (int argc, char *const argv[], struct BOOL_ARG("nocrashstatus", nocrashstatus), BOOL_ARG("prelock_updates", prelock_updates), BOOL_ARG("disperse_keys", disperse_keys), + BOOL_ARG("memcmp_keys", memcmp_keys), BOOL_ARG("direct_io", direct_io), STRING_ARG("--envdir", env_args.envdir), From c969df95fdaf381a7e0e16aac0711f7a230f6f61 Mon Sep 17 00:00:00 2001 From: John Esmet Date: Thu, 19 Jun 2014 10:56:52 -0400 Subject: [PATCH 044/190] FT-282 Optimize key comparisons by adding an unlikely clause to toku_dbt_is_infinite(). Also, check for the builtin key comparison function to get an inlined version to run when possible. --- ft/comparator.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/ft/comparator.h b/ft/comparator.h index 555e260df6a..74e110ecfd3 100644 --- a/ft/comparator.h +++ b/ft/comparator.h @@ -97,6 +97,9 @@ PATENT RIGHTS GRANT: #include #include +// TODO: this should really all be encapsulated in ft/comparator.cc +int toku_builtin_compare_fun(DB *, const DBT *a, const DBT *b) __attribute__((__visibility__("default"))); + namespace toku { // a comparator object encapsulates the data necessary for @@ -109,6 +112,7 @@ namespace toku { _cmp = cmp; XCALLOC(_fake_db); _fake_db->cmp_descriptor = desc; + _builtin = _cmp == &toku_builtin_compare_fun; } void destroy() { @@ -132,9 +136,10 @@ namespace toku { } int operator()(const DBT *a, const DBT *b) const { - // TODO: add an unlikely() compiler note for this branch - if (toku_dbt_is_infinite(a) || toku_dbt_is_infinite(b)) { + if (__builtin_expect(!!(toku_dbt_is_infinite(a) || toku_dbt_is_infinite(b)), 0)) { return toku_dbt_infinite_compare(a, b); + } else if (_builtin) { + return toku_builtin_compare_fun(nullptr, a, b); } else { // yikes, const sadness here return _cmp(const_cast(_fake_db), a, b); @@ -144,6 
+149,7 @@ namespace toku { private: DB *_fake_db; ft_compare_func _cmp; + bool _builtin; }; } /* namespace toku */ From 866c62e3761a354378bea6bbf8fbbd83d3bcba5f Mon Sep 17 00:00:00 2001 From: John Esmet Date: Thu, 19 Jun 2014 11:27:25 -0400 Subject: [PATCH 045/190] Remove 'struct' keyword from class (which made clang angry) --- ft/ft-ops.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ft/ft-ops.cc b/ft/ft-ops.cc index 1fd343a2b00..0249e3ea09c 100644 --- a/ft/ft-ops.cc +++ b/ft/ft-ops.cc @@ -4114,7 +4114,7 @@ toku_ft_keysrange_internal (FT_HANDLE ft_handle, FTNODE node, struct unlock_ftnode_extra unlock_extra = {ft_handle,childnode,false}; struct unlockers next_unlockers = {true, unlock_ftnode_fun, (void*)&unlock_extra, unlockers}; - const struct pivot_bounds next_bounds = bounds.next_bounds(node, left_child_number); + const pivot_bounds next_bounds = bounds.next_bounds(node, left_child_number); r = toku_ft_keysrange_internal(ft_handle, childnode, key_left, key_right, child_may_find_right, less, equal_left, middle, equal_right, greater, single_basement_node, From 79fb31d9ea973c6ff4acbb1244222fd4cb68407b Mon Sep 17 00:00:00 2001 From: Leif Walsh Date: Fri, 20 Jun 2014 15:28:16 -0400 Subject: [PATCH 046/190] set cmake_policy for cmake 3.0 --- CMakeLists.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 5a5a9713b4e..1bdc5a1f3b6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,6 +1,10 @@ cmake_minimum_required(VERSION 2.8.8 FATAL_ERROR) set(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake_modules") +cmake_policy(SET CMP0026 OLD) +cmake_policy(SET CMP0043 OLD) +cmake_policy(SET CMP0045 OLD) + project(TokuDB) # suppress -rdynamic From 5d0caabf54bf97888f256ec1b725c3ba22dc7d04 Mon Sep 17 00:00:00 2001 From: Leif Walsh Date: Fri, 20 Jun 2014 15:40:21 -0400 Subject: [PATCH 047/190] Revert "set cmake_policy for cmake 3.0" This reverts commit 92557e6bbf14d50febe19b9bb5fa995f96a970d5. 
--- CMakeLists.txt | 4 ---- 1 file changed, 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 1bdc5a1f3b6..5a5a9713b4e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,10 +1,6 @@ cmake_minimum_required(VERSION 2.8.8 FATAL_ERROR) set(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake_modules") -cmake_policy(SET CMP0026 OLD) -cmake_policy(SET CMP0043 OLD) -cmake_policy(SET CMP0045 OLD) - project(TokuDB) # suppress -rdynamic From 4e9ec6a6cf8027b69bedde526b01a7b0226c6872 Mon Sep 17 00:00:00 2001 From: John Esmet Date: Fri, 20 Jun 2014 16:12:41 -0400 Subject: [PATCH 048/190] FT-249 Move de/serialization code to the message buffer class itself, further simplifying the logic that is contained in ft/ft_node-serialize.cc --- ft/ft_node-serialize.cc | 163 ++++++++++------------------------------ ft/msg_buffer.cc | 80 +++++++++++++++++++- ft/msg_buffer.h | 13 +++- 3 files changed, 131 insertions(+), 125 deletions(-) diff --git a/ft/ft_node-serialize.cc b/ft/ft_node-serialize.cc index ba808685a87..7b3f81dad38 100644 --- a/ft/ft_node-serialize.cc +++ b/ft/ft_node-serialize.cc @@ -341,41 +341,26 @@ wbuf_write_offset(const int32_t &offset, const uint32_t UU(idx), struct wbuf *co return 0; } -static void -serialize_child_buffer(NONLEAF_CHILDINFO bnc, struct wbuf *wb) -{ +static void serialize_child_buffer(NONLEAF_CHILDINFO bnc, struct wbuf *wb) { unsigned char ch = FTNODE_PARTITION_MSG_BUFFER; wbuf_nocrc_char(wb, ch); - // serialize the message buffer, first the number of entries, then the elements - wbuf_nocrc_int(wb, toku_bnc_n_entries(bnc)); - struct msg_serialize_fn { - struct wbuf *wb; - msg_serialize_fn(struct wbuf *w) : wb(w) { } - int operator()(FT_MSG msg, bool is_fresh) { - enum ft_msg_type type = (enum ft_msg_type) msg->type; - paranoid_invariant((int) type >= 0 && (int) type < 256); - wbuf_nocrc_char(wb, (unsigned char) type); - wbuf_nocrc_char(wb, (unsigned char) is_fresh); - wbuf_MSN(wb, msg->msn); - wbuf_nocrc_xids(wb, ft_msg_get_xids(msg)); 
- wbuf_nocrc_bytes(wb, ft_msg_get_key(msg), ft_msg_get_keylen(msg)); - wbuf_nocrc_bytes(wb, ft_msg_get_val(msg), ft_msg_get_vallen(msg)); - return 0; - } - } serialize_fn(wb); - bnc->msg_buffer.iterate(serialize_fn); - - bnc_verify_message_trees(bnc); + // serialize the message buffer + bnc->msg_buffer.serialize_to_wbuf(wb); // serialize the message trees (num entries, offsets array): - // fresh, stale, broadcast + // first, verify their contents are consistent with the message buffer + bnc_verify_message_trees(bnc); + + // fresh wbuf_nocrc_int(wb, bnc->fresh_message_tree.size()); bnc->fresh_message_tree.iterate(wb); + // stale wbuf_nocrc_int(wb, bnc->stale_message_tree.size()); bnc->stale_message_tree.iterate(wb); + // broadcast wbuf_nocrc_int(wb, bnc->broadcast_list.size()); bnc->broadcast_list.iterate(wb); } @@ -875,67 +860,23 @@ toku_serialize_ftnode_to (int fd, BLOCKNUM blocknum, FTNODE node, FTNODE_DISK_DA static void deserialize_child_buffer_v26(NONLEAF_CHILDINFO bnc, struct rbuf *rbuf, const toku::comparator &cmp) { - int n_in_this_buffer = rbuf_int(rbuf); - int32_t *fresh_offsets = nullptr, *stale_offsets = nullptr; - int32_t *broadcast_offsets = nullptr; - int nfresh = 0, nstale = 0; - int nbroadcast_offsets = 0; + int32_t nfresh = 0, nstale = 0, nbroadcast = 0; + int32_t *fresh_offsets, *stale_offsets, *broadcast_offsets; // Only sort buffers if we have a valid comparison function. In certain scenarios, // like deserialie_ft_versioned() or tokuftdump, we'll need to deserialize ftnodes // for simple inspection and don't actually require that the message buffers are // properly sorted. This is very ugly, but correct. - const bool sort_buffers = cmp.valid(); + const bool sort = cmp.valid(); - if (sort_buffers) { - XMALLOC_N(n_in_this_buffer, stale_offsets); - XMALLOC_N(n_in_this_buffer, fresh_offsets); - XMALLOC_N(n_in_this_buffer, broadcast_offsets); - } + // read in the message buffer + bnc->msg_buffer.deserialize_from_rbuf(rbuf, + sort ? 
&fresh_offsets : nullptr, &nfresh, + sort ? &stale_offsets : nullptr, &nstale, + sort ? &broadcast_offsets : nullptr, &nbroadcast); - bnc->msg_buffer.resize(rbuf->size + 64); - for (int i = 0; i < n_in_this_buffer; i++) { - bytevec key; ITEMLEN keylen; - bytevec val; ITEMLEN vallen; - // this is weird but it's necessary to pass icc and gcc together - unsigned char ctype = rbuf_char(rbuf); - enum ft_msg_type type = (enum ft_msg_type) ctype; - bool is_fresh = rbuf_char(rbuf); - MSN msn = rbuf_msn(rbuf); - XIDS xids; - xids_create_from_buffer(rbuf, &xids); - rbuf_bytes(rbuf, &key, &keylen); /* Returns a pointer into the rbuf. */ - rbuf_bytes(rbuf, &val, &vallen); - int32_t *dest = nullptr; - if (sort_buffers) { - if (ft_msg_type_applies_once(type)) { - if (is_fresh) { - dest = &fresh_offsets[nfresh]; - nfresh++; - } else { - dest = &stale_offsets[nstale]; - nstale++; - } - } else if (ft_msg_type_applies_all(type) || ft_msg_type_does_nothing(type)) { - dest = &broadcast_offsets[nbroadcast_offsets]; - nbroadcast_offsets++; - } else { - abort(); - } - } - - // TODO: Function to parse stuff out of an rbuf into an FT_MSG - DBT k, v; - FT_MSG_S msg = { - type, msn, xids, - .u = { .id = { toku_fill_dbt(&k, key, keylen), toku_fill_dbt(&v, val, vallen) } } - }; - bnc->msg_buffer.enqueue(&msg, is_fresh, dest); - xids_destroy(&xids); - } - invariant(rbuf->ndone == rbuf->size); - - if (sort_buffers) { + if (sort) { + int n_in_this_buffer = nfresh + nstale + nbroadcast; struct toku_msg_buffer_key_msn_cmp_extra extra(cmp, &bnc->msg_buffer); toku::sort::mergesort_r(fresh_offsets, nfresh, extra); bnc->fresh_message_tree.destroy(); @@ -944,66 +885,44 @@ deserialize_child_buffer_v26(NONLEAF_CHILDINFO bnc, struct rbuf *rbuf, const tok bnc->stale_message_tree.destroy(); bnc->stale_message_tree.create_steal_sorted_array(&stale_offsets, nstale, n_in_this_buffer); bnc->broadcast_list.destroy(); - bnc->broadcast_list.create_steal_sorted_array(&broadcast_offsets, nbroadcast_offsets, 
n_in_this_buffer); + bnc->broadcast_list.create_steal_sorted_array(&broadcast_offsets, nbroadcast, n_in_this_buffer); } } -// effect: deserialize a single message from rbuf and enqueue the result into the given message buffer -static void -msg_buffer_deserialize_msg_from_rbuf(message_buffer *msg_buffer, struct rbuf *rbuf) { - bytevec key, val; - ITEMLEN keylen, vallen; - enum ft_msg_type type = (enum ft_msg_type) rbuf_char(rbuf); - bool is_fresh = rbuf_char(rbuf); - MSN msn = rbuf_msn(rbuf); - XIDS xids; - xids_create_from_buffer(rbuf, &xids); - rbuf_bytes(rbuf, &key, &keylen); /* Returns a pointer into the rbuf. */ - rbuf_bytes(rbuf, &val, &vallen); - // TODO: Function to parse stuff out of an rbuf into an FT_MSG - DBT k, v; - FT_MSG_S msg = { - type, msn, xids, - .u = { .id = { toku_fill_dbt(&k, key, keylen), toku_fill_dbt(&v, val, vallen) } } - }; - msg_buffer->enqueue(&msg, is_fresh, nullptr); - xids_destroy(&xids); -} - static void deserialize_child_buffer(NONLEAF_CHILDINFO bnc, struct rbuf *rbuf) { - int n_in_this_buffer = rbuf_int(rbuf); - int nfresh = 0, nstale = 0, nbroadcast_offsets = 0; - int32_t *XMALLOC_N(n_in_this_buffer, stale_offsets); - int32_t *XMALLOC_N(n_in_this_buffer, fresh_offsets); - int32_t *XMALLOC_N(n_in_this_buffer, broadcast_offsets); - - bnc->msg_buffer.resize(rbuf->size + 64); - for (int i = 0; i < n_in_this_buffer; i++) { - msg_buffer_deserialize_msg_from_rbuf(&bnc->msg_buffer, rbuf); - } + // read in the message buffer + bnc->msg_buffer.deserialize_from_rbuf(rbuf, + nullptr, nullptr, // fresh_offsets, nfresh, + nullptr, nullptr, // stale_offsets, nstale, + nullptr, nullptr); // broadcast_offsets, nbroadcast // read in each message tree (fresh, stale, broadcast) - nfresh = rbuf_int(rbuf); + int32_t nfresh = rbuf_int(rbuf); + int32_t *XMALLOC_N(nfresh, fresh_offsets); for (int i = 0; i < nfresh; i++) { fresh_offsets[i] = rbuf_int(rbuf); } - nstale = rbuf_int(rbuf); + + int32_t nstale = rbuf_int(rbuf); + int32_t *XMALLOC_N(nstale, 
stale_offsets); for (int i = 0; i < nstale; i++) { stale_offsets[i] = rbuf_int(rbuf); } - nbroadcast_offsets = rbuf_int(rbuf); - for (int i = 0; i < nbroadcast_offsets; i++) { + + int32_t nbroadcast = rbuf_int(rbuf); + int32_t *XMALLOC_N(nbroadcast, broadcast_offsets); + for (int i = 0; i < nbroadcast; i++) { broadcast_offsets[i] = rbuf_int(rbuf); } // build OMTs out of each offset array bnc->fresh_message_tree.destroy(); - bnc->fresh_message_tree.create_steal_sorted_array(&fresh_offsets, nfresh, n_in_this_buffer); + bnc->fresh_message_tree.create_steal_sorted_array(&fresh_offsets, nfresh, nfresh); bnc->stale_message_tree.destroy(); - bnc->stale_message_tree.create_steal_sorted_array(&stale_offsets, nstale, n_in_this_buffer); + bnc->stale_message_tree.create_steal_sorted_array(&stale_offsets, nstale, nstale); bnc->broadcast_list.destroy(); - bnc->broadcast_list.create_steal_sorted_array(&broadcast_offsets, nbroadcast_offsets, n_in_this_buffer); + bnc->broadcast_list.create_steal_sorted_array(&broadcast_offsets, nbroadcast, nbroadcast); } // dump a buffer to stderr @@ -1776,7 +1695,7 @@ deserialize_and_upgrade_internal_node(FTNODE node, int32_t *fresh_offsets = nullptr; int32_t *broadcast_offsets = nullptr; int nfresh = 0; - int nbroadcast_offsets = 0; + int nbroadcast = 0; // We skip 'stale' offsets for upgraded nodes. 
if (sort_buffers) { @@ -1811,8 +1730,8 @@ deserialize_and_upgrade_internal_node(FTNODE node, dest = &fresh_offsets[nfresh]; nfresh++; } else if (ft_msg_type_applies_all(type) || ft_msg_type_does_nothing(type)) { - dest = &broadcast_offsets[nbroadcast_offsets]; - nbroadcast_offsets++; + dest = &broadcast_offsets[nbroadcast]; + nbroadcast++; } else { abort(); } @@ -1838,7 +1757,7 @@ deserialize_and_upgrade_internal_node(FTNODE node, bnc->fresh_message_tree.destroy(); bnc->fresh_message_tree.create_steal_sorted_array(&fresh_offsets, nfresh, n_in_this_buffer); bnc->broadcast_list.destroy(); - bnc->broadcast_list.create_steal_sorted_array(&broadcast_offsets, nbroadcast_offsets, n_in_this_buffer); + bnc->broadcast_list.create_steal_sorted_array(&broadcast_offsets, nbroadcast, n_in_this_buffer); } } diff --git a/ft/msg_buffer.cc b/ft/msg_buffer.cc index 7e247f45250..1e95321e8b0 100644 --- a/ft/msg_buffer.cc +++ b/ft/msg_buffer.cc @@ -110,7 +110,63 @@ void message_buffer::destroy() { } } -void message_buffer::resize(size_t new_size) { +void message_buffer::deserialize_from_rbuf(struct rbuf *rb, + int32_t **fresh_offsets, int32_t *nfresh, + int32_t **stale_offsets, int32_t *nstale, + int32_t **broadcast_offsets, int32_t *nbroadcast) { + // read the number of messages in this buffer + int n_in_this_buffer = rbuf_int(rb); + if (fresh_offsets != nullptr) { + XMALLOC_N(n_in_this_buffer, *fresh_offsets); + } + if (stale_offsets != nullptr) { + XMALLOC_N(n_in_this_buffer, *stale_offsets); + } + if (broadcast_offsets != nullptr) { + XMALLOC_N(n_in_this_buffer, *broadcast_offsets); + } + + _resize(rb->size + 64); // rb->size is a good hint for how big the buffer will be + + // read in each message individually + for (int i = 0; i < n_in_this_buffer; i++) { + bytevec key; ITEMLEN keylen; + bytevec val; ITEMLEN vallen; + // this is weird but it's necessary to pass icc and gcc together + unsigned char ctype = rbuf_char(rb); + enum ft_msg_type type = (enum ft_msg_type) ctype; + bool 
is_fresh = rbuf_char(rb); + MSN msn = rbuf_msn(rb); + XIDS xids; + xids_create_from_buffer(rb, &xids); + rbuf_bytes(rb, &key, &keylen); /* Returns a pointer into the rbuf. */ + rbuf_bytes(rb, &val, &vallen); + int32_t *dest = nullptr; + if (ft_msg_type_applies_once(type)) { + if (is_fresh) { + dest = fresh_offsets ? *fresh_offsets + (*nfresh)++ : nullptr; + } else { + dest = stale_offsets ? *stale_offsets + (*nstale)++ : nullptr; + } + } else { + invariant(ft_msg_type_applies_all(type) || ft_msg_type_does_nothing(type)); + dest = broadcast_offsets ? *broadcast_offsets + (*nbroadcast)++ : nullptr; + } + + // TODO: Function to parse stuff out of an rbuf into an FT_MSG + DBT k, v; + FT_MSG_S msg = { + type, msn, xids, + .u = { .id = { toku_fill_dbt(&k, key, keylen), toku_fill_dbt(&v, val, vallen) } } + }; + enqueue(&msg, is_fresh, dest); + xids_destroy(&xids); + } + + invariant(num_entries() == n_in_this_buffer); +} + +void message_buffer::_resize(size_t new_size) { XREALLOC_N(new_size, _memory); _memory_size = new_size; } @@ -134,7 +190,7 @@ void message_buffer::enqueue(FT_MSG msg, bool is_fresh, int32_t *offset) { if (_memory == nullptr || need_space_total > _memory_size) { // resize the buffer to the next power of 2 greater than the needed space int next_2 = next_power_of_two(need_space_total); - resize(next_2); + _resize(next_2); } ITEMLEN keylen = ft_msg_get_keylen(msg); ITEMLEN datalen = ft_msg_get_vallen(msg); @@ -212,6 +268,26 @@ bool message_buffer::equals(message_buffer *other) const { memcmp(_memory, other->_memory, _memory_used) == 0); } +void message_buffer::serialize_to_wbuf(struct wbuf *wb) const { + wbuf_nocrc_int(wb, num_entries()); + struct msg_serialize_fn { + struct wbuf *wb; + msg_serialize_fn(struct wbuf *w) : wb(w) { } + int operator()(FT_MSG msg, bool is_fresh) { + enum ft_msg_type type = (enum ft_msg_type) msg->type; + paranoid_invariant((int) type >= 0 && (int) type < 256); + wbuf_nocrc_char(wb, (unsigned char) type); + wbuf_nocrc_char(wb, 
(unsigned char) is_fresh); + wbuf_MSN(wb, msg->msn); + wbuf_nocrc_xids(wb, ft_msg_get_xids(msg)); + wbuf_nocrc_bytes(wb, ft_msg_get_key(msg), ft_msg_get_keylen(msg)); + wbuf_nocrc_bytes(wb, ft_msg_get_val(msg), ft_msg_get_vallen(msg)); + return 0; + } + } serialize_fn(wb); + iterate(serialize_fn); +} + size_t message_buffer::msg_memsize_in_buffer(FT_MSG msg) { const uint32_t keylen = ft_msg_get_keylen(msg); const uint32_t datalen = ft_msg_get_vallen(msg); diff --git a/ft/msg_buffer.h b/ft/msg_buffer.h index ded17820474..cbfa9617e20 100644 --- a/ft/msg_buffer.h +++ b/ft/msg_buffer.h @@ -102,7 +102,14 @@ public: void destroy(); - void resize(size_t new_size); + // effect: deserializes a message buffer from the given rbuf + // returns: *fresh_offsets (etc) malloc'd to be num_entries large and + // populated with *nfresh (etc) offsets in the message buffer + // requires: if fresh_offsets (etc) != nullptr, then nfresh != nullptr + void deserialize_from_rbuf(struct rbuf *rb, + int32_t **fresh_offsets, int32_t *nfresh, + int32_t **stale_offsets, int32_t *nstale, + int32_t **broadcast_offsets, int32_t *nbroadcast); void enqueue(FT_MSG msg, bool is_fresh, int32_t *offset); @@ -139,9 +146,13 @@ public: bool equals(message_buffer *other) const; + void serialize_to_wbuf(struct wbuf *wb) const; + static size_t msg_memsize_in_buffer(FT_MSG msg); private: + void _resize(size_t new_size); + // If this isn't packged, the compiler aligns the xids array and we waste a lot of space struct __attribute__((__packed__)) buffer_entry { unsigned int keylen; From 19dfd0dafaa7d7abb0c83c7a39ae3165ec03a713 Mon Sep 17 00:00:00 2001 From: Rich Prohaska Date: Sat, 21 Jun 2014 06:16:44 -0400 Subject: [PATCH 049/190] #254 fix i_s_tokudb_lock_waits result files --- mysql-test/suite/tokudb/r/i_s_tokudb_lock_waits_released.result | 1 - mysql-test/suite/tokudb/r/i_s_tokudb_locks_released.result | 1 - 2 files changed, 2 deletions(-) diff --git 
a/mysql-test/suite/tokudb/r/i_s_tokudb_lock_waits_released.result b/mysql-test/suite/tokudb/r/i_s_tokudb_lock_waits_released.result index db63d23e382..10431bb812a 100644 --- a/mysql-test/suite/tokudb/r/i_s_tokudb_lock_waits_released.result +++ b/mysql-test/suite/tokudb/r/i_s_tokudb_lock_waits_released.result @@ -27,7 +27,6 @@ commit; select * from information_schema.tokudb_locks; locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right TRX_ID MYSQL_ID ./test/t-main 0001000000 0001000000 -TRX_ID MYSQL_ID ./test/t-main 0001000000 0001000000 select * from information_schema.tokudb_lock_waits; requesting_trx_id blocking_trx_id lock_waits_dname lock_waits_key_left lock_waits_key_right lock_waits_start_time ERROR 23000: Duplicate entry '1' for key 'PRIMARY' diff --git a/mysql-test/suite/tokudb/r/i_s_tokudb_locks_released.result b/mysql-test/suite/tokudb/r/i_s_tokudb_locks_released.result index 628ff46ffc4..c135f3858b4 100644 --- a/mysql-test/suite/tokudb/r/i_s_tokudb_locks_released.result +++ b/mysql-test/suite/tokudb/r/i_s_tokudb_locks_released.result @@ -17,7 +17,6 @@ commit; select * from information_schema.tokudb_locks; locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right TRX_ID MYSQL_ID ./test/t-main 0001000000 0001000000 -TRX_ID MYSQL_ID ./test/t-main 0001000000 0001000000 ERROR 23000: Duplicate entry '1' for key 'PRIMARY' commit; select * from information_schema.tokudb_locks; From 3625b932af54d5a913c6ccc7ce08a16c0784cef6 Mon Sep 17 00:00:00 2001 From: Rich Prohaska Date: Sat, 21 Jun 2014 06:18:52 -0400 Subject: [PATCH 050/190] #255 run iibench on PS 5.6 --- scripts/run.iibench.bash | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/scripts/run.iibench.bash b/scripts/run.iibench.bash index e6e57e7f512..31cb96ad434 100755 --- a/scripts/run.iibench.bash +++ b/scripts/run.iibench.bash @@ -62,15 +62,6 @@ while [ $# -gt 0 ] ; do fi done -if [[ $mysqlbuild =~ (.*)-(tokudb-.*)-(linux)-(x86_64) ]] ; 
then - mysql=${BASH_REMATCH[1]} - tokudb=${BASH_REMATCH[2]} - system=${BASH_REMATCH[3]} - arch=${BASH_REMATCH[4]} -else - exit 1 -fi - # setup the dbname if [ $dbname = "iibench" ] ; then dbname=${cmd}_${engine}; fi if [ "$testinstance" != "" ] ; then dbname=${dbname}_${testinstance}; fi @@ -139,7 +130,7 @@ if [ "$instancetype" != "" ] ; then runfile=$runfile-$instancetype; fi testresult="PASS" pushd $testdir/py - echo `date` $cmd start $mysql $svn_branch $svn_revision $max_rows $rows_per_report >>$runfile + echo `date` $cmd start $mysqlbuild $svn_branch $svn_revision $max_rows $rows_per_report >>$runfile runcmd=$cmd.py args="--db_user=$mysqluser --db_name=$dbname --db_socket=$mysqlsocket --engine=$engine --setup --max_rows=$max_rows --rows_per_report=$rows_per_report --table_name=$tblname" if [ $cmd = "iibench" -a $insert_only != 0 ] ; then runcmd="$runcmd --insert_only"; fi From 725fc5ec467516fec39b1816ca436b1468cd2832 Mon Sep 17 00:00:00 2001 From: Rich Prohaska Date: Mon, 23 Jun 2014 11:30:17 -0400 Subject: [PATCH 051/190] #262 fix print of xid related recovery log entries --- ft/logger.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ft/logger.cc b/ft/logger.cc index 9900e7d6f6a..da6ee960c1e 100644 --- a/ft/logger.cc +++ b/ft/logger.cc @@ -1103,7 +1103,7 @@ int toku_logprint_XIDP (FILE *outf, FILE *inf, const char *fieldname, struct x17 XIDP vp; int r = toku_fread_XIDP(inf, &vp, checksum, len); if (r!=0) return r; - fprintf(outf, "%s={formatID=0x%lx gtrid_length=%ld bqual_length=%ld data=", fieldname, vp->formatID, vp->gtrid_length, vp->bqual_length); + fprintf(outf, " %s={formatID=0x%lx gtrid_length=%ld bqual_length=%ld data=", fieldname, vp->formatID, vp->gtrid_length, vp->bqual_length); toku_print_bytes(outf, vp->gtrid_length + vp->bqual_length, vp->data); fprintf(outf, "}"); toku_free(vp); From a13a8e840a8b68150e2160e4ac29c1da64bd8348 Mon Sep 17 00:00:00 2001 From: John Esmet Date: Mon, 23 Jun 2014 14:26:44 -0400 Subject: [PATCH 
052/190] FT-93 Align pivotkeys to a 4 byte boundary. Align memory allocation to a 64 byte boundary. --- ft/node.h | 9 +++++- ft/pivotkeys.cc | 51 +++++++++++++++++++--------------- portability/toku_portability.h | 10 ++----- 3 files changed, 39 insertions(+), 31 deletions(-) diff --git a/ft/node.h b/ft/node.h index 9fbc18cb896..4fe31110a6b 100644 --- a/ft/node.h +++ b/ft/node.h @@ -150,11 +150,15 @@ public: size_t total_size() const; private: + inline size_t _align4(size_t x) const { + return roundup_to_multiple(4, x); + } + // effect: create pivot keys, in fixed key format, by copying the given key array void _create_from_fixed_keys(const char *fixedkeys, size_t fixed_keylen, int n); char *_fixed_key(int i) const { - return &_fixed_keys[i * _fixed_keylen]; + return &_fixed_keys[i * _fixed_keylen_aligned]; } bool _fixed_format() const { @@ -187,7 +191,10 @@ private: // If every key is _fixed_keylen long, then _fixed_key is a // packed array of keys.. char *_fixed_keys; + // The actual length of the fixed key size_t _fixed_keylen; + // The aligned length that we use for fixed key storage + size_t _fixed_keylen_aligned; // ..otherwise _fixed_keys is null and we store an array of dbts, // each representing a key. this is simpler but less cache-efficient. 
diff --git a/ft/pivotkeys.cc b/ft/pivotkeys.cc index 6cad37106da..175745cd71f 100644 --- a/ft/pivotkeys.cc +++ b/ft/pivotkeys.cc @@ -102,6 +102,7 @@ void ftnode_pivot_keys::create_empty() { _total_size = 0; _fixed_keys = nullptr; _fixed_keylen = 0; + _fixed_keylen_aligned = 0; _dbt_keys = nullptr; } @@ -121,15 +122,16 @@ void ftnode_pivot_keys::create_from_dbts(const DBT *keys, int n) { if (keys_same_size && _num_pivots > 0) { // if so, store pivots in a tightly packed array of fixed length keys _fixed_keylen = keys[0].size; - _total_size = _fixed_keylen * _num_pivots; - XMALLOC_N(_total_size, _fixed_keys); + _fixed_keylen_aligned = _align4(_fixed_keylen); + _total_size = _fixed_keylen_aligned * _num_pivots; + XMALLOC_N_ALIGNED(64, _total_size, _fixed_keys); for (int i = 0; i < _num_pivots; i++) { invariant(keys[i].size == _fixed_keylen); memcpy(_fixed_key(i), keys[i].data, _fixed_keylen); } } else { // otherwise we'll just store the pivots in an array of dbts - XMALLOC_N(_num_pivots, _dbt_keys); + XMALLOC_N_ALIGNED(64, _num_pivots, _dbt_keys); for (int i = 0; i < _num_pivots; i++) { size_t size = keys[i].size; toku_memdup_dbt(&_dbt_keys[i], keys[i].data, size); @@ -142,7 +144,8 @@ void ftnode_pivot_keys::_create_from_fixed_keys(const char *fixedkeys, size_t fi create_empty(); _num_pivots = n; _fixed_keylen = fixed_keylen; - _total_size = _fixed_keylen * _num_pivots; + _fixed_keylen_aligned = _align4(fixed_keylen); + _total_size = _fixed_keylen_aligned * _num_pivots; XMEMDUP_N(_fixed_keys, fixedkeys, _total_size); } @@ -168,6 +171,7 @@ void ftnode_pivot_keys::destroy() { _fixed_keys = nullptr; } _fixed_keylen = 0; + _fixed_keylen_aligned = 0; _num_pivots = 0; _total_size = 0; } @@ -177,8 +181,9 @@ void ftnode_pivot_keys::_convert_to_fixed_format() { // convert to a tightly packed array of fixed length keys _fixed_keylen = _dbt_keys[0].size; - _total_size = _fixed_keylen * _num_pivots; - XMALLOC_N(_total_size, _fixed_keys); + _fixed_keylen_aligned = 
_align4(_fixed_keylen); + _total_size = _fixed_keylen_aligned * _num_pivots; + XMALLOC_N_ALIGNED(64, _total_size, _fixed_keys); for (int i = 0; i < _num_pivots; i++) { invariant(_dbt_keys[i].size == _fixed_keylen); memcpy(_fixed_key(i), _dbt_keys[i].data, _fixed_keylen); @@ -198,7 +203,7 @@ void ftnode_pivot_keys::_convert_to_dbt_format() { invariant(_fixed_format()); // convert to an aray of dbts - XREALLOC_N(_num_pivots, _dbt_keys); + REALLOC_N_ALIGNED(64, _num_pivots, _dbt_keys); for (int i = 0; i < _num_pivots; i++) { toku_memdup_dbt(&_dbt_keys[i], _fixed_key(i), _fixed_keylen); } @@ -218,7 +223,7 @@ void ftnode_pivot_keys::deserialize_from_rbuf(struct rbuf *rb, int n) { _fixed_keylen = 0; _dbt_keys = nullptr; - XMALLOC_N(_num_pivots, _dbt_keys); + XMALLOC_N_ALIGNED(64, _num_pivots, _dbt_keys); bool keys_same_size = true; for (int i = 0; i < _num_pivots; i++) { bytevec pivotkeyptr; @@ -240,7 +245,7 @@ void ftnode_pivot_keys::deserialize_from_rbuf(struct rbuf *rb, int n) { DBT ftnode_pivot_keys::get_pivot(int i) const { paranoid_invariant(i < _num_pivots); if (_fixed_format()) { - paranoid_invariant(i * _fixed_keylen < _total_size); + paranoid_invariant(i * _fixed_keylen_aligned < _total_size); DBT dbt; toku_fill_dbt(&dbt, _fixed_key(i), _fixed_keylen); return dbt; @@ -272,16 +277,18 @@ void ftnode_pivot_keys::_destroy_key_dbt(int i) { void ftnode_pivot_keys::_insert_at_dbt(const DBT *key, int i) { // make space for a new pivot, slide existing keys to the right - REALLOC_N(_num_pivots + 1, _dbt_keys); + REALLOC_N_ALIGNED(64, _num_pivots + 1, _dbt_keys); memmove(&_dbt_keys[i + 1], &_dbt_keys[i], (_num_pivots - i) * sizeof(DBT)); _add_key_dbt(key, i); } void ftnode_pivot_keys::_insert_at_fixed(const DBT *key, int i) { - REALLOC_N((_num_pivots + 1) * _fixed_keylen, _fixed_keys); - memmove(_fixed_key(i + 1), _fixed_key(i), (_num_pivots - i) * _fixed_keylen); + REALLOC_N_ALIGNED(64, (_num_pivots + 1) * _fixed_keylen_aligned, _fixed_keys); + // TODO: This is not going 
to be valgrind-safe, because we do not initialize the space + // between _fixed_keylen and _fixed_keylen_aligned (but we probably should) + memmove(_fixed_key(i + 1), _fixed_key(i), (_num_pivots - i) * _fixed_keylen_aligned); memcpy(_fixed_key(i), key->data, _fixed_keylen); - _total_size += _fixed_keylen; + _total_size += _fixed_keylen_aligned; } void ftnode_pivot_keys::insert_at(const DBT *key, int i) { @@ -303,7 +310,7 @@ void ftnode_pivot_keys::insert_at(const DBT *key, int i) { } void ftnode_pivot_keys::_append_dbt(const ftnode_pivot_keys &pivotkeys) { - REALLOC_N(_num_pivots + pivotkeys._num_pivots, _dbt_keys); + REALLOC_N_ALIGNED(64, _num_pivots + pivotkeys._num_pivots, _dbt_keys); bool other_fixed = pivotkeys._fixed_format(); for (int i = 0; i < pivotkeys._num_pivots; i++) { toku_memdup_dbt(&_dbt_keys[_num_pivots + i], @@ -317,7 +324,7 @@ void ftnode_pivot_keys::_append_dbt(const ftnode_pivot_keys &pivotkeys) { void ftnode_pivot_keys::_append_fixed(const ftnode_pivot_keys &pivotkeys) { if (pivotkeys._fixed_format() && pivotkeys._fixed_keylen == _fixed_keylen) { // other pivotkeys have the same fixed keylen - REALLOC_N((_num_pivots + pivotkeys._num_pivots) * _fixed_keylen, _fixed_keys); + REALLOC_N_ALIGNED(64, (_num_pivots + pivotkeys._num_pivots) * _fixed_keylen_aligned, _fixed_keys); memcpy(_fixed_key(_num_pivots), pivotkeys._fixed_keys, pivotkeys._total_size); } else { // must convert to dbt format, other pivotkeys have different length'd keys @@ -366,15 +373,15 @@ void ftnode_pivot_keys::replace_at(const DBT *key, int i) { } void ftnode_pivot_keys::_delete_at_fixed(int i) { - memmove(_fixed_key(i), _fixed_key(i + 1), (_num_pivots - 1 - i) * _fixed_keylen); - _total_size -= _fixed_keylen; + memmove(_fixed_key(i), _fixed_key(i + 1), (_num_pivots - 1 - i) * _fixed_keylen_aligned); + _total_size -= _fixed_keylen_aligned; } void ftnode_pivot_keys::_delete_at_dbt(int i) { // slide over existing keys, then shrink down to size _destroy_key_dbt(i); 
memmove(&_dbt_keys[i], &_dbt_keys[i + 1], (_num_pivots - 1 - i) * sizeof(DBT)); - REALLOC_N(_num_pivots - 1, _dbt_keys); + REALLOC_N_ALIGNED(64, _num_pivots - 1, _dbt_keys); } void ftnode_pivot_keys::delete_at(int i) { @@ -395,7 +402,7 @@ void ftnode_pivot_keys::_split_at_fixed(int i, ftnode_pivot_keys *other) { // shrink down to size _total_size = i * _fixed_keylen; - REALLOC_N(_total_size, _fixed_keys); + REALLOC_N_ALIGNED(64, _total_size, _fixed_keys); } void ftnode_pivot_keys::_split_at_dbt(int i, ftnode_pivot_keys *other) { @@ -406,7 +413,7 @@ void ftnode_pivot_keys::_split_at_dbt(int i, ftnode_pivot_keys *other) { for (int k = i; k < _num_pivots; k++) { _destroy_key_dbt(k); } - REALLOC_N(i, _dbt_keys); + REALLOC_N_ALIGNED(64, i, _dbt_keys); } void ftnode_pivot_keys::split_at(int i, ftnode_pivot_keys *other) { @@ -434,12 +441,12 @@ void ftnode_pivot_keys::serialize_to_wbuf(struct wbuf *wb) const { int ftnode_pivot_keys::num_pivots() const { // if we have fixed size keys, the number of pivots should be consistent - paranoid_invariant(_fixed_keys == nullptr || (_total_size == _fixed_keylen * _num_pivots)); + paranoid_invariant(_fixed_keys == nullptr || (_total_size == _fixed_keylen_aligned * _num_pivots)); return _num_pivots; } size_t ftnode_pivot_keys::total_size() const { // if we have fixed size keys, the total size should be consistent - paranoid_invariant(_fixed_keys == nullptr || (_total_size == _fixed_keylen * _num_pivots)); + paranoid_invariant(_fixed_keys == nullptr || (_total_size == _fixed_keylen_aligned * _num_pivots)); return _total_size; } diff --git a/portability/toku_portability.h b/portability/toku_portability.h index e19fca5d626..915fb462897 100644 --- a/portability/toku_portability.h +++ b/portability/toku_portability.h @@ -352,14 +352,8 @@ void toku_set_func_pread (ssize_t (*)(int, void *, size_t, off_t)); int toku_portability_init(void); void toku_portability_destroy(void); -static inline uint64_t roundup_to_multiple(uint64_t alignment, 
uint64_t v) // Effect: Return X, where X the smallest multiple of ALIGNMENT such that X>=V. // Requires: ALIGNMENT is a power of two -{ - assert(0==(alignment&(alignment-1))); // alignment must be a power of two - uint64_t result = (v+alignment-1)&~(alignment-1); - assert(result>=v); // The result is >=V. - assert(result%alignment==0); // The result is a multiple of alignment. - assert(result Date: Mon, 23 Jun 2014 14:29:07 -0400 Subject: [PATCH 053/190] FT-278 Put ft_msg in its own class and file, organize deserialization and upgrade code so there's less complexity in ft/ft_node-serialize.cc --- ft/CMakeLists.txt | 2 +- ft/ft-flusher.cc | 2 +- ft/ft-internal.h | 14 +- ft/ft-ops.cc | 90 ++++------ ft/ft-ops.h | 2 +- ft/ft-test-helpers.cc | 29 ++- ft/ft-verify.cc | 22 +-- ft/ft_node-serialize.cc | 164 ++++++++--------- ft/leafentry.h | 4 +- ft/loader/loader.cc | 12 +- ft/{ft_msg.cc => msg.cc} | 96 +++++++--- ft/{ft_msg.h => msg.h} | 60 ++++--- ft/msg_buffer.cc | 122 +++++++------ ft/msg_buffer.h | 25 ++- ft/node.cc | 132 +++++++------- ft/node.h | 8 +- ft/roll.cc | 14 +- ft/tests/fifo-test.cc | 18 +- ft/tests/make-tree.cc | 4 +- ft/tests/msnfilter.cc | 16 +- ft/tests/orthopush-flush.cc | 254 +++++++++++---------------- ft/tests/test-leafentry-child-txn.cc | 73 ++++---- ft/tests/test-leafentry-nested.cc | 156 ++++++++-------- ft/tests/verify-bad-msn.cc | 4 +- ft/tests/verify-bad-pivots.cc | 4 +- ft/tests/verify-dup-in-leaf.cc | 4 +- ft/tests/verify-dup-pivots.cc | 4 +- ft/tests/verify-misrouted-msgs.cc | 4 +- ft/tests/verify-unsorted-leaf.cc | 4 +- ft/tests/verify-unsorted-pivots.cc | 4 +- ft/tokuftdump.cc | 16 +- ft/ule-internal.h | 2 +- ft/ule.cc | 26 ++- ft/ybt.cc | 27 +-- ft/ybt.h | 3 + 35 files changed, 698 insertions(+), 723 deletions(-) rename ft/{ft_msg.cc => msg.cc} (63%) rename ft/{ft_msg.h => msg.h} (88%) diff --git a/ft/CMakeLists.txt b/ft/CMakeLists.txt index 01db81c43b9..3fcc4c2f853 100644 --- a/ft/CMakeLists.txt +++ b/ft/CMakeLists.txt @@ -35,7 +35,6 
@@ set(FT_SOURCES ft-cachetable-wrappers ft-flusher ft-hot-flusher - ft_msg ft_node-serialize ft-node-deserialize ft-ops @@ -52,6 +51,7 @@ set(FT_SOURCES logfilemgr logger log_upgrade + msg msg_buffer node pivotkeys diff --git a/ft/ft-flusher.cc b/ft/ft-flusher.cc index 8b0ff8c8546..7e8524794dc 100644 --- a/ft/ft-flusher.cc +++ b/ft/ft-flusher.cc @@ -1682,7 +1682,7 @@ void toku_bnc_flush_to_child(FT ft, NONLEAF_CHILDINFO bnc, FTNODE child, TXNID p ft(t), child(n), bnc(nl), gc_info(g), remaining_memsize(bnc->msg_buffer.buffer_size_in_use()) { stats_delta = { 0, 0 }; } - int operator()(FT_MSG msg, bool is_fresh) { + int operator()(const ft_msg &msg, bool is_fresh) { size_t flow_deltas[] = { 0, 0 }; size_t memsize_in_buffer = message_buffer::msg_memsize_in_buffer(msg); if (remaining_memsize <= bnc->flow[0]) { diff --git a/ft/ft-internal.h b/ft/ft-internal.h index 63bcc1f1280..f3d6c5fc2cb 100644 --- a/ft/ft-internal.h +++ b/ft/ft-internal.h @@ -119,8 +119,6 @@ PATENT RIGHTS GRANT: struct block_table; struct ft_search; -enum { KEY_VALUE_OVERHEAD = 8 }; /* Must store the two lengths. */ -enum { FT_MSG_OVERHEAD = (2 + sizeof(MSN)) }; // the type plus freshness plus MSN enum { FT_DEFAULT_FANOUT = 16 }; enum { FT_DEFAULT_NODE_SIZE = 4 * 1024 * 1024 }; enum { FT_DEFAULT_BASEMENT_NODE_SIZE = 128 * 1024 }; @@ -493,7 +491,7 @@ toku_bfe_rightmost_child_wanted(struct ftnode_fetch_extra *bfe, FTNODE node); // allocate a block number // allocate and initialize a ftnode // put the ftnode into the cache table -void toku_create_new_ftnode (FT_HANDLE t, FTNODE *result, int height, int n_children); +void toku_create_new_ftnode(FT_HANDLE ft_handle, FTNODE *result, int height, int n_children); /* Stuff for testing */ // toku_testsetup_initialize() must be called before any other test_setup_xxx() functions are called. 
@@ -506,14 +504,10 @@ int toku_testsetup_insert_to_leaf (FT_HANDLE ft_h, BLOCKNUM, const char *key, in int toku_testsetup_insert_to_nonleaf (FT_HANDLE ft_h, BLOCKNUM, enum ft_msg_type, const char *key, int keylen, const char *val, int vallen); void toku_pin_node_with_min_bfe(FTNODE* node, BLOCKNUM b, FT_HANDLE t); -void toku_ft_root_put_msg(FT ft, FT_MSG msg, txn_gc_info *gc_info); +void toku_ft_root_put_msg(FT ft, const ft_msg &msg, txn_gc_info *gc_info); -void -toku_get_node_for_verify( - BLOCKNUM blocknum, - FT_HANDLE ft_h, - FTNODE* nodep - ); +// TODO: Rename +void toku_get_node_for_verify(BLOCKNUM blocknum, FT_HANDLE ft_h, FTNODE* nodep); int toku_verify_ftnode (FT_HANDLE ft_h, diff --git a/ft/ft-ops.cc b/ft/ft-ops.cc index 0249e3ea09c..6db1d19bf66 100644 --- a/ft/ft-ops.cc +++ b/ft/ft-ops.cc @@ -208,7 +208,7 @@ basement nodes, bulk fetch, and partial fetch: #include "ft/ft-flusher.h" #include "ft/ft-internal.h" #include "ft/ft_layout_version.h" -#include "ft/ft_msg.h" +#include "ft/msg.h" #include "ft/leafentry.h" #include "ft/log-internal.h" #include "ft/node.h" @@ -1578,20 +1578,11 @@ ft_init_new_root(FT ft, FTNODE oldroot, FTNODE *newrootp) ); } -// TODO Use this function to clean up other places where bits of messages are passed around -// such as toku_bnc_insert_msg() and the call stack above it. -static uint64_t -ft_msg_size(FT_MSG msg) { - size_t keyval_size = msg->u.id.key->size + msg->u.id.val->size; - size_t xids_size = xids_get_serialize_size(msg->xids); - return keyval_size + KEY_VALUE_OVERHEAD + FT_MSG_OVERHEAD + xids_size; -} - static void inject_message_in_locked_node( FT ft, FTNODE node, int childnum, - FT_MSG_S *msg, + const ft_msg &msg, size_t flow_deltas[], txn_gc_info *gc_info ) @@ -1616,15 +1607,17 @@ static void inject_message_in_locked_node( // Get the MSN from the header. 
Now that we have a write lock on the // node we're injecting into, we know no other thread will get an MSN // after us and get that message into our subtree before us. - msg->msn.msn = toku_sync_add_and_fetch(&ft->h->max_msn_in_ft.msn, 1); - paranoid_invariant(msg->msn.msn > node->max_msn_applied_to_node_on_disk.msn); + MSN msg_msn = { .msn = toku_sync_add_and_fetch(&ft->h->max_msn_in_ft.msn, 1) }; + ft_msg msg_with_msn(msg.kdbt(), msg.vdbt(), msg.type(), msg_msn, msg.xids()); + paranoid_invariant(msg_with_msn.msn().msn > node->max_msn_applied_to_node_on_disk.msn); + STAT64INFO_S stats_delta = {0,0}; toku_ftnode_put_msg( ft->cmp, ft->update_fun, node, childnum, - msg, + msg_with_msn, true, gc_info, flow_deltas, @@ -1642,17 +1635,17 @@ static void inject_message_in_locked_node( // update some status variables if (node->height != 0) { - uint64_t msgsize = ft_msg_size(msg); + size_t msgsize = msg.total_size(); STATUS_INC(FT_MSG_BYTES_IN, msgsize); STATUS_INC(FT_MSG_BYTES_CURR, msgsize); STATUS_INC(FT_MSG_NUM, 1); - if (ft_msg_type_applies_all(msg->type)) { + if (ft_msg_type_applies_all(msg.type())) { STATUS_INC(FT_MSG_NUM_BROADCAST, 1); } } // verify that msn of latest message was captured in root node - paranoid_invariant(msg->msn.msn == node->max_msn_applied_to_node_on_disk.msn); + paranoid_invariant(msg_with_msn.msn().msn == node->max_msn_applied_to_node_on_disk.msn); if (node->blocknum.b == ft->rightmost_blocknum.b) { if (ft->seqinsert_score < FT_SEQINSERT_SCORE_THRESHOLD) { @@ -1794,7 +1787,7 @@ static bool process_maybe_reactive_child(FT ft, FTNODE parent, FTNODE child, int abort(); } -static void inject_message_at_this_blocknum(FT ft, CACHEKEY cachekey, uint32_t fullhash, FT_MSG_S *msg, size_t flow_deltas[], txn_gc_info *gc_info) +static void inject_message_at_this_blocknum(FT ft, CACHEKEY cachekey, uint32_t fullhash, const ft_msg &msg, size_t flow_deltas[], txn_gc_info *gc_info) // Effect: // Inject message into the node at this blocknum (cachekey). 
// Gets a write lock on the node for you. @@ -1845,7 +1838,7 @@ static void push_something_in_subtree( FT ft, FTNODE subtree_root, int target_childnum, - FT_MSG_S *msg, + const ft_msg &msg, size_t flow_deltas[], txn_gc_info *gc_info, int depth, @@ -1903,10 +1896,10 @@ static void push_something_in_subtree( NONLEAF_CHILDINFO bnc; // toku_ft_root_put_msg should not have called us otherwise. - paranoid_invariant(ft_msg_type_applies_once(msg->type)); + paranoid_invariant(ft_msg_type_applies_once(msg.type())); childnum = (target_childnum >= 0 ? target_childnum - : toku_ftnode_which_child(subtree_root, msg->u.id.key, ft->cmp)); + : toku_ftnode_which_child(subtree_root, msg.kdbt(), ft->cmp)); bnc = BNC(subtree_root, childnum); if (toku_bnc_n_entries(bnc) > 0) { @@ -2042,7 +2035,7 @@ static void push_something_in_subtree( void toku_ft_root_put_msg( FT ft, - FT_MSG_S *msg, + const ft_msg &msg, txn_gc_info *gc_info ) // Effect: @@ -2142,7 +2135,7 @@ void toku_ft_root_put_msg( // anyway. // Now, either inject here or promote. We decide based on a heuristic: - if (node->height == 0 || !ft_msg_type_applies_once(msg->type)) { + if (node->height == 0 || !ft_msg_type_applies_once(msg.type())) { // If the root's a leaf or we're injecting a broadcast, drop the read lock and inject here. toku_unpin_ftnode_read_only(ft, node); STATUS_INC(FT_PRO_NUM_ROOT_H0_INJECT, 1); @@ -2153,7 +2146,7 @@ void toku_ft_root_put_msg( } else { // The root's height 1. We may be eligible for promotion here. // On the extremes, we want to promote, in the middle, we don't. - int childnum = toku_ftnode_which_child(node, msg->u.id.key, ft->cmp); + int childnum = toku_ftnode_which_child(node, msg.kdbt(), ft->cmp); if (childnum == 0 || childnum == node->n_children - 1) { // On the extremes, promote. We know which childnum we're going to, so pass that down too. 
push_something_in_subtree(ft, node, childnum, msg, flow_deltas, gc_info, 0, LEFT_EXTREME | RIGHT_EXTREME, false); @@ -2476,7 +2469,7 @@ void toku_ft_optimize (FT_HANDLE ft_h) { DBT val; toku_init_dbt(&key); toku_init_dbt(&val); - FT_MSG_S ftmsg = { FT_OPTIMIZE, ZERO_MSN, message_xids, .u = { .id = {&key,&val} } }; + ft_msg msg(&key, &val, FT_OPTIMIZE, ZERO_MSN, message_xids); TXN_MANAGER txn_manager = toku_ft_get_txn_manager(ft_h); txn_manager_state txn_state_for_gc(txn_manager); @@ -2487,7 +2480,7 @@ void toku_ft_optimize (FT_HANDLE ft_h) { // no messages above us, we can implicitly promote uxrs based on this xid oldest_referenced_xid_estimate, true); - toku_ft_root_put_msg(ft_h->ft, &ftmsg, &gc_info); + toku_ft_root_put_msg(ft_h->ft, msg, &gc_info); xids_destroy(&message_xids); } } @@ -2601,17 +2594,13 @@ static void ft_insert_directly_into_leaf(FT ft, FTNODE leaf, int target_childnum // algorithm would have selected the given leaf node as the point of injection. // That means this function relies on the current implementation of promotion. { - FT_MSG_S ftcmd = { type, ZERO_MSN, message_xids, .u = { .id = { key, val } } }; + ft_msg msg(key, val, type, ZERO_MSN, message_xids); size_t flow_deltas[] = { 0, 0 }; - inject_message_in_locked_node(ft, leaf, target_childnum, &ftcmd, flow_deltas, gc_info); + inject_message_in_locked_node(ft, leaf, target_childnum, msg, flow_deltas, gc_info); } static void -ft_send_update_msg(FT_HANDLE ft_h, FT_MSG_S *msg, TOKUTXN txn) { - msg->xids = (txn - ? 
toku_txn_get_xids(txn) - : xids_get_root_xids()); - +ft_send_update_msg(FT_HANDLE ft_h, const ft_msg &msg, TOKUTXN txn) { TXN_MANAGER txn_manager = toku_ft_get_txn_manager(ft_h); txn_manager_state txn_state_for_gc(txn_manager); @@ -2650,9 +2639,9 @@ void toku_ft_maybe_update(FT_HANDLE ft_h, const DBT *key, const DBT *update_func if (oplsn_valid && oplsn.lsn <= (treelsn = toku_ft_checkpoint_lsn(ft_h->ft)).lsn) { // do nothing } else { - FT_MSG_S msg = { FT_UPDATE, ZERO_MSN, NULL, - .u = { .id = { key, update_function_extra } } }; - ft_send_update_msg(ft_h, &msg, txn); + XIDS message_xids = txn ? toku_txn_get_xids(txn) : xids_get_root_xids(); + ft_msg msg(key, update_function_extra, FT_UPDATE, ZERO_MSN, message_xids); + ft_send_update_msg(ft_h, msg, txn); } } @@ -2682,23 +2671,22 @@ void toku_ft_maybe_update_broadcast(FT_HANDLE ft_h, const DBT *update_function_e oplsn.lsn <= (treelsn = toku_ft_checkpoint_lsn(ft_h->ft)).lsn) { } else { - DBT nullkey; - const DBT *nullkeyp = toku_init_dbt(&nullkey); - FT_MSG_S msg = { FT_UPDATE_BROADCAST_ALL, ZERO_MSN, NULL, - .u = { .id = { nullkeyp, update_function_extra } } }; - ft_send_update_msg(ft_h, &msg, txn); + DBT empty_dbt; + XIDS message_xids = txn ? 
toku_txn_get_xids(txn) : xids_get_root_xids(); + ft_msg msg(toku_init_dbt(&empty_dbt), update_function_extra, FT_UPDATE_BROADCAST_ALL, ZERO_MSN, message_xids); + ft_send_update_msg(ft_h, msg, txn); } } void toku_ft_send_insert(FT_HANDLE ft_handle, DBT *key, DBT *val, XIDS xids, enum ft_msg_type type, txn_gc_info *gc_info) { - FT_MSG_S ftmsg = { type, ZERO_MSN, xids, .u = { .id = { key, val } } }; - toku_ft_root_put_msg(ft_handle->ft, &ftmsg, gc_info); + ft_msg msg(key, val, type, ZERO_MSN, xids); + toku_ft_root_put_msg(ft_handle->ft, msg, gc_info); } void toku_ft_send_commit_any(FT_HANDLE ft_handle, DBT *key, XIDS xids, txn_gc_info *gc_info) { DBT val; - FT_MSG_S ftmsg = { FT_COMMIT_ANY, ZERO_MSN, xids, .u = { .id = { key, toku_init_dbt(&val) } } }; - toku_ft_root_put_msg(ft_handle->ft, &ftmsg, gc_info); + ft_msg msg(key, toku_init_dbt(&val), FT_COMMIT_ANY, ZERO_MSN, xids); + toku_ft_root_put_msg(ft_handle->ft, msg, gc_info); } void toku_ft_delete(FT_HANDLE ft_handle, DBT *key, TOKUTXN txn) { @@ -2769,8 +2757,8 @@ void toku_ft_maybe_delete(FT_HANDLE ft_h, DBT *key, TOKUTXN txn, bool oplsn_vali void toku_ft_send_delete(FT_HANDLE ft_handle, DBT *key, XIDS xids, txn_gc_info *gc_info) { DBT val; toku_init_dbt(&val); - FT_MSG_S ftmsg = { FT_DELETE_ANY, ZERO_MSN, xids, .u = { .id = { key, &val } } }; - toku_ft_root_put_msg(ft_handle->ft, &ftmsg, gc_info); + ft_msg msg(key, toku_init_dbt(&val), FT_DELETE_ANY, ZERO_MSN, xids); + toku_ft_root_put_msg(ft_handle->ft, msg, gc_info); } /* ******************** open,close and create ********************** */ @@ -4480,12 +4468,12 @@ toku_dump_ftnode (FILE *file, FT_HANDLE ft_handle, BLOCKNUM blocknum, int depth, FILE *file; int depth; print_msg_fn(FILE *f, int d) : file(f), depth(d) { } - int operator()(FT_MSG msg, bool UU(is_fresh)) { + int operator()(const ft_msg &msg, bool UU(is_fresh)) { fprintf(file, "%*s xid=%" PRIu64 " %u (type=%d) msn=0x%" PRIu64 "\n", depth+2, "", - xids_get_innermost_xid(ft_msg_get_xids(msg)), - 
(unsigned)toku_dtoh32(*(int*)ft_msg_get_key(msg)), - ft_msg_get_type(msg), msg->msn.msn); + xids_get_innermost_xid(msg.xids()), + static_cast(toku_dtoh32(*(int*)msg.kdbt()->data)), + msg.type(), msg.msn().msn); return 0; } } print_fn(file, depth); diff --git a/ft/ft-ops.h b/ft/ft-ops.h index 0c94f1ca7c7..b8813049840 100644 --- a/ft/ft-ops.h +++ b/ft/ft-ops.h @@ -99,7 +99,7 @@ PATENT RIGHTS GRANT: #include "cachetable.h" #include "log.h" #include "compress.h" -#include "ft_msg.h" +#include "ft/msg.h" int toku_open_ft_handle (const char *fname, int is_create, FT_HANDLE *, int nodesize, int basementnodesize, enum toku_compression_method compression_method, CACHETABLE, TOKUTXN, int(*)(DB *,const DBT*,const DBT*)) __attribute__ ((warn_unused_result)); diff --git a/ft/ft-test-helpers.cc b/ft/ft-test-helpers.cc index 1da34c48d94..6b560d21e26 100644 --- a/ft/ft-test-helpers.cc +++ b/ft/ft-test-helpers.cc @@ -224,25 +224,22 @@ int toku_testsetup_insert_to_leaf (FT_HANDLE ft_handle, BLOCKNUM blocknum, const toku_verify_or_set_counts(node); assert(node->height==0); - DBT keydbt,valdbt; - MSN msn = next_dummymsn(); - FT_MSG_S msg = { FT_INSERT, msn, xids_get_root_xids(), - .u = { .id = { toku_fill_dbt(&keydbt, key, keylen), - toku_fill_dbt(&valdbt, val, vallen) } } }; + DBT kdbt, vdbt; + ft_msg msg(toku_fill_dbt(&kdbt, key, keylen), toku_fill_dbt(&vdbt, val, vallen), + FT_INSERT, next_dummymsn(), xids_get_root_xids()); static size_t zero_flow_deltas[] = { 0, 0 }; txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, true); - toku_ftnode_put_msg( - ft_handle->ft->cmp, - ft_handle->ft->update_fun, - node, - -1, - &msg, - true, - &gc_info, - zero_flow_deltas, - NULL - ); + toku_ftnode_put_msg(ft_handle->ft->cmp, + ft_handle->ft->update_fun, + node, + -1, + msg, + true, + &gc_info, + zero_flow_deltas, + NULL + ); toku_verify_or_set_counts(node); diff --git a/ft/ft-verify.cc b/ft/ft-verify.cc index ba78f11f421..8593351492c 100644 --- a/ft/ft-verify.cc +++ b/ft/ft-verify.cc @@ 
-204,13 +204,13 @@ int verify_message_tree(const int32_t &offset, const uint32_t UU(idx), struct ve int keep_going_on_failure = e->keep_going_on_failure; int result = 0; DBT k, v; - FT_MSG_S msg = e->msg_buffer->get_message(offset, &k, &v); + ft_msg msg = e->msg_buffer->get_message(offset, &k, &v); bool is_fresh = e->msg_buffer->get_freshness(offset); if (e->broadcast) { - VERIFY_ASSERTION(ft_msg_type_applies_all((enum ft_msg_type) msg.type) || ft_msg_type_does_nothing((enum ft_msg_type) msg.type), + VERIFY_ASSERTION(ft_msg_type_applies_all((enum ft_msg_type) msg.type()) || ft_msg_type_does_nothing((enum ft_msg_type) msg.type()), e->i, "message found in broadcast list that is not a broadcast"); } else { - VERIFY_ASSERTION(ft_msg_type_applies_once((enum ft_msg_type) msg.type), + VERIFY_ASSERTION(ft_msg_type_applies_once((enum ft_msg_type) msg.type()), e->i, "message found in fresh or stale message tree that does not apply once"); if (e->is_fresh) { if (e->messages_have_been_moved) { @@ -322,14 +322,14 @@ struct verify_msg_fn { blocknum(b), this_msn(tmsn), verbose(v), keep_going_on_failure(k), messages_have_been_moved(m), last_msn(ZERO_MSN), msg_i(0) { } - int operator()(FT_MSG msg, bool is_fresh) { - enum ft_msg_type type = (enum ft_msg_type) msg->type; - MSN msn = msg->msn; - XIDS xid = msg->xids; - const void *key = ft_msg_get_key(msg); - const void *data = ft_msg_get_val(msg); - ITEMLEN keylen = ft_msg_get_keylen(msg); - ITEMLEN datalen = ft_msg_get_vallen(msg); + int operator()(const ft_msg &msg, bool is_fresh) { + enum ft_msg_type type = (enum ft_msg_type) msg.type(); + MSN msn = msg.msn(); + XIDS xid = msg.xids(); + const void *key = msg.kdbt()->data; + const void *data = msg.vdbt()->data; + ITEMLEN keylen = msg.kdbt()->size; + ITEMLEN datalen = msg.vdbt()->size; int r = verify_msg_in_child_buffer(ft_handle, type, msn, key, keylen, data, datalen, xid, curr_less_pivot, diff --git a/ft/ft_node-serialize.cc b/ft/ft_node-serialize.cc index 7b3f81dad38..ddf114012cd 
100644 --- a/ft/ft_node-serialize.cc +++ b/ft/ft_node-serialize.cc @@ -859,7 +859,62 @@ toku_serialize_ftnode_to (int fd, BLOCKNUM blocknum, FTNODE node, FTNODE_DISK_DA } static void -deserialize_child_buffer_v26(NONLEAF_CHILDINFO bnc, struct rbuf *rbuf, const toku::comparator &cmp) { +sort_and_steal_offset_arrays(NONLEAF_CHILDINFO bnc, + const toku::comparator &cmp, + int32_t **fresh_offsets, int32_t nfresh, + int32_t **stale_offsets, int32_t nstale, + int32_t **broadcast_offsets, int32_t nbroadcast) { + // We always have fresh / broadcast offsets (even if they are empty) + // but we may not have stale offsets, in the case of v13 upgrade. + invariant(fresh_offsets != nullptr); + invariant(broadcast_offsets != nullptr); + invariant(cmp.valid()); + + typedef toku::sort msn_sort; + + const int32_t n_in_this_buffer = nfresh + nstale + nbroadcast; + struct toku_msg_buffer_key_msn_cmp_extra extra(cmp, &bnc->msg_buffer); + msn_sort::mergesort_r(*fresh_offsets, nfresh, extra); + bnc->fresh_message_tree.destroy(); + bnc->fresh_message_tree.create_steal_sorted_array(fresh_offsets, nfresh, n_in_this_buffer); + if (stale_offsets) { + msn_sort::mergesort_r(*stale_offsets, nstale, extra); + bnc->stale_message_tree.destroy(); + bnc->stale_message_tree.create_steal_sorted_array(stale_offsets, nstale, n_in_this_buffer); + } + bnc->broadcast_list.destroy(); + bnc->broadcast_list.create_steal_sorted_array(broadcast_offsets, nbroadcast, n_in_this_buffer); +} + +static MSN +deserialize_child_buffer_v13(FT ft, NONLEAF_CHILDINFO bnc, struct rbuf *rb) { + // We skip 'stale' offsets for upgraded nodes. + int32_t nfresh = 0, nbroadcast = 0; + int32_t *fresh_offsets = nullptr, *broadcast_offsets = nullptr; + + // Only sort buffers if we have a valid comparison function. In certain scenarios, + // like deserialie_ft_versioned() or tokuftdump, we'll need to deserialize ftnodes + // for simple inspection and don't actually require that the message buffers are + // properly sorted. 
This is very ugly, but correct. + const bool sort = ft->cmp.valid(); + + MSN highest_msn_in_this_buffer = + bnc->msg_buffer.deserialize_from_rbuf_v13(rb, &ft->h->highest_unused_msn_for_upgrade, + sort ? &fresh_offsets : nullptr, &nfresh, + sort ? &broadcast_offsets : nullptr, &nbroadcast); + + if (sort) { + sort_and_steal_offset_arrays(bnc, ft->cmp, + &fresh_offsets, nfresh, + nullptr, 0, // no stale offsets + &broadcast_offsets, nbroadcast); + } + + return highest_msn_in_this_buffer; +} + +static void +deserialize_child_buffer_v26(NONLEAF_CHILDINFO bnc, struct rbuf *rb, const toku::comparator &cmp) { int32_t nfresh = 0, nstale = 0, nbroadcast = 0; int32_t *fresh_offsets, *stale_offsets, *broadcast_offsets; @@ -870,50 +925,44 @@ deserialize_child_buffer_v26(NONLEAF_CHILDINFO bnc, struct rbuf *rbuf, const tok const bool sort = cmp.valid(); // read in the message buffer - bnc->msg_buffer.deserialize_from_rbuf(rbuf, + bnc->msg_buffer.deserialize_from_rbuf(rb, sort ? &fresh_offsets : nullptr, &nfresh, sort ? &stale_offsets : nullptr, &nstale, sort ? 
&broadcast_offsets : nullptr, &nbroadcast); if (sort) { - int n_in_this_buffer = nfresh + nstale + nbroadcast; - struct toku_msg_buffer_key_msn_cmp_extra extra(cmp, &bnc->msg_buffer); - toku::sort::mergesort_r(fresh_offsets, nfresh, extra); - bnc->fresh_message_tree.destroy(); - bnc->fresh_message_tree.create_steal_sorted_array(&fresh_offsets, nfresh, n_in_this_buffer); - toku::sort::mergesort_r(stale_offsets, nstale, extra); - bnc->stale_message_tree.destroy(); - bnc->stale_message_tree.create_steal_sorted_array(&stale_offsets, nstale, n_in_this_buffer); - bnc->broadcast_list.destroy(); - bnc->broadcast_list.create_steal_sorted_array(&broadcast_offsets, nbroadcast, n_in_this_buffer); + sort_and_steal_offset_arrays(bnc, cmp, + &fresh_offsets, nfresh, + &stale_offsets, nstale, + &broadcast_offsets, nbroadcast); } } static void -deserialize_child_buffer(NONLEAF_CHILDINFO bnc, struct rbuf *rbuf) { +deserialize_child_buffer(NONLEAF_CHILDINFO bnc, struct rbuf *rb) { // read in the message buffer - bnc->msg_buffer.deserialize_from_rbuf(rbuf, + bnc->msg_buffer.deserialize_from_rbuf(rb, nullptr, nullptr, // fresh_offsets, nfresh, nullptr, nullptr, // stale_offsets, nstale, nullptr, nullptr); // broadcast_offsets, nbroadcast // read in each message tree (fresh, stale, broadcast) - int32_t nfresh = rbuf_int(rbuf); + int32_t nfresh = rbuf_int(rb); int32_t *XMALLOC_N(nfresh, fresh_offsets); for (int i = 0; i < nfresh; i++) { - fresh_offsets[i] = rbuf_int(rbuf); + fresh_offsets[i] = rbuf_int(rb); } - int32_t nstale = rbuf_int(rbuf); + int32_t nstale = rbuf_int(rb); int32_t *XMALLOC_N(nstale, stale_offsets); for (int i = 0; i < nstale; i++) { - stale_offsets[i] = rbuf_int(rbuf); + stale_offsets[i] = rbuf_int(rb); } - int32_t nbroadcast = rbuf_int(rbuf); + int32_t nbroadcast = rbuf_int(rb); int32_t *XMALLOC_N(nbroadcast, broadcast_offsets); for (int i = 0; i < nbroadcast; i++) { - broadcast_offsets[i] = rbuf_int(rbuf); + broadcast_offsets[i] = rbuf_int(rb); } // build OMTs out of 
each offset array @@ -1681,83 +1730,12 @@ deserialize_and_upgrade_internal_node(FTNODE node, MSN highest_msn; highest_msn.msn = 0; - // Only sort buffers if we have a valid comparison function. In certain scenarios, - // like deserialie_ft_versioned() or tokuftdump, we'll need to deserialize ftnodes - // for simple inspection and don't actually require that the message buffers are - // properly sorted. This is very ugly, but correct. - const bool sort_buffers = bfe->ft->cmp.valid(); - // Deserialize de-compressed buffers. for (int i = 0; i < node->n_children; ++i) { NONLEAF_CHILDINFO bnc = BNC(node, i); - int n_in_this_buffer = rbuf_int(rb); // 22. node count - - int32_t *fresh_offsets = nullptr; - int32_t *broadcast_offsets = nullptr; - int nfresh = 0; - int nbroadcast = 0; - - // We skip 'stale' offsets for upgraded nodes. - if (sort_buffers) { - XMALLOC_N(n_in_this_buffer, fresh_offsets); - XMALLOC_N(n_in_this_buffer, broadcast_offsets); - } - - // Atomically decrement the header's MSN count by the number - // of messages in the buffer. - MSN lowest; - uint64_t amount = n_in_this_buffer; - lowest.msn = toku_sync_sub_and_fetch(&bfe->ft->h->highest_unused_msn_for_upgrade.msn, amount); + MSN highest_msn_in_this_buffer = deserialize_child_buffer_v13(bfe->ft, bnc, rb); if (highest_msn.msn == 0) { - highest_msn.msn = lowest.msn + n_in_this_buffer; - } - - // Create the message buffers from the deserialized buffer. - for (int j = 0; j < n_in_this_buffer; ++j) { - bytevec key; ITEMLEN keylen; - bytevec val; ITEMLEN vallen; - unsigned char ctype = rbuf_char(rb); // 23. message type - enum ft_msg_type type = (enum ft_msg_type) ctype; - XIDS xids; - xids_create_from_buffer(rb, &xids); // 24. XID - rbuf_bytes(rb, &key, &keylen); // 25. key - rbuf_bytes(rb, &val, &vallen); // 26. value - - // can we factor this out? 
- int32_t *dest = nullptr; - if (sort_buffers) { - if (ft_msg_type_applies_once(type)) { - dest = &fresh_offsets[nfresh]; - nfresh++; - } else if (ft_msg_type_applies_all(type) || ft_msg_type_does_nothing(type)) { - dest = &broadcast_offsets[nbroadcast]; - nbroadcast++; - } else { - abort(); - } - } - - // Increment our MSN, the last message should have the - // newest/highest MSN. See above for a full explanation. - lowest.msn++; - // TODO: Function to parse stuff out of an rbuf into an FT_MSG - DBT k, v; - FT_MSG_S msg = { - type, lowest, xids, - .u = { .id = { toku_fill_dbt(&k, key, keylen), toku_fill_dbt(&v, val, vallen) } } - }; - bnc->msg_buffer.enqueue(&msg, true, dest); - xids_destroy(&xids); - } - - if (sort_buffers) { - struct toku_msg_buffer_key_msn_cmp_extra extra(bfe->ft->cmp, &bnc->msg_buffer); - typedef toku::sort key_msn_sort; - key_msn_sort::mergesort_r(fresh_offsets, nfresh, extra); - bnc->fresh_message_tree.destroy(); - bnc->fresh_message_tree.create_steal_sorted_array(&fresh_offsets, nfresh, n_in_this_buffer); - bnc->broadcast_list.destroy(); - bnc->broadcast_list.create_steal_sorted_array(&broadcast_offsets, nbroadcast, n_in_this_buffer); + highest_msn.msn = highest_msn_in_this_buffer.msn; } } diff --git a/ft/leafentry.h b/ft/leafentry.h index 5e9e9d77714..690d8c78905 100644 --- a/ft/leafentry.h +++ b/ft/leafentry.h @@ -99,7 +99,7 @@ PATENT RIGHTS GRANT: #include "ft/txn_manager.h" #include "ft/rbuf.h" -#include "ft/ft_msg.h" +#include "ft/msg.h" /* Memory format of packed leaf entry @@ -248,7 +248,7 @@ toku_le_upgrade_13_14(LEAFENTRY_13 old_leafentry, // NULL if there was no stored class bn_data; void -toku_le_apply_msg(FT_MSG msg, +toku_le_apply_msg(const ft_msg &msg, LEAFENTRY old_leafentry, // NULL if there was no stored data. 
bn_data* data_buffer, // bn_data storing leafentry, if NULL, means there is no bn_data uint32_t idx, // index in data_buffer where leafentry is stored (and should be replaced diff --git a/ft/loader/loader.cc b/ft/loader/loader.cc index b56cfd8e5f6..ec2fea4a1cf 100644 --- a/ft/loader/loader.cc +++ b/ft/loader/loader.cc @@ -2941,16 +2941,12 @@ static void add_pair_to_leafnode (struct leaf_buf *lbuf, unsigned char *key, int // #3588 TODO can do the rebalancing here and avoid a lot of work later FTNODE leafnode = lbuf->node; uint32_t idx = BLB_DATA(leafnode, 0)->num_klpairs(); - DBT thekey = { .data = key, .size = (uint32_t) keylen }; - DBT theval = { .data = val, .size = (uint32_t) vallen }; - FT_MSG_S msg = { .type = FT_INSERT, - .msn = ZERO_MSN, - .xids = lbuf->xids, - .u = { .id = { &thekey, &theval } } }; - uint64_t workdone=0; + DBT kdbt, vdbt; + ft_msg msg(toku_fill_dbt(&kdbt, key, keylen), toku_fill_dbt(&vdbt, val, vallen), FT_INSERT, ZERO_MSN, lbuf->xids); + uint64_t workdone = 0; // there's no mvcc garbage in a bulk-loaded FT, so there's no need to pass useful gc info txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, true); - toku_ft_bn_apply_msg_once(BLB(leafnode,0), &msg, idx, keylen, NULL, &gc_info, &workdone, stats_to_update); + toku_ft_bn_apply_msg_once(BLB(leafnode,0), msg, idx, keylen, NULL, &gc_info, &workdone, stats_to_update); } static int write_literal(struct dbout *out, void*data, size_t len) { diff --git a/ft/ft_msg.cc b/ft/msg.cc similarity index 63% rename from ft/ft_msg.cc rename to ft/msg.cc index f03ae2a417c..08ab28d6624 100644 --- a/ft/ft_msg.cc +++ b/ft/msg.cc @@ -88,46 +88,84 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." 
+#include "portability/toku_portability.h" -#include -#include "fttypes.h" -#include "xids.h" -#include "ft_msg.h" +#include "ft/fttypes.h" +#include "ft/msg.h" +#include "ft/xids.h" +#include "ft/ybt.h" - -uint32_t -ft_msg_get_keylen(FT_MSG ft_msg) { - uint32_t rval = ft_msg->u.id.key->size; - return rval; +ft_msg::ft_msg(const DBT *key, const DBT *val, enum ft_msg_type t, MSN m, XIDS x) : + _key(key ? *key : toku_empty_dbt()), + _val(val ? *val : toku_empty_dbt()), + _type(t), _msn(m), _xids(x) { } -uint32_t -ft_msg_get_vallen(FT_MSG ft_msg) { - uint32_t rval = ft_msg->u.id.val->size; - return rval; +ft_msg ft_msg::deserialize_from_rbuf(struct rbuf *rb, XIDS *x, bool *is_fresh) { + bytevec keyp, valp; + ITEMLEN keylen, vallen; + enum ft_msg_type t = (enum ft_msg_type) rbuf_char(rb); + *is_fresh = rbuf_char(rb); + MSN m = rbuf_msn(rb); + xids_create_from_buffer(rb, x); + rbuf_bytes(rb, &keyp, &keylen); + rbuf_bytes(rb, &valp, &vallen); + + DBT k, v; + return ft_msg(toku_fill_dbt(&k, keyp, keylen), toku_fill_dbt(&v, valp, vallen), t, m, *x); } -XIDS -ft_msg_get_xids(FT_MSG ft_msg) { - XIDS rval = ft_msg->xids; - return rval; +ft_msg ft_msg::deserialize_from_rbuf_v13(struct rbuf *rb, MSN m, XIDS *x) { + bytevec keyp, valp; + ITEMLEN keylen, vallen; + enum ft_msg_type t = (enum ft_msg_type) rbuf_char(rb); + xids_create_from_buffer(rb, x); + rbuf_bytes(rb, &keyp, &keylen); + rbuf_bytes(rb, &valp, &vallen); + + DBT k, v; + return ft_msg(toku_fill_dbt(&k, keyp, keylen), toku_fill_dbt(&v, valp, vallen), t, m, *x); } -void * -ft_msg_get_key(FT_MSG ft_msg) { - void * rval = ft_msg->u.id.key->data; - return rval; +const DBT *ft_msg::kdbt() const { + return &_key; } -void * -ft_msg_get_val(FT_MSG ft_msg) { - void * rval = ft_msg->u.id.val->data; - return rval; +const DBT *ft_msg::vdbt() const { + return &_val; } -enum ft_msg_type -ft_msg_get_type(FT_MSG ft_msg) { - enum ft_msg_type rval = ft_msg->type; - return rval; +enum ft_msg_type ft_msg::type() const { + return _type; } 
+MSN ft_msg::msn() const { + return _msn; +} + +XIDS ft_msg::xids() const { + return _xids; +} + +size_t ft_msg::total_size() const { + // Must store two 4-byte lengths + static const size_t key_val_overhead = 8; + + // 1 byte type, 1 byte freshness, then 8 byte MSN + static const size_t msg_overhead = 2 + sizeof(MSN); + + static const size_t total_overhead = key_val_overhead + msg_overhead; + + const size_t keyval_size = _key.size + _val.size; + const size_t xids_size = xids_get_serialize_size(xids()); + return total_overhead + keyval_size + xids_size; +} + +void ft_msg::serialize_to_wbuf(struct wbuf *wb, bool is_fresh) const { + wbuf_nocrc_char(wb, (unsigned char) _type); + wbuf_nocrc_char(wb, (unsigned char) is_fresh); + wbuf_MSN(wb, _msn); + wbuf_nocrc_xids(wb, _xids); + wbuf_nocrc_bytes(wb, _key.data, _key.size); + wbuf_nocrc_bytes(wb, _val.data, _val.size); +} diff --git a/ft/ft_msg.h b/ft/msg.h similarity index 88% rename from ft/ft_msg.h rename to ft/msg.h index 8a0b80be969..ea183f075b3 100644 --- a/ft/ft_msg.h +++ b/ft/msg.h @@ -181,32 +181,36 @@ ft_msg_type_does_nothing(enum ft_msg_type type) typedef struct xids_t *XIDS; -/* tree commands */ -struct ft_msg { - enum ft_msg_type type; - MSN msn; // message sequence number - XIDS xids; - union { - /* insert or delete */ - struct ft_msg_insert_delete { - const DBT *key; // for insert, delete, upsertdel - const DBT *val; // for insert, delete, (and it is the "extra" for upsertdel, upsertdel_broadcast_all) - } id; - } u; +class ft_msg { +public: + ft_msg(const DBT *key, const DBT *val, enum ft_msg_type t, MSN m, XIDS x); + + enum ft_msg_type type() const; + + MSN msn() const; + + XIDS xids() const; + + const DBT *kdbt() const; + + const DBT *vdbt() const; + + size_t total_size() const; + + void serialize_to_wbuf(struct wbuf *wb, bool is_fresh) const; + + // deserialization goes through a static factory function so the ft msg + // API stays completely const and there's no default constructor + static ft_msg 
deserialize_from_rbuf(struct rbuf *rb, XIDS *xids, bool *is_fresh); + + // Version 13/14 messages did not have an msn - so `m' is the MSN + // that will be assigned to the message that gets deserialized. + static ft_msg deserialize_from_rbuf_v13(struct rbuf *rb, MSN m, XIDS *xids); + +private: + const DBT _key; + const DBT _val; + enum ft_msg_type _type; + MSN _msn; + XIDS _xids; }; - -// Message sent into the ft to implement insert, delete, update, etc -typedef struct ft_msg FT_MSG_S; -typedef struct ft_msg *FT_MSG; - -uint32_t ft_msg_get_keylen(FT_MSG ft_msg); - -uint32_t ft_msg_get_vallen(FT_MSG ft_msg); - -XIDS ft_msg_get_xids(FT_MSG ft_msg); - -void *ft_msg_get_key(FT_MSG ft_msg); - -void *ft_msg_get_val(FT_MSG ft_msg); - -enum ft_msg_type ft_msg_get_type(FT_MSG ft_msg); diff --git a/ft/msg_buffer.cc b/ft/msg_buffer.cc index 1e95321e8b0..6ac23131969 100644 --- a/ft/msg_buffer.cc +++ b/ft/msg_buffer.cc @@ -128,42 +128,75 @@ void message_buffer::deserialize_from_rbuf(struct rbuf *rb, _resize(rb->size + 64); // rb->size is a good hint for how big the buffer will be - // read in each message individually + // deserialize each message individually, noting whether it was fresh + // and putting its buffer offset in the appropriate offsets array for (int i = 0; i < n_in_this_buffer; i++) { - bytevec key; ITEMLEN keylen; - bytevec val; ITEMLEN vallen; - // this is weird but it's necessary to pass icc and gcc together - unsigned char ctype = rbuf_char(rb); - enum ft_msg_type type = (enum ft_msg_type) ctype; - bool is_fresh = rbuf_char(rb); - MSN msn = rbuf_msn(rb); XIDS xids; - xids_create_from_buffer(rb, &xids); - rbuf_bytes(rb, &key, &keylen); /* Returns a pointer into the rbuf. 
*/ - rbuf_bytes(rb, &val, &vallen); - int32_t *dest = nullptr; - if (ft_msg_type_applies_once(type)) { + bool is_fresh; + const ft_msg msg = ft_msg::deserialize_from_rbuf(rb, &xids, &is_fresh); + + int32_t *dest; + if (ft_msg_type_applies_once(msg.type())) { if (is_fresh) { dest = fresh_offsets ? *fresh_offsets + (*nfresh)++ : nullptr; } else { dest = stale_offsets ? *stale_offsets + (*nstale)++ : nullptr; } } else { - invariant(ft_msg_type_applies_all(type) || ft_msg_type_does_nothing(type)); + invariant(ft_msg_type_applies_all(msg.type()) || ft_msg_type_does_nothing(msg.type())); dest = broadcast_offsets ? *broadcast_offsets + (*nbroadcast)++ : nullptr; } - // TODO: Function to parse stuff out of an rbuf into an FT_MSG - DBT k, v; - FT_MSG_S msg = { - type, msn, xids, - .u = { .id = { toku_fill_dbt(&k, key, keylen), toku_fill_dbt(&v, val, vallen) } } - }; - enqueue(&msg, is_fresh, dest); + enqueue(msg, is_fresh, dest); xids_destroy(&xids); } - invariant(num_entries() == n_in_this_buffer); + invariant(_num_entries == n_in_this_buffer); +} + +MSN message_buffer::deserialize_from_rbuf_v13(struct rbuf *rb, + MSN *highest_unused_msn_for_upgrade, + int32_t **fresh_offsets, int32_t *nfresh, + int32_t **broadcast_offsets, int32_t *nbroadcast) { + // read the number of messages in this buffer + int n_in_this_buffer = rbuf_int(rb); + if (fresh_offsets != nullptr) { + XMALLOC_N(n_in_this_buffer, *fresh_offsets); + } + if (broadcast_offsets != nullptr) { + XMALLOC_N(n_in_this_buffer, *broadcast_offsets); + } + + // Atomically decrement the header's MSN count by the number + // of messages in the buffer. + MSN highest_msn_in_this_buffer = { + .msn = toku_sync_sub_and_fetch(&highest_unused_msn_for_upgrade->msn, n_in_this_buffer) + }; + + // Create the message buffers from the deserialized buffer. + for (int i = 0; i < n_in_this_buffer; i++) { + XIDS xids; + // There were no stale messages at this version, so call it fresh. 
+ const bool is_fresh = true; + + // Increment our MSN, the last message should have the + // newest/highest MSN. See above for a full explanation. + highest_msn_in_this_buffer.msn++; + const ft_msg msg = ft_msg::deserialize_from_rbuf_v13(rb, highest_msn_in_this_buffer, &xids); + + int32_t *dest; + if (ft_msg_type_applies_once(msg.type())) { + dest = fresh_offsets ? *fresh_offsets + (*nfresh)++ : nullptr; + } else { + invariant(ft_msg_type_applies_all(msg.type()) || ft_msg_type_does_nothing(msg.type())); + dest = broadcast_offsets ? *broadcast_offsets + (*nbroadcast)++ : nullptr; + } + + enqueue(msg, is_fresh, dest); + xids_destroy(&xids); + } + + return highest_msn_in_this_buffer; } void message_buffer::_resize(size_t new_size) { @@ -184,7 +217,7 @@ struct message_buffer::buffer_entry *message_buffer::get_buffer_entry(int32_t of return (struct buffer_entry *) (_memory + offset); } -void message_buffer::enqueue(FT_MSG msg, bool is_fresh, int32_t *offset) { +void message_buffer::enqueue(const ft_msg &msg, bool is_fresh, int32_t *offset) { int need_space_here = msg_memsize_in_buffer(msg); int need_space_total = _memory_used + need_space_here; if (_memory == nullptr || need_space_total > _memory_size) { @@ -192,18 +225,18 @@ void message_buffer::enqueue(FT_MSG msg, bool is_fresh, int32_t *offset) { int next_2 = next_power_of_two(need_space_total); _resize(next_2); } - ITEMLEN keylen = ft_msg_get_keylen(msg); - ITEMLEN datalen = ft_msg_get_vallen(msg); + ITEMLEN keylen = msg.kdbt()->size; + ITEMLEN datalen = msg.vdbt()->size; struct buffer_entry *entry = get_buffer_entry(_memory_used); - entry->type = (unsigned char) ft_msg_get_type(msg); - entry->msn = msg->msn; - xids_cpy(&entry->xids_s, ft_msg_get_xids(msg)); + entry->type = (unsigned char) msg.type(); + entry->msn = msg.msn(); + xids_cpy(&entry->xids_s, msg.xids()); entry->is_fresh = is_fresh; unsigned char *e_key = xids_get_end_of_array(&entry->xids_s); entry->keylen = keylen; - memcpy(e_key, ft_msg_get_key(msg), 
keylen); + memcpy(e_key, msg.kdbt()->data, keylen); entry->vallen = datalen; - memcpy(e_key + keylen, ft_msg_get_val(msg), datalen); + memcpy(e_key + keylen, msg.vdbt()->data, datalen); if (offset) { *offset = _memory_used; } @@ -221,7 +254,7 @@ bool message_buffer::get_freshness(int32_t offset) const { return entry->is_fresh; } -FT_MSG_S message_buffer::get_message(int32_t offset, DBT *keydbt, DBT *valdbt) const { +ft_msg message_buffer::get_message(int32_t offset, DBT *keydbt, DBT *valdbt) const { struct buffer_entry *entry = get_buffer_entry(offset); ITEMLEN keylen = entry->keylen; ITEMLEN vallen = entry->vallen; @@ -230,11 +263,7 @@ FT_MSG_S message_buffer::get_message(int32_t offset, DBT *keydbt, DBT *valdbt) c const XIDS xids = (XIDS) &entry->xids_s; bytevec key = xids_get_end_of_array(xids); bytevec val = (uint8_t *) key + entry->keylen; - FT_MSG_S msg = { - type, msn, xids, - .u = { .id = { toku_fill_dbt(keydbt, key, keylen), toku_fill_dbt(valdbt, val, vallen) } } - }; - return msg; + return ft_msg(toku_fill_dbt(keydbt, key, keylen), toku_fill_dbt(valdbt, val, vallen), type, msn, xids); } void message_buffer::get_message_key_msn(int32_t offset, DBT *key, MSN *msn) const { @@ -269,28 +298,21 @@ bool message_buffer::equals(message_buffer *other) const { } void message_buffer::serialize_to_wbuf(struct wbuf *wb) const { - wbuf_nocrc_int(wb, num_entries()); + wbuf_nocrc_int(wb, _num_entries); struct msg_serialize_fn { struct wbuf *wb; msg_serialize_fn(struct wbuf *w) : wb(w) { } - int operator()(FT_MSG msg, bool is_fresh) { - enum ft_msg_type type = (enum ft_msg_type) msg->type; - paranoid_invariant((int) type >= 0 && (int) type < 256); - wbuf_nocrc_char(wb, (unsigned char) type); - wbuf_nocrc_char(wb, (unsigned char) is_fresh); - wbuf_MSN(wb, msg->msn); - wbuf_nocrc_xids(wb, ft_msg_get_xids(msg)); - wbuf_nocrc_bytes(wb, ft_msg_get_key(msg), ft_msg_get_keylen(msg)); - wbuf_nocrc_bytes(wb, ft_msg_get_val(msg), ft_msg_get_vallen(msg)); + int operator()(const 
ft_msg &msg, bool is_fresh) { + msg.serialize_to_wbuf(wb, is_fresh); return 0; } } serialize_fn(wb); iterate(serialize_fn); } -size_t message_buffer::msg_memsize_in_buffer(FT_MSG msg) { - const uint32_t keylen = ft_msg_get_keylen(msg); - const uint32_t datalen = ft_msg_get_vallen(msg); - const size_t xidslen = xids_get_size(msg->xids); +size_t message_buffer::msg_memsize_in_buffer(const ft_msg &msg) { + const uint32_t keylen = msg.kdbt()->size; + const uint32_t datalen = msg.vdbt()->size; + const size_t xidslen = xids_get_size(msg.xids()); return sizeof(struct buffer_entry) + keylen + datalen + xidslen - sizeof(XIDS_S); } diff --git a/ft/msg_buffer.h b/ft/msg_buffer.h index cbfa9617e20..fd5fe29e43b 100644 --- a/ft/msg_buffer.h +++ b/ft/msg_buffer.h @@ -91,7 +91,7 @@ PATENT RIGHTS GRANT: #include "ft/fttypes.h" #include "ft/xids-internal.h" #include "ft/xids.h" -#include "ft/ft_msg.h" +#include "ft/msg.h" #include "ft/ybt.h" class message_buffer { @@ -111,13 +111,24 @@ public: int32_t **stale_offsets, int32_t *nstale, int32_t **broadcast_offsets, int32_t *nbroadcast); - void enqueue(FT_MSG msg, bool is_fresh, int32_t *offset); + // effect: deserializes a message buffer whose messages are at version 13/14 + // returns: similar to deserialize_from_rbuf(), excpet there are no stale messages + // and each message is assigned a sequential value from *highest_unused_msn_for_upgrade, + // which is modified as needed using toku_sync_fech_and_sub() + // returns: the highest MSN assigned to any message in this buffer + // requires: similar to deserialize_from_rbuf(), and highest_unused_msn_for_upgrade != nullptr + MSN deserialize_from_rbuf_v13(struct rbuf *rb, + MSN *highest_unused_msn_for_upgrade, + int32_t **fresh_offsets, int32_t *nfresh, + int32_t **broadcast_offsets, int32_t *nbroadcast); + + void enqueue(const ft_msg &msg, bool is_fresh, int32_t *offset); void set_freshness(int32_t offset, bool is_fresh); bool get_freshness(int32_t offset) const; - FT_MSG_S 
get_message(int32_t offset, DBT *keydbt, DBT *valdbt) const; + ft_msg get_message(int32_t offset, DBT *keydbt, DBT *valdbt) const; void get_message_key_msn(int32_t offset, DBT *key, MSN *msn) const; @@ -133,13 +144,13 @@ public: int iterate(F &fn) const { for (int32_t offset = 0; offset < _memory_used; ) { DBT k, v; - FT_MSG_S msg = get_message(offset, &k, &v); + const ft_msg msg = get_message(offset, &k, &v); bool is_fresh = get_freshness(offset); - int r = fn(&msg, is_fresh); + int r = fn(msg, is_fresh); if (r != 0) { return r; } - offset += msg_memsize_in_buffer(&msg); + offset += msg_memsize_in_buffer(msg); } return 0; } @@ -148,7 +159,7 @@ public: void serialize_to_wbuf(struct wbuf *wb) const; - static size_t msg_memsize_in_buffer(FT_MSG msg); + static size_t msg_memsize_in_buffer(const ft_msg &msg); private: void _resize(size_t new_size); diff --git a/ft/node.cc b/ft/node.cc index d00c4085d54..d1e879281c6 100644 --- a/ft/node.cc +++ b/ft/node.cc @@ -266,18 +266,18 @@ static void do_bn_apply_msg(FT_HANDLE ft_handle, BASEMENTNODE bn, message_buffer *msg_buffer, int32_t offset, txn_gc_info *gc_info, uint64_t *workdone, STAT64INFO stats_to_update) { DBT k, v; - FT_MSG_S msg = msg_buffer->get_message(offset, &k, &v); + ft_msg msg = msg_buffer->get_message(offset, &k, &v); // The messages are being iterated over in (key,msn) order or just in // msn order, so all the messages for one key, from one buffer, are in // ascending msn order. So it's ok that we don't update the basement // node's msn until the end. 
- if (msg.msn.msn > bn->max_msn_applied.msn) { + if (msg.msn().msn > bn->max_msn_applied.msn) { toku_ft_bn_apply_msg( ft_handle->ft->cmp, ft_handle->ft->update_fun, bn, - &msg, + msg, gc_info, workdone, stats_to_update @@ -1120,7 +1120,7 @@ void toku_ft_nonleaf_append_child(FTNODE node, FTNODE child, const DBT *pivotkey void toku_ft_bn_apply_msg_once ( BASEMENTNODE bn, - const FT_MSG msg, + const ft_msg &msg, uint32_t idx, uint32_t le_keylen, LEAFENTRY le, @@ -1137,7 +1137,7 @@ toku_ft_bn_apply_msg_once ( LEAFENTRY new_le=0; int64_t numbytes_delta = 0; // how many bytes of user data (not including overhead) were added or deleted from this row int64_t numrows_delta = 0; // will be +1 or -1 or 0 (if row was added or deleted or not) - uint32_t key_storage_size = ft_msg_get_keylen(msg) + sizeof(uint32_t); + uint32_t key_storage_size = msg.kdbt()->size + sizeof(uint32_t); if (le) { oldsize = leafentry_memsize(le) + key_storage_size; } @@ -1223,17 +1223,11 @@ static void setval_fun (const DBT *new_val, void *svextra_v) { // can't leave scope until toku_ft_bn_apply_msg_once if // this is a delete DBT val; - FT_MSG_S msg = { FT_NONE, svextra->msn, svextra->xids, - .u = { .id = {svextra->key, NULL} } }; - if (new_val) { - msg.type = FT_INSERT; - msg.u.id.val = new_val; - } else { - msg.type = FT_DELETE_ANY; - toku_init_dbt(&val); - msg.u.id.val = &val; - } - toku_ft_bn_apply_msg_once(svextra->bn, &msg, + ft_msg msg(svextra->key, + new_val ? new_val : toku_init_dbt(&val), + new_val ? FT_INSERT : FT_DELETE_ANY, + svextra->msn, svextra->xids); + toku_ft_bn_apply_msg_once(svextra->bn, msg, svextra->idx, svextra->le_keylen, svextra->le, svextra->gc_info, svextra->workdone, svextra->stats_to_update); @@ -1245,7 +1239,7 @@ static void setval_fun (const DBT *new_val, void *svextra_v) { // so capturing the msn in the setval_extra_s is not strictly required. 
The alternative // would be to put a dummy msn in the messages created by setval_fun(), but preserving // the original msn seems cleaner and it preserves accountability at a lower layer. -static int do_update(ft_update_func update_fun, const DESCRIPTOR_S *desc, BASEMENTNODE bn, FT_MSG msg, uint32_t idx, +static int do_update(ft_update_func update_fun, const DESCRIPTOR_S *desc, BASEMENTNODE bn, const ft_msg &msg, uint32_t idx, LEAFENTRY le, void* keydata, uint32_t keylen, @@ -1261,24 +1255,24 @@ static int do_update(ft_update_func update_fun, const DESCRIPTOR_S *desc, BASEME // the location of data depends whether this is a regular or // broadcast update - if (msg->type == FT_UPDATE) { + if (msg.type() == FT_UPDATE) { // key is passed in with command (should be same as from le) // update function extra is passed in with command - keyp = msg->u.id.key; - update_function_extra = msg->u.id.val; + keyp = msg.kdbt(); + update_function_extra = msg.vdbt(); } else { - invariant(msg->type == FT_UPDATE_BROADCAST_ALL); + invariant(msg.type() == FT_UPDATE_BROADCAST_ALL); // key is not passed in with broadcast, it comes from le // update function extra is passed in with command paranoid_invariant(le); // for broadcast updates, we just hit all leafentries // so this cannot be null paranoid_invariant(keydata); paranoid_invariant(keylen); - paranoid_invariant(msg->u.id.key->size == 0); + paranoid_invariant(msg.kdbt()->size == 0); keyp = toku_fill_dbt(&key, keydata, keylen); - update_function_extra = msg->u.id.val; + update_function_extra = msg.vdbt(); } - toku_ft_status_note_update(msg->type == FT_UPDATE_BROADCAST_ALL); + toku_ft_status_note_update(msg.type() == FT_UPDATE_BROADCAST_ALL); if (le && !le_latest_is_del(le)) { // if the latest val exists, use it, and we'll use the leafentry later @@ -1291,7 +1285,7 @@ static int do_update(ft_update_func update_fun, const DESCRIPTOR_S *desc, BASEME } le_for_update = le; - struct setval_extra_s setval_extra = {setval_tag, false, 0, bn, 
msg->msn, msg->xids, + struct setval_extra_s setval_extra = {setval_tag, false, 0, bn, msg.msn(), msg.xids(), keyp, idx, keylen, le_for_update, gc_info, workdone, stats_to_update}; // call handlerton's ft->update_fun(), which passes setval_extra to setval_fun() @@ -1314,7 +1308,7 @@ toku_ft_bn_apply_msg ( const toku::comparator &cmp, ft_update_func update_fun, BASEMENTNODE bn, - FT_MSG msg, + const ft_msg &msg, txn_gc_info *gc_info, uint64_t *workdone, STAT64INFO stats_to_update @@ -1330,12 +1324,12 @@ toku_ft_bn_apply_msg ( uint32_t num_klpairs; int r; - struct toku_msg_leafval_heaviside_extra be(cmp, msg->u.id.key); + struct toku_msg_leafval_heaviside_extra be(cmp, msg.kdbt()); unsigned int doing_seqinsert = bn->seqinsert; bn->seqinsert = 0; - switch (msg->type) { + switch (msg.type()) { case FT_INSERT_NO_OVERWRITE: case FT_INSERT: { uint32_t idx; @@ -1415,10 +1409,10 @@ toku_ft_bn_apply_msg ( // work was done by this message. since this is a broadcast message, // we have to create a new message whose key is the current le's key. DBT curr_keydbt; - FT_MSG_S curr_msg = *msg; - curr_msg.u.id.key = toku_fill_dbt(&curr_keydbt, curr_keyp, curr_keylen); - toku_ft_bn_apply_msg_once(bn, &curr_msg, idx, curr_keylen, storeddata, gc_info, workdone, stats_to_update); - // at this point, we cannot trust msg->u.id.key to be valid. + ft_msg curr_msg(toku_fill_dbt(&curr_keydbt, curr_keyp, curr_keylen), + msg.vdbt(), msg.type(), msg.msn(), msg.xids()); + toku_ft_bn_apply_msg_once(bn, curr_msg, idx, curr_keylen, storeddata, gc_info, workdone, stats_to_update); + // at this point, we cannot trust msg.kdbt to be valid. 
uint32_t new_dmt_size = bn->data_buffer.num_klpairs(); if (new_dmt_size != num_klpairs) { paranoid_invariant(new_dmt_size + 1 == num_klpairs); @@ -1444,14 +1438,14 @@ toku_ft_bn_apply_msg ( r = bn->data_buffer.fetch_klpair(idx, &storeddata, &curr_keylen, &curr_keyp); assert_zero(r); int deleted = 0; - if (le_has_xids(storeddata, msg->xids)) { + if (le_has_xids(storeddata, msg.xids())) { // message application code needs a key in order to determine how much // work was done by this message. since this is a broadcast message, // we have to create a new message whose key is the current le's key. DBT curr_keydbt; - FT_MSG_S curr_msg = *msg; - curr_msg.u.id.key = toku_fill_dbt(&curr_keydbt, curr_keyp, curr_keylen); - toku_ft_bn_apply_msg_once(bn, &curr_msg, idx, curr_keylen, storeddata, gc_info, workdone, stats_to_update); + ft_msg curr_msg(toku_fill_dbt(&curr_keydbt, curr_keyp, curr_keylen), + msg.vdbt(), msg.type(), msg.msn(), msg.xids()); + toku_ft_bn_apply_msg_once(bn, curr_msg, idx, curr_keylen, storeddata, gc_info, workdone, stats_to_update); uint32_t new_dmt_size = bn->data_buffer.num_klpairs(); if (new_dmt_size != num_klpairs) { paranoid_invariant(new_dmt_size + 1 == num_klpairs); @@ -1480,8 +1474,8 @@ toku_ft_bn_apply_msg ( { //Point to msg's copy of the key so we don't worry about le being freed //TODO: 46 MAYBE Get rid of this when le_apply message memory is better handled - key = msg->u.id.key->data; - keylen = msg->u.id.key->size; + key = msg.kdbt()->data; + keylen = msg.kdbt()->size; } r = do_update(update_fun, cmp.get_descriptor(), bn, msg, idx, NULL, NULL, 0, gc_info, workdone, stats_to_update); } else if (r==0) { @@ -1557,15 +1551,15 @@ int toku_msg_buffer_key_msn_cmp(const struct toku_msg_buffer_key_msn_cmp_extra & // Effect: Enqueue the message represented by the parameters into the // bnc's buffer, and put it in either the fresh or stale message tree, // or the broadcast list. 
-static void bnc_insert_msg(NONLEAF_CHILDINFO bnc, FT_MSG msg, bool is_fresh, const toku::comparator &cmp) { +static void bnc_insert_msg(NONLEAF_CHILDINFO bnc, const ft_msg &msg, bool is_fresh, const toku::comparator &cmp) { int r = 0; int32_t offset; bnc->msg_buffer.enqueue(msg, is_fresh, &offset); - enum ft_msg_type type = ft_msg_get_type(msg); + enum ft_msg_type type = msg.type(); if (ft_msg_type_applies_once(type)) { DBT key; - toku_fill_dbt(&key, ft_msg_get_key(msg), ft_msg_get_keylen(msg)); - struct toku_msg_buffer_key_msn_heaviside_extra extra(cmp, &bnc->msg_buffer, &key, msg->msn); + toku_fill_dbt(&key, msg.kdbt()->data, msg.kdbt()->size); + struct toku_msg_buffer_key_msn_heaviside_extra extra(cmp, &bnc->msg_buffer, &key, msg.msn()); if (is_fresh) { r = bnc->fresh_message_tree.insert(offset, extra, nullptr); assert_zero(r); @@ -1585,15 +1579,13 @@ static void bnc_insert_msg(NONLEAF_CHILDINFO bnc, FT_MSG msg, bool is_fresh, con void toku_bnc_insert_msg(NONLEAF_CHILDINFO bnc, const void *key, ITEMLEN keylen, const void *data, ITEMLEN datalen, enum ft_msg_type type, MSN msn, XIDS xids, bool is_fresh, const toku::comparator &cmp) { DBT k, v; - FT_MSG_S msg = { - type, msn, xids, .u = { .id = { toku_fill_dbt(&k, key, keylen), toku_fill_dbt(&v, data, datalen) } } - }; - bnc_insert_msg(bnc, &msg, is_fresh, cmp); + ft_msg msg(toku_fill_dbt(&k, key, keylen), toku_fill_dbt(&v, data, datalen), type, msn, xids); + bnc_insert_msg(bnc, msg, is_fresh, cmp); } // append a msg to a nonleaf node's child buffer static void ft_append_msg_to_child_buffer(const toku::comparator &cmp, FTNODE node, - int childnum, FT_MSG msg, bool is_fresh) { + int childnum, const ft_msg &msg, bool is_fresh) { paranoid_invariant(BP_STATE(node,childnum) == PT_AVAIL); bnc_insert_msg(BNC(node, childnum), msg, is_fresh, cmp); node->dirty = 1; @@ -1601,19 +1593,17 @@ static void ft_append_msg_to_child_buffer(const toku::comparator &cmp, FTNODE no // This is only exported for tests. 
void toku_ft_append_to_child_buffer(const toku::comparator &cmp, FTNODE node, int childnum, enum ft_msg_type type, MSN msn, XIDS xids, bool is_fresh, const DBT *key, const DBT *val) { - FT_MSG_S msg = { - type, msn, xids, .u = { .id = { key, val } } - }; - ft_append_msg_to_child_buffer(cmp, node, childnum, &msg, is_fresh); + ft_msg msg(key, val, type, msn, xids); + ft_append_msg_to_child_buffer(cmp, node, childnum, msg, is_fresh); } -static void ft_nonleaf_msg_once_to_child(const toku::comparator &cmp, FTNODE node, int target_childnum, FT_MSG msg, bool is_fresh, size_t flow_deltas[]) +static void ft_nonleaf_msg_once_to_child(const toku::comparator &cmp, FTNODE node, int target_childnum, const ft_msg &msg, bool is_fresh, size_t flow_deltas[]) // Previously we had passive aggressive promotion, but that causes a lot of I/O a the checkpoint. So now we are just putting it in the buffer here. // Also we don't worry about the node getting overfull here. It's the caller's problem. { unsigned int childnum = (target_childnum >= 0 ? target_childnum - : toku_ftnode_which_child(node, msg->u.id.key, cmp)); + : toku_ftnode_which_child(node, msg.kdbt(), cmp)); ft_append_msg_to_child_buffer(cmp, node, childnum, msg, is_fresh); NONLEAF_CHILDINFO bnc = BNC(node, childnum); bnc->flow[0] += flow_deltas[0]; @@ -1684,7 +1674,7 @@ int toku_ftnode_hot_next_child(FTNODE node, const DBT *k, const toku::comparator } static void -ft_nonleaf_msg_all(const toku::comparator &cmp, FTNODE node, FT_MSG msg, bool is_fresh, size_t flow_deltas[]) +ft_nonleaf_msg_all(const toku::comparator &cmp, FTNODE node, const ft_msg &msg, bool is_fresh, size_t flow_deltas[]) // Effect: Put the message into a nonleaf node. We put it into all children, possibly causing the children to become reactive. // We don't do the splitting and merging. That's up to the caller after doing all the puts it wants to do. // The re_array[i] gets set to the reactivity of any modified child i. (And there may be several such children.) 
@@ -1695,7 +1685,7 @@ ft_nonleaf_msg_all(const toku::comparator &cmp, FTNODE node, FT_MSG msg, bool is } static void -ft_nonleaf_put_msg(const toku::comparator &cmp, FTNODE node, int target_childnum, FT_MSG msg, bool is_fresh, size_t flow_deltas[]) +ft_nonleaf_put_msg(const toku::comparator &cmp, FTNODE node, int target_childnum, const ft_msg &msg, bool is_fresh, size_t flow_deltas[]) // Effect: Put the message into a nonleaf node. We may put it into a child, possibly causing the child to become reactive. // We don't do the splitting and merging. That's up to the caller after doing all the puts it wants to do. // The re_array[i] gets set to the reactivity of any modified child i. (And there may be several such children.) @@ -1708,16 +1698,16 @@ ft_nonleaf_put_msg(const toku::comparator &cmp, FTNODE node, int target_childnum // node->max_msn_applied_to_node_on_disk here, // and don't do it in toku_ftnode_put_msg // - MSN msg_msn = msg->msn; + MSN msg_msn = msg.msn(); invariant(msg_msn.msn > node->max_msn_applied_to_node_on_disk.msn); node->max_msn_applied_to_node_on_disk = msg_msn; - if (ft_msg_type_applies_once(msg->type)) { + if (ft_msg_type_applies_once(msg.type())) { ft_nonleaf_msg_once_to_child(cmp, node, target_childnum, msg, is_fresh, flow_deltas); - } else if (ft_msg_type_applies_all(msg->type)) { + } else if (ft_msg_type_applies_all(msg.type())) { ft_nonleaf_msg_all(cmp, node, msg, is_fresh, flow_deltas); } else { - paranoid_invariant(ft_msg_type_does_nothing(msg->type)); + paranoid_invariant(ft_msg_type_does_nothing(msg.type())); } } @@ -1874,7 +1864,7 @@ toku_ftnode_put_msg ( ft_update_func update_fun, FTNODE node, int target_childnum, - FT_MSG msg, + const ft_msg &msg, bool is_fresh, txn_gc_info *gc_info, size_t flow_deltas[], @@ -1908,7 +1898,7 @@ void toku_ft_leaf_apply_msg( ft_update_func update_fun, FTNODE node, int target_childnum, // which child to inject to, or -1 if unknown - FT_MSG msg, + const ft_msg &msg, txn_gc_info *gc_info, uint64_t 
*workdone, STAT64INFO stats_to_update @@ -1937,18 +1927,18 @@ void toku_ft_leaf_apply_msg( // This is why we handle node->max_msn_applied_to_node_on_disk both here // and in ft_nonleaf_put_msg, as opposed to in one location, toku_ftnode_put_msg. // - MSN msg_msn = msg->msn; + MSN msg_msn = msg.msn(); if (msg_msn.msn > node->max_msn_applied_to_node_on_disk.msn) { node->max_msn_applied_to_node_on_disk = msg_msn; } - if (ft_msg_type_applies_once(msg->type)) { + if (ft_msg_type_applies_once(msg.type())) { unsigned int childnum = (target_childnum >= 0 ? target_childnum - : toku_ftnode_which_child(node, msg->u.id.key, cmp)); + : toku_ftnode_which_child(node, msg.kdbt(), cmp)); BASEMENTNODE bn = BLB(node, childnum); - if (msg->msn.msn > bn->max_msn_applied.msn) { - bn->max_msn_applied = msg->msn; + if (msg.msn().msn > bn->max_msn_applied.msn) { + bn->max_msn_applied = msg.msn(); toku_ft_bn_apply_msg(cmp, update_fun, bn, @@ -1960,10 +1950,10 @@ void toku_ft_leaf_apply_msg( toku_ft_status_note_msn_discard(); } } - else if (ft_msg_type_applies_all(msg->type)) { + else if (ft_msg_type_applies_all(msg.type())) { for (int childnum=0; childnumn_children; childnum++) { - if (msg->msn.msn > BLB(node, childnum)->max_msn_applied.msn) { - BLB(node, childnum)->max_msn_applied = msg->msn; + if (msg.msn().msn > BLB(node, childnum)->max_msn_applied.msn) { + BLB(node, childnum)->max_msn_applied = msg.msn(); toku_ft_bn_apply_msg(cmp, update_fun, BLB(node, childnum), @@ -1976,8 +1966,8 @@ void toku_ft_leaf_apply_msg( } } } - else if (!ft_msg_type_does_nothing(msg->type)) { - invariant(ft_msg_type_does_nothing(msg->type)); + else if (!ft_msg_type_does_nothing(msg.type())) { + invariant(ft_msg_type_does_nothing(msg.type())); } VERIFY_NODE(t, node); } diff --git a/ft/node.h b/ft/node.h index 4fe31110a6b..ab94ce0ff65 100644 --- a/ft/node.h +++ b/ft/node.h @@ -495,20 +495,20 @@ int toku_ftnode_hot_next_child(FTNODE node, const DBT *k, const toku::comparator void toku_ftnode_put_msg(const 
toku::comparator &cmp, ft_update_func update_fun, FTNODE node, int target_childnum, - FT_MSG msg, bool is_fresh, txn_gc_info *gc_info, + const ft_msg &msg, bool is_fresh, txn_gc_info *gc_info, size_t flow_deltas[], STAT64INFO stats_to_update); -void toku_ft_bn_apply_msg_once(BASEMENTNODE bn, const FT_MSG msg, uint32_t idx, +void toku_ft_bn_apply_msg_once(BASEMENTNODE bn, const ft_msg &msg, uint32_t idx, uint32_t le_keylen, LEAFENTRY le, txn_gc_info *gc_info, uint64_t *workdonep, STAT64INFO stats_to_update); void toku_ft_bn_apply_msg(const toku::comparator &cmp, ft_update_func update_fun, - BASEMENTNODE bn, FT_MSG msg, txn_gc_info *gc_info, + BASEMENTNODE bn, const ft_msg &msg, txn_gc_info *gc_info, uint64_t *workdone, STAT64INFO stats_to_update); void toku_ft_leaf_apply_msg(const toku::comparator &cmp, ft_update_func update_fun, FTNODE node, int target_childnum, - FT_MSG msg, txn_gc_info *gc_info, + const ft_msg &msg, txn_gc_info *gc_info, uint64_t *workdone, STAT64INFO stats_to_update); CACHETABLE_WRITE_CALLBACK get_write_callbacks_for_node(FT ft); diff --git a/ft/roll.cc b/ft/roll.cc index 23514a339d7..2150d52dd19 100644 --- a/ft/roll.cc +++ b/ft/roll.cc @@ -257,13 +257,11 @@ static int do_insertion (enum ft_msg_type type, FILENUM filenum, BYTESTRING key, XIDS xids; xids = toku_txn_get_xids(txn); { - FT_MSG_S ftmsg = { type, ZERO_MSN, xids, - .u = { .id = { (key.len > 0) - ? toku_fill_dbt(&key_dbt, key.data, key.len) - : toku_init_dbt(&key_dbt), - data - ? toku_fill_dbt(&data_dbt, data->data, data->len) - : toku_init_dbt(&data_dbt) } } }; + const DBT *kdbt = key.len > 0 ? toku_fill_dbt(&key_dbt, key.data, key.len) : + toku_init_dbt(&key_dbt); + const DBT *vdbt = data ? 
toku_fill_dbt(&data_dbt, data->data, data->len) : + toku_init_dbt(&data_dbt); + ft_msg msg(kdbt, vdbt, type, ZERO_MSN, xids); TXN_MANAGER txn_manager = toku_logger_get_txn_manager(txn->logger); txn_manager_state txn_state_for_gc(txn_manager); @@ -274,7 +272,7 @@ static int do_insertion (enum ft_msg_type type, FILENUM filenum, BYTESTRING key, // no messages above us, we can implicitly promote uxrs based on this xid oldest_referenced_xid_estimate, !txn->for_recovery); - toku_ft_root_put_msg(ft, &ftmsg, &gc_info); + toku_ft_root_put_msg(ft, msg, &gc_info); if (reset_root_xid_that_created) { TXNID new_root_xid_that_created = xids_get_outermost_xid(xids); toku_reset_root_xid_that_created(ft, new_root_xid_that_created); diff --git a/ft/tests/fifo-test.cc b/ft/tests/fifo-test.cc index 9281f4db00b..1d708859465 100644 --- a/ft/tests/fifo-test.cc +++ b/ft/tests/fifo-test.cc @@ -136,10 +136,8 @@ test_enqueue(int n) { startmsn = msn; enum ft_msg_type type = (enum ft_msg_type) i; DBT k, v; - FT_MSG_S msg = { - type, msn, xids, .u = { .id = { toku_fill_dbt(&k, thekey, thekeylen), toku_fill_dbt(&v, theval, thevallen) } } - }; - msg_buffer.enqueue(&msg, true, nullptr); + ft_msg msg(toku_fill_dbt(&k, thekey, thekeylen), toku_fill_dbt(&v, theval, thevallen), type, msn, xids); + msg_buffer.enqueue(msg, true, nullptr); xids_destroy(&xids); toku_free(thekey); toku_free(theval); @@ -152,20 +150,20 @@ test_enqueue(int n) { checkit_fn(MSN smsn, bool v) : startmsn(smsn), verbose(v), i(0) { } - int operator()(FT_MSG msg, bool UU(is_fresh)) { + int operator()(const ft_msg &msg, bool UU(is_fresh)) { int thekeylen = i + 1; int thevallen = i + 2; char *thekey = buildkey(thekeylen); char *theval = buildval(thevallen); - MSN msn = msg->msn; - enum ft_msg_type type = ft_msg_get_type(msg); + MSN msn = msg.msn(); + enum ft_msg_type type = msg.type(); if (verbose) printf("checkit %d %d %" PRIu64 "\n", i, type, msn.msn); assert(msn.msn == startmsn.msn + i); - assert((int) ft_msg_get_keylen(msg) == 
thekeylen); assert(memcmp(ft_msg_get_key(msg), thekey, ft_msg_get_keylen(msg)) == 0); - assert((int) ft_msg_get_vallen(msg) == thevallen); assert(memcmp(ft_msg_get_val(msg), theval, ft_msg_get_vallen(msg)) == 0); + assert((int) msg.kdbt()->size == thekeylen); assert(memcmp(msg.kdbt()->data, thekey, msg.kdbt()->size) == 0); + assert((int) msg.vdbt()->size == thevallen); assert(memcmp(msg.vdbt()->data, theval, msg.vdbt()->size) == 0); assert(i % 256 == (int)type); - assert((TXNID)i==xids_get_innermost_xid(ft_msg_get_xids(msg))); + assert((TXNID)i==xids_get_innermost_xid(msg.xids())); i += 1; toku_free(thekey); toku_free(theval); diff --git a/ft/tests/make-tree.cc b/ft/tests/make-tree.cc index e48d4592800..66bed972d25 100644 --- a/ft/tests/make-tree.cc +++ b/ft/tests/make-tree.cc @@ -125,8 +125,8 @@ append_leaf(FTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen) // apply an insert to the leaf node txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, false); - FT_MSG_S msg = { FT_INSERT, msn, xids_get_root_xids(), .u = {.id = { &thekey, &theval }} }; - toku_ft_bn_apply_msg_once(BLB(leafnode,0), &msg, idx, keylen, NULL, &gc_info, NULL, NULL); + ft_msg msg(&thekey, &theval, FT_INSERT, msn, xids_get_root_xids()); + toku_ft_bn_apply_msg_once(BLB(leafnode,0), msg, idx, keylen, NULL, &gc_info, NULL, NULL); leafnode->max_msn_applied_to_node_on_disk = msn; diff --git a/ft/tests/msnfilter.cc b/ft/tests/msnfilter.cc index 29fb40c5d8f..bb172a844c8 100644 --- a/ft/tests/msnfilter.cc +++ b/ft/tests/msnfilter.cc @@ -131,18 +131,18 @@ append_leaf(FT_HANDLE ft, FTNODE leafnode, void *key, uint32_t keylen, void *val // apply an insert to the leaf node MSN msn = next_dummymsn(); ft->ft->h->max_msn_in_ft = msn; - FT_MSG_S msg = { FT_INSERT, msn, xids_get_root_xids(), .u={.id = { &thekey, &theval }} }; + ft_msg msg(&thekey, &theval, FT_INSERT, msn, xids_get_root_xids()); txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, false); - toku_ft_leaf_apply_msg(ft->ft->cmp, 
ft->ft->update_fun, leafnode, -1, &msg, &gc_info, nullptr, nullptr); + toku_ft_leaf_apply_msg(ft->ft->cmp, ft->ft->update_fun, leafnode, -1, msg, &gc_info, nullptr, nullptr); { int r = toku_ft_lookup(ft, &thekey, lookup_checkf, &pair); assert(r==0); assert(pair.call_count==1); } - FT_MSG_S badmsg = { FT_INSERT, msn, xids_get_root_xids(), .u={.id = { &thekey, &badval }} }; - toku_ft_leaf_apply_msg(ft->ft->cmp, ft->ft->update_fun, leafnode, -1, &badmsg, &gc_info, nullptr, nullptr); + ft_msg badmsg(&thekey, &badval, FT_INSERT, msn, xids_get_root_xids()); + toku_ft_leaf_apply_msg(ft->ft->cmp, ft->ft->update_fun, leafnode, -1, badmsg, &gc_info, nullptr, nullptr); // message should be rejected for duplicate msn, row should still have original val { @@ -154,8 +154,8 @@ append_leaf(FT_HANDLE ft, FTNODE leafnode, void *key, uint32_t keylen, void *val // now verify that message with proper msn gets through msn = next_dummymsn(); ft->ft->h->max_msn_in_ft = msn; - FT_MSG_S msg2 = { FT_INSERT, msn, xids_get_root_xids(), .u={.id = { &thekey, &val2 }} }; - toku_ft_leaf_apply_msg(ft->ft->cmp, ft->ft->update_fun, leafnode, -1, &msg2, &gc_info, nullptr, nullptr); + ft_msg msg2(&thekey, &val2, FT_INSERT, msn, xids_get_root_xids()); + toku_ft_leaf_apply_msg(ft->ft->cmp, ft->ft->update_fun, leafnode, -1, msg2, &gc_info, nullptr, nullptr); // message should be accepted, val should have new value { @@ -166,8 +166,8 @@ append_leaf(FT_HANDLE ft, FTNODE leafnode, void *key, uint32_t keylen, void *val // now verify that message with lesser (older) msn is rejected msn.msn = msn.msn - 10; - FT_MSG_S msg3 = { FT_INSERT, msn, xids_get_root_xids(), .u={.id = { &thekey, &badval } }}; - toku_ft_leaf_apply_msg(ft->ft->cmp, ft->ft->update_fun, leafnode, -1, &msg3, &gc_info, nullptr, nullptr); + ft_msg msg3(&thekey, &badval, FT_INSERT, msn, xids_get_root_xids()); + toku_ft_leaf_apply_msg(ft->ft->cmp, ft->ft->update_fun, leafnode, -1, msg3, &gc_info, nullptr, nullptr); // message should be rejected, 
val should still have value in pair2 { diff --git a/ft/tests/orthopush-flush.cc b/ft/tests/orthopush-flush.cc index 8ac3278c41e..963346cc1fe 100644 --- a/ft/tests/orthopush-flush.cc +++ b/ft/tests/orthopush-flush.cc @@ -132,7 +132,7 @@ rand_bytes_limited(void *dest, int size) // generate a random message with xids and a key starting with pfx, insert // it in bnc, and save it in output params save and is_fresh_out static void -insert_random_message(NONLEAF_CHILDINFO bnc, FT_MSG_S **save, bool *is_fresh_out, XIDS xids, int pfx) +insert_random_message(NONLEAF_CHILDINFO bnc, ft_msg **save, bool *is_fresh_out, XIDS xids, int pfx) { int keylen = (random() % 128) + 16; int vallen = (random() % 128) + 16; @@ -144,18 +144,10 @@ insert_random_message(NONLEAF_CHILDINFO bnc, FT_MSG_S **save, bool *is_fresh_out MSN msn = next_dummymsn(); bool is_fresh = (random() & 0x100) == 0; - DBT *keydbt, *valdbt; - XMALLOC(keydbt); - XMALLOC(valdbt); - toku_fill_dbt(keydbt, key, keylen + (sizeof pfx)); - toku_fill_dbt(valdbt, val, vallen); - FT_MSG_S *XMALLOC(result); - result->type = FT_INSERT; - result->msn = msn; - result->xids = xids; - result->u.id.key = keydbt; - result->u.id.val = valdbt; - *save = result; + DBT keydbt, valdbt; + toku_fill_dbt(&keydbt, key, keylen + (sizeof pfx)); + toku_fill_dbt(&valdbt, val, vallen); + *save = new ft_msg(&keydbt, &valdbt, FT_INSERT, msn, xids); *is_fresh_out = is_fresh; toku_bnc_insert_msg(bnc, key, keylen + (sizeof pfx), val, vallen, @@ -193,17 +185,12 @@ insert_random_message_to_bn( valdbt = &valdbt_s; toku_fill_dbt(keydbt, key, (sizeof *pfxp) + keylen); toku_fill_dbt(valdbt, val, vallen); - FT_MSG_S msg; - msg.type = FT_INSERT; - msg.msn = msn; - msg.xids = xids; - msg.u.id.key = keydbt; - msg.u.id.val = valdbt; *keylenp = keydbt->size; *keyp = toku_xmemdup(keydbt->data, keydbt->size); + ft_msg msg(keydbt, valdbt, FT_INSERT, msn, xids); int64_t numbytes; - toku_le_apply_msg(&msg, NULL, NULL, 0, keydbt->size, &non_mvcc_gc_info, save, &numbytes); 
- toku_ft_bn_apply_msg(t->ft->cmp, t->ft->update_fun, blb, &msg, &non_mvcc_gc_info, NULL, NULL); + toku_le_apply_msg(msg, NULL, NULL, 0, keydbt->size, &non_mvcc_gc_info, save, &numbytes); + toku_ft_bn_apply_msg(t->ft->cmp, t->ft->update_fun, blb, msg, &non_mvcc_gc_info, NULL, NULL); if (msn.msn > blb->max_msn_applied.msn) { blb->max_msn_applied = msn; } @@ -243,21 +230,16 @@ insert_same_message_to_bns( valdbt = &valdbt_s; toku_fill_dbt(keydbt, key, (sizeof *pfxp) + keylen); toku_fill_dbt(valdbt, val, vallen); - FT_MSG_S msg; - msg.type = FT_INSERT; - msg.msn = msn; - msg.xids = xids; - msg.u.id.key = keydbt; - msg.u.id.val = valdbt; *keylenp = keydbt->size; *keyp = toku_xmemdup(keydbt->data, keydbt->size); + ft_msg msg(keydbt, valdbt, FT_INSERT, msn, xids); int64_t numbytes; - toku_le_apply_msg(&msg, NULL, NULL, 0, keydbt->size, &non_mvcc_gc_info, save, &numbytes); - toku_ft_bn_apply_msg(t->ft->cmp, t->ft->update_fun, blb1, &msg, &non_mvcc_gc_info, NULL, NULL); + toku_le_apply_msg(msg, NULL, NULL, 0, keydbt->size, &non_mvcc_gc_info, save, &numbytes); + toku_ft_bn_apply_msg(t->ft->cmp, t->ft->update_fun, blb1, msg, &non_mvcc_gc_info, NULL, NULL); if (msn.msn > blb1->max_msn_applied.msn) { blb1->max_msn_applied = msn; } - toku_ft_bn_apply_msg(t->ft->cmp, t->ft->update_fun, blb2, &msg, &non_mvcc_gc_info, NULL, NULL); + toku_ft_bn_apply_msg(t->ft->cmp, t->ft->update_fun, blb2, msg, &non_mvcc_gc_info, NULL, NULL); if (msn.msn > blb2->max_msn_applied.msn) { blb2->max_msn_applied = msn; } @@ -284,7 +266,7 @@ orthopush_flush_update_fun(DB * UU(db), const DBT *UU(key), const DBT *UU(old_va // the update message will overwrite the value with something generated // here, and add one to the int pointed to by applied static void -insert_random_update_message(NONLEAF_CHILDINFO bnc, FT_MSG_S **save, bool is_fresh, XIDS xids, int pfx, int *applied, MSN *max_msn) +insert_random_update_message(NONLEAF_CHILDINFO bnc, ft_msg **save, bool is_fresh, XIDS xids, int pfx, int *applied, MSN 
*max_msn) { int keylen = (random() % 16) + 16; int vallen = (random() % 16) + 16; @@ -297,18 +279,10 @@ insert_random_update_message(NONLEAF_CHILDINFO bnc, FT_MSG_S **save, bool is_fre update_extra->num_applications = applied; MSN msn = next_dummymsn(); - DBT *keydbt, *valdbt; - XMALLOC(keydbt); - XMALLOC(valdbt); - toku_fill_dbt(keydbt, key, keylen + (sizeof pfx)); - toku_fill_dbt(valdbt, update_extra, sizeof *update_extra); - FT_MSG_S *XMALLOC(result); - result->type = FT_UPDATE; - result->msn = msn; - result->xids = xids; - result->u.id.key = keydbt; - result->u.id.val = valdbt; - *save = result; + DBT keydbt, valdbt; + toku_fill_dbt(&keydbt, key, keylen + (sizeof pfx)); + toku_fill_dbt(&valdbt, update_extra, sizeof *update_extra); + *save = new ft_msg(&keydbt, &valdbt, FT_UPDATE, msn, xids); toku_bnc_insert_msg(bnc, key, keylen + (sizeof pfx), update_extra, sizeof *update_extra, @@ -325,8 +299,8 @@ static void flush_to_internal(FT_HANDLE t) { int r; - FT_MSG_S **MALLOC_N(4096,parent_messages); // 128k / 32 = 4096 - FT_MSG_S **MALLOC_N(4096,child_messages); + ft_msg **MALLOC_N(4096,parent_messages); // 128k / 32 = 4096 + ft_msg **MALLOC_N(4096,child_messages); bool *MALLOC_N(4096,parent_messages_is_fresh); bool *MALLOC_N(4096,child_messages_is_fresh); memset(parent_messages_is_fresh, 0, 4096*(sizeof parent_messages_is_fresh[0])); @@ -368,47 +342,47 @@ flush_to_internal(FT_HANDLE t) { struct checkit_fn { int num_parent_messages; - FT_MSG *parent_messages; + ft_msg **parent_messages; int *parent_messages_present; bool *parent_messages_is_fresh; int num_child_messages; - FT_MSG *child_messages; + ft_msg **child_messages; int *child_messages_present; bool *child_messages_is_fresh; - checkit_fn(int np, FT_MSG *pm, int *npp, bool *pmf, int nc, FT_MSG *cm, int *ncp, bool *cmf) : + checkit_fn(int np, ft_msg **pm, int *npp, bool *pmf, int nc, ft_msg **cm, int *ncp, bool *cmf) : num_parent_messages(np), parent_messages(pm), parent_messages_present(npp), 
parent_messages_is_fresh(pmf), num_child_messages(nc), child_messages(cm), child_messages_present(ncp), child_messages_is_fresh(cmf) { } - int operator()(FT_MSG msg, bool is_fresh) { + int operator()(const ft_msg &msg, bool is_fresh) { DBT keydbt; DBT valdbt; - toku_fill_dbt(&keydbt, ft_msg_get_key(msg), ft_msg_get_keylen(msg)); - toku_fill_dbt(&valdbt, ft_msg_get_val(msg), ft_msg_get_vallen(msg)); + toku_fill_dbt(&keydbt, msg.kdbt()->data, msg.kdbt()->size); + toku_fill_dbt(&valdbt, msg.vdbt()->data, msg.vdbt()->size); int found = 0; - MSN msn = msg->msn; - enum ft_msg_type type = ft_msg_get_type(msg); - XIDS xids = ft_msg_get_xids(msg); + MSN msn = msg.msn(); + enum ft_msg_type type = msg.type(); + XIDS xids = msg.xids(); for (int k = 0; k < num_parent_messages; ++k) { - if (dummy_cmp(&keydbt, parent_messages[k]->u.id.key) == 0 && - msn.msn == parent_messages[k]->msn.msn) { + if (dummy_cmp(&keydbt, parent_messages[k]->kdbt()) == 0 && + msn.msn == parent_messages[k]->msn().msn) { assert(parent_messages_present[k] == 0); assert(found == 0); - assert(dummy_cmp(&valdbt, parent_messages[k]->u.id.val) == 0); - assert(type == parent_messages[k]->type); - assert(xids_get_innermost_xid(xids) == xids_get_innermost_xid(parent_messages[k]->xids)); + assert(dummy_cmp(&valdbt, parent_messages[k]->vdbt()) == 0); + assert(type == parent_messages[k]->type()); + assert(xids_get_innermost_xid(xids) == xids_get_innermost_xid(parent_messages[k]->xids())); assert(parent_messages_is_fresh[k] == is_fresh); parent_messages_present[k]++; found++; } } for (int k = 0; k < num_child_messages; ++k) { - if (dummy_cmp(&keydbt, child_messages[k]->u.id.key) == 0 && - msn.msn == child_messages[k]->msn.msn) { + if (dummy_cmp(&keydbt, child_messages[k]->kdbt()) == 0 && + msn.msn == child_messages[k]->msn().msn) { assert(child_messages_present[k] == 0); assert(found == 0); - assert(dummy_cmp(&valdbt, child_messages[k]->u.id.val) == 0); - assert(type == child_messages[k]->type); - 
assert(xids_get_innermost_xid(xids) == xids_get_innermost_xid(child_messages[k]->xids)); + assert(dummy_cmp(&valdbt, child_messages[k]->vdbt()) == 0); + assert(type == child_messages[k]->type()); + assert(xids_get_innermost_xid(xids) == xids_get_innermost_xid(child_messages[k]->xids())); assert(child_messages_is_fresh[k] == is_fresh); child_messages_present[k]++; found++; @@ -433,18 +407,14 @@ flush_to_internal(FT_HANDLE t) { xids_destroy(&xids_234); for (i = 0; i < num_parent_messages; ++i) { - toku_free(parent_messages[i]->u.id.key->data); - toku_free((DBT *) parent_messages[i]->u.id.key); - toku_free(parent_messages[i]->u.id.val->data); - toku_free((DBT *) parent_messages[i]->u.id.val); - toku_free(parent_messages[i]); + toku_free(parent_messages[i]->kdbt()->data); + toku_free(parent_messages[i]->vdbt()->data); + delete parent_messages[i]; } for (i = 0; i < num_child_messages; ++i) { - toku_free(child_messages[i]->u.id.key->data); - toku_free((DBT *) child_messages[i]->u.id.key); - toku_free(child_messages[i]->u.id.val->data); - toku_free((DBT *) child_messages[i]->u.id.val); - toku_free(child_messages[i]); + toku_free(child_messages[i]->kdbt()->data); + toku_free(child_messages[i]->vdbt()->data); + delete child_messages[i]; } destroy_nonleaf_childinfo(parent_bnc); toku_ftnode_free(&child); @@ -459,8 +429,8 @@ static void flush_to_internal_multiple(FT_HANDLE t) { int r; - FT_MSG_S **MALLOC_N(4096,parent_messages); // 128k / 32 = 4096 - FT_MSG_S **MALLOC_N(4096,child_messages); + ft_msg **MALLOC_N(4096,parent_messages); // 128k / 32 = 4096 + ft_msg **MALLOC_N(4096,child_messages); bool *MALLOC_N(4096,parent_messages_is_fresh); bool *MALLOC_N(4096,child_messages_is_fresh); memset(parent_messages_is_fresh, 0, 4096*(sizeof parent_messages_is_fresh[0])); @@ -474,7 +444,7 @@ flush_to_internal_multiple(FT_HANDLE t) { CKERR(r); NONLEAF_CHILDINFO child_bncs[8]; - FT_MSG childkeys[7]; + ft_msg *childkeys[7]; int i; for (i = 0; i < 8; ++i) { child_bncs[i] = 
toku_create_empty_nl(); @@ -488,7 +458,7 @@ flush_to_internal_multiple(FT_HANDLE t) { insert_random_message(child_bncs[i%8], &child_messages[i], &child_messages_is_fresh[i], xids_123, i%8); total_size += toku_bnc_memory_used(child_bncs[i%8]); if (i % 8 < 7) { - if (childkeys[i%8] == NULL || dummy_cmp(child_messages[i]->u.id.key, childkeys[i%8]->u.id.key) > 0) { + if (childkeys[i%8] == NULL || dummy_cmp(child_messages[i]->kdbt(), childkeys[i%8]->kdbt()) > 0) { childkeys[i%8] = child_messages[i]; } } @@ -509,7 +479,7 @@ flush_to_internal_multiple(FT_HANDLE t) { set_BNC(child, i, child_bncs[i]); BP_STATE(child, i) = PT_AVAIL; if (i < 7) { - child->pivotkeys.insert_at(childkeys[i]->u.id.key, i); + child->pivotkeys.insert_at(childkeys[i]->kdbt(), i); } } @@ -528,47 +498,47 @@ flush_to_internal_multiple(FT_HANDLE t) { for (int j = 0; j < 8; ++j) { struct checkit_fn { int num_parent_messages; - FT_MSG *parent_messages; + ft_msg **parent_messages; int *parent_messages_present; bool *parent_messages_is_fresh; int num_child_messages; - FT_MSG *child_messages; + ft_msg **child_messages; int *child_messages_present; bool *child_messages_is_fresh; - checkit_fn(int np, FT_MSG *pm, int *npp, bool *pmf, int nc, FT_MSG *cm, int *ncp, bool *cmf) : + checkit_fn(int np, ft_msg **pm, int *npp, bool *pmf, int nc, ft_msg **cm, int *ncp, bool *cmf) : num_parent_messages(np), parent_messages(pm), parent_messages_present(npp), parent_messages_is_fresh(pmf), num_child_messages(nc), child_messages(cm), child_messages_present(ncp), child_messages_is_fresh(cmf) { } - int operator()(FT_MSG msg, bool is_fresh) { + int operator()(const ft_msg &msg, bool is_fresh) { DBT keydbt; DBT valdbt; - toku_fill_dbt(&keydbt, ft_msg_get_key(msg), ft_msg_get_keylen(msg)); - toku_fill_dbt(&valdbt, ft_msg_get_val(msg), ft_msg_get_vallen(msg)); + toku_fill_dbt(&keydbt, msg.kdbt()->data, msg.kdbt()->size); + toku_fill_dbt(&valdbt, msg.vdbt()->data, msg.vdbt()->size); int found = 0; - MSN msn = msg->msn; - enum 
ft_msg_type type = ft_msg_get_type(msg); - XIDS xids = ft_msg_get_xids(msg); + MSN msn = msg.msn(); + enum ft_msg_type type = msg.type(); + XIDS xids = msg.xids(); for (int _i = 0; _i < num_parent_messages; ++_i) { - if (dummy_cmp(&keydbt, parent_messages[_i]->u.id.key) == 0 && - msn.msn == parent_messages[_i]->msn.msn) { + if (dummy_cmp(&keydbt, parent_messages[_i]->kdbt()) == 0 && + msn.msn == parent_messages[_i]->msn().msn) { assert(parent_messages_present[_i] == 0); assert(found == 0); - assert(dummy_cmp(&valdbt, parent_messages[_i]->u.id.val) == 0); - assert(type == parent_messages[_i]->type); - assert(xids_get_innermost_xid(xids) == xids_get_innermost_xid(parent_messages[_i]->xids)); + assert(dummy_cmp(&valdbt, parent_messages[_i]->vdbt()) == 0); + assert(type == parent_messages[_i]->type()); + assert(xids_get_innermost_xid(xids) == xids_get_innermost_xid(parent_messages[_i]->xids())); assert(parent_messages_is_fresh[_i] == is_fresh); parent_messages_present[_i]++; found++; } } for (int _i = 0; _i < num_child_messages; ++_i) { - if (dummy_cmp(&keydbt, child_messages[_i]->u.id.key) == 0 && - msn.msn == child_messages[_i]->msn.msn) { + if (dummy_cmp(&keydbt, child_messages[_i]->kdbt()) == 0 && + msn.msn == child_messages[_i]->msn().msn) { assert(child_messages_present[_i] == 0); assert(found == 0); - assert(dummy_cmp(&valdbt, child_messages[_i]->u.id.val) == 0); - assert(type == child_messages[_i]->type); - assert(xids_get_innermost_xid(xids) == xids_get_innermost_xid(child_messages[_i]->xids)); + assert(dummy_cmp(&valdbt, child_messages[_i]->vdbt()) == 0); + assert(type == child_messages[_i]->type()); + assert(xids_get_innermost_xid(xids) == xids_get_innermost_xid(child_messages[_i]->xids())); assert(child_messages_is_fresh[_i] == is_fresh); child_messages_present[_i]++; found++; @@ -594,18 +564,14 @@ flush_to_internal_multiple(FT_HANDLE t) { xids_destroy(&xids_234); for (i = 0; i < num_parent_messages; ++i) { - toku_free(parent_messages[i]->u.id.key->data); - 
toku_free((DBT *) parent_messages[i]->u.id.key); - toku_free(parent_messages[i]->u.id.val->data); - toku_free((DBT *) parent_messages[i]->u.id.val); - toku_free(parent_messages[i]); + toku_free(parent_messages[i]->kdbt()->data); + toku_free(parent_messages[i]->vdbt()->data); + delete parent_messages[i]; } for (i = 0; i < num_child_messages; ++i) { - toku_free(child_messages[i]->u.id.key->data); - toku_free((DBT *) child_messages[i]->u.id.key); - toku_free(child_messages[i]->u.id.val->data); - toku_free((DBT *) child_messages[i]->u.id.val); - toku_free(child_messages[i]); + toku_free(child_messages[i]->kdbt()->data); + toku_free(child_messages[i]->vdbt()->data); + delete child_messages[i]; } destroy_nonleaf_childinfo(parent_bnc); toku_ftnode_free(&child); @@ -627,7 +593,7 @@ static void flush_to_leaf(FT_HANDLE t, bool make_leaf_up_to_date, bool use_flush) { int r; - FT_MSG_S **MALLOC_N(4096,parent_messages); // 128k / 32 = 4096 + ft_msg **MALLOC_N(4096,parent_messages); // 128k / 32 = 4096 LEAFENTRY* child_messages = NULL; XMALLOC_N(4096,child_messages); void** key_pointers = NULL; @@ -705,7 +671,7 @@ flush_to_leaf(FT_HANDLE t, bool make_leaf_up_to_date, bool use_flush) { if (make_leaf_up_to_date) { for (i = 0; i < num_parent_messages; ++i) { if (!parent_messages_is_fresh[i]) { - toku_ft_leaf_apply_msg(t->ft->cmp, t->ft->update_fun, child, -1, parent_messages[i], &non_mvcc_gc_info, NULL, NULL); + toku_ft_leaf_apply_msg(t->ft->cmp, t->ft->update_fun, child, -1, *parent_messages[i], &non_mvcc_gc_info, NULL, NULL); } } for (i = 0; i < 8; ++i) { @@ -741,7 +707,7 @@ flush_to_leaf(FT_HANDLE t, bool make_leaf_up_to_date, bool use_flush) { toku_apply_ancestors_messages_to_node(t, child, &ancestors, pivot_bounds::infinite_bounds(), &msgs_applied, -1); struct checkit_fn { - int operator()(FT_MSG UU(msg), bool is_fresh) { + int operator()(const ft_msg &UU(msg), bool is_fresh) { assert(!is_fresh); return 0; } @@ -784,9 +750,9 @@ flush_to_leaf(FT_HANDLE t, bool 
make_leaf_up_to_date, bool use_flush) { } int found = 0; for (i = num_parent_messages - 1; i >= 0; --i) { - if (dummy_cmp(&keydbt, parent_messages[i]->u.id.key) == 0) { + if (dummy_cmp(&keydbt, parent_messages[i]->kdbt()) == 0) { if (found == 0) { - struct orthopush_flush_update_fun_extra *CAST_FROM_VOIDP(e, parent_messages[i]->u.id.val->data); + struct orthopush_flush_update_fun_extra *CAST_FROM_VOIDP(e, parent_messages[i]->vdbt()->data); assert(dummy_cmp(&valdbt, &e->new_val) == 0); found++; } @@ -827,13 +793,11 @@ flush_to_leaf(FT_HANDLE t, bool make_leaf_up_to_date, bool use_flush) { xids_destroy(&xids_234); for (i = 0; i < num_parent_messages; ++i) { - toku_free(parent_messages[i]->u.id.key->data); - toku_free((DBT *) parent_messages[i]->u.id.key); - struct orthopush_flush_update_fun_extra *CAST_FROM_VOIDP(extra, parent_messages[i]->u.id.val->data); + toku_free(parent_messages[i]->kdbt()->data); + struct orthopush_flush_update_fun_extra *CAST_FROM_VOIDP(extra, parent_messages[i]->vdbt()->data); toku_free(extra->new_val.data); - toku_free(parent_messages[i]->u.id.val->data); - toku_free((DBT *) parent_messages[i]->u.id.val); - toku_free(parent_messages[i]); + toku_free(parent_messages[i]->vdbt()->data); + delete parent_messages[i]; } for (i = 0; i < num_child_messages; ++i) { toku_free(child_messages[i]); @@ -858,7 +822,7 @@ static void flush_to_leaf_with_keyrange(FT_HANDLE t, bool make_leaf_up_to_date) { int r; - FT_MSG_S **MALLOC_N(4096,parent_messages); // 128k / 32 = 4k + ft_msg **MALLOC_N(4096,parent_messages); // 128k / 32 = 4k LEAFENTRY* child_messages = NULL; XMALLOC_N(4096,child_messages); void** key_pointers = NULL; @@ -928,9 +892,9 @@ flush_to_leaf_with_keyrange(FT_HANDLE t, bool make_leaf_up_to_date) { if (make_leaf_up_to_date) { for (i = 0; i < num_parent_messages; ++i) { - if (dummy_cmp(parent_messages[i]->u.id.key, &childkeys[7]) <= 0 && + if (dummy_cmp(parent_messages[i]->kdbt(), &childkeys[7]) <= 0 && !parent_messages_is_fresh[i]) { - 
toku_ft_leaf_apply_msg(t->ft->cmp, t->ft->update_fun, child, -1, parent_messages[i], &non_mvcc_gc_info, NULL, NULL); + toku_ft_leaf_apply_msg(t->ft->cmp, t->ft->update_fun, child, -1, *parent_messages[i], &non_mvcc_gc_info, NULL, NULL); } } for (i = 0; i < 8; ++i) { @@ -944,7 +908,7 @@ flush_to_leaf_with_keyrange(FT_HANDLE t, bool make_leaf_up_to_date) { for (i = 0; i < num_parent_messages; ++i) { if (make_leaf_up_to_date && - dummy_cmp(parent_messages[i]->u.id.key, &childkeys[7]) <= 0 && + dummy_cmp(parent_messages[i]->kdbt(), &childkeys[7]) <= 0 && !parent_messages_is_fresh[i]) { assert(parent_messages_applied[i] == 1); } else { @@ -970,19 +934,19 @@ flush_to_leaf_with_keyrange(FT_HANDLE t, bool make_leaf_up_to_date) { struct checkit_fn { DBT *childkeys; int num_parent_messages; - FT_MSG *parent_messages; + ft_msg **parent_messages; bool *parent_messages_is_fresh; - checkit_fn(DBT *ck, int np, FT_MSG *pm, bool *pmf) : + checkit_fn(DBT *ck, int np, ft_msg **pm, bool *pmf) : childkeys(ck), num_parent_messages(np), parent_messages(pm), parent_messages_is_fresh(pmf) { } - int operator()(FT_MSG msg, bool is_fresh) { + int operator()(const ft_msg &msg, bool is_fresh) { DBT keydbt; - toku_fill_dbt(&keydbt, ft_msg_get_key(msg), ft_msg_get_keylen(msg)); - MSN msn = msg->msn; + toku_fill_dbt(&keydbt, msg.kdbt()->data, msg.kdbt()->size); + MSN msn = msg.msn(); if (dummy_cmp(&keydbt, &childkeys[7]) > 0) { for (int _i = 0; _i < num_parent_messages; ++_i) { - if (dummy_cmp(&keydbt, parent_messages[_i]->u.id.key) == 0 && - msn.msn == parent_messages[_i]->msn.msn) { + if (dummy_cmp(&keydbt, parent_messages[_i]->kdbt()) == 0 && + msn.msn == parent_messages[_i]->msn().msn) { assert(is_fresh == parent_messages_is_fresh[_i]); break; } @@ -1004,7 +968,7 @@ flush_to_leaf_with_keyrange(FT_HANDLE t, bool make_leaf_up_to_date) { assert(total_messages <= num_parent_messages + num_child_messages); for (i = 0; i < num_parent_messages; ++i) { - if (dummy_cmp(parent_messages[i]->u.id.key, 
&childkeys[7]) <= 0) { + if (dummy_cmp(parent_messages[i]->kdbt(), &childkeys[7]) <= 0) { assert(parent_messages_applied[i] == 1); } else { assert(parent_messages_applied[i] == 0); @@ -1016,13 +980,11 @@ flush_to_leaf_with_keyrange(FT_HANDLE t, bool make_leaf_up_to_date) { xids_destroy(&xids_234); for (i = 0; i < num_parent_messages; ++i) { - toku_free(parent_messages[i]->u.id.key->data); - toku_free((DBT *) parent_messages[i]->u.id.key); - struct orthopush_flush_update_fun_extra *CAST_FROM_VOIDP(extra, parent_messages[i]->u.id.val->data); + toku_free(parent_messages[i]->kdbt()->data); + struct orthopush_flush_update_fun_extra *CAST_FROM_VOIDP(extra, parent_messages[i]->vdbt()->data); toku_free(extra->new_val.data); - toku_free(parent_messages[i]->u.id.val->data); - toku_free((DBT *) parent_messages[i]->u.id.val); - toku_free(parent_messages[i]); + toku_free(parent_messages[i]->vdbt()->data); + delete parent_messages[i]; } for (i = 0; i < num_child_messages; ++i) { toku_free(child_messages[i]); @@ -1049,7 +1011,7 @@ static void compare_apply_and_flush(FT_HANDLE t, bool make_leaf_up_to_date) { int r; - FT_MSG_S **MALLOC_N(4096,parent_messages); // 128k / 32 = 4k + ft_msg **MALLOC_N(4096,parent_messages); // 128k / 32 = 4k LEAFENTRY* child_messages = NULL; XMALLOC_N(4096,child_messages); void** key_pointers = NULL; @@ -1135,8 +1097,8 @@ compare_apply_and_flush(FT_HANDLE t, bool make_leaf_up_to_date) { if (make_leaf_up_to_date) { for (i = 0; i < num_parent_messages; ++i) { if (!parent_messages_is_fresh[i]) { - toku_ft_leaf_apply_msg(t->ft->cmp, t->ft->update_fun, child1, -1, parent_messages[i], &non_mvcc_gc_info, NULL, NULL); - toku_ft_leaf_apply_msg(t->ft->cmp, t->ft->update_fun, child2, -1, parent_messages[i], &non_mvcc_gc_info, NULL, NULL); + toku_ft_leaf_apply_msg(t->ft->cmp, t->ft->update_fun, child1, -1, *parent_messages[i], &non_mvcc_gc_info, NULL, NULL); + toku_ft_leaf_apply_msg(t->ft->cmp, t->ft->update_fun, child2, -1, *parent_messages[i], &non_mvcc_gc_info, 
NULL, NULL); } } for (i = 0; i < 8; ++i) { @@ -1164,7 +1126,7 @@ compare_apply_and_flush(FT_HANDLE t, bool make_leaf_up_to_date) { toku_apply_ancestors_messages_to_node(t, child2, &ancestors, pivot_bounds::infinite_bounds(), &msgs_applied, -1); struct checkit_fn { - int operator()(FT_MSG UU(msg), bool is_fresh) { + int operator()(const ft_msg &UU(msg), bool is_fresh) { assert(!is_fresh); return 0; } @@ -1211,13 +1173,11 @@ compare_apply_and_flush(FT_HANDLE t, bool make_leaf_up_to_date) { xids_destroy(&xids_234); for (i = 0; i < num_parent_messages; ++i) { - toku_free(parent_messages[i]->u.id.key->data); - toku_free((DBT *) parent_messages[i]->u.id.key); - struct orthopush_flush_update_fun_extra *CAST_FROM_VOIDP(extra, parent_messages[i]->u.id.val->data); + toku_free(parent_messages[i]->kdbt()->data); + struct orthopush_flush_update_fun_extra *CAST_FROM_VOIDP(extra, parent_messages[i]->vdbt()->data); toku_free(extra->new_val.data); - toku_free(parent_messages[i]->u.id.val->data); - toku_free((DBT *) parent_messages[i]->u.id.val); - toku_free(parent_messages[i]); + toku_free(parent_messages[i]->vdbt()->data); + delete parent_messages[i]; } for (i = 0; i < num_child_messages; ++i) { toku_free(key_pointers[i]); diff --git a/ft/tests/test-leafentry-child-txn.cc b/ft/tests/test-leafentry-child-txn.cc index e55b20d6a3f..b4625702745 100644 --- a/ft/tests/test-leafentry-child-txn.cc +++ b/ft/tests/test-leafentry-child-txn.cc @@ -111,17 +111,6 @@ static void add_committed_entry(ULE ule, DBT *val, TXNID xid) { ule->uxrs[index].xid = xid; } -static FT_MSG_S -msg_init(enum ft_msg_type type, XIDS xids, - DBT *key, DBT *val) { - FT_MSG_S msg; - msg.type = type; - msg.xids = xids; - msg.u.id.key = key; - msg.u.id.val = val; - return msg; -} - //Test all the different things that can happen to a //committed leafentry (logical equivalent of a committed insert). 
static void @@ -161,41 +150,45 @@ run_test(void) { add_committed_entry(&ule_initial, &val, 10); // now do the application of xids to the ule - FT_MSG_S msg; // do a commit - msg = msg_init(FT_COMMIT_ANY, msg_xids_2, &key, &val); - test_msg_modify_ule(&ule_initial, &msg); - assert(ule->num_cuxrs == 2); - assert(ule->uxrs[0].xid == TXNID_NONE); - assert(ule->uxrs[1].xid == 10); - assert(ule->uxrs[0].valp == &val_data_one); - assert(ule->uxrs[1].valp == &val_data_two); + { + ft_msg msg(&key, &val, FT_COMMIT_ANY, ZERO_MSN, msg_xids_2); + test_msg_modify_ule(&ule_initial, msg); + assert(ule->num_cuxrs == 2); + assert(ule->uxrs[0].xid == TXNID_NONE); + assert(ule->uxrs[1].xid == 10); + assert(ule->uxrs[0].valp == &val_data_one); + assert(ule->uxrs[1].valp == &val_data_two); + } // do an abort - msg = msg_init(FT_ABORT_ANY, msg_xids_2, &key, &val); - test_msg_modify_ule(&ule_initial, &msg); - assert(ule->num_cuxrs == 2); - assert(ule->uxrs[0].xid == TXNID_NONE); - assert(ule->uxrs[1].xid == 10); - assert(ule->uxrs[0].valp == &val_data_one); - assert(ule->uxrs[1].valp == &val_data_two); + { + ft_msg msg(&key, &val, FT_ABORT_ANY, ZERO_MSN, msg_xids_2); + test_msg_modify_ule(&ule_initial, msg); + assert(ule->num_cuxrs == 2); + assert(ule->uxrs[0].xid == TXNID_NONE); + assert(ule->uxrs[1].xid == 10); + assert(ule->uxrs[0].valp == &val_data_one); + assert(ule->uxrs[1].valp == &val_data_two); + } // do an insert val.data = &val_data_three; - msg = msg_init(FT_INSERT, msg_xids_2, &key, &val); - test_msg_modify_ule(&ule_initial, &msg); - // now that message applied, verify that things are good - assert(ule->num_cuxrs == 2); - assert(ule->num_puxrs == 2); - assert(ule->uxrs[0].xid == TXNID_NONE); - assert(ule->uxrs[1].xid == 10); - assert(ule->uxrs[2].xid == 1000); - assert(ule->uxrs[3].xid == 10); - assert(ule->uxrs[0].valp == &val_data_one); - assert(ule->uxrs[1].valp == &val_data_two); - assert(ule->uxrs[2].type == XR_PLACEHOLDER); - assert(ule->uxrs[3].valp == 
&val_data_three); - + { + ft_msg msg(&key, &val, FT_INSERT, ZERO_MSN, msg_xids_2); + test_msg_modify_ule(&ule_initial, msg); + // now that message applied, verify that things are good + assert(ule->num_cuxrs == 2); + assert(ule->num_puxrs == 2); + assert(ule->uxrs[0].xid == TXNID_NONE); + assert(ule->uxrs[1].xid == 10); + assert(ule->uxrs[2].xid == 1000); + assert(ule->uxrs[3].xid == 10); + assert(ule->uxrs[0].valp == &val_data_one); + assert(ule->uxrs[1].valp == &val_data_two); + assert(ule->uxrs[2].type == XR_PLACEHOLDER); + assert(ule->uxrs[3].valp == &val_data_three); + } xids_destroy(&msg_xids_2); xids_destroy(&msg_xids_1); diff --git a/ft/tests/test-leafentry-nested.cc b/ft/tests/test-leafentry-nested.cc index 2126dde2011..1720922a385 100644 --- a/ft/tests/test-leafentry-nested.cc +++ b/ft/tests/test-leafentry-nested.cc @@ -442,7 +442,7 @@ test_le_pack (void) { } static void -test_le_apply(ULE ule_initial, FT_MSG msg, ULE ule_expected) { +test_le_apply(ULE ule_initial, const ft_msg &msg, ULE ule_expected) { int r; LEAFENTRY le_initial; LEAFENTRY le_expected; @@ -496,17 +496,6 @@ static const ULE_S ule_committed_delete = { .uxrs = (UXR_S *)ule_committed_delete.uxrs_static }; -static FT_MSG_S -msg_init(enum ft_msg_type type, XIDS xids, - DBT *key, DBT *val) { - FT_MSG_S msg; - msg.type = type; - msg.xids = xids; - msg.u.id.key = key; - msg.u.id.val = val; - return msg; -} - static uint32_t next_nesting_level(uint32_t current) { uint32_t rval = current + 1; @@ -531,9 +520,9 @@ generate_committed_for(ULE ule, DBT *val) { } static void -generate_provpair_for(ULE ule, FT_MSG msg) { +generate_provpair_for(ULE ule, const ft_msg &msg) { uint32_t level; - XIDS xids = msg->xids; + XIDS xids = msg.xids(); ule->uxrs = ule->uxrs_static; ule->num_cuxrs = 1; @@ -550,8 +539,8 @@ generate_provpair_for(ULE ule, FT_MSG msg) { ule->uxrs[level].xid = xids_get_xid(xids, level-1); } ule->uxrs[num_uxrs - 1].type = XR_INSERT; - ule->uxrs[num_uxrs - 1].vallen = msg->u.id.val->size; - 
ule->uxrs[num_uxrs - 1].valp = msg->u.id.val->data; + ule->uxrs[num_uxrs - 1].vallen = msg.vdbt()->size; + ule->uxrs[num_uxrs - 1].valp = msg.vdbt()->data; ule->uxrs[num_uxrs - 1].xid = xids_get_innermost_xid(xids); } @@ -560,7 +549,6 @@ generate_provpair_for(ULE ule, FT_MSG msg) { static void test_le_empty_apply(void) { ULE_S ule_initial = ule_committed_delete; - FT_MSG_S msg; DBT key; DBT val; @@ -585,34 +573,41 @@ test_le_empty_apply(void) { //Abort/commit of an empty le is an empty le ULE_S ule_expected = ule_committed_delete; - msg = msg_init(FT_COMMIT_ANY, msg_xids, &key, &val); - test_le_apply(&ule_initial, &msg, &ule_expected); - msg = msg_init(FT_COMMIT_BROADCAST_TXN, msg_xids, &key, &val); - test_le_apply(&ule_initial, &msg, &ule_expected); - - msg = msg_init(FT_ABORT_ANY, msg_xids, &key, &val); - test_le_apply(&ule_initial, &msg, &ule_expected); - msg = msg_init(FT_ABORT_BROADCAST_TXN, msg_xids, &key, &val); - test_le_apply(&ule_initial, &msg, &ule_expected); + { + ft_msg msg(&key, &val, FT_COMMIT_ANY, ZERO_MSN, msg_xids); + test_le_apply(&ule_initial, msg, &ule_expected); + } + { + ft_msg msg(&key, &val, FT_COMMIT_BROADCAST_TXN, ZERO_MSN, msg_xids); + test_le_apply(&ule_initial, msg, &ule_expected); + } + { + ft_msg msg(&key, &val, FT_ABORT_ANY, ZERO_MSN, msg_xids); + test_le_apply(&ule_initial, msg, &ule_expected); + } + { + ft_msg msg(&key, &val, FT_ABORT_BROADCAST_TXN, ZERO_MSN, msg_xids); + test_le_apply(&ule_initial, msg, &ule_expected); + } } { //delete of an empty le is an empty le ULE_S ule_expected = ule_committed_delete; - msg = msg_init(FT_DELETE_ANY, msg_xids, &key, &val); - test_le_apply(&ule_initial, &msg, &ule_expected); + ft_msg msg(&key, &val, FT_DELETE_ANY, ZERO_MSN, msg_xids); + test_le_apply(&ule_initial, msg, &ule_expected); } { - msg = msg_init(FT_INSERT, msg_xids, &key, &val); + ft_msg msg(&key, &val, FT_INSERT, ZERO_MSN, msg_xids); ULE_S ule_expected; - generate_provpair_for(&ule_expected, &msg); - test_le_apply(&ule_initial, 
&msg, &ule_expected); + generate_provpair_for(&ule_expected, msg); + test_le_apply(&ule_initial, msg, &ule_expected); } { - msg = msg_init(FT_INSERT_NO_OVERWRITE, msg_xids, &key, &val); + ft_msg msg(&key, &val, FT_INSERT_NO_OVERWRITE, ZERO_MSN, msg_xids); ULE_S ule_expected; - generate_provpair_for(&ule_expected, &msg); - test_le_apply(&ule_initial, &msg, &ule_expected); + generate_provpair_for(&ule_expected, msg); + test_le_apply(&ule_initial, msg, &ule_expected); } } } @@ -620,16 +615,16 @@ test_le_empty_apply(void) { } static void -generate_provdel_for(ULE ule, FT_MSG msg) { +generate_provdel_for(ULE ule, const ft_msg &msg) { uint32_t level; - XIDS xids = msg->xids; + XIDS xids = msg.xids(); ule->num_cuxrs = 1; ule->num_puxrs = xids_get_num_xids(xids); uint32_t num_uxrs = ule->num_cuxrs + ule->num_puxrs; ule->uxrs[0].type = XR_INSERT; - ule->uxrs[0].vallen = msg->u.id.val->size; - ule->uxrs[0].valp = msg->u.id.val->data; + ule->uxrs[0].vallen = msg.vdbt()->size; + ule->uxrs[0].valp = msg.vdbt()->data; ule->uxrs[0].xid = TXNID_NONE; for (level = ule->num_cuxrs; level < ule->num_cuxrs + ule->num_puxrs - 1; level++) { ule->uxrs[level].type = XR_PLACEHOLDER; @@ -644,9 +639,9 @@ generate_provdel_for(ULE ule, FT_MSG msg) { } static void -generate_both_for(ULE ule, DBT *oldval, FT_MSG msg) { +generate_both_for(ULE ule, DBT *oldval, const ft_msg &msg) { uint32_t level; - XIDS xids = msg->xids; + XIDS xids = msg.xids(); ule->num_cuxrs = 1; ule->num_puxrs = xids_get_num_xids(xids); @@ -662,8 +657,8 @@ generate_both_for(ULE ule, DBT *oldval, FT_MSG msg) { ule->uxrs[level].xid = xids_get_xid(xids, level-1); } ule->uxrs[num_uxrs - 1].type = XR_INSERT; - ule->uxrs[num_uxrs - 1].vallen = msg->u.id.val->size; - ule->uxrs[num_uxrs - 1].valp = msg->u.id.val->data; + ule->uxrs[num_uxrs - 1].vallen = msg.vdbt()->size; + ule->uxrs[num_uxrs - 1].valp = msg.vdbt()->data; ule->uxrs[num_uxrs - 1].xid = xids_get_innermost_xid(xids); } @@ -673,7 +668,6 @@ static void 
test_le_committed_apply(void) { ULE_S ule_initial; ule_initial.uxrs = ule_initial.uxrs_static; - FT_MSG_S msg; DBT key; DBT val; @@ -696,23 +690,30 @@ test_le_committed_apply(void) { if (nesting_level > 0) { //Commit/abort will not change a committed le ULE_S ule_expected = ule_initial; - msg = msg_init(FT_COMMIT_ANY, msg_xids, &key, &val); - test_le_apply(&ule_initial, &msg, &ule_expected); - msg = msg_init(FT_COMMIT_BROADCAST_TXN, msg_xids, &key, &val); - test_le_apply(&ule_initial, &msg, &ule_expected); - - msg = msg_init(FT_ABORT_ANY, msg_xids, &key, &val); - test_le_apply(&ule_initial, &msg, &ule_expected); - msg = msg_init(FT_ABORT_BROADCAST_TXN, msg_xids, &key, &val); - test_le_apply(&ule_initial, &msg, &ule_expected); + { + ft_msg msg(&key, &val, FT_COMMIT_ANY, ZERO_MSN, msg_xids); + test_le_apply(&ule_initial, msg, &ule_expected); + } + { + ft_msg msg(&key, &val, FT_COMMIT_BROADCAST_TXN, ZERO_MSN, msg_xids); + test_le_apply(&ule_initial, msg, &ule_expected); + } + { + ft_msg msg(&key, &val, FT_ABORT_ANY, ZERO_MSN, msg_xids); + test_le_apply(&ule_initial, msg, &ule_expected); + } + { + ft_msg msg(&key, &val, FT_ABORT_BROADCAST_TXN, ZERO_MSN, msg_xids); + test_le_apply(&ule_initial, msg, &ule_expected); + } } { - msg = msg_init(FT_DELETE_ANY, msg_xids, &key, &val); + ft_msg msg(&key, &val, FT_DELETE_ANY, ZERO_MSN, msg_xids); ULE_S ule_expected; ule_expected.uxrs = ule_expected.uxrs_static; - generate_provdel_for(&ule_expected, &msg); - test_le_apply(&ule_initial, &msg, &ule_expected); + generate_provdel_for(&ule_expected, msg); + test_le_apply(&ule_initial, msg, &ule_expected); } { @@ -721,11 +722,11 @@ test_le_committed_apply(void) { fillrandom(valbuf2, valsize2); DBT val2; toku_fill_dbt(&val2, valbuf2, valsize2); - msg = msg_init(FT_INSERT, msg_xids, &key, &val2); + ft_msg msg(&key, &val2, FT_INSERT, ZERO_MSN, msg_xids); ULE_S ule_expected; ule_expected.uxrs = ule_expected.uxrs_static; - generate_both_for(&ule_expected, &val, &msg); - 
test_le_apply(&ule_initial, &msg, &ule_expected); + generate_both_for(&ule_expected, &val, msg); + test_le_apply(&ule_initial, msg, &ule_expected); } { //INSERT_NO_OVERWRITE will not change a committed insert @@ -735,8 +736,8 @@ test_le_committed_apply(void) { fillrandom(valbuf2, valsize2); DBT val2; toku_fill_dbt(&val2, valbuf2, valsize2); - msg = msg_init(FT_INSERT_NO_OVERWRITE, msg_xids, &key, &val2); - test_le_apply(&ule_initial, &msg, &ule_expected); + ft_msg msg(&key, &val2, FT_INSERT_NO_OVERWRITE, ZERO_MSN, msg_xids); + test_le_apply(&ule_initial, msg, &ule_expected); } } } @@ -855,7 +856,6 @@ static void test_le_garbage_collection_birdie(void) { } static void test_le_optimize(void) { - FT_MSG_S msg; DBT key; DBT val; ULE_S ule_initial; @@ -873,7 +873,7 @@ static void test_le_optimize(void) { XIDS msg_xids; int r = xids_create_child(root_xids, &msg_xids, optimize_txnid); assert(r==0); - msg = msg_init(FT_OPTIMIZE, msg_xids, &key, &val); + ft_msg msg(&key, &val, FT_OPTIMIZE, ZERO_MSN, msg_xids); // // create the key @@ -898,8 +898,8 @@ static void test_le_optimize(void) { ule_expected.uxrs[0].vallen = valsize; ule_expected.uxrs[0].valp = valbuf; - test_msg_modify_ule(&ule_initial,&msg); - verify_ule_equal(&ule_initial,&ule_expected); + test_msg_modify_ule(&ule_initial, msg); + verify_ule_equal(&ule_initial, &ule_expected); // // add another committed entry and ensure no effect @@ -916,8 +916,8 @@ static void test_le_optimize(void) { ule_expected.uxrs[1].vallen = 0; ule_expected.uxrs[1].valp = NULL; - test_msg_modify_ule(&ule_initial,&msg); - verify_ule_equal(&ule_initial,&ule_expected); + test_msg_modify_ule(&ule_initial, msg); + verify_ule_equal(&ule_initial, &ule_expected); // // now test when there is one provisional, three cases, after, equal, and before FT_OPTIMIZE's transaction @@ -929,20 +929,20 @@ static void test_le_optimize(void) { ule_expected.num_cuxrs = 1; ule_expected.num_puxrs = 1; ule_expected.uxrs[1].xid = 1500; - 
test_msg_modify_ule(&ule_initial,&msg); - verify_ule_equal(&ule_initial,&ule_expected); + test_msg_modify_ule(&ule_initial, msg); + verify_ule_equal(&ule_initial, &ule_expected); ule_initial.uxrs[1].xid = 1000; ule_expected.uxrs[1].xid = 1000; - test_msg_modify_ule(&ule_initial,&msg); - verify_ule_equal(&ule_initial,&ule_expected); + test_msg_modify_ule(&ule_initial, msg); + verify_ule_equal(&ule_initial, &ule_expected); ule_initial.uxrs[1].xid = 500; ule_expected.uxrs[1].xid = 500; ule_expected.num_cuxrs = 2; ule_expected.num_puxrs = 0; - test_msg_modify_ule(&ule_initial,&msg); - verify_ule_equal(&ule_initial,&ule_expected); + test_msg_modify_ule(&ule_initial, msg); + verify_ule_equal(&ule_initial, &ule_expected); // // now test cases with two provisional @@ -963,13 +963,13 @@ static void test_le_optimize(void) { ule_expected.uxrs[2].vallen = valsize; ule_expected.uxrs[2].valp = valbuf; ule_expected.uxrs[1].xid = 1200; - test_msg_modify_ule(&ule_initial,&msg); - verify_ule_equal(&ule_initial,&ule_expected); + test_msg_modify_ule(&ule_initial, msg); + verify_ule_equal(&ule_initial, &ule_expected); ule_initial.uxrs[1].xid = 1000; ule_expected.uxrs[1].xid = 1000; - test_msg_modify_ule(&ule_initial,&msg); - verify_ule_equal(&ule_initial,&ule_expected); + test_msg_modify_ule(&ule_initial, msg); + verify_ule_equal(&ule_initial, &ule_expected); ule_initial.uxrs[1].xid = 800; ule_expected.uxrs[1].xid = 800; @@ -978,8 +978,8 @@ static void test_le_optimize(void) { ule_expected.uxrs[1].type = ule_initial.uxrs[2].type; ule_expected.uxrs[1].valp = ule_initial.uxrs[2].valp; ule_expected.uxrs[1].vallen = ule_initial.uxrs[2].vallen; - test_msg_modify_ule(&ule_initial,&msg); - verify_ule_equal(&ule_initial,&ule_expected); + test_msg_modify_ule(&ule_initial, msg); + verify_ule_equal(&ule_initial, &ule_expected); xids_destroy(&msg_xids); diff --git a/ft/tests/verify-bad-msn.cc b/ft/tests/verify-bad-msn.cc index 07e37d43cf9..ddb9fd447ef 100644 --- a/ft/tests/verify-bad-msn.cc +++ 
b/ft/tests/verify-bad-msn.cc @@ -127,9 +127,9 @@ append_leaf(FTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen) MSN msn = next_dummymsn(); // apply an insert to the leaf node - FT_MSG_S msg = { FT_INSERT, msn, xids_get_root_xids(), .u={.id = { &thekey, &theval }} }; + ft_msg msg(&thekey, &theval, FT_INSERT, msn, xids_get_root_xids()); txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, false); - toku_ft_bn_apply_msg_once(BLB(leafnode, 0), &msg, idx, keylen, NULL, &gc_info, NULL, NULL); + toku_ft_bn_apply_msg_once(BLB(leafnode, 0), msg, idx, keylen, NULL, &gc_info, NULL, NULL); // Create bad tree (don't do following): // leafnode->max_msn_applied_to_node = msn; diff --git a/ft/tests/verify-bad-pivots.cc b/ft/tests/verify-bad-pivots.cc index aac0cbd8ed2..bc402bdac92 100644 --- a/ft/tests/verify-bad-pivots.cc +++ b/ft/tests/verify-bad-pivots.cc @@ -115,9 +115,9 @@ append_leaf(FTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen) // apply an insert to the leaf node MSN msn = next_dummymsn(); - FT_MSG_S msg = { FT_INSERT, msn, xids_get_root_xids(), .u={.id = { &thekey, &theval }} }; + ft_msg msg(&thekey, &theval, FT_INSERT, msn, xids_get_root_xids()); txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, false); - toku_ft_bn_apply_msg_once(BLB(leafnode, 0), &msg, idx, keylen, NULL, &gc_info, NULL, NULL); + toku_ft_bn_apply_msg_once(BLB(leafnode, 0), msg, idx, keylen, NULL, &gc_info, NULL, NULL); // dont forget to dirty the node leafnode->dirty = 1; diff --git a/ft/tests/verify-dup-in-leaf.cc b/ft/tests/verify-dup-in-leaf.cc index 510a3ce1de0..9f3f2848188 100644 --- a/ft/tests/verify-dup-in-leaf.cc +++ b/ft/tests/verify-dup-in-leaf.cc @@ -116,9 +116,9 @@ append_leaf(FTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen) // apply an insert to the leaf node MSN msn = next_dummymsn(); - FT_MSG_S msg = { FT_INSERT, msn, xids_get_root_xids(), .u={.id = { &thekey, &theval }} }; + ft_msg msg(&thekey, &theval, FT_INSERT, msn, 
xids_get_root_xids()); txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, false); - toku_ft_bn_apply_msg_once(BLB(leafnode, 0), &msg, idx, keylen, NULL, &gc_info, NULL, NULL); + toku_ft_bn_apply_msg_once(BLB(leafnode, 0), msg, idx, keylen, NULL, &gc_info, NULL, NULL); // dont forget to dirty the node leafnode->dirty = 1; diff --git a/ft/tests/verify-dup-pivots.cc b/ft/tests/verify-dup-pivots.cc index e2cb20f105d..828350d891e 100644 --- a/ft/tests/verify-dup-pivots.cc +++ b/ft/tests/verify-dup-pivots.cc @@ -115,9 +115,9 @@ append_leaf(FTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen) // apply an insert to the leaf node MSN msn = next_dummymsn(); - FT_MSG_S msg = { FT_INSERT, msn, xids_get_root_xids(), .u={.id = { &thekey, &theval }} }; + ft_msg msg(&thekey, &theval, FT_INSERT, msn, xids_get_root_xids()); txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, false); - toku_ft_bn_apply_msg_once(BLB(leafnode, 0), &msg, idx, keylen, NULL, &gc_info, NULL, NULL); + toku_ft_bn_apply_msg_once(BLB(leafnode, 0), msg, idx, keylen, NULL, &gc_info, NULL, NULL); // dont forget to dirty the node leafnode->dirty = 1; diff --git a/ft/tests/verify-misrouted-msgs.cc b/ft/tests/verify-misrouted-msgs.cc index 6b7555a89c6..e87597f1748 100644 --- a/ft/tests/verify-misrouted-msgs.cc +++ b/ft/tests/verify-misrouted-msgs.cc @@ -116,9 +116,9 @@ append_leaf(FTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen) // apply an insert to the leaf node MSN msn = next_dummymsn(); - FT_MSG_S msg = { FT_INSERT, msn, xids_get_root_xids(), .u={.id = { &thekey, &theval }} }; + ft_msg msg(&thekey, &theval, FT_INSERT, msn, xids_get_root_xids()); txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, false); - toku_ft_bn_apply_msg_once(BLB(leafnode,0), &msg, idx, keylen, NULL, &gc_info, NULL, NULL); + toku_ft_bn_apply_msg_once(BLB(leafnode,0), msg, idx, keylen, NULL, &gc_info, NULL, NULL); // dont forget to dirty the node leafnode->dirty = 1; diff --git 
a/ft/tests/verify-unsorted-leaf.cc b/ft/tests/verify-unsorted-leaf.cc index d1178c1d8f0..450ff4ebe3f 100644 --- a/ft/tests/verify-unsorted-leaf.cc +++ b/ft/tests/verify-unsorted-leaf.cc @@ -118,9 +118,9 @@ append_leaf(FTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen) // apply an insert to the leaf node MSN msn = next_dummymsn(); - FT_MSG_S msg = { FT_INSERT, msn, xids_get_root_xids(), .u={.id = { &thekey, &theval }} }; + ft_msg msg(&thekey, &theval, FT_INSERT, msn, xids_get_root_xids()); txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, false); - toku_ft_bn_apply_msg_once(BLB(leafnode, 0), &msg, idx, keylen, NULL, &gc_info, NULL, NULL); + toku_ft_bn_apply_msg_once(BLB(leafnode, 0), msg, idx, keylen, NULL, &gc_info, NULL, NULL); // dont forget to dirty the node leafnode->dirty = 1; diff --git a/ft/tests/verify-unsorted-pivots.cc b/ft/tests/verify-unsorted-pivots.cc index 3367cb9af8d..b9f3656878c 100644 --- a/ft/tests/verify-unsorted-pivots.cc +++ b/ft/tests/verify-unsorted-pivots.cc @@ -115,9 +115,9 @@ append_leaf(FTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen) // apply an insert to the leaf node MSN msn = next_dummymsn(); - FT_MSG_S msg = { FT_INSERT, msn, xids_get_root_xids(), .u={.id = { &thekey, &theval }} }; + ft_msg msg(&thekey, &theval, FT_INSERT, msn, xids_get_root_xids()); txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, false); - toku_ft_bn_apply_msg_once(BLB(leafnode, 0), &msg, idx, keylen, NULL, &gc_info, NULL, NULL); + toku_ft_bn_apply_msg_once(BLB(leafnode, 0), msg, idx, keylen, NULL, &gc_info, NULL, NULL); // dont forget to dirty the node leafnode->dirty = 1; diff --git a/ft/tokuftdump.cc b/ft/tokuftdump.cc index a366e5de116..48515414e31 100644 --- a/ft/tokuftdump.cc +++ b/ft/tokuftdump.cc @@ -280,14 +280,14 @@ static void dump_node(int fd, BLOCKNUM blocknum, FT ft) { } if (do_dump_data) { struct dump_data_fn { - int operator()(FT_MSG msg, bool UU(is_fresh)) { - enum ft_msg_type type = (enum ft_msg_type) 
msg->type; - MSN msn = msg->msn; - XIDS xids = msg->xids; - const void *key = ft_msg_get_key(msg); - const void *data = ft_msg_get_val(msg); - ITEMLEN keylen = ft_msg_get_keylen(msg); - ITEMLEN datalen = ft_msg_get_vallen(msg); + int operator()(const ft_msg &msg, bool UU(is_fresh)) { + enum ft_msg_type type = (enum ft_msg_type) msg.type(); + MSN msn = msg.msn(); + XIDS xids = msg.xids(); + const void *key = msg.kdbt()->data; + const void *data = msg.vdbt()->data; + ITEMLEN keylen = msg.kdbt()->size; + ITEMLEN datalen = msg.vdbt()->size; printf(" msn=%" PRIu64 " (0x%" PRIx64 ") ", msn.msn, msn.msn); printf(" TYPE="); switch (type) { diff --git a/ft/ule-internal.h b/ft/ule-internal.h index 8e295195c4d..6c3c6bd013c 100644 --- a/ft/ule-internal.h +++ b/ft/ule-internal.h @@ -135,7 +135,7 @@ typedef struct ule { // unpacked leaf entry -void test_msg_modify_ule(ULE ule, FT_MSG msg); +void test_msg_modify_ule(ULE ule, const ft_msg &msg); ////////////////////////////////////////////////////////////////////////////////////// diff --git a/ft/ule.cc b/ft/ule.cc index 4be71314c62..a79ab92a28e 100644 --- a/ft/ule.cc +++ b/ft/ule.cc @@ -105,7 +105,7 @@ PATENT RIGHTS GRANT: #include #include "ft/fttypes.h" #include "ft/ft-internal.h" -#include "ft/ft_msg.h" +#include "ft/msg.h" #include "ft/leafentry.h" #include "ft/logger.h" #include "ft/txn.h" @@ -216,7 +216,7 @@ const UXR_S committed_delete = { // Local functions: static void msg_init_empty_ule(ULE ule); -static void msg_modify_ule(ULE ule, FT_MSG msg); +static void msg_modify_ule(ULE ule, const ft_msg &msg); static void ule_init_empty_ule(ULE ule); static void ule_do_implicit_promotions(ULE ule, XIDS xids); static void ule_try_promote_provisional_outermost(ULE ule, TXNID oldest_possible_live_xid); @@ -496,7 +496,7 @@ enum { // Otehrwise the new_leafentry_p points at the new leaf entry. // As of October 2011, this function always returns 0. 
void -toku_le_apply_msg(FT_MSG msg, +toku_le_apply_msg(const ft_msg &msg, LEAFENTRY old_leafentry, // NULL if there was no stored data. bn_data* data_buffer, // bn_data storing leafentry, if NULL, means there is no bn_data uint32_t idx, // index in data_buffer where leafentry is stored (and should be replaced @@ -510,7 +510,7 @@ toku_le_apply_msg(FT_MSG msg, int64_t oldnumbytes = 0; int64_t newnumbytes = 0; uint64_t oldmemsize = 0; - uint32_t keylen = ft_msg_get_keylen(msg); + uint32_t keylen = msg.kdbt()->size; if (old_leafentry == NULL) { msg_init_empty_ule(&ule); @@ -555,7 +555,7 @@ toku_le_apply_msg(FT_MSG msg, &ule, // create packed leafentry data_buffer, idx, - ft_msg_get_key(msg), // contract of this function is caller has this set, always + msg.kdbt()->data, // contract of this function is caller has this set, always keylen, // contract of this function is caller has this set, always old_keylen, oldmemsize, @@ -693,10 +693,10 @@ msg_init_empty_ule(ULE ule) { // Purpose is to modify the unpacked leafentry in our private workspace. // static void -msg_modify_ule(ULE ule, FT_MSG msg) { - XIDS xids = ft_msg_get_xids(msg); +msg_modify_ule(ULE ule, const ft_msg &msg) { + XIDS xids = msg.xids(); invariant(xids_get_num_xids(xids) < MAX_TRANSACTION_RECORDS); - enum ft_msg_type type = ft_msg_get_type(msg); + enum ft_msg_type type = msg.type(); if (type != FT_OPTIMIZE && type != FT_OPTIMIZE_FOR_UPGRADE) { ule_do_implicit_promotions(ule, xids); } @@ -709,9 +709,9 @@ msg_modify_ule(ULE ule, FT_MSG msg) { //fall through to FT_INSERT on purpose. } case FT_INSERT: { - uint32_t vallen = ft_msg_get_vallen(msg); + uint32_t vallen = msg.vdbt()->size; invariant(IS_VALID_LEN(vallen)); - void * valp = ft_msg_get_val(msg); + void * valp = msg.vdbt()->data; ule_apply_insert(ule, xids, vallen, valp); break; } @@ -738,17 +738,15 @@ msg_modify_ule(ULE ule, FT_MSG msg) { assert(false); // These messages don't get this far. 
Instead they get translated (in setval_fun in do_update) into FT_INSERT messages. break; default: - assert(false /* illegal FT_MSG.type */); + assert(false); /* illegal ft msg type */ break; } } -void -test_msg_modify_ule(ULE ule, FT_MSG msg){ +void test_msg_modify_ule(ULE ule, const ft_msg &msg){ msg_modify_ule(ule,msg); } - static void ule_optimize(ULE ule, XIDS xids) { if (ule->num_puxrs) { TXNID uncommitted = ule->uxrs[ule->num_cuxrs].xid; // outermost uncommitted diff --git a/ft/ybt.cc b/ft/ybt.cc index a86a019caf9..1e02b1c753d 100644 --- a/ft/ybt.cc +++ b/ft/ybt.cc @@ -90,23 +90,30 @@ PATENT RIGHTS GRANT: #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." #include -#include #include -#include -#include "ybt.h" +#include "portability/memory.h" + +#include "ft/fttypes.h" +#include "ft/ybt.h" DBT * -toku_init_dbt(DBT *ybt) { - memset(ybt, 0, sizeof(*ybt)); - return ybt; +toku_init_dbt(DBT *dbt) { + memset(dbt, 0, sizeof(*dbt)); + return dbt; +} + +DBT +toku_empty_dbt(void) { + static const DBT empty_dbt = { .data = 0, .size = 0, .ulen = 0, .flags = 0 }; + return empty_dbt; } DBT * -toku_init_dbt_flags(DBT *ybt, uint32_t flags) { - toku_init_dbt(ybt); - ybt->flags = flags; - return ybt; +toku_init_dbt_flags(DBT *dbt, uint32_t flags) { + toku_init_dbt(dbt); + dbt->flags = flags; + return dbt; } DBT_ARRAY * diff --git a/ft/ybt.h b/ft/ybt.h index dd90e00fa0a..c07ad3a5a7b 100644 --- a/ft/ybt.h +++ b/ft/ybt.h @@ -102,6 +102,9 @@ PATENT RIGHTS GRANT: DBT *toku_init_dbt(DBT *); +// returns: an initialized but empty dbt (for which toku_dbt_is_empty() is true) +DBT toku_empty_dbt(void); + DBT *toku_init_dbt_flags(DBT *, uint32_t flags); void toku_destroy_dbt(DBT *); From 97db17fe3e3fcbbabf0f3d4e5f7e2347b6b29ced 
Mon Sep 17 00:00:00 2001 From: John Esmet Date: Mon, 23 Jun 2014 15:43:37 -0400 Subject: [PATCH 054/190] FT-93 Fix a bug with aligned pivotkeys where we may overestimate how much space is needed on disk --- ft/ft-ops.cc | 2 +- ft/pivotkeys.cc | 8 +++++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/ft/ft-ops.cc b/ft/ft-ops.cc index 6db1d19bf66..a694ebc009b 100644 --- a/ft/ft-ops.cc +++ b/ft/ft-ops.cc @@ -540,7 +540,7 @@ ftnode_memory_size (FTNODE node) int n_children = node->n_children; retval += sizeof(*node); retval += (n_children)*(sizeof(node->bp[0])); - retval += node->pivotkeys.total_size(); + retval += node->pivotkeys.serialized_size(); // now calculate the sizes of the partitions for (int i = 0; i < n_children; i++) { diff --git a/ft/pivotkeys.cc b/ft/pivotkeys.cc index 175745cd71f..ba021cf8bd9 100644 --- a/ft/pivotkeys.cc +++ b/ft/pivotkeys.cc @@ -436,7 +436,7 @@ void ftnode_pivot_keys::serialize_to_wbuf(struct wbuf *wb) const { wbuf_nocrc_bytes(wb, fixed ? _fixed_key(i) : _dbt_keys[i].data, size); written += size; } - invariant(written == _total_size); + invariant(written == serialized_size()); } int ftnode_pivot_keys::num_pivots() const { @@ -450,3 +450,9 @@ size_t ftnode_pivot_keys::total_size() const { paranoid_invariant(_fixed_keys == nullptr || (_total_size == _fixed_keylen_aligned * _num_pivots)); return _total_size; } + +size_t ftnode_pivot_keys::serialized_size() const { + // we only return the size that will be used when serialized, so we calculate based + // on the fixed keylen and not the aligned keylen. + return _fixed_format() ? 
_num_pivots * _fixed_keylen : _total_size; +} From 452854285089478a9afcce4a0a139e43a0c27114 Mon Sep 17 00:00:00 2001 From: John Esmet Date: Mon, 23 Jun 2014 15:46:20 -0400 Subject: [PATCH 055/190] FT-93 Add function prototype (missed last commit unfortunately) --- ft/node.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/ft/node.h b/ft/node.h index ab94ce0ff65..343c688592a 100644 --- a/ft/node.h +++ b/ft/node.h @@ -146,9 +146,12 @@ public: int num_pivots() const; - // return: the sum of the keys sizes of each pivot + // return: the total size of this data structure size_t total_size() const; + // return: the sum of the keys sizes of each pivot (for serialization) + size_t serialized_size() const; + private: inline size_t _align4(size_t x) const { return roundup_to_multiple(4, x); From caeafaa16ade8e04c2fb6a15e3997c4180c604e6 Mon Sep 17 00:00:00 2001 From: John Esmet Date: Mon, 23 Jun 2014 15:54:14 -0400 Subject: [PATCH 056/190] FT-93 Fix an oops - accidentally swapped usage of serialized_size() with total_size() --- ft/ft-ops.cc | 2 +- ft/ft_node-serialize.cc | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ft/ft-ops.cc b/ft/ft-ops.cc index a694ebc009b..6db1d19bf66 100644 --- a/ft/ft-ops.cc +++ b/ft/ft-ops.cc @@ -540,7 +540,7 @@ ftnode_memory_size (FTNODE node) int n_children = node->n_children; retval += sizeof(*node); retval += (n_children)*(sizeof(node->bp[0])); - retval += node->pivotkeys.serialized_size(); + retval += node->pivotkeys.total_size(); // now calculate the sizes of the partitions for (int i = 0; i < n_children; i++) { diff --git a/ft/ft_node-serialize.cc b/ft/ft_node-serialize.cc index ddf114012cd..2cdac27918b 100644 --- a/ft/ft_node-serialize.cc +++ b/ft/ft_node-serialize.cc @@ -469,7 +469,7 @@ serialize_ftnode_info_size(FTNODE node) retval += 4; // flags retval += 4; // height; retval += 8; // oldest_referenced_xid_known - retval += node->pivotkeys.total_size(); + retval += 
node->pivotkeys.serialized_size(); retval += (node->n_children-1)*4; // encode length of each pivot if (node->height > 0) { retval += node->n_children*8; // child blocknum's From 8cde040c65253380f16448107294d554f92987df Mon Sep 17 00:00:00 2001 From: John Esmet Date: Mon, 23 Jun 2014 17:46:50 -0400 Subject: [PATCH 057/190] FT-93 Fix another bug, add a sanity check after split/append/deserialize to help catch these in the future --- ft/pivotkeys.cc | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/ft/pivotkeys.cc b/ft/pivotkeys.cc index ba021cf8bd9..49abc9e82d1 100644 --- a/ft/pivotkeys.cc +++ b/ft/pivotkeys.cc @@ -138,6 +138,8 @@ void ftnode_pivot_keys::create_from_dbts(const DBT *keys, int n) { _total_size += size; } } + + sanity_check(); } void ftnode_pivot_keys::_create_from_fixed_keys(const char *fixedkeys, size_t fixed_keylen, int n) { @@ -156,6 +158,8 @@ void ftnode_pivot_keys::create_from_pivot_keys(const ftnode_pivot_keys &pivotkey } else { create_from_dbts(pivotkeys._dbt_keys, pivotkeys._num_pivots); } + + sanity_check(); } void ftnode_pivot_keys::destroy() { @@ -240,6 +244,8 @@ void ftnode_pivot_keys::deserialize_from_rbuf(struct rbuf *rb, int n) { if (keys_same_size && _num_pivots > 0) { _convert_to_fixed_format(); } + + sanity_check(); } DBT ftnode_pivot_keys::get_pivot(int i) const { @@ -341,6 +347,8 @@ void ftnode_pivot_keys::append(const ftnode_pivot_keys &pivotkeys) { } _num_pivots += pivotkeys._num_pivots; _total_size += pivotkeys._total_size; + + sanity_check(); } void ftnode_pivot_keys::_replace_at_dbt(const DBT *key, int i) { @@ -401,7 +409,7 @@ void ftnode_pivot_keys::_split_at_fixed(int i, ftnode_pivot_keys *other) { other->_create_from_fixed_keys(_fixed_key(i), _fixed_keylen, _num_pivots - i); // shrink down to size - _total_size = i * _fixed_keylen; + _total_size = i * _fixed_keylen_aligned; REALLOC_N_ALIGNED(64, _total_size, _fixed_keys); } @@ -425,6 +433,8 @@ void ftnode_pivot_keys::split_at(int i, 
ftnode_pivot_keys *other) { } _num_pivots = i; } + + sanity_check(); } void ftnode_pivot_keys::serialize_to_wbuf(struct wbuf *wb) const { @@ -456,3 +466,14 @@ size_t ftnode_pivot_keys::serialized_size() const { // on the fixed keylen and not the aligned keylen. return _fixed_format() ? _num_pivots * _fixed_keylen : _total_size; } + +void ftnode_pivot_keys::sanity_check() const { + if (_fixed_format()) { + invariant(_dbt_keys == nullptr); + invariant(_fixed_keylen_aligned == _align4(_fixed_keylen)); + invariant(_num_pivots * _fixed_keylen <= _total_size); + invariant(_num_pivots * _fixed_keylen_aligned == _total_size); + } else { + invariant(_num_pivots == 0 || _dbt_keys != nullptr); + } +} From 2cfb1047b9b71fa6b14428888d17e9c140530133 Mon Sep 17 00:00:00 2001 From: John Esmet Date: Wed, 25 Jun 2014 18:15:57 -0400 Subject: [PATCH 058/190] FT-93 Add a stronger sanity check, fix a bug where we wouldn't properly update the total size when convering to dbt format. --- ft/pivotkeys.cc | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/ft/pivotkeys.cc b/ft/pivotkeys.cc index 49abc9e82d1..6f31f03c018 100644 --- a/ft/pivotkeys.cc +++ b/ft/pivotkeys.cc @@ -201,6 +201,7 @@ void ftnode_pivot_keys::_convert_to_fixed_format() { _dbt_keys = nullptr; invariant(_fixed_format()); + sanity_check(); } void ftnode_pivot_keys::_convert_to_dbt_format() { @@ -211,13 +212,17 @@ void ftnode_pivot_keys::_convert_to_dbt_format() { for (int i = 0; i < _num_pivots; i++) { toku_memdup_dbt(&_dbt_keys[i], _fixed_key(i), _fixed_keylen); } + // pivots sizes are not aligned up dbt format + _total_size = _num_pivots * _fixed_keylen; // destroy the fixed key format toku_free(_fixed_keys); _fixed_keys = nullptr; _fixed_keylen = 0; + _fixed_keylen_aligned = 0; invariant(!_fixed_format()); + sanity_check(); } void ftnode_pivot_keys::deserialize_from_rbuf(struct rbuf *rb, int n) { @@ -475,5 +480,10 @@ void ftnode_pivot_keys::sanity_check() const { invariant(_num_pivots * _fixed_keylen_aligned == 
_total_size); } else { invariant(_num_pivots == 0 || _dbt_keys != nullptr); + size_t size = 0; + for (int i = 0; i < _num_pivots; i++) { + size += _dbt_keys[i].size; + } + invariant(size == _total_size); } } From 9b2f9edeed1991e4425bfc4139c4139d953f2342 Mon Sep 17 00:00:00 2001 From: John Esmet Date: Thu, 26 Jun 2014 11:29:20 -0400 Subject: [PATCH 059/190] FT-93 Fix another sizing bug exposed by ftnode_pivot_keys::sanity_check() --- ft/pivotkeys.cc | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/ft/pivotkeys.cc b/ft/pivotkeys.cc index 6f31f03c018..e5b999ae259 100644 --- a/ft/pivotkeys.cc +++ b/ft/pivotkeys.cc @@ -324,11 +324,13 @@ void ftnode_pivot_keys::_append_dbt(const ftnode_pivot_keys &pivotkeys) { REALLOC_N_ALIGNED(64, _num_pivots + pivotkeys._num_pivots, _dbt_keys); bool other_fixed = pivotkeys._fixed_format(); for (int i = 0; i < pivotkeys._num_pivots; i++) { + size_t size = other_fixed ? pivotkeys._fixed_keylen : + pivotkeys._dbt_keys[i].size; toku_memdup_dbt(&_dbt_keys[_num_pivots + i], other_fixed ? pivotkeys._fixed_key(i) : pivotkeys._dbt_keys[i].data, - other_fixed ? 
pivotkeys._fixed_keylen : - pivotkeys._dbt_keys[i].size); + size); + _total_size += size; } } @@ -337,6 +339,7 @@ void ftnode_pivot_keys::_append_fixed(const ftnode_pivot_keys &pivotkeys) { // other pivotkeys have the same fixed keylen REALLOC_N_ALIGNED(64, (_num_pivots + pivotkeys._num_pivots) * _fixed_keylen_aligned, _fixed_keys); memcpy(_fixed_key(_num_pivots), pivotkeys._fixed_keys, pivotkeys._total_size); + _total_size += pivotkeys._total_size; } else { // must convert to dbt format, other pivotkeys have different length'd keys _convert_to_dbt_format(); @@ -351,7 +354,6 @@ void ftnode_pivot_keys::append(const ftnode_pivot_keys &pivotkeys) { _append_dbt(pivotkeys); } _num_pivots += pivotkeys._num_pivots; - _total_size += pivotkeys._total_size; sanity_check(); } From 47f57224b7def2876a059f07167091442376e16f Mon Sep 17 00:00:00 2001 From: John Esmet Date: Thu, 26 Jun 2014 11:35:53 -0400 Subject: [PATCH 060/190] FT-273 Use a comparator in the locktree instead of a descriptor/ft_compare_func pair --- ft/comparator.h | 22 +++- ft/ft-ops.cc | 5 +- ft/ft-ops.h | 4 +- ft/tests/comparator-test.cc | 14 +- locktree/locktree.cc | 9 +- locktree/locktree.h | 30 ++--- locktree/manager.cc | 6 +- locktree/tests/lock_request_killed.cc | 2 +- locktree/tests/lock_request_not_killed.cc | 2 +- locktree/tests/lock_request_start_deadlock.cc | 2 +- locktree/tests/lock_request_start_pending.cc | 2 +- .../tests/lock_request_wait_time_callback.cc | 2 +- locktree/tests/locktree_conflicts.cc | 2 +- locktree/tests/locktree_create_destroy.cc | 2 +- .../locktree_escalation_1big7lt_1small.cc | 4 +- .../tests/locktree_escalation_2big_1lt.cc | 7 +- .../tests/locktree_escalation_2big_2lt.cc | 7 +- .../tests/locktree_escalation_impossible.cc | 5 +- locktree/tests/locktree_escalation_stalls.cc | 10 +- locktree/tests/locktree_infinity.cc | 2 +- locktree/tests/locktree_misc.cc | 16 ++- locktree/tests/locktree_overlapping_relock.cc | 2 +- locktree/tests/locktree_simple_lock.cc | 4 +- 
.../locktree_single_txnid_optimization.cc | 2 +- .../tests/manager_reference_release_lt.cc | 23 ++-- locktree/tests/manager_status.cc | 5 +- locktree/tests/test.h | 124 ++++++++++-------- src/indexer.cc | 31 ++--- src/ydb-internal.h | 1 + src/ydb_db.cc | 35 ++--- src/ydb_db.h | 5 +- src/ydb_write.cc | 4 +- 32 files changed, 212 insertions(+), 179 deletions(-) diff --git a/ft/comparator.h b/ft/comparator.h index 74e110ecfd3..6cfd7aee883 100644 --- a/ft/comparator.h +++ b/ft/comparator.h @@ -115,6 +115,24 @@ namespace toku { _builtin = _cmp == &toku_builtin_compare_fun; } + // inherit the attributes of another comparator, but keep our own + // copy of fake_db that is owned separately from the one given. + void inherit(const comparator &cmp) { + invariant_notnull(_fake_db); + invariant_notnull(cmp._cmp); + invariant_notnull(cmp._fake_db); + _cmp = cmp._cmp; + _fake_db->cmp_descriptor = cmp._fake_db->cmp_descriptor; + _builtin = cmp._builtin; + } + + // like inherit, but doesn't require that the this comparator + // was already created + void create_from(const comparator &cmp) { + XCALLOC(_fake_db); + inherit(cmp); + } + void destroy() { toku_free(_fake_db); } @@ -127,10 +145,6 @@ namespace toku { return _cmp; } - void set_descriptor(DESCRIPTOR desc) { - _fake_db->cmp_descriptor = desc; - } - bool valid() const { return _cmp != nullptr; } diff --git a/ft/ft-ops.cc b/ft/ft-ops.cc index 6db1d19bf66..ee87a69e671 100644 --- a/ft/ft-ops.cc +++ b/ft/ft-ops.cc @@ -3255,8 +3255,9 @@ void toku_ft_set_update(FT_HANDLE ft_handle, ft_update_func update_fun) { ft_handle->options.update_fun = update_fun; } -ft_compare_func toku_ft_get_bt_compare (FT_HANDLE ft_handle) { - return ft_handle->options.compare_fun; +const toku::comparator &toku_ft_get_comparator(FT_HANDLE ft_handle) { + invariant_notnull(ft_handle->ft); + return ft_handle->ft->cmp; } static void diff --git a/ft/ft-ops.h b/ft/ft-ops.h index b8813049840..8c01fa805f2 100644 --- a/ft/ft-ops.h +++ b/ft/ft-ops.h @@ -125,8 
+125,8 @@ void toku_ft_handle_get_compression_method(FT_HANDLE, enum toku_compression_meth void toku_ft_handle_set_fanout(FT_HANDLE, unsigned int fanout); void toku_ft_handle_get_fanout(FT_HANDLE, unsigned int *fanout); -void toku_ft_set_bt_compare(FT_HANDLE, ft_compare_func); -ft_compare_func toku_ft_get_bt_compare (FT_HANDLE ft_h); +void toku_ft_set_bt_compare(FT_HANDLE ft_handle, ft_compare_func cmp_func); +const toku::comparator &toku_ft_get_comparator(FT_HANDLE ft_handle); void toku_ft_set_redirect_callback(FT_HANDLE ft_h, on_redirect_callback redir_cb, void* extra); diff --git a/ft/tests/comparator-test.cc b/ft/tests/comparator-test.cc index 359115886cb..8c5bac55f5d 100644 --- a/ft/tests/comparator-test.cc +++ b/ft/tests/comparator-test.cc @@ -116,11 +116,23 @@ static void test_desc(void) { invariant(c == MAGIC); // set desc to d2, make sure it gets used - cmp.set_descriptor(&d2); + toku::comparator cmp2; + cmp2.create(magic_compare, &d2); + cmp.inherit(cmp2); expected_desc = &d2; c = cmp(&dbt_a, &dbt_b); invariant(c == MAGIC); + // go back to using d1, but using the create_from API + toku::comparator cmp3, cmp4; + cmp3.create(magic_compare, &d1); // cmp3 has d1 + cmp4.create_from(cmp3); // cmp4 should get d1 from cmp3 + expected_desc = &d1; + c = cmp3(&dbt_a, &dbt_b); + invariant(c == MAGIC); + c = cmp4(&dbt_a, &dbt_b); + invariant(c == MAGIC); + cmp.destroy(); } diff --git a/locktree/locktree.cc b/locktree/locktree.cc index bda355a5238..78332fcfa28 100644 --- a/locktree/locktree.cc +++ b/locktree/locktree.cc @@ -116,12 +116,11 @@ namespace toku { // but does nothing based on the value of the reference count - it is // up to the user of the locktree to destroy it when it sees fit. 
-void locktree::create(locktree_manager *mgr, DICTIONARY_ID dict_id, - DESCRIPTOR desc, ft_compare_func cmp) { +void locktree::create(locktree_manager *mgr, DICTIONARY_ID dict_id, const comparator &cmp) { m_mgr = mgr; m_dict_id = dict_id; - m_cmp.create(cmp, desc); + m_cmp.create_from(cmp); m_reference_count = 1; m_userdata = nullptr; @@ -791,8 +790,8 @@ struct lt_lock_request_info *locktree::get_lock_request_info(void) { return &m_lock_request_info; } -void locktree::set_descriptor(DESCRIPTOR desc) { - m_cmp.set_descriptor(desc); +void locktree::set_comparator(const comparator &cmp) { + m_cmp.inherit(cmp); } locktree_manager *locktree::get_manager(void) const { diff --git a/locktree/locktree.h b/locktree/locktree.h index 666af3bfd6a..da0771fc995 100644 --- a/locktree/locktree.h +++ b/locktree/locktree.h @@ -183,10 +183,10 @@ namespace toku { // effect: Get a locktree from the manager. If a locktree exists with the given // dict_id, it is referenced and then returned. If one did not exist, it - // is created. It will use the given descriptor and comparison function - // for comparing keys, and the on_create callback passed to locktree_manager::create() - // will be called with the given extra parameter. - locktree *get_lt(DICTIONARY_ID dict_id, DESCRIPTOR desc, ft_compare_func cmp, void *on_create_extra); + // is created. It will use the comparator for comparing keys. The on_create + // callback (passed to locktree_manager::create()) will be called with the + // given extra parameter. + locktree *get_lt(DICTIONARY_ID dict_id, const comparator &cmp, void *on_create_extra); void reference_lt(locktree *lt); @@ -307,8 +307,7 @@ namespace toku { // A locktree represents the set of row locks owned by all transactions // over an open dictionary. Read and write ranges are represented as - // a left and right key which are compared with the given descriptor - // and comparison fn. 
+ // a left and right key which are compared with the given comparator // // Locktrees are not created and destroyed by the user. Instead, they are // referenced and released using the locktree manager. @@ -324,8 +323,7 @@ namespace toku { class locktree { public: // effect: Creates a locktree - void create(locktree_manager *mgr, DICTIONARY_ID dict_id, - DESCRIPTOR desc, ft_compare_func cmp); + void create(locktree_manager *mgr, DICTIONARY_ID dict_id, const comparator &cmp); void destroy(void); @@ -371,7 +369,7 @@ namespace toku { locktree_manager *get_manager(void) const; - void set_descriptor(DESCRIPTOR desc); + void set_comparator(const comparator &cmp); int compare(const locktree *lt) const; @@ -389,15 +387,13 @@ namespace toku { DICTIONARY_ID m_dict_id; uint32_t m_reference_count; - // use a comparator object that encapsulates an ft compare - // function and a descriptor in a fake db. this way we can - // pass it around for easy key comparisons. + // Since the memory referenced by this comparator is not owned by the + // locktree, the user must guarantee it will outlive the locktree. // - // since this comparator will store a pointer to a descriptor, - // the user of the locktree needs to make sure that the descriptor - // is valid for as long as the locktree. this is currently - // implemented by opening an ft_handle for this locktree and - // storing it as userdata below. + // The ydb API accomplishes this by opening an ft_handle in the on_create + // callback, which will keep the underlying FT (and its descriptor) in memory + // for as long as the handle is open. The ft_handle is stored opaquely in the + // userdata pointer below. 
see locktree_manager::get_lt w/ on_create_extra comparator m_cmp; concurrent_tree *m_rangetree; diff --git a/locktree/manager.cc b/locktree/manager.cc index b1bc5da2fe9..4aa9a135214 100644 --- a/locktree/manager.cc +++ b/locktree/manager.cc @@ -183,8 +183,8 @@ void locktree_manager::locktree_map_remove(locktree *lt) { invariant_zero(r); } -locktree *locktree_manager::get_lt(DICTIONARY_ID dict_id, DESCRIPTOR desc, - ft_compare_func cmp, void *on_create_extra) { +locktree *locktree_manager::get_lt(DICTIONARY_ID dict_id, + const comparator &cmp, void *on_create_extra) { // hold the mutex around searching and maybe // inserting into the locktree map @@ -193,7 +193,7 @@ locktree *locktree_manager::get_lt(DICTIONARY_ID dict_id, DESCRIPTOR desc, locktree *lt = locktree_map_find(dict_id); if (lt == nullptr) { XCALLOC(lt); - lt->create(this, dict_id, desc, cmp); + lt->create(this, dict_id, cmp); // new locktree created - call the on_create callback // and put it in the locktree map diff --git a/locktree/tests/lock_request_killed.cc b/locktree/tests/lock_request_killed.cc index 18fcd873423..593d2cc17fc 100644 --- a/locktree/tests/lock_request_killed.cc +++ b/locktree/tests/lock_request_killed.cc @@ -120,7 +120,7 @@ void lock_request_unit_test::test_wait_time_callback(void) { locktree lt; DICTIONARY_ID dict_id = { 1 }; - lt.create(nullptr, dict_id, nullptr, compare_dbts); + lt.create(nullptr, dict_id, dbt_comparator); TXNID txnid_a = 1001; lock_request request_a; diff --git a/locktree/tests/lock_request_not_killed.cc b/locktree/tests/lock_request_not_killed.cc index abee11052f4..c2ad4817455 100644 --- a/locktree/tests/lock_request_not_killed.cc +++ b/locktree/tests/lock_request_not_killed.cc @@ -117,7 +117,7 @@ void lock_request_unit_test::test_wait_time_callback(void) { locktree lt; DICTIONARY_ID dict_id = { 1 }; - lt.create(nullptr, dict_id, nullptr, compare_dbts); + lt.create(nullptr, dict_id, dbt_comparator); TXNID txnid_a = 1001; lock_request request_a; diff --git 
a/locktree/tests/lock_request_start_deadlock.cc b/locktree/tests/lock_request_start_deadlock.cc index 4710e19551b..38bea266c61 100644 --- a/locktree/tests/lock_request_start_deadlock.cc +++ b/locktree/tests/lock_request_start_deadlock.cc @@ -102,7 +102,7 @@ void lock_request_unit_test::test_start_deadlock(void) { const uint64_t lock_wait_time = 10; DICTIONARY_ID dict_id = { 1 }; - lt.create(nullptr, dict_id, nullptr, compare_dbts); + lt.create(nullptr, dict_id, dbt_comparator); TXNID txnid_a = 1001; TXNID txnid_b = 2001; diff --git a/locktree/tests/lock_request_start_pending.cc b/locktree/tests/lock_request_start_pending.cc index 54d630078ac..dc7bf363300 100644 --- a/locktree/tests/lock_request_start_pending.cc +++ b/locktree/tests/lock_request_start_pending.cc @@ -101,7 +101,7 @@ void lock_request_unit_test::test_start_pending(void) { lock_request request; DICTIONARY_ID dict_id = { 1 }; - lt.create(nullptr, dict_id, nullptr, compare_dbts); + lt.create(nullptr, dict_id, dbt_comparator); TXNID txnid_a = 1001; TXNID txnid_b = 2001; diff --git a/locktree/tests/lock_request_wait_time_callback.cc b/locktree/tests/lock_request_wait_time_callback.cc index bc67bac7465..60298f536da 100644 --- a/locktree/tests/lock_request_wait_time_callback.cc +++ b/locktree/tests/lock_request_wait_time_callback.cc @@ -101,7 +101,7 @@ void lock_request_unit_test::test_wait_time_callback(void) { locktree lt; DICTIONARY_ID dict_id = { 1 }; - lt.create(nullptr, dict_id, nullptr, compare_dbts); + lt.create(nullptr, dict_id, dbt_comparator); TXNID txnid_a = 1001; lock_request request_a; diff --git a/locktree/tests/locktree_conflicts.cc b/locktree/tests/locktree_conflicts.cc index 3eb7bd3c3d4..3a78906e203 100644 --- a/locktree/tests/locktree_conflicts.cc +++ b/locktree/tests/locktree_conflicts.cc @@ -108,7 +108,7 @@ void locktree_unit_test::test_conflicts(void) { locktree lt; DICTIONARY_ID dict_id = { 1 }; - lt.create(nullptr, dict_id, nullptr, compare_dbts); + lt.create(nullptr, dict_id, 
dbt_comparator); int r; TXNID txnid_a = 1001; diff --git a/locktree/tests/locktree_create_destroy.cc b/locktree/tests/locktree_create_destroy.cc index b3b1fb77629..e4abfb36d1f 100644 --- a/locktree/tests/locktree_create_destroy.cc +++ b/locktree/tests/locktree_create_destroy.cc @@ -98,7 +98,7 @@ void locktree_unit_test::test_create_destroy(void) { locktree lt; DICTIONARY_ID dict_id = { 1 }; - lt.create(nullptr, dict_id, nullptr, compare_dbts); + lt.create(nullptr, dict_id, dbt_comparator); lt_lock_request_info *info = lt.get_lock_request_info(); invariant_notnull(info); diff --git a/locktree/tests/locktree_escalation_1big7lt_1small.cc b/locktree/tests/locktree_escalation_1big7lt_1small.cc index 26e286eb8ca..ec1911308ea 100644 --- a/locktree/tests/locktree_escalation_1big7lt_1small.cc +++ b/locktree/tests/locktree_escalation_1big7lt_1small.cc @@ -247,11 +247,11 @@ int main(int argc, const char *argv[]) { locktree *big_lt[n_big]; for (int i = 0; i < n_big; i++) { dict_id = { next_dict_id }; next_dict_id++; - big_lt[i] = mgr.get_lt(dict_id, nullptr, compare_dbts, nullptr); + big_lt[i] = mgr.get_lt(dict_id, dbt_comparator, nullptr); } dict_id = { next_dict_id }; next_dict_id++; - locktree *small_lt = mgr.get_lt(dict_id, nullptr, compare_dbts, nullptr); + locktree *small_lt = mgr.get_lt(dict_id, dbt_comparator, nullptr); // create the worker threads struct big_arg big_arg = { &mgr, big_lt, n_big, 1000 }; diff --git a/locktree/tests/locktree_escalation_2big_1lt.cc b/locktree/tests/locktree_escalation_2big_1lt.cc index 8f6c697970e..c7135707a1d 100644 --- a/locktree/tests/locktree_escalation_2big_1lt.cc +++ b/locktree/tests/locktree_escalation_2big_1lt.cc @@ -210,13 +210,10 @@ int main(int argc, const char *argv[]) { mgr.set_max_lock_memory(max_lock_memory); // create lock trees - DESCRIPTOR desc[n_lt]; - DICTIONARY_ID dict_id[n_lt]; locktree *lt[n_big]; for (int i = 0; i < n_lt; i++) { - desc[i] = nullptr; - dict_id[i] = { (uint64_t)i }; - lt[i] = mgr.get_lt(dict_id[i], 
desc[i], compare_dbts, nullptr); + DICTIONARY_ID dict_id = { .dictid = (uint64_t) i }; + lt[i] = mgr.get_lt(dict_id, dbt_comparator, nullptr); assert(lt[i]); } diff --git a/locktree/tests/locktree_escalation_2big_2lt.cc b/locktree/tests/locktree_escalation_2big_2lt.cc index 576208f1dcb..dcd55f72509 100644 --- a/locktree/tests/locktree_escalation_2big_2lt.cc +++ b/locktree/tests/locktree_escalation_2big_2lt.cc @@ -210,13 +210,10 @@ int main(int argc, const char *argv[]) { mgr.set_max_lock_memory(max_lock_memory); // create lock trees - DESCRIPTOR desc[n_lt]; - DICTIONARY_ID dict_id[n_lt]; locktree *lt[n_big]; for (int i = 0; i < n_lt; i++) { - desc[i] = nullptr; - dict_id[i] = { (uint64_t)i }; - lt[i] = mgr.get_lt(dict_id[i], desc[i], compare_dbts, nullptr); + DICTIONARY_ID dict_id = { .dictid = (uint64_t)i }; + lt[i] = mgr.get_lt(dict_id, dbt_comparator, nullptr); assert(lt[i]); } diff --git a/locktree/tests/locktree_escalation_impossible.cc b/locktree/tests/locktree_escalation_impossible.cc index 4ee79b4f573..08a51b6a981 100644 --- a/locktree/tests/locktree_escalation_impossible.cc +++ b/locktree/tests/locktree_escalation_impossible.cc @@ -167,9 +167,8 @@ int main(int argc, const char *argv[]) { const TXNID txn_b = 100; // create lock trees - DESCRIPTOR desc = nullptr; - DICTIONARY_ID dict_id = { 1 }; - locktree *lt = mgr.get_lt(dict_id, desc, compare_dbts, nullptr); + DICTIONARY_ID dict_id = { .dictid = 1 }; + locktree *lt = mgr.get_lt(dict_id, dbt_comparator, nullptr); int64_t last_i = -1; for (int64_t i = 0; ; i++) { diff --git a/locktree/tests/locktree_escalation_stalls.cc b/locktree/tests/locktree_escalation_stalls.cc index 4fd102e2d49..5e8f84175bc 100644 --- a/locktree/tests/locktree_escalation_stalls.cc +++ b/locktree/tests/locktree_escalation_stalls.cc @@ -228,13 +228,11 @@ int main(int argc, const char *argv[]) { mgr.set_max_lock_memory(max_lock_memory); // create lock trees - DESCRIPTOR desc_0 = nullptr; - DICTIONARY_ID dict_id_0 = { 1 }; - locktree 
*lt_0 = mgr.get_lt(dict_id_0, desc_0, compare_dbts, nullptr); + DICTIONARY_ID dict_id_0 = { .dictid = 1 }; + locktree *lt_0 = mgr.get_lt(dict_id_0, dbt_comparator, nullptr); - DESCRIPTOR desc_1 = nullptr; - DICTIONARY_ID dict_id_1 = { 2 }; - locktree *lt_1 = mgr.get_lt(dict_id_1, desc_1, compare_dbts, nullptr); + DICTIONARY_ID dict_id_1 = { .dictid = 2 }; + locktree *lt_1 = mgr.get_lt(dict_id_1, dbt_comparator, nullptr); // create the worker threads struct arg big_arg = { &mgr, lt_0, 1000 }; diff --git a/locktree/tests/locktree_infinity.cc b/locktree/tests/locktree_infinity.cc index b4e0d0765bc..cbabc131668 100644 --- a/locktree/tests/locktree_infinity.cc +++ b/locktree/tests/locktree_infinity.cc @@ -98,7 +98,7 @@ void locktree_unit_test::test_infinity(void) { locktree lt; DICTIONARY_ID dict_id = { 1 }; - lt.create(nullptr, dict_id, nullptr, compare_dbts); + lt.create(nullptr, dict_id, dbt_comparator); int r; TXNID txnid_a = 1001; diff --git a/locktree/tests/locktree_misc.cc b/locktree/tests/locktree_misc.cc index a87716d862b..51dd17fa43c 100644 --- a/locktree/tests/locktree_misc.cc +++ b/locktree/tests/locktree_misc.cc @@ -109,7 +109,9 @@ static int my_compare_dbts(DB *db, const DBT *a, const DBT *b) { void locktree_unit_test::test_misc(void) { locktree lt; DICTIONARY_ID dict_id = { 1 }; - lt.create(nullptr, dict_id, nullptr, my_compare_dbts); + toku::comparator my_dbt_comparator; + my_dbt_comparator.create(my_compare_dbts, nullptr); + lt.create(nullptr, dict_id, my_dbt_comparator); invariant(lt.get_userdata() == nullptr); int userdata; @@ -124,19 +126,27 @@ void locktree_unit_test::test_misc(void) { expected_a = &dbt_a; expected_b = &dbt_b; + toku::comparator cmp_d1, cmp_d2; + cmp_d1.create(my_compare_dbts, &d1); + cmp_d2.create(my_compare_dbts, &d2); + // make sure the comparator object has the correct // descriptor when we set the locktree's descriptor - lt.set_descriptor(&d1); + lt.set_comparator(cmp_d1); expected_descriptor = &d1; r = lt.m_cmp(&dbt_a, 
&dbt_b); invariant(r == expected_comparison_magic); - lt.set_descriptor(&d2); + lt.set_comparator(cmp_d2); expected_descriptor = &d2; r = lt.m_cmp(&dbt_a, &dbt_b); invariant(r == expected_comparison_magic); lt.release_reference(); lt.destroy(); + + cmp_d1.destroy(); + cmp_d2.destroy(); + my_dbt_comparator.destroy(); } } /* namespace toku */ diff --git a/locktree/tests/locktree_overlapping_relock.cc b/locktree/tests/locktree_overlapping_relock.cc index d8212541c96..15f20f1a91d 100644 --- a/locktree/tests/locktree_overlapping_relock.cc +++ b/locktree/tests/locktree_overlapping_relock.cc @@ -101,7 +101,7 @@ void locktree_unit_test::test_overlapping_relock(void) { locktree lt; DICTIONARY_ID dict_id = { 1 }; - lt.create(nullptr, dict_id, nullptr, compare_dbts); + lt.create(nullptr, dict_id, dbt_comparator); const DBT *zero = get_dbt(0); const DBT *one = get_dbt(1); diff --git a/locktree/tests/locktree_simple_lock.cc b/locktree/tests/locktree_simple_lock.cc index 2a4de0f7b77..6042fe71c13 100644 --- a/locktree/tests/locktree_simple_lock.cc +++ b/locktree/tests/locktree_simple_lock.cc @@ -98,8 +98,8 @@ void locktree_unit_test::test_simple_lock(void) { locktree_manager mgr; mgr.create(nullptr, nullptr, nullptr, nullptr); - DICTIONARY_ID dict_id = { 1 }; - locktree *lt = mgr.get_lt(dict_id, nullptr, compare_dbts, nullptr); + DICTIONARY_ID dict_id = { .dictid = 1 }; + locktree *lt = mgr.get_lt(dict_id, dbt_comparator, nullptr); int r; TXNID txnid_a = 1001; diff --git a/locktree/tests/locktree_single_txnid_optimization.cc b/locktree/tests/locktree_single_txnid_optimization.cc index b0e8e72efdc..ddbc88da400 100644 --- a/locktree/tests/locktree_single_txnid_optimization.cc +++ b/locktree/tests/locktree_single_txnid_optimization.cc @@ -101,7 +101,7 @@ void locktree_unit_test::test_single_txnid_optimization(void) { locktree lt; DICTIONARY_ID dict_id = { 1 }; - lt.create(nullptr, dict_id, nullptr, compare_dbts); + lt.create(nullptr, dict_id, dbt_comparator); const DBT *zero = 
get_dbt(0); const DBT *one = get_dbt(1); diff --git a/locktree/tests/manager_reference_release_lt.cc b/locktree/tests/manager_reference_release_lt.cc index 65a2ee478e8..cdc876c829d 100644 --- a/locktree/tests/manager_reference_release_lt.cc +++ b/locktree/tests/manager_reference_release_lt.cc @@ -107,9 +107,15 @@ static void destroy_cb(locktree *lt) { (*k) = false; } +static int my_cmp(DB *UU(db), const DBT *UU(a), const DBT *UU(b)) { + return 0; +} + void manager_unit_test::test_reference_release_lt(void) { locktree_manager mgr; mgr.create(create_cb, destroy_cb, nullptr, nullptr); + toku::comparator my_comparator; + my_comparator.create(my_cmp, nullptr); DICTIONARY_ID a = { 0 }; DICTIONARY_ID b = { 1 }; @@ -117,18 +123,12 @@ void manager_unit_test::test_reference_release_lt(void) { bool aok = false; bool bok = false; bool cok = false; - - int d = 5; - DESCRIPTOR_S desc_s; - desc_s.dbt.data = &d; - desc_s.dbt.size = desc_s.dbt.ulen = sizeof(d); - desc_s.dbt.flags = DB_DBT_USERMEM; - locktree *alt = mgr.get_lt(a, &desc_s, nullptr, &aok); + locktree *alt = mgr.get_lt(a, my_comparator, &aok); invariant_notnull(alt); - locktree *blt = mgr.get_lt(b, &desc_s, nullptr, &bok); + locktree *blt = mgr.get_lt(b, my_comparator, &bok); invariant_notnull(alt); - locktree *clt = mgr.get_lt(c, &desc_s, nullptr, &cok); + locktree *clt = mgr.get_lt(c, my_comparator, &cok); invariant_notnull(alt); // three distinct locktrees should have been returned @@ -152,9 +152,9 @@ void manager_unit_test::test_reference_release_lt(void) { // get another handle on a and b, they shoudl be the same // as the original alt and blt - locktree *blt2 = mgr.get_lt(b, &desc_s, nullptr, &bok); + locktree *blt2 = mgr.get_lt(b, my_comparator, &bok); invariant(blt2 == blt); - locktree *alt2 = mgr.get_lt(a, &desc_s, nullptr, &aok); + locktree *alt2 = mgr.get_lt(a, my_comparator, &aok); invariant(alt2 == alt); // remove one ref from everything. c should die. a and b are ok. 
@@ -171,6 +171,7 @@ void manager_unit_test::test_reference_release_lt(void) { invariant(!aok); invariant(!bok); + my_comparator.destroy(); mgr.destroy(); } diff --git a/locktree/tests/manager_status.cc b/locktree/tests/manager_status.cc index e73814d8169..d6b9b1fbade 100644 --- a/locktree/tests/manager_status.cc +++ b/locktree/tests/manager_status.cc @@ -120,9 +120,8 @@ void manager_unit_test::test_status(void) { assert_status(&status, "LTM_WAIT_COUNT", 0); assert_status(&status, "LTM_TIMEOUT_COUNT", 0); - DESCRIPTOR desc = nullptr; - DICTIONARY_ID dict_id = { 1 }; - locktree *lt = mgr.get_lt(dict_id, desc, compare_dbts, nullptr); + DICTIONARY_ID dict_id = { .dictid = 1 }; + locktree *lt = mgr.get_lt(dict_id, dbt_comparator, nullptr); int r; TXNID txnid_a = 1001; TXNID txnid_b = 2001; diff --git a/locktree/tests/test.h b/locktree/tests/test.h index ea701a90a4a..c5bf06a29b2 100644 --- a/locktree/tests/test.h +++ b/locktree/tests/test.h @@ -91,71 +91,85 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
-#include #include +#include "ft/comparator.h" +#include "ft/ybt.h" + namespace toku { -__attribute__((__unused__)) -static DBT min_dbt(void) { - static int64_t min = INT_MIN; - DBT dbt; - toku_fill_dbt(&dbt, &min, sizeof(int64_t)); - dbt.flags = DB_DBT_USERMEM; - return dbt; -} - -__attribute__((__unused__)) -static DBT max_dbt(void) { - static int64_t max = INT_MAX; - DBT dbt; - toku_fill_dbt(&dbt, &max, sizeof(int64_t)); - dbt.flags = DB_DBT_USERMEM; - return dbt; -} - -__attribute__((__unused__)) -static const DBT *get_dbt(int64_t key) { - static const int NUM_DBTS = 1000; - static bool initialized; - static int64_t static_ints[NUM_DBTS]; - static DBT static_dbts[NUM_DBTS]; - invariant(key < NUM_DBTS); - if (!initialized) { - for (int i = 0; i < NUM_DBTS; i++) { - static_ints[i] = i; - toku_fill_dbt(&static_dbts[i], - &static_ints[i], - sizeof(int64_t)); - static_dbts[i].flags = DB_DBT_USERMEM; - } - initialized = true; + __attribute__((__unused__)) + static DBT min_dbt(void) { + static int64_t min = INT_MIN; + DBT dbt; + toku_fill_dbt(&dbt, &min, sizeof(int64_t)); + dbt.flags = DB_DBT_USERMEM; + return dbt; } - invariant(key < NUM_DBTS); - return &static_dbts[key]; -} + __attribute__((__unused__)) + static DBT max_dbt(void) { + static int64_t max = INT_MAX; + DBT dbt; + toku_fill_dbt(&dbt, &max, sizeof(int64_t)); + dbt.flags = DB_DBT_USERMEM; + return dbt; + } -__attribute__((__unused__)) -static int compare_dbts(DB *db, const DBT *key1, const DBT *key2) { - (void) db; + __attribute__((__unused__)) + static const DBT *get_dbt(int64_t key) { + static const int NUM_DBTS = 1000; + static bool initialized; + static int64_t static_ints[NUM_DBTS]; + static DBT static_dbts[NUM_DBTS]; + invariant(key < NUM_DBTS); + if (!initialized) { + for (int i = 0; i < NUM_DBTS; i++) { + static_ints[i] = i; + toku_fill_dbt(&static_dbts[i], + &static_ints[i], + sizeof(int64_t)); + static_dbts[i].flags = DB_DBT_USERMEM; + } + initialized = true; + } - // this emulates what a 
"infinity-aware" comparator object does - if (toku_dbt_is_infinite(key1) || toku_dbt_is_infinite(key2)) { - return toku_dbt_infinite_compare(key1, key2); - } else { - invariant(key1->size == sizeof(int64_t)); - invariant(key2->size == sizeof(int64_t)); - int64_t a = *(int64_t*) key1->data; - int64_t b = *(int64_t*) key2->data; - if (a < b) { - return -1; - } else if (a == b) { - return 0; + invariant(key < NUM_DBTS); + return &static_dbts[key]; + } + + __attribute__((__unused__)) + static int compare_dbts(DB *db, const DBT *key1, const DBT *key2) { + (void) db; + + // this emulates what a "infinity-aware" comparator object does + if (toku_dbt_is_infinite(key1) || toku_dbt_is_infinite(key2)) { + return toku_dbt_infinite_compare(key1, key2); } else { - return 1; + invariant(key1->size == sizeof(int64_t)); + invariant(key2->size == sizeof(int64_t)); + int64_t a = *(int64_t*) key1->data; + int64_t b = *(int64_t*) key2->data; + if (a < b) { + return -1; + } else if (a == b) { + return 0; + } else { + return 1; + } } } -} + + __attribute__((__unused__)) comparator dbt_comparator; + + __attribute__((__constructor__)) + static void construct_dbt_comparator(void) { + dbt_comparator.create(compare_dbts, nullptr); + } + + __attribute__((__destructor__)) + static void destruct_dbt_comparator(void) { + dbt_comparator.destroy(); + } } /* namespace toku */ diff --git a/src/indexer.cc b/src/indexer.cc index b91b738d4d4..cdc66cdd39b 100644 --- a/src/indexer.cc +++ b/src/indexer.cc @@ -233,32 +233,25 @@ toku_indexer_unlock(DB_INDEXER* indexer) { // after grabbing the indexer lock bool toku_indexer_may_insert(DB_INDEXER* indexer, const DBT* key) { - bool retval = false; + bool may_insert = false; toku_mutex_lock(&indexer->i->indexer_estimate_lock); + // if we have no position estimate, we can't tell, so return false - if (indexer->i->position_estimate.data == NULL) { - retval = false; - } - else { - FT_HANDLE ft_handle = indexer->i->src_db->i->ft_handle; - ft_compare_func keycompare 
= toku_ft_get_bt_compare(ft_handle); - int r = keycompare( - indexer->i->src_db, - &indexer->i->position_estimate, - key - ); + if (indexer->i->position_estimate.data == nullptr) { + may_insert = false; + } else { + DB *db = indexer->i->src_db; + const toku::comparator &cmp = toku_ft_get_comparator(db->i->ft_handle); + int c = cmp(&indexer->i->position_estimate, key); + // if key > position_estimate, then we know the indexer cursor // is past key, and we can safely say that associated values of // key must be inserted into the indexer's db - if (r < 0) { - retval = true; - } - else { - retval = false; - } + may_insert = c < 0; } + toku_mutex_unlock(&indexer->i->indexer_estimate_lock); - return retval; + return may_insert; } void diff --git a/src/ydb-internal.h b/src/ydb-internal.h index 8edda008e80..e1ad4a92f3a 100644 --- a/src/ydb-internal.h +++ b/src/ydb-internal.h @@ -96,6 +96,7 @@ PATENT RIGHTS GRANT: #include #include +#include #include #include #include diff --git a/src/ydb_db.cc b/src/ydb_db.cc index 2852283bfce..aeb0f671550 100644 --- a/src/ydb_db.cc +++ b/src/ydb_db.cc @@ -390,10 +390,12 @@ toku_db_open(DB * db, DB_TXN * txn, const char *fname, const char *dbname, DBTYP // locktree's descriptor pointer if necessary static void db_set_descriptors(DB *db, FT_HANDLE ft_handle) { + const toku::comparator &cmp = toku_ft_get_comparator(ft_handle); db->descriptor = toku_ft_get_descriptor(ft_handle); db->cmp_descriptor = toku_ft_get_cmp_descriptor(ft_handle); + invariant(db->cmp_descriptor == cmp.get_descriptor()); if (db->i->lt) { - db->i->lt->set_descriptor(db->cmp_descriptor); + db->i->lt->set_comparator(cmp); } } @@ -476,7 +478,7 @@ toku_db_open_iname(DB * db, DB_TXN * txn, const char *iname_in_env, uint32_t fla db->dbenv->i->cachetable, txn ? 
db_txn_struct_i(txn)->tokutxn : nullptr); if (r != 0) { - goto error_cleanup; + goto out; } // if the dictionary was opened as a blackhole, mark the @@ -497,26 +499,27 @@ toku_db_open_iname(DB * db, DB_TXN * txn, const char *iname_in_env, uint32_t fla .txn = txn, .ft_handle = db->i->ft_handle, }; - db->i->lt = db->dbenv->i->ltm.get_lt( - db->i->dict_id, - db->cmp_descriptor, - toku_ft_get_bt_compare(db->i->ft_handle), - &on_create_extra); + db->i->lt = db->dbenv->i->ltm.get_lt(db->i->dict_id, + toku_ft_get_comparator(db->i->ft_handle), + &on_create_extra); if (db->i->lt == nullptr) { r = errno; - if (r == 0) + if (r == 0) { r = EINVAL; - goto error_cleanup; + } + goto out; } } - return 0; + r = 0; -error_cleanup: - db->i->dict_id = DICTIONARY_ID_NONE; - db->i->opened = 0; - if (db->i->lt) { - db->dbenv->i->ltm.release_lt(db->i->lt); - db->i->lt = NULL; +out: + if (r != 0) { + db->i->dict_id = DICTIONARY_ID_NONE; + db->i->opened = 0; + if (db->i->lt) { + db->dbenv->i->ltm.release_lt(db->i->lt); + db->i->lt = nullptr; + } } return r; } diff --git a/src/ydb_db.h b/src/ydb_db.h index db3300cfed0..d8bc0223e0e 100644 --- a/src/ydb_db.h +++ b/src/ydb_db.h @@ -127,9 +127,8 @@ static inline int db_opened(DB *db) { return db->i->opened != 0; } -static inline ft_compare_func -toku_db_get_compare_fun(DB* db) { - return toku_ft_get_bt_compare(db->i->ft_handle); +static inline const toku::comparator &toku_db_get_comparator(DB *db) { + return toku_ft_get_comparator(db->i->ft_handle); } int toku_db_pre_acquire_fileops_lock(DB *db, DB_TXN *txn); diff --git a/src/ydb_write.cc b/src/ydb_write.cc index 82fbf439885..62badd4916b 100644 --- a/src/ydb_write.cc +++ b/src/ydb_write.cc @@ -951,8 +951,8 @@ env_update_multiple(DB_ENV *env, DB *src_db, DB_TXN *txn, } else if (idx_old == old_keys.size) { cmp = +1; } else { - ft_compare_func cmpfun = toku_db_get_compare_fun(db); - cmp = cmpfun(db, curr_old_key, curr_new_key); + const toku::comparator &cmpfn = toku_db_get_comparator(db); + cmp = 
cmpfn(curr_old_key, curr_new_key); } bool do_del = false; From ca032f3e82993c5ddb6cf507c3a5366dcd64415c Mon Sep 17 00:00:00 2001 From: Rich Prohaska Date: Thu, 26 Jun 2014 15:24:47 -0400 Subject: [PATCH 061/190] #256 only force MDL X for certain alter table operations --- storage/tokudb/ha_tokudb_alter_56.cc | 50 ++++++++++++++++------------ 1 file changed, 29 insertions(+), 21 deletions(-) diff --git a/storage/tokudb/ha_tokudb_alter_56.cc b/storage/tokudb/ha_tokudb_alter_56.cc index 5062a2ae67b..e55b7b48ad8 100644 --- a/storage/tokudb/ha_tokudb_alter_56.cc +++ b/storage/tokudb/ha_tokudb_alter_56.cc @@ -720,27 +720,6 @@ bool ha_tokudb::commit_inplace_alter_table(TABLE *altered_table, Alter_inplace_i tokudb_alter_ctx *ctx = static_cast(ha_alter_info->handler_ctx); bool result = false; // success THD *thd = ha_thd(); - MDL_ticket *ticket = table->mdl_ticket; - if (ticket->get_type() != MDL_EXCLUSIVE) { - // get exclusive lock no matter what -#if defined(MARIADB_BASE_VERSION) - killed_state saved_killed_state = thd->killed; - thd->killed = NOT_KILLED; - while (wait_while_table_is_used(thd, table, HA_EXTRA_NOT_USED) && thd->killed) - thd->killed = NOT_KILLED; - assert(ticket->get_type() == MDL_EXCLUSIVE); - if (thd->killed == NOT_KILLED) - thd->killed = saved_killed_state; -#else - THD::killed_state saved_killed_state = thd->killed; - thd->killed = THD::NOT_KILLED; - while (wait_while_table_is_used(thd, table, HA_EXTRA_NOT_USED) && thd->killed) - thd->killed = THD::NOT_KILLED; - assert(ticket->get_type() == MDL_EXCLUSIVE); - if (thd->killed == THD::NOT_KILLED) - thd->killed = saved_killed_state; -#endif - } if (commit) { #if (50613 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50699) || \ @@ -768,6 +747,35 @@ bool ha_tokudb::commit_inplace_alter_table(TABLE *altered_table, Alter_inplace_i } if (!commit) { + if (table->mdl_ticket->get_type() != MDL_EXCLUSIVE && + (ctx->add_index_changed || ctx->drop_index_changed || ctx->compression_changed)) { + + // get exclusive lock no 
matter what +#if defined(MARIADB_BASE_VERSION) + killed_state saved_killed_state = thd->killed; + thd->killed = NOT_KILLED; + for (volatile uint i = 0; wait_while_table_is_used(thd, table, HA_EXTRA_NOT_USED); i++) { + if (thd->killed != NOT_KILLED) + thd->killed = NOT_KILLED; + sleep(1); + } + assert(table->mdl_ticket->get_type() == MDL_EXCLUSIVE); + if (thd->killed == NOT_KILLED) + thd->killed = saved_killed_state; +#else + THD::killed_state saved_killed_state = thd->killed; + thd->killed = THD::NOT_KILLED; + for (volatile uint i = 0; wait_while_table_is_used(thd, table, HA_EXTRA_NOT_USED); i++) { + if (thd->killed != THD::NOT_KILLED) + thd->killed = THD::NOT_KILLED; + sleep(1); + } + assert(table->mdl_ticket->get_type() == MDL_EXCLUSIVE); + if (thd->killed == THD::NOT_KILLED) + thd->killed = saved_killed_state; +#endif + } + // abort the alter transaction NOW so that any alters are rolled back. this allows the following restores to work. tokudb_trx_data *trx = (tokudb_trx_data *) thd_get_ha_data(thd, tokudb_hton); assert(ctx->alter_txn == trx->stmt); From 3bdf1bbd2c7e485540c68b467457084ef3da1f1b Mon Sep 17 00:00:00 2001 From: Zardosht Kasheff Date: Wed, 2 Jul 2014 11:18:13 -0400 Subject: [PATCH 062/190] FT-287, have engine status report the amount of cloned data in the cachetable --- ft/cachetable-internal.h | 7 +++++-- ft/cachetable.cc | 23 +++++++++++++++++++++-- ft/cachetable.h | 1 + 3 files changed, 27 insertions(+), 4 deletions(-) diff --git a/ft/cachetable-internal.h b/ft/cachetable-internal.h index 9eb3ec66568..f1dcb71a5ba 100644 --- a/ft/cachetable-internal.h +++ b/ft/cachetable-internal.h @@ -516,8 +516,8 @@ public: void add_pair_attr(PAIR_ATTR attr); void remove_pair_attr(PAIR_ATTR attr); void change_pair_attr(PAIR_ATTR old_attr, PAIR_ATTR new_attr); - void add_to_size_current(long size); - void remove_from_size_current(long size); + void add_cloned_data_size(long size); + void remove_cloned_data_size(long size); uint64_t reserve_memory(double fraction, 
uint64_t upper_bound); void release_reserved_memory(uint64_t reserved_memory); void run_eviction_thread(); @@ -531,6 +531,8 @@ public: void get_state(long *size_current_ptr, long *size_limit_ptr); void fill_engine_status(); private: + void add_to_size_current(long size); + void remove_from_size_current(long size); void run_eviction(); bool run_eviction_on_pair(PAIR p); void try_evict_pair(PAIR p); @@ -546,6 +548,7 @@ private: pair_list* m_pl; cachefile_list* m_cf_list; int64_t m_size_current; // the sum of the sizes of the pairs in the cachetable + int64_t m_size_cloned_data; // stores amount of cloned data we have, only used for engine status // changes to these two values are protected // by ev_thread_lock int64_t m_size_reserved; // How much memory is reserved (e.g., by the loader) diff --git a/ft/cachetable.cc b/ft/cachetable.cc index d57592286a4..892f2266641 100644 --- a/ft/cachetable.cc +++ b/ft/cachetable.cc @@ -144,6 +144,7 @@ status_init(void) { STATUS_INIT(CT_SIZE_LEAF, CACHETABLE_SIZE_LEAF, UINT64, "size leaf", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); STATUS_INIT(CT_SIZE_ROLLBACK, CACHETABLE_SIZE_ROLLBACK, UINT64, "size rollback", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); STATUS_INIT(CT_SIZE_CACHEPRESSURE, CACHETABLE_SIZE_CACHEPRESSURE, UINT64, "size cachepressure", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); + STATUS_INIT(CT_SIZE_CLONED, CACHETABLE_SIZE_CACHEPRESSURE, UINT64, "size currently cloned data for checkpoint", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); STATUS_INIT(CT_EVICTIONS, CACHETABLE_EVICTIONS, UINT64, "evictions", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); STATUS_INIT(CT_CLEANER_EXECUTIONS, CACHETABLE_CLEANER_EXECUTIONS, UINT64, "cleaner executions", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); STATUS_INIT(CT_CLEANER_PERIOD, CACHETABLE_CLEANER_PERIOD, UINT64, "cleaner period", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); @@ -704,7 +705,7 @@ static void cachetable_only_write_locked_data( p->disk_data = disk_data; if (is_clone) { p->cloned_value_data = NULL; - 
ev->remove_from_size_current(p->cloned_value_size); + ev->remove_cloned_data_size(p->cloned_value_size); p->cloned_value_size = 0; } } @@ -949,7 +950,7 @@ clone_pair(evictor* ev, PAIR p) { ev->change_pair_attr(old_attr, new_attr); } p->cloned_value_size = clone_size; - ev->add_to_size_current(p->cloned_value_size); + ev->add_cloned_data_size(p->cloned_value_size); } static void checkpoint_cloned_pair(void* extra) { @@ -3635,6 +3636,7 @@ int evictor::init(long _size_limit, pair_list* _pl, cachefile_list* _cf_list, KI m_size_reserved = unreservable_memory(_size_limit); m_size_current = 0; + m_size_cloned_data = 0; m_size_evicting = 0; m_size_nonleaf = create_partitioned_counter(); @@ -3769,6 +3771,22 @@ void evictor::remove_from_size_current(long size) { (void) toku_sync_fetch_and_sub(&m_size_current, size); } +// +// Adds the size of cloned data to necessary variables in the evictor +// +void evictor::add_cloned_data_size(long size) { + (void) toku_sync_fetch_and_add(&m_size_cloned_data, size); + add_to_size_current(size); +} + +// +// Removes the size of cloned data to necessary variables in the evictor +// +void evictor::remove_cloned_data_size(long size) { + (void) toku_sync_fetch_and_sub(&m_size_cloned_data, size); + remove_from_size_current(size); +} + // // TODO: (Zardosht) comment this function // @@ -4333,6 +4351,7 @@ void evictor::fill_engine_status() { STATUS_VALUE(CT_SIZE_LEAF) = read_partitioned_counter(m_size_leaf); STATUS_VALUE(CT_SIZE_ROLLBACK) = read_partitioned_counter(m_size_rollback); STATUS_VALUE(CT_SIZE_CACHEPRESSURE) = read_partitioned_counter(m_size_cachepressure); + STATUS_VALUE(CT_SIZE_CLONED) = m_size_cloned_data; STATUS_VALUE(CT_WAIT_PRESSURE_COUNT) = read_partitioned_counter(m_wait_pressure_count); STATUS_VALUE(CT_WAIT_PRESSURE_TIME) = read_partitioned_counter(m_wait_pressure_time); STATUS_VALUE(CT_LONG_WAIT_PRESSURE_COUNT) = read_partitioned_counter(m_long_wait_pressure_count); diff --git a/ft/cachetable.h b/ft/cachetable.h index 
5c5eb575909..b53cefc9204 100644 --- a/ft/cachetable.h +++ b/ft/cachetable.h @@ -594,6 +594,7 @@ typedef enum { CT_SIZE_LEAF, // number of bytes in cachetable belonging to leaf nodes CT_SIZE_ROLLBACK, // number of bytes in cachetable belonging to rollback nodes CT_SIZE_CACHEPRESSURE, // number of bytes causing cache pressure (sum of buffers and workdone counters) + CT_SIZE_CLONED, // number of bytes of cloned data in the system CT_EVICTIONS, CT_CLEANER_EXECUTIONS, // number of times the cleaner thread's loop has executed CT_CLEANER_PERIOD, From 3a030fac86fd64b7f1fc06c59443b3aa36879b76 Mon Sep 17 00:00:00 2001 From: Rich Prohaska Date: Wed, 2 Jul 2014 13:27:37 -0400 Subject: [PATCH 063/190] #257 disable missing field initializer warning --- storage/tokudb/CMakeLists.txt | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/storage/tokudb/CMakeLists.txt b/storage/tokudb/CMakeLists.txt index 0fe8af39732..dfca3c67ded 100644 --- a/storage/tokudb/CMakeLists.txt +++ b/storage/tokudb/CMakeLists.txt @@ -31,6 +31,25 @@ IF(DEFINED TOKUDB_CHECK_JEMALLOC) ADD_DEFINITIONS("-DTOKUDB_CHECK_JEMALLOC=${TOKUDB_CHECK_JEMALLOC}") ENDIF() +## adds a compiler flag if the compiler supports it +include(CheckCCompilerFlag) +include(CheckCXXCompilerFlag) + +macro(set_cflags_if_supported) + foreach(flag ${ARGN}) + check_c_compiler_flag(${flag} HAVE_C_${flag}) + if (HAVE_C_${flag}) + set(CMAKE_C_FLAGS "${flag} ${CMAKE_C_FLAGS}") + endif () + check_cxx_compiler_flag(${flag} HAVE_CXX_${flag}) + if (HAVE_CXX_${flag}) + set(CMAKE_CXX_FLAGS "${flag} ${CMAKE_CXX_FLAGS}") + endif () + endforeach(flag) +endmacro(set_cflags_if_supported) + +set_cflags_if_supported(-Wno-missing-field-initializers) + ADD_SUBDIRECTORY(ft-index) INCLUDE_DIRECTORIES(ft-index) From 86fbfc34268468b213cea98a08fdd44e5c1fa343 Mon Sep 17 00:00:00 2001 From: Rich Prohaska Date: Thu, 3 Jul 2014 06:13:29 -0400 Subject: [PATCH 064/190] #264 tokuftdump should dump header with default args --- ft/tokuftdump.cc | 12 
++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/ft/tokuftdump.cc b/ft/tokuftdump.cc index 48515414e31..a9394f581e7 100644 --- a/ft/tokuftdump.cc +++ b/ft/tokuftdump.cc @@ -249,11 +249,10 @@ static void dump_node(int fd, BLOCKNUM blocknum, FT ft) { printf(" layout_version_read_from_disk=%d\n", n->layout_version_read_from_disk); printf(" build_id=%d\n", n->build_id); printf(" max_msn_applied_to_node_on_disk=%" PRId64 " (0x%" PRIx64 ")\n", n->max_msn_applied_to_node_on_disk.msn, n->max_msn_applied_to_node_on_disk.msn); - printf("io time %lf decompress time %lf deserialize time %lf\n", - tokutime_to_seconds(bfe.io_time), - tokutime_to_seconds(bfe.decompress_time), - tokutime_to_seconds(bfe.deserialize_time) - ); + printf(" io time %lf decompress time %lf deserialize time %lf\n", + tokutime_to_seconds(bfe.io_time), + tokutime_to_seconds(bfe.decompress_time), + tokutime_to_seconds(bfe.deserialize_time)); printf(" n_children=%d\n", n->n_children); printf(" pivotkeys.total_size()=%u\n", (unsigned) n->pivotkeys.total_size()); @@ -695,8 +694,9 @@ int main (int argc, const char *const argv[]) { } if (!do_header && !do_rootnode && !do_fragmentation && !do_translation_table && !do_garbage) { printf("Block translation:"); - toku_dump_translation_table(stdout, ft->blocktable); + + dump_header(ft); struct __dump_node_extra info; info.fd = fd; From cb182c2043b21497c4646bbf26e354cbe0de4740 Mon Sep 17 00:00:00 2001 From: Zardosht Kasheff Date: Thu, 3 Jul 2014 09:09:12 -0400 Subject: [PATCH 065/190] FT-287, fix typo --- ft/cachetable.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ft/cachetable.cc b/ft/cachetable.cc index 892f2266641..7eaf1c95679 100644 --- a/ft/cachetable.cc +++ b/ft/cachetable.cc @@ -144,7 +144,7 @@ status_init(void) { STATUS_INIT(CT_SIZE_LEAF, CACHETABLE_SIZE_LEAF, UINT64, "size leaf", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); STATUS_INIT(CT_SIZE_ROLLBACK, CACHETABLE_SIZE_ROLLBACK, UINT64, "size rollback", 
TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); STATUS_INIT(CT_SIZE_CACHEPRESSURE, CACHETABLE_SIZE_CACHEPRESSURE, UINT64, "size cachepressure", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); - STATUS_INIT(CT_SIZE_CLONED, CACHETABLE_SIZE_CACHEPRESSURE, UINT64, "size currently cloned data for checkpoint", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); + STATUS_INIT(CT_SIZE_CLONED, CACHETABLE_SIZE_CLONED, UINT64, "size currently cloned data for checkpoint", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); STATUS_INIT(CT_EVICTIONS, CACHETABLE_EVICTIONS, UINT64, "evictions", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); STATUS_INIT(CT_CLEANER_EXECUTIONS, CACHETABLE_CLEANER_EXECUTIONS, UINT64, "cleaner executions", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); STATUS_INIT(CT_CLEANER_PERIOD, CACHETABLE_CLEANER_PERIOD, UINT64, "cleaner period", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); From da7029856e51239ec0d894060acb224408d0f43f Mon Sep 17 00:00:00 2001 From: John Esmet Date: Thu, 3 Jul 2014 11:14:41 -0400 Subject: [PATCH 066/190] FT-288 Fix warn-uninitialized errors found by gcc 4.9 --- ft/ft-serialize.cc | 6 +++--- ft/ft_node-serialize.cc | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/ft/ft-serialize.cc b/ft/ft-serialize.cc index 39d260deb7f..78f4e747614 100644 --- a/ft/ft-serialize.cc +++ b/ft/ft-serialize.cc @@ -660,11 +660,11 @@ toku_deserialize_ft_from(int fd, { struct rbuf rb_0; struct rbuf rb_1; - uint64_t checkpoint_count_0; - uint64_t checkpoint_count_1; + uint64_t checkpoint_count_0 = 0; + uint64_t checkpoint_count_1 = 0; LSN checkpoint_lsn_0; LSN checkpoint_lsn_1; - uint32_t version_0, version_1, version = 0; + uint32_t version_0 = 0, version_1 = 0, version = 0; bool h0_acceptable = false; bool h1_acceptable = false; struct rbuf *rb = NULL; diff --git a/ft/ft_node-serialize.cc b/ft/ft_node-serialize.cc index 2cdac27918b..e05c2017123 100644 --- a/ft/ft_node-serialize.cc +++ b/ft/ft_node-serialize.cc @@ -197,7 +197,7 @@ toku_maybe_preallocate_in_file (int fd, int64_t size, int64_t 
expected_size, int // Effect: make the file bigger by either doubling it or growing by 16MiB whichever is less, until it is at least size // Return 0 on success, otherwise an error number. { - int64_t file_size; + int64_t file_size = 0; //TODO(yoni): Allow variable stripe_width (perhaps from ft) for larger raids const uint64_t stripe_width = 4096; { From 7ffd1fab114cbf635abb6c8c1d1d33654a748045 Mon Sep 17 00:00:00 2001 From: John Esmet Date: Thu, 3 Jul 2014 12:29:48 -0400 Subject: [PATCH 067/190] FT-242 Break up fttypes.h completely. FT-296 Move bytestring class to utils/ --- ft/block_table.cc | 8 +- ft/block_table.h | 93 ++++++--- ft/bndata.cc | 10 +- ft/cachetable.h | 18 +- ft/checkpoint.cc | 18 +- ft/checkpoint.h | 8 +- ft/comparator.h | 6 +- ft/cursor.cc | 10 +- ft/cursor.h | 2 +- ft/ft-cachetable-wrappers.cc | 1 - ft/ft-flusher-internal.h | 2 - ft/ft-internal.h | 35 ++-- ft/ft-node-deserialize.cc | 2 +- ft/ft-ops.cc | 6 +- ft/ft-ops.h | 28 ++- ft/ft-serialize.cc | 26 +-- ft/ft-test-helpers.cc | 13 +- ft/ft-verify.cc | 10 +- ft/ft.h | 24 ++- ft/ft_node-serialize.cc | 18 +- ft/fttypes.h | 196 ------------------ ft/ftverify.cc | 21 +- ft/le-cursor.h | 2 +- ft/loader/dbufio.cc | 15 +- ft/loader/loader-internal.h | 29 +-- ft/loader/loader.h | 3 + ft/log-internal.h | 97 --------- ft/log.h | 17 +- ft/logformat.cc | 2 +- ft/logger.cc | 1 - ft/logger.h | 87 +++++++- ft/msg.cc | 12 +- ft/msg.h | 30 +++ ft/msg_buffer.cc | 12 +- ft/msg_buffer.h | 5 +- ft/node.cc | 2 +- ft/node.h | 19 +- ft/pivotkeys.cc | 2 +- ft/rbuf.h | 73 +------ ft/recover.h | 13 +- ft/roll.cc | 1 - ft/rollback-apply.cc | 5 +- ft/rollback-ct-callbacks.cc | 1 - ft/rollback-ct-callbacks.h | 4 +- ft/rollback.h | 5 +- ft/sub_block.cc | 22 +- ft/sub_block.h | 10 +- ft/tests/block_allocator_test.cc | 1 - ft/tests/ft-test-cursor-2.cc | 2 +- ft/tests/ft-test-cursor.cc | 2 +- ft/tests/ft-test5.cc | 2 +- ft/tests/ftloader-test.cc | 2 +- ft/tests/le-cursor-provdel.cc | 2 +- ft/tests/le-cursor-right.cc | 2 +- 
ft/tests/le-cursor-walk.cc | 2 +- ft/tests/test-leafentry-child-txn.cc | 5 +- ft/tests/test-leafentry-nested.cc | 5 +- ft/tests/test.h | 39 ++-- ft/tests/test3856.cc | 2 +- ft/tests/test_logcursor.cc | 1 - ft/tests/upgrade_test_simple.cc | 2 +- ft/tests/ybt-test.cc | 12 +- ft/tokuconst.h | 6 +- ft/tokuftdump.cc | 9 +- ft/txn.cc | 1 - ft/txn.h | 290 ++++++++++++++++++++++----- ft/txn_child_manager.h | 10 +- ft/txn_manager.h | 31 +-- ft/ule.cc | 16 +- ft/wbuf.h | 90 +-------- ft/xids-internal.h | 4 +- ft/xids.cc | 21 +- ft/xids.h | 13 +- ft/ybt.cc | 5 +- ft/ybt.h | 8 +- locktree/lock_request.cc | 1 + locktree/lock_request.h | 11 +- locktree/locktree.h | 2 +- locktree/treenode.h | 1 + locktree/txnid_set.h | 4 +- locktree/wfg.h | 7 +- src/indexer.cc | 2 +- src/tests/hotindexer-undo-do-test.cc | 2 - src/ydb-internal.h | 1 - src/ydb_cursor.cc | 32 +-- src/ydb_db.cc | 3 +- util/bytestring.h | 96 +++++++++ 87 files changed, 885 insertions(+), 886 deletions(-) delete mode 100644 ft/fttypes.h create mode 100644 util/bytestring.h diff --git a/ft/block_table.cc b/ft/block_table.cc index f6e7d543408..800ad3dc09e 100644 --- a/ft/block_table.cc +++ b/ft/block_table.cc @@ -97,7 +97,6 @@ PATENT RIGHTS GRANT: #include "ft/block_allocator.h" #include "ft/block_table.h" #include "ft/ft-internal.h" // ugly but pragmatic, need access to dirty bits while holding translation lock -#include "ft/fttypes.h" // TODO: reorganize this dependency #include "ft/ft-ops.h" // for toku_maybe_truncate_file #include "ft/rbuf.h" @@ -925,10 +924,9 @@ translation_deserialize_from_buffer(struct translation *t, // destination int assert(t->smallest_never_used_blocknum.b >= RESERVED_BLOCKNUMS); t->blocknum_freelist_head = rbuf_blocknum(&rt); XMALLOC_N(t->length_of_array, t->block_translation); - int64_t i; - for (i=0; i < t->length_of_array; i++) { - t->block_translation[i].u.diskoff = rbuf_diskoff(&rt); - t->block_translation[i].size = rbuf_diskoff(&rt); + for (int64_t i = 0; i < t->length_of_array; i++) { 
+ t->block_translation[i].u.diskoff = rbuf_DISKOFF(&rt); + t->block_translation[i].size = rbuf_DISKOFF(&rt); } assert(calculate_size_on_disk(t) == (int64_t)size_on_disk); assert(t->block_translation[RESERVED_BLOCKNUM_TRANSLATION].size == (int64_t)size_on_disk); diff --git a/ft/block_table.h b/ft/block_table.h index 42b52c3e60b..52ea57ed0bd 100644 --- a/ft/block_table.h +++ b/ft/block_table.h @@ -91,12 +91,26 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#include "fttypes.h" -#include "ft/ft-internal.h" +#include + +#include "portability/toku_stdint.h" + +struct ft; typedef struct block_table *BLOCK_TABLE; -//Needed by tests, ftdump +typedef struct blocknum_s { int64_t b; } BLOCKNUM; +static inline BLOCKNUM make_blocknum(int64_t b) { + BLOCKNUM result = { .b = b }; + return result; +} +static const BLOCKNUM ROLLBACK_NONE = { .b = 0 }; + + +// Offset in a disk. -1 is the 'null' pointer. +typedef int64_t DISKOFF; + +// Needed by tests, ftdump struct block_translation_pair { union { // If in the freelist, use next_free_blocknum, otherwise diskoff. 
DISKOFF diskoff; @@ -109,8 +123,8 @@ void toku_blocktable_create_new(BLOCK_TABLE *btp); int toku_blocktable_create_from_buffer(int fd, BLOCK_TABLE *btp, DISKOFF location_on_disk, DISKOFF size_on_disk, unsigned char *translation_buffer); void toku_blocktable_destroy(BLOCK_TABLE *btp); -void toku_ft_lock(FT ft); -void toku_ft_unlock(FT ft); +void toku_ft_lock(struct ft *ft); +void toku_ft_unlock(struct ft *ft); void toku_block_translation_note_start_checkpoint_unlocked(BLOCK_TABLE bt); void toku_block_translation_note_end_checkpoint(BLOCK_TABLE bt, int fd); @@ -118,38 +132,39 @@ void toku_block_translation_note_skipped_checkpoint(BLOCK_TABLE bt); void toku_maybe_truncate_file_on_open(BLOCK_TABLE bt, int fd); //Blocknums -void toku_allocate_blocknum(BLOCK_TABLE bt, BLOCKNUM *res, FT ft); -void toku_allocate_blocknum_unlocked(BLOCK_TABLE bt, BLOCKNUM *res, FT ft); -void toku_free_blocknum(BLOCK_TABLE bt, BLOCKNUM *b, FT ft, bool for_checkpoint); +void toku_allocate_blocknum(BLOCK_TABLE bt, BLOCKNUM *res, struct ft *ft); +void toku_allocate_blocknum_unlocked(BLOCK_TABLE bt, BLOCKNUM *res, struct ft *ft); +void toku_free_blocknum(BLOCK_TABLE bt, BLOCKNUM *b, struct ft *ft, bool for_checkpoint); void toku_verify_blocknum_allocated(BLOCK_TABLE bt, BLOCKNUM b); void toku_block_verify_no_data_blocks_except_root(BLOCK_TABLE bt, BLOCKNUM root); void toku_free_unused_blocknums(BLOCK_TABLE bt, BLOCKNUM root); void toku_block_verify_no_free_blocknums(BLOCK_TABLE bt); -void toku_realloc_descriptor_on_disk(BLOCK_TABLE bt, DISKOFF size, DISKOFF *offset, FT ft, int fd); -void toku_realloc_descriptor_on_disk_unlocked(BLOCK_TABLE bt, DISKOFF size, DISKOFF *offset, FT ft); +void toku_realloc_descriptor_on_disk(BLOCK_TABLE bt, DISKOFF size, DISKOFF *offset, struct ft *ft, int fd); +void toku_realloc_descriptor_on_disk_unlocked(BLOCK_TABLE bt, DISKOFF size, DISKOFF *offset, struct ft *ft); void toku_get_descriptor_offset_size(BLOCK_TABLE bt, DISKOFF *offset, DISKOFF *size); //Blocks and 
Blocknums -void toku_blocknum_realloc_on_disk(BLOCK_TABLE bt, BLOCKNUM b, DISKOFF size, DISKOFF *offset, FT ft, int fd, bool for_checkpoint); +void toku_blocknum_realloc_on_disk(BLOCK_TABLE bt, BLOCKNUM b, DISKOFF size, DISKOFF *offset, struct ft *ft, int fd, bool for_checkpoint); void toku_translate_blocknum_to_offset_size(BLOCK_TABLE bt, BLOCKNUM b, DISKOFF *offset, DISKOFF *size); //Serialization void toku_serialize_translation_to_wbuf(BLOCK_TABLE bt, int fd, struct wbuf *w, int64_t *address, int64_t *size); - void toku_block_table_swap_for_redirect(BLOCK_TABLE old_bt, BLOCK_TABLE new_bt); - //DEBUG ONLY (ftdump included), tests included void toku_blocknum_dump_translation(BLOCK_TABLE bt, BLOCKNUM b); void toku_dump_translation_table_pretty(FILE *f, BLOCK_TABLE bt); void toku_dump_translation_table(FILE *f, BLOCK_TABLE bt); void toku_block_free(BLOCK_TABLE bt, uint64_t offset); -typedef int(*BLOCKTABLE_CALLBACK)(BLOCKNUM b, int64_t size, int64_t address, void *extra); -enum translation_type {TRANSLATION_NONE=0, - TRANSLATION_CURRENT, - TRANSLATION_INPROGRESS, - TRANSLATION_CHECKPOINTED, - TRANSLATION_DEBUG}; +typedef int (*BLOCKTABLE_CALLBACK)(BLOCKNUM b, int64_t size, int64_t address, void *extra); + +enum translation_type { + TRANSLATION_NONE = 0, + TRANSLATION_CURRENT, + TRANSLATION_INPROGRESS, + TRANSLATION_CHECKPOINTED, + TRANSLATION_DEBUG +}; int toku_blocktable_iterate(BLOCK_TABLE bt, enum translation_type type, BLOCKTABLE_CALLBACK f, void *extra, bool data_only, bool used_only); void toku_blocktable_internal_fragmentation(BLOCK_TABLE bt, int64_t *total_sizep, int64_t *used_sizep); @@ -166,8 +181,40 @@ int toku_blocktable_iterate_translation_tables(BLOCK_TABLE, uint64_t, int (*)(ui //Unmovable reserved first, then reallocable. // We reserve one blocknum for the translation table itself. 
-enum {RESERVED_BLOCKNUM_NULL =0, - RESERVED_BLOCKNUM_TRANSLATION=1, - RESERVED_BLOCKNUM_DESCRIPTOR =2, - RESERVED_BLOCKNUMS}; +enum { + RESERVED_BLOCKNUM_NULL = 0, + RESERVED_BLOCKNUM_TRANSLATION = 1, + RESERVED_BLOCKNUM_DESCRIPTOR = 2, + RESERVED_BLOCKNUMS +}; +// For serialize / deserialize + +#include "ft/wbuf.h" + +static inline void wbuf_BLOCKNUM (struct wbuf *w, BLOCKNUM b) { + wbuf_ulonglong(w, b.b); +} + +static inline void wbuf_nocrc_BLOCKNUM (struct wbuf *w, BLOCKNUM b) { + wbuf_nocrc_ulonglong(w, b.b); +} + +static inline void wbuf_DISKOFF(struct wbuf *wb, DISKOFF off) { + wbuf_ulonglong(wb, (uint64_t) off); +} + +#include "ft/rbuf.h" + +static inline DISKOFF rbuf_DISKOFF(struct rbuf *rb) { + return rbuf_ulonglong(rb); +} + +static inline BLOCKNUM rbuf_blocknum(struct rbuf *rb) { + BLOCKNUM result = make_blocknum(rbuf_longlong(rb)); + return result; +} + +static inline void rbuf_ma_BLOCKNUM(struct rbuf *rb, memarena *UU(ma), BLOCKNUM *blocknum) { + *blocknum = rbuf_blocknum(rb); +} diff --git a/ft/bndata.cc b/ft/bndata.cc index 36e97b8c934..b95e02cda10 100644 --- a/ft/bndata.cc +++ b/ft/bndata.cc @@ -89,8 +89,8 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#include -#include +#include +#include using namespace toku; uint32_t bn_data::klpair_disksize(const uint32_t klpair_len, const klpair_struct *klpair) const { @@ -129,14 +129,14 @@ void bn_data::initialize_from_separate_keys_and_vals(uint32_t num_entries, struc uint32_t ndone_before = rb->ndone; init_zero(); invariant(all_keys_same_length); // Until otherwise supported. 
- bytevec keys_src; + const void *keys_src; rbuf_literal_bytes(rb, &keys_src, key_data_size); //Generate dmt this->m_buffer.create_from_sorted_memory_of_fixed_size_elements( keys_src, num_entries, key_data_size, fixed_klpair_length); toku_mempool_construct(&this->m_buffer_mempool, val_data_size); - bytevec vals_src; + const void *vals_src; rbuf_literal_bytes(rb, &vals_src, val_data_size); if (num_entries > 0) { @@ -256,7 +256,7 @@ void bn_data::deserialize_from_rbuf(uint32_t num_entries, struct rbuf *rb, uint3 } } // Version >= 26 and version 25 deserialization are now identical except that <= 25 might allocate too much memory. - bytevec bytes; + const void *bytes; rbuf_literal_bytes(rb, &bytes, data_size); const unsigned char *CAST_FROM_VOIDP(buf, bytes); if (data_size == 0) { diff --git a/ft/cachetable.h b/ft/cachetable.h index b53cefc9204..d657abbc2fe 100644 --- a/ft/cachetable.h +++ b/ft/cachetable.h @@ -94,7 +94,9 @@ PATENT RIGHTS GRANT: #include -#include "ft/fttypes.h" +#include "ft/block_table.h" +#include "ft/logger.h" +#include "ft/txn.h" #include "util/minicron.h" // Maintain a cache mapping from cachekeys to values (void*) @@ -159,7 +161,7 @@ uint32_t toku_get_cleaner_iterations_unlocked (CACHETABLE ct); // create and initialize a cache table // size_limit is the upper limit on the size of the size of the values in the table // pass 0 if you want the default -int toku_cachetable_create(CACHETABLE *result, long size_limit, LSN initial_lsn, TOKULOGGER); +int toku_cachetable_create(CACHETABLE *result, long size_limit, LSN initial_lsn, struct tokulogger *logger); // Create a new cachetable. // Effects: a new cachetable is created and initialized. 
@@ -184,9 +186,9 @@ int toku_cachefile_of_iname_in_env (CACHETABLE ct, const char *iname_in_env, CAC // Return the filename char *toku_cachefile_fname_in_cwd (CACHEFILE cf); -void toku_cachetable_begin_checkpoint (CHECKPOINTER cp, TOKULOGGER); +void toku_cachetable_begin_checkpoint (CHECKPOINTER cp, struct tokulogger *logger); -void toku_cachetable_end_checkpoint(CHECKPOINTER cp, TOKULOGGER logger, +void toku_cachetable_end_checkpoint(CHECKPOINTER cp, struct tokulogger *logger, void (*testcallback_f)(void*), void * testextra); // Shuts down checkpoint thread @@ -544,15 +546,15 @@ void toku_cachefile_unlink_on_close(CACHEFILE cf); bool toku_cachefile_is_unlink_on_close(CACHEFILE cf); // Return the logger associated with the cachefile -TOKULOGGER toku_cachefile_logger (CACHEFILE); +struct tokulogger *toku_cachefile_logger(CACHEFILE cf); // Return the filenum associated with the cachefile -FILENUM toku_cachefile_filenum (CACHEFILE); +FILENUM toku_cachefile_filenum(CACHEFILE cf); // Effect: Return a 32-bit hash key. The hash key shall be suitable for using with bitmasking for a table of size power-of-two. 
-uint32_t toku_cachetable_hash (CACHEFILE cachefile, CACHEKEY key); +uint32_t toku_cachetable_hash(CACHEFILE cf, CACHEKEY key); -uint32_t toku_cachefile_fullhash_of_header (CACHEFILE cachefile); +uint32_t toku_cachefile_fullhash_of_header(CACHEFILE cf); // debug functions diff --git a/ft/checkpoint.cc b/ft/checkpoint.cc index 98a903675a0..88e984367fb 100644 --- a/ft/checkpoint.cc +++ b/ft/checkpoint.cc @@ -126,18 +126,18 @@ PATENT RIGHTS GRANT: * *****/ -#include #include +#include "portability/toku_portability.h" +#include "portability/toku_atomic.h" + +#include "ft/cachetable.h" #include "ft/ft.h" -#include "fttypes.h" -#include "cachetable.h" -#include "log-internal.h" -#include "logger.h" -#include "checkpoint.h" -#include -#include -#include +#include "ft/log-internal.h" +#include "ft/logger.h" +#include "ft/checkpoint.h" +#include "util/frwlock.h" +#include "util/status.h" /////////////////////////////////////////////////////////////////////////////////// // Engine status diff --git a/ft/checkpoint.h b/ft/checkpoint.h index 63acfa7c0bc..fd08cd90e6f 100644 --- a/ft/checkpoint.h +++ b/ft/checkpoint.h @@ -160,13 +160,11 @@ typedef enum {SCHEDULED_CHECKPOINT = 0, // "normal" checkpoint taken on check // Callbacks are called during checkpoint procedure while checkpoint_safe lock is still held. // Callbacks are primarily intended for use in testing. // caller_id identifies why the checkpoint is being taken. -int toku_checkpoint(CHECKPOINTER cp, TOKULOGGER logger, - void (*callback_f)(void*), void * extra, - void (*callback2_f)(void*), void * extra2, +int toku_checkpoint(CHECKPOINTER cp, struct tokulogger *logger, + void (*callback_f)(void *extra), void *extra, + void (*callback2_f)(void *extra2), void *extra2, checkpoint_caller_t caller_id); - - /****** * These functions are called from the ydb level. * They return status information and have no side effects. 
diff --git a/ft/comparator.h b/ft/comparator.h index 6cfd7aee883..2b38283adfa 100644 --- a/ft/comparator.h +++ b/ft/comparator.h @@ -94,11 +94,13 @@ PATENT RIGHTS GRANT: #include #include -#include +//#include #include +typedef int (*ft_compare_func)(DB *db, const DBT *a, const DBT *b); + // TODO: this should really all be encapsulated in ft/comparator.cc -int toku_builtin_compare_fun(DB *, const DBT *a, const DBT *b) __attribute__((__visibility__("default"))); +int toku_builtin_compare_fun(DB *db, const DBT *a, const DBT *b) __attribute__((__visibility__("default"))); namespace toku { diff --git a/ft/cursor.cc b/ft/cursor.cc index bacb650a47c..d1679feb365 100644 --- a/ft/cursor.cc +++ b/ft/cursor.cc @@ -226,8 +226,8 @@ static int ft_cursor_compare_set(const ft_search &search, const DBT *x) { } static int -ft_cursor_current_getf(ITEMLEN keylen, bytevec key, - ITEMLEN vallen, bytevec val, +ft_cursor_current_getf(uint32_t keylen, const void *key, + uint32_t vallen, const void *val, void *v, bool lock_only) { struct ft_cursor_search_struct *CAST_FROM_VOIDP(bcss, v); int r; @@ -286,7 +286,7 @@ int toku_ft_cursor_last(FT_CURSOR cursor, FT_GET_CALLBACK_FUNCTION getf, void *g return r; } -int toku_ft_cursor_check_restricted_range(FT_CURSOR c, bytevec key, ITEMLEN keylen) { +int toku_ft_cursor_check_restricted_range(FT_CURSOR c, const void *key, uint32_t keylen) { if (c->out_of_range_error) { FT ft = c->ft_handle->ft; DBT found_key; @@ -367,8 +367,8 @@ int toku_ft_cursor_next(FT_CURSOR cursor, FT_GET_CALLBACK_FUNCTION getf, void *g return r; } -static int ft_cursor_search_eq_k_x_getf(ITEMLEN keylen, bytevec key, - ITEMLEN vallen, bytevec val, +static int ft_cursor_search_eq_k_x_getf(uint32_t keylen, const void *key, + uint32_t vallen, const void *val, void *v, bool lock_only) { struct ft_cursor_search_struct *CAST_FROM_VOIDP(bcss, v); int r; diff --git a/ft/cursor.h b/ft/cursor.h index 21194f91140..947b69ccf47 100644 --- a/ft/cursor.h +++ b/ft/cursor.h @@ -220,7 +220,7 @@ 
bool toku_ft_cursor_uninitialized(FT_CURSOR cursor) __attribute__ ((warn_unused_ void toku_ft_cursor_peek(FT_CURSOR cursor, const DBT **pkey, const DBT **pval); -int toku_ft_cursor_check_restricted_range(FT_CURSOR cursor, bytevec key, ITEMLEN keylen); +int toku_ft_cursor_check_restricted_range(FT_CURSOR cursor, const void *key, uint32_t keylen); int toku_ft_cursor_shortcut(FT_CURSOR cursor, int direction, uint32_t index, bn_data *bd, FT_GET_CALLBACK_FUNCTION getf, void *getf_v, diff --git a/ft/ft-cachetable-wrappers.cc b/ft/ft-cachetable-wrappers.cc index 105a2f03dc6..e8fc271bed4 100644 --- a/ft/ft-cachetable-wrappers.cc +++ b/ft/ft-cachetable-wrappers.cc @@ -90,7 +90,6 @@ PATENT RIGHTS GRANT: #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." #include "ft/block_table.h" -#include "ft/fttypes.h" #include "ft/ft-cachetable-wrappers.h" #include "ft/ft-flusher.h" #include "ft/ft-internal.h" diff --git a/ft/ft-flusher-internal.h b/ft/ft-flusher-internal.h index b3568fe95c2..539ded24def 100644 --- a/ft/ft-flusher-internal.h +++ b/ft/ft-flusher-internal.h @@ -91,8 +91,6 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
-#include - #define flt_flush_before_applying_inbox 1 #define flt_flush_before_child_pin 2 #define ft_flush_aflter_child_pin 3 diff --git a/ft/ft-internal.h b/ft/ft-internal.h index f3d6c5fc2cb..fadab70917e 100644 --- a/ft/ft-internal.h +++ b/ft/ft-internal.h @@ -92,8 +92,16 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#include -#include +#include "portability/toku_config.h" +#include "portability/toku_list.h" +#include "portability/toku_race_tools.h" + +#include "ft/cachetable.h" +#include "ft/comparator.h" +#include "ft/ft.h" +#include "ft/ft-ops.h" +#include "ft/node.h" +#include "ft/rollback.h" // Symbol TOKUDB_REVISION is not defined by fractal-tree makefiles, so // BUILD_ID of 1000 indicates development build of main, not a release build. @@ -103,19 +111,6 @@ PATENT RIGHTS GRANT: #error #endif -#include "ft_layout_version.h" -#include "block_allocator.h" -#include "cachetable.h" -#include "toku_list.h" -#include -#include "leafentry.h" -#include "compress.h" -#include -#include "ft/bndata.h" -#include "ft/comparator.h" -#include "ft/rollback.h" -#include "ft/msg_buffer.h" - struct block_table; struct ft_search; @@ -200,6 +195,7 @@ struct ft_header { STAT64INFO_S on_disk_stats; }; +typedef struct ft_header *FT_HEADER; // ft_header is always the current version. struct ft { @@ -267,7 +263,6 @@ struct ft { // - if our attempt fails because the key was not in range of the rightmost leaf, we reset the score back to 0 uint32_t seqinsert_score; }; -typedef struct ft *FT; // Allocate a DB struct off the stack and only set its comparison // descriptor. 
We don't bother setting any other fields because @@ -312,6 +307,8 @@ bool toku_ftnode_pf_req_callback(void* ftnode_pv, void* read_extraargs); int toku_ftnode_pf_callback(void* ftnode_pv, void* UU(disk_data), void* read_extraargs, int fd, PAIR_ATTR* sizep); int toku_ftnode_cleaner_callback( void *ftnode_pv, BLOCKNUM blocknum, uint32_t fullhash, void *extraargs); +CACHETABLE_WRITE_CALLBACK get_write_callbacks_for_node(FT ft); + /* serialization code */ void toku_create_compressed_partition_from_available(FTNODE node, int childnum, enum toku_compression_method compression_method, @@ -497,7 +494,7 @@ void toku_create_new_ftnode(FT_HANDLE ft_handle, FTNODE *result, int height, int // toku_testsetup_initialize() must be called before any other test_setup_xxx() functions are called. void toku_testsetup_initialize(void); int toku_testsetup_leaf(FT_HANDLE ft_h, BLOCKNUM *blocknum, int n_children, char **keys, int *keylens); -int toku_testsetup_nonleaf (FT_HANDLE ft_h, int height, BLOCKNUM *diskoff, int n_children, BLOCKNUM *children, char **keys, int *keylens); +int toku_testsetup_nonleaf (FT_HANDLE ft_h, int height, BLOCKNUM *blocknum, int n_children, BLOCKNUM *children, char **keys, int *keylens); int toku_testsetup_root(FT_HANDLE ft_h, BLOCKNUM); int toku_testsetup_get_sersize(FT_HANDLE ft_h, BLOCKNUM); // Return the size on disk. int toku_testsetup_insert_to_leaf (FT_HANDLE ft_h, BLOCKNUM, const char *key, int keylen, const char *val, int vallen); @@ -685,11 +682,11 @@ int toku_upgrade_msn_from_root_to_header(int fd, FT ft) __attribute__((nonnull)) // The cursor object will have been updated (so that if result==0 the current value is the value being passed) // (If r!=0 then the cursor won't have been updated.) // If r!=0, it's up to the callback function to return that value of r. 
-// A 'key' bytevec of NULL means that element is not found (effectively infinity or +// A 'key' pointer of NULL means that element is not found (effectively infinity or // -infinity depending on direction) // When lock_only is false, the callback does optional lock tree locking and then processes the key and val. // When lock_only is true, the callback only does optional lock tree locking. -typedef int (*FT_GET_CALLBACK_FUNCTION)(ITEMLEN keylen, bytevec key, ITEMLEN vallen, bytevec val, void *extra, bool lock_only); +typedef int (*FT_GET_CALLBACK_FUNCTION)(uint32_t keylen, const void *key, uint32_t vallen, const void *val, void *extra, bool lock_only); typedef bool (*FT_CHECK_INTERRUPT_CALLBACK)(void *extra); diff --git a/ft/ft-node-deserialize.cc b/ft/ft-node-deserialize.cc index 500b7960875..123035771db 100644 --- a/ft/ft-node-deserialize.cc +++ b/ft/ft-node-deserialize.cc @@ -132,7 +132,7 @@ int read_and_check_magic(struct rbuf *rb) { int r = 0; - bytevec magic; + const void *magic; rbuf_literal_bytes(rb, &magic, 8); if (memcmp(magic, "tokuleaf", 8)!=0 && memcmp(magic, "tokunode", 8)!=0) { diff --git a/ft/ft-ops.cc b/ft/ft-ops.cc index ee87a69e671..ec05d398e25 100644 --- a/ft/ft-ops.cc +++ b/ft/ft-ops.cc @@ -2286,7 +2286,7 @@ static int ft_leaf_get_relative_key_pos(FT ft, FTNODE leaf, const DBT *key, bool static void ft_insert_directly_into_leaf(FT ft, FTNODE leaf, int target_childnum, DBT *key, DBT *val, XIDS message_xids, enum ft_msg_type type, txn_gc_info *gc_info); -static int getf_nothing(ITEMLEN, bytevec, ITEMLEN, bytevec, void *, bool); +static int getf_nothing(uint32_t, const void *, uint32_t, const void *, void *, bool); static int ft_maybe_insert_into_rightmost_leaf(FT ft, DBT *key, DBT *val, XIDS message_xids, enum ft_msg_type type, txn_gc_info *gc_info, bool unique) @@ -3957,7 +3957,7 @@ try_again: /* ********************************* delete **************************************/ static int -getf_nothing (ITEMLEN UU(keylen), bytevec UU(key), 
ITEMLEN UU(vallen), bytevec UU(val), void *UU(pair_v), bool UU(lock_only)) { +getf_nothing (uint32_t UU(keylen), const void *UU(key), uint32_t UU(vallen), const void *UU(val), void *UU(pair_v), bool UU(lock_only)) { return 0; } @@ -4720,7 +4720,7 @@ int toku_ft_strerror_r(int error, char *buf, size_t buflen) } // when a and b are chars, return a-b is safe here because return type is int. No over/underflow possible. -int toku_keycompare (bytevec key1, ITEMLEN key1len, bytevec key2, ITEMLEN key2len) { +int toku_keycompare (const void *key1, uint32_t key1len, const void *key2, uint32_t key2len) { int comparelen = key1len -#include "cachetable.h" -#include "log.h" -#include "compress.h" + +#include "ft/cachetable.h" +#include "ft/comparator.h" #include "ft/msg.h" +#include "ft/ybt.h" + +typedef struct ft_handle *FT_HANDLE; int toku_open_ft_handle (const char *fname, int is_create, FT_HANDLE *, int nodesize, int basementnodesize, enum toku_compression_method compression_method, CACHETABLE, TOKUTXN, int(*)(DB *,const DBT*,const DBT*)) __attribute__ ((warn_unused_result)); @@ -128,7 +130,8 @@ void toku_ft_handle_get_fanout(FT_HANDLE, unsigned int *fanout); void toku_ft_set_bt_compare(FT_HANDLE ft_handle, ft_compare_func cmp_func); const toku::comparator &toku_ft_get_comparator(FT_HANDLE ft_handle); -void toku_ft_set_redirect_callback(FT_HANDLE ft_h, on_redirect_callback redir_cb, void* extra); +typedef void (*on_redirect_callback)(FT_HANDLE ft_handle, void *extra); +void toku_ft_set_redirect_callback(FT_HANDLE ft_handle, on_redirect_callback cb, void *extra); // How updates (update/insert/deletes) work: // There are two flavers of upsertdels: Singleton and broadcast. @@ -166,6 +169,9 @@ void toku_ft_set_redirect_callback(FT_HANDLE ft_h, on_redirect_callback redir_cb // Implementation note: Acquires a write lock on the entire database. // This function works by sending an BROADCAST-UPDATE message containing // the key and the extra. 
+typedef int (*ft_update_func)(DB *db, const DBT *key, const DBT *old_val, const DBT *extra, + void (*set_val)(const DBT *new_val, void *set_extra), + void *set_extra); void toku_ft_set_update(FT_HANDLE ft_h, ft_update_func update_fun); int toku_ft_handle_open(FT_HANDLE, const char *fname_in_env, @@ -182,6 +188,14 @@ void toku_ft_handle_close(FT_HANDLE ft_handle); // close an ft handle during recovery. the underlying ft must close, and will use the given lsn. void toku_ft_handle_close_recovery(FT_HANDLE ft_handle, LSN oplsn); +// At the ydb layer, a DICTIONARY_ID uniquely identifies an open dictionary. +// With the introduction of the loader (ticket 2216), it is possible for the file that holds +// an open dictionary to change, so these are now separate and independent unique identifiers (see FILENUM) +struct DICTIONARY_ID { + uint64_t dictid; +}; +static const DICTIONARY_ID DICTIONARY_ID_NONE = { .dictid = 0 }; + int toku_ft_handle_open_with_dict_id( FT_HANDLE ft_h, @@ -230,7 +244,7 @@ void toku_ft_delete (FT_HANDLE ft_h, DBT *k, TOKUTXN txn); void toku_ft_maybe_delete (FT_HANDLE ft_h, DBT *k, TOKUTXN txn, bool oplsn_valid, LSN oplsn, bool do_logging); TXNID toku_ft_get_oldest_referenced_xid_estimate(FT_HANDLE ft_h); -TXN_MANAGER toku_ft_get_txn_manager(FT_HANDLE ft_h); +struct txn_manager *toku_ft_get_txn_manager(FT_HANDLE ft_h); struct txn_gc_info; void toku_ft_send_insert(FT_HANDLE ft_h, DBT *key, DBT *val, XIDS xids, enum ft_msg_type type, txn_gc_info *gc_info); diff --git a/ft/ft-serialize.cc b/ft/ft-serialize.cc index 78f4e747614..3cf8f2a3294 100644 --- a/ft/ft-serialize.cc +++ b/ft/ft-serialize.cc @@ -89,10 +89,12 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 
11/760379 and to the patents and/or patent applications resulting from it." +#include "ft/block_allocator.h" #include "ft/block_table.h" #include "ft/compress.h" #include "ft/ft.h" #include "ft/ft-internal.h" +#include "ft/msg.h" // not version-sensitive because we only serialize a descriptor using the current layout_version uint32_t @@ -152,7 +154,7 @@ deserialize_descriptor_from_rbuf(struct rbuf *rb, DESCRIPTOR desc, int layout_ve } uint32_t size; - bytevec data; + const void *data; rbuf_bytes(rb, &data, &size); toku_memdup_dbt(&desc->dbt, data, size); } @@ -212,7 +214,7 @@ int deserialize_ft_versioned(int fd, struct rbuf *rb, FT *ftp, uint32_t version) //Verification of initial elements. //Check magic number - bytevec magic; + const void *magic; rbuf_literal_bytes(rb, &magic, 8); lazy_assert(memcmp(magic,"tokudata",8)==0); @@ -234,7 +236,7 @@ int deserialize_ft_versioned(int fd, struct rbuf *rb, FT *ftp, uint32_t version) size = rbuf_network_int(rb); lazy_assert(size == rb->size); - bytevec tmp_byte_order_check; + const void *tmp_byte_order_check; lazy_assert((sizeof tmp_byte_order_check) >= 8); rbuf_literal_bytes(rb, &tmp_byte_order_check, 8); //Must not translate byte order int64_t byte_order_stored; @@ -244,13 +246,13 @@ int deserialize_ft_versioned(int fd, struct rbuf *rb, FT *ftp, uint32_t version) uint64_t checkpoint_count; checkpoint_count = rbuf_ulonglong(rb); LSN checkpoint_lsn; - checkpoint_lsn = rbuf_lsn(rb); + checkpoint_lsn = rbuf_LSN(rb); unsigned nodesize; nodesize = rbuf_int(rb); DISKOFF translation_address_on_disk; - translation_address_on_disk = rbuf_diskoff(rb); + translation_address_on_disk = rbuf_DISKOFF(rb); DISKOFF translation_size_on_disk; - translation_size_on_disk = rbuf_diskoff(rb); + translation_size_on_disk = rbuf_DISKOFF(rb); lazy_assert(translation_address_on_disk > 0); lazy_assert(translation_size_on_disk > 0); @@ -343,7 +345,7 @@ int deserialize_ft_versioned(int fd, struct rbuf *rb, FT *ftp, uint32_t version) 
time_of_last_optimize_begin = rbuf_ulonglong(rb); time_of_last_optimize_end = rbuf_ulonglong(rb); count_of_optimize_in_progress = rbuf_int(rb); - msn_at_start_of_last_completed_optimize = rbuf_msn(rb); + msn_at_start_of_last_completed_optimize = rbuf_MSN(rb); } enum toku_compression_method compression_method; @@ -352,7 +354,7 @@ int deserialize_ft_versioned(int fd, struct rbuf *rb, FT *ftp, uint32_t version) if (ft->layout_version_read_from_disk >= FT_LAYOUT_VERSION_19) { unsigned char method = rbuf_char(rb); compression_method = (enum toku_compression_method) method; - highest_unused_msn_for_upgrade = rbuf_msn(rb); + highest_unused_msn_for_upgrade = rbuf_MSN(rb); } else { // we hard coded zlib until 5.2, then quicklz in 5.2 if (ft->layout_version_read_from_disk < FT_LAYOUT_VERSION_18) { @@ -365,7 +367,7 @@ int deserialize_ft_versioned(int fd, struct rbuf *rb, FT *ftp, uint32_t version) MSN max_msn_in_ft; max_msn_in_ft = ZERO_MSN; // We'll upgrade it from the root node later if necessary if (ft->layout_version_read_from_disk >= FT_LAYOUT_VERSION_21) { - max_msn_in_ft = rbuf_msn(rb); + max_msn_in_ft = rbuf_MSN(rb); } (void) rbuf_int(rb); //Read in checksum and ignore (already verified). 
@@ -552,7 +554,7 @@ int deserialize_ft_from_fd_into_rbuf(int fd, rbuf_init(rb, prefix, prefix_size); //Check magic number - bytevec magic; + const void *magic; rbuf_literal_bytes(rb, &magic, 8); if (memcmp(magic,"tokudata",8)!=0) { if ((*(uint64_t*)magic) == 0) { @@ -626,7 +628,7 @@ int deserialize_ft_from_fd_into_rbuf(int fd, } //Verify byte order - bytevec tmp_byte_order_check; + const void *tmp_byte_order_check; lazy_assert((sizeof toku_byte_order_host) == 8); rbuf_literal_bytes(rb, &tmp_byte_order_check, 8); //Must not translate byte order int64_t byte_order_stored; @@ -638,7 +640,7 @@ int deserialize_ft_from_fd_into_rbuf(int fd, //Load checkpoint count *checkpoint_count = rbuf_ulonglong(rb); - *checkpoint_lsn = rbuf_lsn(rb); + *checkpoint_lsn = rbuf_LSN(rb); //Restart at beginning during regular deserialization rb->ndone = 0; diff --git a/ft/ft-test-helpers.cc b/ft/ft-test-helpers.cc index 6b560d21e26..2593c7c70da 100644 --- a/ft/ft-test-helpers.cc +++ b/ft/ft-test-helpers.cc @@ -89,13 +89,12 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
-#include "ft-cachetable-wrappers.h" -#include "ft-flusher.h" -#include "ft-internal.h" -#include "ft.h" -#include "node.h" -#include "fttypes.h" -#include "ule.h" +#include "ft/ft.h" +#include "ft/ft-cachetable-wrappers.h" +#include "ft/ft-internal.h" +#include "ft/ft-flusher.h" +#include "ft/node.h" +#include "ft/ule.h" // dummymsn needed to simulate msn because messages are injected at a lower level than toku_ft_root_put_msg() #define MIN_DUMMYMSN ((MSN) {(uint64_t)1 << 62}) diff --git a/ft/ft-verify.cc b/ft/ft-verify.cc index 8593351492c..b5b4dfd711e 100644 --- a/ft/ft-verify.cc +++ b/ft/ft-verify.cc @@ -109,18 +109,18 @@ compare_pairs (FT_HANDLE ft_handle, const DBT *a, const DBT *b) { } static int -compare_pair_to_key (FT_HANDLE ft_handle, const DBT *a, bytevec key, ITEMLEN keylen) { +compare_pair_to_key (FT_HANDLE ft_handle, const DBT *a, const void *key, uint32_t keylen) { DBT y; return ft_handle->ft->cmp(a, toku_fill_dbt(&y, key, keylen)); } static int -verify_msg_in_child_buffer(FT_HANDLE ft_handle, enum ft_msg_type type, MSN msn, bytevec key, ITEMLEN keylen, bytevec UU(data), ITEMLEN UU(datalen), XIDS UU(xids), const DBT *lesser_pivot, const DBT *greatereq_pivot) +verify_msg_in_child_buffer(FT_HANDLE ft_handle, enum ft_msg_type type, MSN msn, const void *key, uint32_t keylen, const void *UU(data), uint32_t UU(datalen), XIDS UU(xids), const DBT *lesser_pivot, const DBT *greatereq_pivot) __attribute__((warn_unused_result)); UU() static int -verify_msg_in_child_buffer(FT_HANDLE ft_handle, enum ft_msg_type type, MSN msn, bytevec key, ITEMLEN keylen, bytevec UU(data), ITEMLEN UU(datalen), XIDS UU(xids), const DBT *lesser_pivot, const DBT *greatereq_pivot) { +verify_msg_in_child_buffer(FT_HANDLE ft_handle, enum ft_msg_type type, MSN msn, const void *key, uint32_t keylen, const void *UU(data), uint32_t UU(datalen), XIDS UU(xids), const DBT *lesser_pivot, const DBT *greatereq_pivot) { int result = 0; if (msn.msn == ZERO_MSN.msn) result = EINVAL; @@ -328,8 +328,8 
@@ struct verify_msg_fn { XIDS xid = msg.xids(); const void *key = msg.kdbt()->data; const void *data = msg.vdbt()->data; - ITEMLEN keylen = msg.kdbt()->size; - ITEMLEN datalen = msg.vdbt()->size; + uint32_t keylen = msg.kdbt()->size; + uint32_t datalen = msg.vdbt()->size; int r = verify_msg_in_child_buffer(ft_handle, type, msn, key, keylen, data, datalen, xid, curr_less_pivot, diff --git a/ft/ft.h b/ft/ft.h index a5a8ab9d5b8..2e01be9a0ed 100644 --- a/ft/ft.h +++ b/ft/ft.h @@ -91,13 +91,15 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#include "fttypes.h" -#include "ybt.h" #include -#include "cachetable.h" -#include "log.h" -#include "ft-ops.h" -#include "compress.h" + +#include "ft/cachetable.h" +#include "ft/ft-ops.h" +#include "ft/log.h" +#include "ft/ybt.h" + +typedef struct ft *FT; +typedef struct ft_options *FT_OPTIONS; // unlink a ft from the filesystem with or without a txn. // if with a txn, then the unlink happens on commit. 
@@ -173,9 +175,17 @@ void toku_ft_update_cmp_descriptor(FT ft); DESCRIPTOR toku_ft_get_descriptor(FT_HANDLE ft_handle); DESCRIPTOR toku_ft_get_cmp_descriptor(FT_HANDLE ft_handle); +typedef struct { + // delta versions in basements could be negative + int64_t numrows; + int64_t numbytes; +} STAT64INFO_S, *STAT64INFO; +static const STAT64INFO_S ZEROSTATS = { .numrows = 0, .numbytes = 0}; + void toku_ft_update_stats(STAT64INFO headerstats, STAT64INFO_S delta); void toku_ft_decrease_stats(STAT64INFO headerstats, STAT64INFO_S delta); +typedef void (*remove_ft_ref_callback)(FT ft, void *extra); void toku_ft_remove_reference(FT ft, bool oplsn_valid, LSN oplsn, remove_ft_ref_callback remove_ref, void *extra); @@ -220,5 +230,5 @@ struct toku_product_name_strings_struct { extern struct toku_product_name_strings_struct toku_product_name_strings; extern int tokudb_num_envs; -int toku_keycompare (bytevec key1, ITEMLEN key1len, bytevec key2, ITEMLEN key2len); +int toku_keycompare (const void *key1, uint32_t key1len, const void *key2, uint32_t key2len); int toku_builtin_compare_fun (DB *, const DBT *, const DBT*) __attribute__((__visibility__("default"))); diff --git a/ft/ft_node-serialize.cc b/ft/ft_node-serialize.cc index e05c2017123..28684ac4dbb 100644 --- a/ft/ft_node-serialize.cc +++ b/ft/ft_node-serialize.cc @@ -1131,7 +1131,7 @@ read_compressed_sub_block(struct rbuf *rb, struct sub_block *sb) int r = 0; sb->compressed_size = rbuf_int(rb); sb->uncompressed_size = rbuf_int(rb); - bytevec* cp = (bytevec*)&sb->compressed_ptr; + const void **cp = (const void **) &sb->compressed_ptr; rbuf_literal_bytes(rb, cp, sb->compressed_size); sb->xsum = rbuf_int(rb); // let's check the checksum @@ -1212,7 +1212,7 @@ deserialize_ftnode_info( struct rbuf rb; rbuf_init(&rb, (unsigned char *) sb->uncompressed_ptr, data_size); - node->max_msn_applied_to_node_on_disk = rbuf_msn(&rb); + node->max_msn_applied_to_node_on_disk = rbuf_MSN(&rb); (void)rbuf_int(&rb); node->flags = rbuf_int(&rb); 
node->height = rbuf_int(&rb); @@ -1488,7 +1488,7 @@ deserialize_ftnode_header_from_rbuf_if_small_enough (FTNODE *ftnode, goto cleanup; } - bytevec magic; + const void *magic; rbuf_literal_bytes(rb, &magic, 8); if (memcmp(magic, "tokuleaf", 8)!=0 && memcmp(magic, "tokunode", 8)!=0) { @@ -1556,8 +1556,8 @@ deserialize_ftnode_header_from_rbuf_if_small_enough (FTNODE *ftnode, } // Finish reading compressed the sub_block - bytevec* cp; - cp = (bytevec*)&sb_node_info.compressed_ptr; + const void **cp; + cp = (const void **) &sb_node_info.compressed_ptr; rbuf_literal_bytes(rb, cp, sb_node_info.compressed_size); sb_node_info.xsum = rbuf_int(rb); // let's check the checksum @@ -1954,7 +1954,7 @@ deserialize_and_upgrade_ftnode(FTNODE node, // Re-read the magic field from the previous call, since we are // restarting with a fresh rbuf. { - bytevec magic; + const void *magic; rbuf_literal_bytes(&rb, &magic, 8); // 1. magic } @@ -2036,7 +2036,7 @@ deserialize_ftnode_from_rbuf( // now start reading from rbuf // first thing we do is read the header information - bytevec magic; + const void *magic; rbuf_literal_bytes(rb, &magic, 8); if (memcmp(magic, "tokuleaf", 8)!=0 && memcmp(magic, "tokunode", 8)!=0) { @@ -2561,7 +2561,7 @@ deserialize_rollback_log_from_rbuf (BLOCKNUM blocknum, ROLLBACK_LOG_NODE *log_p, } //printf("Deserializing %lld datasize=%d\n", off, datasize); - bytevec magic; + const void *magic; rbuf_literal_bytes(rb, &magic, 8); lazy_assert(!memcmp(magic, "tokuroll", 8)); @@ -2594,7 +2594,7 @@ deserialize_rollback_log_from_rbuf (BLOCKNUM blocknum, ROLLBACK_LOG_NODE *log_p, while (rb->ndone < rb->size) { struct roll_entry *item; uint32_t rollback_fsize = rbuf_int(rb); //Already read 4. 
Rest is 4 smaller - bytevec item_vec; + const void *item_vec; rbuf_literal_bytes(rb, &item_vec, rollback_fsize-4); unsigned char* item_buf = (unsigned char*)item_vec; r = toku_parse_rollback(item_buf, rollback_fsize-4, &item, &result->rollentry_arena); diff --git a/ft/fttypes.h b/ft/fttypes.h deleted file mode 100644 index f65d3e3a6a8..00000000000 --- a/ft/fttypes.h +++ /dev/null @@ -1,196 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: - -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). - - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. 
- -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. - - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. 
If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#pragma once - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." - -#include -#ifndef _XOPEN_SOURCE -#define _XOPEN_SOURCE 500 -#endif -#define _FILE_OFFSET_BITS 64 - -#include "toku_assert.h" -#include -#include - - -// Use the C++ bool and constants (true false), rather than BOOL, TRUE, and FALSE. - -typedef struct ft_handle *FT_HANDLE; -typedef struct ftnode *FTNODE; -typedef struct ftnode_disk_data *FTNODE_DISK_DATA; -typedef struct ftnode_leaf_basement_node *BASEMENTNODE; -typedef struct ftnode_nonleaf_childinfo *NONLEAF_CHILDINFO; -typedef struct sub_block *SUB_BLOCK; -typedef struct ft *FT; -typedef struct ft_header *FT_HEADER; -typedef struct ft_options *FT_OPTIONS; - -typedef unsigned int ITEMLEN; -typedef const void *bytevec; - -typedef int64_t DISKOFF; /* Offset in a disk. -1 is the NULL pointer. 
*/ -typedef uint64_t TXNID; - -typedef struct txnid_pair_s { - TXNID parent_id64; - TXNID child_id64; -} TXNID_PAIR; - - -#define TXNID_NONE_LIVING ((TXNID)0) -#define TXNID_NONE ((TXNID)0) -#define TXNID_MAX ((TXNID)-1) - -static const TXNID_PAIR TXNID_PAIR_NONE = { .parent_id64 = TXNID_NONE, .child_id64 = TXNID_NONE }; - -typedef struct blocknum_s { int64_t b; } BLOCKNUM; // make a struct so that we will notice type problems. -typedef struct gid_s { uint8_t *gid; } GID; // the gid is of size [DB_GID_SIZE] -typedef TOKU_XA_XID *XIDP; // this is the type that's passed to the logger code (so that we don't have to copy all 152 bytes when only a subset are even valid.) -#define ROLLBACK_NONE ((BLOCKNUM){0}) - -static inline BLOCKNUM make_blocknum(int64_t b) { BLOCKNUM result={b}; return result; } - -typedef struct { - uint32_t len; - char *data; -} BYTESTRING; - -/* Log Sequence Number (LSN) - * Make the LSN be a struct instead of an integer so that we get better type checking. */ -typedef struct __toku_lsn { uint64_t lsn; } LSN; -#define ZERO_LSN ((LSN){0}) -#define MAX_LSN ((LSN){UINT64_MAX}) - -/* Message Sequence Number (MSN) - * Make the MSN be a struct instead of an integer so that we get better type checking. */ -typedef struct __toku_msn { uint64_t msn; } MSN; -#define ZERO_MSN ((MSN){0}) // dummy used for message construction, to be filled in when msg is applied to tree -#define MIN_MSN ((MSN){(uint64_t)1 << 62}) // first 2^62 values reserved for messages created before Dr. No (for upgrade) -#define MAX_MSN ((MSN){UINT64_MAX}) - -typedef struct { - int64_t numrows; // delta versions in basements could be negative - int64_t numbytes; -} STAT64INFO_S, *STAT64INFO; - -static const STAT64INFO_S ZEROSTATS = {0,0}; - -/* At the ft layer, a FILENUM uniquely identifies an open file. - * At the ydb layer, a DICTIONARY_ID uniquely identifies an open dictionary. 
- * With the introduction of the loader (ticket 2216), it is possible for the file that holds - * an open dictionary to change, so these are now separate and independent unique identifiers. - */ -typedef struct {uint32_t fileid;} FILENUM; -#define FILENUM_NONE ((FILENUM){UINT32_MAX}) - -typedef struct {uint64_t dictid;} DICTIONARY_ID; -#define DICTIONARY_ID_NONE ((DICTIONARY_ID){0}) - -typedef struct { - uint32_t num; - FILENUM *filenums; -} FILENUMS; - -typedef struct tokulogger *TOKULOGGER; -typedef struct txn_manager *TXN_MANAGER; -typedef struct tokutxn *TOKUTXN; - -typedef struct xids_t *XIDS; - -typedef int (*ft_compare_func)(DB *, const DBT *, const DBT *); -typedef void (*setval_func)(const DBT *, void *); -typedef int (*ft_update_func)(DB *, const DBT *, const DBT *, const DBT *, setval_func, void *); -typedef void (*remove_ft_ref_callback)(FT, void*); -typedef void (*on_redirect_callback)(FT_HANDLE, void*); - -#define UU(x) x __attribute__((__unused__)) diff --git a/ft/ftverify.cc b/ft/ftverify.cc index cd72de387c8..7afde55cddb 100644 --- a/ft/ftverify.cc +++ b/ft/ftverify.cc @@ -94,18 +94,17 @@ PATENT RIGHTS GRANT: // fractal tree file, one block at a time. 
//////////////////////////////////////////////////////////////////// -#include "fttypes.h" -#include "ft-internal.h" -#include "node.h" -#include "ft_layout_version.h" -#include "block_table.h" -#include "rbuf.h" -#include "sub_block.h" +#include "portability/toku_assert.h" +#include "portability/toku_list.h" +#include "portability/toku_portability.h" -#include -#include -#include -#include +#include "ft/block_allocator.h" +#include "ft/ft-internal.h" +#include "ft/ft_layout_version.h" +#include "ft/node.h" +#include "ft/rbuf.h" +#include "ft/sub_block.h" +#include "util/threadpool.h" #include #include diff --git a/ft/le-cursor.h b/ft/le-cursor.h index 7295e59ae14..ca99767ad05 100644 --- a/ft/le-cursor.h +++ b/ft/le-cursor.h @@ -91,7 +91,7 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2010-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#include "ft-ops.h" +#include "ft/ft-internal.h" // A leaf entry cursor (LE_CURSOR) is a special type of FT_CURSOR that visits all of the leaf entries in a tree // and returns the leaf entry to the caller. It maintains a copy of the key that it was last positioned over to diff --git a/ft/loader/dbufio.cc b/ft/loader/dbufio.cc index aa964f95353..2fde67cf442 100644 --- a/ft/loader/dbufio.cc +++ b/ft/loader/dbufio.cc @@ -89,16 +89,17 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2010-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
-#include "loader/dbufio.h" -#include "fttypes.h" -#include #include -#include -#include "memory.h" #include +#include + +#include "portability/toku_assert.h" +#include "portability/memory.h" + +#include "ft/ft.h" +#include "ft/ft-internal.h" +#include "loader/dbufio.h" #include "loader/loader-internal.h" -#include "ft-internal.h" -#include "ft.h" struct dbufio_file { // i/o thread owns these diff --git a/ft/loader/loader-internal.h b/ft/loader/loader-internal.h index c02d9619f1f..e7091df2650 100644 --- a/ft/loader/loader-internal.h +++ b/ft/loader/loader-internal.h @@ -91,25 +91,26 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2010-2013 Tokutek Inc. All rights reserved." #include -#include "fttypes.h" + +#include "portability/toku_pthread.h" + +#include "loader/dbufio.h" #include "loader/loader.h" #include "util/queue.h" -#include -#include "loader/dbufio.h" -enum { EXTRACTOR_QUEUE_DEPTH = 2, - FILE_BUFFER_SIZE = 1<<24, - MIN_ROWSET_MEMORY = 1<<23, - MIN_MERGE_FANIN = 2, - FRACTAL_WRITER_QUEUE_DEPTH = 3, - FRACTAL_WRITER_ROWSETS = FRACTAL_WRITER_QUEUE_DEPTH + 2, - DBUFIO_DEPTH = 2, - TARGET_MERGE_BUF_SIZE = 1<<24, // we'd like the merge buffer to be this big. - MIN_MERGE_BUF_SIZE = 1<<20, // always use at least this much - MAX_UNCOMPRESSED_BUF = MIN_MERGE_BUF_SIZE +enum { + EXTRACTOR_QUEUE_DEPTH = 2, + FILE_BUFFER_SIZE = 1<<24, + MIN_ROWSET_MEMORY = 1<<23, + MIN_MERGE_FANIN = 2, + FRACTAL_WRITER_QUEUE_DEPTH = 3, + FRACTAL_WRITER_ROWSETS = FRACTAL_WRITER_QUEUE_DEPTH + 2, + DBUFIO_DEPTH = 2, + TARGET_MERGE_BUF_SIZE = 1<<24, // we'd like the merge buffer to be this big. + MIN_MERGE_BUF_SIZE = 1<<20, // always use at least this much + MAX_UNCOMPRESSED_BUF = MIN_MERGE_BUF_SIZE }; - /* These functions are exported to allow the tests to compile. */ /* These structures maintain a collection of all the open temporary files used by the loader. 
*/ diff --git a/ft/loader/loader.h b/ft/loader/loader.h index ba4ee839262..b4b8a2a1f79 100644 --- a/ft/loader/loader.h +++ b/ft/loader/loader.h @@ -92,7 +92,10 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." +#include "ft/txn.h" #include "ft/cachetable.h" +#include "ft/comparator.h" +#include "ft/ft-ops.h" // The loader callbacks are C functions and need to be defined as such diff --git a/ft/log-internal.h b/ft/log-internal.h index 3d935c79810..52a5e4bf2a6 100644 --- a/ft/log-internal.h +++ b/ft/log-internal.h @@ -103,7 +103,6 @@ PATENT RIGHTS GRANT: #include "txn.h" #include "txn_manager.h" #include "rollback_log_node_cache.h" -#include "txn_child_manager.h" #include @@ -182,8 +181,6 @@ struct tokulogger { tokutime_t time_spent_writing_to_disk; // how much tokutime did we spend writing to disk? uint64_t num_wait_buf_long; // how many times we waited >= 100ms for the in buf - void (*remove_finalize_callback) (DICTIONARY_ID, void*); // ydb-level callback to be called when a transaction that ... - void * remove_finalize_callback_extra; // ... deletes a file is committed or when one that creates a file is aborted. CACHEFILE rollback_cachefile; rollback_log_node_cache rollback_cache; TXN_MANAGER txn_manager; @@ -192,100 +189,6 @@ struct tokulogger { int toku_logger_find_next_unused_log_file(const char *directory, long long *result); int toku_logger_find_logfiles (const char *directory, char ***resultp, int *n_logfiles); -struct txn_roll_info { - // these are number of rollback nodes and rollback entries for this txn. 
- // - // the current rollback node below has sequence number num_rollback_nodes - 1 - // (because they are numbered 0...num-1). often, the current rollback is - // already set to this block num, which means it exists and is available to - // log some entries. if the current rollback is NONE and the number of - // rollback nodes for this transaction is non-zero, then we will use - // the number of rollback nodes to know which sequence number to assign - // to a new one we create - uint64_t num_rollback_nodes; - uint64_t num_rollentries; - uint64_t num_rollentries_processed; - uint64_t rollentry_raw_count; // the total count of every byte in the transaction and all its children. - - // spilled rollback nodes are rollback nodes that were gorged by this - // transaction, retired, and saved in a list. - - // the spilled rollback head is the block number of the first rollback node - // that makes up the rollback log chain - BLOCKNUM spilled_rollback_head; - // the spilled rollback is the block number of the last rollback node that - // makes up the rollback log chain. - BLOCKNUM spilled_rollback_tail; - // the current rollback node block number we may use. if this is ROLLBACK_NONE, - // then we need to create one and set it here before using it. - BLOCKNUM current_rollback; -}; - -struct tokutxn { - // These don't change after create: - - TXNID_PAIR txnid; - - uint64_t snapshot_txnid64; // this is the lsn of the snapshot - const TXN_SNAPSHOT_TYPE snapshot_type; - const bool for_recovery; - const TOKULOGGER logger; - const TOKUTXN parent; - // The child txn is protected by the child_txn_manager lock - // and by the user contract. The user contract states (and is - // enforced at the ydb layer) that a child txn should not be created - // while another child exists. 
The txn_child_manager will protect - // other threads from trying to read this value while another - // thread commits/aborts the child - TOKUTXN child; - // statically allocated child manager, if this - // txn is a root txn, this manager will be used and set to - // child_manager for this transaction and all of its children - txn_child_manager child_manager_s; - // child manager for this transaction, all of its children, - // and all of its ancestors - txn_child_manager* child_manager; - // These don't change but they're created in a way that's hard to make - // strictly const. - DB_TXN *container_db_txn; // reference to DB_TXN that contains this tokutxn - xid_omt_t *live_root_txn_list; // the root txns live when the root ancestor (self if a root) started. - XIDS xids; // Represents the xid list - - TOKUTXN snapshot_next; - TOKUTXN snapshot_prev; - - bool begin_was_logged; - bool declared_read_only; // true if the txn was declared read only when began - // These are not read until a commit, prepare, or abort starts, and - // they're "monotonic" (only go false->true) during operation: - bool do_fsync; - bool force_fsync_on_commit; //This transaction NEEDS an fsync once (if) it commits. (commit means root txn) - - // Not used until commit, prepare, or abort starts: - LSN do_fsync_lsn; - TOKU_XA_XID xa_xid; // for prepared transactions - TXN_PROGRESS_POLL_FUNCTION progress_poll_fun; - void *progress_poll_fun_extra; - - toku_mutex_t txn_lock; - // Protected by the txn lock: - omt open_fts; // a collection of the fts that we touched. Indexed by filenum. 
- struct txn_roll_info roll_info; // Info used to manage rollback entries - - // mutex that protects the transition of the state variable - // the rest of the variables are used by the txn code and - // hot indexing to ensure that when hot indexing is processing a - // leafentry, a TOKUTXN cannot dissappear or change state out from - // underneath it - toku_mutex_t state_lock; - toku_cond_t state_cond; - TOKUTXN_STATE state; - uint32_t num_pin; // number of threads (all hot indexes) that want this - // txn to not transition to commit or abort - uint64_t client_id; -}; -typedef struct tokutxn *TOKUTXN; - static inline int txn_has_current_rollback_log(TOKUTXN txn) { return txn->roll_info.current_rollback.b != ROLLBACK_NONE.b; diff --git a/ft/log.h b/ft/log.h index c59c981a45f..7127e4a12e9 100644 --- a/ft/log.h +++ b/ft/log.h @@ -92,16 +92,17 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#include -#include #include +#include -#include "fttypes.h" -#include "memory.h" -#include "logger.h" -#include "rollback.h" -#include "recover.h" -#include "txn.h" +#include "portability/memory.h" +#include "portability/toku_portability.h" + +#include "ft/logger.h" +#include "ft/rollback.h" +#include "ft/recover.h" +#include "ft/txn.h" +#include "util/bytestring.h" struct roll_entry; diff --git a/ft/logformat.cc b/ft/logformat.cc index bb35ea86c66..fba77d95df2 100644 --- a/ft/logformat.cc +++ b/ft/logformat.cc @@ -854,9 +854,9 @@ int main (int argc, const char *const argv[]) { fprintf2(cf, hf, "#ident \"Copyright (c) 2007-2013 Tokutek Inc. 
All rights reserved.\"\n"); fprintf2(cf, pf, "#include \n"); fprintf2(cf, pf, "#include \n"); - fprintf2(cf, pf, "#include \n"); fprintf2(cf, pf, "#include \n"); fprintf(hf, "#include \n"); + fprintf(hf, "#include \n"); fprintf(hf, "#include \n"); generate_enum(); generate_log_struct(); diff --git a/ft/logger.cc b/ft/logger.cc index da6ee960c1e..188b72b0c4c 100644 --- a/ft/logger.cc +++ b/ft/logger.cc @@ -171,7 +171,6 @@ int toku_logger_create (TOKULOGGER *resultp) { result->write_log_files = true; result->trim_log_files = true; result->directory=0; - result->remove_finalize_callback = NULL; // fd is uninitialized on purpose // ct is uninitialized on purpose result->lg_max = 100<<20; // 100MB default diff --git a/ft/logger.h b/ft/logger.h index 9a3ab3a248d..95ec620ea6b 100644 --- a/ft/logger.h +++ b/ft/logger.h @@ -92,9 +92,11 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
-#include "ft/fttypes.h" -#include "ft/ft-internal.h" +#include "ft/block_table.h" #include "ft/ft_layout_version.h" +#include "ft/txn.h" + +typedef struct tokulogger *TOKULOGGER; enum { TOKU_LOG_VERSION_1 = 1, @@ -110,15 +112,15 @@ int toku_logger_open (const char *directory, TOKULOGGER logger); int toku_logger_open_with_last_xid(const char *directory, TOKULOGGER logger, TXNID last_xid); void toku_logger_shutdown(TOKULOGGER logger); int toku_logger_close(TOKULOGGER *loggerp); -void toku_logger_initialize_rollback_cache(TOKULOGGER logger, FT ft); -int toku_logger_open_rollback(TOKULOGGER logger, CACHETABLE cachetable, bool create); +void toku_logger_initialize_rollback_cache(TOKULOGGER logger, struct ft *ft); +int toku_logger_open_rollback(TOKULOGGER logger, struct cachetable *ct, bool create); void toku_logger_close_rollback(TOKULOGGER logger); bool toku_logger_rollback_is_open (TOKULOGGER); // return true iff the rollback is open. void toku_logger_fsync (TOKULOGGER logger); void toku_logger_fsync_if_lsn_not_fsynced(TOKULOGGER logger, LSN lsn); int toku_logger_is_open(TOKULOGGER logger); -void toku_logger_set_cachetable (TOKULOGGER logger, CACHETABLE ct); +void toku_logger_set_cachetable (TOKULOGGER logger, struct cachetable *ct); int toku_logger_set_lg_max(TOKULOGGER logger, uint32_t lg_max); int toku_logger_get_lg_max(TOKULOGGER logger, uint32_t *lg_maxp); int toku_logger_set_lg_bsize(TOKULOGGER logger, uint32_t bsize); @@ -139,10 +141,24 @@ int toku_logger_restart(TOKULOGGER logger, LSN lastlsn); // given LSN and delete them. void toku_logger_maybe_trim_log(TOKULOGGER logger, LSN oldest_open_lsn); +// At the ft layer, a FILENUM uniquely identifies an open file. 
+struct FILENUM { + uint32_t fileid; +}; +static const FILENUM FILENUM_NONE = { .fileid = UINT32_MAX }; + +struct FILENUMS { + uint32_t num; + FILENUM *filenums; +}; + void toku_logger_log_fcreate(TOKUTXN txn, const char *fname, FILENUM filenum, uint32_t mode, uint32_t flags, uint32_t nodesize, uint32_t basementnodesize, enum toku_compression_method compression_method); void toku_logger_log_fdelete(TOKUTXN txn, FILENUM filenum); void toku_logger_log_fopen(TOKUTXN txn, const char * fname, FILENUM filenum, uint32_t treeflags); +// the log generation code requires a typedef if we want to pass by pointer +typedef TOKU_XA_XID *XIDP; + int toku_fread_uint8_t (FILE *f, uint8_t *v, struct x1764 *mm, uint32_t *len); int toku_fread_uint32_t_nocrclen (FILE *f, uint32_t *v); int toku_fread_uint32_t (FILE *f, uint32_t *v, struct x1764 *checksum, uint32_t *len); @@ -258,4 +274,63 @@ void toku_logger_get_status(TOKULOGGER logger, LOGGER_STATUS s); int toku_get_version_of_logs_on_disk(const char *log_dir, bool *found_any_logs, uint32_t *version_found); -TXN_MANAGER toku_logger_get_txn_manager(TOKULOGGER logger); +struct txn_manager *toku_logger_get_txn_manager(TOKULOGGER logger); + +// For serialize / deserialize + +#include "ft/wbuf.h" + +static inline void wbuf_nocrc_FILENUM(struct wbuf *wb, FILENUM fileid) { + wbuf_nocrc_uint(wb, fileid.fileid); +} + +static inline void wbuf_FILENUM(struct wbuf *wb, FILENUM fileid) { + wbuf_uint(wb, fileid.fileid); +} + +static inline void wbuf_nocrc_FILENUMS(struct wbuf *wb, FILENUMS v) { + wbuf_nocrc_uint(wb, v.num); + for (uint32_t i = 0; i < v.num; i++) { + wbuf_nocrc_FILENUM(wb, v.filenums[i]); + } +} + +static inline void wbuf_FILENUMS(struct wbuf *wb, FILENUMS v) { + wbuf_uint(wb, v.num); + for (uint32_t i = 0; i < v.num; i++) { + wbuf_FILENUM(wb, v.filenums[i]); + } +} + +static inline void wbuf_nocrc_XIDP (struct wbuf *w, TOKU_XA_XID *xid) { + wbuf_nocrc_uint32_t(w, xid->formatID); + wbuf_nocrc_uint8_t(w, xid->gtrid_length); + 
wbuf_nocrc_uint8_t(w, xid->bqual_length); + wbuf_nocrc_literal_bytes(w, xid->data, xid->gtrid_length+xid->bqual_length); +} + +#include "ft/rbuf.h" + +static inline void rbuf_FILENUM(struct rbuf *rb, FILENUM *filenum) { + filenum->fileid = rbuf_int(rb); +} +static inline void rbuf_ma_FILENUM(struct rbuf *rb, memarena *UU(ma), FILENUM *filenum) { + rbuf_FILENUM(rb, filenum); +} + +static inline void rbuf_FILENUMS(struct rbuf *rb, FILENUMS *filenums) { + filenums->num = rbuf_int(rb); + XMALLOC_N(filenums->num, filenums->filenums); + for (uint32_t i = 0; i < filenums->num; i++) { + rbuf_FILENUM(rb, &(filenums->filenums[i])); + } +} + +static inline void rbuf_ma_FILENUMS(struct rbuf *rb, memarena *ma, FILENUMS *filenums) { + rbuf_ma_uint32_t(rb, ma, &(filenums->num)); + filenums->filenums = (FILENUM *) ma->malloc_from_arena(filenums->num * sizeof(FILENUM)); + assert(filenums->filenums != NULL); + for (uint32_t i = 0; i < filenums->num; i++) { + rbuf_ma_FILENUM(rb, ma, &(filenums->filenums[i])); + } +} diff --git a/ft/msg.cc b/ft/msg.cc index 08ab28d6624..db4b6ff891f 100644 --- a/ft/msg.cc +++ b/ft/msg.cc @@ -90,7 +90,6 @@ PATENT RIGHTS GRANT: #include "portability/toku_portability.h" -#include "ft/fttypes.h" #include "ft/msg.h" #include "ft/xids.h" #include "ft/ybt.h" @@ -102,11 +101,11 @@ ft_msg::ft_msg(const DBT *key, const DBT *val, enum ft_msg_type t, MSN m, XIDS x } ft_msg ft_msg::deserialize_from_rbuf(struct rbuf *rb, XIDS *x, bool *is_fresh) { - bytevec keyp, valp; - ITEMLEN keylen, vallen; + const void *keyp, *valp; + uint32_t keylen, vallen; enum ft_msg_type t = (enum ft_msg_type) rbuf_char(rb); *is_fresh = rbuf_char(rb); - MSN m = rbuf_msn(rb); + MSN m = rbuf_MSN(rb); xids_create_from_buffer(rb, x); rbuf_bytes(rb, &keyp, &keylen); rbuf_bytes(rb, &valp, &vallen); @@ -116,8 +115,8 @@ ft_msg ft_msg::deserialize_from_rbuf(struct rbuf *rb, XIDS *x, bool *is_fresh) { } ft_msg ft_msg::deserialize_from_rbuf_v13(struct rbuf *rb, MSN m, XIDS *x) { - bytevec keyp, valp; 
- ITEMLEN keylen, vallen; + const void *keyp, *valp; + uint32_t keylen, vallen; enum ft_msg_type t = (enum ft_msg_type) rbuf_char(rb); xids_create_from_buffer(rb, x); rbuf_bytes(rb, &keyp, &keylen); @@ -169,3 +168,4 @@ void ft_msg::serialize_to_wbuf(struct wbuf *wb, bool is_fresh) const { wbuf_nocrc_bytes(wb, _key.data, _key.size); wbuf_nocrc_bytes(wb, _val.data, _val.size); } + diff --git a/ft/msg.h b/ft/msg.h index ea183f075b3..a54fadb955b 100644 --- a/ft/msg.h +++ b/ft/msg.h @@ -94,9 +94,24 @@ PATENT RIGHTS GRANT: #pragma once +#include + +#include "portability/toku_assert.h" +#include "portability/toku_stdint.h" + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." +// Message Sequence Number (MSN) +typedef struct __toku_msn { uint64_t msn; } MSN; + +// dummy used for message construction, to be filled in when msg is applied to tree +static const MSN ZERO_MSN = { .msn = 0 }; + +// first 2^62 values reserved for messages created before Dr. 
No (for upgrade) +static const MSN MIN_MSN = { .msn = 1ULL << 62 }; +static const MSN MAX_MSN = { .msn = UINT64_MAX }; + /* tree command types */ enum ft_msg_type { FT_NONE = 0, @@ -214,3 +229,18 @@ private: MSN _msn; XIDS _xids; }; + +// For serialize / deserialize + +#include "ft/wbuf.h" + +static inline void wbuf_MSN(struct wbuf *wb, MSN msn) { + wbuf_ulonglong(wb, msn.msn); +} + +#include "ft/rbuf.h" + +static inline MSN rbuf_MSN(struct rbuf *rb) { + MSN msn = { .msn = rbuf_ulonglong(rb) }; + return msn; +} diff --git a/ft/msg_buffer.cc b/ft/msg_buffer.cc index 6ac23131969..3dfcac234fc 100644 --- a/ft/msg_buffer.cc +++ b/ft/msg_buffer.cc @@ -225,8 +225,8 @@ void message_buffer::enqueue(const ft_msg &msg, bool is_fresh, int32_t *offset) int next_2 = next_power_of_two(need_space_total); _resize(next_2); } - ITEMLEN keylen = msg.kdbt()->size; - ITEMLEN datalen = msg.vdbt()->size; + uint32_t keylen = msg.kdbt()->size; + uint32_t datalen = msg.vdbt()->size; struct buffer_entry *entry = get_buffer_entry(_memory_used); entry->type = (unsigned char) msg.type(); entry->msn = msg.msn(); @@ -256,13 +256,13 @@ bool message_buffer::get_freshness(int32_t offset) const { ft_msg message_buffer::get_message(int32_t offset, DBT *keydbt, DBT *valdbt) const { struct buffer_entry *entry = get_buffer_entry(offset); - ITEMLEN keylen = entry->keylen; - ITEMLEN vallen = entry->vallen; + uint32_t keylen = entry->keylen; + uint32_t vallen = entry->vallen; enum ft_msg_type type = (enum ft_msg_type) entry->type; MSN msn = entry->msn; const XIDS xids = (XIDS) &entry->xids_s; - bytevec key = xids_get_end_of_array(xids); - bytevec val = (uint8_t *) key + entry->keylen; + const void *key = xids_get_end_of_array(xids); + const void *val = (uint8_t *) key + entry->keylen; return ft_msg(toku_fill_dbt(keydbt, key, keylen), toku_fill_dbt(valdbt, val, vallen), type, msn, xids); } diff --git a/ft/msg_buffer.h b/ft/msg_buffer.h index fd5fe29e43b..c781c68d0c7 100644 --- a/ft/msg_buffer.h +++ 
b/ft/msg_buffer.h @@ -88,10 +88,9 @@ PATENT RIGHTS GRANT: #pragma once -#include "ft/fttypes.h" -#include "ft/xids-internal.h" -#include "ft/xids.h" #include "ft/msg.h" +#include "ft/xids.h" +#include "ft/xids-internal.h" #include "ft/ybt.h" class message_buffer { diff --git a/ft/node.cc b/ft/node.cc index d1e879281c6..6ce460df293 100644 --- a/ft/node.cc +++ b/ft/node.cc @@ -1576,7 +1576,7 @@ static void bnc_insert_msg(NONLEAF_CHILDINFO bnc, const ft_msg &msg, bool is_fre } // This is only exported for tests. -void toku_bnc_insert_msg(NONLEAF_CHILDINFO bnc, const void *key, ITEMLEN keylen, const void *data, ITEMLEN datalen, enum ft_msg_type type, MSN msn, XIDS xids, bool is_fresh, const toku::comparator &cmp) +void toku_bnc_insert_msg(NONLEAF_CHILDINFO bnc, const void *key, uint32_t keylen, const void *data, uint32_t datalen, enum ft_msg_type type, MSN msn, XIDS xids, bool is_fresh, const toku::comparator &cmp) { DBT k, v; ft_msg msg(toku_fill_dbt(&k, key, keylen), toku_fill_dbt(&v, data, datalen), type, msn, xids); diff --git a/ft/node.h b/ft/node.h index 343c688592a..9870cfca65f 100644 --- a/ft/node.h +++ b/ft/node.h @@ -88,10 +88,9 @@ PATENT RIGHTS GRANT: #pragma once -#include "ft/comparator.h" -#include "ft/cachetable.h" #include "ft/bndata.h" -#include "ft/fttypes.h" +#include "ft/comparator.h" +#include "ft/ft.h" #include "ft/msg_buffer.h" /* Pivot keys. @@ -242,6 +241,7 @@ struct ftnode { struct ftnode_partition *bp; struct ctpair *ct_pair; }; +typedef struct ftnode *FTNODE; // data of an available partition of a leaf ftnode struct ftnode_leaf_basement_node { @@ -251,6 +251,7 @@ struct ftnode_leaf_basement_node { bool stale_ancestor_messages_applied; STAT64INFO_S stat64_delta; // change in stat64 counters since basement was last written to disk }; +typedef struct ftnode_leaf_basement_node *BASEMENTNODE; enum pt_state { // declare this to be packed so that when used below it will only take 1 byte. 
PT_INVALID = 0, @@ -277,6 +278,7 @@ struct ftnode_nonleaf_childinfo { off_omt_t stale_message_tree; uint64_t flow[2]; // current and last checkpoint }; +typedef struct ftnode_nonleaf_childinfo *NONLEAF_CHILDINFO; typedef struct ftnode_child_pointer { union { @@ -298,6 +300,9 @@ struct ftnode_disk_data { uint32_t start; uint32_t size; }; +typedef struct ftnode_disk_data *FTNODE_DISK_DATA; + +// TODO: Turn these into functions instead of macros #define BP_START(node_dd,i) ((node_dd)[i].start) #define BP_SIZE(node_dd,i) ((node_dd)[i].size) @@ -463,7 +468,7 @@ unsigned int toku_bnc_nbytesinbuf(NONLEAF_CHILDINFO bnc); int toku_bnc_n_entries(NONLEAF_CHILDINFO bnc); long toku_bnc_memory_size(NONLEAF_CHILDINFO bnc); long toku_bnc_memory_used(NONLEAF_CHILDINFO bnc); -void toku_bnc_insert_msg(NONLEAF_CHILDINFO bnc, const void *key, ITEMLEN keylen, const void *data, ITEMLEN datalen, enum ft_msg_type type, MSN msn, XIDS xids, bool is_fresh, const toku::comparator &cmp); +void toku_bnc_insert_msg(NONLEAF_CHILDINFO bnc, const void *key, uint32_t keylen, const void *data, uint32_t datalen, enum ft_msg_type type, MSN msn, XIDS xids, bool is_fresh, const toku::comparator &cmp); void toku_bnc_empty(NONLEAF_CHILDINFO bnc); void toku_bnc_flush_to_child(FT ft, NONLEAF_CHILDINFO bnc, FTNODE child, TXNID parent_oldest_referenced_xid_known); bool toku_bnc_should_promote(FT ft, NONLEAF_CHILDINFO bnc) __attribute__((const, nonnull)); @@ -514,8 +519,6 @@ void toku_ft_leaf_apply_msg(const toku::comparator &cmp, ft_update_func update_f const ft_msg &msg, txn_gc_info *gc_info, uint64_t *workdone, STAT64INFO stats_to_update); -CACHETABLE_WRITE_CALLBACK get_write_callbacks_for_node(FT ft); - // // Message management for orthopush // @@ -602,7 +605,7 @@ static inline void set_BLB(FTNODE node, int i, BASEMENTNODE bn) { p->u.leaf = bn; } -static inline SUB_BLOCK BSB(FTNODE node, int i) { +static inline struct sub_block *BSB(FTNODE node, int i) { paranoid_invariant(i >= 0); paranoid_invariant(i < 
node->n_children); FTNODE_CHILD_POINTER p = node->bp[i].ptr; @@ -610,7 +613,7 @@ static inline SUB_BLOCK BSB(FTNODE node, int i) { return p.u.subblock; } -static inline void set_BSB(FTNODE node, int i, SUB_BLOCK sb) { +static inline void set_BSB(FTNODE node, int i, struct sub_block *sb) { paranoid_invariant(i >= 0); paranoid_invariant(i < node->n_children); FTNODE_CHILD_POINTER *p = &node->bp[i].ptr; diff --git a/ft/pivotkeys.cc b/ft/pivotkeys.cc index e5b999ae259..5dd84cda643 100644 --- a/ft/pivotkeys.cc +++ b/ft/pivotkeys.cc @@ -235,7 +235,7 @@ void ftnode_pivot_keys::deserialize_from_rbuf(struct rbuf *rb, int n) { XMALLOC_N_ALIGNED(64, _num_pivots, _dbt_keys); bool keys_same_size = true; for (int i = 0; i < _num_pivots; i++) { - bytevec pivotkeyptr; + const void *pivotkeyptr; uint32_t size; rbuf_bytes(rb, &pivotkeyptr, &size); toku_memdup_dbt(&_dbt_keys[i], pivotkeyptr, size); diff --git a/ft/rbuf.h b/ft/rbuf.h index 83c19e4ceec..41f74ed2cfc 100644 --- a/ft/rbuf.h +++ b/ft/rbuf.h @@ -94,7 +94,6 @@ PATENT RIGHTS GRANT: #include -#include "ft/fttypes.h" #include "portability/memory.h" #include "portability/toku_assert.h" #include "portability/toku_htonl.h" @@ -159,14 +158,14 @@ static unsigned int rbuf_int (struct rbuf *r) { #endif } -static inline void rbuf_literal_bytes (struct rbuf *r, bytevec *bytes, unsigned int n_bytes) { +static inline void rbuf_literal_bytes (struct rbuf *r, const void **bytes, unsigned int n_bytes) { *bytes = &r->buf[r->ndone]; r->ndone+=n_bytes; assert(r->ndone<=r->size); } /* Return a pointer into the middle of the buffer. 
*/ -static inline void rbuf_bytes (struct rbuf *r, bytevec *bytes, unsigned int *n_bytes) +static inline void rbuf_bytes (struct rbuf *r, const void **bytes, unsigned int *n_bytes) { *n_bytes = rbuf_int(r); rbuf_literal_bytes(r, bytes, *n_bytes); @@ -182,28 +181,6 @@ static inline signed long long rbuf_longlong (struct rbuf *r) { return (signed long long)rbuf_ulonglong(r); } -static inline DISKOFF rbuf_diskoff (struct rbuf *r) { - return rbuf_ulonglong(r); -} - -static inline LSN rbuf_lsn (struct rbuf *r) { - LSN lsn = {rbuf_ulonglong(r)}; - return lsn; -} - -static inline MSN rbuf_msn (struct rbuf *r) { - MSN msn = {rbuf_ulonglong(r)}; - return msn; -} - -static inline BLOCKNUM rbuf_blocknum (struct rbuf *r) { - BLOCKNUM result = make_blocknum(rbuf_longlong(r)); - return result; -} -static inline void rbuf_ma_BLOCKNUM (struct rbuf *r, memarena *ma __attribute__((__unused__)), BLOCKNUM *blocknum) { - *blocknum = rbuf_blocknum(r); -} - static inline void rbuf_ma_uint32_t (struct rbuf *r, memarena *ma __attribute__((__unused__)), uint32_t *num) { *num = rbuf_int(r); } @@ -212,52 +189,6 @@ static inline void rbuf_ma_uint64_t (struct rbuf *r, memarena *ma __attribute__( *num = rbuf_ulonglong(r); } - -static inline void rbuf_TXNID (struct rbuf *r, TXNID *txnid) { - *txnid = rbuf_ulonglong(r); -} - -static inline void rbuf_TXNID_PAIR (struct rbuf *r, TXNID_PAIR *txnid) { - txnid->parent_id64 = rbuf_ulonglong(r); - txnid->child_id64 = rbuf_ulonglong(r); -} - -static inline void rbuf_ma_TXNID (struct rbuf *r, memarena *ma __attribute__((__unused__)), TXNID *txnid) { - rbuf_TXNID(r, txnid); -} - -static inline void rbuf_ma_TXNID_PAIR (struct rbuf *r, memarena *ma __attribute__((__unused__)), TXNID_PAIR *txnid) { - rbuf_TXNID_PAIR(r, txnid); -} - -static inline void rbuf_FILENUM (struct rbuf *r, FILENUM *filenum) { - filenum->fileid = rbuf_int(r); -} -static inline void rbuf_ma_FILENUM (struct rbuf *r, memarena *ma __attribute__((__unused__)), FILENUM *filenum) { - 
rbuf_FILENUM(r, filenum); -} - -// 2954 -// Don't try to use the same space, malloc it -static inline void rbuf_FILENUMS(struct rbuf *r, FILENUMS *filenums) { - filenums->num = rbuf_int(r); - filenums->filenums = (FILENUM *) toku_malloc( filenums->num * sizeof(FILENUM) ); - assert(filenums->filenums != NULL); - for (uint32_t i=0; i < filenums->num; i++) { - rbuf_FILENUM(r, &(filenums->filenums[i])); - } -} - -// 2954 -static inline void rbuf_ma_FILENUMS (struct rbuf *r, memarena *ma __attribute__((__unused__)), FILENUMS *filenums) { - rbuf_ma_uint32_t(r, ma, &(filenums->num)); - filenums->filenums = (FILENUM *) ma->malloc_from_arena(filenums->num * sizeof(FILENUM)); - assert(filenums->filenums != NULL); - for (uint32_t i=0; i < filenums->num; i++) { - rbuf_ma_FILENUM(r, ma, &(filenums->filenums[i])); - } -} - // Don't try to use the same space, malloc it static inline void rbuf_BYTESTRING (struct rbuf *r, BYTESTRING *bs) { bs->len = rbuf_int(r); diff --git a/ft/recover.h b/ft/recover.h index 9d4d081cd7d..38d6903f448 100644 --- a/ft/recover.h +++ b/ft/recover.h @@ -92,14 +92,15 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
-#include +#include #include -#include -#include +#include "portability/memory.h" +#include "portability/toku_portability.h" -#include "fttypes.h" -#include "memory.h" +#include "ft/comparator.h" +#include "ft/ft-ops.h" +#include "util/x1764.h" typedef void (*prepared_txn_callback_t)(DB_ENV*, TOKUTXN); typedef void (*keep_cachetable_callback_t)(DB_ENV*, CACHETABLE); @@ -109,7 +110,7 @@ typedef void (*keep_cachetable_callback_t)(DB_ENV*, CACHETABLE); int tokudb_recover (DB_ENV *env, prepared_txn_callback_t prepared_txn_callback, keep_cachetable_callback_t keep_cachetable_callback, - TOKULOGGER logger, + struct tokulogger *logger, const char *env_dir, const char *log_dir, ft_compare_func bt_compare, ft_update_func update_function, diff --git a/ft/roll.cc b/ft/roll.cc index 2150d52dd19..d64e128c87b 100644 --- a/ft/roll.cc +++ b/ft/roll.cc @@ -96,7 +96,6 @@ PATENT RIGHTS GRANT: #include "ft.h" #include "ft-ops.h" #include "log-internal.h" -//#include "txn_manager.h" #include "xids.h" #include "rollback-apply.h" diff --git a/ft/rollback-apply.cc b/ft/rollback-apply.cc index d5f0ab3a18f..94e5abf56d8 100644 --- a/ft/rollback-apply.cc +++ b/ft/rollback-apply.cc @@ -89,9 +89,8 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
-#include "fttypes.h" -#include "log-internal.h" -#include "rollback-apply.h" +#include "ft/log-internal.h" +#include "ft/rollback-apply.h" static void poll_txn_progress_function(TOKUTXN txn, uint8_t is_commit, uint8_t stall_for_checkpoint) { diff --git a/ft/rollback-ct-callbacks.cc b/ft/rollback-ct-callbacks.cc index 997387eac2b..9b75692d44a 100644 --- a/ft/rollback-ct-callbacks.cc +++ b/ft/rollback-ct-callbacks.cc @@ -94,7 +94,6 @@ PATENT RIGHTS GRANT: #include "ft/block_table.h" #include "ft/ft-internal.h" -#include "ft/fttypes.h" #include "ft/rollback.h" #include "ft/rollback-ct-callbacks.h" diff --git a/ft/rollback-ct-callbacks.h b/ft/rollback-ct-callbacks.h index 35a90613423..3c23473ed6a 100644 --- a/ft/rollback-ct-callbacks.h +++ b/ft/rollback-ct-callbacks.h @@ -92,9 +92,7 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." - -#include "cachetable.h" -#include "fttypes.h" +#include "ft/cachetable.h" void toku_rollback_flush_callback(CACHEFILE cachefile, int fd, BLOCKNUM logname, void *rollback_v, void** UU(disk_data), void *extraargs, PAIR_ATTR size, PAIR_ATTR* new_size, bool write_me, bool keep_me, bool for_checkpoint, bool UU(is_clone)); int toku_rollback_fetch_callback(CACHEFILE cachefile, PAIR p, int fd, BLOCKNUM logname, uint32_t fullhash, void **rollback_pv, void** UU(disk_data), PAIR_ATTR *sizep, int * UU(dirtyp), void *extraargs); diff --git a/ft/rollback.h b/ft/rollback.h index 4e68308473b..20cda4de8e7 100644 --- a/ft/rollback.h +++ b/ft/rollback.h @@ -92,8 +92,9 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." 
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#include "ft/sub_block.h" #include "ft/cachetable.h" +#include "ft/sub_block.h" +#include "ft/txn.h" #include "util/memarena.h" @@ -137,7 +138,7 @@ void *toku_memdup_in_rollback(ROLLBACK_LOG_NODE log, const void *v, size_t len); // if necessary. void toku_maybe_spill_rollbacks(TOKUTXN txn, ROLLBACK_LOG_NODE log); -void toku_txn_maybe_note_ft (TOKUTXN txn, FT ft); +void toku_txn_maybe_note_ft (TOKUTXN txn, struct ft *ft); int toku_logger_txn_rollback_stats(TOKUTXN txn, struct txn_stat *txn_stat); int toku_find_xid_by_xid (const TXNID &xid, const TXNID &xidfind); diff --git a/ft/sub_block.cc b/ft/sub_block.cc index 5d8799fb2db..ee1e289d802 100644 --- a/ft/sub_block.cc +++ b/ft/sub_block.cc @@ -89,21 +89,21 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
-#include "compress.h" -#include "sub_block.h" -#include "quicklz.h" - -#include -#include -#include -#include -#include - +#include #include #include -#include #include +#include "portability/memory.h" +#include "portability/toku_assert.h" +#include "portability/toku_portability.h" + +#include "ft/compress.h" +#include "ft/sub_block.h" +#include "ft/quicklz.h" +#include "util/threadpool.h" +#include "util/x1764.h" + SUB_BLOCK sub_block_creat(void) { SUB_BLOCK XMALLOC(sb); sub_block_init(sb); diff --git a/ft/sub_block.h b/ft/sub_block.h index 64df17f55e9..26f99747b2d 100644 --- a/ft/sub_block.h +++ b/ft/sub_block.h @@ -92,14 +92,14 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#include "compress.h" -#include "fttypes.h" +#include "ft/compress.h" +// TODO: Clean this abstraciton up static const int max_sub_blocks = 8; -static const int target_sub_block_size = 512*1024; +static const int target_sub_block_size = 512 * 1024; static const int max_basement_nodes = 32; -static const int max_basement_node_uncompressed_size = 256*1024; -static const int max_basement_node_compressed_size = 64*1024; +static const int max_basement_node_uncompressed_size = 256 * 1024; +static const int max_basement_node_compressed_size = 64 * 1024; struct sub_block { void *uncompressed_ptr; diff --git a/ft/tests/block_allocator_test.cc b/ft/tests/block_allocator_test.cc index 9b02061ae47..a7bb4f6641d 100644 --- a/ft/tests/block_allocator_test.cc +++ b/ft/tests/block_allocator_test.cc @@ -88,7 +88,6 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." 
- #include "test.h" static void ba_alloc_at(block_allocator *ba, uint64_t size, uint64_t offset) { diff --git a/ft/tests/ft-test-cursor-2.cc b/ft/tests/ft-test-cursor-2.cc index fc4ef53ff7a..6907a379289 100644 --- a/ft/tests/ft-test-cursor-2.cc +++ b/ft/tests/ft-test-cursor-2.cc @@ -96,7 +96,7 @@ static const char *fname = TOKU_TEST_FILENAME; static TOKUTXN const null_txn = 0; static int -save_data (ITEMLEN UU(keylen), bytevec UU(key), ITEMLEN vallen, bytevec val, void *v, bool lock_only) { +save_data (uint32_t UU(keylen), const void *UU(key), uint32_t vallen, const void *val, void *v, bool lock_only) { if (lock_only) return 0; assert(key!=NULL); void **CAST_FROM_VOIDP(vp, v); diff --git a/ft/tests/ft-test-cursor.cc b/ft/tests/ft-test-cursor.cc index fa200705e1e..0ae777499ad 100644 --- a/ft/tests/ft-test-cursor.cc +++ b/ft/tests/ft-test-cursor.cc @@ -442,7 +442,7 @@ static void test_ft_cursor_rwalk(int n) { } static int -ascending_key_string_checkf (ITEMLEN keylen, bytevec key, ITEMLEN UU(vallen), bytevec UU(val), void *v, bool lock_only) +ascending_key_string_checkf (uint32_t keylen, const void *key, uint32_t UU(vallen), const void *UU(val), void *v, bool lock_only) // the keys are strings. Verify that they keylen matches the key, that the keys are ascending. Use (char**)v to hold a // malloc'd previous string. 
{ diff --git a/ft/tests/ft-test5.cc b/ft/tests/ft-test5.cc index 1615873d17d..8c1a53914b7 100644 --- a/ft/tests/ft-test5.cc +++ b/ft/tests/ft-test5.cc @@ -127,7 +127,7 @@ static void test5 (void) { if (i%1000==0 && verbose) { printf("r"); fflush(stdout); } snprintf(key, 100, "key%d", rk); snprintf(valexpected, 100, "val%d", values[rk]); - struct check_pair pair = {(ITEMLEN) (1+strlen(key)), key, (ITEMLEN) (1+strlen(valexpected)), valexpected, 0}; + struct check_pair pair = {(uint32_t) (1+strlen(key)), key, (uint32_t) (1+strlen(valexpected)), valexpected, 0}; r = toku_ft_lookup(t, toku_fill_dbt(&k, key, 1+strlen(key)), lookup_checkf, &pair); assert(r==0); assert(pair.call_count==1); diff --git a/ft/tests/ftloader-test.cc b/ft/tests/ftloader-test.cc index 34ff22b01b3..9a2eeed1292 100644 --- a/ft/tests/ftloader-test.cc +++ b/ft/tests/ftloader-test.cc @@ -350,7 +350,7 @@ static void verify_dbfile(int n, int sorted_keys[], const char *sorted_vals[], c size_t userdata = 0; int i; for (i=0; i #include "test.h" -#include "fttypes.h" -#include "ule.h" -#include "ule-internal.h" +#include "ft/ule.h" +#include "ft/ule-internal.h" static void init_empty_ule(ULE ule) { ule->num_cuxrs = 0; diff --git a/ft/tests/test-leafentry-nested.cc b/ft/tests/test-leafentry-nested.cc index 1720922a385..8a70c7b6611 100644 --- a/ft/tests/test-leafentry-nested.cc +++ b/ft/tests/test-leafentry-nested.cc @@ -91,10 +91,9 @@ PATENT RIGHTS GRANT: #include #include "test.h" -#include "fttypes.h" -#include "ule.h" -#include "ule-internal.h" +#include "ft/ule.h" +#include "ft/ule-internal.h" enum {MAX_SIZE = 256}; static XIDS nested_xids[MAX_TRANSACTION_RECORDS]; diff --git a/ft/tests/test.h b/ft/tests/test.h index 3d6b049af5c..c7a00966f01 100644 --- a/ft/tests/test.h +++ b/ft/tests/test.h @@ -101,16 +101,17 @@ PATENT RIGHTS GRANT: #include #include -#include "ft.h" -#include "node.h" -#include "block_table.h" -#include "log-internal.h" -#include "logger.h" -#include "fttypes.h" -#include "ft-ops.h" 
-#include "cursor.h" -#include "cachetable.h" -#include "cachetable-internal.h" +#include "ft/ft.h" +#include "ft/node.h" +#include "ft/block_allocator.h" +#include "ft/block_table.h" +#include "ft/log-internal.h" +#include "ft/logger.h" +#include "ft/ft-ops.h" +#include "ft/cursor.h" +#include "ft/cachetable.h" +#include "ft/cachetable-internal.h" +#include "util/bytestring.h" #define CKERR(r) ({ int __r = r; if (__r!=0) fprintf(stderr, "%s:%d error %d %s\n", __FILE__, __LINE__, __r, strerror(r)); assert(__r==0); }) #define CKERR2(r,r2) do { if (r!=r2) fprintf(stderr, "%s:%d error %d %s, expected %d\n", __FILE__, __LINE__, r, strerror(r), r2); assert(r==r2); } while (0) @@ -121,7 +122,7 @@ PATENT RIGHTS GRANT: fflush(stderr); \ } while (0) -const ITEMLEN len_ignore = 0xFFFFFFFF; +const uint32_t len_ignore = 0xFFFFFFFF; static const prepared_txn_callback_t NULL_prepared_txn_callback __attribute__((__unused__)) = NULL; static const keep_cachetable_callback_t NULL_keep_cachetable_callback __attribute__((__unused__)) = NULL; @@ -155,14 +156,14 @@ last_dummymsn(void) { struct check_pair { - ITEMLEN keylen; // A keylen equal to 0xFFFFFFFF means don't check the keylen or the key. - bytevec key; // A NULL key means don't check the key. - ITEMLEN vallen; // Similarly for vallen and null val. - bytevec val; + uint32_t keylen; // A keylen equal to 0xFFFFFFFF means don't check the keylen or the key. + const void *key; // A NULL key means don't check the key. + uint32_t vallen; // Similarly for vallen and null val. 
+ const void *val; int call_count; }; static int -lookup_checkf (ITEMLEN keylen, bytevec key, ITEMLEN vallen, bytevec val, void *pair_v, bool lock_only) { +lookup_checkf (uint32_t keylen, const void *key, uint32_t vallen, const void *val, void *pair_v, bool lock_only) { if (!lock_only) { struct check_pair *pair = (struct check_pair *) pair_v; if (key!=NULL) { @@ -187,8 +188,8 @@ ft_lookup_and_check_nodup (FT_HANDLE t, const char *keystring, const char *valst { DBT k; toku_fill_dbt(&k, keystring, strlen(keystring) + 1); - struct check_pair pair = {(ITEMLEN) (1+strlen(keystring)), keystring, - (ITEMLEN) (1+strlen(valstring)), valstring, + struct check_pair pair = {(uint32_t) (1+strlen(keystring)), keystring, + (uint32_t) (1+strlen(valstring)), valstring, 0}; int r = toku_ft_lookup(t, &k, lookup_checkf, &pair); assert(r==0); @@ -200,7 +201,7 @@ ft_lookup_and_fail_nodup (FT_HANDLE t, char *keystring) { DBT k; toku_fill_dbt(&k, keystring, strlen(keystring) + 1); - struct check_pair pair = {(ITEMLEN) (1+strlen(keystring)), keystring, + struct check_pair pair = {(uint32_t) (1+strlen(keystring)), keystring, 0, 0, 0}; int r = toku_ft_lookup(t, &k, lookup_checkf, &pair); diff --git a/ft/tests/test3856.cc b/ft/tests/test3856.cc index 8ead29a32ea..4d601ec1c88 100644 --- a/ft/tests/test3856.cc +++ b/ft/tests/test3856.cc @@ -110,7 +110,7 @@ string_cmp(DB* UU(db), const DBT *a, const DBT *b) } static int -found(ITEMLEN UU(keylen), bytevec key, ITEMLEN UU(vallen), bytevec UU(val), void *UU(extra), bool lock_only) +found(uint32_t UU(keylen), const void *key, uint32_t UU(vallen), const void *UU(val), void *UU(extra), bool lock_only) { assert(key != NULL && !lock_only); return 0; diff --git a/ft/tests/test_logcursor.cc b/ft/tests/test_logcursor.cc index 51b1fcc1e64..4e08cd66d4e 100644 --- a/ft/tests/test_logcursor.cc +++ b/ft/tests/test_logcursor.cc @@ -92,7 +92,6 @@ PATENT RIGHTS GRANT: #include "logcursor.h" #include "test.h" -#include "fttypes.h" #if defined(HAVE_LIMITS_H) # 
include diff --git a/ft/tests/upgrade_test_simple.cc b/ft/tests/upgrade_test_simple.cc index 619f8492b59..d1ec3f7d3fb 100644 --- a/ft/tests/upgrade_test_simple.cc +++ b/ft/tests/upgrade_test_simple.cc @@ -102,7 +102,7 @@ PATENT RIGHTS GRANT: static TOKUTXN const null_txn = NULL; static int -noop_getf(ITEMLEN UU(keylen), bytevec UU(key), ITEMLEN UU(vallen), bytevec UU(val), void *extra, bool UU(lock_only)) +noop_getf(uint32_t UU(keylen), const void *UU(key), uint32_t UU(vallen), const void *UU(val), void *extra, bool UU(lock_only)) { int *CAST_FROM_VOIDP(calledp, extra); (*calledp)++; diff --git a/ft/tests/ybt-test.cc b/ft/tests/ybt-test.cc index 5e3c6f4b1a7..7c92418d3fd 100644 --- a/ft/tests/ybt-test.cc +++ b/ft/tests/ybt-test.cc @@ -111,11 +111,11 @@ static void ybt_test0 (void) { toku_init_dbt(&t0); toku_init_dbt(&t1); { - bytevec temp1 = "hello"; + const void *temp1 = "hello"; toku_dbt_set(6, temp1, &t0, &v0); } { - bytevec temp2 = "foo"; + const void *temp2 = "foo"; toku_dbt_set( 4, temp2, &t1, &v1); } assert(t0.size==6); @@ -124,7 +124,7 @@ static void ybt_test0 (void) { assert(strcmp((char*)t1.data, "foo")==0); { - bytevec temp3 = "byebye"; + const void *temp3 = "byebye"; toku_dbt_set(7, temp3, &t1, &v0); /* Use v0, not v1 */ } // This assertion would be wrong, since v0 may have been realloc'd, and t0.data may now point @@ -141,7 +141,7 @@ static void ybt_test0 (void) { t0.flags = DB_DBT_USERMEM; t0.ulen = 0; { - bytevec temp4 = "hello"; + const void *temp4 = "hello"; toku_dbt_set(6, temp4, &t0, 0); } assert(t0.data==0); @@ -152,7 +152,7 @@ static void ybt_test0 (void) { t0.flags = DB_DBT_REALLOC; cleanup(&v0); { - bytevec temp5 = "internationalization"; + const void *temp5 = "internationalization"; toku_dbt_set(21, temp5, &t0, &v0); } assert(v0.data==0); /* Didn't change v0 */ @@ -160,7 +160,7 @@ static void ybt_test0 (void) { assert(strcmp((char*)t0.data, "internationalization")==0); { - bytevec temp6 = "provincial"; + const void *temp6 = "provincial"; 
toku_dbt_set(11, temp6, &t0, &v0); } assert(t0.size==11); diff --git a/ft/tokuconst.h b/ft/tokuconst.h index 9593cd5761c..7a55ccb107a 100644 --- a/ft/tokuconst.h +++ b/ft/tokuconst.h @@ -99,5 +99,7 @@ PATENT RIGHTS GRANT: * root transaction (id 0). */ -enum {MAX_NESTED_TRANSACTIONS = 253}; -enum {MAX_TRANSACTION_RECORDS = MAX_NESTED_TRANSACTIONS + 1}; +enum { + MAX_NESTED_TRANSACTIONS = 253, + MAX_TRANSACTION_RECORDS = MAX_NESTED_TRANSACTIONS + 1 +}; diff --git a/ft/tokuftdump.cc b/ft/tokuftdump.cc index a9394f581e7..3c3082f5ea8 100644 --- a/ft/tokuftdump.cc +++ b/ft/tokuftdump.cc @@ -101,7 +101,6 @@ PATENT RIGHTS GRANT: #include "ft/block_table.h" #include "ft/cachetable.h" #include "ft/ft.h" -#include "ft/fttypes.h" #include "ft/ft-internal.h" #include "ft/node.h" @@ -124,9 +123,9 @@ static void format_time(const uint64_t time_int, char *buf) { buf[24] = 0; } -static void print_item(bytevec val, ITEMLEN len) { +static void print_item(const void *val, uint32_t len) { printf("\""); - ITEMLEN i; + uint32_t i; for (i=0; idata; const void *data = msg.vdbt()->data; - ITEMLEN keylen = msg.kdbt()->size; - ITEMLEN datalen = msg.vdbt()->size; + uint32_t keylen = msg.kdbt()->size; + uint32_t datalen = msg.vdbt()->size; printf(" msn=%" PRIu64 " (0x%" PRIx64 ") ", msn.msn, msn.msn); printf(" TYPE="); switch (type) { diff --git a/ft/txn.cc b/ft/txn.cc index 0451bf1f2d5..018b9112aa4 100644 --- a/ft/txn.cc +++ b/ft/txn.cc @@ -97,7 +97,6 @@ PATENT RIGHTS GRANT: #include "ule.h" #include "rollback-apply.h" #include "txn_manager.h" -#include "txn_child_manager.h" #include /////////////////////////////////////////////////////////////////////////////////// diff --git a/ft/txn.h b/ft/txn.h index 0bdb48b80c0..2af4aca2120 100644 --- a/ft/txn.h +++ b/ft/txn.h @@ -92,17 +92,61 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." 
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#include "txn_manager.h" +#include "portability/toku_stdint.h" -void txn_status_init(void); -void txn_status_destroy(void); +#include "ft/block_table.h" +#include "ft/txn_state.h" +#include "util/omt.h" +typedef uint64_t TXNID; + +typedef struct tokutxn *TOKUTXN; + +#define TXNID_NONE_LIVING ((TXNID)0) +#define TXNID_NONE ((TXNID)0) +#define TXNID_MAX ((TXNID)-1) + +typedef struct txnid_pair_s { + TXNID parent_id64; + TXNID child_id64; +} TXNID_PAIR; + +static const TXNID_PAIR TXNID_PAIR_NONE = { .parent_id64 = TXNID_NONE, .child_id64 = TXNID_NONE }; + +// We include the child manager here beacuse it uses the TXNID / TOKUTXN types +#include "ft/txn_child_manager.h" + +/* Log Sequence Number (LSN) + * Make the LSN be a struct instead of an integer so that we get better type checking. */ +typedef struct __toku_lsn { uint64_t lsn; } LSN; +static const LSN ZERO_LSN = { .lsn = 0 }; +static const LSN MAX_LSN = { .lsn = UINT64_MAX }; + +// +// Types of snapshots that can be taken by a tokutxn +// - TXN_SNAPSHOT_NONE: means that there is no snapshot. Reads do not use snapshot reads. 
+// used for SERIALIZABLE and READ UNCOMMITTED +// - TXN_SNAPSHOT_ROOT: means that all tokutxns use their root transaction's snapshot +// used for REPEATABLE READ +// - TXN_SNAPSHOT_CHILD: means that each child tokutxn creates its own snapshot +// used for READ COMMITTED +// + +typedef enum __TXN_SNAPSHOT_TYPE { + TXN_SNAPSHOT_NONE=0, + TXN_SNAPSHOT_ROOT=1, + TXN_SNAPSHOT_CHILD=2 +} TXN_SNAPSHOT_TYPE; + +typedef toku::omt txn_omt_t; +typedef toku::omt xid_omt_t; +typedef toku::omt rx_omt_t; inline bool txn_pair_is_none(TXNID_PAIR txnid) { return txnid.parent_id64 == TXNID_NONE && txnid.child_id64 == TXNID_NONE; } -inline bool txn_needs_snapshot(TXN_SNAPSHOT_TYPE snapshot_type, TOKUTXN parent) { +inline bool txn_needs_snapshot(TXN_SNAPSHOT_TYPE snapshot_type, struct tokutxn *parent) { // we need a snapshot if the snapshot type is a child or // if the snapshot type is root and we have no parent. // Cases that we don't need a snapshot: when snapshot type is NONE @@ -110,29 +154,131 @@ inline bool txn_needs_snapshot(TXN_SNAPSHOT_TYPE snapshot_type, TOKUTXN parent) return (snapshot_type != TXN_SNAPSHOT_NONE && (parent==NULL || snapshot_type == TXN_SNAPSHOT_CHILD)); } -void toku_txn_lock(TOKUTXN txn); -void toku_txn_unlock(TOKUTXN txn); +struct tokulogger; -uint64_t toku_txn_get_root_id(TOKUTXN txn); -bool txn_declared_read_only(TOKUTXN txn); +struct txn_roll_info { + // these are number of rollback nodes and rollback entries for this txn. + // + // the current rollback node below has sequence number num_rollback_nodes - 1 + // (because they are numbered 0...num-1). often, the current rollback is + // already set to this block num, which means it exists and is available to + // log some entries. 
if the current rollback is NONE and the number of + // rollback nodes for this transaction is non-zero, then we will use + // the number of rollback nodes to know which sequence number to assign + // to a new one we create + uint64_t num_rollback_nodes; + uint64_t num_rollentries; + uint64_t num_rollentries_processed; + uint64_t rollentry_raw_count; // the total count of every byte in the transaction and all its children. + + // spilled rollback nodes are rollback nodes that were gorged by this + // transaction, retired, and saved in a list. + + // the spilled rollback head is the block number of the first rollback node + // that makes up the rollback log chain + BLOCKNUM spilled_rollback_head; + + // the spilled rollback is the block number of the last rollback node that + // makes up the rollback log chain. + BLOCKNUM spilled_rollback_tail; + + // the current rollback node block number we may use. if this is ROLLBACK_NONE, + // then we need to create one and set it here before using it. + BLOCKNUM current_rollback; +}; + +struct tokutxn { + // These don't change after create: + + TXNID_PAIR txnid; + + uint64_t snapshot_txnid64; // this is the lsn of the snapshot + const TXN_SNAPSHOT_TYPE snapshot_type; + const bool for_recovery; + struct tokulogger *const logger; + struct tokutxn *const parent; + // The child txn is protected by the child_txn_manager lock + // and by the user contract. The user contract states (and is + // enforced at the ydb layer) that a child txn should not be created + // while another child exists. 
The txn_child_manager will protect + // other threads from trying to read this value while another + // thread commits/aborts the child + struct tokutxn *child; + + // statically allocated child manager, if this + // txn is a root txn, this manager will be used and set to + // child_manager for this transaction and all of its children + txn_child_manager child_manager_s; + + // child manager for this transaction, all of its children, + // and all of its ancestors + txn_child_manager* child_manager; + + // These don't change but they're created in a way that's hard to make + // strictly const. + DB_TXN *container_db_txn; // reference to DB_TXN that contains this tokutxn + xid_omt_t *live_root_txn_list; // the root txns live when the root ancestor (self if a root) started. + struct xids_t *xids; // Represents the xid list + + struct tokutxn *snapshot_next; + struct tokutxn *snapshot_prev; + + bool begin_was_logged; + bool declared_read_only; // true if the txn was declared read only when began + + // These are not read until a commit, prepare, or abort starts, and + // they're "monotonic" (only go false->true) during operation: + bool do_fsync; + bool force_fsync_on_commit; //This transaction NEEDS an fsync once (if) it commits. (commit means root txn) + + // Not used until commit, prepare, or abort starts: + LSN do_fsync_lsn; + TOKU_XA_XID xa_xid; // for prepared transactions + TXN_PROGRESS_POLL_FUNCTION progress_poll_fun; + void *progress_poll_fun_extra; + + toku_mutex_t txn_lock; + // Protected by the txn lock: + toku::omt open_fts; // a collection of the fts that we touched. Indexed by filenum. 
+ struct txn_roll_info roll_info; // Info used to manage rollback entries + + // mutex that protects the transition of the state variable + // the rest of the variables are used by the txn code and + // hot indexing to ensure that when hot indexing is processing a + // leafentry, a TOKUTXN cannot dissappear or change state out from + // underneath it + toku_mutex_t state_lock; + toku_cond_t state_cond; + TOKUTXN_STATE state; + uint32_t num_pin; // number of threads (all hot indexes) that want this + // txn to not transition to commit or abort + uint64_t client_id; +}; +typedef struct tokutxn *TOKUTXN; + +void toku_txn_lock(struct tokutxn *txn); +void toku_txn_unlock(struct tokutxn *txn); + +uint64_t toku_txn_get_root_id(struct tokutxn *txn); +bool txn_declared_read_only(struct tokutxn *txn); int toku_txn_begin_txn ( DB_TXN *container_db_txn, - TOKUTXN parent_tokutxn, - TOKUTXN *tokutxn, - TOKULOGGER logger, + struct tokutxn *parent_tokutxn, + struct tokutxn **tokutxn, + struct tokulogger *logger, TXN_SNAPSHOT_TYPE snapshot_type, bool read_only ); -DB_TXN * toku_txn_get_container_db_txn (TOKUTXN tokutxn); -void toku_txn_set_container_db_txn (TOKUTXN, DB_TXN*); +DB_TXN * toku_txn_get_container_db_txn (struct tokutxn *tokutxn); +void toku_txn_set_container_db_txn(struct tokutxn *txn, DB_TXN *db_txn); // toku_txn_begin_with_xid is called from recovery and has no containing DB_TXN int toku_txn_begin_with_xid ( - TOKUTXN parent_tokutxn, - TOKUTXN *tokutxn, - TOKULOGGER logger, + struct tokutxn *parent_tokutxn, + struct tokutxn **tokutxn, + struct tokulogger *logger, TXNID_PAIR xid, TXN_SNAPSHOT_TYPE snapshot_type, DB_TXN *container_db_txn, @@ -140,43 +286,43 @@ int toku_txn_begin_with_xid ( bool read_only ); -void toku_txn_update_xids_in_txn(TOKUTXN txn, TXNID xid); +void toku_txn_update_xids_in_txn(struct tokutxn *txn, TXNID xid); -int toku_txn_load_txninfo (TOKUTXN txn, struct txninfo *info); +int toku_txn_load_txninfo (struct tokutxn *txn, struct txninfo *info); -int 
toku_txn_commit_txn (TOKUTXN txn, int nosync, +int toku_txn_commit_txn (struct tokutxn *txn, int nosync, TXN_PROGRESS_POLL_FUNCTION poll, void *poll_extra); -int toku_txn_commit_with_lsn(TOKUTXN txn, int nosync, LSN oplsn, +int toku_txn_commit_with_lsn(struct tokutxn *txn, int nosync, LSN oplsn, TXN_PROGRESS_POLL_FUNCTION poll, void *poll_extra); -int toku_txn_abort_txn(TOKUTXN txn, +int toku_txn_abort_txn(struct tokutxn *txn, TXN_PROGRESS_POLL_FUNCTION poll, void *poll_extra); -int toku_txn_abort_with_lsn(TOKUTXN txn, LSN oplsn, +int toku_txn_abort_with_lsn(struct tokutxn *txn, LSN oplsn, TXN_PROGRESS_POLL_FUNCTION poll, void *poll_extra); -void toku_txn_prepare_txn (TOKUTXN txn, TOKU_XA_XID *xid); +void toku_txn_prepare_txn (struct tokutxn *txn, TOKU_XA_XID *xid); // Effect: Do the internal work of preparing a transaction (does not log the prepare record). -void toku_txn_get_prepared_xa_xid (TOKUTXN, TOKU_XA_XID *); +void toku_txn_get_prepared_xa_xid(struct tokutxn *txn, TOKU_XA_XID *xa_xid); // Effect: Fill in the XID information for a transaction. The caller allocates the XID and the function fills in values. 
-void toku_txn_maybe_fsync_log(TOKULOGGER logger, LSN do_fsync_lsn, bool do_fsync); +void toku_txn_maybe_fsync_log(struct tokulogger *logger, LSN do_fsync_lsn, bool do_fsync); -void toku_txn_get_fsync_info(TOKUTXN ttxn, bool* do_fsync, LSN* do_fsync_lsn); +void toku_txn_get_fsync_info(struct tokutxn *ttxn, bool* do_fsync, LSN* do_fsync_lsn); // Complete and destroy a txn -void toku_txn_close_txn(TOKUTXN txn); +void toku_txn_close_txn(struct tokutxn *txn); // Remove a txn from any live txn lists -void toku_txn_complete_txn(TOKUTXN txn); +void toku_txn_complete_txn(struct tokutxn *txn); // Free the memory of a txn -void toku_txn_destroy_txn(TOKUTXN txn); +void toku_txn_destroy_txn(struct tokutxn *txn); -XIDS toku_txn_get_xids (TOKUTXN); +struct xids_t *toku_txn_get_xids(struct tokutxn *txn); // Force fsync on commit -void toku_txn_force_fsync_on_commit(TOKUTXN txn); +void toku_txn_force_fsync_on_commit(struct tokutxn *txn); typedef enum { TXN_BEGIN, // total number of transactions begun (does not include recovered txns) @@ -195,34 +341,31 @@ void toku_txn_get_status(TXN_STATUS s); bool toku_is_txn_in_live_root_txn_list(const xid_omt_t &live_root_txn_list, TXNID xid); -TXNID toku_get_oldest_in_live_root_txn_list(TOKUTXN txn); +TXNID toku_get_oldest_in_live_root_txn_list(struct tokutxn *txn); -#include "txn_state.h" - -TOKUTXN_STATE toku_txn_get_state(TOKUTXN txn); +TOKUTXN_STATE toku_txn_get_state(struct tokutxn *txn); struct tokulogger_preplist { TOKU_XA_XID xid; DB_TXN *txn; }; -int toku_logger_recover_txn (TOKULOGGER logger, struct tokulogger_preplist preplist[/*count*/], long count, /*out*/ long *retp, uint32_t flags); +int toku_logger_recover_txn (struct tokulogger *logger, struct tokulogger_preplist preplist[/*count*/], long count, /*out*/ long *retp, uint32_t flags); -void toku_maybe_log_begin_txn_for_write_operation(TOKUTXN txn); +void toku_maybe_log_begin_txn_for_write_operation(struct tokutxn *txn); // Return whether txn (or it's descendents) have done no 
work. -bool toku_txn_is_read_only(TOKUTXN txn); +bool toku_txn_is_read_only(struct tokutxn *txn); -void toku_txn_lock_state(TOKUTXN txn); -void toku_txn_unlock_state(TOKUTXN txn); -void toku_txn_pin_live_txn_unlocked(TOKUTXN txn); -void toku_txn_unpin_live_txn(TOKUTXN txn); +void toku_txn_lock_state(struct tokutxn *txn); +void toku_txn_unlock_state(struct tokutxn *txn); +void toku_txn_pin_live_txn_unlocked(struct tokutxn *txn); +void toku_txn_unpin_live_txn(struct tokutxn *txn); -bool toku_txn_has_spilled_rollback(TOKUTXN txn); +bool toku_txn_has_spilled_rollback(struct tokutxn *txn); -uint64_t toku_txn_get_client_id(TOKUTXN txn); -void toku_txn_set_client_id(TOKUTXN txn, uint64_t client_id); +uint64_t toku_txn_get_client_id(struct tokutxn *txn); +void toku_txn_set_client_id(struct tokutxn *txn, uint64_t client_id); - // // This function is used by the leafentry iterators. // returns TOKUDB_ACCEPT if live transaction context is allowed to read a value @@ -234,4 +377,57 @@ void toku_txn_set_client_id(TOKUTXN txn, uint64_t client_id); // For the above to NOT be true: // - id > context->snapshot_txnid64 OR id is in context's live root transaction list // -int toku_txn_reads_txnid(TXNID txnid, TOKUTXN txn); +int toku_txn_reads_txnid(TXNID txnid, struct tokutxn *txn); + +void txn_status_init(void); + +void txn_status_destroy(void); + +// For serialize / deserialize + +#include "ft/wbuf.h" + +static inline void wbuf_TXNID(struct wbuf *wb, TXNID txnid) { + wbuf_ulonglong(wb, txnid); +} + +static inline void wbuf_nocrc_TXNID(struct wbuf *wb, TXNID txnid) { + wbuf_nocrc_ulonglong(wb, txnid); +} + +static inline void wbuf_nocrc_TXNID_PAIR(struct wbuf *wb, TXNID_PAIR txnid) { + wbuf_nocrc_ulonglong(wb, txnid.parent_id64); + wbuf_nocrc_ulonglong(wb, txnid.child_id64); +} + +static inline void wbuf_nocrc_LSN(struct wbuf *wb, LSN lsn) { + wbuf_nocrc_ulonglong(wb, lsn.lsn); +} + +static inline void wbuf_LSN(struct wbuf *wb, LSN lsn) { + wbuf_ulonglong(wb, lsn.lsn); +} + +#include 
"ft/rbuf.h" + +static inline void rbuf_TXNID(struct rbuf *rb, TXNID *txnid) { + *txnid = rbuf_ulonglong(rb); +} + +static inline void rbuf_TXNID_PAIR(struct rbuf *rb, TXNID_PAIR *txnid) { + txnid->parent_id64 = rbuf_ulonglong(rb); + txnid->child_id64 = rbuf_ulonglong(rb); +} + +static inline void rbuf_ma_TXNID(struct rbuf *rb, memarena *UU(ma), TXNID *txnid) { + rbuf_TXNID(rb, txnid); +} + +static inline void rbuf_ma_TXNID_PAIR (struct rbuf *r, memarena *ma __attribute__((__unused__)), TXNID_PAIR *txnid) { + rbuf_TXNID_PAIR(r, txnid); +} + +static inline LSN rbuf_LSN(struct rbuf *rb) { + LSN lsn = { .lsn = rbuf_ulonglong(rb) }; + return lsn; +} diff --git a/ft/txn_child_manager.h b/ft/txn_child_manager.h index 537f800cdd8..efc6b35312a 100644 --- a/ft/txn_child_manager.h +++ b/ft/txn_child_manager.h @@ -89,11 +89,13 @@ PATENT RIGHTS GRANT: #pragma once +// We should be including ft/txn.h here but that header includes this one, +// so we don't. +#include "portability/toku_pthread.h" + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
-#include "txn_manager.h" - class txn_child_manager { public: void init (TOKUTXN root); @@ -104,14 +106,14 @@ public: void suspend(); void resume(); void find_tokutxn_by_xid_unlocked(TXNID_PAIR xid, TOKUTXN* result); - int iterate(txn_mgr_iter_callback cb, void* extra); + int iterate(int (*cb)(TOKUTXN txn, void *extra), void* extra); private: TXNID m_last_xid; TOKUTXN m_root; toku_mutex_t m_mutex; -friend class txn_child_manager_unit_test; + friend class txn_child_manager_unit_test; }; diff --git a/ft/txn_manager.h b/ft/txn_manager.h index 6a0ce1fadac..a94a003513b 100644 --- a/ft/txn_manager.h +++ b/ft/txn_manager.h @@ -92,11 +92,12 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#include -#include -#include "fttypes.h" -#include -#include +#include "portability/toku_portability.h" +#include "portability/toku_pthread.h" + +#include "ft/txn.h" + +typedef struct txn_manager *TXN_MANAGER; struct referenced_xid_tuple { TXNID begin_id; @@ -104,10 +105,6 @@ struct referenced_xid_tuple { uint32_t references; }; -typedef toku::omt txn_omt_t; -typedef toku::omt xid_omt_t; -typedef toku::omt rx_omt_t; - struct txn_manager { toku_mutex_t txn_manager_lock; // a lock protecting this object txn_omt_t live_root_txns; // a sorted tree. @@ -190,22 +187,6 @@ TXNID toku_txn_manager_get_oldest_living_xid(TXN_MANAGER txn_manager); TXNID toku_txn_manager_get_oldest_referenced_xid_estimate(TXN_MANAGER txn_manager); -// -// Types of snapshots that can be taken by a tokutxn -// - TXN_SNAPSHOT_NONE: means that there is no snapshot. Reads do not use snapshot reads. 
-// used for SERIALIZABLE and READ UNCOMMITTED -// - TXN_SNAPSHOT_ROOT: means that all tokutxns use their root transaction's snapshot -// used for REPEATABLE READ -// - TXN_SNAPSHOT_CHILD: means that each child tokutxn creates its own snapshot -// used for READ COMMITTED -// - -typedef enum __TXN_SNAPSHOT_TYPE { - TXN_SNAPSHOT_NONE=0, - TXN_SNAPSHOT_ROOT=1, - TXN_SNAPSHOT_CHILD=2 -} TXN_SNAPSHOT_TYPE; - void toku_txn_manager_handle_snapshot_create_for_child_txn( TOKUTXN txn, TXN_MANAGER txn_manager, diff --git a/ft/ule.cc b/ft/ule.cc index a79ab92a28e..9f7be3af7fb 100644 --- a/ft/ule.cc +++ b/ft/ule.cc @@ -102,27 +102,27 @@ PATENT RIGHTS GRANT: // See design documentation for nested transactions at // TokuWiki/Imp/TransactionsOverview. -#include -#include "ft/fttypes.h" +#include "portability/toku_portability.h" + #include "ft/ft-internal.h" -#include "ft/msg.h" #include "ft/leafentry.h" #include "ft/logger.h" +#include "ft/msg.h" #include "ft/txn.h" #include "ft/txn_manager.h" #include "ft/ule.h" #include "ft/ule-internal.h" #include "ft/xids.h" -#include -#include -#include -#include +#include "util/bytestring.h" +#include "util/omt.h" +#include "util/partitioned_counter.h" +#include "util/scoped_malloc.h" +#include "util/status.h" #define ULE_DEBUG 0 static uint32_t ule_get_innermost_numbytes(ULE ule, uint32_t keylen); - /////////////////////////////////////////////////////////////////////////////////// // Engine status // diff --git a/ft/wbuf.h b/ft/wbuf.h index 5f5ab3e65c8..efa941f51c3 100644 --- a/ft/wbuf.h +++ b/ft/wbuf.h @@ -94,12 +94,10 @@ PATENT RIGHTS GRANT: #include #include -#include -#include +#include "portability/toku_htonl.h" -#include "fttypes.h" - -#define CRC_INCR +#include "util/bytestring.h" +#include "util/x1764.h" /* When serializing a value, write it into a buffer. */ /* This code requires that the buffer be big enough to hold whatever you put into it. 
*/ @@ -113,13 +111,13 @@ struct wbuf { struct x1764 checksum; // The checksum state }; -static inline void wbuf_nocrc_init (struct wbuf *w, void *buf, DISKOFF size) { +static inline void wbuf_nocrc_init (struct wbuf *w, void *buf, unsigned int size) { w->buf = (unsigned char *) buf; w->size = size; w->ndone = 0; } -static inline void wbuf_init (struct wbuf *w, void *buf, DISKOFF size) { +static inline void wbuf_init (struct wbuf *w, void *buf, unsigned int size) { wbuf_nocrc_init(w, buf, size); toku_x1764_init(&w->checksum); } @@ -194,7 +192,7 @@ static inline uint8_t* wbuf_nocrc_reserve_literal_bytes(struct wbuf *w, uint32_t return dest; } -static inline void wbuf_nocrc_literal_bytes(struct wbuf *w, bytevec bytes_bv, uint32_t nbytes) { +static inline void wbuf_nocrc_literal_bytes(struct wbuf *w, const void *bytes_bv, uint32_t nbytes) { const unsigned char *bytes = (const unsigned char *) bytes_bv; #if 0 { int i; for (i=0; ichecksum, &w->buf[w->ndone-nbytes], nbytes); } -static void wbuf_nocrc_bytes (struct wbuf *w, bytevec bytes_bv, uint32_t nbytes) { +static void wbuf_nocrc_bytes (struct wbuf *w, const void *bytes_bv, uint32_t nbytes) { wbuf_nocrc_uint(w, nbytes); wbuf_nocrc_literal_bytes(w, bytes_bv, nbytes); } -static void wbuf_bytes (struct wbuf *w, bytevec bytes_bv, uint32_t nbytes) { +static void wbuf_bytes (struct wbuf *w, const void *bytes_bv, uint32_t nbytes) { wbuf_uint(w, nbytes); wbuf_literal_bytes(w, bytes_bv, nbytes); } @@ -262,73 +260,3 @@ static inline void wbuf_nocrc_uint32_t (struct wbuf *w, uint32_t v) { static inline void wbuf_uint32_t (struct wbuf *w, uint32_t v) { wbuf_uint(w, v); } - -static inline void wbuf_DISKOFF (struct wbuf *w, DISKOFF off) { - wbuf_ulonglong(w, (uint64_t)off); -} - -static inline void wbuf_BLOCKNUM (struct wbuf *w, BLOCKNUM b) { - wbuf_ulonglong(w, b.b); -} -static inline void wbuf_nocrc_BLOCKNUM (struct wbuf *w, BLOCKNUM b) { - wbuf_nocrc_ulonglong(w, b.b); -} - -static inline void wbuf_nocrc_TXNID (struct wbuf *w, 
TXNID tid) { - wbuf_nocrc_ulonglong(w, tid); -} - -static inline void wbuf_nocrc_TXNID_PAIR (struct wbuf *w, TXNID_PAIR tid) { - wbuf_nocrc_ulonglong(w, tid.parent_id64); - wbuf_nocrc_ulonglong(w, tid.child_id64); -} - - -static inline void wbuf_TXNID (struct wbuf *w, TXNID tid) { - wbuf_ulonglong(w, tid); -} - -static inline void wbuf_nocrc_XIDP (struct wbuf *w, XIDP xid) { - wbuf_nocrc_uint32_t(w, xid->formatID); - wbuf_nocrc_uint8_t(w, xid->gtrid_length); - wbuf_nocrc_uint8_t(w, xid->bqual_length); - wbuf_nocrc_literal_bytes(w, xid->data, xid->gtrid_length+xid->bqual_length); -} - -static inline void wbuf_nocrc_LSN (struct wbuf *w, LSN lsn) { - wbuf_nocrc_ulonglong(w, lsn.lsn); -} - -static inline void wbuf_LSN (struct wbuf *w, LSN lsn) { - wbuf_ulonglong(w, lsn.lsn); -} - -static inline void wbuf_MSN (struct wbuf *w, MSN msn) { - wbuf_ulonglong(w, msn.msn); -} - -static inline void wbuf_nocrc_FILENUM (struct wbuf *w, FILENUM fileid) { - wbuf_nocrc_uint(w, fileid.fileid); -} - -static inline void wbuf_FILENUM (struct wbuf *w, FILENUM fileid) { - wbuf_uint(w, fileid.fileid); -} - -// 2954 -static inline void wbuf_nocrc_FILENUMS (struct wbuf *w, FILENUMS v) { - wbuf_nocrc_uint(w, v.num); - uint32_t i; - for (i = 0; i < v.num; i++) { - wbuf_nocrc_FILENUM(w, v.filenums[i]); - } -} - -// 2954 -static inline void wbuf_FILENUMS (struct wbuf *w, FILENUMS v) { - wbuf_uint(w, v.num); - uint32_t i; - for (i = 0; i < v.num; i++) { - wbuf_FILENUM(w, v.filenums[i]); - } -} diff --git a/ft/xids-internal.h b/ft/xids-internal.h index 7e074d90ffa..52f1a1db2a1 100644 --- a/ft/xids-internal.h +++ b/ft/xids-internal.h @@ -97,7 +97,9 @@ PATENT RIGHTS GRANT: // ids[num_xids - 1] is the innermost transaction. // Should only be accessed by accessor functions xids_xxx, not directly. 
-#include +#include "portability/toku_stdint.h" + +#include "ft/txn.h" // If the xids struct is unpacked, the compiler aligns the ids[] and we waste a lot of space typedef struct __attribute__((__packed__)) xids_t { diff --git a/ft/xids.cc b/ft/xids.cc index 5733a10550f..775ae2757f8 100644 --- a/ft/xids.cc +++ b/ft/xids.cc @@ -101,18 +101,16 @@ PATENT RIGHTS GRANT: * host order. */ - #include #include -#include -#include "fttypes.h" -#include "xids.h" -#include "xids-internal.h" -#include "toku_assert.h" -#include "memory.h" -#include +#include "portability/memory.h" +#include "portability/toku_assert.h" +#include "portability/toku_htod.h" +#include "portability/toku_portability.h" +#include "ft/xids.h" +#include "ft/xids-internal.h" ///////////////////////////////////////////////////////////////////////////////// // This layer of abstraction (xids_xxx) understands xids<> and nothing else. @@ -131,12 +129,10 @@ PATENT RIGHTS GRANT: // // - // This is the xids list for a transactionless environment. // It is also the initial state of any xids list created for // nested transactions. - XIDS xids_get_root_xids(void) { static const struct xids_t root_xids = { @@ -153,7 +149,6 @@ xids_can_create_child(XIDS xids) { return (xids->num_xids + 1) != MAX_TRANSACTION_RECORDS; } - int xids_create_unknown_child(XIDS parent_xids, XIDS *xids_p) { // Postcondition: @@ -211,14 +206,12 @@ xids_create_from_buffer(struct rbuf *rb, // xids list for parent transaction *xids_p = xids; } - void xids_destroy(XIDS *xids_p) { if (*xids_p != xids_get_root_xids()) toku_free(*xids_p); *xids_p = NULL; } - // Return xid at requested position. // If requesting an xid out of range (which will be the case if xids array is empty) // then return 0, the xid of the root transaction. 
@@ -236,7 +229,6 @@ xids_get_num_xids(XIDS xids) { return rval; } - // Return innermost xid TXNID xids_get_innermost_xid(XIDS xids) { @@ -281,7 +273,6 @@ xids_get_serialize_size(XIDS xids){ return rval; } - unsigned char * xids_get_end_of_array(XIDS xids) { TXNID *r = xids->ids + xids->num_xids; diff --git a/ft/xids.h b/ft/xids.h index 55a2440c0fd..1627ff5308e 100644 --- a/ft/xids.h +++ b/ft/xids.h @@ -103,11 +103,14 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#include "rbuf.h" -#include "wbuf.h" -#include "tokuconst.h" +#include "ft/txn.h" +#include "ft/rbuf.h" +#include "ft/wbuf.h" +#include "ft/tokuconst.h" -//Retrieve an XIDS representing the root transaction. +typedef struct xids_t *XIDS; + +// Retrieve an XIDS representing the root transaction. XIDS xids_get_root_xids(void); bool xids_can_create_child(XIDS xids); @@ -116,7 +119,7 @@ void xids_cpy(XIDS target, XIDS source); //Creates an XIDS representing this transaction. //You must pass in an XIDS representing the parent of this transaction. -int xids_create_child(XIDS parent_xids, XIDS *xids_p, TXNID this_xid); +int xids_create_child(XIDS parent_xids, XIDS *xids_p, TXNID this_xid); // The following two functions (in order) are equivalent to xids_create child, // but allow you to do most of the work without knowing the new xid. 
diff --git a/ft/ybt.cc b/ft/ybt.cc index 1e02b1c753d..4e0fd2b941c 100644 --- a/ft/ybt.cc +++ b/ft/ybt.cc @@ -94,7 +94,6 @@ PATENT RIGHTS GRANT: #include "portability/memory.h" -#include "ft/fttypes.h" #include "ft/ybt.h" DBT * @@ -187,7 +186,7 @@ toku_destroy_dbt(DBT *dbt) { } DBT * -toku_fill_dbt(DBT *dbt, bytevec k, ITEMLEN len) { +toku_fill_dbt(DBT *dbt, const void *k, uint32_t len) { toku_init_dbt(dbt); dbt->size=len; dbt->data=(char*)k; @@ -246,7 +245,7 @@ dbt_realloc(DBT *dbt) { } int -toku_dbt_set (ITEMLEN len, bytevec val, DBT *d, struct simple_dbt *sdbt) { +toku_dbt_set (uint32_t len, const void *val, DBT *d, struct simple_dbt *sdbt) { // sdbt is the static value used when flags==0 // Otherwise malloc or use the user-supplied memory, as according to the flags in d->flags. int r; diff --git a/ft/ybt.h b/ft/ybt.h index c07ad3a5a7b..480790dcdf5 100644 --- a/ft/ybt.h +++ b/ft/ybt.h @@ -92,8 +92,6 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -// fttypes.h must be first to make 64-bit file mode work right in linux. 
-#include "fttypes.h" #include // TODO: John @@ -109,7 +107,7 @@ DBT *toku_init_dbt_flags(DBT *, uint32_t flags); void toku_destroy_dbt(DBT *); -DBT *toku_fill_dbt(DBT *dbt, bytevec k, ITEMLEN len); +DBT *toku_fill_dbt(DBT *dbt, const void *k, uint32_t len); DBT *toku_memdup_dbt(DBT *dbt, const void *k, size_t len); @@ -117,9 +115,9 @@ DBT *toku_copyref_dbt(DBT *dst, const DBT src); DBT *toku_clone_dbt(DBT *dst, const DBT &src); -int toku_dbt_set(ITEMLEN len, bytevec val, DBT *d, struct simple_dbt *sdbt); +int toku_dbt_set(uint32_t len, const void *val, DBT *d, struct simple_dbt *sdbt); -int toku_dbt_set_value(DBT *, bytevec *val, ITEMLEN vallen, void **staticptrp, bool ybt1_disposable); +int toku_dbt_set_value(DBT *, const void **val, uint32_t vallen, void **staticptrp, bool ybt1_disposable); void toku_sdbt_cleanup(struct simple_dbt *sdbt); diff --git a/locktree/lock_request.cc b/locktree/lock_request.cc index 2f43e8960db..66ae43a42d3 100644 --- a/locktree/lock_request.cc +++ b/locktree/lock_request.cc @@ -91,6 +91,7 @@ PATENT RIGHTS GRANT: #include +#include #include #include "locktree.h" diff --git a/locktree/lock_request.h b/locktree/lock_request.h index f7f302c3298..2dfde89dd7c 100644 --- a/locktree/lock_request.h +++ b/locktree/lock_request.h @@ -92,14 +92,13 @@ PATENT RIGHTS GRANT: #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
#include -#include -#include -#include +#include "portability/toku_pthread.h" -#include "locktree.h" -#include "txnid_set.h" -#include "wfg.h" +#include "locktree/locktree.h" +#include "locktree/txnid_set.h" +#include "locktree/wfg.h" +#include "ft/comparator.h" namespace toku { diff --git a/locktree/locktree.h b/locktree/locktree.h index da0771fc995..74fab0af0ad 100644 --- a/locktree/locktree.h +++ b/locktree/locktree.h @@ -95,7 +95,7 @@ PATENT RIGHTS GRANT: #include #include -#include +#include // just for DICTIONARY_ID.. #include #include diff --git a/locktree/treenode.h b/locktree/treenode.h index d79c70a9dae..44c95e90825 100644 --- a/locktree/treenode.h +++ b/locktree/treenode.h @@ -94,6 +94,7 @@ PATENT RIGHTS GRANT: #include #include +#include #include #include diff --git a/locktree/txnid_set.h b/locktree/txnid_set.h index 2caf4038995..31b0a1990a5 100644 --- a/locktree/txnid_set.h +++ b/locktree/txnid_set.h @@ -91,9 +91,9 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#include +#include "ft/txn.h" -#include +#include "util/omt.h" namespace toku { diff --git a/locktree/wfg.h b/locktree/wfg.h index 8f9abd67d42..3b6c2922ba3 100644 --- a/locktree/wfg.h +++ b/locktree/wfg.h @@ -91,11 +91,8 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
-#include - -#include - -#include "txnid_set.h" +#include "locktree/txnid_set.h" +#include "util/omt.h" namespace toku { diff --git a/src/indexer.cc b/src/indexer.cc index cdc66cdd39b..dc2ca4453fc 100644 --- a/src/indexer.cc +++ b/src/indexer.cc @@ -539,7 +539,7 @@ struct le_cursor_extra { // cachetable pair locks. because no txn can commit on this db, read // the provisional info for the newly read ule. static int -le_cursor_callback(ITEMLEN keylen, bytevec key, ITEMLEN UU(vallen), bytevec val, void *extra, bool lock_only) { +le_cursor_callback(uint32_t keylen, const void *key, uint32_t UU(vallen), const void *val, void *extra, bool lock_only) { if (lock_only || val == NULL) { ; // do nothing if only locking. do nothing if val==NULL, means DB_NOTFOUND } else { diff --git a/src/tests/hotindexer-undo-do-test.cc b/src/tests/hotindexer-undo-do-test.cc index 51f60652d14..e8b56f66d08 100644 --- a/src/tests/hotindexer-undo-do-test.cc +++ b/src/tests/hotindexer-undo-do-test.cc @@ -96,8 +96,6 @@ PATENT RIGHTS GRANT: #include "test.h" #include -#include -#include #include #include #include diff --git a/src/ydb-internal.h b/src/ydb-internal.h index e1ad4a92f3a..a43347eb7a6 100644 --- a/src/ydb-internal.h +++ b/src/ydb-internal.h @@ -97,7 +97,6 @@ PATENT RIGHTS GRANT: #include #include #include -#include #include #include diff --git a/src/ydb_cursor.cc b/src/ydb_cursor.cc index a372fab3006..8ccc00c285e 100644 --- a/src/ydb_cursor.cc +++ b/src/ydb_cursor.cc @@ -248,7 +248,7 @@ query_context_with_input_init(QUERY_CONTEXT_WITH_INPUT context, DBC *c, uint32_t context->input_val = val; } -static int c_getf_first_callback(ITEMLEN keylen, bytevec key, ITEMLEN vallen, bytevec val, void *extra, bool); +static int c_getf_first_callback(uint32_t keylen, const void *key, uint32_t vallen, const void *val, void *extra, bool); static void c_query_context_init(QUERY_CONTEXT context, DBC *c, uint32_t flag, YDB_CALLBACK_FUNCTION f, void *extra) { @@ -291,7 +291,7 @@ c_getf_first(DBC *c, 
uint32_t flag, YDB_CALLBACK_FUNCTION f, void *extra) { //result is the result of the query (i.e. 0 means found, DB_NOTFOUND, etc..) static int -c_getf_first_callback(ITEMLEN keylen, bytevec key, ITEMLEN vallen, bytevec val, void *extra, bool lock_only) { +c_getf_first_callback(uint32_t keylen, const void *key, uint32_t vallen, const void *val, void *extra, bool lock_only) { QUERY_CONTEXT super_context = (QUERY_CONTEXT) extra; QUERY_CONTEXT_BASE context = &super_context->base; @@ -318,7 +318,7 @@ c_getf_first_callback(ITEMLEN keylen, bytevec key, ITEMLEN vallen, bytevec val, return r; } -static int c_getf_last_callback(ITEMLEN keylen, bytevec key, ITEMLEN vallen, bytevec val, void *extra, bool); +static int c_getf_last_callback(uint32_t keylen, const void *key, uint32_t vallen, const void *val, void *extra, bool); static int c_getf_last(DBC *c, uint32_t flag, YDB_CALLBACK_FUNCTION f, void *extra) { @@ -342,7 +342,7 @@ c_getf_last(DBC *c, uint32_t flag, YDB_CALLBACK_FUNCTION f, void *extra) { //result is the result of the query (i.e. 0 means found, DB_NOTFOUND, etc..) static int -c_getf_last_callback(ITEMLEN keylen, bytevec key, ITEMLEN vallen, bytevec val, void *extra, bool lock_only) { +c_getf_last_callback(uint32_t keylen, const void *key, uint32_t vallen, const void *val, void *extra, bool lock_only) { QUERY_CONTEXT super_context = (QUERY_CONTEXT) extra; QUERY_CONTEXT_BASE context = &super_context->base; @@ -369,7 +369,7 @@ c_getf_last_callback(ITEMLEN keylen, bytevec key, ITEMLEN vallen, bytevec val, v return r; } -static int c_getf_next_callback(ITEMLEN keylen, bytevec key, ITEMLEN vallen, bytevec val, void *extra, bool); +static int c_getf_next_callback(uint32_t keylen, const void *key, uint32_t vallen, const void *val, void *extra, bool); static int c_getf_next(DBC *c, uint32_t flag, YDB_CALLBACK_FUNCTION f, void *extra) { @@ -398,7 +398,7 @@ c_getf_next(DBC *c, uint32_t flag, YDB_CALLBACK_FUNCTION f, void *extra) { //result is the result of the query (i.e. 
0 means found, DB_NOTFOUND, etc..) static int -c_getf_next_callback(ITEMLEN keylen, bytevec key, ITEMLEN vallen, bytevec val, void *extra, bool lock_only) { +c_getf_next_callback(uint32_t keylen, const void *key, uint32_t vallen, const void *val, void *extra, bool lock_only) { QUERY_CONTEXT super_context = (QUERY_CONTEXT) extra; QUERY_CONTEXT_BASE context = &super_context->base; @@ -428,7 +428,7 @@ c_getf_next_callback(ITEMLEN keylen, bytevec key, ITEMLEN vallen, bytevec val, v return r; } -static int c_getf_prev_callback(ITEMLEN keylen, bytevec key, ITEMLEN vallen, bytevec val, void *extra, bool); +static int c_getf_prev_callback(uint32_t keylen, const void *key, uint32_t vallen, const void *val, void *extra, bool); static int c_getf_prev(DBC *c, uint32_t flag, YDB_CALLBACK_FUNCTION f, void *extra) { @@ -457,7 +457,7 @@ c_getf_prev(DBC *c, uint32_t flag, YDB_CALLBACK_FUNCTION f, void *extra) { //result is the result of the query (i.e. 0 means found, DB_NOTFOUND, etc..) static int -c_getf_prev_callback(ITEMLEN keylen, bytevec key, ITEMLEN vallen, bytevec val, void *extra, bool lock_only) { +c_getf_prev_callback(uint32_t keylen, const void *key, uint32_t vallen, const void *val, void *extra, bool lock_only) { QUERY_CONTEXT super_context = (QUERY_CONTEXT) extra; QUERY_CONTEXT_BASE context = &super_context->base; @@ -486,7 +486,7 @@ c_getf_prev_callback(ITEMLEN keylen, bytevec key, ITEMLEN vallen, bytevec val, v return r; } -static int c_getf_current_callback(ITEMLEN keylen, bytevec key, ITEMLEN vallen, bytevec val, void *extra, bool); +static int c_getf_current_callback(uint32_t keylen, const void *key, uint32_t vallen, const void *val, void *extra, bool); static int c_getf_current(DBC *c, uint32_t flag, YDB_CALLBACK_FUNCTION f, void *extra) { @@ -503,7 +503,7 @@ c_getf_current(DBC *c, uint32_t flag, YDB_CALLBACK_FUNCTION f, void *extra) { //result is the result of the query (i.e. 0 means found, DB_NOTFOUND, etc..) 
static int -c_getf_current_callback(ITEMLEN keylen, bytevec key, ITEMLEN vallen, bytevec val, void *extra, bool lock_only) { +c_getf_current_callback(uint32_t keylen, const void *key, uint32_t vallen, const void *val, void *extra, bool lock_only) { QUERY_CONTEXT super_context = (QUERY_CONTEXT) extra; QUERY_CONTEXT_BASE context = &super_context->base; @@ -523,7 +523,7 @@ c_getf_current_callback(ITEMLEN keylen, bytevec key, ITEMLEN vallen, bytevec val return r; } -static int c_getf_set_callback(ITEMLEN keylen, bytevec key, ITEMLEN vallen, bytevec val, void *extra, bool); +static int c_getf_set_callback(uint32_t keylen, const void *key, uint32_t vallen, const void *val, void *extra, bool); int toku_c_getf_set(DBC *c, uint32_t flag, DBT *key, YDB_CALLBACK_FUNCTION f, void *extra) { @@ -548,7 +548,7 @@ toku_c_getf_set(DBC *c, uint32_t flag, DBT *key, YDB_CALLBACK_FUNCTION f, void * //result is the result of the query (i.e. 0 means found, DB_NOTFOUND, etc..) static int -c_getf_set_callback(ITEMLEN keylen, bytevec key, ITEMLEN vallen, bytevec val, void *extra, bool lock_only) { +c_getf_set_callback(uint32_t keylen, const void *key, uint32_t vallen, const void *val, void *extra, bool lock_only) { QUERY_CONTEXT_WITH_INPUT super_context = (QUERY_CONTEXT_WITH_INPUT) extra; QUERY_CONTEXT_BASE context = &super_context->base; @@ -576,7 +576,7 @@ c_getf_set_callback(ITEMLEN keylen, bytevec key, ITEMLEN vallen, bytevec val, vo return r; } -static int c_getf_set_range_callback(ITEMLEN keylen, bytevec key, ITEMLEN vallen, bytevec val, void *extra, bool); +static int c_getf_set_range_callback(uint32_t keylen, const void *key, uint32_t vallen, const void *val, void *extra, bool); static int c_getf_set_range(DBC *c, uint32_t flag, DBT *key, YDB_CALLBACK_FUNCTION f, void *extra) { @@ -601,7 +601,7 @@ c_getf_set_range(DBC *c, uint32_t flag, DBT *key, YDB_CALLBACK_FUNCTION f, void //result is the result of the query (i.e. 0 means found, DB_NOTFOUND, etc..) 
static int -c_getf_set_range_callback(ITEMLEN keylen, bytevec key, ITEMLEN vallen, bytevec val, void *extra, bool lock_only) { +c_getf_set_range_callback(uint32_t keylen, const void *key, uint32_t vallen, const void *val, void *extra, bool lock_only) { QUERY_CONTEXT_WITH_INPUT super_context = (QUERY_CONTEXT_WITH_INPUT) extra; QUERY_CONTEXT_BASE context = &super_context->base; @@ -653,7 +653,7 @@ c_getf_set_range_with_bound(DBC *c, uint32_t flag, DBT *key, DBT *key_bound, YDB return r; } -static int c_getf_set_range_reverse_callback(ITEMLEN keylen, bytevec key, ITEMLEN vallen, bytevec val, void *extra, bool); +static int c_getf_set_range_reverse_callback(uint32_t keylen, const void *key, uint32_t vallen, const void *val, void *extra, bool); static int c_getf_set_range_reverse(DBC *c, uint32_t flag, DBT *key, YDB_CALLBACK_FUNCTION f, void *extra) { @@ -678,7 +678,7 @@ c_getf_set_range_reverse(DBC *c, uint32_t flag, DBT *key, YDB_CALLBACK_FUNCTION //result is the result of the query (i.e. 0 means found, DB_NOTFOUND, etc..) 
static int -c_getf_set_range_reverse_callback(ITEMLEN keylen, bytevec key, ITEMLEN vallen, bytevec val, void *extra, bool lock_only) { +c_getf_set_range_reverse_callback(uint32_t keylen, const void *key, uint32_t vallen, const void *val, void *extra, bool lock_only) { QUERY_CONTEXT_WITH_INPUT super_context = (QUERY_CONTEXT_WITH_INPUT) extra; QUERY_CONTEXT_BASE context = &super_context->base; diff --git a/src/ydb_db.cc b/src/ydb_db.cc index aeb0f671550..f3cc74be4cf 100644 --- a/src/ydb_db.cc +++ b/src/ydb_db.cc @@ -96,7 +96,6 @@ PATENT RIGHTS GRANT: #include #include #include -#include #include "ydb_cursor.h" #include "ydb_row_lock.h" @@ -943,7 +942,7 @@ struct last_key_extra { }; static int -db_get_last_key_callback(ITEMLEN keylen, bytevec key, ITEMLEN vallen UU(), bytevec val UU(), void *extra, bool lock_only) { +db_get_last_key_callback(uint32_t keylen, const void *key, uint32_t vallen UU(), const void *val UU(), void *extra, bool lock_only) { if (!lock_only) { DBT keydbt; toku_fill_dbt(&keydbt, key, keylen); diff --git a/util/bytestring.h b/util/bytestring.h new file mode 100644 index 00000000000..43119983452 --- /dev/null +++ b/util/bytestring.h @@ -0,0 +1,96 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: + +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). 
+ + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuDB, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2014 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. 
+ + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. 
+*/ + +#pragma once + +#include "portability/toku_stdint.h" + +struct BYTESTRING { + uint32_t len; + char *data; +}; From 7ad1f1b925b58e3c1549dcb6d95371233272d79f Mon Sep 17 00:00:00 2001 From: John Esmet Date: Thu, 3 Jul 2014 12:30:17 -0400 Subject: [PATCH 068/190] FT-298 Start breaking up ft-internal.h by moving serialization code to its own header --- ft/comparator.h | 5 +- ft/ft-cachetable-wrappers.cc | 2 +- ft/ft-flusher.cc | 1 + ft/ft-internal.h | 127 +++++++------------------------ ft/ft-node-deserialize.cc | 3 +- ft/ft-ops.cc | 2 + ft/ft-serialize.cc | 1 + ft/ft-serialize.h | 112 ++++++++++++++++++++++++++++ ft/ft-test-helpers.cc | 1 + ft/ft.cc | 9 +-- ft/ft.h | 10 +-- ft/ft_node-serialize.cc | 1 + ft/ft_node-serialize.h | 140 +++++++++++++++++++++++++++++++++++ ft/ftverify.cc | 2 + ft/loader/dbufio.cc | 2 +- ft/loader/loader.cc | 2 + ft/node.cc | 6 ++ ft/node.h | 1 + ft/rollback-ct-callbacks.cc | 1 + ft/tests/test.h | 14 ++-- ft/tokuftdump.cc | 2 + src/ydb.cc | 60 +++++++-------- 22 files changed, 345 insertions(+), 159 deletions(-) create mode 100644 ft/ft-serialize.h create mode 100644 ft/ft_node-serialize.h diff --git a/ft/comparator.h b/ft/comparator.h index 2b38283adfa..9533d0ca0d3 100644 --- a/ft/comparator.h +++ b/ft/comparator.h @@ -99,8 +99,9 @@ PATENT RIGHTS GRANT: typedef int (*ft_compare_func)(DB *db, const DBT *a, const DBT *b); -// TODO: this should really all be encapsulated in ft/comparator.cc -int toku_builtin_compare_fun(DB *db, const DBT *a, const DBT *b) __attribute__((__visibility__("default"))); +int toku_keycompare(const void *key1, uint32_t key1len, const void *key2, uint32_t key2len); + +int toku_builtin_compare_fun (DB *, const DBT *, const DBT*) __attribute__((__visibility__("default"))); namespace toku { diff --git a/ft/ft-cachetable-wrappers.cc b/ft/ft-cachetable-wrappers.cc index e8fc271bed4..17e7751d05f 100644 --- a/ft/ft-cachetable-wrappers.cc +++ b/ft/ft-cachetable-wrappers.cc @@ -140,7 +140,7 @@ 
cachetable_put_empty_node_with_dep_nodes( dependent_dirty_bits, name, fullhash, - toku_node_save_ct_pair); + toku_ftnode_save_ct_pair); *result = new_node; } diff --git a/ft/ft-flusher.cc b/ft/ft-flusher.cc index 7e8524794dc..f664878cc97 100644 --- a/ft/ft-flusher.cc +++ b/ft/ft-flusher.cc @@ -95,6 +95,7 @@ PATENT RIGHTS GRANT: #include "ft/ft-internal.h" #include "ft/ft-flusher.h" #include "ft/ft-flusher-internal.h" +#include "ft/ft_node-serialize.h" #include "ft/node.h" #include "portability/toku_assert.h" #include "portability/toku_atomic.h" diff --git a/ft/ft-internal.h b/ft/ft-internal.h index fadab70917e..4018b5c4dfa 100644 --- a/ft/ft-internal.h +++ b/ft/ft-internal.h @@ -124,10 +124,12 @@ enum { FT_SEQINSERT_SCORE_THRESHOLD = 100 }; uint32_t compute_child_fullhash (CACHEFILE cf, FTNODE node, int childnum); +enum ft_type { + FT_CURRENT = 1, + FT_CHECKPOINT_INPROGRESS +}; + // The ft_header is not managed by the cachetable. Instead, it hangs off the cachefile as userdata. - -enum ft_type {FT_CURRENT=1, FT_CHECKPOINT_INPROGRESS}; - struct ft_header { enum ft_type type; @@ -292,9 +294,11 @@ struct ft_handle { struct ft_options options; }; -// TODO: Move to cachetable header PAIR_ATTR make_ftnode_pair_attr(FTNODE node); PAIR_ATTR make_invalid_pair_attr(void); + + +// Only exported for tests. // Cachetable callbacks for ftnodes. 
void toku_ftnode_clone_callback(void* value_data, void** cloned_value_data, long* clone_size, PAIR_ATTR* new_attr, bool for_checkpoint, void* write_extraargs); void toku_ftnode_checkpoint_complete_callback(void *value_data); @@ -309,91 +313,11 @@ int toku_ftnode_cleaner_callback( void *ftnode_pv, BLOCKNUM blocknum, uint32_t f CACHETABLE_WRITE_CALLBACK get_write_callbacks_for_node(FT ft); -/* serialization code */ -void toku_create_compressed_partition_from_available(FTNODE node, int childnum, - enum toku_compression_method compression_method, - SUB_BLOCK sb); -int toku_serialize_ftnode_to_memory (FTNODE node, - FTNODE_DISK_DATA* ndd, - unsigned int basementnodesize, - enum toku_compression_method compression_method, - bool do_rebalancing, - bool in_parallel, - /*out*/ size_t *n_bytes_to_write, - /*out*/ size_t *n_uncompressed_bytes, - /*out*/ char **bytes_to_write); -int toku_serialize_ftnode_to(int fd, BLOCKNUM, FTNODE node, FTNODE_DISK_DATA* ndd, bool do_rebalancing, FT ft, bool for_checkpoint); -int toku_serialize_rollback_log_to (int fd, ROLLBACK_LOG_NODE log, SERIALIZED_ROLLBACK_LOG_NODE serialized_log, bool is_serialized, - FT ft, bool for_checkpoint); -void toku_serialize_rollback_log_to_memory_uncompressed(ROLLBACK_LOG_NODE log, SERIALIZED_ROLLBACK_LOG_NODE serialized); -int toku_deserialize_rollback_log_from (int fd, BLOCKNUM blocknum, ROLLBACK_LOG_NODE *logp, FT ft); -int toku_deserialize_bp_from_disk(FTNODE node, FTNODE_DISK_DATA ndd, int childnum, int fd, struct ftnode_fetch_extra* bfe); -int toku_deserialize_bp_from_compressed(FTNODE node, int childnum, struct ftnode_fetch_extra *bfe); -int toku_deserialize_ftnode_from (int fd, BLOCKNUM off, uint32_t /*fullhash*/, FTNODE *ftnode, FTNODE_DISK_DATA* ndd, struct ftnode_fetch_extra* bfe); - -// For verifying old, non-upgraded nodes (versions 13 and 14). 
-int -decompress_from_raw_block_into_rbuf(uint8_t *raw_block, size_t raw_block_size, struct rbuf *rb, BLOCKNUM blocknum); -// - -//////////////// TODO: Move these function declarations -int -deserialize_ft_from_fd_into_rbuf(int fd, - toku_off_t offset_of_header, - struct rbuf *rb, - uint64_t *checkpoint_count, - LSN *checkpoint_lsn, - uint32_t * version_p); - -int -deserialize_ft_versioned(int fd, struct rbuf *rb, FT *ft, uint32_t version); - -void read_block_from_fd_into_rbuf( - int fd, - BLOCKNUM blocknum, - FT ft, - struct rbuf *rb - ); - -int -read_compressed_sub_block(struct rbuf *rb, struct sub_block *sb); - -int -verify_ftnode_sub_block (struct sub_block *sb); - -void -just_decompress_sub_block(struct sub_block *sb); - -/* Beginning of ft-node-deserialize.c helper functions. */ -void initialize_ftnode(FTNODE node, BLOCKNUM blocknum); -int read_and_check_magic(struct rbuf *rb); -int read_and_check_version(FTNODE node, struct rbuf *rb); -void read_node_info(FTNODE node, struct rbuf *rb, int version); -void allocate_and_read_partition_offsets(FTNODE node, struct rbuf *rb, FTNODE_DISK_DATA *ndd); -int check_node_info_checksum(struct rbuf *rb); -void read_legacy_node_info(FTNODE node, struct rbuf *rb, int version); -int check_legacy_end_checksum(struct rbuf *rb); -/* End of ft-node-deserialization.c helper functions. */ - -unsigned int toku_serialize_ftnode_size(FTNODE node); /* How much space will it take? 
*/ - -void toku_verify_or_set_counts(FTNODE); - -size_t toku_serialize_ft_size (FT_HEADER h); -void toku_serialize_ft_to (int fd, FT_HEADER h, struct block_table *blocktable, CACHEFILE cf); -void toku_serialize_ft_to_wbuf ( - struct wbuf *wbuf, - FT_HEADER h, - DISKOFF translation_location_on_disk, - DISKOFF translation_size_on_disk - ); -int toku_deserialize_ft_from (int fd, LSN max_acceptable_lsn, FT *ft); -void toku_serialize_descriptor_contents_to_fd(int fd, DESCRIPTOR desc, DISKOFF offset); -void toku_serialize_descriptor_contents_to_wbuf(struct wbuf *wb, DESCRIPTOR desc); - +// This is only exported for tests. // append a child node to a parent node void toku_ft_nonleaf_append_child(FTNODE node, FTNODE child, const DBT *pivotkey); +// This is only exported for tests. // append a message to a nonleaf node child buffer void toku_ft_append_to_child_buffer(const toku::comparator &cmp, FTNODE node, int childnum, enum ft_msg_type type, MSN msn, XIDS xids, bool is_fresh, const DBT *key, const DBT *val); @@ -406,14 +330,7 @@ STAT64INFO_S toku_get_and_clear_basement_stats(FTNODE leafnode); #define VERIFY_NODE(t,n) ((void)0) #endif -void toku_ft_status_update_pivot_fetch_reason(struct ftnode_fetch_extra *bfe); -void toku_ft_status_update_flush_reason(FTNODE node, uint64_t uncompressed_bytes_flushed, uint64_t bytes_written, tokutime_t write_time, bool for_checkpoint); -void toku_ft_status_update_serialize_times(FTNODE node, tokutime_t serialize_time, tokutime_t compress_time); -void toku_ft_status_update_deserialize_times(FTNODE node, tokutime_t deserialize_time, tokutime_t decompress_time); -void toku_ft_status_note_msn_discard(void); -void toku_ft_status_note_update(bool broadcast); -void toku_ft_status_note_msg_bytes_out(size_t buffsize); -void toku_ft_status_note_ftnode(int height, bool created); // created = false means destroyed +void toku_verify_or_set_counts(FTNODE); // // Helper function to fill a ftnode_fetch_extra with data @@ -456,6 +373,7 @@ void 
fill_bfe_for_prefetch(struct ftnode_fetch_extra *bfe, FT ft, struct ft_curs void destroy_bfe_for_prefetch(struct ftnode_fetch_extra *bfe); +// TODO: consider moving this to ft/pivotkeys.cc class pivot_bounds { public: pivot_bounds(const DBT &lbe_dbt, const DBT &ubi_dbt); @@ -477,13 +395,10 @@ private: const DBT _upper_bound_inclusive; }; -bool -toku_bfe_wants_child_available (struct ftnode_fetch_extra* bfe, int childnum); - -int -toku_bfe_leftmost_child_wanted(struct ftnode_fetch_extra *bfe, FTNODE node); -int -toku_bfe_rightmost_child_wanted(struct ftnode_fetch_extra *bfe, FTNODE node); +// TODO: move into the ftnode_fetch_extra class +bool toku_bfe_wants_child_available (struct ftnode_fetch_extra* bfe, int childnum); +int toku_bfe_leftmost_child_wanted(struct ftnode_fetch_extra *bfe, FTNODE node); +int toku_bfe_rightmost_child_wanted(struct ftnode_fetch_extra *bfe, FTNODE node); // allocate a block number // allocate and initialize a ftnode @@ -668,10 +583,20 @@ typedef struct { TOKU_ENGINE_STATUS_ROW_S status[FT_STATUS_NUM_ROWS]; } FT_STATUS_S, *FT_STATUS; +void toku_ft_status_update_pivot_fetch_reason(struct ftnode_fetch_extra *bfe); +void toku_ft_status_update_flush_reason(FTNODE node, uint64_t uncompressed_bytes_flushed, uint64_t bytes_written, tokutime_t write_time, bool for_checkpoint); +void toku_ft_status_update_serialize_times(FTNODE node, tokutime_t serialize_time, tokutime_t compress_time); +void toku_ft_status_update_deserialize_times(FTNODE node, tokutime_t deserialize_time, tokutime_t decompress_time); +void toku_ft_status_note_msn_discard(void); +void toku_ft_status_note_update(bool broadcast); +void toku_ft_status_note_msg_bytes_out(size_t buffsize); +void toku_ft_status_note_ftnode(int height, bool created); // created = false means destroyed + void toku_ft_get_status(FT_STATUS); void toku_flusher_thread_set_callback(void (*callback_f)(int, void*), void* extra); +// For upgrade int toku_upgrade_subtree_estimates_to_stat64info(int fd, FT ft) 
__attribute__((nonnull)); int toku_upgrade_msn_from_root_to_header(int fd, FT ft) __attribute__((nonnull)); diff --git a/ft/ft-node-deserialize.cc b/ft/ft-node-deserialize.cc index 123035771db..9ea167c3cd9 100644 --- a/ft/ft-node-deserialize.cc +++ b/ft/ft-node-deserialize.cc @@ -90,7 +90,8 @@ PATENT RIGHTS GRANT: #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." #include "ft/node.h" -#include "ft-internal.h" +#include "ft/ft-internal.h" +#include "ft/ft_node-serialize.h" /* * ft-node-deserialize.c - diff --git a/ft/ft-ops.cc b/ft/ft-ops.cc index ec05d398e25..9ebcd4939c3 100644 --- a/ft/ft-ops.cc +++ b/ft/ft-ops.cc @@ -207,7 +207,9 @@ basement nodes, bulk fetch, and partial fetch: #include "ft/ft-cachetable-wrappers.h" #include "ft/ft-flusher.h" #include "ft/ft-internal.h" +#include "ft/ft-serialize.h" #include "ft/ft_layout_version.h" +#include "ft/ft_node-serialize.h" #include "ft/msg.h" #include "ft/leafentry.h" #include "ft/log-internal.h" diff --git a/ft/ft-serialize.cc b/ft/ft-serialize.cc index 3cf8f2a3294..475dc023149 100644 --- a/ft/ft-serialize.cc +++ b/ft/ft-serialize.cc @@ -94,6 +94,7 @@ PATENT RIGHTS GRANT: #include "ft/compress.h" #include "ft/ft.h" #include "ft/ft-internal.h" +#include "ft/ft-serialize.h" #include "ft/msg.h" // not version-sensitive because we only serialize a descriptor using the current layout_version diff --git a/ft/ft-serialize.h b/ft/ft-serialize.h new file mode 100644 index 00000000000..55b3af6ba20 --- /dev/null +++ b/ft/ft-serialize.h @@ -0,0 +1,112 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: + +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or 
modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuDB, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. 
+ + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. + + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. 
+*/ + +#pragma once + +#include "ft/block_table.h" +#include "ft/ft.h" + +size_t toku_serialize_ft_size(struct ft_header *h); +void toku_serialize_ft_to(int fd, struct ft_header *h, struct block_table *blocktable, CACHEFILE cf); +void toku_serialize_ft_to_wbuf(struct wbuf *wbuf, struct ft_header *h, DISKOFF translation_location_on_disk, DISKOFF translation_size_on_disk); +void toku_serialize_descriptor_contents_to_fd(int fd, DESCRIPTOR desc, DISKOFF offset); +void toku_serialize_descriptor_contents_to_wbuf(struct wbuf *wb, DESCRIPTOR desc); + +int toku_deserialize_ft_from(int fd, LSN max_acceptable_lsn, FT *ft); + +// TODO rename +int deserialize_ft_from_fd_into_rbuf(int fd, + toku_off_t offset_of_header, + struct rbuf *rb, + uint64_t *checkpoint_count, + LSN *checkpoint_lsn, + uint32_t *version_p); + +// used by verify +// TODO rename +int deserialize_ft_versioned(int fd, struct rbuf *rb, FT *ft, uint32_t version); diff --git a/ft/ft-test-helpers.cc b/ft/ft-test-helpers.cc index 2593c7c70da..b2b11e2ada0 100644 --- a/ft/ft-test-helpers.cc +++ b/ft/ft-test-helpers.cc @@ -93,6 +93,7 @@ PATENT RIGHTS GRANT: #include "ft/ft-cachetable-wrappers.h" #include "ft/ft-internal.h" #include "ft/ft-flusher.h" +#include "ft/ft_node-serialize.h" #include "ft/node.h" #include "ft/ule.h" diff --git a/ft/ft.cc b/ft/ft.cc index e60e57d547c..6df7531d311 100644 --- a/ft/ft.cc +++ b/ft/ft.cc @@ -93,6 +93,8 @@ PATENT RIGHTS GRANT: #include "ft/ft.h" #include "ft/ft-cachetable-wrappers.h" #include "ft/ft-internal.h" +#include "ft/ft-serialize.h" +#include "ft/ft_node-serialize.h" #include "ft/log-internal.h" #include "ft/log_header.h" #include "ft/node.h" @@ -360,11 +362,6 @@ static void ft_note_unpin_by_checkpoint (CACHEFILE UU(cachefile), void *header_v // End of Functions that are callbacks to the cachefile ///////////////////////////////////////////////////////////////////////// -void toku_node_save_ct_pair(CACHEKEY UU(key), void *value_data, PAIR p) { - FTNODE CAST_FROM_VOIDP(node, 
value_data); - node->ct_pair = p; -} - static void setup_initial_ft_root_node(FT ft, BLOCKNUM blocknum) { FTNODE XCALLOC(node); toku_initialize_empty_ftnode(node, blocknum, 0, 1, ft->h->layout_version, ft->h->flags); @@ -375,7 +372,7 @@ static void setup_initial_ft_root_node(FT ft, BLOCKNUM blocknum) { toku_cachetable_put(ft->cf, blocknum, fullhash, node, make_ftnode_pair_attr(node), get_write_callbacks_for_node(ft), - toku_node_save_ct_pair); + toku_ftnode_save_ct_pair); toku_unpin_ftnode(ft, node); } diff --git a/ft/ft.h b/ft/ft.h index 2e01be9a0ed..21dd7da4407 100644 --- a/ft/ft.h +++ b/ft/ft.h @@ -198,7 +198,6 @@ void toku_ft_set_compression_method(FT ft, enum toku_compression_method method); void toku_ft_get_compression_method(FT ft, enum toku_compression_method *methodp); void toku_ft_set_fanout(FT ft, unsigned int fanout); void toku_ft_get_fanout(FT ft, unsigned int *fanout); -void toku_node_save_ct_pair(CACHEKEY UU(key), void *value_data, PAIR p); // mark the ft as a blackhole. any message injections will be a no op. void toku_ft_set_blackhole(FT_HANDLE ft_handle); @@ -207,12 +206,14 @@ void toku_ft_set_blackhole(FT_HANDLE ft_handle); // The difference between the two is MVCC garbage. void toku_ft_get_garbage(FT ft, uint64_t *total_space, uint64_t *used_space); +// TODO: Should be in portability int get_num_cores(void); + +// TODO: Use the cachetable's worker pool instead of something managed by the FT... 
struct toku_thread_pool *get_ft_pool(void); -void dump_bad_block(unsigned char *vp, uint64_t size); +// TODO: Should be in portability int toku_single_process_lock(const char *lock_dir, const char *which, int *lockfd); - int toku_single_process_unlock(int *lockfd); void tokudb_update_product_name_strings(void); @@ -229,6 +230,3 @@ struct toku_product_name_strings_struct { extern struct toku_product_name_strings_struct toku_product_name_strings; extern int tokudb_num_envs; - -int toku_keycompare (const void *key1, uint32_t key1len, const void *key2, uint32_t key2len); -int toku_builtin_compare_fun (DB *, const DBT *, const DBT*) __attribute__((__visibility__("default"))); diff --git a/ft/ft_node-serialize.cc b/ft/ft_node-serialize.cc index 28684ac4dbb..d9615b730b2 100644 --- a/ft/ft_node-serialize.cc +++ b/ft/ft_node-serialize.cc @@ -94,6 +94,7 @@ PATENT RIGHTS GRANT: #include "ft/compress.h" #include "ft/ft.h" #include "ft/ft-internal.h" +#include "ft/ft_node-serialize.h" #include "ft/node.h" #include "ft/log-internal.h" #include "ft/rollback.h" diff --git a/ft/ft_node-serialize.h b/ft/ft_node-serialize.h new file mode 100644 index 00000000000..022769a3648 --- /dev/null +++ b/ft/ft_node-serialize.h @@ -0,0 +1,140 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: + +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). 
+ + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuDB, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. 
+ + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. 
+*/ + +#pragma once + +#include "ft/block_table.h" +#include "ft/node.h" +#include "ft/ft.h" +#include "ft/sub_block.h" +#include "ft/rbuf.h" +#include "ft/wbuf.h" + +unsigned int toku_serialize_ftnode_size(FTNODE node); +int toku_serialize_ftnode_to_memory(FTNODE node, FTNODE_DISK_DATA *ndd, + unsigned int basementnodesize, + enum toku_compression_method compression_method, + bool do_rebalancing, bool in_parallel, + size_t *n_bytes_to_write, size_t *n_uncompressed_bytes, + char **bytes_to_write); +int toku_serialize_ftnode_to(int fd, BLOCKNUM, FTNODE node, FTNODE_DISK_DATA *ndd, bool do_rebalancing, FT ft, bool for_checkpoint); +int toku_serialize_rollback_log_to(int fd, ROLLBACK_LOG_NODE log, SERIALIZED_ROLLBACK_LOG_NODE serialized_log, bool is_serialized, + FT ft, bool for_checkpoint); +void toku_serialize_rollback_log_to_memory_uncompressed(ROLLBACK_LOG_NODE log, SERIALIZED_ROLLBACK_LOG_NODE serialized); + +int toku_deserialize_rollback_log_from(int fd, BLOCKNUM blocknum, ROLLBACK_LOG_NODE *logp, FT ft); +int toku_deserialize_bp_from_disk(FTNODE node, FTNODE_DISK_DATA ndd, int childnum, int fd, struct ftnode_fetch_extra *bfe); +int toku_deserialize_bp_from_compressed(FTNODE node, int childnum, struct ftnode_fetch_extra *bfe); +int toku_deserialize_ftnode_from(int fd, BLOCKNUM off, uint32_t fullhash, FTNODE *node, FTNODE_DISK_DATA *ndd, struct ftnode_fetch_extra *bfe); + +// used by nonleaf node partial eviction +void toku_create_compressed_partition_from_available(FTNODE node, int childnum, + enum toku_compression_method compression_method, SUB_BLOCK sb); + +// For verifying old, non-upgraded nodes (versions 13 and 14). 
+int decompress_from_raw_block_into_rbuf(uint8_t *raw_block, size_t raw_block_size, struct rbuf *rb, BLOCKNUM blocknum); + +// used by verify +int deserialize_ft_versioned(int fd, struct rbuf *rb, FT *ft, uint32_t version); +void read_block_from_fd_into_rbuf(int fd, BLOCKNUM blocknum, FT ft, struct rbuf *rb); +int read_compressed_sub_block(struct rbuf *rb, struct sub_block *sb); +int verify_ftnode_sub_block(struct sub_block *sb); +void just_decompress_sub_block(struct sub_block *sb); + +// used by ft-node-deserialize.cc +void initialize_ftnode(FTNODE node, BLOCKNUM blocknum); +int read_and_check_magic(struct rbuf *rb); +int read_and_check_version(FTNODE node, struct rbuf *rb); +void read_node_info(FTNODE node, struct rbuf *rb, int version); +void allocate_and_read_partition_offsets(FTNODE node, struct rbuf *rb, FTNODE_DISK_DATA *ndd); +int check_node_info_checksum(struct rbuf *rb); +void read_legacy_node_info(FTNODE node, struct rbuf *rb, int version); +int check_legacy_end_checksum(struct rbuf *rb); + +// exported so the loader can dump bad blocks +void dump_bad_block(unsigned char *vp, uint64_t size); diff --git a/ft/ftverify.cc b/ft/ftverify.cc index 7afde55cddb..5e3dbc1b57a 100644 --- a/ft/ftverify.cc +++ b/ft/ftverify.cc @@ -100,7 +100,9 @@ PATENT RIGHTS GRANT: #include "ft/block_allocator.h" #include "ft/ft-internal.h" +#include "ft/ft-serialize.h" #include "ft/ft_layout_version.h" +#include "ft/ft_node-serialize.h" #include "ft/node.h" #include "ft/rbuf.h" #include "ft/sub_block.h" diff --git a/ft/loader/dbufio.cc b/ft/loader/dbufio.cc index 2fde67cf442..7df0e0fe562 100644 --- a/ft/loader/dbufio.cc +++ b/ft/loader/dbufio.cc @@ -96,8 +96,8 @@ PATENT RIGHTS GRANT: #include "portability/toku_assert.h" #include "portability/memory.h" -#include "ft/ft.h" #include "ft/ft-internal.h" +#include "ft/ft_node-serialize.h" #include "loader/dbufio.h" #include "loader/loader-internal.h" diff --git a/ft/loader/loader.cc b/ft/loader/loader.cc index ec2fea4a1cf..11024f93c19 
100644 --- a/ft/loader/loader.cc +++ b/ft/loader/loader.cc @@ -103,6 +103,8 @@ PATENT RIGHTS GRANT: #include "ft/block_table.h" #include "ft/ft.h" #include "ft/ft-internal.h" +#include "ft/ft-serialize.h" +#include "ft/ft_node-serialize.h" #include "ft/leafentry.h" #include "ft/loader/loader-internal.h" #include "ft/loader/pqueue.h" diff --git a/ft/node.cc b/ft/node.cc index 6ce460df293..8c87a5194bf 100644 --- a/ft/node.cc +++ b/ft/node.cc @@ -91,6 +91,7 @@ PATENT RIGHTS GRANT: #include "ft/ft.h" #include "ft/ft-internal.h" +#include "ft/ft_node-serialize.h" #include "ft/node.h" #include "ft/rbuf.h" #include "ft/wbuf.h" @@ -1673,6 +1674,11 @@ int toku_ftnode_hot_next_child(FTNODE node, const DBT *k, const toku::comparator return low; } +void toku_ftnode_save_ct_pair(CACHEKEY UU(key), void *value_data, PAIR p) { + FTNODE CAST_FROM_VOIDP(node, value_data); + node->ct_pair = p; +} + static void ft_nonleaf_msg_all(const toku::comparator &cmp, FTNODE node, const ft_msg &msg, bool is_fresh, size_t flow_deltas[]) // Effect: Put the message into a nonleaf node. We put it into all children, possibly causing the children to become reactive. 
diff --git a/ft/node.h b/ft/node.h index 9870cfca65f..1c77ff95eca 100644 --- a/ft/node.h +++ b/ft/node.h @@ -364,6 +364,7 @@ void toku_initialize_empty_ftnode(FTNODE node, BLOCKNUM blocknum, int height, in int layout_version, unsigned int flags); int toku_ftnode_which_child(FTNODE node, const DBT *k, const toku::comparator &cmp); +void toku_ftnode_save_ct_pair(CACHEKEY key, void *value_data, PAIR p); // // Field in ftnode_fetch_extra that tells the diff --git a/ft/rollback-ct-callbacks.cc b/ft/rollback-ct-callbacks.cc index 9b75692d44a..680ba2c7cea 100644 --- a/ft/rollback-ct-callbacks.cc +++ b/ft/rollback-ct-callbacks.cc @@ -94,6 +94,7 @@ PATENT RIGHTS GRANT: #include "ft/block_table.h" #include "ft/ft-internal.h" +#include "ft/ft_node-serialize.h" #include "ft/rollback.h" #include "ft/rollback-ct-callbacks.h" diff --git a/ft/tests/test.h b/ft/tests/test.h index c7a00966f01..4f7ba0b5c21 100644 --- a/ft/tests/test.h +++ b/ft/tests/test.h @@ -101,16 +101,18 @@ PATENT RIGHTS GRANT: #include #include -#include "ft/ft.h" -#include "ft/node.h" #include "ft/block_allocator.h" #include "ft/block_table.h" -#include "ft/log-internal.h" -#include "ft/logger.h" -#include "ft/ft-ops.h" -#include "ft/cursor.h" #include "ft/cachetable.h" #include "ft/cachetable-internal.h" +#include "ft/cursor.h" +#include "ft/ft.h" +#include "ft/ft-ops.h" +#include "ft/ft-serialize.h" +#include "ft/ft_node-serialize.h" +#include "ft/log-internal.h" +#include "ft/logger.h" +#include "ft/node.h" #include "util/bytestring.h" #define CKERR(r) ({ int __r = r; if (__r!=0) fprintf(stderr, "%s:%d error %d %s\n", __FILE__, __LINE__, __r, strerror(r)); assert(__r==0); }) diff --git a/ft/tokuftdump.cc b/ft/tokuftdump.cc index 3c3082f5ea8..00bb505d64d 100644 --- a/ft/tokuftdump.cc +++ b/ft/tokuftdump.cc @@ -102,6 +102,8 @@ PATENT RIGHTS GRANT: #include "ft/cachetable.h" #include "ft/ft.h" #include "ft/ft-internal.h" +#include "ft/ft-serialize.h" +#include "ft/ft_node-serialize.h" #include "ft/node.h" 
static int do_dump_data = 1; diff --git a/src/ydb.cc b/src/ydb.cc index d164eb4adbc..c75cb306b61 100644 --- a/src/ydb.cc +++ b/src/ydb.cc @@ -92,46 +92,36 @@ PATENT RIGHTS GRANT: extern const char *toku_patent_string; const char *toku_copyright_string = "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved."; -#include -#include -#include - #include -#include #include -#include -#include -#include #include -#include -#include -#include -#include -#include +#include "portability/memory.h" +#include "portability/toku_assert.h" +#include "portability/toku_portability.h" +#include "portability/toku_pthread.h" +#include "portability/toku_stdlib.h" -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "ydb.h" -#include "ydb-internal.h" -#include "ydb_cursor.h" -#include "ydb_row_lock.h" -#include "ydb_env_func.h" -#include "ydb_db.h" -#include "ydb_write.h" -#include "ydb_txn.h" -#include "loader.h" -#include "indexer.h" +#include "ft/ft-flusher.h" +#include "ft/cachetable.h" +#include "ft/log.h" +#include "ft/checkpoint.h" +#include "ft/loader/loader.h" +#include "ft/log_header.h" +#include "ft/ft.h" +#include "ft/txn_manager.h" +#include "src/ydb.h" +#include "src/ydb-internal.h" +#include "src/ydb_cursor.h" +#include "src/ydb_row_lock.h" +#include "src/ydb_env_func.h" +#include "src/ydb_db.h" +#include "src/ydb_write.h" +#include "src/ydb_txn.h" +#include "src/loader.h" +#include "src/indexer.h" +#include "util/status.h" +#include "util/context.h" // Include ydb_lib.cc here so that its constructor/destructor gets put into // ydb.o, to make sure they don't get erased at link time (when linking to From 40ea904b36c5e85dd7d9016229b4433c64e7aba8 Mon Sep 17 00:00:00 2001 From: John Esmet Date: Thu, 3 Jul 2014 12:30:17 -0400 Subject: [PATCH 069/190] FT-299 Clean up XIDs abstraction --- ft/ft-ops.cc | 18 ++--- ft/ft-test-helpers.cc | 4 +- ft/loader/loader.cc | 8 +- ft/msg.cc | 6 +- ft/msg.h | 4 +- 
ft/msg_buffer.cc | 14 ++-- ft/msg_buffer.h | 1 - ft/roll.cc | 2 +- ft/tests/bnc-insert-benchmark.cc | 4 +- ft/tests/fifo-test.cc | 8 +- ft/tests/ft-bfe-query.cc | 12 +-- ft/tests/ft-clock-test.cc | 12 +-- ft/tests/ft-serialize-benchmark.cc | 8 +- ft/tests/ft-serialize-test.cc | 12 +-- ft/tests/make-tree.cc | 4 +- ft/tests/msnfilter.cc | 8 +- ft/tests/orthopush-flush.cc | 68 ++++++++--------- ft/tests/test-leafentry-child-txn.cc | 12 +-- ft/tests/test-leafentry-nested.cc | 32 ++++---- ft/tests/verify-bad-msn.cc | 4 +- ft/tests/verify-bad-pivots.cc | 2 +- ft/tests/verify-dup-in-leaf.cc | 2 +- ft/tests/verify-dup-pivots.cc | 2 +- ft/tests/verify-misrouted-msgs.cc | 4 +- ft/tests/verify-unsorted-leaf.cc | 2 +- ft/tests/verify-unsorted-pivots.cc | 2 +- ft/tokuconst.h | 105 -------------------------- ft/tokuftdump.cc | 2 +- ft/txn.cc | 12 +-- ft/txn.h | 4 +- ft/ule.cc | 30 ++++---- ft/xids-internal.h | 109 --------------------------- ft/xids.cc | 74 +++++++++--------- ft/xids.h | 59 ++++++++++----- src/indexer-undo-do.cc | 23 +++--- src/indexer.cc | 1 - src/tests/hotindexer-undo-do-test.cc | 4 +- src/tests/test_txn_nested1.cc | 2 +- src/tests/test_txn_nested2.cc | 2 +- src/tests/test_txn_nested3.cc | 2 +- src/tests/test_txn_nested4.cc | 2 +- src/tests/test_txn_nested5.cc | 2 +- 42 files changed, 246 insertions(+), 442 deletions(-) delete mode 100644 ft/tokuconst.h delete mode 100644 ft/xids-internal.h diff --git a/ft/ft-ops.cc b/ft/ft-ops.cc index 9ebcd4939c3..ccbee3256ce 100644 --- a/ft/ft-ops.cc +++ b/ft/ft-ops.cc @@ -2384,7 +2384,7 @@ static void ft_txn_log_insert(FT ft, DBT *key, DBT *val, TOKUTXN txn, bool do_lo int toku_ft_insert_unique(FT_HANDLE ft_h, DBT *key, DBT *val, TOKUTXN txn, bool do_logging) { // Effect: Insert a unique key-val pair into the fractal tree. // Return: 0 on success, DB_KEYEXIST if the overwrite constraint failed - XIDS message_xids = txn != nullptr ? toku_txn_get_xids(txn) : xids_get_root_xids(); + XIDS message_xids = txn != nullptr ? 
toku_txn_get_xids(txn) : toku_xids_get_root_xids(); TXN_MANAGER txn_manager = toku_ft_get_txn_manager(ft_h); txn_manager_state txn_state_for_gc(txn_manager); @@ -2457,13 +2457,13 @@ void toku_ft_optimize (FT_HANDLE ft_h) { if (logger) { TXNID oldest = toku_txn_manager_get_oldest_living_xid(logger->txn_manager); - XIDS root_xids = xids_get_root_xids(); + XIDS root_xids = toku_xids_get_root_xids(); XIDS message_xids; if (oldest == TXNID_NONE_LIVING) { message_xids = root_xids; } else { - int r = xids_create_child(root_xids, &message_xids, oldest); + int r = toku_xids_create_child(root_xids, &message_xids, oldest); invariant(r == 0); } @@ -2483,7 +2483,7 @@ void toku_ft_optimize (FT_HANDLE ft_h) { oldest_referenced_xid_estimate, true); toku_ft_root_put_msg(ft_h->ft, msg, &gc_info); - xids_destroy(&message_xids); + toku_xids_destroy(&message_xids); } } @@ -2570,7 +2570,7 @@ void toku_ft_maybe_insert (FT_HANDLE ft_h, DBT *key, DBT *val, TOKUTXN txn, bool if (oplsn_valid && oplsn.lsn <= (treelsn = toku_ft_checkpoint_lsn(ft_h->ft)).lsn) { // do nothing } else { - XIDS message_xids = txn ? toku_txn_get_xids(txn) : xids_get_root_xids(); + XIDS message_xids = txn ? toku_txn_get_xids(txn) : toku_xids_get_root_xids(); TXN_MANAGER txn_manager = toku_ft_get_txn_manager(ft_h); txn_manager_state txn_state_for_gc(txn_manager); @@ -2641,7 +2641,7 @@ void toku_ft_maybe_update(FT_HANDLE ft_h, const DBT *key, const DBT *update_func if (oplsn_valid && oplsn.lsn <= (treelsn = toku_ft_checkpoint_lsn(ft_h->ft)).lsn) { // do nothing } else { - XIDS message_xids = txn ? toku_txn_get_xids(txn) : xids_get_root_xids(); + XIDS message_xids = txn ? toku_txn_get_xids(txn) : toku_xids_get_root_xids(); ft_msg msg(key, update_function_extra, FT_UPDATE, ZERO_MSN, message_xids); ft_send_update_msg(ft_h, msg, txn); } @@ -2674,7 +2674,7 @@ void toku_ft_maybe_update_broadcast(FT_HANDLE ft_h, const DBT *update_function_e } else { DBT empty_dbt; - XIDS message_xids = txn ? 
toku_txn_get_xids(txn) : xids_get_root_xids(); + XIDS message_xids = txn ? toku_txn_get_xids(txn) : toku_xids_get_root_xids(); ft_msg msg(toku_init_dbt(&empty_dbt), update_function_extra, FT_UPDATE_BROADCAST_ALL, ZERO_MSN, message_xids); ft_send_update_msg(ft_h, msg, txn); } @@ -2726,7 +2726,7 @@ toku_ft_log_del_multiple (TOKUTXN txn, FT_HANDLE src_ft, FT_HANDLE *fts, uint32_ } void toku_ft_maybe_delete(FT_HANDLE ft_h, DBT *key, TOKUTXN txn, bool oplsn_valid, LSN oplsn, bool do_logging) { - XIDS message_xids = xids_get_root_xids(); //By default use committed messages + XIDS message_xids = toku_xids_get_root_xids(); //By default use committed messages TXNID_PAIR xid = toku_txn_get_txnid(txn); if (txn) { BYTESTRING keybs = {key->size, (char *) key->data}; @@ -4474,7 +4474,7 @@ toku_dump_ftnode (FILE *file, FT_HANDLE ft_handle, BLOCKNUM blocknum, int depth, int operator()(const ft_msg &msg, bool UU(is_fresh)) { fprintf(file, "%*s xid=%" PRIu64 " %u (type=%d) msn=0x%" PRIu64 "\n", depth+2, "", - xids_get_innermost_xid(msg.xids()), + toku_xids_get_innermost_xid(msg.xids()), static_cast(toku_dtoh32(*(int*)msg.kdbt()->data)), msg.type(), msg.msn().msn); return 0; diff --git a/ft/ft-test-helpers.cc b/ft/ft-test-helpers.cc index b2b11e2ada0..1d70aeec7d3 100644 --- a/ft/ft-test-helpers.cc +++ b/ft/ft-test-helpers.cc @@ -226,7 +226,7 @@ int toku_testsetup_insert_to_leaf (FT_HANDLE ft_handle, BLOCKNUM blocknum, const DBT kdbt, vdbt; ft_msg msg(toku_fill_dbt(&kdbt, key, keylen), toku_fill_dbt(&vdbt, val, vallen), - FT_INSERT, next_dummymsn(), xids_get_root_xids()); + FT_INSERT, next_dummymsn(), toku_xids_get_root_xids()); static size_t zero_flow_deltas[] = { 0, 0 }; txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, true); @@ -299,7 +299,7 @@ int toku_testsetup_insert_to_nonleaf (FT_HANDLE ft_handle, BLOCKNUM blocknum, en DBT k; int childnum = toku_ftnode_which_child(node, toku_fill_dbt(&k, key, keylen), ft_handle->ft->cmp); - XIDS xids_0 = xids_get_root_xids(); + XIDS xids_0 
= toku_xids_get_root_xids(); MSN msn = next_dummymsn(); toku::comparator cmp; cmp.create(testhelper_string_key_cmp, nullptr); diff --git a/ft/loader/loader.cc b/ft/loader/loader.cc index 11024f93c19..9f44b2eb8d5 100644 --- a/ft/loader/loader.cc +++ b/ft/loader/loader.cc @@ -2349,12 +2349,12 @@ static struct leaf_buf *start_leaf (struct dbout *out, const DESCRIPTOR UU(desc) lbuf->nkeys = lbuf->ndata = lbuf->dsize = 0; lbuf->off = 0; - lbuf->xids = xids_get_root_xids(); + lbuf->xids = toku_xids_get_root_xids(); if (xid != TXNID_NONE) { XIDS new_xids = NULL; - int r = xids_create_child(lbuf->xids, &new_xids, xid); + int r = toku_xids_create_child(lbuf->xids, &new_xids, xid); assert(r == 0 && new_xids); - xids_destroy(&lbuf->xids); + toku_xids_destroy(&lbuf->xids); lbuf->xids = new_xids; } @@ -2988,7 +2988,7 @@ static void finish_leafnode (struct dbout *out, struct leaf_buf *lbuf, int progr toku_free(serialized_leaf); } toku_ftnode_free(&lbuf->node); - xids_destroy(&lbuf->xids); + toku_xids_destroy(&lbuf->xids); toku_free(lbuf); //printf("Nodewrite %d (%.1f%%):", progress_allocation, 100.0*progress_allocation/PROGRESS_MAX); diff --git a/ft/msg.cc b/ft/msg.cc index db4b6ff891f..c1ce9fdd477 100644 --- a/ft/msg.cc +++ b/ft/msg.cc @@ -106,7 +106,7 @@ ft_msg ft_msg::deserialize_from_rbuf(struct rbuf *rb, XIDS *x, bool *is_fresh) { enum ft_msg_type t = (enum ft_msg_type) rbuf_char(rb); *is_fresh = rbuf_char(rb); MSN m = rbuf_MSN(rb); - xids_create_from_buffer(rb, x); + toku_xids_create_from_buffer(rb, x); rbuf_bytes(rb, &keyp, &keylen); rbuf_bytes(rb, &valp, &vallen); @@ -118,7 +118,7 @@ ft_msg ft_msg::deserialize_from_rbuf_v13(struct rbuf *rb, MSN m, XIDS *x) { const void *keyp, *valp; uint32_t keylen, vallen; enum ft_msg_type t = (enum ft_msg_type) rbuf_char(rb); - xids_create_from_buffer(rb, x); + toku_xids_create_from_buffer(rb, x); rbuf_bytes(rb, &keyp, &keylen); rbuf_bytes(rb, &valp, &vallen); @@ -156,7 +156,7 @@ size_t ft_msg::total_size() const { static const size_t 
total_overhead = key_val_overhead + msg_overhead; const size_t keyval_size = _key.size + _val.size; - const size_t xids_size = xids_get_serialize_size(xids()); + const size_t xids_size = toku_xids_get_serialize_size(xids()); return total_overhead + keyval_size + xids_size; } diff --git a/ft/msg.h b/ft/msg.h index a54fadb955b..f049fefadd9 100644 --- a/ft/msg.h +++ b/ft/msg.h @@ -99,6 +99,8 @@ PATENT RIGHTS GRANT: #include "portability/toku_assert.h" #include "portability/toku_stdint.h" +#include "ft/xids.h" + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." @@ -194,8 +196,6 @@ ft_msg_type_does_nothing(enum ft_msg_type type) return (type == FT_NONE); } -typedef struct xids_t *XIDS; - class ft_msg { public: ft_msg(const DBT *key, const DBT *val, enum ft_msg_type t, MSN m, XIDS x); diff --git a/ft/msg_buffer.cc b/ft/msg_buffer.cc index 3dfcac234fc..d6ff954360b 100644 --- a/ft/msg_buffer.cc +++ b/ft/msg_buffer.cc @@ -148,7 +148,7 @@ void message_buffer::deserialize_from_rbuf(struct rbuf *rb, } enqueue(msg, is_fresh, dest); - xids_destroy(&xids); + toku_xids_destroy(&xids); } invariant(_num_entries == n_in_this_buffer); @@ -193,7 +193,7 @@ MSN message_buffer::deserialize_from_rbuf_v13(struct rbuf *rb, } enqueue(msg, is_fresh, dest); - xids_destroy(&xids); + toku_xids_destroy(&xids); } return highest_msn_in_this_buffer; @@ -230,9 +230,9 @@ void message_buffer::enqueue(const ft_msg &msg, bool is_fresh, int32_t *offset) struct buffer_entry *entry = get_buffer_entry(_memory_used); entry->type = (unsigned char) msg.type(); entry->msn = msg.msn(); - xids_cpy(&entry->xids_s, msg.xids()); + toku_xids_cpy(&entry->xids_s, msg.xids()); entry->is_fresh = 
is_fresh; - unsigned char *e_key = xids_get_end_of_array(&entry->xids_s); + unsigned char *e_key = toku_xids_get_end_of_array(&entry->xids_s); entry->keylen = keylen; memcpy(e_key, msg.kdbt()->data, keylen); entry->vallen = datalen; @@ -261,7 +261,7 @@ ft_msg message_buffer::get_message(int32_t offset, DBT *keydbt, DBT *valdbt) con enum ft_msg_type type = (enum ft_msg_type) entry->type; MSN msn = entry->msn; const XIDS xids = (XIDS) &entry->xids_s; - const void *key = xids_get_end_of_array(xids); + const void *key = toku_xids_get_end_of_array(xids); const void *val = (uint8_t *) key + entry->keylen; return ft_msg(toku_fill_dbt(keydbt, key, keylen), toku_fill_dbt(valdbt, val, vallen), type, msn, xids); } @@ -269,7 +269,7 @@ ft_msg message_buffer::get_message(int32_t offset, DBT *keydbt, DBT *valdbt) con void message_buffer::get_message_key_msn(int32_t offset, DBT *key, MSN *msn) const { struct buffer_entry *entry = get_buffer_entry(offset); if (key != nullptr) { - toku_fill_dbt(key, xids_get_end_of_array((XIDS) &entry->xids_s), entry->keylen); + toku_fill_dbt(key, toku_xids_get_end_of_array((XIDS) &entry->xids_s), entry->keylen); } if (msn != nullptr) { *msn = entry->msn; @@ -313,6 +313,6 @@ void message_buffer::serialize_to_wbuf(struct wbuf *wb) const { size_t message_buffer::msg_memsize_in_buffer(const ft_msg &msg) { const uint32_t keylen = msg.kdbt()->size; const uint32_t datalen = msg.vdbt()->size; - const size_t xidslen = xids_get_size(msg.xids()); + const size_t xidslen = toku_xids_get_size(msg.xids()); return sizeof(struct buffer_entry) + keylen + datalen + xidslen - sizeof(XIDS_S); } diff --git a/ft/msg_buffer.h b/ft/msg_buffer.h index c781c68d0c7..efd39b52670 100644 --- a/ft/msg_buffer.h +++ b/ft/msg_buffer.h @@ -90,7 +90,6 @@ PATENT RIGHTS GRANT: #include "ft/msg.h" #include "ft/xids.h" -#include "ft/xids-internal.h" #include "ft/ybt.h" class message_buffer { diff --git a/ft/roll.cc b/ft/roll.cc index d64e128c87b..9ad3dfcf5bf 100644 --- a/ft/roll.cc +++ 
b/ft/roll.cc @@ -273,7 +273,7 @@ static int do_insertion (enum ft_msg_type type, FILENUM filenum, BYTESTRING key, !txn->for_recovery); toku_ft_root_put_msg(ft, msg, &gc_info); if (reset_root_xid_that_created) { - TXNID new_root_xid_that_created = xids_get_outermost_xid(xids); + TXNID new_root_xid_that_created = toku_xids_get_outermost_xid(xids); toku_reset_root_xid_that_created(ft, new_root_xid_that_created); } } diff --git a/ft/tests/bnc-insert-benchmark.cc b/ft/tests/bnc-insert-benchmark.cc index a1313440cbb..43d30be1e3e 100644 --- a/ft/tests/bnc-insert-benchmark.cc +++ b/ft/tests/bnc-insert-benchmark.cc @@ -127,9 +127,9 @@ run_test(unsigned long eltsize, unsigned long nodesize, unsigned long repeat) *p = (rand() & 0xff); } } - XIDS xids_0 = xids_get_root_xids(); + XIDS xids_0 = toku_xids_get_root_xids(); XIDS xids_123; - int r = xids_create_child(xids_0, &xids_123, (TXNID)123); + int r = toku_xids_create_child(xids_0, &xids_123, (TXNID)123); CKERR(r); NONLEAF_CHILDINFO bnc; diff --git a/ft/tests/fifo-test.cc b/ft/tests/fifo-test.cc index 1d708859465..856c9f57d02 100644 --- a/ft/tests/fifo-test.cc +++ b/ft/tests/fifo-test.cc @@ -126,9 +126,9 @@ test_enqueue(int n) { char *theval = buildval(thevallen); XIDS xids; if (i == 0) { - xids = xids_get_root_xids(); + xids = toku_xids_get_root_xids(); } else { - int r = xids_create_child(xids_get_root_xids(), &xids, (TXNID)i); + int r = toku_xids_create_child(toku_xids_get_root_xids(), &xids, (TXNID)i); assert_zero(r); } MSN msn = next_dummymsn(); @@ -138,7 +138,7 @@ test_enqueue(int n) { DBT k, v; ft_msg msg(toku_fill_dbt(&k, thekey, thekeylen), toku_fill_dbt(&v, theval, thevallen), type, msn, xids); msg_buffer.enqueue(msg, true, nullptr); - xids_destroy(&xids); + toku_xids_destroy(&xids); toku_free(thekey); toku_free(theval); } @@ -163,7 +163,7 @@ test_enqueue(int n) { assert((int) msg.kdbt()->size == thekeylen); assert(memcmp(msg.kdbt()->data, thekey, msg.kdbt()->size) == 0); assert((int) msg.vdbt()->size == thevallen); 
assert(memcmp(msg.vdbt()->data, theval, msg.vdbt()->size) == 0); assert(i % 256 == (int)type); - assert((TXNID)i==xids_get_innermost_xid(msg.xids())); + assert((TXNID)i == toku_xids_get_innermost_xid(msg.xids())); i += 1; toku_free(thekey); toku_free(theval); diff --git a/ft/tests/ft-bfe-query.cc b/ft/tests/ft-bfe-query.cc index 0b1f0f4d394..30d09344cd6 100644 --- a/ft/tests/ft-bfe-query.cc +++ b/ft/tests/ft-bfe-query.cc @@ -396,19 +396,19 @@ test_prefetching(void) { set_BNC(&sn, 1, toku_create_empty_nl()); set_BNC(&sn, 2, toku_create_empty_nl()); //Create XIDS - XIDS xids_0 = xids_get_root_xids(); + XIDS xids_0 = toku_xids_get_root_xids(); XIDS xids_123; XIDS xids_234; - r = xids_create_child(xids_0, &xids_123, (TXNID)123); + r = toku_xids_create_child(xids_0, &xids_123, (TXNID)123); CKERR(r); - r = xids_create_child(xids_123, &xids_234, (TXNID)234); + r = toku_xids_create_child(xids_123, &xids_234, (TXNID)234); CKERR(r); // data in the buffers does not matter in this test //Cleanup: - xids_destroy(&xids_0); - xids_destroy(&xids_123); - xids_destroy(&xids_234); + toku_xids_destroy(&xids_0); + toku_xids_destroy(&xids_123); + toku_xids_destroy(&xids_234); FT_HANDLE XMALLOC(ft); FT XCALLOC(ft_h); diff --git a/ft/tests/ft-clock-test.cc b/ft/tests/ft-clock-test.cc index ceadd6aaedb..641d33dcf42 100644 --- a/ft/tests/ft-clock-test.cc +++ b/ft/tests/ft-clock-test.cc @@ -324,12 +324,12 @@ test_serialize_nonleaf(void) { set_BNC(&sn, 0, toku_create_empty_nl()); set_BNC(&sn, 1, toku_create_empty_nl()); //Create XIDS - XIDS xids_0 = xids_get_root_xids(); + XIDS xids_0 = toku_xids_get_root_xids(); XIDS xids_123; XIDS xids_234; - r = xids_create_child(xids_0, &xids_123, (TXNID)123); + r = toku_xids_create_child(xids_0, &xids_123, (TXNID)123); CKERR(r); - r = xids_create_child(xids_123, &xids_234, (TXNID)234); + r = toku_xids_create_child(xids_123, &xids_234, (TXNID)234); CKERR(r); toku::comparator cmp; @@ -340,9 +340,9 @@ test_serialize_nonleaf(void) { 
toku_bnc_insert_msg(BNC(&sn, 1), "x", 2, "xval", 5, FT_NONE, next_dummymsn(), xids_234, true, cmp); //Cleanup: - xids_destroy(&xids_0); - xids_destroy(&xids_123); - xids_destroy(&xids_234); + toku_xids_destroy(&xids_0); + toku_xids_destroy(&xids_123); + toku_xids_destroy(&xids_234); cmp.destroy(); FT_HANDLE XMALLOC(ft); diff --git a/ft/tests/ft-serialize-benchmark.cc b/ft/tests/ft-serialize-benchmark.cc index 342fae76d81..4c1e3b8cbdb 100644 --- a/ft/tests/ft-serialize-benchmark.cc +++ b/ft/tests/ft-serialize-benchmark.cc @@ -314,9 +314,9 @@ test_serialize_nonleaf(int valsize, int nelts, double entropy, int ser_runs, int set_BNC(&sn, i, toku_create_empty_nl()); } //Create XIDS - XIDS xids_0 = xids_get_root_xids(); + XIDS xids_0 = toku_xids_get_root_xids(); XIDS xids_123; - r = xids_create_child(xids_0, &xids_123, (TXNID)123); + r = toku_xids_create_child(xids_0, &xids_123, (TXNID)123); CKERR(r); toku::comparator cmp; cmp.create(long_key_cmp, nullptr); @@ -344,8 +344,8 @@ test_serialize_nonleaf(int valsize, int nelts, double entropy, int ser_runs, int } //Cleanup: - xids_destroy(&xids_0); - xids_destroy(&xids_123); + toku_xids_destroy(&xids_0); + toku_xids_destroy(&xids_123); cmp.destroy(); FT_HANDLE XMALLOC(ft); diff --git a/ft/tests/ft-serialize-test.cc b/ft/tests/ft-serialize-test.cc index de85e6c609b..d82a42b6e08 100644 --- a/ft/tests/ft-serialize-test.cc +++ b/ft/tests/ft-serialize-test.cc @@ -1041,12 +1041,12 @@ test_serialize_nonleaf(enum ftnode_verify_type bft, bool do_clone) { set_BNC(&sn, 0, toku_create_empty_nl()); set_BNC(&sn, 1, toku_create_empty_nl()); //Create XIDS - XIDS xids_0 = xids_get_root_xids(); + XIDS xids_0 = toku_xids_get_root_xids(); XIDS xids_123; XIDS xids_234; - r = xids_create_child(xids_0, &xids_123, (TXNID)123); + r = toku_xids_create_child(xids_0, &xids_123, (TXNID)123); CKERR(r); - r = xids_create_child(xids_123, &xids_234, (TXNID)234); + r = toku_xids_create_child(xids_123, &xids_234, (TXNID)234); CKERR(r); toku::comparator cmp; @@ 
-1057,9 +1057,9 @@ test_serialize_nonleaf(enum ftnode_verify_type bft, bool do_clone) { toku_bnc_insert_msg(BNC(&sn, 1), "x", 2, "xval", 5, FT_NONE, next_dummymsn(), xids_234, true, cmp); //Cleanup: - xids_destroy(&xids_0); - xids_destroy(&xids_123); - xids_destroy(&xids_234); + toku_xids_destroy(&xids_0); + toku_xids_destroy(&xids_123); + toku_xids_destroy(&xids_234); cmp.destroy(); FT_HANDLE XMALLOC(ft); diff --git a/ft/tests/make-tree.cc b/ft/tests/make-tree.cc index 66bed972d25..14700ddbfb3 100644 --- a/ft/tests/make-tree.cc +++ b/ft/tests/make-tree.cc @@ -125,7 +125,7 @@ append_leaf(FTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen) // apply an insert to the leaf node txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, false); - ft_msg msg(&thekey, &theval, FT_INSERT, msn, xids_get_root_xids()); + ft_msg msg(&thekey, &theval, FT_INSERT, msn, toku_xids_get_root_xids()); toku_ft_bn_apply_msg_once(BLB(leafnode,0), msg, idx, keylen, NULL, &gc_info, NULL, NULL); leafnode->max_msn_applied_to_node_on_disk = msn; @@ -152,7 +152,7 @@ insert_into_child_buffer(FT_HANDLE ft, FTNODE node, int childnum, int minkey, in unsigned int key = htonl(val); DBT thekey; toku_fill_dbt(&thekey, &key, sizeof key); DBT theval; toku_fill_dbt(&theval, &val, sizeof val); - toku_ft_append_to_child_buffer(ft->ft->cmp, node, childnum, FT_INSERT, msn, xids_get_root_xids(), true, &thekey, &theval); + toku_ft_append_to_child_buffer(ft->ft->cmp, node, childnum, FT_INSERT, msn, toku_xids_get_root_xids(), true, &thekey, &theval); node->max_msn_applied_to_node_on_disk = msn; } } diff --git a/ft/tests/msnfilter.cc b/ft/tests/msnfilter.cc index bb172a844c8..ea8d3a97649 100644 --- a/ft/tests/msnfilter.cc +++ b/ft/tests/msnfilter.cc @@ -131,7 +131,7 @@ append_leaf(FT_HANDLE ft, FTNODE leafnode, void *key, uint32_t keylen, void *val // apply an insert to the leaf node MSN msn = next_dummymsn(); ft->ft->h->max_msn_in_ft = msn; - ft_msg msg(&thekey, &theval, FT_INSERT, msn, 
xids_get_root_xids()); + ft_msg msg(&thekey, &theval, FT_INSERT, msn, toku_xids_get_root_xids()); txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, false); toku_ft_leaf_apply_msg(ft->ft->cmp, ft->ft->update_fun, leafnode, -1, msg, &gc_info, nullptr, nullptr); @@ -141,7 +141,7 @@ append_leaf(FT_HANDLE ft, FTNODE leafnode, void *key, uint32_t keylen, void *val assert(pair.call_count==1); } - ft_msg badmsg(&thekey, &badval, FT_INSERT, msn, xids_get_root_xids()); + ft_msg badmsg(&thekey, &badval, FT_INSERT, msn, toku_xids_get_root_xids()); toku_ft_leaf_apply_msg(ft->ft->cmp, ft->ft->update_fun, leafnode, -1, badmsg, &gc_info, nullptr, nullptr); // message should be rejected for duplicate msn, row should still have original val @@ -154,7 +154,7 @@ append_leaf(FT_HANDLE ft, FTNODE leafnode, void *key, uint32_t keylen, void *val // now verify that message with proper msn gets through msn = next_dummymsn(); ft->ft->h->max_msn_in_ft = msn; - ft_msg msg2(&thekey, &val2, FT_INSERT, msn, xids_get_root_xids()); + ft_msg msg2(&thekey, &val2, FT_INSERT, msn, toku_xids_get_root_xids()); toku_ft_leaf_apply_msg(ft->ft->cmp, ft->ft->update_fun, leafnode, -1, msg2, &gc_info, nullptr, nullptr); // message should be accepted, val should have new value @@ -166,7 +166,7 @@ append_leaf(FT_HANDLE ft, FTNODE leafnode, void *key, uint32_t keylen, void *val // now verify that message with lesser (older) msn is rejected msn.msn = msn.msn - 10; - ft_msg msg3(&thekey, &badval, FT_INSERT, msn, xids_get_root_xids()); + ft_msg msg3(&thekey, &badval, FT_INSERT, msn, toku_xids_get_root_xids()); toku_ft_leaf_apply_msg(ft->ft->cmp, ft->ft->update_fun, leafnode, -1, msg3, &gc_info, nullptr, nullptr); // message should be rejected, val should still have value in pair2 diff --git a/ft/tests/orthopush-flush.cc b/ft/tests/orthopush-flush.cc index 963346cc1fe..21dcc1281ad 100644 --- a/ft/tests/orthopush-flush.cc +++ b/ft/tests/orthopush-flush.cc @@ -306,11 +306,11 @@ flush_to_internal(FT_HANDLE t) { 
memset(parent_messages_is_fresh, 0, 4096*(sizeof parent_messages_is_fresh[0])); memset(child_messages_is_fresh, 0, 4096*(sizeof child_messages_is_fresh[0])); - XIDS xids_0 = xids_get_root_xids(); + XIDS xids_0 = toku_xids_get_root_xids(); XIDS xids_123, xids_234; - r = xids_create_child(xids_0, &xids_123, (TXNID)123); + r = toku_xids_create_child(xids_0, &xids_123, (TXNID)123); CKERR(r); - r = xids_create_child(xids_0, &xids_234, (TXNID)234); + r = toku_xids_create_child(xids_0, &xids_234, (TXNID)234); CKERR(r); NONLEAF_CHILDINFO child_bnc = toku_create_empty_nl(); @@ -369,7 +369,7 @@ flush_to_internal(FT_HANDLE t) { assert(found == 0); assert(dummy_cmp(&valdbt, parent_messages[k]->vdbt()) == 0); assert(type == parent_messages[k]->type()); - assert(xids_get_innermost_xid(xids) == xids_get_innermost_xid(parent_messages[k]->xids())); + assert(toku_xids_get_innermost_xid(xids) == toku_xids_get_innermost_xid(parent_messages[k]->xids())); assert(parent_messages_is_fresh[k] == is_fresh); parent_messages_present[k]++; found++; @@ -382,7 +382,7 @@ flush_to_internal(FT_HANDLE t) { assert(found == 0); assert(dummy_cmp(&valdbt, child_messages[k]->vdbt()) == 0); assert(type == child_messages[k]->type()); - assert(xids_get_innermost_xid(xids) == xids_get_innermost_xid(child_messages[k]->xids())); + assert(toku_xids_get_innermost_xid(xids) == toku_xids_get_innermost_xid(child_messages[k]->xids())); assert(child_messages_is_fresh[k] == is_fresh); child_messages_present[k]++; found++; @@ -402,9 +402,9 @@ flush_to_internal(FT_HANDLE t) { assert(child_messages_present[i] == 1); } - xids_destroy(&xids_0); - xids_destroy(&xids_123); - xids_destroy(&xids_234); + toku_xids_destroy(&xids_0); + toku_xids_destroy(&xids_123); + toku_xids_destroy(&xids_234); for (i = 0; i < num_parent_messages; ++i) { toku_free(parent_messages[i]->kdbt()->data); @@ -436,11 +436,11 @@ flush_to_internal_multiple(FT_HANDLE t) { memset(parent_messages_is_fresh, 0, 4096*(sizeof parent_messages_is_fresh[0])); 
memset(child_messages_is_fresh, 0, 4096*(sizeof child_messages_is_fresh[0])); - XIDS xids_0 = xids_get_root_xids(); + XIDS xids_0 = toku_xids_get_root_xids(); XIDS xids_123, xids_234; - r = xids_create_child(xids_0, &xids_123, (TXNID)123); + r = toku_xids_create_child(xids_0, &xids_123, (TXNID)123); CKERR(r); - r = xids_create_child(xids_0, &xids_234, (TXNID)234); + r = toku_xids_create_child(xids_0, &xids_234, (TXNID)234); CKERR(r); NONLEAF_CHILDINFO child_bncs[8]; @@ -525,7 +525,7 @@ flush_to_internal_multiple(FT_HANDLE t) { assert(found == 0); assert(dummy_cmp(&valdbt, parent_messages[_i]->vdbt()) == 0); assert(type == parent_messages[_i]->type()); - assert(xids_get_innermost_xid(xids) == xids_get_innermost_xid(parent_messages[_i]->xids())); + assert(toku_xids_get_innermost_xid(xids) == toku_xids_get_innermost_xid(parent_messages[_i]->xids())); assert(parent_messages_is_fresh[_i] == is_fresh); parent_messages_present[_i]++; found++; @@ -538,7 +538,7 @@ flush_to_internal_multiple(FT_HANDLE t) { assert(found == 0); assert(dummy_cmp(&valdbt, child_messages[_i]->vdbt()) == 0); assert(type == child_messages[_i]->type()); - assert(xids_get_innermost_xid(xids) == xids_get_innermost_xid(child_messages[_i]->xids())); + assert(toku_xids_get_innermost_xid(xids) == toku_xids_get_innermost_xid(child_messages[_i]->xids())); assert(child_messages_is_fresh[_i] == is_fresh); child_messages_present[_i]++; found++; @@ -559,9 +559,9 @@ flush_to_internal_multiple(FT_HANDLE t) { assert(child_messages_present[i] == 1); } - xids_destroy(&xids_0); - xids_destroy(&xids_123); - xids_destroy(&xids_234); + toku_xids_destroy(&xids_0); + toku_xids_destroy(&xids_123); + toku_xids_destroy(&xids_234); for (i = 0; i < num_parent_messages; ++i) { toku_free(parent_messages[i]->kdbt()->data); @@ -605,11 +605,11 @@ flush_to_leaf(FT_HANDLE t, bool make_leaf_up_to_date, bool use_flush) { int *MALLOC_N(4096,parent_messages_applied); memset(parent_messages_applied, 0, 4096*(sizeof 
parent_messages_applied[0])); - XIDS xids_0 = xids_get_root_xids(); + XIDS xids_0 = toku_xids_get_root_xids(); XIDS xids_123, xids_234; - r = xids_create_child(xids_0, &xids_123, (TXNID)123); + r = toku_xids_create_child(xids_0, &xids_123, (TXNID)123); CKERR(r); - r = xids_create_child(xids_0, &xids_234, (TXNID)234); + r = toku_xids_create_child(xids_0, &xids_234, (TXNID)234); CKERR(r); BASEMENTNODE child_blbs[8]; @@ -788,9 +788,9 @@ flush_to_leaf(FT_HANDLE t, bool make_leaf_up_to_date, bool use_flush) { assert(child_messages_present[i] == 1); } - xids_destroy(&xids_0); - xids_destroy(&xids_123); - xids_destroy(&xids_234); + toku_xids_destroy(&xids_0); + toku_xids_destroy(&xids_123); + toku_xids_destroy(&xids_234); for (i = 0; i < num_parent_messages; ++i) { toku_free(parent_messages[i]->kdbt()->data); @@ -834,11 +834,11 @@ flush_to_leaf_with_keyrange(FT_HANDLE t, bool make_leaf_up_to_date) { int *MALLOC_N(4096,parent_messages_applied); memset(parent_messages_applied, 0, 4096*(sizeof parent_messages_applied[0])); - XIDS xids_0 = xids_get_root_xids(); + XIDS xids_0 = toku_xids_get_root_xids(); XIDS xids_123, xids_234; - r = xids_create_child(xids_0, &xids_123, (TXNID)123); + r = toku_xids_create_child(xids_0, &xids_123, (TXNID)123); CKERR(r); - r = xids_create_child(xids_0, &xids_234, (TXNID)234); + r = toku_xids_create_child(xids_0, &xids_234, (TXNID)234); CKERR(r); BASEMENTNODE child_blbs[8]; @@ -975,9 +975,9 @@ flush_to_leaf_with_keyrange(FT_HANDLE t, bool make_leaf_up_to_date) { } } - xids_destroy(&xids_0); - xids_destroy(&xids_123); - xids_destroy(&xids_234); + toku_xids_destroy(&xids_0); + toku_xids_destroy(&xids_123); + toku_xids_destroy(&xids_234); for (i = 0; i < num_parent_messages; ++i) { toku_free(parent_messages[i]->kdbt()->data); @@ -1023,11 +1023,11 @@ compare_apply_and_flush(FT_HANDLE t, bool make_leaf_up_to_date) { int *MALLOC_N(4096,parent_messages_applied); memset(parent_messages_applied, 0, 4096*(sizeof parent_messages_applied[0])); - XIDS xids_0 
= xids_get_root_xids(); + XIDS xids_0 = toku_xids_get_root_xids(); XIDS xids_123, xids_234; - r = xids_create_child(xids_0, &xids_123, (TXNID)123); + r = toku_xids_create_child(xids_0, &xids_123, (TXNID)123); CKERR(r); - r = xids_create_child(xids_0, &xids_234, (TXNID)234); + r = toku_xids_create_child(xids_0, &xids_234, (TXNID)234); CKERR(r); BASEMENTNODE child1_blbs[8], child2_blbs[8]; @@ -1168,9 +1168,9 @@ compare_apply_and_flush(FT_HANDLE t, bool make_leaf_up_to_date) { } } - xids_destroy(&xids_0); - xids_destroy(&xids_123); - xids_destroy(&xids_234); + toku_xids_destroy(&xids_0); + toku_xids_destroy(&xids_123); + toku_xids_destroy(&xids_234); for (i = 0; i < num_parent_messages; ++i) { toku_free(parent_messages[i]->kdbt()->data); diff --git a/ft/tests/test-leafentry-child-txn.cc b/ft/tests/test-leafentry-child-txn.cc index 33b5d86509a..5c1c326ddb7 100644 --- a/ft/tests/test-leafentry-child-txn.cc +++ b/ft/tests/test-leafentry-child-txn.cc @@ -132,14 +132,14 @@ run_test(void) { // test case where we apply a message and the innermost child_id // is the same as the innermost committed TXNID - XIDS root_xids = xids_get_root_xids(); + XIDS root_xids = toku_xids_get_root_xids(); TXNID root_txnid = 1000; TXNID child_id = 10; XIDS msg_xids_1; XIDS msg_xids_2; - r = xids_create_child(root_xids, &msg_xids_1, root_txnid); + r = toku_xids_create_child(root_xids, &msg_xids_1, root_txnid); assert(r==0); - r = xids_create_child(msg_xids_1, &msg_xids_2, child_id); + r = toku_xids_create_child(msg_xids_1, &msg_xids_2, child_id); assert(r==0); init_empty_ule(&ule_initial); @@ -189,9 +189,9 @@ run_test(void) { assert(ule->uxrs[3].valp == &val_data_three); } - xids_destroy(&msg_xids_2); - xids_destroy(&msg_xids_1); - xids_destroy(&root_xids); + toku_xids_destroy(&msg_xids_2); + toku_xids_destroy(&msg_xids_1); + toku_xids_destroy(&root_xids); } diff --git a/ft/tests/test-leafentry-nested.cc b/ft/tests/test-leafentry-nested.cc index 8a70c7b6611..1976b70c7cb 100644 --- 
a/ft/tests/test-leafentry-nested.cc +++ b/ft/tests/test-leafentry-nested.cc @@ -525,7 +525,7 @@ generate_provpair_for(ULE ule, const ft_msg &msg) { ule->uxrs = ule->uxrs_static; ule->num_cuxrs = 1; - ule->num_puxrs = xids_get_num_xids(xids); + ule->num_puxrs = toku_xids_get_num_xids(xids); uint32_t num_uxrs = ule->num_cuxrs + ule->num_puxrs; ule->uxrs[0].type = XR_DELETE; ule->uxrs[0].vallen = 0; @@ -535,12 +535,12 @@ generate_provpair_for(ULE ule, const ft_msg &msg) { ule->uxrs[level].type = XR_PLACEHOLDER; ule->uxrs[level].vallen = 0; ule->uxrs[level].valp = NULL; - ule->uxrs[level].xid = xids_get_xid(xids, level-1); + ule->uxrs[level].xid = toku_xids_get_xid(xids, level-1); } ule->uxrs[num_uxrs - 1].type = XR_INSERT; ule->uxrs[num_uxrs - 1].vallen = msg.vdbt()->size; ule->uxrs[num_uxrs - 1].valp = msg.vdbt()->data; - ule->uxrs[num_uxrs - 1].xid = xids_get_innermost_xid(xids); + ule->uxrs[num_uxrs - 1].xid = toku_xids_get_innermost_xid(xids); } //Test all the different things that can happen to a @@ -619,7 +619,7 @@ generate_provdel_for(ULE ule, const ft_msg &msg) { XIDS xids = msg.xids(); ule->num_cuxrs = 1; - ule->num_puxrs = xids_get_num_xids(xids); + ule->num_puxrs = toku_xids_get_num_xids(xids); uint32_t num_uxrs = ule->num_cuxrs + ule->num_puxrs; ule->uxrs[0].type = XR_INSERT; ule->uxrs[0].vallen = msg.vdbt()->size; @@ -629,12 +629,12 @@ generate_provdel_for(ULE ule, const ft_msg &msg) { ule->uxrs[level].type = XR_PLACEHOLDER; ule->uxrs[level].vallen = 0; ule->uxrs[level].valp = NULL; - ule->uxrs[level].xid = xids_get_xid(xids, level-1); + ule->uxrs[level].xid = toku_xids_get_xid(xids, level-1); } ule->uxrs[num_uxrs - 1].type = XR_DELETE; ule->uxrs[num_uxrs - 1].vallen = 0; ule->uxrs[num_uxrs - 1].valp = NULL; - ule->uxrs[num_uxrs - 1].xid = xids_get_innermost_xid(xids); + ule->uxrs[num_uxrs - 1].xid = toku_xids_get_innermost_xid(xids); } static void @@ -643,7 +643,7 @@ generate_both_for(ULE ule, DBT *oldval, const ft_msg &msg) { XIDS xids = msg.xids(); 
ule->num_cuxrs = 1; - ule->num_puxrs = xids_get_num_xids(xids); + ule->num_puxrs = toku_xids_get_num_xids(xids); uint32_t num_uxrs = ule->num_cuxrs + ule->num_puxrs; ule->uxrs[0].type = XR_INSERT; ule->uxrs[0].vallen = oldval->size; @@ -653,12 +653,12 @@ generate_both_for(ULE ule, DBT *oldval, const ft_msg &msg) { ule->uxrs[level].type = XR_PLACEHOLDER; ule->uxrs[level].vallen = 0; ule->uxrs[level].valp = NULL; - ule->uxrs[level].xid = xids_get_xid(xids, level-1); + ule->uxrs[level].xid = toku_xids_get_xid(xids, level-1); } ule->uxrs[num_uxrs - 1].type = XR_INSERT; ule->uxrs[num_uxrs - 1].vallen = msg.vdbt()->size; ule->uxrs[num_uxrs - 1].valp = msg.vdbt()->data; - ule->uxrs[num_uxrs - 1].xid = xids_get_innermost_xid(xids); + ule->uxrs[num_uxrs - 1].xid = toku_xids_get_innermost_xid(xids); } //Test all the different things that can happen to a @@ -868,9 +868,9 @@ static void test_le_optimize(void) { TXNID optimize_txnid = 1000; memset(&key, 0, sizeof(key)); memset(&val, 0, sizeof(val)); - XIDS root_xids = xids_get_root_xids(); + XIDS root_xids = toku_xids_get_root_xids(); XIDS msg_xids; - int r = xids_create_child(root_xids, &msg_xids, optimize_txnid); + int r = toku_xids_create_child(root_xids, &msg_xids, optimize_txnid); assert(r==0); ft_msg msg(&key, &val, FT_OPTIMIZE, ZERO_MSN, msg_xids); @@ -981,8 +981,8 @@ static void test_le_optimize(void) { verify_ule_equal(&ule_initial, &ule_expected); - xids_destroy(&msg_xids); - xids_destroy(&root_xids); + toku_xids_destroy(&msg_xids); + toku_xids_destroy(&root_xids); } //TODO: #1125 tests: @@ -1020,9 +1020,9 @@ static void test_le_optimize(void) { static void init_xids(void) { uint32_t i; - nested_xids[0] = xids_get_root_xids(); + nested_xids[0] = toku_xids_get_root_xids(); for (i = 1; i < MAX_TRANSACTION_RECORDS; i++) { - int r = xids_create_child(nested_xids[i-1], &nested_xids[i], i * 37 + random() % 36); + int r = toku_xids_create_child(nested_xids[i-1], &nested_xids[i], i * 37 + random() % 36); assert(r==0); } } @@ 
-1031,7 +1031,7 @@ static void destroy_xids(void) { uint32_t i; for (i = 0; i < MAX_TRANSACTION_RECORDS; i++) { - xids_destroy(&nested_xids[i]); + toku_xids_destroy(&nested_xids[i]); } } diff --git a/ft/tests/verify-bad-msn.cc b/ft/tests/verify-bad-msn.cc index ddb9fd447ef..2ac6dde456f 100644 --- a/ft/tests/verify-bad-msn.cc +++ b/ft/tests/verify-bad-msn.cc @@ -127,7 +127,7 @@ append_leaf(FTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen) MSN msn = next_dummymsn(); // apply an insert to the leaf node - ft_msg msg(&thekey, &theval, FT_INSERT, msn, xids_get_root_xids()); + ft_msg msg(&thekey, &theval, FT_INSERT, msn, toku_xids_get_root_xids()); txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, false); toku_ft_bn_apply_msg_once(BLB(leafnode, 0), msg, idx, keylen, NULL, &gc_info, NULL, NULL); @@ -156,7 +156,7 @@ insert_into_child_buffer(FT_HANDLE ft, FTNODE node, int childnum, int minkey, in unsigned int key = htonl(val); DBT thekey; toku_fill_dbt(&thekey, &key, sizeof key); DBT theval; toku_fill_dbt(&theval, &val, sizeof val); - toku_ft_append_to_child_buffer(ft->ft->cmp, node, childnum, FT_INSERT, msn, xids_get_root_xids(), true, &thekey, &theval); + toku_ft_append_to_child_buffer(ft->ft->cmp, node, childnum, FT_INSERT, msn, toku_xids_get_root_xids(), true, &thekey, &theval); // Create bad tree (don't do following): // node->max_msn_applied_to_node = msn; diff --git a/ft/tests/verify-bad-pivots.cc b/ft/tests/verify-bad-pivots.cc index bc402bdac92..55a67b507a1 100644 --- a/ft/tests/verify-bad-pivots.cc +++ b/ft/tests/verify-bad-pivots.cc @@ -115,7 +115,7 @@ append_leaf(FTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen) // apply an insert to the leaf node MSN msn = next_dummymsn(); - ft_msg msg(&thekey, &theval, FT_INSERT, msn, xids_get_root_xids()); + ft_msg msg(&thekey, &theval, FT_INSERT, msn, toku_xids_get_root_xids()); txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, false); toku_ft_bn_apply_msg_once(BLB(leafnode, 0), 
msg, idx, keylen, NULL, &gc_info, NULL, NULL); diff --git a/ft/tests/verify-dup-in-leaf.cc b/ft/tests/verify-dup-in-leaf.cc index 9f3f2848188..81089f7955f 100644 --- a/ft/tests/verify-dup-in-leaf.cc +++ b/ft/tests/verify-dup-in-leaf.cc @@ -116,7 +116,7 @@ append_leaf(FTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen) // apply an insert to the leaf node MSN msn = next_dummymsn(); - ft_msg msg(&thekey, &theval, FT_INSERT, msn, xids_get_root_xids()); + ft_msg msg(&thekey, &theval, FT_INSERT, msn, toku_xids_get_root_xids()); txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, false); toku_ft_bn_apply_msg_once(BLB(leafnode, 0), msg, idx, keylen, NULL, &gc_info, NULL, NULL); diff --git a/ft/tests/verify-dup-pivots.cc b/ft/tests/verify-dup-pivots.cc index 828350d891e..03a7f6eba3f 100644 --- a/ft/tests/verify-dup-pivots.cc +++ b/ft/tests/verify-dup-pivots.cc @@ -115,7 +115,7 @@ append_leaf(FTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen) // apply an insert to the leaf node MSN msn = next_dummymsn(); - ft_msg msg(&thekey, &theval, FT_INSERT, msn, xids_get_root_xids()); + ft_msg msg(&thekey, &theval, FT_INSERT, msn, toku_xids_get_root_xids()); txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, false); toku_ft_bn_apply_msg_once(BLB(leafnode, 0), msg, idx, keylen, NULL, &gc_info, NULL, NULL); diff --git a/ft/tests/verify-misrouted-msgs.cc b/ft/tests/verify-misrouted-msgs.cc index e87597f1748..f9490cfd6c4 100644 --- a/ft/tests/verify-misrouted-msgs.cc +++ b/ft/tests/verify-misrouted-msgs.cc @@ -116,7 +116,7 @@ append_leaf(FTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen) // apply an insert to the leaf node MSN msn = next_dummymsn(); - ft_msg msg(&thekey, &theval, FT_INSERT, msn, xids_get_root_xids()); + ft_msg msg(&thekey, &theval, FT_INSERT, msn, toku_xids_get_root_xids()); txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, false); toku_ft_bn_apply_msg_once(BLB(leafnode,0), msg, idx, keylen, NULL, &gc_info, NULL, 
NULL); @@ -144,7 +144,7 @@ insert_into_child_buffer(FT_HANDLE ft, FTNODE node, int childnum, int minkey, in DBT thekey; toku_fill_dbt(&thekey, &key, sizeof key); DBT theval; toku_fill_dbt(&theval, &val, sizeof val); MSN msn = next_dummymsn(); - toku_ft_append_to_child_buffer(ft->ft->cmp, node, childnum, FT_INSERT, msn, xids_get_root_xids(), true, &thekey, &theval); + toku_ft_append_to_child_buffer(ft->ft->cmp, node, childnum, FT_INSERT, msn, toku_xids_get_root_xids(), true, &thekey, &theval); } } diff --git a/ft/tests/verify-unsorted-leaf.cc b/ft/tests/verify-unsorted-leaf.cc index 450ff4ebe3f..365b7bd4a5f 100644 --- a/ft/tests/verify-unsorted-leaf.cc +++ b/ft/tests/verify-unsorted-leaf.cc @@ -118,7 +118,7 @@ append_leaf(FTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen) // apply an insert to the leaf node MSN msn = next_dummymsn(); - ft_msg msg(&thekey, &theval, FT_INSERT, msn, xids_get_root_xids()); + ft_msg msg(&thekey, &theval, FT_INSERT, msn, toku_xids_get_root_xids()); txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, false); toku_ft_bn_apply_msg_once(BLB(leafnode, 0), msg, idx, keylen, NULL, &gc_info, NULL, NULL); diff --git a/ft/tests/verify-unsorted-pivots.cc b/ft/tests/verify-unsorted-pivots.cc index b9f3656878c..a12307f8555 100644 --- a/ft/tests/verify-unsorted-pivots.cc +++ b/ft/tests/verify-unsorted-pivots.cc @@ -115,7 +115,7 @@ append_leaf(FTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen) // apply an insert to the leaf node MSN msn = next_dummymsn(); - ft_msg msg(&thekey, &theval, FT_INSERT, msn, xids_get_root_xids()); + ft_msg msg(&thekey, &theval, FT_INSERT, msn, toku_xids_get_root_xids()); txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, false); toku_ft_bn_apply_msg_once(BLB(leafnode, 0), msg, idx, keylen, NULL, &gc_info, NULL, NULL); diff --git a/ft/tokuconst.h b/ft/tokuconst.h deleted file mode 100644 index 7a55ccb107a..00000000000 --- a/ft/tokuconst.h +++ /dev/null @@ -1,105 +0,0 @@ -/* -*- mode: C++; 
c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: - -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). - - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. 
- This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. - - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#pragma once - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." 
-#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." - -/* The number of transaction ids stored in the xids structure is - * represented by an 8-bit value. The value 255 is reserved. - * The constant MAX_NESTED_TRANSACTIONS is one less because - * one slot in the packed leaf entry is used for the implicit - * root transaction (id 0). - */ - -enum { - MAX_NESTED_TRANSACTIONS = 253, - MAX_TRANSACTION_RECORDS = MAX_NESTED_TRANSACTIONS + 1 -}; diff --git a/ft/tokuftdump.cc b/ft/tokuftdump.cc index 00bb505d64d..281cde1c9d5 100644 --- a/ft/tokuftdump.cc +++ b/ft/tokuftdump.cc @@ -308,7 +308,7 @@ static void dump_node(int fd, BLOCKNUM blocknum, FT ft) { printf("HUH?"); ok: printf(" xid="); - xids_fprintf(stdout, xids); + toku_xids_fprintf(stdout, xids); printf(" "); print_item(key, keylen); if (datalen>0) { diff --git a/ft/txn.cc b/ft/txn.cc index 018b9112aa4..7a7aa8ad236 100644 --- a/ft/txn.cc +++ b/ft/txn.cc @@ -188,13 +188,13 @@ txn_create_xids(TOKUTXN txn, TOKUTXN parent) { XIDS xids; XIDS parent_xids; if (parent == NULL) { - parent_xids = xids_get_root_xids(); + parent_xids = toku_xids_get_root_xids(); } else { parent_xids = parent->xids; } - xids_create_unknown_child(parent_xids, &xids); + toku_xids_create_unknown_child(parent_xids, &xids); TXNID finalized_xid = (parent == NULL) ? 
txn->txnid.parent_id64 : txn->txnid.child_id64; - xids_finalize_with_child(xids, finalized_xid); + toku_xids_finalize_with_child(xids, finalized_xid); txn->xids = xids; } @@ -217,7 +217,7 @@ toku_txn_begin_with_xid ( TOKUTXN txn; // check for case where we are trying to // create too many nested transactions - if (!read_only && parent && !xids_can_create_child(parent->xids)) { + if (!read_only && parent && !toku_xids_can_create_child(parent->xids)) { r = EINVAL; goto exit; } @@ -648,7 +648,7 @@ void toku_txn_complete_txn(TOKUTXN txn) { void toku_txn_destroy_txn(TOKUTXN txn) { txn->open_fts.destroy(); if (txn->xids) { - xids_destroy(&txn->xids); + toku_xids_destroy(&txn->xids); } toku_mutex_destroy(&txn->txn_lock); toku_mutex_destroy(&txn->state_lock); @@ -657,7 +657,7 @@ void toku_txn_destroy_txn(TOKUTXN txn) { } XIDS toku_txn_get_xids (TOKUTXN txn) { - if (txn==0) return xids_get_root_xids(); + if (txn==0) return toku_xids_get_root_xids(); else return txn->xids; } diff --git a/ft/txn.h b/ft/txn.h index 2af4aca2120..27cc622c5d5 100644 --- a/ft/txn.h +++ b/ft/txn.h @@ -218,7 +218,7 @@ struct tokutxn { // strictly const. DB_TXN *container_db_txn; // reference to DB_TXN that contains this tokutxn xid_omt_t *live_root_txn_list; // the root txns live when the root ancestor (self if a root) started. 
- struct xids_t *xids; // Represents the xid list + struct XIDS_S *xids; // Represents the xid list struct tokutxn *snapshot_next; struct tokutxn *snapshot_prev; @@ -319,7 +319,7 @@ void toku_txn_complete_txn(struct tokutxn *txn); // Free the memory of a txn void toku_txn_destroy_txn(struct tokutxn *txn); -struct xids_t *toku_txn_get_xids(struct tokutxn *txn); +struct XIDS_S *toku_txn_get_xids(struct tokutxn *txn); // Force fsync on commit void toku_txn_force_fsync_on_commit(struct tokutxn *txn); diff --git a/ft/ule.cc b/ft/ule.cc index 9f7be3af7fb..bb1e4915c3f 100644 --- a/ft/ule.cc +++ b/ft/ule.cc @@ -695,7 +695,7 @@ msg_init_empty_ule(ULE ule) { static void msg_modify_ule(ULE ule, const ft_msg &msg) { XIDS xids = msg.xids(); - invariant(xids_get_num_xids(xids) < MAX_TRANSACTION_RECORDS); + invariant(toku_xids_get_num_xids(xids) < MAX_TRANSACTION_RECORDS); enum ft_msg_type type = msg.type(); if (type != FT_OPTIMIZE && type != FT_OPTIMIZE_FOR_UPGRADE) { ule_do_implicit_promotions(ule, xids); @@ -751,10 +751,10 @@ static void ule_optimize(ULE ule, XIDS xids) { if (ule->num_puxrs) { TXNID uncommitted = ule->uxrs[ule->num_cuxrs].xid; // outermost uncommitted TXNID oldest_living_xid = TXNID_NONE; - uint32_t num_xids = xids_get_num_xids(xids); + uint32_t num_xids = toku_xids_get_num_xids(xids); if (num_xids > 0) { invariant(num_xids==1); - oldest_living_xid = xids_get_xid(xids, 0); + oldest_living_xid = toku_xids_get_xid(xids, 0); } if (oldest_living_xid == TXNID_NONE || uncommitted < oldest_living_xid) { ule_promote_provisional_innermost_to_committed(ule); @@ -1350,9 +1350,9 @@ int le_latest_is_del(LEAFENTRY le) { bool le_has_xids(LEAFENTRY le, XIDS xids) { //Read num_uxrs - uint32_t num_xids = xids_get_num_xids(xids); + uint32_t num_xids = toku_xids_get_num_xids(xids); invariant(num_xids > 0); //Disallow checking for having TXNID_NONE - TXNID xid = xids_get_xid(xids, 0); + TXNID xid = toku_xids_get_xid(xids, 0); invariant(xid!=TXNID_NONE); bool rval = 
(le_outermost_uncommitted_xid(le) == xid); @@ -1602,13 +1602,13 @@ ule_do_implicit_promotions(ULE ule, XIDS xids) { //Optimization for (most) common case. //No commits necessary if everything is already committed. if (ule->num_puxrs > 0) { - int num_xids = xids_get_num_xids(xids); + int num_xids = toku_xids_get_num_xids(xids); invariant(num_xids>0); uint32_t max_index = ule->num_cuxrs + min_i32(ule->num_puxrs, num_xids) - 1; uint32_t ica_index = max_index; uint32_t index; for (index = ule->num_cuxrs; index <= max_index; index++) { - TXNID current_msg_xid = xids_get_xid(xids, index - ule->num_cuxrs); + TXNID current_msg_xid = toku_xids_get_xid(xids, index - ule->num_cuxrs); TXNID current_ule_xid = ule_get_xid(ule, index); if (current_msg_xid != current_ule_xid) { //ica is innermost transaction with matching xids. @@ -1698,7 +1698,7 @@ ule_promote_provisional_innermost_to_index(ULE ule, uint32_t index) { static void ule_apply_insert(ULE ule, XIDS xids, uint32_t vallen, void * valp) { ule_prepare_for_new_uxr(ule, xids); - TXNID this_xid = xids_get_innermost_xid(xids); // xid of transaction doing this insert + TXNID this_xid = toku_xids_get_innermost_xid(xids); // xid of transaction doing this insert ule_push_insert_uxr(ule, this_xid == TXNID_NONE, this_xid, vallen, valp); } @@ -1706,7 +1706,7 @@ ule_apply_insert(ULE ule, XIDS xids, uint32_t vallen, void * valp) { static void ule_apply_delete(ULE ule, XIDS xids) { ule_prepare_for_new_uxr(ule, xids); - TXNID this_xid = xids_get_innermost_xid(xids); // xid of transaction doing this delete + TXNID this_xid = toku_xids_get_innermost_xid(xids); // xid of transaction doing this delete ule_push_delete_uxr(ule, this_xid == TXNID_NONE, this_xid); } @@ -1717,7 +1717,7 @@ ule_apply_delete(ULE ule, XIDS xids) { // with placeholders. 
static void ule_prepare_for_new_uxr(ULE ule, XIDS xids) { - TXNID this_xid = xids_get_innermost_xid(xids); + TXNID this_xid = toku_xids_get_innermost_xid(xids); //This is for LOADER_USE_PUTS or transactionless environment //where messages use XIDS of 0 if (this_xid == TXNID_NONE && ule_get_innermost_xid(ule) == TXNID_NONE) { @@ -1742,7 +1742,7 @@ ule_prepare_for_new_uxr(ULE ule, XIDS xids) { // Remember, the innermost uxr can only be an insert or a delete, not a placeholder. static void ule_apply_abort(ULE ule, XIDS xids) { - TXNID this_xid = xids_get_innermost_xid(xids); // xid of transaction doing this abort + TXNID this_xid = toku_xids_get_innermost_xid(xids); // xid of transaction doing this abort invariant(this_xid!=TXNID_NONE); UXR innermost = ule_get_innermost_uxr(ule); // need to check for provisional entries in ule, otherwise @@ -1773,7 +1773,7 @@ ule_apply_broadcast_commit_all (ULE ule) { // If this transaction did modify the leafentry, then promote whatever it did. // Remember, the innermost uxr can only be an insert or a delete, not a placeholder. void ule_apply_commit(ULE ule, XIDS xids) { - TXNID this_xid = xids_get_innermost_xid(xids); // xid of transaction committing + TXNID this_xid = toku_xids_get_innermost_xid(xids); // xid of transaction committing invariant(this_xid!=TXNID_NONE); // need to check for provisional entries in ule, otherwise // there is nothing to abort, not checking this may result @@ -1915,7 +1915,7 @@ ule_add_placeholders(ULE ule, XIDS xids) { //Placeholders can be placed on top of the committed uxr. 
invariant(ule->num_cuxrs > 0); - uint32_t num_xids = xids_get_num_xids(xids); + uint32_t num_xids = toku_xids_get_num_xids(xids); // we assume that implicit promotion has happened // when we get this call, so the number of xids MUST // be greater than the number of provisional entries @@ -1923,12 +1923,12 @@ ule_add_placeholders(ULE ule, XIDS xids) { // make sure that the xids stack matches up to a certain amount // this first for loop is just debug code for (uint32_t i = 0; i < ule->num_puxrs; i++) { - TXNID current_msg_xid = xids_get_xid(xids, i); + TXNID current_msg_xid = toku_xids_get_xid(xids, i); TXNID current_ule_xid = ule_get_xid(ule, i + ule->num_cuxrs); invariant(current_msg_xid == current_ule_xid); } for (uint32_t i = ule->num_puxrs; i < num_xids-1; i++) { - TXNID current_msg_xid = xids_get_xid(xids, i); + TXNID current_msg_xid = toku_xids_get_xid(xids, i); ule_push_placeholder_uxr(ule, current_msg_xid); } } diff --git a/ft/xids-internal.h b/ft/xids-internal.h deleted file mode 100644 index 52f1a1db2a1..00000000000 --- a/ft/xids-internal.h +++ /dev/null @@ -1,109 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: - -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). 
- - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. 
- - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#pragma once - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." - -// Variable size list of transaction ids (known in design doc as xids<>). -// ids[0] is the outermost transaction. -// ids[num_xids - 1] is the innermost transaction. -// Should only be accessed by accessor functions xids_xxx, not directly. 
- -#include "portability/toku_stdint.h" - -#include "ft/txn.h" - -// If the xids struct is unpacked, the compiler aligns the ids[] and we waste a lot of space -typedef struct __attribute__((__packed__)) xids_t { - uint8_t num_xids; // maximum value of MAX_TRANSACTION_RECORDS - 1 ... - // ... because transaction 0 is implicit - TXNID ids[]; -} XIDS_S; diff --git a/ft/xids.cc b/ft/xids.cc index 775ae2757f8..9d629377014 100644 --- a/ft/xids.cc +++ b/ft/xids.cc @@ -110,7 +110,6 @@ PATENT RIGHTS GRANT: #include "portability/toku_portability.h" #include "ft/xids.h" -#include "ft/xids-internal.h" ///////////////////////////////////////////////////////////////////////////////// // This layer of abstraction (xids_xxx) understands xids<> and nothing else. @@ -122,7 +121,7 @@ PATENT RIGHTS GRANT: // the variable num_xids. // // The xids struct is immutable. The caller gets an initial version of XIDS -// by calling xids_get_root_xids(), which returns the constant struct +// by calling toku_xids_get_root_xids(), which returns the constant struct // representing the root transaction (id 0). When a transaction begins, // a new XIDS is created with the id of the current transaction appended to // the list. @@ -134,8 +133,8 @@ PATENT RIGHTS GRANT: // nested transactions. XIDS -xids_get_root_xids(void) { - static const struct xids_t root_xids = { +toku_xids_get_root_xids(void) { + static const struct XIDS_S root_xids = { .num_xids = 0 }; @@ -144,13 +143,13 @@ xids_get_root_xids(void) { } bool -xids_can_create_child(XIDS xids) { +toku_xids_can_create_child(XIDS xids) { invariant(xids->num_xids < MAX_TRANSACTION_RECORDS); return (xids->num_xids + 1) != MAX_TRANSACTION_RECORDS; } int -xids_create_unknown_child(XIDS parent_xids, XIDS *xids_p) { +toku_xids_create_unknown_child(XIDS parent_xids, XIDS *xids_p) { // Postcondition: // xids_p points to an xids that is an exact copy of parent_xids, but with room for one more xid. 
int rval; @@ -169,9 +168,9 @@ xids_create_unknown_child(XIDS parent_xids, XIDS *xids_p) { } void -xids_finalize_with_child(XIDS xids, TXNID this_xid) { +toku_xids_finalize_with_child(XIDS xids, TXNID this_xid) { // Precondition: - // - xids was created by xids_create_unknown_child + // - xids was created by toku_xids_create_unknown_child TXNID this_xid_disk = toku_htod64(this_xid); uint32_t num_child_xids = ++xids->num_xids; xids->ids[num_child_xids - 1] = this_xid_disk; @@ -180,21 +179,21 @@ xids_finalize_with_child(XIDS xids, TXNID this_xid) { // xids is immutable. This function creates a new xids by copying the // parent's list and then appending the xid of the new transaction. int -xids_create_child(XIDS parent_xids, // xids list for parent transaction - XIDS * xids_p, // xids list created - TXNID this_xid) { // xid of this transaction (new innermost) - bool can_create_child = xids_can_create_child(parent_xids); +toku_xids_create_child(XIDS parent_xids, // xids list for parent transaction + XIDS *xids_p, // xids list created + TXNID this_xid) { // xid of this transaction (new innermost) + bool can_create_child = toku_xids_can_create_child(parent_xids); if (!can_create_child) { return EINVAL; } - xids_create_unknown_child(parent_xids, xids_p); - xids_finalize_with_child(*xids_p, this_xid); + toku_xids_create_unknown_child(parent_xids, xids_p); + toku_xids_finalize_with_child(*xids_p, this_xid); return 0; } void -xids_create_from_buffer(struct rbuf *rb, // xids list for parent transaction - XIDS * xids_p) { // xids list created +toku_xids_create_from_buffer(struct rbuf *rb, // xids list for parent transaction + XIDS *xids_p) { // xids list created uint8_t num_xids = rbuf_char(rb); invariant(num_xids < MAX_TRANSACTION_RECORDS); XIDS CAST_FROM_VOIDP(xids, toku_xmalloc(sizeof(*xids) + num_xids*sizeof(xids->ids[0]))); @@ -207,8 +206,8 @@ xids_create_from_buffer(struct rbuf *rb, // xids list for parent transaction } void -xids_destroy(XIDS *xids_p) { - if (*xids_p != 
xids_get_root_xids()) toku_free(*xids_p); +toku_xids_destroy(XIDS *xids_p) { + if (*xids_p != toku_xids_get_root_xids()) toku_free(*xids_p); *xids_p = NULL; } @@ -216,48 +215,49 @@ xids_destroy(XIDS *xids_p) { // If requesting an xid out of range (which will be the case if xids array is empty) // then return 0, the xid of the root transaction. TXNID -xids_get_xid(XIDS xids, uint8_t index) { - invariant(index < xids_get_num_xids(xids)); +toku_xids_get_xid(XIDS xids, uint8_t index) { + invariant(index < toku_xids_get_num_xids(xids)); TXNID rval = xids->ids[index]; rval = toku_dtoh64(rval); return rval; } uint8_t -xids_get_num_xids(XIDS xids) { +toku_xids_get_num_xids(XIDS xids) { uint8_t rval = xids->num_xids; return rval; } // Return innermost xid TXNID -xids_get_innermost_xid(XIDS xids) { +toku_xids_get_innermost_xid(XIDS xids) { TXNID rval = TXNID_NONE; - if (xids_get_num_xids(xids)) { + if (toku_xids_get_num_xids(xids)) { // if clause above makes this cast ok - uint8_t innermost_xid = (uint8_t)(xids_get_num_xids(xids)-1); - rval = xids_get_xid(xids, innermost_xid); + uint8_t innermost_xid = (uint8_t) (toku_xids_get_num_xids(xids) - 1); + rval = toku_xids_get_xid(xids, innermost_xid); } return rval; } TXNID -xids_get_outermost_xid(XIDS xids) { +toku_xids_get_outermost_xid(XIDS xids) { TXNID rval = TXNID_NONE; - if (xids_get_num_xids(xids)) - rval = xids_get_xid(xids, 0); + if (toku_xids_get_num_xids(xids)) { + rval = toku_xids_get_xid(xids, 0); + } return rval; } void -xids_cpy(XIDS target, XIDS source) { - size_t size = xids_get_size(source); +toku_xids_cpy(XIDS target, XIDS source) { + size_t size = toku_xids_get_size(source); memcpy(target, source, size); } // return size in bytes uint32_t -xids_get_size(XIDS xids){ +toku_xids_get_size(XIDS xids) { uint32_t rval; uint8_t num_xids = xids->num_xids; rval = sizeof(*xids) + num_xids * sizeof(xids->ids[0]); @@ -265,7 +265,7 @@ xids_get_size(XIDS xids){ } uint32_t -xids_get_serialize_size(XIDS xids){ 
+toku_xids_get_serialize_size(XIDS xids) { uint32_t rval; uint8_t num_xids = xids->num_xids; rval = 1 + //num xids @@ -274,7 +274,7 @@ xids_get_serialize_size(XIDS xids){ } unsigned char * -xids_get_end_of_array(XIDS xids) { +toku_xids_get_end_of_array(XIDS xids) { TXNID *r = xids->ids + xids->num_xids; return (unsigned char*)r; } @@ -288,13 +288,13 @@ void wbuf_nocrc_xids(struct wbuf *wb, XIDS xids) { } void -xids_fprintf(FILE* fp, XIDS xids) { +toku_xids_fprintf(FILE *fp, XIDS xids) { uint8_t index; - unsigned num_xids = xids_get_num_xids(xids); + unsigned num_xids = toku_xids_get_num_xids(xids); fprintf(fp, "[|%u| ", num_xids); - for (index = 0; index < xids_get_num_xids(xids); index++) { + for (index = 0; index < toku_xids_get_num_xids(xids); index++) { if (index) fprintf(fp, ","); - fprintf(fp, "%" PRIx64, xids_get_xid(xids, index)); + fprintf(fp, "%" PRIx64, toku_xids_get_xid(xids, index)); } fprintf(fp, "]"); } diff --git a/ft/xids.h b/ft/xids.h index 1627ff5308e..bd0d3c0254d 100644 --- a/ft/xids.h +++ b/ft/xids.h @@ -106,44 +106,65 @@ PATENT RIGHTS GRANT: #include "ft/txn.h" #include "ft/rbuf.h" #include "ft/wbuf.h" -#include "ft/tokuconst.h" -typedef struct xids_t *XIDS; +/* The number of transaction ids stored in the xids structure is + * represented by an 8-bit value. The value 255 is reserved. + * The constant MAX_NESTED_TRANSACTIONS is one less because + * one slot in the packed leaf entry is used for the implicit + * root transaction (id 0). + */ +enum { + MAX_NESTED_TRANSACTIONS = 253, + MAX_TRANSACTION_RECORDS = MAX_NESTED_TRANSACTIONS + 1 +}; + +// Variable size list of transaction ids (known in design doc as xids<>). +// ids[0] is the outermost transaction. +// ids[num_xids - 1] is the innermost transaction. +// Should only be accessed by accessor functions toku_xids_xxx, not directly. 
+ +// If the xids struct is unpacked, the compiler aligns the ids[] and we waste a lot of space +struct __attribute__((__packed__)) XIDS_S { + // maximum value of MAX_TRANSACTION_RECORDS - 1 because transaction 0 is implicit + uint8_t num_xids; + TXNID ids[]; +}; +typedef struct XIDS_S *XIDS; // Retrieve an XIDS representing the root transaction. -XIDS xids_get_root_xids(void); +XIDS toku_xids_get_root_xids(void); -bool xids_can_create_child(XIDS xids); +bool toku_xids_can_create_child(XIDS xids); -void xids_cpy(XIDS target, XIDS source); +void toku_xids_cpy(XIDS target, XIDS source); //Creates an XIDS representing this transaction. //You must pass in an XIDS representing the parent of this transaction. -int xids_create_child(XIDS parent_xids, XIDS *xids_p, TXNID this_xid); +int toku_xids_create_child(XIDS parent_xids, XIDS *xids_p, TXNID this_xid); -// The following two functions (in order) are equivalent to xids_create child, +// The following two functions (in order) are equivalent to toku_xids_create child, // but allow you to do most of the work without knowing the new xid. 
-int xids_create_unknown_child(XIDS parent_xids, XIDS *xids_p); -void xids_finalize_with_child(XIDS xids, TXNID this_xid); +int toku_xids_create_unknown_child(XIDS parent_xids, XIDS *xids_p); +void toku_xids_finalize_with_child(XIDS xids, TXNID this_xid); -void xids_create_from_buffer(struct rbuf *rb, XIDS * xids_p); +void toku_xids_create_from_buffer(struct rbuf *rb, XIDS *xids_p); -void xids_destroy(XIDS *xids_p); +void toku_xids_destroy(XIDS *xids_p); -TXNID xids_get_xid(XIDS xids, uint8_t index); +TXNID toku_xids_get_xid(XIDS xids, uint8_t index); -uint8_t xids_get_num_xids(XIDS xids); +uint8_t toku_xids_get_num_xids(XIDS xids); -TXNID xids_get_innermost_xid(XIDS xids); -TXNID xids_get_outermost_xid(XIDS xids); +TXNID toku_xids_get_innermost_xid(XIDS xids); +TXNID toku_xids_get_outermost_xid(XIDS xids); // return size in bytes -uint32_t xids_get_size(XIDS xids); +uint32_t toku_xids_get_size(XIDS xids); -uint32_t xids_get_serialize_size(XIDS xids); +uint32_t toku_xids_get_serialize_size(XIDS xids); -unsigned char *xids_get_end_of_array(XIDS xids); +unsigned char *toku_xids_get_end_of_array(XIDS xids); void wbuf_nocrc_xids(struct wbuf *wb, XIDS xids); -void xids_fprintf(FILE* fp, XIDS xids); +void toku_xids_fprintf(FILE* fp, XIDS xids); diff --git a/src/indexer-undo-do.cc b/src/indexer-undo-do.cc index 559dfef7291..b18efbed235 100644 --- a/src/indexer-undo-do.cc +++ b/src/indexer-undo-do.cc @@ -96,7 +96,6 @@ PATENT RIGHTS GRANT: #include #include -#include #include #include #include @@ -199,7 +198,7 @@ indexer_undo_do_committed(DB_INDEXER *indexer, DB *hotdb, struct ule_prov_info * ULEHANDLE ule = prov_info->ule; // init the xids to the root xid - XIDS xids = xids_get_root_xids(); + XIDS xids = toku_xids_get_root_xids(); // scan the committed stack from bottom to top uint32_t num_committed = ule_get_num_committed(ule); @@ -280,7 +279,7 @@ indexer_undo_do_committed(DB_INDEXER *indexer, DB *hotdb, struct ule_prov_info * break; } - xids_destroy(&xids); + 
toku_xids_destroy(&xids); return result; } @@ -312,7 +311,7 @@ indexer_undo_do_provisional(DB_INDEXER *indexer, DB *hotdb, struct ule_prov_info ULEHANDLE ule = prov_info->ule; // init the xids to the root xid - XIDS xids = xids_get_root_xids(); + XIDS xids = toku_xids_get_root_xids(); uint32_t num_provisional = prov_info->num_provisional; uint32_t num_committed = prov_info->num_committed; @@ -472,7 +471,7 @@ indexer_undo_do_provisional(DB_INDEXER *indexer, DB *hotdb, struct ule_prov_info // then this will need to be handled below exit release_txns(ule, prov_states, prov_txns, indexer); exit: - xids_destroy(&xids); + toku_xids_destroy(&xids); return result; } @@ -496,16 +495,16 @@ static int indexer_set_xid(DB_INDEXER *UU(indexer), TXNID this_xid, XIDS *xids_result) { int result = 0; XIDS old_xids = *xids_result; - XIDS new_xids = xids_get_root_xids(); + XIDS new_xids = toku_xids_get_root_xids(); if (this_xid != TXNID_NONE) { XIDS child_xids; - result = xids_create_child(new_xids, &child_xids, this_xid); - xids_destroy(&new_xids); + result = toku_xids_create_child(new_xids, &child_xids, this_xid); + toku_xids_destroy(&new_xids); if (result == 0) new_xids = child_xids; } if (result == 0) { - xids_destroy(&old_xids); + toku_xids_destroy(&old_xids); *xids_result = new_xids; } @@ -517,9 +516,9 @@ static int indexer_append_xid(DB_INDEXER *UU(indexer), TXNID xid, XIDS *xids_result) { XIDS old_xids = *xids_result; XIDS new_xids; - int result = xids_create_child(old_xids, &new_xids, xid); + int result = toku_xids_create_child(old_xids, &new_xids, xid); if (result == 0) { - xids_destroy(&old_xids); + toku_xids_destroy(&old_xids); *xids_result = new_xids; } return result; @@ -682,7 +681,7 @@ indexer_ft_insert_committed(DB_INDEXER *indexer, DB *hotdb, DBT *hotkey, DBT *ho static int indexer_ft_commit(DB_INDEXER *indexer, DB *hotdb, DBT *hotkey, XIDS xids) { int result = 0; - if (xids_get_num_xids(xids) > 0) {// send commit only when not the root xid + if 
(toku_xids_get_num_xids(xids) > 0) {// send commit only when not the root xid // TEST if (indexer->i->test_commit_any) { result = indexer->i->test_commit_any(indexer, hotdb, hotkey, xids); diff --git a/src/indexer.cc b/src/indexer.cc index dc2ca4453fc..093f3751733 100644 --- a/src/indexer.cc +++ b/src/indexer.cc @@ -99,7 +99,6 @@ PATENT RIGHTS GRANT: #include "ydb-internal.h" #include #include "indexer.h" -#include #include #include #include diff --git a/src/tests/hotindexer-undo-do-test.cc b/src/tests/hotindexer-undo-do-test.cc index e8b56f66d08..a478666d87c 100644 --- a/src/tests/hotindexer-undo-do-test.cc +++ b/src/tests/hotindexer-undo-do-test.cc @@ -95,11 +95,11 @@ PATENT RIGHTS GRANT: #include "test.h" -#include +#include #include #include #include -#include +#include #include "indexer-internal.h" diff --git a/src/tests/test_txn_nested1.cc b/src/tests/test_txn_nested1.cc index d25e7c61ce2..4b95a6466e4 100644 --- a/src/tests/test_txn_nested1.cc +++ b/src/tests/test_txn_nested1.cc @@ -95,7 +95,7 @@ PATENT RIGHTS GRANT: #include #include #include -#include +#include #define MAX_NEST MAX_NESTED_TRANSACTIONS diff --git a/src/tests/test_txn_nested2.cc b/src/tests/test_txn_nested2.cc index 542f2574c85..8caa98e734d 100644 --- a/src/tests/test_txn_nested2.cc +++ b/src/tests/test_txn_nested2.cc @@ -95,7 +95,7 @@ PATENT RIGHTS GRANT: #include #include #include -#include +#include #define MAX_NEST MAX_TRANSACTION_RECORDS #define MAX_SIZE MAX_TRANSACTION_RECORDS diff --git a/src/tests/test_txn_nested3.cc b/src/tests/test_txn_nested3.cc index 22e5d984a7a..082a1dc8770 100644 --- a/src/tests/test_txn_nested3.cc +++ b/src/tests/test_txn_nested3.cc @@ -95,7 +95,7 @@ PATENT RIGHTS GRANT: #include #include #include -#include +#include #define MAX_NEST MAX_TRANSACTION_RECORDS #define MAX_SIZE MAX_TRANSACTION_RECORDS diff --git a/src/tests/test_txn_nested4.cc b/src/tests/test_txn_nested4.cc index edc6430a8c7..6f8c7a984cb 100644 --- a/src/tests/test_txn_nested4.cc +++ 
b/src/tests/test_txn_nested4.cc @@ -95,7 +95,7 @@ PATENT RIGHTS GRANT: #include #include #include -#include +#include #define MAX_NEST MAX_TRANSACTION_RECORDS #define MAX_SIZE MAX_TRANSACTION_RECORDS diff --git a/src/tests/test_txn_nested5.cc b/src/tests/test_txn_nested5.cc index df5ad696984..6009e3b4fee 100644 --- a/src/tests/test_txn_nested5.cc +++ b/src/tests/test_txn_nested5.cc @@ -95,7 +95,7 @@ PATENT RIGHTS GRANT: #include #include #include -#include +#include #define MAX_NEST MAX_TRANSACTION_RECORDS #define MAX_SIZE (MAX_TRANSACTION_RECORDS + 1) From 6e3d772fea2f75d6da65b4abdd9c015f7f9af040 Mon Sep 17 00:00:00 2001 From: John Esmet Date: Thu, 3 Jul 2014 12:30:17 -0400 Subject: [PATCH 070/190] FT-294 Move serialization code to its own subdirectory, serialize/ --- ft/CMakeLists.txt | 16 ++++++++-------- ft/bndata.h | 9 +++++---- ft/cachetable.h | 2 +- ft/ft-cachetable-wrappers.cc | 2 +- ft/ft-flusher.cc | 4 ++-- ft/ft-internal.h | 2 +- ft/ft-ops.cc | 10 +++++----- ft/ft-test-helpers.cc | 2 +- ft/ft-verify.cc | 2 +- ft/ft.cc | 6 +++--- ft/ftverify.cc | 12 ++++++------ ft/leafentry.cc | 2 +- ft/leafentry.h | 2 +- ft/loader/dbufio.cc | 2 +- ft/loader/loader.cc | 10 +++++----- ft/log.h | 8 -------- ft/logger.cc | 2 +- ft/logger.h | 8 ++++---- ft/msg.h | 4 ++-- ft/node.cc | 6 +++--- ft/pivotkeys.cc | 4 ++-- ft/rollback-ct-callbacks.cc | 4 ++-- ft/rollback.cc | 2 +- ft/rollback.h | 2 +- ft/{ => serialize}/block_allocator.cc | 2 +- ft/{ => serialize}/block_allocator.h | 0 ft/{ => serialize}/block_table.cc | 8 ++++---- ft/{ => serialize}/block_table.h | 4 ++-- ft/{ => serialize}/compress.cc | 0 ft/{ => serialize}/compress.h | 0 ft/{ => serialize}/ft-node-deserialize.cc | 2 +- ft/{ => serialize}/ft-serialize.cc | 8 ++++---- ft/{ => serialize}/ft-serialize.h | 2 +- ft/{ => serialize}/ft_layout_version.h | 0 ft/{ => serialize}/ft_node-serialize.cc | 9 +++++---- ft/{ => serialize}/ft_node-serialize.h | 10 +++++----- ft/{ => serialize}/quicklz.cc | 0 ft/{ => 
serialize}/quicklz.h | 0 ft/{ => serialize}/rbuf.h | 0 ft/{ => serialize}/sub_block.cc | 6 +++--- ft/{ => serialize}/sub_block.h | 2 +- ft/{ => serialize}/sub_block_map.h | 0 ft/{ => serialize}/wbuf.h | 0 ft/{ => serialize}/workset.h | 0 ft/tests/compress-test.cc | 2 +- ft/tests/quicklz-test.cc | 2 +- ft/tests/subblock-test-checksum.cc | 4 ++-- ft/tests/subblock-test-compression.cc | 2 +- ft/tests/subblock-test-index.cc | 2 +- ft/tests/subblock-test-size.cc | 2 +- ft/tests/test.h | 8 ++++---- ft/tests/test_block_allocator_merge.cc | 2 +- ft/tokuftdump.cc | 6 +++--- ft/txn.h | 6 +++--- ft/xids.h | 4 ++-- util/dmt.h | 15 +++++++++------ 56 files changed, 114 insertions(+), 117 deletions(-) rename ft/{ => serialize}/block_allocator.cc (99%) rename ft/{ => serialize}/block_allocator.h (100%) rename ft/{ => serialize}/block_table.cc (99%) rename ft/{ => serialize}/block_table.h (99%) rename ft/{ => serialize}/compress.cc (100%) rename ft/{ => serialize}/compress.h (100%) rename ft/{ => serialize}/ft-node-deserialize.cc (99%) rename ft/{ => serialize}/ft-serialize.cc (99%) rename ft/{ => serialize}/ft-serialize.h (99%) rename ft/{ => serialize}/ft_layout_version.h (100%) rename ft/{ => serialize}/ft_node-serialize.cc (99%) rename ft/{ => serialize}/ft_node-serialize.h (98%) rename ft/{ => serialize}/quicklz.cc (100%) rename ft/{ => serialize}/quicklz.h (100%) rename ft/{ => serialize}/rbuf.h (100%) rename ft/{ => serialize}/sub_block.cc (99%) rename ft/{ => serialize}/sub_block.h (99%) rename ft/{ => serialize}/sub_block_map.h (100%) rename ft/{ => serialize}/wbuf.h (100%) rename ft/{ => serialize}/workset.h (100%) diff --git a/ft/CMakeLists.txt b/ft/CMakeLists.txt index 3fcc4c2f853..47d49ba1ddb 100644 --- a/ft/CMakeLists.txt +++ b/ft/CMakeLists.txt @@ -24,21 +24,15 @@ add_custom_target( set(FT_SOURCES background_job_manager - block_allocator - block_table bndata cachetable checkpoint - compress cursor ft ft-cachetable-wrappers ft-flusher ft-hot-flusher - 
ft_node-serialize - ft-node-deserialize ft-ops - ft-serialize ft-test-helpers ft-verify loader/callbacks @@ -55,14 +49,20 @@ set(FT_SOURCES msg_buffer node pivotkeys - quicklz recover rollback rollback-apply rollback-ct-callbacks rollback_log_node_cache roll - sub_block + serialize/block_allocator + serialize/block_table + serialize/compress + serialize/ft_node-serialize + serialize/ft-node-deserialize + serialize/ft-serialize + serialize/quicklz + serialize/sub_block txn txn_child_manager txn_manager diff --git a/ft/bndata.h b/ft/bndata.h index 6c34833c00e..f228a3b04d7 100644 --- a/ft/bndata.h +++ b/ft/bndata.h @@ -90,10 +90,11 @@ PATENT RIGHTS GRANT: #pragma once -#include -#include "wbuf.h" -#include -#include "leafentry.h" +#include "util/dmt.h" +#include "util/mempool.h" + +#include "ft/leafentry.h" +#include "ft/serialize/wbuf.h" // Key/leafentry pair stored in a dmt. The key is inlined, the offset (in leafentry mempool) is stored for the leafentry. struct klpair_struct { diff --git a/ft/cachetable.h b/ft/cachetable.h index d657abbc2fe..e3b5df82bc1 100644 --- a/ft/cachetable.h +++ b/ft/cachetable.h @@ -94,7 +94,7 @@ PATENT RIGHTS GRANT: #include -#include "ft/block_table.h" +#include "ft/serialize/block_table.h" #include "ft/logger.h" #include "ft/txn.h" #include "util/minicron.h" diff --git a/ft/ft-cachetable-wrappers.cc b/ft/ft-cachetable-wrappers.cc index 17e7751d05f..02589ef5c56 100644 --- a/ft/ft-cachetable-wrappers.cc +++ b/ft/ft-cachetable-wrappers.cc @@ -89,7 +89,7 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
-#include "ft/block_table.h" +#include "ft/serialize/block_table.h" #include "ft/ft-cachetable-wrappers.h" #include "ft/ft-flusher.h" #include "ft/ft-internal.h" diff --git a/ft/ft-flusher.cc b/ft/ft-flusher.cc index f664878cc97..10264133728 100644 --- a/ft/ft-flusher.cc +++ b/ft/ft-flusher.cc @@ -89,14 +89,14 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#include "ft/block_table.h" #include "ft/ft.h" #include "ft/ft-cachetable-wrappers.h" #include "ft/ft-internal.h" #include "ft/ft-flusher.h" #include "ft/ft-flusher-internal.h" -#include "ft/ft_node-serialize.h" #include "ft/node.h" +#include "ft/serialize/block_table.h" +#include "ft/serialize/ft_node-serialize.h" #include "portability/toku_assert.h" #include "portability/toku_atomic.h" #include "util/status.h" diff --git a/ft/ft-internal.h b/ft/ft-internal.h index 4018b5c4dfa..177484f31c7 100644 --- a/ft/ft-internal.h +++ b/ft/ft-internal.h @@ -142,7 +142,7 @@ struct ft_header { // LSN of creation of "checkpoint-begin" record in log. LSN checkpoint_lsn; - // see ft_layout_version.h. maybe don't need this if we assume + // see serialize/ft_layout_version.h. 
maybe don't need this if we assume // it's always the current version after deserializing const int layout_version; // different (<) from layout_version if upgraded from a previous diff --git a/ft/ft-ops.cc b/ft/ft-ops.cc index ccbee3256ce..95a2cfc51f0 100644 --- a/ft/ft-ops.cc +++ b/ft/ft-ops.cc @@ -200,21 +200,21 @@ basement nodes, bulk fetch, and partial fetch: */ -#include "ft/block_table.h" #include "ft/checkpoint.h" #include "ft/cursor.h" #include "ft/ft.h" #include "ft/ft-cachetable-wrappers.h" #include "ft/ft-flusher.h" #include "ft/ft-internal.h" -#include "ft/ft-serialize.h" -#include "ft/ft_layout_version.h" -#include "ft/ft_node-serialize.h" #include "ft/msg.h" #include "ft/leafentry.h" #include "ft/log-internal.h" #include "ft/node.h" -#include "ft/sub_block.h" +#include "ft/serialize/block_table.h" +#include "ft/serialize/sub_block.h" +#include "ft/serialize/ft-serialize.h" +#include "ft/serialize/ft_layout_version.h" +#include "ft/serialize/ft_node-serialize.h" #include "ft/txn_manager.h" #include "ft/ule.h" #include "ft/xids.h" diff --git a/ft/ft-test-helpers.cc b/ft/ft-test-helpers.cc index 1d70aeec7d3..769965686c1 100644 --- a/ft/ft-test-helpers.cc +++ b/ft/ft-test-helpers.cc @@ -93,7 +93,7 @@ PATENT RIGHTS GRANT: #include "ft/ft-cachetable-wrappers.h" #include "ft/ft-internal.h" #include "ft/ft-flusher.h" -#include "ft/ft_node-serialize.h" +#include "ft/serialize/ft_node-serialize.h" #include "ft/node.h" #include "ft/ule.h" diff --git a/ft/ft-verify.cc b/ft/ft-verify.cc index b5b4dfd711e..28dc8eef17c 100644 --- a/ft/ft-verify.cc +++ b/ft/ft-verify.cc @@ -97,7 +97,7 @@ PATENT RIGHTS GRANT: * For each nonleaf node: All the messages have keys that are between the associated pivot keys ( left_pivot_key < message <= right_pivot_key) */ -#include "ft/block_table.h" +#include "ft/serialize/block_table.h" #include "ft/ft.h" #include "ft/ft-cachetable-wrappers.h" #include "ft/ft-internal.h" diff --git a/ft/ft.cc b/ft/ft.cc index 6df7531d311..bec8ddd4450 
100644 --- a/ft/ft.cc +++ b/ft/ft.cc @@ -89,15 +89,15 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#include "ft/block_table.h" +#include "ft/serialize/block_table.h" #include "ft/ft.h" #include "ft/ft-cachetable-wrappers.h" #include "ft/ft-internal.h" -#include "ft/ft-serialize.h" -#include "ft/ft_node-serialize.h" #include "ft/log-internal.h" #include "ft/log_header.h" #include "ft/node.h" +#include "ft/serialize/ft-serialize.h" +#include "ft/serialize/ft_node-serialize.h" #include #include diff --git a/ft/ftverify.cc b/ft/ftverify.cc index 5e3dbc1b57a..89de2d6b12b 100644 --- a/ft/ftverify.cc +++ b/ft/ftverify.cc @@ -98,14 +98,14 @@ PATENT RIGHTS GRANT: #include "portability/toku_list.h" #include "portability/toku_portability.h" -#include "ft/block_allocator.h" +#include "ft/serialize/block_allocator.h" #include "ft/ft-internal.h" -#include "ft/ft-serialize.h" -#include "ft/ft_layout_version.h" -#include "ft/ft_node-serialize.h" +#include "ft/serialize/ft-serialize.h" +#include "ft/serialize/ft_layout_version.h" +#include "ft/serialize/ft_node-serialize.h" #include "ft/node.h" -#include "ft/rbuf.h" -#include "ft/sub_block.h" +#include "ft/serialize/rbuf.h" +#include "ft/serialize/sub_block.h" #include "util/threadpool.h" #include diff --git a/ft/leafentry.cc b/ft/leafentry.cc index bcd3cf01b0c..57d4241de9a 100644 --- a/ft/leafentry.cc +++ b/ft/leafentry.cc @@ -89,7 +89,7 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." 
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#include "wbuf.h" +#include "serialize/wbuf.h" #include "leafentry.h" void wbuf_nocrc_LEAFENTRY(struct wbuf *w, LEAFENTRY le) { diff --git a/ft/leafentry.h b/ft/leafentry.h index 690d8c78905..148c4092c37 100644 --- a/ft/leafentry.h +++ b/ft/leafentry.h @@ -98,7 +98,7 @@ PATENT RIGHTS GRANT: #include #include "ft/txn_manager.h" -#include "ft/rbuf.h" +#include "ft/serialize/rbuf.h" #include "ft/msg.h" /* diff --git a/ft/loader/dbufio.cc b/ft/loader/dbufio.cc index 7df0e0fe562..0be68d250c4 100644 --- a/ft/loader/dbufio.cc +++ b/ft/loader/dbufio.cc @@ -97,7 +97,7 @@ PATENT RIGHTS GRANT: #include "portability/memory.h" #include "ft/ft-internal.h" -#include "ft/ft_node-serialize.h" +#include "ft/serialize/ft_node-serialize.h" #include "loader/dbufio.h" #include "loader/loader-internal.h" diff --git a/ft/loader/loader.cc b/ft/loader/loader.cc index 9f44b2eb8d5..474adcd1bbf 100644 --- a/ft/loader/loader.cc +++ b/ft/loader/loader.cc @@ -100,19 +100,19 @@ PATENT RIGHTS GRANT: #include #include -#include "ft/block_table.h" #include "ft/ft.h" #include "ft/ft-internal.h" -#include "ft/ft-serialize.h" -#include "ft/ft_node-serialize.h" #include "ft/leafentry.h" #include "ft/loader/loader-internal.h" #include "ft/loader/pqueue.h" #include "ft/loader/dbufio.h" #include "ft/log-internal.h" #include "ft/node.h" -#include "ft/sub_block.h" -#include "ft/sub_block_map.h" +#include "ft/serialize/block_table.h" +#include "ft/serialize/ft-serialize.h" +#include "ft/serialize/ft_node-serialize.h" +#include "ft/serialize/sub_block.h" +#include "ft/serialize/sub_block_map.h" #include "util/x1764.h" diff --git a/ft/log.h b/ft/log.h index 7127e4a12e9..5b958b2fda7 100644 --- a/ft/log.h 
+++ b/ft/log.h @@ -106,14 +106,6 @@ PATENT RIGHTS GRANT: struct roll_entry; -static inline int toku_copy_BYTESTRING(BYTESTRING *target, BYTESTRING val) { - target->len = val.len; - target->data = (char *) toku_memdup(val.data, (size_t)val.len); - if (target->data==0) { - return get_error_errno(); - } - return 0; -} static inline void toku_free_TXNID(TXNID txnid __attribute__((__unused__))) {} static inline void toku_free_TXNID_PAIR(TXNID_PAIR txnid __attribute__((__unused__))) {} diff --git a/ft/logger.cc b/ft/logger.cc index 188b72b0c4c..28ca3ae82bb 100644 --- a/ft/logger.cc +++ b/ft/logger.cc @@ -94,7 +94,7 @@ PATENT RIGHTS GRANT: #include #include -#include "ft/block_table.h" +#include "ft/serialize/block_table.h" #include "ft/ft.h" #include "ft/log-internal.h" #include "ft/txn_manager.h" diff --git a/ft/logger.h b/ft/logger.h index 95ec620ea6b..2c04f6b8ac5 100644 --- a/ft/logger.h +++ b/ft/logger.h @@ -92,8 +92,8 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
-#include "ft/block_table.h" -#include "ft/ft_layout_version.h" +#include "ft/serialize/block_table.h" +#include "ft/serialize/ft_layout_version.h" #include "ft/txn.h" typedef struct tokulogger *TOKULOGGER; @@ -278,7 +278,7 @@ struct txn_manager *toku_logger_get_txn_manager(TOKULOGGER logger); // For serialize / deserialize -#include "ft/wbuf.h" +#include "ft/serialize/wbuf.h" static inline void wbuf_nocrc_FILENUM(struct wbuf *wb, FILENUM fileid) { wbuf_nocrc_uint(wb, fileid.fileid); @@ -309,7 +309,7 @@ static inline void wbuf_nocrc_XIDP (struct wbuf *w, TOKU_XA_XID *xid) { wbuf_nocrc_literal_bytes(w, xid->data, xid->gtrid_length+xid->bqual_length); } -#include "ft/rbuf.h" +#include "ft/serialize/rbuf.h" static inline void rbuf_FILENUM(struct rbuf *rb, FILENUM *filenum) { filenum->fileid = rbuf_int(rb); diff --git a/ft/msg.h b/ft/msg.h index f049fefadd9..7d738a3cdb0 100644 --- a/ft/msg.h +++ b/ft/msg.h @@ -232,13 +232,13 @@ private: // For serialize / deserialize -#include "ft/wbuf.h" +#include "ft/serialize/wbuf.h" static inline void wbuf_MSN(struct wbuf *wb, MSN msn) { wbuf_ulonglong(wb, msn.msn); } -#include "ft/rbuf.h" +#include "ft/serialize/rbuf.h" static inline MSN rbuf_MSN(struct rbuf *rb) { MSN msn = { .msn = rbuf_ulonglong(rb) }; diff --git a/ft/node.cc b/ft/node.cc index 8c87a5194bf..b03c64c5f32 100644 --- a/ft/node.cc +++ b/ft/node.cc @@ -91,10 +91,10 @@ PATENT RIGHTS GRANT: #include "ft/ft.h" #include "ft/ft-internal.h" -#include "ft/ft_node-serialize.h" +#include "ft/serialize/ft_node-serialize.h" #include "ft/node.h" -#include "ft/rbuf.h" -#include "ft/wbuf.h" +#include "ft/serialize/rbuf.h" +#include "ft/serialize/wbuf.h" #include "util/scoped_malloc.h" #include "util/sort.h" diff --git a/ft/pivotkeys.cc b/ft/pivotkeys.cc index 5dd84cda643..27e1ea14cc1 100644 --- a/ft/pivotkeys.cc +++ b/ft/pivotkeys.cc @@ -94,8 +94,8 @@ PATENT RIGHTS GRANT: #include "portability/memory.h" #include "ft/node.h" -#include "ft/rbuf.h" -#include "ft/wbuf.h" +#include 
"ft/serialize/rbuf.h" +#include "ft/serialize/wbuf.h" void ftnode_pivot_keys::create_empty() { _num_pivots = 0; diff --git a/ft/rollback-ct-callbacks.cc b/ft/rollback-ct-callbacks.cc index 680ba2c7cea..e2810285bf1 100644 --- a/ft/rollback-ct-callbacks.cc +++ b/ft/rollback-ct-callbacks.cc @@ -92,9 +92,9 @@ PATENT RIGHTS GRANT: #include "portability/memory.h" #include "portability/toku_portability.h" -#include "ft/block_table.h" +#include "ft/serialize/block_table.h" #include "ft/ft-internal.h" -#include "ft/ft_node-serialize.h" +#include "ft/serialize/ft_node-serialize.h" #include "ft/rollback.h" #include "ft/rollback-ct-callbacks.h" diff --git a/ft/rollback.cc b/ft/rollback.cc index 0648246d4b3..c9dc1cbe564 100644 --- a/ft/rollback.cc +++ b/ft/rollback.cc @@ -91,7 +91,7 @@ PATENT RIGHTS GRANT: #include -#include "ft/block_table.h" +#include "ft/serialize/block_table.h" #include "ft/ft.h" #include "ft/log-internal.h" #include "ft/rollback-ct-callbacks.h" diff --git a/ft/rollback.h b/ft/rollback.h index 20cda4de8e7..d9b400b9903 100644 --- a/ft/rollback.h +++ b/ft/rollback.h @@ -93,7 +93,7 @@ PATENT RIGHTS GRANT: #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
#include "ft/cachetable.h" -#include "ft/sub_block.h" +#include "ft/serialize/sub_block.h" #include "ft/txn.h" #include "util/memarena.h" diff --git a/ft/block_allocator.cc b/ft/serialize/block_allocator.cc similarity index 99% rename from ft/block_allocator.cc rename to ft/serialize/block_allocator.cc index f595950bc52..aa11ac8cb91 100644 --- a/ft/block_allocator.cc +++ b/ft/serialize/block_allocator.cc @@ -97,7 +97,7 @@ PATENT RIGHTS GRANT: #include "portability/toku_stdint.h" #include "portability/toku_stdlib.h" -#include "ft/block_allocator.h" +#include "ft/serialize/block_allocator.h" // Here's a very simple implementation. // It's not very fast at allocating or freeing. diff --git a/ft/block_allocator.h b/ft/serialize/block_allocator.h similarity index 100% rename from ft/block_allocator.h rename to ft/serialize/block_allocator.h diff --git a/ft/block_table.cc b/ft/serialize/block_table.cc similarity index 99% rename from ft/block_table.cc rename to ft/serialize/block_table.cc index 800ad3dc09e..630dc28200c 100644 --- a/ft/block_table.cc +++ b/ft/serialize/block_table.cc @@ -94,13 +94,13 @@ PATENT RIGHTS GRANT: #include "portability/toku_assert.h" #include "portability/toku_pthread.h" -#include "ft/block_allocator.h" -#include "ft/block_table.h" #include "ft/ft-internal.h" // ugly but pragmatic, need access to dirty bits while holding translation lock // TODO: reorganize this dependency #include "ft/ft-ops.h" // for toku_maybe_truncate_file -#include "ft/rbuf.h" -#include "ft/wbuf.h" +#include "ft/serialize/block_table.h" +#include "ft/serialize/rbuf.h" +#include "ft/serialize/wbuf.h" +#include "ft/serialize/block_allocator.h" #include "util/nb_mutex.h" diff --git a/ft/block_table.h b/ft/serialize/block_table.h similarity index 99% rename from ft/block_table.h rename to ft/serialize/block_table.h index 52ea57ed0bd..cb0f50f51f3 100644 --- a/ft/block_table.h +++ b/ft/serialize/block_table.h @@ -190,7 +190,7 @@ enum { // For serialize / deserialize -#include 
"ft/wbuf.h" +#include "ft/serialize/wbuf.h" static inline void wbuf_BLOCKNUM (struct wbuf *w, BLOCKNUM b) { wbuf_ulonglong(w, b.b); @@ -204,7 +204,7 @@ static inline void wbuf_DISKOFF(struct wbuf *wb, DISKOFF off) { wbuf_ulonglong(wb, (uint64_t) off); } -#include "ft/rbuf.h" +#include "ft/serialize/rbuf.h" static inline DISKOFF rbuf_DISKOFF(struct rbuf *rb) { return rbuf_ulonglong(rb); diff --git a/ft/compress.cc b/ft/serialize/compress.cc similarity index 100% rename from ft/compress.cc rename to ft/serialize/compress.cc diff --git a/ft/compress.h b/ft/serialize/compress.h similarity index 100% rename from ft/compress.h rename to ft/serialize/compress.h diff --git a/ft/ft-node-deserialize.cc b/ft/serialize/ft-node-deserialize.cc similarity index 99% rename from ft/ft-node-deserialize.cc rename to ft/serialize/ft-node-deserialize.cc index 9ea167c3cd9..eced7a5e1b9 100644 --- a/ft/ft-node-deserialize.cc +++ b/ft/serialize/ft-node-deserialize.cc @@ -91,7 +91,7 @@ PATENT RIGHTS GRANT: #include "ft/node.h" #include "ft/ft-internal.h" -#include "ft/ft_node-serialize.h" +#include "ft/serialize/ft_node-serialize.h" /* * ft-node-deserialize.c - diff --git a/ft/ft-serialize.cc b/ft/serialize/ft-serialize.cc similarity index 99% rename from ft/ft-serialize.cc rename to ft/serialize/ft-serialize.cc index 475dc023149..0badabdb42b 100644 --- a/ft/ft-serialize.cc +++ b/ft/serialize/ft-serialize.cc @@ -89,13 +89,13 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
-#include "ft/block_allocator.h" -#include "ft/block_table.h" -#include "ft/compress.h" #include "ft/ft.h" #include "ft/ft-internal.h" -#include "ft/ft-serialize.h" #include "ft/msg.h" +#include "ft/serialize/block_allocator.h" +#include "ft/serialize/block_table.h" +#include "ft/serialize/compress.h" +#include "ft/serialize/ft-serialize.h" // not version-sensitive because we only serialize a descriptor using the current layout_version uint32_t diff --git a/ft/ft-serialize.h b/ft/serialize/ft-serialize.h similarity index 99% rename from ft/ft-serialize.h rename to ft/serialize/ft-serialize.h index 55b3af6ba20..856d32d549d 100644 --- a/ft/ft-serialize.h +++ b/ft/serialize/ft-serialize.h @@ -88,8 +88,8 @@ PATENT RIGHTS GRANT: #pragma once -#include "ft/block_table.h" #include "ft/ft.h" +#include "ft/serialize/block_table.h" size_t toku_serialize_ft_size(struct ft_header *h); void toku_serialize_ft_to(int fd, struct ft_header *h, struct block_table *blocktable, CACHEFILE cf); diff --git a/ft/ft_layout_version.h b/ft/serialize/ft_layout_version.h similarity index 100% rename from ft/ft_layout_version.h rename to ft/serialize/ft_layout_version.h diff --git a/ft/ft_node-serialize.cc b/ft/serialize/ft_node-serialize.cc similarity index 99% rename from ft/ft_node-serialize.cc rename to ft/serialize/ft_node-serialize.cc index d9615b730b2..bd51a0e2013 100644 --- a/ft/ft_node-serialize.cc +++ b/ft/serialize/ft_node-serialize.cc @@ -89,16 +89,17 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
-#include "ft/block_table.h" +#include "portability/toku_atomic.h" + #include "ft/cachetable.h" -#include "ft/compress.h" #include "ft/ft.h" #include "ft/ft-internal.h" -#include "ft/ft_node-serialize.h" #include "ft/node.h" #include "ft/log-internal.h" #include "ft/rollback.h" -#include "portability/toku_atomic.h" +#include "ft/serialize/block_table.h" +#include "ft/serialize/compress.h" +#include "ft/serialize/ft_node-serialize.h" #include "util/sort.h" #include "util/threadpool.h" #include "util/status.h" diff --git a/ft/ft_node-serialize.h b/ft/serialize/ft_node-serialize.h similarity index 98% rename from ft/ft_node-serialize.h rename to ft/serialize/ft_node-serialize.h index 022769a3648..14b6e307415 100644 --- a/ft/ft_node-serialize.h +++ b/ft/serialize/ft_node-serialize.h @@ -88,12 +88,12 @@ PATENT RIGHTS GRANT: #pragma once -#include "ft/block_table.h" -#include "ft/node.h" #include "ft/ft.h" -#include "ft/sub_block.h" -#include "ft/rbuf.h" -#include "ft/wbuf.h" +#include "ft/node.h" +#include "ft/serialize/sub_block.h" +#include "ft/serialize/rbuf.h" +#include "ft/serialize/wbuf.h" +#include "ft/serialize/block_table.h" unsigned int toku_serialize_ftnode_size(FTNODE node); int toku_serialize_ftnode_to_memory(FTNODE node, FTNODE_DISK_DATA *ndd, diff --git a/ft/quicklz.cc b/ft/serialize/quicklz.cc similarity index 100% rename from ft/quicklz.cc rename to ft/serialize/quicklz.cc diff --git a/ft/quicklz.h b/ft/serialize/quicklz.h similarity index 100% rename from ft/quicklz.h rename to ft/serialize/quicklz.h diff --git a/ft/rbuf.h b/ft/serialize/rbuf.h similarity index 100% rename from ft/rbuf.h rename to ft/serialize/rbuf.h diff --git a/ft/sub_block.cc b/ft/serialize/sub_block.cc similarity index 99% rename from ft/sub_block.cc rename to ft/serialize/sub_block.cc index ee1e289d802..8ea4fb03be4 100644 --- a/ft/sub_block.cc +++ b/ft/serialize/sub_block.cc @@ -98,9 +98,9 @@ PATENT RIGHTS GRANT: #include "portability/toku_assert.h" #include 
"portability/toku_portability.h" -#include "ft/compress.h" -#include "ft/sub_block.h" -#include "ft/quicklz.h" +#include "ft/serialize/compress.h" +#include "ft/serialize/sub_block.h" +#include "ft/serialize/quicklz.h" #include "util/threadpool.h" #include "util/x1764.h" diff --git a/ft/sub_block.h b/ft/serialize/sub_block.h similarity index 99% rename from ft/sub_block.h rename to ft/serialize/sub_block.h index 26f99747b2d..b165d5cd545 100644 --- a/ft/sub_block.h +++ b/ft/serialize/sub_block.h @@ -92,7 +92,7 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#include "ft/compress.h" +#include "ft/serialize/compress.h" // TODO: Clean this abstraciton up static const int max_sub_blocks = 8; diff --git a/ft/sub_block_map.h b/ft/serialize/sub_block_map.h similarity index 100% rename from ft/sub_block_map.h rename to ft/serialize/sub_block_map.h diff --git a/ft/wbuf.h b/ft/serialize/wbuf.h similarity index 100% rename from ft/wbuf.h rename to ft/serialize/wbuf.h diff --git a/ft/workset.h b/ft/serialize/workset.h similarity index 100% rename from ft/workset.h rename to ft/serialize/workset.h diff --git a/ft/tests/compress-test.cc b/ft/tests/compress-test.cc index 55b70132029..98c3a774d43 100644 --- a/ft/tests/compress-test.cc +++ b/ft/tests/compress-test.cc @@ -91,7 +91,7 @@ PATENT RIGHTS GRANT: #ident "$Id$" #include "test.h" -#include "compress.h" +#include "serialize/compress.h" static void test_compress_buf_method (unsigned char *buf, int i, enum toku_compression_method m) { int bound = toku_compress_bound(m, i); diff --git a/ft/tests/quicklz-test.cc b/ft/tests/quicklz-test.cc index 44bec12fb08..a7970abb057 100644 --- 
a/ft/tests/quicklz-test.cc +++ b/ft/tests/quicklz-test.cc @@ -91,7 +91,7 @@ PATENT RIGHTS GRANT: #ident "$Id$" #include "test.h" -#include "quicklz.h" +#include "serialize/quicklz.h" static void test_qlz_random_i (int i) { if (verbose) printf("i=%d\n", i); diff --git a/ft/tests/subblock-test-checksum.cc b/ft/tests/subblock-test-checksum.cc index 1885ce0f55c..8d6156f04e7 100644 --- a/ft/tests/subblock-test-checksum.cc +++ b/ft/tests/subblock-test-checksum.cc @@ -91,8 +91,8 @@ PATENT RIGHTS GRANT: #include "test.h" -#include "compress.h" -#include "sub_block.h" +#include "serialize/compress.h" +#include "serialize/sub_block.h" #include #include diff --git a/ft/tests/subblock-test-compression.cc b/ft/tests/subblock-test-compression.cc index ccd7a4e521c..2874e50a146 100644 --- a/ft/tests/subblock-test-compression.cc +++ b/ft/tests/subblock-test-compression.cc @@ -95,7 +95,7 @@ PATENT RIGHTS GRANT: #include #include -#include "sub_block.h" +#include "serialize/sub_block.h" static void test_sub_block_compression(void *buf, int total_size, int my_max_sub_blocks, int n_cores, enum toku_compression_method method) { diff --git a/ft/tests/subblock-test-index.cc b/ft/tests/subblock-test-index.cc index 2821429c3eb..e805bf1ead3 100644 --- a/ft/tests/subblock-test-index.cc +++ b/ft/tests/subblock-test-index.cc @@ -95,7 +95,7 @@ PATENT RIGHTS GRANT: #include #include -#include "sub_block.h" +#include "serialize/sub_block.h" static void test_sub_block_index(void) { diff --git a/ft/tests/subblock-test-size.cc b/ft/tests/subblock-test-size.cc index 5a226a4b443..8b1119b30c8 100644 --- a/ft/tests/subblock-test-size.cc +++ b/ft/tests/subblock-test-size.cc @@ -95,7 +95,7 @@ PATENT RIGHTS GRANT: #include #include -#include "sub_block.h" +#include "serialize/sub_block.h" static void test_sub_block_size(int total_size) { diff --git a/ft/tests/test.h b/ft/tests/test.h index 4f7ba0b5c21..f97edba0d8f 100644 --- a/ft/tests/test.h +++ b/ft/tests/test.h @@ -101,15 +101,15 @@ PATENT RIGHTS GRANT: 
#include #include -#include "ft/block_allocator.h" -#include "ft/block_table.h" +#include "ft/serialize/block_allocator.h" +#include "ft/serialize/block_table.h" #include "ft/cachetable.h" #include "ft/cachetable-internal.h" #include "ft/cursor.h" #include "ft/ft.h" #include "ft/ft-ops.h" -#include "ft/ft-serialize.h" -#include "ft/ft_node-serialize.h" +#include "ft/serialize/ft-serialize.h" +#include "ft/serialize/ft_node-serialize.h" #include "ft/log-internal.h" #include "ft/logger.h" #include "ft/node.h" diff --git a/ft/tests/test_block_allocator_merge.cc b/ft/tests/test_block_allocator_merge.cc index 796a09f398b..e0cd6ca1e15 100644 --- a/ft/tests/test_block_allocator_merge.cc +++ b/ft/tests/test_block_allocator_merge.cc @@ -87,7 +87,7 @@ PATENT RIGHTS GRANT: */ #ident "Copyright (c) 2009-2013 Tokutek Inc. All rights reserved." -#include "../block_allocator.h" +#include "ft/serialize/block_allocator.h" #include #include // Test the merger. diff --git a/ft/tokuftdump.cc b/ft/tokuftdump.cc index 281cde1c9d5..e591bcec84f 100644 --- a/ft/tokuftdump.cc +++ b/ft/tokuftdump.cc @@ -98,12 +98,12 @@ PATENT RIGHTS GRANT: #include #include -#include "ft/block_table.h" +#include "ft/serialize/block_table.h" #include "ft/cachetable.h" #include "ft/ft.h" #include "ft/ft-internal.h" -#include "ft/ft-serialize.h" -#include "ft/ft_node-serialize.h" +#include "ft/serialize/ft-serialize.h" +#include "ft/serialize/ft_node-serialize.h" #include "ft/node.h" static int do_dump_data = 1; diff --git a/ft/txn.h b/ft/txn.h index 27cc622c5d5..a9380b0cb06 100644 --- a/ft/txn.h +++ b/ft/txn.h @@ -94,8 +94,8 @@ PATENT RIGHTS GRANT: #include "portability/toku_stdint.h" -#include "ft/block_table.h" #include "ft/txn_state.h" +#include "ft/serialize/block_table.h" #include "util/omt.h" typedef uint64_t TXNID; @@ -385,7 +385,7 @@ void txn_status_destroy(void); // For serialize / deserialize -#include "ft/wbuf.h" +#include "ft/serialize/wbuf.h" static inline void wbuf_TXNID(struct wbuf *wb, TXNID 
txnid) { wbuf_ulonglong(wb, txnid); @@ -408,7 +408,7 @@ static inline void wbuf_LSN(struct wbuf *wb, LSN lsn) { wbuf_ulonglong(wb, lsn.lsn); } -#include "ft/rbuf.h" +#include "ft/serialize/rbuf.h" static inline void rbuf_TXNID(struct rbuf *rb, TXNID *txnid) { *txnid = rbuf_ulonglong(rb); diff --git a/ft/xids.h b/ft/xids.h index bd0d3c0254d..4e37cb9a73c 100644 --- a/ft/xids.h +++ b/ft/xids.h @@ -104,8 +104,8 @@ PATENT RIGHTS GRANT: #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." #include "ft/txn.h" -#include "ft/rbuf.h" -#include "ft/wbuf.h" +#include "ft/serialize/rbuf.h" +#include "ft/serialize/wbuf.h" /* The number of transaction ids stored in the xids structure is * represented by an 8-bit value. The value 255 is reserved. diff --git a/util/dmt.h b/util/dmt.h index 5bde11ab378..f927b966002 100644 --- a/util/dmt.h +++ b/util/dmt.h @@ -90,14 +90,17 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
-#include -#include -#include -#include -#include "growable_array.h" -#include "../ft/wbuf.h" #include +#include "portability/memory.h" +#include "portability/toku_portability.h" +#include "portability/toku_race_tools.h" +#include "portability/toku_stdint.h" + +#include "ft/serialize/wbuf.h" +#include "util/growable_array.h" +#include "util/mempool.h" + namespace toku { typedef uint32_t node_offset; From c821fd54e99806bbe0075754803033abf674a4d2 Mon Sep 17 00:00:00 2001 From: John Esmet Date: Thu, 3 Jul 2014 12:30:17 -0400 Subject: [PATCH 071/190] FT-294 Move the logging code to its own sudbdirectory, logger/ --- ft/CMakeLists.txt | 12 ++++++------ ft/cachetable.cc | 26 +++++++++++++------------- ft/cachetable.h | 2 +- ft/checkpoint.cc | 4 ++-- ft/ft-ops.cc | 2 +- ft/ft.cc | 2 +- ft/ft.h | 2 +- ft/loader/loader.cc | 2 +- ft/{ => logger}/log-internal.h | 20 ++++++++++---------- ft/{ => logger}/log.h | 2 +- ft/{ => logger}/log_upgrade.cc | 2 +- ft/{ => logger}/logcursor.cc | 2 +- ft/{ => logger}/logcursor.h | 0 ft/{ => logger}/logfilemgr.cc | 6 +++--- ft/{ => logger}/logfilemgr.h | 0 ft/{ => logger}/logformat.cc | 2 +- ft/{ => logger}/logger.cc | 2 +- ft/{ => logger}/logger.h | 0 ft/recover.cc | 17 ++++++++--------- ft/roll.cc | 12 ++++++------ ft/rollback-apply.cc | 2 +- ft/rollback.cc | 2 +- ft/serialize/ft_node-serialize.cc | 2 +- ft/tdb_logprint.cc | 2 +- ft/tests/log-test-maybe-trim.cc | 2 +- ft/tests/logcursor-bad-checksum.cc | 2 +- ft/tests/logcursor-empty-logdir.cc | 2 +- ft/tests/logcursor-empty-logfile-2.cc | 2 +- ft/tests/logcursor-empty-logfile-3.cc | 2 +- ft/tests/logcursor-empty-logfile.cc | 2 +- ft/tests/logcursor-print.cc | 2 +- ft/tests/logcursor-timestamp.cc | 2 +- ft/tests/logfilemgr-create-destroy.cc | 5 +++-- ft/tests/logfilemgr-print.cc | 4 ++-- ft/tests/test.h | 4 ++-- ft/tests/test_logcursor.cc | 2 +- ft/txn.cc | 16 ++++++++-------- ft/txn_child_manager.cc | 4 ++-- ft/txn_manager.cc | 17 ++++++++--------- ft/ule.cc | 2 +- src/indexer.cc | 2 +- 
src/tests/test1572.cc | 2 +- src/ydb-internal.h | 2 +- src/ydb.cc | 2 +- 44 files changed, 101 insertions(+), 102 deletions(-) rename ft/{ => logger}/log-internal.h (97%) rename ft/{ => logger}/log.h (99%) rename ft/{ => logger}/log_upgrade.cc (99%) rename ft/{ => logger}/logcursor.cc (99%) rename ft/{ => logger}/logcursor.h (100%) rename ft/{ => logger}/logfilemgr.cc (98%) rename ft/{ => logger}/logfilemgr.h (100%) rename ft/{ => logger}/logformat.cc (99%) rename ft/{ => logger}/logger.cc (99%) rename ft/{ => logger}/logger.h (100%) diff --git a/ft/CMakeLists.txt b/ft/CMakeLists.txt index 47d49ba1ddb..da08393f702 100644 --- a/ft/CMakeLists.txt +++ b/ft/CMakeLists.txt @@ -7,7 +7,7 @@ set_source_files_properties( "${CMAKE_CURRENT_BINARY_DIR}/log_header.h" PROPERTIES GENERATED TRUE) -add_executable(logformat logformat.cc) +add_executable(logformat logger/logformat.cc) target_link_libraries(logformat ${LIBTOKUPORTABILITY}_static) add_custom_command( @@ -15,7 +15,7 @@ add_custom_command( OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/log_print.cc" OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/log_header.h" COMMAND $ . - DEPENDS logformat + DEPENDS logger/logformat ) add_custom_target( generate_log_code @@ -41,10 +41,10 @@ set(FT_SOURCES loader/pqueue leafentry le-cursor - logcursor - logfilemgr - logger - log_upgrade + logger/logcursor + logger/logfilemgr + logger/logger + logger/log_upgrade msg msg_buffer node diff --git a/ft/cachetable.cc b/ft/cachetable.cc index 7eaf1c95679..605302db265 100644 --- a/ft/cachetable.cc +++ b/ft/cachetable.cc @@ -89,24 +89,24 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
-#include -#include #include #include -#include -#include "cachetable.h" -#include -#include "checkpoint.h" -#include "log-internal.h" -#include "cachetable-internal.h" -#include -#include + +#include +#include #include #include +#include +#include #include -#include -#include -#include + +#include "ft/cachetable.h" +#include "ft/cachetable-internal.h" +#include "ft/checkpoint.h" +#include "ft/logger/log-internal.h" +#include "util/rwlock.h" +#include "util/status.h" +#include "util/context.h" /////////////////////////////////////////////////////////////////////////////////// // Engine status diff --git a/ft/cachetable.h b/ft/cachetable.h index e3b5df82bc1..722facd818c 100644 --- a/ft/cachetable.h +++ b/ft/cachetable.h @@ -95,7 +95,7 @@ PATENT RIGHTS GRANT: #include #include "ft/serialize/block_table.h" -#include "ft/logger.h" +#include "ft/logger/logger.h" #include "ft/txn.h" #include "util/minicron.h" diff --git a/ft/checkpoint.cc b/ft/checkpoint.cc index 88e984367fb..08ccfabfcdb 100644 --- a/ft/checkpoint.cc +++ b/ft/checkpoint.cc @@ -133,8 +133,8 @@ PATENT RIGHTS GRANT: #include "ft/cachetable.h" #include "ft/ft.h" -#include "ft/log-internal.h" -#include "ft/logger.h" +#include "ft/logger/log-internal.h" +#include "ft/logger/logger.h" #include "ft/checkpoint.h" #include "util/frwlock.h" #include "util/status.h" diff --git a/ft/ft-ops.cc b/ft/ft-ops.cc index 95a2cfc51f0..12c99fc74fb 100644 --- a/ft/ft-ops.cc +++ b/ft/ft-ops.cc @@ -208,7 +208,7 @@ basement nodes, bulk fetch, and partial fetch: #include "ft/ft-internal.h" #include "ft/msg.h" #include "ft/leafentry.h" -#include "ft/log-internal.h" +#include "ft/logger/log-internal.h" #include "ft/node.h" #include "ft/serialize/block_table.h" #include "ft/serialize/sub_block.h" diff --git a/ft/ft.cc b/ft/ft.cc index bec8ddd4450..5c43a5efd50 100644 --- a/ft/ft.cc +++ b/ft/ft.cc @@ -93,7 +93,7 @@ PATENT RIGHTS GRANT: #include "ft/ft.h" #include "ft/ft-cachetable-wrappers.h" #include "ft/ft-internal.h" -#include 
"ft/log-internal.h" +#include "ft/logger/log-internal.h" #include "ft/log_header.h" #include "ft/node.h" #include "ft/serialize/ft-serialize.h" diff --git a/ft/ft.h b/ft/ft.h index 21dd7da4407..c90eb1b4747 100644 --- a/ft/ft.h +++ b/ft/ft.h @@ -95,7 +95,7 @@ PATENT RIGHTS GRANT: #include "ft/cachetable.h" #include "ft/ft-ops.h" -#include "ft/log.h" +#include "ft/logger/log.h" #include "ft/ybt.h" typedef struct ft *FT; diff --git a/ft/loader/loader.cc b/ft/loader/loader.cc index 474adcd1bbf..db3d3ae225c 100644 --- a/ft/loader/loader.cc +++ b/ft/loader/loader.cc @@ -106,7 +106,7 @@ PATENT RIGHTS GRANT: #include "ft/loader/loader-internal.h" #include "ft/loader/pqueue.h" #include "ft/loader/dbufio.h" -#include "ft/log-internal.h" +#include "ft/logger/log-internal.h" #include "ft/node.h" #include "ft/serialize/block_table.h" #include "ft/serialize/ft-serialize.h" diff --git a/ft/log-internal.h b/ft/logger/log-internal.h similarity index 97% rename from ft/log-internal.h rename to ft/logger/log-internal.h index 52a5e4bf2a6..f671eed6bc0 100644 --- a/ft/log-internal.h +++ b/ft/logger/log-internal.h @@ -96,18 +96,18 @@ PATENT RIGHTS GRANT: #include #include #include -#include "ft-internal.h" -#include "log.h" -#include "toku_list.h" -#include "logfilemgr.h" -#include "txn.h" -#include "txn_manager.h" -#include "rollback_log_node_cache.h" -#include +#include "portability/toku_list.h" +#include "portability/toku_pthread.h" +#include "ft/ft-internal.h" +#include "ft/logger/log.h" +#include "ft/logger/logfilemgr.h" +#include "ft/txn.h" +#include "ft/txn_manager.h" +#include "ft/rollback_log_node_cache.h" -#include -#include +#include "util/memarena.h" +#include "util/omt.h" using namespace toku; // Locking for the logger diff --git a/ft/log.h b/ft/logger/log.h similarity index 99% rename from ft/log.h rename to ft/logger/log.h index 5b958b2fda7..eac89501141 100644 --- a/ft/log.h +++ b/ft/logger/log.h @@ -98,7 +98,7 @@ PATENT RIGHTS GRANT: #include "portability/memory.h" 
#include "portability/toku_portability.h" -#include "ft/logger.h" +#include "ft/logger/logger.h" #include "ft/rollback.h" #include "ft/recover.h" #include "ft/txn.h" diff --git a/ft/log_upgrade.cc b/ft/logger/log_upgrade.cc similarity index 99% rename from ft/log_upgrade.cc rename to ft/logger/log_upgrade.cc index 8dba57e9d8d..ca6f70e901c 100644 --- a/ft/log_upgrade.cc +++ b/ft/logger/log_upgrade.cc @@ -92,7 +92,7 @@ PATENT RIGHTS GRANT: #include #include "log-internal.h" -#include "logcursor.h" +#include "logger/logcursor.h" #include "checkpoint.h" static uint64_t footprint = 0; // for debug and accountability diff --git a/ft/logcursor.cc b/ft/logger/logcursor.cc similarity index 99% rename from ft/logcursor.cc rename to ft/logger/logcursor.cc index 384582e000a..071ebf9b3b6 100644 --- a/ft/logcursor.cc +++ b/ft/logger/logcursor.cc @@ -90,7 +90,7 @@ PATENT RIGHTS GRANT: #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." #include "log-internal.h" -#include "logcursor.h" +#include "logger/logcursor.h" #include #include diff --git a/ft/logcursor.h b/ft/logger/logcursor.h similarity index 100% rename from ft/logcursor.h rename to ft/logger/logcursor.h diff --git a/ft/logfilemgr.cc b/ft/logger/logfilemgr.cc similarity index 98% rename from ft/logfilemgr.cc rename to ft/logger/logfilemgr.cc index 917760abc6c..17c4bc922ad 100644 --- a/ft/logfilemgr.cc +++ b/ft/logger/logfilemgr.cc @@ -89,9 +89,9 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." 
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#include "log-internal.h" -#include "logcursor.h" -#include "logfilemgr.h" +#include "logger/log-internal.h" +#include "logger/logcursor.h" +#include "logger/logfilemgr.h" // for now, implement with singlely-linked-list // first = oldest (delete from beginning) diff --git a/ft/logfilemgr.h b/ft/logger/logfilemgr.h similarity index 100% rename from ft/logfilemgr.h rename to ft/logger/logfilemgr.h diff --git a/ft/logformat.cc b/ft/logger/logformat.cc similarity index 99% rename from ft/logformat.cc rename to ft/logger/logformat.cc index fba77d95df2..39ad7d0b798 100644 --- a/ft/logformat.cc +++ b/ft/logger/logformat.cc @@ -854,7 +854,7 @@ int main (int argc, const char *const argv[]) { fprintf2(cf, hf, "#ident \"Copyright (c) 2007-2013 Tokutek Inc. 
All rights reserved.\"\n"); fprintf2(cf, pf, "#include \n"); fprintf2(cf, pf, "#include \n"); - fprintf2(cf, pf, "#include \n"); + fprintf2(cf, pf, "#include \n"); fprintf(hf, "#include \n"); fprintf(hf, "#include \n"); fprintf(hf, "#include \n"); diff --git a/ft/logger.cc b/ft/logger/logger.cc similarity index 99% rename from ft/logger.cc rename to ft/logger/logger.cc index 28ca3ae82bb..c534065cbc2 100644 --- a/ft/logger.cc +++ b/ft/logger/logger.cc @@ -96,7 +96,7 @@ PATENT RIGHTS GRANT: #include "ft/serialize/block_table.h" #include "ft/ft.h" -#include "ft/log-internal.h" +#include "ft/logger/log-internal.h" #include "ft/txn_manager.h" #include "ft/rollback_log_node_cache.h" diff --git a/ft/logger.h b/ft/logger/logger.h similarity index 100% rename from ft/logger.h rename to ft/logger/logger.h diff --git a/ft/recover.cc b/ft/recover.cc index 2aac09855cd..9cf231323c9 100644 --- a/ft/recover.cc +++ b/ft/recover.cc @@ -89,16 +89,15 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#include -#include "ft.h" -#include "log-internal.h" -#include "logcursor.h" -#include "cachetable.h" -#include "checkpoint.h" -#include "txn_manager.h" - -#include +#include "ft/cachetable.h" +#include "ft/checkpoint.h" +#include "ft/ft.h" +#include "ft/log_header.h" +#include "ft/logger/log-internal.h" +#include "ft/logger/logcursor.h" +#include "ft/txn_manager.h" +#include "util/omt.h" int tokudb_recovery_trace = 0; // turn on recovery tracing, default off. 
diff --git a/ft/roll.cc b/ft/roll.cc index 9ad3dfcf5bf..fd49dfd478a 100644 --- a/ft/roll.cc +++ b/ft/roll.cc @@ -91,13 +91,13 @@ PATENT RIGHTS GRANT: /* rollback and rollforward routines. */ -#include -#include "ft.h" -#include "ft-ops.h" -#include "log-internal.h" -#include "xids.h" -#include "rollback-apply.h" +#include "ft/ft.h" +#include "ft/ft-ops.h" +#include "ft/log_header.h" +#include "ft/logger/log-internal.h" +#include "ft/xids.h" +#include "ft/rollback-apply.h" // functionality provided by roll.c is exposed by an autogenerated // header file, logheader.h diff --git a/ft/rollback-apply.cc b/ft/rollback-apply.cc index 94e5abf56d8..90157d85f5a 100644 --- a/ft/rollback-apply.cc +++ b/ft/rollback-apply.cc @@ -89,7 +89,7 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
-#include "ft/log-internal.h" +#include "ft/logger/log-internal.h" #include "ft/rollback-apply.h" static void diff --git a/ft/rollback.cc b/ft/rollback.cc index c9dc1cbe564..9bdfbe6c9bf 100644 --- a/ft/rollback.cc +++ b/ft/rollback.cc @@ -93,7 +93,7 @@ PATENT RIGHTS GRANT: #include "ft/serialize/block_table.h" #include "ft/ft.h" -#include "ft/log-internal.h" +#include "ft/logger/log-internal.h" #include "ft/rollback-ct-callbacks.h" static void rollback_unpin_remove_callback(CACHEKEY* cachekey, bool for_checkpoint, void* extra) { diff --git a/ft/serialize/ft_node-serialize.cc b/ft/serialize/ft_node-serialize.cc index bd51a0e2013..b0eaa0af6fc 100644 --- a/ft/serialize/ft_node-serialize.cc +++ b/ft/serialize/ft_node-serialize.cc @@ -95,7 +95,7 @@ PATENT RIGHTS GRANT: #include "ft/ft.h" #include "ft/ft-internal.h" #include "ft/node.h" -#include "ft/log-internal.h" +#include "ft/logger/log-internal.h" #include "ft/rollback.h" #include "ft/serialize/block_table.h" #include "ft/serialize/compress.h" diff --git a/ft/tdb_logprint.cc b/ft/tdb_logprint.cc index 87952b45dae..8d0bea0f016 100644 --- a/ft/tdb_logprint.cc +++ b/ft/tdb_logprint.cc @@ -92,7 +92,7 @@ PATENT RIGHTS GRANT: /* Dump the log from stdin to stdout. */ #include "ft/log_header.h" -#include "ft/logger.h" +#include "ft/logger/logger.h" static void newmain (int count) { int i; diff --git a/ft/tests/log-test-maybe-trim.cc b/ft/tests/log-test-maybe-trim.cc index 6f2398eead4..d724b075408 100644 --- a/ft/tests/log-test-maybe-trim.cc +++ b/ft/tests/log-test-maybe-trim.cc @@ -91,7 +91,7 @@ PATENT RIGHTS GRANT: // verify that the log file trimmer does not delete the log file containing the // begin checkpoint when the checkpoint log entries span multiple log files. 
-#include "logcursor.h" +#include "logger/logcursor.h" #include "test.h" int diff --git a/ft/tests/logcursor-bad-checksum.cc b/ft/tests/logcursor-bad-checksum.cc index 74a9ec27bf5..22ec4a91a0a 100644 --- a/ft/tests/logcursor-bad-checksum.cc +++ b/ft/tests/logcursor-bad-checksum.cc @@ -88,7 +88,7 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007, 2008 Tokutek Inc. All rights reserved." -#include "logcursor.h" +#include "logger/logcursor.h" #include "test.h" // log a couple of timestamp entries and verify the log by walking diff --git a/ft/tests/logcursor-empty-logdir.cc b/ft/tests/logcursor-empty-logdir.cc index a4822f14811..6982b310acb 100644 --- a/ft/tests/logcursor-empty-logdir.cc +++ b/ft/tests/logcursor-empty-logdir.cc @@ -88,7 +88,7 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007, 2008 Tokutek Inc. All rights reserved." -#include "logcursor.h" +#include "logger/logcursor.h" #include "test.h" // a logcursor in an empty directory should not find any log entries diff --git a/ft/tests/logcursor-empty-logfile-2.cc b/ft/tests/logcursor-empty-logfile-2.cc index 5bf7269cfc9..1fa630a6422 100644 --- a/ft/tests/logcursor-empty-logfile-2.cc +++ b/ft/tests/logcursor-empty-logfile-2.cc @@ -88,7 +88,7 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007, 2008 Tokutek Inc. All rights reserved." -#include "logcursor.h" +#include "logger/logcursor.h" #include "test.h" const int N = 2; diff --git a/ft/tests/logcursor-empty-logfile-3.cc b/ft/tests/logcursor-empty-logfile-3.cc index 85cce4e7bcd..6e687f9bd30 100644 --- a/ft/tests/logcursor-empty-logfile-3.cc +++ b/ft/tests/logcursor-empty-logfile-3.cc @@ -88,7 +88,7 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007, 2008 Tokutek Inc. All rights reserved." 
-#include "logcursor.h" +#include "logger/logcursor.h" #include "test.h" const int N = 2; diff --git a/ft/tests/logcursor-empty-logfile.cc b/ft/tests/logcursor-empty-logfile.cc index 7b6de69b061..7fc0be3d734 100644 --- a/ft/tests/logcursor-empty-logfile.cc +++ b/ft/tests/logcursor-empty-logfile.cc @@ -88,7 +88,7 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007, 2008 Tokutek Inc. All rights reserved." -#include "logcursor.h" +#include "logger/logcursor.h" #include "test.h" const int N = 2; diff --git a/ft/tests/logcursor-print.cc b/ft/tests/logcursor-print.cc index 957a7d18494..cf508fa0a2b 100644 --- a/ft/tests/logcursor-print.cc +++ b/ft/tests/logcursor-print.cc @@ -89,7 +89,7 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." #include "test.h" -#include "logcursor.h" +#include "logger/logcursor.h" int test_main(int argc, const char *argv[]) { int r; diff --git a/ft/tests/logcursor-timestamp.cc b/ft/tests/logcursor-timestamp.cc index b79bd199e8f..94768f897a1 100644 --- a/ft/tests/logcursor-timestamp.cc +++ b/ft/tests/logcursor-timestamp.cc @@ -88,7 +88,7 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007, 2008 Tokutek Inc. All rights reserved." -#include "logcursor.h" +#include "logger/logcursor.h" #include "test.h" static uint64_t now(void) { diff --git a/ft/tests/logfilemgr-create-destroy.cc b/ft/tests/logfilemgr-create-destroy.cc index 2ec8071cfed..4f447cd7360 100644 --- a/ft/tests/logfilemgr-create-destroy.cc +++ b/ft/tests/logfilemgr-create-destroy.cc @@ -88,8 +88,9 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." 
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#include "test.h" -#include "logfilemgr.h" + +#include "ft/tests/test.h" +#include "ft/logger/logfilemgr.h" int test_main(int argc __attribute__((unused)), const char *argv[] __attribute__((unused))) { int r; diff --git a/ft/tests/logfilemgr-print.cc b/ft/tests/logfilemgr-print.cc index 883d7bf0131..a361a270768 100644 --- a/ft/tests/logfilemgr-print.cc +++ b/ft/tests/logfilemgr-print.cc @@ -88,8 +88,8 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
-#include "test.h" -#include "logfilemgr.h" +#include "ft/tests/test.h" +#include "ft/logger/logfilemgr.h" int test_main(int argc __attribute__((unused)), const char *argv[] __attribute__((unused))) { int r; diff --git a/ft/tests/test.h b/ft/tests/test.h index f97edba0d8f..cba8d96e24d 100644 --- a/ft/tests/test.h +++ b/ft/tests/test.h @@ -110,8 +110,8 @@ PATENT RIGHTS GRANT: #include "ft/ft-ops.h" #include "ft/serialize/ft-serialize.h" #include "ft/serialize/ft_node-serialize.h" -#include "ft/log-internal.h" -#include "ft/logger.h" +#include "ft/logger/log-internal.h" +#include "ft/logger/logger.h" #include "ft/node.h" #include "util/bytestring.h" diff --git a/ft/tests/test_logcursor.cc b/ft/tests/test_logcursor.cc index 4e08cd66d4e..c7b68f2400c 100644 --- a/ft/tests/test_logcursor.cc +++ b/ft/tests/test_logcursor.cc @@ -90,7 +90,7 @@ PATENT RIGHTS GRANT: #include #include -#include "logcursor.h" +#include "logger/logcursor.h" #include "test.h" #if defined(HAVE_LIMITS_H) diff --git a/ft/txn.cc b/ft/txn.cc index 7a7aa8ad236..90b8a1a3e61 100644 --- a/ft/txn.cc +++ b/ft/txn.cc @@ -90,14 +90,14 @@ PATENT RIGHTS GRANT: #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
-#include "ft.h" -#include "txn.h" -#include "log-internal.h" -#include "checkpoint.h" -#include "ule.h" -#include "rollback-apply.h" -#include "txn_manager.h" -#include +#include "ft/checkpoint.h" +#include "ft/ft.h" +#include "ft/logger/log-internal.h" +#include "ft/ule.h" +#include "ft/rollback-apply.h" +#include "ft/txn.h" +#include "ft/txn_manager.h" +#include "util/status.h" /////////////////////////////////////////////////////////////////////////////////// // Engine status diff --git a/ft/txn_child_manager.cc b/ft/txn_child_manager.cc index bb74a1cb8ae..fb71c346e01 100644 --- a/ft/txn_child_manager.cc +++ b/ft/txn_child_manager.cc @@ -89,8 +89,8 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#include "log-internal.h" -#include "txn_child_manager.h" +#include "ft/logger/log-internal.h" +#include "ft/txn_child_manager.h" // // initialized a txn_child_manager, diff --git a/ft/txn_manager.cc b/ft/txn_manager.cc index 877d6ebacfd..8da99aa20f9 100644 --- a/ft/txn_manager.cc +++ b/ft/txn_manager.cc @@ -89,16 +89,15 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
-#include +#include "portability/toku_race_tools.h" -#include - -#include "log-internal.h" -#include "txn.h" -#include "checkpoint.h" -#include "ule.h" -#include "txn_manager.h" -#include "rollback.h" +#include "ft/checkpoint.h" +#include "ft/logger/log-internal.h" +#include "ft/ule.h" +#include "ft/txn.h" +#include "ft/txn_manager.h" +#include "ft/rollback.h" +#include "util/omt.h" bool garbage_collection_debug = false; diff --git a/ft/ule.cc b/ft/ule.cc index bb1e4915c3f..4c5ca3785e8 100644 --- a/ft/ule.cc +++ b/ft/ule.cc @@ -106,7 +106,7 @@ PATENT RIGHTS GRANT: #include "ft/ft-internal.h" #include "ft/leafentry.h" -#include "ft/logger.h" +#include "ft/logger/logger.h" #include "ft/msg.h" #include "ft/txn.h" #include "ft/txn_manager.h" diff --git a/src/indexer.cc b/src/indexer.cc index 093f3751733..6d81b70daf2 100644 --- a/src/indexer.cc +++ b/src/indexer.cc @@ -103,7 +103,7 @@ PATENT RIGHTS GRANT: #include #include #include -#include +#include #include #include #include "loader.h" diff --git a/src/tests/test1572.cc b/src/tests/test1572.cc index 73d93d58761..b6dd7f0dcb6 100644 --- a/src/tests/test1572.cc +++ b/src/tests/test1572.cc @@ -92,7 +92,7 @@ PATENT RIGHTS GRANT: /* Is it feasible to run 4 billion transactions in one test in the regression tests? */ #include #include -#include +#include #include static void diff --git a/src/ydb-internal.h b/src/ydb-internal.h index a43347eb7a6..08fb4a26fee 100644 --- a/src/ydb-internal.h +++ b/src/ydb-internal.h @@ -97,7 +97,7 @@ PATENT RIGHTS GRANT: #include #include #include -#include +#include #include #include diff --git a/src/ydb.cc b/src/ydb.cc index c75cb306b61..9b527538aad 100644 --- a/src/ydb.cc +++ b/src/ydb.cc @@ -104,7 +104,7 @@ const char *toku_copyright_string = "Copyright (c) 2007-2013 Tokutek Inc. 
All r #include "ft/ft-flusher.h" #include "ft/cachetable.h" -#include "ft/log.h" +#include "ft/logger/log.h" #include "ft/checkpoint.h" #include "ft/loader/loader.h" #include "ft/log_header.h" From 157e18040c580263f280f04e93a4c5dcbd2ecdd7 Mon Sep 17 00:00:00 2001 From: John Esmet Date: Thu, 3 Jul 2014 12:36:13 -0400 Subject: [PATCH 072/190] FT-294 Move cachetable files to ft/cachetable, txn files to ft/txn FT-295 Move ybt.h to utils/dbt.h --- ft/CMakeLists.txt | 27 +- ft/{ => cachetable}/background_job_manager.cc | 2 +- ft/{ => cachetable}/background_job_manager.h | 0 ft/{ => cachetable}/cachetable-internal.h | 2 +- ft/{ => cachetable}/cachetable.cc | 13 +- ft/{ => cachetable}/cachetable.h | 4 +- ft/{ => cachetable}/checkpoint.cc | 6 +- ft/{ => cachetable}/checkpoint.h | 6 +- ft/comparator.h | 6 +- ft/cursor.cc | 4 +- ft/ft-cachetable-wrappers.h | 2 +- ft/ft-internal.h | 4 +- ft/ft-ops.cc | 6 +- ft/ft-ops.h | 4 +- ft/ft.h | 4 +- ft/leafentry.h | 2 +- ft/loader/callbacks.cc | 2 +- ft/loader/loader.h | 4 +- ft/logger/log-internal.h | 6 +- ft/logger/log.h | 7 +- ft/logger/log_upgrade.cc | 2 +- ft/logger/logger.cc | 4 +- ft/logger/logger.h | 2 +- ft/{ => logger}/recover.cc | 6 +- ft/{ => logger}/recover.h | 25 +- ft/msg.cc | 4 +- ft/msg.h | 2 +- ft/msg_buffer.cc | 2 +- ft/msg_buffer.h | 4 +- ft/serialize/ft_layout_version.h | 2 +- ft/serialize/ft_node-serialize.cc | 4 +- ft/tdb-recover.cc | 15 +- ft/tests/cachetable-checkpoint-pending.cc | 2 +- ft/tests/cachetable-checkpoint-test.cc | 2 +- ft/tests/cachetable-checkpointer-class.cc | 2 +- ft/tests/cachetable-evictor-class.cc | 2 +- .../cachetable-prefetch-checkpoint-test.cc | 2 +- .../cachetable-prefetch-flowcontrol-test.cc | 2 +- ft/tests/cachetable-test.h | 2 +- ft/tests/ftloader-test-vm.cc | 2 +- ft/tests/is_empty.cc | 2 +- ft/tests/le-cursor-provdel.cc | 2 +- ft/tests/le-cursor-right.cc | 2 +- ft/tests/le-cursor-walk.cc | 2 +- ft/tests/recovery-test5123.cc | 2 +- ft/tests/test-bjm.cc | 2 +- 
ft/tests/test-checkpoint-during-flush.cc | 2 +- ft/tests/test-checkpoint-during-merge.cc | 2 +- ft/tests/test-checkpoint-during-rebalance.cc | 2 +- ft/tests/test-checkpoint-during-split.cc | 2 +- ft/tests/test-dirty-flushes-on-cleaner.cc | 2 +- ft/tests/test-flushes-on-cleaner.cc | 2 +- ft/tests/test-hot-with-bounds.cc | 2 +- ft/tests/test-merges-on-cleaner.cc | 2 +- ft/tests/test-pick-child-to-flush.cc | 2 +- ft/tests/test-txn-child-manager.cc | 2 +- ft/tests/test.h | 4 +- ft/tests/test3681.cc | 2 +- ...test_rightmost_leaf_seqinsert_heuristic.cc | 2 +- ft/tests/test_rightmost_leaf_split_merge.cc | 2 +- ft/tests/upgrade_test_simple.cc | 2 +- ft/tests/xid_lsn_independent.cc | 2 +- ft/tokuftdump.cc | 2 +- ft/{ => txn}/roll.cc | 4 +- ft/{ => txn}/rollback-apply.cc | 2 +- ft/{ => txn}/rollback-apply.h | 0 ft/{ => txn}/rollback-ct-callbacks.cc | 4 +- ft/{ => txn}/rollback-ct-callbacks.h | 2 +- ft/{ => txn}/rollback.cc | 2 +- ft/{ => txn}/rollback.h | 4 +- ft/{ => txn}/rollback_log_node_cache.cc | 2 +- ft/{ => txn}/rollback_log_node_cache.h | 2 +- ft/{ => txn}/txn.cc | 8 +- ft/{ => txn}/txn.h | 4 +- ft/{ => txn}/txn_child_manager.cc | 2 +- ft/{ => txn}/txn_child_manager.h | 4 +- ft/{ => txn}/txn_manager.cc | 8 +- ft/{ => txn}/txn_manager.h | 2 +- ft/{ => txn}/txn_state.h | 0 ft/{ => txn}/xids.cc | 2 +- ft/{ => txn}/xids.h | 2 +- ft/ule.cc | 6 +- ft/ule.h | 2 +- locktree/keyrange.cc | 298 +++++++++--------- locktree/lock_request.cc | 11 +- locktree/range_buffer.cc | 7 +- locktree/range_buffer.h | 3 +- locktree/tests/test.h | 2 +- locktree/treenode.h | 11 +- locktree/txnid_set.h | 2 +- src/indexer-internal.h | 2 +- src/indexer-undo-do.cc | 6 +- src/indexer.cc | 4 +- src/loader.cc | 2 +- src/tests/blackhole.cc | 2 +- src/tests/hotindexer-undo-do-test.cc | 3 +- src/tests/test_txn_nested1.cc | 2 +- src/tests/test_txn_nested2.cc | 11 +- src/tests/test_txn_nested3.cc | 2 +- src/tests/test_txn_nested4.cc | 2 +- src/tests/test_txn_nested5.cc | 2 +- 
src/tests/threaded_stress_test_helpers.h | 2 +- src/ydb-internal.h | 4 +- src/ydb.cc | 6 +- src/ydb_db.cc | 2 +- src/ydb_env_func.cc | 4 +- src/ydb_txn.cc | 10 +- src/ydb_write.cc | 2 +- util/CMakeLists.txt | 1 + ft/ybt.cc => util/dbt.cc | 43 +-- ft/ybt.h => util/dbt.h | 2 +- 111 files changed, 374 insertions(+), 383 deletions(-) rename ft/{ => cachetable}/background_job_manager.cc (99%) rename ft/{ => cachetable}/background_job_manager.h (100%) rename ft/{ => cachetable}/cachetable-internal.h (99%) rename ft/{ => cachetable}/cachetable.cc (99%) rename ft/{ => cachetable}/cachetable.h (99%) rename ft/{ => cachetable}/checkpoint.cc (99%) rename ft/{ => cachetable}/checkpoint.h (99%) rename ft/{ => logger}/recover.cc (99%) rename ft/{ => logger}/recover.h (88%) rename ft/{ => txn}/roll.cc (99%) rename ft/{ => txn}/rollback-apply.cc (99%) rename ft/{ => txn}/rollback-apply.h (100%) rename ft/{ => txn}/rollback-ct-callbacks.cc (99%) rename ft/{ => txn}/rollback-ct-callbacks.h (99%) rename ft/{ => txn}/rollback.cc (99%) rename ft/{ => txn}/rollback.h (99%) rename ft/{ => txn}/rollback_log_node_cache.cc (99%) rename ft/{ => txn}/rollback_log_node_cache.h (99%) rename ft/{ => txn}/txn.cc (99%) rename ft/{ => txn}/txn.h (99%) rename ft/{ => txn}/txn_child_manager.cc (99%) rename ft/{ => txn}/txn_child_manager.h (97%) rename ft/{ => txn}/txn_manager.cc (99%) rename ft/{ => txn}/txn_manager.h (99%) rename ft/{ => txn}/txn_state.h (100%) rename ft/{ => txn}/xids.cc (99%) rename ft/{ => txn}/xids.h (99%) rename ft/ybt.cc => util/dbt.cc (93%) rename ft/ybt.h => util/dbt.h (99%) diff --git a/ft/CMakeLists.txt b/ft/CMakeLists.txt index da08393f702..1ac723367e4 100644 --- a/ft/CMakeLists.txt +++ b/ft/CMakeLists.txt @@ -23,10 +23,10 @@ add_custom_target( ) set(FT_SOURCES - background_job_manager bndata - cachetable - checkpoint + cachetable/background_job_manager + cachetable/cachetable + cachetable/checkpoint cursor ft ft-cachetable-wrappers @@ -45,16 +45,11 @@ set(FT_SOURCES 
logger/logfilemgr logger/logger logger/log_upgrade + logger/recover msg msg_buffer node pivotkeys - recover - rollback - rollback-apply - rollback-ct-callbacks - rollback_log_node_cache - roll serialize/block_allocator serialize/block_table serialize/compress @@ -63,12 +58,16 @@ set(FT_SOURCES serialize/ft-serialize serialize/quicklz serialize/sub_block - txn - txn_child_manager - txn_manager + txn/rollback + txn/rollback-apply + txn/rollback-ct-callbacks + txn/rollback_log_node_cache + txn/roll + txn/txn + txn/txn_child_manager + txn/txn_manager + txn/xids ule - xids - ybt "${CMAKE_CURRENT_BINARY_DIR}/log_code" "${CMAKE_CURRENT_BINARY_DIR}/log_print" ) diff --git a/ft/background_job_manager.cc b/ft/cachetable/background_job_manager.cc similarity index 99% rename from ft/background_job_manager.cc rename to ft/cachetable/background_job_manager.cc index 6849909a2ed..12588cd067d 100644 --- a/ft/background_job_manager.cc +++ b/ft/cachetable/background_job_manager.cc @@ -93,7 +93,7 @@ PATENT RIGHTS GRANT: #include #include -#include "background_job_manager.h" +#include "cachetable/background_job_manager.h" struct background_job_manager_struct { bool accepting_jobs; diff --git a/ft/background_job_manager.h b/ft/cachetable/background_job_manager.h similarity index 100% rename from ft/background_job_manager.h rename to ft/cachetable/background_job_manager.h diff --git a/ft/cachetable-internal.h b/ft/cachetable/cachetable-internal.h similarity index 99% rename from ft/cachetable-internal.h rename to ft/cachetable/cachetable-internal.h index f1dcb71a5ba..78c30bf7861 100644 --- a/ft/cachetable-internal.h +++ b/ft/cachetable/cachetable-internal.h @@ -92,7 +92,7 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." 
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#include "background_job_manager.h" +#include "cachetable/background_job_manager.h" #include #include #include diff --git a/ft/cachetable.cc b/ft/cachetable/cachetable.cc similarity index 99% rename from ft/cachetable.cc rename to ft/cachetable/cachetable.cc index 605302db265..b24f867bbaf 100644 --- a/ft/cachetable.cc +++ b/ft/cachetable/cachetable.cc @@ -91,6 +91,7 @@ PATENT RIGHTS GRANT: #include #include +#include #include #include @@ -100,9 +101,9 @@ PATENT RIGHTS GRANT: #include #include -#include "ft/cachetable.h" -#include "ft/cachetable-internal.h" -#include "ft/checkpoint.h" +#include "ft/cachetable/cachetable.h" +#include "ft/cachetable/cachetable-internal.h" +#include "ft/cachetable/checkpoint.h" #include "ft/logger/log-internal.h" #include "util/rwlock.h" #include "util/status.h" @@ -1588,7 +1589,7 @@ int toku_cachetable_get_and_pin_with_dep_pairs ( PAIR* dependent_pairs, enum cachetable_dirty* dependent_dirty // array stating dirty/cleanness of dependent pairs ) -// See cachetable.h +// See cachetable/cachetable.h { CACHETABLE ct = cachefile->cachetable; bool wait = false; @@ -2024,7 +2025,7 @@ int toku_cachetable_get_and_pin_nonblocking( void *read_extraargs, UNLOCKERS unlockers ) -// See cachetable.h. +// See cachetable/cachetable.h. 
{ CACHETABLE ct = cf->cachetable; assert(lock_type == PL_READ || @@ -2207,7 +2208,7 @@ int toku_cachefile_prefetch(CACHEFILE cf, CACHEKEY key, uint32_t fullhash, CACHETABLE_PARTIAL_FETCH_CALLBACK pf_callback, void *read_extraargs, bool *doing_prefetch) -// Effect: See the documentation for this function in cachetable.h +// Effect: See the documentation for this function in cachetable/cachetable.h { int r = 0; PAIR p = NULL; diff --git a/ft/cachetable.h b/ft/cachetable/cachetable.h similarity index 99% rename from ft/cachetable.h rename to ft/cachetable/cachetable.h index 722facd818c..b4aacd8547b 100644 --- a/ft/cachetable.h +++ b/ft/cachetable/cachetable.h @@ -94,9 +94,9 @@ PATENT RIGHTS GRANT: #include -#include "ft/serialize/block_table.h" #include "ft/logger/logger.h" -#include "ft/txn.h" +#include "ft/serialize/block_table.h" +#include "ft/txn/txn.h" #include "util/minicron.h" // Maintain a cache mapping from cachekeys to values (void*) diff --git a/ft/checkpoint.cc b/ft/cachetable/checkpoint.cc similarity index 99% rename from ft/checkpoint.cc rename to ft/cachetable/checkpoint.cc index 08ccfabfcdb..3da668a815d 100644 --- a/ft/checkpoint.cc +++ b/ft/cachetable/checkpoint.cc @@ -131,11 +131,11 @@ PATENT RIGHTS GRANT: #include "portability/toku_portability.h" #include "portability/toku_atomic.h" -#include "ft/cachetable.h" +#include "ft/cachetable/cachetable.h" +#include "ft/cachetable/checkpoint.h" #include "ft/ft.h" #include "ft/logger/log-internal.h" -#include "ft/logger/logger.h" -#include "ft/checkpoint.h" +#include "ft/logger/recover.h" #include "util/frwlock.h" #include "util/status.h" diff --git a/ft/checkpoint.h b/ft/cachetable/checkpoint.h similarity index 99% rename from ft/checkpoint.h rename to ft/cachetable/checkpoint.h index fd08cd90e6f..2e4873084e6 100644 --- a/ft/checkpoint.h +++ b/ft/cachetable/checkpoint.h @@ -92,13 +92,13 @@ PATENT RIGHTS GRANT: #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State 
University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." #ident "$Id$" -#include "cachetable.h" - #include -void toku_set_checkpoint_period(CACHETABLE ct, uint32_t new_period); +#include "ft/cachetable/cachetable.h" + //Effect: Change [end checkpoint (n) - begin checkpoint (n+1)] delay to // new_period seconds. 0 means disable. +void toku_set_checkpoint_period(CACHETABLE ct, uint32_t new_period); uint32_t toku_get_checkpoint_period_unlocked(CACHETABLE ct); diff --git a/ft/comparator.h b/ft/comparator.h index 9533d0ca0d3..85a95819d2c 100644 --- a/ft/comparator.h +++ b/ft/comparator.h @@ -93,9 +93,9 @@ PATENT RIGHTS GRANT: #include #include -#include -//#include -#include +#include "portability/memory.h" + +#include "util/dbt.h" typedef int (*ft_compare_func)(DB *db, const DBT *a, const DBT *b); diff --git a/ft/cursor.cc b/ft/cursor.cc index d1679feb365..098721b2885 100644 --- a/ft/cursor.cc +++ b/ft/cursor.cc @@ -90,8 +90,8 @@ PATENT RIGHTS GRANT: #include "ft/cursor.h" #include "ft/leafentry.h" -#include "ft/txn.h" -#include "ft/ybt.h" +#include "ft/txn/txn.h" +#include "util/dbt.h" int toku_ft_cursor_create(FT_HANDLE ft_handle, FT_CURSOR cursor, TOKUTXN ttxn, bool is_snapshot_read, diff --git a/ft/ft-cachetable-wrappers.h b/ft/ft-cachetable-wrappers.h index 044195c7fde..5af425e18ff 100644 --- a/ft/ft-cachetable-wrappers.h +++ b/ft/ft-cachetable-wrappers.h @@ -92,7 +92,7 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
-#include "ft/cachetable.h" +#include "ft/cachetable/cachetable.h" #include "ft/ft-internal.h" #include "ft/node.h" diff --git a/ft/ft-internal.h b/ft/ft-internal.h index 177484f31c7..87abf06752e 100644 --- a/ft/ft-internal.h +++ b/ft/ft-internal.h @@ -96,12 +96,12 @@ PATENT RIGHTS GRANT: #include "portability/toku_list.h" #include "portability/toku_race_tools.h" -#include "ft/cachetable.h" +#include "ft/cachetable/cachetable.h" #include "ft/comparator.h" #include "ft/ft.h" #include "ft/ft-ops.h" #include "ft/node.h" -#include "ft/rollback.h" +#include "ft/txn/rollback.h" // Symbol TOKUDB_REVISION is not defined by fractal-tree makefiles, so // BUILD_ID of 1000 indicates development build of main, not a release build. diff --git a/ft/ft-ops.cc b/ft/ft-ops.cc index 12c99fc74fb..1f045bd8dda 100644 --- a/ft/ft-ops.cc +++ b/ft/ft-ops.cc @@ -200,7 +200,7 @@ basement nodes, bulk fetch, and partial fetch: */ -#include "ft/checkpoint.h" +#include "ft/cachetable/checkpoint.h" #include "ft/cursor.h" #include "ft/ft.h" #include "ft/ft-cachetable-wrappers.h" @@ -215,9 +215,9 @@ basement nodes, bulk fetch, and partial fetch: #include "ft/serialize/ft-serialize.h" #include "ft/serialize/ft_layout_version.h" #include "ft/serialize/ft_node-serialize.h" -#include "ft/txn_manager.h" +#include "ft/txn/txn_manager.h" #include "ft/ule.h" -#include "ft/xids.h" +#include "ft/txn/xids.h" #include diff --git a/ft/ft-ops.h b/ft/ft-ops.h index 786cc32380d..3565eedcb1b 100644 --- a/ft/ft-ops.h +++ b/ft/ft-ops.h @@ -96,10 +96,10 @@ PATENT RIGHTS GRANT: #include -#include "ft/cachetable.h" +#include "ft/cachetable/cachetable.h" #include "ft/comparator.h" #include "ft/msg.h" -#include "ft/ybt.h" +#include "util/dbt.h" typedef struct ft_handle *FT_HANDLE; diff --git a/ft/ft.h b/ft/ft.h index c90eb1b4747..4df7ed9cc9e 100644 --- a/ft/ft.h +++ b/ft/ft.h @@ -93,10 +93,10 @@ PATENT RIGHTS GRANT: #include -#include "ft/cachetable.h" +#include "ft/cachetable/cachetable.h" #include "ft/ft-ops.h" #include 
"ft/logger/log.h" -#include "ft/ybt.h" +#include "util/dbt.h" typedef struct ft *FT; typedef struct ft_options *FT_OPTIONS; diff --git a/ft/leafentry.h b/ft/leafentry.h index 148c4092c37..07ae06d64d5 100644 --- a/ft/leafentry.h +++ b/ft/leafentry.h @@ -97,7 +97,7 @@ PATENT RIGHTS GRANT: #include #include -#include "ft/txn_manager.h" +#include "ft/txn/txn_manager.h" #include "ft/serialize/rbuf.h" #include "ft/msg.h" diff --git a/ft/loader/callbacks.cc b/ft/loader/callbacks.cc index a6b7686e023..323196bc218 100644 --- a/ft/loader/callbacks.cc +++ b/ft/loader/callbacks.cc @@ -96,7 +96,7 @@ PATENT RIGHTS GRANT: #include #include "loader/loader-internal.h" -#include "ybt.h" +#include "util/dbt.h" static void error_callback_lock(ft_loader_error_callback loader_error) { toku_mutex_lock(&loader_error->mutex); diff --git a/ft/loader/loader.h b/ft/loader/loader.h index b4b8a2a1f79..9ab0d482cd3 100644 --- a/ft/loader/loader.h +++ b/ft/loader/loader.h @@ -92,8 +92,8 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
-#include "ft/txn.h" -#include "ft/cachetable.h" +#include "ft/txn/txn.h" +#include "ft/cachetable/cachetable.h" #include "ft/comparator.h" #include "ft/ft-ops.h" diff --git a/ft/logger/log-internal.h b/ft/logger/log-internal.h index f671eed6bc0..d1d19bdad9e 100644 --- a/ft/logger/log-internal.h +++ b/ft/logger/log-internal.h @@ -102,9 +102,9 @@ PATENT RIGHTS GRANT: #include "ft/ft-internal.h" #include "ft/logger/log.h" #include "ft/logger/logfilemgr.h" -#include "ft/txn.h" -#include "ft/txn_manager.h" -#include "ft/rollback_log_node_cache.h" +#include "ft/txn/txn.h" +#include "ft/txn/txn_manager.h" +#include "ft/txn/rollback_log_node_cache.h" #include "util/memarena.h" #include "util/omt.h" diff --git a/ft/logger/log.h b/ft/logger/log.h index eac89501141..5dfb35315ee 100644 --- a/ft/logger/log.h +++ b/ft/logger/log.h @@ -98,10 +98,9 @@ PATENT RIGHTS GRANT: #include "portability/memory.h" #include "portability/toku_portability.h" -#include "ft/logger/logger.h" -#include "ft/rollback.h" -#include "ft/recover.h" -#include "ft/txn.h" +#include "ft/logger/recover.h" +#include "ft/txn/rollback.h" +#include "ft/txn/txn.h" #include "util/bytestring.h" struct roll_entry; diff --git a/ft/logger/log_upgrade.cc b/ft/logger/log_upgrade.cc index ca6f70e901c..8fd65ae2691 100644 --- a/ft/logger/log_upgrade.cc +++ b/ft/logger/log_upgrade.cc @@ -93,7 +93,7 @@ PATENT RIGHTS GRANT: #include "log-internal.h" #include "logger/logcursor.h" -#include "checkpoint.h" +#include "cachetable/checkpoint.h" static uint64_t footprint = 0; // for debug and accountability diff --git a/ft/logger/logger.cc b/ft/logger/logger.cc index c534065cbc2..48844fd334c 100644 --- a/ft/logger/logger.cc +++ b/ft/logger/logger.cc @@ -97,8 +97,8 @@ PATENT RIGHTS GRANT: #include "ft/serialize/block_table.h" #include "ft/ft.h" #include "ft/logger/log-internal.h" -#include "ft/txn_manager.h" -#include "ft/rollback_log_node_cache.h" +#include "ft/txn/txn_manager.h" +#include "ft/txn/rollback_log_node_cache.h" #include 
"util/status.h" diff --git a/ft/logger/logger.h b/ft/logger/logger.h index 2c04f6b8ac5..4068e802705 100644 --- a/ft/logger/logger.h +++ b/ft/logger/logger.h @@ -94,7 +94,7 @@ PATENT RIGHTS GRANT: #include "ft/serialize/block_table.h" #include "ft/serialize/ft_layout_version.h" -#include "ft/txn.h" +#include "ft/txn/txn.h" typedef struct tokulogger *TOKULOGGER; diff --git a/ft/recover.cc b/ft/logger/recover.cc similarity index 99% rename from ft/recover.cc rename to ft/logger/recover.cc index 9cf231323c9..ad05ecb18e3 100644 --- a/ft/recover.cc +++ b/ft/logger/recover.cc @@ -90,13 +90,13 @@ PATENT RIGHTS GRANT: #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#include "ft/cachetable.h" -#include "ft/checkpoint.h" +#include "ft/cachetable/cachetable.h" +#include "ft/cachetable/checkpoint.h" #include "ft/ft.h" #include "ft/log_header.h" #include "ft/logger/log-internal.h" #include "ft/logger/logcursor.h" -#include "ft/txn_manager.h" +#include "ft/txn/txn_manager.h" #include "util/omt.h" int tokudb_recovery_trace = 0; // turn on recovery tracing, default off. 
diff --git a/ft/recover.h b/ft/logger/recover.h similarity index 88% rename from ft/recover.h rename to ft/logger/recover.h index 38d6903f448..134bbcdc480 100644 --- a/ft/recover.h +++ b/ft/logger/recover.h @@ -102,21 +102,22 @@ PATENT RIGHTS GRANT: #include "ft/ft-ops.h" #include "util/x1764.h" -typedef void (*prepared_txn_callback_t)(DB_ENV*, TOKUTXN); -typedef void (*keep_cachetable_callback_t)(DB_ENV*, CACHETABLE); +typedef void (*prepared_txn_callback_t)(DB_ENV *env, struct tokutxn *txn); +typedef void (*keep_cachetable_callback_t)(DB_ENV *env, struct cachetable *ct); // Run tokudb recovery from the log // Returns 0 if success -int tokudb_recover (DB_ENV *env, - prepared_txn_callback_t prepared_txn_callback, - keep_cachetable_callback_t keep_cachetable_callback, - struct tokulogger *logger, - const char *env_dir, const char *log_dir, - ft_compare_func bt_compare, - ft_update_func update_function, - generate_row_for_put_func generate_row_for_put, - generate_row_for_del_func generate_row_for_del, - size_t cachetable_size); +int tokudb_recover(DB_ENV *env, + prepared_txn_callback_t prepared_txn_callback, + keep_cachetable_callback_t keep_cachetable_callback, + struct tokulogger *logger, + const char *env_dir, + const char *log_dir, + ft_compare_func bt_compare, + ft_update_func update_function, + generate_row_for_put_func generate_row_for_put, + generate_row_for_del_func generate_row_for_del, + size_t cachetable_size); // Effect: Check the tokudb logs to determine whether or not we need to run recovery. 
// If the log is empty or if there is a clean shutdown at the end of the log, then we diff --git a/ft/msg.cc b/ft/msg.cc index c1ce9fdd477..09a9d17e306 100644 --- a/ft/msg.cc +++ b/ft/msg.cc @@ -91,8 +91,8 @@ PATENT RIGHTS GRANT: #include "portability/toku_portability.h" #include "ft/msg.h" -#include "ft/xids.h" -#include "ft/ybt.h" +#include "ft/txn/xids.h" +#include "util/dbt.h" ft_msg::ft_msg(const DBT *key, const DBT *val, enum ft_msg_type t, MSN m, XIDS x) : _key(key ? *key : toku_empty_dbt()), diff --git a/ft/msg.h b/ft/msg.h index 7d738a3cdb0..1c974a0c84a 100644 --- a/ft/msg.h +++ b/ft/msg.h @@ -99,7 +99,7 @@ PATENT RIGHTS GRANT: #include "portability/toku_assert.h" #include "portability/toku_stdint.h" -#include "ft/xids.h" +#include "ft/txn/xids.h" #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
diff --git a/ft/msg_buffer.cc b/ft/msg_buffer.cc index d6ff954360b..9da03033ebc 100644 --- a/ft/msg_buffer.cc +++ b/ft/msg_buffer.cc @@ -87,7 +87,7 @@ PATENT RIGHTS GRANT: */ #include "ft/msg_buffer.h" -#include "ft/ybt.h" +#include "util/dbt.h" void message_buffer::create() { _num_entries = 0; diff --git a/ft/msg_buffer.h b/ft/msg_buffer.h index efd39b52670..703f3c5fced 100644 --- a/ft/msg_buffer.h +++ b/ft/msg_buffer.h @@ -89,8 +89,8 @@ PATENT RIGHTS GRANT: #pragma once #include "ft/msg.h" -#include "ft/xids.h" -#include "ft/ybt.h" +#include "ft/txn/xids.h" +#include "util/dbt.h" class message_buffer { public: diff --git a/ft/serialize/ft_layout_version.h b/ft/serialize/ft_layout_version.h index 2479aff9cb0..9d9796e925d 100644 --- a/ft/serialize/ft_layout_version.h +++ b/ft/serialize/ft_layout_version.h @@ -92,7 +92,7 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
-//Must be defined before other recursive headers could include logger.h +//Must be defined before other recursive headers could include logger/recover.h enum ft_layout_version_e { FT_LAYOUT_VERSION_5 = 5, FT_LAYOUT_VERSION_6 = 6, // Diff from 5 to 6: Add leafentry_estimate diff --git a/ft/serialize/ft_node-serialize.cc b/ft/serialize/ft_node-serialize.cc index b0eaa0af6fc..65eb8c74154 100644 --- a/ft/serialize/ft_node-serialize.cc +++ b/ft/serialize/ft_node-serialize.cc @@ -91,12 +91,12 @@ PATENT RIGHTS GRANT: #include "portability/toku_atomic.h" -#include "ft/cachetable.h" +#include "ft/cachetable/cachetable.h" #include "ft/ft.h" #include "ft/ft-internal.h" #include "ft/node.h" #include "ft/logger/log-internal.h" -#include "ft/rollback.h" +#include "ft/txn/rollback.h" #include "ft/serialize/block_table.h" #include "ft/serialize/compress.h" #include "ft/serialize/ft_node-serialize.h" diff --git a/ft/tdb-recover.cc b/ft/tdb-recover.cc index 736fd6685c3..9c0239e63b5 100644 --- a/ft/tdb-recover.cc +++ b/ft/tdb-recover.cc @@ -95,18 +95,15 @@ PATENT RIGHTS GRANT: // cd ../src/tests/tmpdir // ../../../ft/recover ../dir.test_log2.c.tdb -#include "ft-ops.h" -#include "recover.h" +#include "ft/ft-ops.h" +#include "ft/logger/recover.h" static int recovery_main(int argc, const char *const argv[]); -int -main(int argc, const char *const argv[]) { - { - int rr = toku_ft_layer_init(); - assert(rr==0); - } - int r = recovery_main(argc, argv); +int main(int argc, const char *const argv[]) { + int r = toku_ft_layer_init(); + assert(r == 0); + r = recovery_main(argc, argv); toku_ft_layer_destroy(); return r; } diff --git a/ft/tests/cachetable-checkpoint-pending.cc b/ft/tests/cachetable-checkpoint-pending.cc index ff6ea6a48a7..e0bde782cd3 100644 --- a/ft/tests/cachetable-checkpoint-pending.cc +++ b/ft/tests/cachetable-checkpoint-pending.cc @@ -93,7 +93,7 @@ PATENT RIGHTS GRANT: #include #include #include "cachetable-test.h" -#include "checkpoint.h" +#include 
"cachetable/checkpoint.h" #include static int N; // how many items in the table diff --git a/ft/tests/cachetable-checkpoint-test.cc b/ft/tests/cachetable-checkpoint-test.cc index 2be864d5e3b..fa629794217 100644 --- a/ft/tests/cachetable-checkpoint-test.cc +++ b/ft/tests/cachetable-checkpoint-test.cc @@ -93,7 +93,7 @@ PATENT RIGHTS GRANT: #include -#include "checkpoint.h" +#include "cachetable/checkpoint.h" static const int item_size = 1; diff --git a/ft/tests/cachetable-checkpointer-class.cc b/ft/tests/cachetable-checkpointer-class.cc index fa950d3972a..57e51820a73 100644 --- a/ft/tests/cachetable-checkpointer-class.cc +++ b/ft/tests/cachetable-checkpointer-class.cc @@ -89,7 +89,7 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #include "test.h" -#include "cachetable-internal.h" +#include "cachetable/cachetable-internal.h" #include "cachetable-test.h" // diff --git a/ft/tests/cachetable-evictor-class.cc b/ft/tests/cachetable-evictor-class.cc index d0dff7d9570..bd4eff4f8cc 100644 --- a/ft/tests/cachetable-evictor-class.cc +++ b/ft/tests/cachetable-evictor-class.cc @@ -89,7 +89,7 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." 
#include "test.h" -#include "cachetable-internal.h" +#include "cachetable/cachetable-internal.h" class evictor_unit_test { public: diff --git a/ft/tests/cachetable-prefetch-checkpoint-test.cc b/ft/tests/cachetable-prefetch-checkpoint-test.cc index 6eed6428cbf..c409a62ba2e 100644 --- a/ft/tests/cachetable-prefetch-checkpoint-test.cc +++ b/ft/tests/cachetable-prefetch-checkpoint-test.cc @@ -95,7 +95,7 @@ PATENT RIGHTS GRANT: #include #include "cachetable-test.h" -#include "checkpoint.h" +#include "cachetable/checkpoint.h" const int item_size = 1; diff --git a/ft/tests/cachetable-prefetch-flowcontrol-test.cc b/ft/tests/cachetable-prefetch-flowcontrol-test.cc index 6125799447b..e557930bfa5 100644 --- a/ft/tests/cachetable-prefetch-flowcontrol-test.cc +++ b/ft/tests/cachetable-prefetch-flowcontrol-test.cc @@ -93,7 +93,7 @@ PATENT RIGHTS GRANT: #include "test.h" -#include "cachetable-internal.h" +#include "cachetable/cachetable-internal.h" static int flush_calls = 0; static int flush_evict_calls = 0; diff --git a/ft/tests/cachetable-test.h b/ft/tests/cachetable-test.h index 75316469f6e..facdba3985f 100644 --- a/ft/tests/cachetable-test.h +++ b/ft/tests/cachetable-test.h @@ -90,7 +90,7 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -#include "cachetable-internal.h" +#include "cachetable/cachetable-internal.h" // // Dummy callbacks for checkpointing diff --git a/ft/tests/ftloader-test-vm.cc b/ft/tests/ftloader-test-vm.cc index d9a0566144d..d207fa574a2 100644 --- a/ft/tests/ftloader-test-vm.cc +++ b/ft/tests/ftloader-test-vm.cc @@ -90,7 +90,7 @@ PATENT RIGHTS GRANT: #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
#include "test.h" -#include "cachetable.h" +#include "cachetable/cachetable.h" #include /* Test for #2755. The ft_loader is using too much VM. */ diff --git a/ft/tests/is_empty.cc b/ft/tests/is_empty.cc index 40dfd6bb5e5..f38d0967beb 100644 --- a/ft/tests/is_empty.cc +++ b/ft/tests/is_empty.cc @@ -92,7 +92,7 @@ PATENT RIGHTS GRANT: #include "test.h" #include "toku_os.h" -#include "checkpoint.h" +#include "cachetable/checkpoint.h" #define FILENAME "test0.ft" diff --git a/ft/tests/le-cursor-provdel.cc b/ft/tests/le-cursor-provdel.cc index 868b237242d..73894c6addd 100644 --- a/ft/tests/le-cursor-provdel.cc +++ b/ft/tests/le-cursor-provdel.cc @@ -91,7 +91,7 @@ PATENT RIGHTS GRANT: // test the LE_CURSOR next function with provisionally deleted rows -#include "checkpoint.h" +#include "cachetable/checkpoint.h" #include "le-cursor.h" #include "test.h" diff --git a/ft/tests/le-cursor-right.cc b/ft/tests/le-cursor-right.cc index 54d33397d13..3f9593c1ce0 100644 --- a/ft/tests/le-cursor-right.cc +++ b/ft/tests/le-cursor-right.cc @@ -94,7 +94,7 @@ PATENT RIGHTS GRANT: // - LE_CURSOR somewhere else -#include "checkpoint.h" +#include "cachetable/checkpoint.h" #include "le-cursor.h" #include "test.h" diff --git a/ft/tests/le-cursor-walk.cc b/ft/tests/le-cursor-walk.cc index 158f870a3b6..7dc925100b0 100644 --- a/ft/tests/le-cursor-walk.cc +++ b/ft/tests/le-cursor-walk.cc @@ -91,7 +91,7 @@ PATENT RIGHTS GRANT: // test the LE_CURSOR next function -#include "checkpoint.h" +#include "cachetable/checkpoint.h" #include "le-cursor.h" #include "test.h" #include diff --git a/ft/tests/recovery-test5123.cc b/ft/tests/recovery-test5123.cc index 7020ea39b24..7e912b6f9f9 100644 --- a/ft/tests/recovery-test5123.cc +++ b/ft/tests/recovery-test5123.cc @@ -92,7 +92,7 @@ PATENT RIGHTS GRANT: #include "test.h" #include "toku_os.h" -#include "checkpoint.h" +#include "cachetable/checkpoint.h" #include "test-ft-txns.h" diff --git a/ft/tests/test-bjm.cc b/ft/tests/test-bjm.cc index 
dc0f833992d..7218b597afd 100644 --- a/ft/tests/test-bjm.cc +++ b/ft/tests/test-bjm.cc @@ -89,7 +89,7 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2011-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#include "background_job_manager.h" +#include "cachetable/background_job_manager.h" #include "test.h" diff --git a/ft/tests/test-checkpoint-during-flush.cc b/ft/tests/test-checkpoint-during-flush.cc index 9a42bed40a2..59f9748c4c9 100644 --- a/ft/tests/test-checkpoint-during-flush.cc +++ b/ft/tests/test-checkpoint-during-flush.cc @@ -96,7 +96,7 @@ PATENT RIGHTS GRANT: #include #include "ft-flusher.h" #include "ft-flusher-internal.h" -#include "checkpoint.h" +#include "cachetable/checkpoint.h" static TOKUTXN const null_txn = 0; diff --git a/ft/tests/test-checkpoint-during-merge.cc b/ft/tests/test-checkpoint-during-merge.cc index 35a3ab79459..2c1f8838614 100644 --- a/ft/tests/test-checkpoint-during-merge.cc +++ b/ft/tests/test-checkpoint-during-merge.cc @@ -96,7 +96,7 @@ PATENT RIGHTS GRANT: #include #include "ft-flusher.h" #include "ft-flusher-internal.h" -#include "checkpoint.h" +#include "cachetable/checkpoint.h" static TOKUTXN const null_txn = 0; diff --git a/ft/tests/test-checkpoint-during-rebalance.cc b/ft/tests/test-checkpoint-during-rebalance.cc index 03add082f4d..6446c26777b 100644 --- a/ft/tests/test-checkpoint-during-rebalance.cc +++ b/ft/tests/test-checkpoint-during-rebalance.cc @@ -96,7 +96,7 @@ PATENT RIGHTS GRANT: #include #include "ft-flusher.h" #include "ft-flusher-internal.h" -#include "checkpoint.h" +#include "cachetable/checkpoint.h" static TOKUTXN const null_txn = 0; diff --git a/ft/tests/test-checkpoint-during-split.cc 
b/ft/tests/test-checkpoint-during-split.cc index 2ab749858e5..94f17403336 100644 --- a/ft/tests/test-checkpoint-during-split.cc +++ b/ft/tests/test-checkpoint-during-split.cc @@ -96,7 +96,7 @@ PATENT RIGHTS GRANT: #include #include "ft-flusher.h" #include "ft-flusher-internal.h" -#include "checkpoint.h" +#include "cachetable/checkpoint.h" static TOKUTXN const null_txn = 0; diff --git a/ft/tests/test-dirty-flushes-on-cleaner.cc b/ft/tests/test-dirty-flushes-on-cleaner.cc index 332b86917fe..2c5b97a48da 100644 --- a/ft/tests/test-dirty-flushes-on-cleaner.cc +++ b/ft/tests/test-dirty-flushes-on-cleaner.cc @@ -94,7 +94,7 @@ PATENT RIGHTS GRANT: #include #include "ft-flusher.h" -#include "checkpoint.h" +#include "cachetable/checkpoint.h" static TOKUTXN const null_txn = 0; diff --git a/ft/tests/test-flushes-on-cleaner.cc b/ft/tests/test-flushes-on-cleaner.cc index b7f0edf7bb6..aba314efb0f 100644 --- a/ft/tests/test-flushes-on-cleaner.cc +++ b/ft/tests/test-flushes-on-cleaner.cc @@ -94,7 +94,7 @@ PATENT RIGHTS GRANT: #include #include "ft-flusher.h" -#include "checkpoint.h" +#include "cachetable/checkpoint.h" static TOKUTXN const null_txn = 0; diff --git a/ft/tests/test-hot-with-bounds.cc b/ft/tests/test-hot-with-bounds.cc index af191232b79..5966fe207b1 100644 --- a/ft/tests/test-hot-with-bounds.cc +++ b/ft/tests/test-hot-with-bounds.cc @@ -96,7 +96,7 @@ PATENT RIGHTS GRANT: #include #include "ft-flusher.h" #include "ft-flusher-internal.h" -#include "checkpoint.h" +#include "cachetable/checkpoint.h" static TOKUTXN const null_txn = 0; diff --git a/ft/tests/test-merges-on-cleaner.cc b/ft/tests/test-merges-on-cleaner.cc index d4780db5f44..437f33fcd7b 100644 --- a/ft/tests/test-merges-on-cleaner.cc +++ b/ft/tests/test-merges-on-cleaner.cc @@ -94,7 +94,7 @@ PATENT RIGHTS GRANT: #include #include "ft-flusher.h" -#include "checkpoint.h" +#include "cachetable/checkpoint.h" static TOKUTXN const null_txn = 0; diff --git a/ft/tests/test-pick-child-to-flush.cc 
b/ft/tests/test-pick-child-to-flush.cc index 6203c9085c8..feadabd2b81 100644 --- a/ft/tests/test-pick-child-to-flush.cc +++ b/ft/tests/test-pick-child-to-flush.cc @@ -97,7 +97,7 @@ PATENT RIGHTS GRANT: #include "ft-flusher.h" #include "ft-flusher-internal.h" -#include "checkpoint.h" +#include "cachetable/checkpoint.h" static TOKUTXN const null_txn = 0; diff --git a/ft/tests/test-txn-child-manager.cc b/ft/tests/test-txn-child-manager.cc index 6ce44f0b3d3..25886031821 100644 --- a/ft/tests/test-txn-child-manager.cc +++ b/ft/tests/test-txn-child-manager.cc @@ -92,7 +92,7 @@ PATENT RIGHTS GRANT: #include "test.h" #include "toku_os.h" -#include "checkpoint.h" +#include "cachetable/checkpoint.h" #include "test-ft-txns.h" diff --git a/ft/tests/test.h b/ft/tests/test.h index cba8d96e24d..19f44d784e1 100644 --- a/ft/tests/test.h +++ b/ft/tests/test.h @@ -103,8 +103,8 @@ PATENT RIGHTS GRANT: #include "ft/serialize/block_allocator.h" #include "ft/serialize/block_table.h" -#include "ft/cachetable.h" -#include "ft/cachetable-internal.h" +#include "ft/cachetable/cachetable.h" +#include "ft/cachetable/cachetable-internal.h" #include "ft/cursor.h" #include "ft/ft.h" #include "ft/ft-ops.h" diff --git a/ft/tests/test3681.cc b/ft/tests/test3681.cc index 2367c3d07ce..5e8f5b5e1a9 100644 --- a/ft/tests/test3681.cc +++ b/ft/tests/test3681.cc @@ -95,7 +95,7 @@ PATENT RIGHTS GRANT: // * Thread 1 calls apply_msg_to_in_memory_leaves, calls get_and_pin_if_in_memory, tries to get a read lock on the root node and blocks on the rwlock because there is a write request on the lock. 
-#include "checkpoint.h" +#include "cachetable/checkpoint.h" #include "test.h" CACHETABLE ct; diff --git a/ft/tests/test_rightmost_leaf_seqinsert_heuristic.cc b/ft/tests/test_rightmost_leaf_seqinsert_heuristic.cc index cb2e629f855..efa08d0c8c0 100644 --- a/ft/tests/test_rightmost_leaf_seqinsert_heuristic.cc +++ b/ft/tests/test_rightmost_leaf_seqinsert_heuristic.cc @@ -90,7 +90,7 @@ PATENT RIGHTS GRANT: #include "test.h" -#include +#include #include // Each FT maintains a sequential insert heuristic to determine if its diff --git a/ft/tests/test_rightmost_leaf_split_merge.cc b/ft/tests/test_rightmost_leaf_split_merge.cc index 4394217bfc6..caa01d83f0c 100644 --- a/ft/tests/test_rightmost_leaf_split_merge.cc +++ b/ft/tests/test_rightmost_leaf_split_merge.cc @@ -90,7 +90,7 @@ PATENT RIGHTS GRANT: #include "test.h" -#include +#include #include #include diff --git a/ft/tests/upgrade_test_simple.cc b/ft/tests/upgrade_test_simple.cc index d1ec3f7d3fb..fe3fd60499f 100644 --- a/ft/tests/upgrade_test_simple.cc +++ b/ft/tests/upgrade_test_simple.cc @@ -97,7 +97,7 @@ PATENT RIGHTS GRANT: #include "ft-flusher.h" -#include "checkpoint.h" +#include "cachetable/checkpoint.h" static TOKUTXN const null_txn = NULL; diff --git a/ft/tests/xid_lsn_independent.cc b/ft/tests/xid_lsn_independent.cc index 93762525a09..43f8102067c 100644 --- a/ft/tests/xid_lsn_independent.cc +++ b/ft/tests/xid_lsn_independent.cc @@ -92,7 +92,7 @@ PATENT RIGHTS GRANT: #include "test.h" #include "toku_os.h" -#include "checkpoint.h" +#include "cachetable/checkpoint.h" #define ENVDIR TOKU_TEST_FILENAME #include "test-ft-txns.h" diff --git a/ft/tokuftdump.cc b/ft/tokuftdump.cc index e591bcec84f..f6385b4f725 100644 --- a/ft/tokuftdump.cc +++ b/ft/tokuftdump.cc @@ -99,7 +99,7 @@ PATENT RIGHTS GRANT: #include #include "ft/serialize/block_table.h" -#include "ft/cachetable.h" +#include "ft/cachetable/cachetable.h" #include "ft/ft.h" #include "ft/ft-internal.h" #include "ft/serialize/ft-serialize.h" diff --git 
a/ft/roll.cc b/ft/txn/roll.cc similarity index 99% rename from ft/roll.cc rename to ft/txn/roll.cc index fd49dfd478a..0d8855bb8ce 100644 --- a/ft/roll.cc +++ b/ft/txn/roll.cc @@ -96,8 +96,8 @@ PATENT RIGHTS GRANT: #include "ft/ft-ops.h" #include "ft/log_header.h" #include "ft/logger/log-internal.h" -#include "ft/xids.h" -#include "ft/rollback-apply.h" +#include "ft/txn/xids.h" +#include "ft/txn/rollback-apply.h" // functionality provided by roll.c is exposed by an autogenerated // header file, logheader.h diff --git a/ft/rollback-apply.cc b/ft/txn/rollback-apply.cc similarity index 99% rename from ft/rollback-apply.cc rename to ft/txn/rollback-apply.cc index 90157d85f5a..5bd53193642 100644 --- a/ft/rollback-apply.cc +++ b/ft/txn/rollback-apply.cc @@ -90,7 +90,7 @@ PATENT RIGHTS GRANT: #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
#include "ft/logger/log-internal.h" -#include "ft/rollback-apply.h" +#include "ft/txn/rollback-apply.h" static void poll_txn_progress_function(TOKUTXN txn, uint8_t is_commit, uint8_t stall_for_checkpoint) { diff --git a/ft/rollback-apply.h b/ft/txn/rollback-apply.h similarity index 100% rename from ft/rollback-apply.h rename to ft/txn/rollback-apply.h diff --git a/ft/rollback-ct-callbacks.cc b/ft/txn/rollback-ct-callbacks.cc similarity index 99% rename from ft/rollback-ct-callbacks.cc rename to ft/txn/rollback-ct-callbacks.cc index e2810285bf1..cabf7cf5469 100644 --- a/ft/rollback-ct-callbacks.cc +++ b/ft/txn/rollback-ct-callbacks.cc @@ -95,8 +95,8 @@ PATENT RIGHTS GRANT: #include "ft/serialize/block_table.h" #include "ft/ft-internal.h" #include "ft/serialize/ft_node-serialize.h" -#include "ft/rollback.h" -#include "ft/rollback-ct-callbacks.h" +#include "ft/txn/rollback.h" +#include "ft/txn/rollback-ct-callbacks.h" #include "util/memarena.h" diff --git a/ft/rollback-ct-callbacks.h b/ft/txn/rollback-ct-callbacks.h similarity index 99% rename from ft/rollback-ct-callbacks.h rename to ft/txn/rollback-ct-callbacks.h index 3c23473ed6a..4bcb004de3d 100644 --- a/ft/rollback-ct-callbacks.h +++ b/ft/txn/rollback-ct-callbacks.h @@ -92,7 +92,7 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
-#include "ft/cachetable.h" +#include "ft/cachetable/cachetable.h" void toku_rollback_flush_callback(CACHEFILE cachefile, int fd, BLOCKNUM logname, void *rollback_v, void** UU(disk_data), void *extraargs, PAIR_ATTR size, PAIR_ATTR* new_size, bool write_me, bool keep_me, bool for_checkpoint, bool UU(is_clone)); int toku_rollback_fetch_callback(CACHEFILE cachefile, PAIR p, int fd, BLOCKNUM logname, uint32_t fullhash, void **rollback_pv, void** UU(disk_data), PAIR_ATTR *sizep, int * UU(dirtyp), void *extraargs); diff --git a/ft/rollback.cc b/ft/txn/rollback.cc similarity index 99% rename from ft/rollback.cc rename to ft/txn/rollback.cc index 9bdfbe6c9bf..15cd9054ddd 100644 --- a/ft/rollback.cc +++ b/ft/txn/rollback.cc @@ -94,7 +94,7 @@ PATENT RIGHTS GRANT: #include "ft/serialize/block_table.h" #include "ft/ft.h" #include "ft/logger/log-internal.h" -#include "ft/rollback-ct-callbacks.h" +#include "ft/txn/rollback-ct-callbacks.h" static void rollback_unpin_remove_callback(CACHEKEY* cachekey, bool for_checkpoint, void* extra) { FT CAST_FROM_VOIDP(ft, extra); diff --git a/ft/rollback.h b/ft/txn/rollback.h similarity index 99% rename from ft/rollback.h rename to ft/txn/rollback.h index d9b400b9903..cddbe16c91a 100644 --- a/ft/rollback.h +++ b/ft/txn/rollback.h @@ -92,9 +92,9 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
-#include "ft/cachetable.h" +#include "ft/cachetable/cachetable.h" #include "ft/serialize/sub_block.h" -#include "ft/txn.h" +#include "ft/txn/txn.h" #include "util/memarena.h" diff --git a/ft/rollback_log_node_cache.cc b/ft/txn/rollback_log_node_cache.cc similarity index 99% rename from ft/rollback_log_node_cache.cc rename to ft/txn/rollback_log_node_cache.cc index d3ea3471489..fca18702da3 100644 --- a/ft/rollback_log_node_cache.cc +++ b/ft/txn/rollback_log_node_cache.cc @@ -92,7 +92,7 @@ PATENT RIGHTS GRANT: #include #include -#include "rollback_log_node_cache.h" +#include "txn/rollback_log_node_cache.h" void rollback_log_node_cache::init (uint32_t max_num_avail_nodes) { XMALLOC_N(max_num_avail_nodes, m_avail_blocknums); diff --git a/ft/rollback_log_node_cache.h b/ft/txn/rollback_log_node_cache.h similarity index 99% rename from ft/rollback_log_node_cache.h rename to ft/txn/rollback_log_node_cache.h index 8b234250569..baafcf979d5 100644 --- a/ft/rollback_log_node_cache.h +++ b/ft/txn/rollback_log_node_cache.h @@ -92,7 +92,7 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#include "rollback.h" +#include "ft/txn/rollback.h" class rollback_log_node_cache { public: diff --git a/ft/txn.cc b/ft/txn/txn.cc similarity index 99% rename from ft/txn.cc rename to ft/txn/txn.cc index 90b8a1a3e61..29890ad1816 100644 --- a/ft/txn.cc +++ b/ft/txn/txn.cc @@ -90,13 +90,13 @@ PATENT RIGHTS GRANT: #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 
11/760379 and to the patents and/or patent applications resulting from it." -#include "ft/checkpoint.h" +#include "ft/cachetable/checkpoint.h" #include "ft/ft.h" #include "ft/logger/log-internal.h" #include "ft/ule.h" -#include "ft/rollback-apply.h" -#include "ft/txn.h" -#include "ft/txn_manager.h" +#include "ft/txn/rollback-apply.h" +#include "ft/txn/txn.h" +#include "ft/txn/txn_manager.h" #include "util/status.h" /////////////////////////////////////////////////////////////////////////////////// diff --git a/ft/txn.h b/ft/txn/txn.h similarity index 99% rename from ft/txn.h rename to ft/txn/txn.h index a9380b0cb06..b96e9b8f1fe 100644 --- a/ft/txn.h +++ b/ft/txn/txn.h @@ -94,7 +94,7 @@ PATENT RIGHTS GRANT: #include "portability/toku_stdint.h" -#include "ft/txn_state.h" +#include "ft/txn/txn_state.h" #include "ft/serialize/block_table.h" #include "util/omt.h" @@ -114,7 +114,7 @@ typedef struct txnid_pair_s { static const TXNID_PAIR TXNID_PAIR_NONE = { .parent_id64 = TXNID_NONE, .child_id64 = TXNID_NONE }; // We include the child manager here beacuse it uses the TXNID / TOKUTXN types -#include "ft/txn_child_manager.h" +#include "ft/txn/txn_child_manager.h" /* Log Sequence Number (LSN) * Make the LSN be a struct instead of an integer so that we get better type checking. */ diff --git a/ft/txn_child_manager.cc b/ft/txn/txn_child_manager.cc similarity index 99% rename from ft/txn_child_manager.cc rename to ft/txn/txn_child_manager.cc index fb71c346e01..1282aea2e25 100644 --- a/ft/txn_child_manager.cc +++ b/ft/txn/txn_child_manager.cc @@ -90,7 +90,7 @@ PATENT RIGHTS GRANT: #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
#include "ft/logger/log-internal.h" -#include "ft/txn_child_manager.h" +#include "ft/txn/txn_child_manager.h" // // initialized a txn_child_manager, diff --git a/ft/txn_child_manager.h b/ft/txn/txn_child_manager.h similarity index 97% rename from ft/txn_child_manager.h rename to ft/txn/txn_child_manager.h index efc6b35312a..81136b02c8c 100644 --- a/ft/txn_child_manager.h +++ b/ft/txn/txn_child_manager.h @@ -1,7 +1,7 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ident "$Id: rollback.h 49033 2012-10-17 18:48:30Z zardosht $" +#ident "$Id: txn/rollback.h 49033 2012-10-17 18:48:30Z zardosht $" /* COPYING CONDITIONS NOTICE: @@ -89,7 +89,7 @@ PATENT RIGHTS GRANT: #pragma once -// We should be including ft/txn.h here but that header includes this one, +// We should be including ft/txn/txn.h here but that header includes this one, // so we don't. #include "portability/toku_pthread.h" diff --git a/ft/txn_manager.cc b/ft/txn/txn_manager.cc similarity index 99% rename from ft/txn_manager.cc rename to ft/txn/txn_manager.cc index 8da99aa20f9..b9bbddf5ed1 100644 --- a/ft/txn_manager.cc +++ b/ft/txn/txn_manager.cc @@ -91,12 +91,12 @@ PATENT RIGHTS GRANT: #include "portability/toku_race_tools.h" -#include "ft/checkpoint.h" +#include "ft/cachetable/checkpoint.h" #include "ft/logger/log-internal.h" #include "ft/ule.h" -#include "ft/txn.h" -#include "ft/txn_manager.h" -#include "ft/rollback.h" +#include "ft/txn/txn.h" +#include "ft/txn/txn_manager.h" +#include "ft/txn/rollback.h" #include "util/omt.h" bool garbage_collection_debug = false; diff --git a/ft/txn_manager.h b/ft/txn/txn_manager.h similarity index 99% rename from ft/txn_manager.h rename to ft/txn/txn_manager.h index a94a003513b..c9fa158aaee 100644 --- a/ft/txn_manager.h +++ b/ft/txn/txn_manager.h @@ -95,7 +95,7 @@ PATENT RIGHTS GRANT: #include "portability/toku_portability.h" #include "portability/toku_pthread.h" -#include "ft/txn.h" +#include 
"ft/txn/txn.h" typedef struct txn_manager *TXN_MANAGER; diff --git a/ft/txn_state.h b/ft/txn/txn_state.h similarity index 100% rename from ft/txn_state.h rename to ft/txn/txn_state.h diff --git a/ft/xids.cc b/ft/txn/xids.cc similarity index 99% rename from ft/xids.cc rename to ft/txn/xids.cc index 9d629377014..2d80192df99 100644 --- a/ft/xids.cc +++ b/ft/txn/xids.cc @@ -109,7 +109,7 @@ PATENT RIGHTS GRANT: #include "portability/toku_htod.h" #include "portability/toku_portability.h" -#include "ft/xids.h" +#include "ft/txn/xids.h" ///////////////////////////////////////////////////////////////////////////////// // This layer of abstraction (xids_xxx) understands xids<> and nothing else. diff --git a/ft/xids.h b/ft/txn/xids.h similarity index 99% rename from ft/xids.h rename to ft/txn/xids.h index 4e37cb9a73c..894c369a1b1 100644 --- a/ft/xids.h +++ b/ft/txn/xids.h @@ -103,7 +103,7 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
-#include "ft/txn.h" +#include "ft/txn/txn.h" #include "ft/serialize/rbuf.h" #include "ft/serialize/wbuf.h" diff --git a/ft/ule.cc b/ft/ule.cc index 4c5ca3785e8..569c2d1ff50 100644 --- a/ft/ule.cc +++ b/ft/ule.cc @@ -108,11 +108,11 @@ PATENT RIGHTS GRANT: #include "ft/leafentry.h" #include "ft/logger/logger.h" #include "ft/msg.h" -#include "ft/txn.h" -#include "ft/txn_manager.h" +#include "ft/txn/txn.h" +#include "ft/txn/txn_manager.h" #include "ft/ule.h" #include "ft/ule-internal.h" -#include "ft/xids.h" +#include "ft/txn/xids.h" #include "util/bytestring.h" #include "util/omt.h" #include "util/partitioned_counter.h" diff --git a/ft/ule.h b/ft/ule.h index 1441e39c7dd..2468d2c328e 100644 --- a/ft/ule.h +++ b/ft/ule.h @@ -98,7 +98,7 @@ PATENT RIGHTS GRANT: #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." #include "leafentry.h" -#include "txn_manager.h" +#include "txn/txn_manager.h" #include void toku_ule_status_init(void); diff --git a/locktree/keyrange.cc b/locktree/keyrange.cc index 197e250aeb6..c44260c0358 100644 --- a/locktree/keyrange.cc +++ b/locktree/keyrange.cc @@ -91,165 +91,165 @@ PATENT RIGHTS GRANT: #include "keyrange.h" -#include +#include namespace toku { -// create a keyrange by borrowing the left and right dbt -// pointers. no memory is copied. no checks for infinity needed. -void keyrange::create(const DBT *left, const DBT *right) { - init_empty(); - m_left_key = left; - m_right_key = right; -} - -// destroy the key copies. if they were never set, then destroy does nothing. -void keyrange::destroy(void) { - toku_destroy_dbt(&m_left_key_copy); - toku_destroy_dbt(&m_right_key_copy); -} - -// create a keyrange by copying the keys from the given range. 
-void keyrange::create_copy(const keyrange &range) { - // start with an initialized, empty range - init_empty(); - - // optimize the case where the left and right keys are the same. - // we'd like to only have one copy of the data. - if (toku_dbt_equals(range.get_left_key(), range.get_right_key())) { - set_both_keys(range.get_left_key()); - } else { - // replace our empty left and right keys with - // copies of the range's left and right keys - replace_left_key(range.get_left_key()); - replace_right_key(range.get_right_key()); + // create a keyrange by borrowing the left and right dbt + // pointers. no memory is copied. no checks for infinity needed. + void keyrange::create(const DBT *left, const DBT *right) { + init_empty(); + m_left_key = left; + m_right_key = right; } -} -// extend this keyrange by choosing the leftmost and rightmost -// endpoints between this range and the given. replaced keys -// in this range are freed and inherited keys are copied. -void keyrange::extend(const comparator &cmp, const keyrange &range) { - const DBT *range_left = range.get_left_key(); - const DBT *range_right = range.get_right_key(); - if (cmp(range_left, get_left_key()) < 0) { - replace_left_key(range_left); - } - if (cmp(range_right, get_right_key()) > 0) { - replace_right_key(range_right); - } -} - -// how much memory does this keyrange take? -// - the size of the left and right keys -// --- ignore the fact that we may have optimized the point case. -// it complicates things for little gain. -// - the size of the keyrange class itself -uint64_t keyrange::get_memory_size(void) const { - const DBT *left_key = get_left_key(); - const DBT *right_key = get_right_key(); - return left_key->size + right_key->size + sizeof(keyrange); -} - -// compare ranges. 
-keyrange::comparison keyrange::compare(const comparator &cmp, const keyrange &range) const { - if (cmp(get_right_key(), range.get_left_key()) < 0) { - return comparison::LESS_THAN; - } else if (cmp(get_left_key(), range.get_right_key()) > 0) { - return comparison::GREATER_THAN; - } else if (cmp(get_left_key(), range.get_left_key()) == 0 && - cmp(get_right_key(), range.get_right_key()) == 0) { - return comparison::EQUALS; - } else { - return comparison::OVERLAPS; - } -} - -bool keyrange::overlaps(const comparator &cmp, const keyrange &range) const { - // equality is a stronger form of overlapping. - // so two ranges "overlap" if they're either equal or just overlapping. - comparison c = compare(cmp, range); - return c == comparison::EQUALS || c == comparison::OVERLAPS; -} - -keyrange keyrange::get_infinite_range(void) { - keyrange range; - range.create(toku_dbt_negative_infinity(), toku_dbt_positive_infinity()); - return range; -} - -void keyrange::init_empty(void) { - m_left_key = nullptr; - m_right_key = nullptr; - toku_init_dbt(&m_left_key_copy); - toku_init_dbt(&m_right_key_copy); - m_point_range = false; -} - -const DBT *keyrange::get_left_key(void) const { - if (m_left_key) { - return m_left_key; - } else { - return &m_left_key_copy; - } -} - -const DBT *keyrange::get_right_key(void) const { - if (m_right_key) { - return m_right_key; - } else { - return &m_right_key_copy; - } -} - -// copy the given once and set both the left and right pointers. -// optimization for point ranges, so the left and right ranges -// are not copied twice. -void keyrange::set_both_keys(const DBT *key) { - if (toku_dbt_is_infinite(key)) { - m_left_key = key; - m_right_key = key; - } else { - toku_clone_dbt(&m_left_key_copy, *key); - toku_copyref_dbt(&m_right_key_copy, m_left_key_copy); - } - m_point_range = true; -} - -// destroy the current left key. 
set and possibly copy the new one -void keyrange::replace_left_key(const DBT *key) { - // a little magic: - // - // if this is a point range, then the left and right keys share - // one copy of the data, and it lives in the left key copy. so - // if we're replacing the left key, move the real data to the - // right key copy instead of destroying it. now, the memory is - // owned by the right key and the left key may be replaced. - if (m_point_range) { - m_right_key_copy = m_left_key_copy; - } else { + // destroy the key copies. if they were never set, then destroy does nothing. + void keyrange::destroy(void) { toku_destroy_dbt(&m_left_key_copy); + toku_destroy_dbt(&m_right_key_copy); } - if (toku_dbt_is_infinite(key)) { - m_left_key = key; - } else { - toku_clone_dbt(&m_left_key_copy, *key); + // create a keyrange by copying the keys from the given range. + void keyrange::create_copy(const keyrange &range) { + // start with an initialized, empty range + init_empty(); + + // optimize the case where the left and right keys are the same. + // we'd like to only have one copy of the data. + if (toku_dbt_equals(range.get_left_key(), range.get_right_key())) { + set_both_keys(range.get_left_key()); + } else { + // replace our empty left and right keys with + // copies of the range's left and right keys + replace_left_key(range.get_left_key()); + replace_right_key(range.get_right_key()); + } + } + + // extend this keyrange by choosing the leftmost and rightmost + // endpoints between this range and the given. replaced keys + // in this range are freed and inherited keys are copied. + void keyrange::extend(const comparator &cmp, const keyrange &range) { + const DBT *range_left = range.get_left_key(); + const DBT *range_right = range.get_right_key(); + if (cmp(range_left, get_left_key()) < 0) { + replace_left_key(range_left); + } + if (cmp(range_right, get_right_key()) > 0) { + replace_right_key(range_right); + } + } + + // how much memory does this keyrange take? 
+ // - the size of the left and right keys + // --- ignore the fact that we may have optimized the point case. + // it complicates things for little gain. + // - the size of the keyrange class itself + uint64_t keyrange::get_memory_size(void) const { + const DBT *left_key = get_left_key(); + const DBT *right_key = get_right_key(); + return left_key->size + right_key->size + sizeof(keyrange); + } + + // compare ranges. + keyrange::comparison keyrange::compare(const comparator &cmp, const keyrange &range) const { + if (cmp(get_right_key(), range.get_left_key()) < 0) { + return comparison::LESS_THAN; + } else if (cmp(get_left_key(), range.get_right_key()) > 0) { + return comparison::GREATER_THAN; + } else if (cmp(get_left_key(), range.get_left_key()) == 0 && + cmp(get_right_key(), range.get_right_key()) == 0) { + return comparison::EQUALS; + } else { + return comparison::OVERLAPS; + } + } + + bool keyrange::overlaps(const comparator &cmp, const keyrange &range) const { + // equality is a stronger form of overlapping. + // so two ranges "overlap" if they're either equal or just overlapping. + comparison c = compare(cmp, range); + return c == comparison::EQUALS || c == comparison::OVERLAPS; + } + + keyrange keyrange::get_infinite_range(void) { + keyrange range; + range.create(toku_dbt_negative_infinity(), toku_dbt_positive_infinity()); + return range; + } + + void keyrange::init_empty(void) { m_left_key = nullptr; - } - m_point_range = false; -} - -// destroy the current right key. 
set and possibly copy the new one -void keyrange::replace_right_key(const DBT *key) { - toku_destroy_dbt(&m_right_key_copy); - if (toku_dbt_is_infinite(key)) { - m_right_key = key; - } else { - toku_clone_dbt(&m_right_key_copy, *key); m_right_key = nullptr; + toku_init_dbt(&m_left_key_copy); + toku_init_dbt(&m_right_key_copy); + m_point_range = false; + } + + const DBT *keyrange::get_left_key(void) const { + if (m_left_key) { + return m_left_key; + } else { + return &m_left_key_copy; + } + } + + const DBT *keyrange::get_right_key(void) const { + if (m_right_key) { + return m_right_key; + } else { + return &m_right_key_copy; + } + } + + // copy the given once and set both the left and right pointers. + // optimization for point ranges, so the left and right ranges + // are not copied twice. + void keyrange::set_both_keys(const DBT *key) { + if (toku_dbt_is_infinite(key)) { + m_left_key = key; + m_right_key = key; + } else { + toku_clone_dbt(&m_left_key_copy, *key); + toku_copyref_dbt(&m_right_key_copy, m_left_key_copy); + } + m_point_range = true; + } + + // destroy the current left key. set and possibly copy the new one + void keyrange::replace_left_key(const DBT *key) { + // a little magic: + // + // if this is a point range, then the left and right keys share + // one copy of the data, and it lives in the left key copy. so + // if we're replacing the left key, move the real data to the + // right key copy instead of destroying it. now, the memory is + // owned by the right key and the left key may be replaced. + if (m_point_range) { + m_right_key_copy = m_left_key_copy; + } else { + toku_destroy_dbt(&m_left_key_copy); + } + + if (toku_dbt_is_infinite(key)) { + m_left_key = key; + } else { + toku_clone_dbt(&m_left_key_copy, *key); + m_left_key = nullptr; + } + m_point_range = false; + } + + // destroy the current right key. 
set and possibly copy the new one + void keyrange::replace_right_key(const DBT *key) { + toku_destroy_dbt(&m_right_key_copy); + if (toku_dbt_is_infinite(key)) { + m_right_key = key; + } else { + toku_clone_dbt(&m_right_key_copy, *key); + m_right_key = nullptr; + } + m_point_range = false; } - m_point_range = false; -} } /* namespace toku */ diff --git a/locktree/lock_request.cc b/locktree/lock_request.cc index 66ae43a42d3..3272f012be4 100644 --- a/locktree/lock_request.cc +++ b/locktree/lock_request.cc @@ -89,13 +89,12 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#include +#include "portability/toku_race_tools.h" -#include -#include - -#include "locktree.h" -#include "lock_request.h" +#include "ft/txn/txn.h" +#include "locktree/locktree.h" +#include "locktree/lock_request.h" +#include "util/dbt.h" namespace toku { diff --git a/locktree/range_buffer.cc b/locktree/range_buffer.cc index e33e6e842a2..acf3422ced4 100644 --- a/locktree/range_buffer.cc +++ b/locktree/range_buffer.cc @@ -89,11 +89,12 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
-#include #include -#include -#include "range_buffer.h" +#include "portability/memory.h" + +#include "locktree/range_buffer.h" +#include "util/dbt.h" namespace toku { diff --git a/locktree/range_buffer.h b/locktree/range_buffer.h index 845d6c98ced..edd31211058 100644 --- a/locktree/range_buffer.h +++ b/locktree/range_buffer.h @@ -91,8 +91,9 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#include "ft/ybt.h" #include "portability/toku_stdint.h" + +#include "util/dbt.h" #include "util/memarena.h" namespace toku { diff --git a/locktree/tests/test.h b/locktree/tests/test.h index c5bf06a29b2..dc1eb2f5afe 100644 --- a/locktree/tests/test.h +++ b/locktree/tests/test.h @@ -94,7 +94,7 @@ PATENT RIGHTS GRANT: #include #include "ft/comparator.h" -#include "ft/ybt.h" +#include "util/dbt.h" namespace toku { diff --git a/locktree/treenode.h b/locktree/treenode.h index 44c95e90825..a83699d5d56 100644 --- a/locktree/treenode.h +++ b/locktree/treenode.h @@ -91,15 +91,14 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
-#include #include -#include -#include +#include "portability/memory.h" +#include "portability/toku_pthread.h" -#include - -#include "keyrange.h" +#include "ft/comparator.h" +#include "ft/txn/txn.h" +#include "locktree/keyrange.h" namespace toku { diff --git a/locktree/txnid_set.h b/locktree/txnid_set.h index 31b0a1990a5..0dfed575178 100644 --- a/locktree/txnid_set.h +++ b/locktree/txnid_set.h @@ -91,7 +91,7 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#include "ft/txn.h" +#include "ft/txn/txn.h" #include "util/omt.h" diff --git a/src/indexer-internal.h b/src/indexer-internal.h index fb06c8f8391..c3a00cef295 100644 --- a/src/indexer-internal.h +++ b/src/indexer-internal.h @@ -91,7 +91,7 @@ PATENT RIGHTS GRANT: #pragma once -#include +#include #include // the indexer_commit_keys is an ordered set of keys described by a DBT in the keys array. 
diff --git a/src/indexer-undo-do.cc b/src/indexer-undo-do.cc index b18efbed235..a97064119bc 100644 --- a/src/indexer-undo-do.cc +++ b/src/indexer-undo-do.cc @@ -99,9 +99,9 @@ PATENT RIGHTS GRANT: #include #include #include -#include -#include -#include +#include +#include +#include #include "ydb-internal.h" #include "ydb_row_lock.h" diff --git a/src/indexer.cc b/src/indexer.cc index 6d81b70daf2..926bf5c579d 100644 --- a/src/indexer.cc +++ b/src/indexer.cc @@ -102,9 +102,9 @@ PATENT RIGHTS GRANT: #include #include #include -#include +#include #include -#include +#include #include #include "loader.h" #include diff --git a/src/loader.cc b/src/loader.cc index 0805b2d7d16..1550af3460c 100644 --- a/src/loader.cc +++ b/src/loader.cc @@ -100,7 +100,7 @@ PATENT RIGHTS GRANT: #include #include -#include +#include #include "ydb-internal.h" #include "ydb_db.h" diff --git a/src/tests/blackhole.cc b/src/tests/blackhole.cc index 267eb8c1ba3..bf15283d0f9 100644 --- a/src/tests/blackhole.cc +++ b/src/tests/blackhole.cc @@ -92,7 +92,7 @@ PATENT RIGHTS GRANT: // Test that a db ignores insert messages in blackhole mode #include "test.h" -#include +#include static DB *db; static DB *blackhole_db; diff --git a/src/tests/hotindexer-undo-do-test.cc b/src/tests/hotindexer-undo-do-test.cc index a478666d87c..9aa3ea1fc8e 100644 --- a/src/tests/hotindexer-undo-do-test.cc +++ b/src/tests/hotindexer-undo-do-test.cc @@ -95,11 +95,10 @@ PATENT RIGHTS GRANT: #include "test.h" -#include #include #include #include -#include +#include #include "indexer-internal.h" diff --git a/src/tests/test_txn_nested1.cc b/src/tests/test_txn_nested1.cc index 4b95a6466e4..748c6e44750 100644 --- a/src/tests/test_txn_nested1.cc +++ b/src/tests/test_txn_nested1.cc @@ -95,7 +95,7 @@ PATENT RIGHTS GRANT: #include #include #include -#include +#include #define MAX_NEST MAX_NESTED_TRANSACTIONS diff --git a/src/tests/test_txn_nested2.cc b/src/tests/test_txn_nested2.cc index 8caa98e734d..3cd2164d938 100644 --- 
a/src/tests/test_txn_nested2.cc +++ b/src/tests/test_txn_nested2.cc @@ -87,15 +87,18 @@ PATENT RIGHTS GRANT: */ #ident "Copyright (c) 2009-2013 Tokutek Inc. All rights reserved." -#include "test.h" -#include +#include +#include #include #include #include #include -#include -#include + +#include "src/tests/test.h" + +#include + #define MAX_NEST MAX_TRANSACTION_RECORDS #define MAX_SIZE MAX_TRANSACTION_RECORDS diff --git a/src/tests/test_txn_nested3.cc b/src/tests/test_txn_nested3.cc index 082a1dc8770..0e74b0856ce 100644 --- a/src/tests/test_txn_nested3.cc +++ b/src/tests/test_txn_nested3.cc @@ -95,7 +95,7 @@ PATENT RIGHTS GRANT: #include #include #include -#include +#include #define MAX_NEST MAX_TRANSACTION_RECORDS #define MAX_SIZE MAX_TRANSACTION_RECORDS diff --git a/src/tests/test_txn_nested4.cc b/src/tests/test_txn_nested4.cc index 6f8c7a984cb..9b064564556 100644 --- a/src/tests/test_txn_nested4.cc +++ b/src/tests/test_txn_nested4.cc @@ -95,7 +95,7 @@ PATENT RIGHTS GRANT: #include #include #include -#include +#include #define MAX_NEST MAX_TRANSACTION_RECORDS #define MAX_SIZE MAX_TRANSACTION_RECORDS diff --git a/src/tests/test_txn_nested5.cc b/src/tests/test_txn_nested5.cc index 6009e3b4fee..0f90c96bc2e 100644 --- a/src/tests/test_txn_nested5.cc +++ b/src/tests/test_txn_nested5.cc @@ -95,7 +95,7 @@ PATENT RIGHTS GRANT: #include #include #include -#include +#include #define MAX_NEST MAX_TRANSACTION_RECORDS #define MAX_SIZE (MAX_TRANSACTION_RECORDS + 1) diff --git a/src/tests/threaded_stress_test_helpers.h b/src/tests/threaded_stress_test_helpers.h index d587fbdf608..0fda39a50aa 100644 --- a/src/tests/threaded_stress_test_helpers.h +++ b/src/tests/threaded_stress_test_helpers.h @@ -122,7 +122,7 @@ PATENT RIGHTS GRANT: #include -#include +#include #include #include diff --git a/src/ydb-internal.h b/src/ydb-internal.h index 08fb4a26fee..60be5338eac 100644 --- a/src/ydb-internal.h +++ b/src/ydb-internal.h @@ -94,11 +94,11 @@ PATENT RIGHTS GRANT: #include #include 
-#include +#include #include #include #include -#include +#include #include #include diff --git a/src/ydb.cc b/src/ydb.cc index 9b527538aad..4eeadfefc6f 100644 --- a/src/ydb.cc +++ b/src/ydb.cc @@ -103,13 +103,13 @@ const char *toku_copyright_string = "Copyright (c) 2007-2013 Tokutek Inc. All r #include "portability/toku_stdlib.h" #include "ft/ft-flusher.h" -#include "ft/cachetable.h" +#include "ft/cachetable/cachetable.h" +#include "ft/cachetable/checkpoint.h" #include "ft/logger/log.h" -#include "ft/checkpoint.h" #include "ft/loader/loader.h" #include "ft/log_header.h" #include "ft/ft.h" -#include "ft/txn_manager.h" +#include "ft/txn/txn_manager.h" #include "src/ydb.h" #include "src/ydb-internal.h" #include "src/ydb_cursor.h" diff --git a/src/ydb_db.cc b/src/ydb_db.cc index f3cc74be4cf..f33d965c9b9 100644 --- a/src/ydb_db.cc +++ b/src/ydb_db.cc @@ -95,7 +95,7 @@ PATENT RIGHTS GRANT: #include #include #include -#include +#include #include "ydb_cursor.h" #include "ydb_row_lock.h" diff --git a/src/ydb_env_func.cc b/src/ydb_env_func.cc index 550f853c2d9..3714b55c746 100644 --- a/src/ydb_env_func.cc +++ b/src/ydb_env_func.cc @@ -94,11 +94,11 @@ PATENT RIGHTS GRANT: #include #include +#include #include #include #include -#include -#include +#include #include #include "ydb_env_func.h" diff --git a/src/ydb_txn.cc b/src/ydb_txn.cc index 6f1105412d7..e513f0da0e5 100644 --- a/src/ydb_txn.cc +++ b/src/ydb_txn.cc @@ -89,15 +89,17 @@ PATENT RIGHTS GRANT: #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
#ident "$Id$" -#include #include -#include -#include -#include +#include #include +#include +#include +#include + + #include "ydb-internal.h" #include "ydb_txn.h" #include "ydb_row_lock.h" diff --git a/src/ydb_write.cc b/src/ydb_write.cc index 62badd4916b..a0d355eb948 100644 --- a/src/ydb_write.cc +++ b/src/ydb_write.cc @@ -93,7 +93,7 @@ PATENT RIGHTS GRANT: #include "ydb-internal.h" #include "indexer.h" #include -#include +#include #include "ydb_row_lock.h" #include "ydb_write.h" #include "ydb_db.h" diff --git a/util/CMakeLists.txt b/util/CMakeLists.txt index 3af867238cf..6f6b899e5b7 100644 --- a/util/CMakeLists.txt +++ b/util/CMakeLists.txt @@ -1,5 +1,6 @@ set(util_srcs context + dbt frwlock kibbutz memarena diff --git a/ft/ybt.cc b/util/dbt.cc similarity index 93% rename from ft/ybt.cc rename to util/dbt.cc index 4e0fd2b941c..99d5371a6fd 100644 --- a/ft/ybt.cc +++ b/util/dbt.cc @@ -94,29 +94,25 @@ PATENT RIGHTS GRANT: #include "portability/memory.h" -#include "ft/ybt.h" +#include "util/dbt.h" -DBT * -toku_init_dbt(DBT *dbt) { +DBT *toku_init_dbt(DBT *dbt) { memset(dbt, 0, sizeof(*dbt)); return dbt; } -DBT -toku_empty_dbt(void) { +DBT toku_empty_dbt(void) { static const DBT empty_dbt = { .data = 0, .size = 0, .ulen = 0, .flags = 0 }; return empty_dbt; } -DBT * -toku_init_dbt_flags(DBT *dbt, uint32_t flags) { +DBT *toku_init_dbt_flags(DBT *dbt, uint32_t flags) { toku_init_dbt(dbt); dbt->flags = flags; return dbt; } -DBT_ARRAY * -toku_dbt_array_init(DBT_ARRAY *dbts, uint32_t size) { +DBT_ARRAY *toku_dbt_array_init(DBT_ARRAY *dbts, uint32_t size) { uint32_t capacity = 1; while (capacity < size) { capacity *= 2; } @@ -129,8 +125,7 @@ toku_dbt_array_init(DBT_ARRAY *dbts, uint32_t size) { return dbts; } -void -toku_dbt_array_resize(DBT_ARRAY *dbts, uint32_t size) { +void toku_dbt_array_resize(DBT_ARRAY *dbts, uint32_t size) { if (size != dbts->size) { if (size > dbts->capacity) { const uint32_t old_capacity = dbts->capacity; @@ -158,14 +153,12 @@ 
toku_dbt_array_resize(DBT_ARRAY *dbts, uint32_t size) { } } -void -toku_dbt_array_destroy_shallow(DBT_ARRAY *dbts) { +void toku_dbt_array_destroy_shallow(DBT_ARRAY *dbts) { toku_free(dbts->dbts); ZERO_STRUCT(*dbts); } -void -toku_dbt_array_destroy(DBT_ARRAY *dbts) { +void toku_dbt_array_destroy(DBT_ARRAY *dbts) { for (uint32_t i = 0; i < dbts->capacity; i++) { toku_destroy_dbt(&dbts->dbts[i]); } @@ -174,8 +167,7 @@ toku_dbt_array_destroy(DBT_ARRAY *dbts) { -void -toku_destroy_dbt(DBT *dbt) { +void toku_destroy_dbt(DBT *dbt) { switch (dbt->flags) { case DB_DBT_MALLOC: case DB_DBT_REALLOC: @@ -185,8 +177,7 @@ toku_destroy_dbt(DBT *dbt) { } } -DBT * -toku_fill_dbt(DBT *dbt, const void *k, uint32_t len) { +DBT *toku_fill_dbt(DBT *dbt, const void *k, uint32_t len) { toku_init_dbt(dbt); dbt->size=len; dbt->data=(char*)k; @@ -218,8 +209,7 @@ toku_sdbt_cleanup(struct simple_dbt *sdbt) { memset(sdbt, 0, sizeof(*sdbt)); } -static inline int -sdbt_realloc(struct simple_dbt *sdbt) { +static inline int sdbt_realloc(struct simple_dbt *sdbt) { void *new_data = toku_realloc(sdbt->data, sdbt->len); int r; if (new_data == NULL) { @@ -231,8 +221,7 @@ sdbt_realloc(struct simple_dbt *sdbt) { return r; } -static inline int -dbt_realloc(DBT *dbt) { +static inline int dbt_realloc(DBT *dbt) { void *new_data = toku_realloc(dbt->data, dbt->ulen); int r; if (new_data == NULL) { @@ -244,13 +233,13 @@ dbt_realloc(DBT *dbt) { return r; } -int -toku_dbt_set (uint32_t len, const void *val, DBT *d, struct simple_dbt *sdbt) { // sdbt is the static value used when flags==0 // Otherwise malloc or use the user-supplied memory, as according to the flags in d->flags. 
+int toku_dbt_set(uint32_t len, const void *val, DBT *d, struct simple_dbt *sdbt) { int r; - if (!d) r = 0; - else { + if (d == nullptr) { + r = 0; + } else { switch (d->flags) { case (DB_DBT_USERMEM): d->size = len; diff --git a/ft/ybt.h b/util/dbt.h similarity index 99% rename from ft/ybt.h rename to util/dbt.h index 480790dcdf5..9d8329e78bf 100644 --- a/ft/ybt.h +++ b/util/dbt.h @@ -117,7 +117,7 @@ DBT *toku_clone_dbt(DBT *dst, const DBT &src); int toku_dbt_set(uint32_t len, const void *val, DBT *d, struct simple_dbt *sdbt); -int toku_dbt_set_value(DBT *, const void **val, uint32_t vallen, void **staticptrp, bool ybt1_disposable); +int toku_dbt_set_value(DBT *, const void **val, uint32_t vallen, void **staticptrp, bool dbt1_disposable); void toku_sdbt_cleanup(struct simple_dbt *sdbt); From e91c66cce6b05746f22eb1ca1891e0b79d21a124 Mon Sep 17 00:00:00 2001 From: John Esmet Date: Thu, 3 Jul 2014 12:36:25 -0400 Subject: [PATCH 073/190] FT-297 Move tools to tools/ --- ft/CMakeLists.txt | 17 ----------------- tools/CMakeLists.txt | 23 +++++++++++++++++------ {ft => tools}/ftverify.cc | 0 {ft => tools}/tdb-recover.cc | 0 {ft => tools}/tdb_logprint.cc | 0 {ft => tools}/tokuftdump.cc | 0 6 files changed, 17 insertions(+), 23 deletions(-) rename {ft => tools}/ftverify.cc (100%) rename {ft => tools}/tdb-recover.cc (100%) rename {ft => tools}/tdb_logprint.cc (100%) rename {ft => tools}/tokuftdump.cc (100%) diff --git a/ft/CMakeLists.txt b/ft/CMakeLists.txt index 1ac723367e4..5bea203a36b 100644 --- a/ft/CMakeLists.txt +++ b/ft/CMakeLists.txt @@ -87,21 +87,4 @@ target_link_libraries(ft LINK_PRIVATE util_static lzma ${LIBTOKUPORTABILITY}) target_link_libraries(ft LINK_PUBLIC z) target_link_libraries(ft_static LINK_PRIVATE lzma) -## build the bins in this directory -foreach(tool tokuftdump tdb_logprint tdb-recover ftverify) - add_executable(${tool} ${tool}) - add_dependencies(${tool} install_tdb_h) - target_link_libraries(${tool} ft_static util_static z lzma 
${LIBTOKUPORTABILITY}_static ${CMAKE_THREAD_LIBS_INIT} ${EXTRA_SYSTEM_LIBS}) - add_space_separated_property(TARGET ${tool} COMPILE_FLAGS -fvisibility=hidden) -endforeach(tool) - -# link in math.h library just for this tool. -target_link_libraries(ftverify m) - -install( - TARGETS tokuftdump - DESTINATION bin - COMPONENT tokukv_tools - ) - add_subdirectory(tests) diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt index bd5f288b0f1..4e513eae1fc 100644 --- a/tools/CMakeLists.txt +++ b/tools/CMakeLists.txt @@ -1,9 +1,20 @@ set_property(DIRECTORY APPEND PROPERTY COMPILE_DEFINITIONS _GNU_SOURCE DONT_DEPRECATE_ERRNO) -set(utils tokudb_dump) -foreach(util ${utils}) - add_executable(${util} ${util}) - target_link_libraries(${util} ${LIBTOKUDB}_static ft_static z lzma ${LIBTOKUPORTABILITY}_static ${CMAKE_THREAD_LIBS_INIT} ${EXTRA_SYSTEM_LIBS}) +set(tools tokudb_dump tokuftdump tdb_logprint tdb-recover ftverify) +foreach(tool ${tools}) + add_executable(${tool} ${tool}) + add_dependencies(${tool} install_tdb_h) + target_link_libraries(${tool} ${LIBTOKUDB}_static ft_static z lzma ${LIBTOKUPORTABILITY}_static ${CMAKE_THREAD_LIBS_INIT} ${EXTRA_SYSTEM_LIBS}) + + add_space_separated_property(TARGET ${tool} COMPILE_FLAGS -fvisibility=hidden) +endforeach(tool) + +# link in math.h library just for this tool. 
+target_link_libraries(ftverify m) + +install( + TARGETS tokuftdump + DESTINATION bin + COMPONENT tokukv_tools + ) - add_space_separated_property(TARGET ${util} COMPILE_FLAGS -fvisibility=hidden) -endforeach(util) diff --git a/ft/ftverify.cc b/tools/ftverify.cc similarity index 100% rename from ft/ftverify.cc rename to tools/ftverify.cc diff --git a/ft/tdb-recover.cc b/tools/tdb-recover.cc similarity index 100% rename from ft/tdb-recover.cc rename to tools/tdb-recover.cc diff --git a/ft/tdb_logprint.cc b/tools/tdb_logprint.cc similarity index 100% rename from ft/tdb_logprint.cc rename to tools/tdb_logprint.cc diff --git a/ft/tokuftdump.cc b/tools/tokuftdump.cc similarity index 100% rename from ft/tokuftdump.cc rename to tools/tokuftdump.cc From 85d55da2f32d3ae1832daf662228b17214325357 Mon Sep 17 00:00:00 2001 From: John Esmet Date: Thu, 3 Jul 2014 12:36:25 -0400 Subject: [PATCH 074/190] FT-293 Remove examples/, which hasn't been built or maintained in years --- CMakeLists.txt | 3 - examples/CMakeLists.txt | 16 - examples/Makefile | 29 -- examples/README.examples | 85 ----- examples/db-insert-multiple.c | 510 ---------------------------- examples/db-insert.c | 610 ---------------------------------- examples/db-scan.c | 461 ------------------------- examples/db-update.c | 379 --------------------- 8 files changed, 2093 deletions(-) delete mode 100644 examples/CMakeLists.txt delete mode 100644 examples/Makefile delete mode 100644 examples/README.examples delete mode 100644 examples/db-insert-multiple.c delete mode 100644 examples/db-insert.c delete mode 100644 examples/db-scan.c delete mode 100644 examples/db-update.c diff --git a/CMakeLists.txt b/CMakeLists.txt index 5a5a9713b4e..ec333002cf7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -77,9 +77,6 @@ add_subdirectory(locktree) add_subdirectory(src) add_subdirectory(tools) -## subdirectories that just install things -add_subdirectory(examples) - install( FILES README.md README-TOKUDB DESTINATION . 
diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt deleted file mode 100644 index 01ad01aa8d2..00000000000 --- a/examples/CMakeLists.txt +++ /dev/null @@ -1,16 +0,0 @@ -# detect when we are being built as a subproject -if (NOT DEFINED MYSQL_PROJECT_NAME_DOCSTRING) - install( - FILES - db-insert.c - db-insert-multiple.c - db-scan.c - db-update.c - Makefile - README.examples - DESTINATION - examples - COMPONENT - tokukv_examples - ) -endif () \ No newline at end of file diff --git a/examples/Makefile b/examples/Makefile deleted file mode 100644 index 7f11d23dfd8..00000000000 --- a/examples/Makefile +++ /dev/null @@ -1,29 +0,0 @@ -SRCS = $(wildcard *.c) -TARGETS = $(patsubst %.c,%,$(SRCS)) $(patsubst %.c,%-bdb,$(SRCS)) -CPPFLAGS = -I../include -D_GNU_SOURCE -CFLAGS = -g -std=c99 -Wall -Wextra -Werror -Wno-missing-field-initializers -ifeq ($(USE_STATIC_LIBS),1) -LIBTOKUDB = tokufractaltree_static -LIBTOKUPORTABILITY = tokuportability_static -else -LIBTOKUDB = tokufractaltree -LIBTOKUPORTABILITY = tokuportability -endif -LDFLAGS = -L../lib -l$(LIBTOKUDB) -l$(LIBTOKUPORTABILITY) -Wl,-rpath,../lib -lpthread -lz -ldl - -default local: $(TARGETS) - -%: %.c - $(CC) $(CPPFLAGS) $(CFLAGS) $^ -o $@ $(LDFLAGS) - -%-bdb: %.c - $(CC) -D_GNU_SOURCE -DBDB $(CFLAGS) $^ -o $@ -ldb - -check: $(TARGETS) - ./db-insert -x && ./db-scan --lwc --prelock --prelockflag - -checknox: $(TARGETS) - ./db-insert && ./db-scan --nox --lwc --prelock --prelockflag - -clean: - rm -rf $(TARGETS) bench.* update.env.* insertm.env.* diff --git a/examples/README.examples b/examples/README.examples deleted file mode 100644 index 2fc6071d686..00000000000 --- a/examples/README.examples +++ /dev/null @@ -1,85 +0,0 @@ -The examples includes a pair of programs that can be compiled to use either the Berkeley DB library or the Tokutek Fractal Tree index library. - -Note: The file formats are different from TokuDB and Berkley DB. 
Thus -you cannot access a database created by Berkeley DB using the Tokutek -DB, or vice-versa. - -db-insert is a program that inserts random key-value pairs into a database. - -db-scan is a program that scans through the key-value pairs, reading every row, from a database. - -db-update is a program that upserts key-value pairs into a database. If the key already exists it increment a count in the value. - -db-insert-multiple is a program and inserts key-value pairs into multiple databases. This is is now TokuDB maintains consistent -secondary databases. - -To build it and run it (it's been tested on Fedora 10): -$ make (Makes the binaries) -Run the insertion workload under TokuDB: -$ ./db-insert -Run the insertion workload under BDB: -$ ./db-insert-bdb - -Here is what the output looks like (this on a Thinkpad X61s laptop -running Fedora 10). BDB is a little faster for sequential insertions -(the first three columns), but much much slower for random insertions -(the next 3 columns), so that TokuDB is faster on combined workload. 
- -$ ./db-insert -serial and random insertions of 1048576 per batch -serial 2.609965s 401759/s random 10.983798s 95466/s cumulative 13.593869s 154272/s -serial 3.053433s 343409/s random 12.008670s 87318/s cumulative 28.656115s 146367/s -serial 5.198312s 201715/s random 15.087426s 69500/s cumulative 48.954605s 128516/s -serial 6.096396s 171999/s random 13.550688s 77382/s cumulative 68.638321s 122215/s -Shutdown 4.025110s -Total time 72.677498s for 8388608 insertions = 115422/s -$ ./db-insert-bdb -serial and random insertions of 1048576 per batch -serial 2.623888s 399627/s random 8.770850s 119552/s cumulative 11.394805s 184045/s -serial 3.081946s 340232/s random 21.046589s 49822/s cumulative 35.523434s 118071/s -serial 14.160498s 74049/s random 497.117523s 2109/s cumulative 546.804504s 11506/s -serial 1.534212s 683462/s random 1128.525146s 929/s cumulative 1676.863892s 5003/s -Shutdown 195.879242s -Total time 1872.746582s for 8388608 insertions = 4479/s - -The files are smaller for TokuDB than BDB. - -$ ls -lh bench.tokudb/ -total 39M --rwxrwxr-x 1 bradley bradley 39M 2009-07-28 15:36 bench.db -$ ls -lh bench.bdb/ -total 322M --rw-r--r-- 1 bradley bradley 322M 2009-07-28 16:14 bench.db - -When scanning the table, one can run out of locks with BDB. There are ways around it (increase the lock table size). - -$ ./db-scan-bdb --nox -Lock table is out of available object entries -db-scan-bdb: db-scan.c:177: scanscan_hwc: Assertion `r==(-30988)' failed. -Aborted - -TokuDB is fine on a big table scan. - -$ ./db-scan --nox -Scan 33162304 bytes (2072644 rows) in 7.924463s at 4.184801MB/s -Scan 33162304 bytes (2072644 rows) in 3.062239s at 10.829431MB/s -0:3 1:53 2:56 -miss=3 hit=53 wait_reading=0 wait=0 -VmPeak: 244668 kB -VmHWM: 68096 kB -VmRSS: 1232 kB - -The update-bdb program upserts 1B rows into a BDB database. When the database gets larger than memory, the throughput -should tank since every update needs to read a block from the storage system. 
The storage system becomes the performance -bottleneck. The program uses 1 1GB cache in front of the kernel's file system buffer cache. The program should hit the wall -at about 300M rows on a machine with 16GB of memory since keys are 8 bytes and values are 8 bytes in size. - -$ ./db-update-bdb - -The update program upserts 1B rows into a TokuDB database. Throughput should be not degrade significantly since the cost -of the storage system reads is amortized over 1000's of update operations. One should expect TokuDB to be at least 50 times -faster than BDB. - -$ ./db-update - -There isn't much documentation for the Tokutek Fractal Tree index library, but most of the API is like Berkeley DB's. diff --git a/examples/db-insert-multiple.c b/examples/db-insert-multiple.c deleted file mode 100644 index e77dd94547f..00000000000 --- a/examples/db-insert-multiple.c +++ /dev/null @@ -1,510 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). - - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. 
- - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. 
- - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
- -// measure the performance of insertions into multiple dictionaries using ENV->put_multiple -// the table schema is t(a bigint, b bigint, c bigint, d bigint, primary key(a), key(b), key(c,d), clustering key(d)) -// the primary key(a) is represented with key=a and value=b,c,d -// the key(b) index is represented with key=b,a and no value -// the key(c,d) index is represented with key=c,d,a and no value -// the clustering key(d) is represented with key=d,a and value=b,c -// a is auto increment -// b, c and d are random - -#include "../include/toku_config.h" -#include -#include -#include -#include -#include -#include -#include -#if defined(HAVE_BYTESWAP_H) -# include -#elif defined(HAVE_LIBKERN_OSBYTEORDER_H) -# include -# define bswap_64 OSSwapInt64 -#endif -#include -#include "db.h" - -static int force_multiple = 1; - -struct table { - int ndbs; - DB **dbs; -#if defined(TOKUDB) - DBT *mult_keys; - DBT *mult_vals; - uint32_t *mult_flags; -#endif -}; - -#if defined(TOKUDB) -static void table_init_dbt(DBT *dbt, size_t length) { - dbt->flags = DB_DBT_USERMEM; - dbt->data = malloc(length); - dbt->ulen = length; - dbt->size = 0; -} - -static void table_destroy_dbt(DBT *dbt) { - free(dbt->data); -} -#endif - -static void table_init(struct table *t, int ndbs, DB **dbs, size_t key_length __attribute__((unused)), size_t val_length __attribute__((unused))) { - t->ndbs = ndbs; - t->dbs = dbs; -#if defined(TOKUDB) - t->mult_keys = calloc(ndbs, sizeof (DBT)); - int i; - for (i = 0; i < ndbs; i++) - table_init_dbt(&t->mult_keys[i], key_length); - t->mult_vals = calloc(ndbs, sizeof (DBT)); - for (i = 0; i < ndbs; i++) - table_init_dbt(&t->mult_vals[i], val_length); - t->mult_flags = calloc(ndbs, sizeof (uint32_t)); - for (i = 0; i < ndbs; i++) - t->mult_flags[i] = 0; -#endif -} - -static void table_destroy(struct table *t) { -#if defined(TOKUDB) - int i; - for (i = 0; i < t->ndbs; i++) - table_destroy_dbt(&t->mult_keys[i]); - free(t->mult_keys); - for (i = 0; i < t->ndbs; i++) - 
table_destroy_dbt(&t->mult_vals[i]); - free(t->mult_vals); - free(t->mult_flags); -#else - assert(t); -#endif -} - -static int verbose = 0; - -static long random64(void) { - return ((long)random() << 32LL) + (long)random(); -} - -static long htonl64(long x) { -#if BYTE_ORDER == LITTLE_ENDIAN - return bswap_64(x); -#else -#error -#endif -} - -#if defined(TOKUDB) -static int my_generate_row_for_put(DB *dest_db, DB *src_db, DBT *dest_key, DBT *dest_val, const DBT *src_key, const DBT *src_val) { - assert(src_db); - assert(dest_key->flags == DB_DBT_USERMEM && dest_key->ulen >= 4 * 8); - assert(dest_val->flags == DB_DBT_USERMEM && dest_val->ulen >= 4 * 8); - int index_num; - assert(dest_db->descriptor->dbt.size == sizeof index_num); - memcpy(&index_num, dest_db->descriptor->dbt.data, sizeof index_num); - switch (htonl(index_num) % 4) { - case 0: - // dest_key = src_key - dest_key->size = src_key->size; - memcpy(dest_key->data, src_key->data, src_key->size); - // dest_val = src_val - dest_val->size = src_val->size; - memcpy(dest_val->data, src_val->data, src_val->size); - break; - case 1: - // dest_key = b,a - dest_key->size = 2 * 8; - memcpy((char *)dest_key->data + 0, (char *)src_val->data + 0, 8); - memcpy((char *)dest_key->data + 8, (char *)src_key->data + 0, 8); - // dest_val = null - dest_val->size = 0; - break; - case 2: - // dest_key = c,d,a - dest_key->size = 3 * 8; - memcpy((char *)dest_key->data + 0, (char *)src_val->data + 8, 8); - memcpy((char *)dest_key->data + 8, (char *)src_val->data + 16, 8); - memcpy((char *)dest_key->data + 16, (char *)src_key->data + 0, 8); - // dest_val = null - dest_val->size = 0; - break; - case 3: - // dest_key = d,a - dest_key->size = 2 * 8; - memcpy((char *)dest_key->data + 0, (char *)src_val->data + 16, 8); - memcpy((char *)dest_key->data + 8, (char *)src_key->data + 0, 8); - // dest_val = b,c - dest_val->size = 2 * 8; - memcpy((char *)dest_val->data + 0, (char *)src_val->data + 0, 8); - memcpy((char *)dest_val->data + 8, (char 
*)src_val->data + 8, 8); - break; - default: - assert(0); - } - return 0; -} - -#else - -static int my_secondary_key(DB *db, const DBT *src_key, const DBT *src_val, DBT *dest_key) { - assert(dest_key->flags == 0 && dest_key->data == NULL); - dest_key->flags = DB_DBT_APPMALLOC; - dest_key->data = malloc(4 * 8); assert(dest_key->data); - switch ((intptr_t)db->app_private % 4) { - case 0: - // dest_key = src_key - dest_key->size = src_key->size; - memcpy(dest_key->data, src_key->data, src_key->size); - break; - case 1: - // dest_key = b,a - dest_key->size = 2 * 8; - memcpy((char *)dest_key->data + 0, (char *)src_val->data + 0, 8); - memcpy((char *)dest_key->data + 8, (char *)src_key->data + 0, 8); - break; - case 2: - // dest_key = c,d,a - dest_key->size = 3 * 8; - memcpy((char *)dest_key->data + 0, (char *)src_val->data + 8, 8); - memcpy((char *)dest_key->data + 8, (char *)src_val->data + 16, 8); - memcpy((char *)dest_key->data + 16, (char *)src_key->data + 0, 8); - break; - case 3: - // dest_key = d,a,b,c - dest_key->size = 4 * 8; - memcpy((char *)dest_key->data + 0, (char *)src_val->data + 16, 8); - memcpy((char *)dest_key->data + 8, (char *)src_key->data + 0, 8); - memcpy((char *)dest_key->data + 16, (char *)src_val->data + 0, 8); - memcpy((char *)dest_key->data + 24, (char *)src_val->data + 8, 8); - break; - default: - assert(0); - } - return 0; -} -#endif - -static void insert_row(DB_ENV *db_env, struct table *t, DB_TXN *txn, long a, long b, long c, long d) { - int r; - - // generate the primary key - char key_buffer[8]; - a = htonl64(a); - memcpy(key_buffer, &a, sizeof a); - - // generate the primary value - char val_buffer[3*8]; - b = htonl64(b); - memcpy(val_buffer+0, &b, sizeof b); - c = htonl64(c); - memcpy(val_buffer+8, &c, sizeof c); - d = htonl64(d); - memcpy(val_buffer+16, &d, sizeof d); - - DBT key = { .data = key_buffer, .size = sizeof key_buffer }; - DBT value = { .data = val_buffer, .size = sizeof val_buffer }; -#if defined(TOKUDB) - if 
(!force_multiple && t->ndbs == 1) { - r = t->dbs[0]->put(t->dbs[0], txn, &key, &value, t->mult_flags[0]); assert(r == 0); - } else { - r = db_env->put_multiple(db_env, t->dbs[0], txn, &key, &value, t->ndbs, &t->dbs[0], t->mult_keys, t->mult_vals, t->mult_flags); assert(r == 0); - } -#else - assert(db_env); - r = t->dbs[0]->put(t->dbs[0], txn, &key, &value, 0); assert(r == 0); -#endif -} - -static inline float tdiff (struct timeval *a, struct timeval *b) { - return (a->tv_sec - b->tv_sec) +1e-6*(a->tv_usec - b->tv_usec); -} - -static void insert_all(DB_ENV *db_env, struct table *t, long nrows, long max_rows_per_txn, long key_range, long rows_per_report, bool do_txn) { - int r; - - struct timeval tstart; - r = gettimeofday(&tstart, NULL); assert(r == 0); - struct timeval tlast = tstart; - DB_TXN *txn = NULL; - if (do_txn) { - r = db_env->txn_begin(db_env, NULL, &txn, 0); assert(r == 0); - } - long n_rows_per_txn = 0; - long rowi; - for (rowi = 0; rowi < nrows; rowi++) { - long a = rowi; - long b = random64() % key_range; - long c = random64() % key_range; - long d = random64() % key_range; - insert_row(db_env, t, txn, a, b, c, d); - n_rows_per_txn++; - - // maybe commit - if (do_txn && n_rows_per_txn == max_rows_per_txn) { - r = txn->commit(txn, 0); assert(r == 0); - r = db_env->txn_begin(db_env, NULL, &txn, 0); assert(r == 0); - n_rows_per_txn = 0; - } - - // maybe report performance - if (((rowi + 1) % rows_per_report) == 0) { - struct timeval tnow; - r = gettimeofday(&tnow, NULL); assert(r == 0); - float last_time = tdiff(&tnow, &tlast); - float total_time = tdiff(&tnow, &tstart); - printf("%ld %.3f %.0f/s %.0f/s\n", rowi + 1, last_time, rows_per_report/last_time, rowi/total_time); fflush(stdout); - tlast = tnow; - } - } - - if (do_txn) { - r = txn->commit(txn, 0); assert(r == 0); - } - struct timeval tnow; - r = gettimeofday(&tnow, NULL); assert(r == 0); - printf("total %ld %.3f %.0f/s\n", nrows, tdiff(&tnow, &tstart), nrows/tdiff(&tnow, &tstart)); 
fflush(stdout); -} - -int main(int argc, char *argv[]) { -#if defined(TOKDUB) - char *db_env_dir = "insertm.env.tokudb"; -#else - char *db_env_dir = "insertm.env.bdb"; -#endif - int db_env_open_flags = DB_CREATE | DB_PRIVATE | DB_INIT_MPOOL | DB_INIT_TXN | DB_INIT_LOCK | DB_INIT_LOG; - long rows = 100000000; - long rows_per_txn = 1000; - long rows_per_report = 100000; - long key_range = 100000; - bool do_txn = true; - u_int32_t pagesize = 0; - u_int64_t cachesize = 1000000000; - int ndbs = 4; -#if defined(TOKUDB) - u_int32_t checkpoint_period = 60; -#endif - - int i; - for (i = 1; i < argc; i++) { - char *arg = argv[i]; - if (strcmp(arg, "--verbose") == 0) { - verbose++; - continue; - } - if (strcmp(arg, "--ndbs") == 0 && i+1 < argc) { - ndbs = atoi(argv[++i]); - continue; - } - if (strcmp(arg, "--rows") == 0 && i+1 < argc) { - rows = atol(argv[++i]); - continue; - } - if (strcmp(arg, "--rows_per_txn") == 0 && i+1 < argc) { - rows_per_txn = atol(argv[++i]); - continue; - } - if (strcmp(arg, "--rows_per_report") == 0 && i+1 < argc) { - rows_per_report = atol(argv[++i]); - continue; - } - if (strcmp(arg, "--key_range") == 0 && i+1 < argc) { - key_range = atol(argv[++i]); - continue; - } - if (strcmp(arg, "--txn") == 0 && i+1 < argc) { - do_txn = atoi(argv[++i]); - continue; - } - if (strcmp(arg, "--pagesize") == 0 && i+1 < argc) { - pagesize = atoi(argv[++i]); - continue; - } - if (strcmp(arg, "--cachesize") == 0 && i+1 < argc) { - cachesize = atol(argv[++i]); - continue; - } - if (strcmp(arg, "--force_multiple") == 0 && i+1 < argc) { - force_multiple = atoi(argv[++i]); - continue; - } -#if defined(TOKUDB) - if (strcmp(arg, "--checkpoint_period") == 0 && i+1 < argc) { - checkpoint_period = atoi(argv[++i]); - continue; - } -#endif - - assert(0); - } - - int r; - char rm_cmd[strlen(db_env_dir) + strlen("rm -rf ") + 1]; - snprintf(rm_cmd, sizeof(rm_cmd), "rm -rf %s", db_env_dir); - r = system(rm_cmd); assert(r == 0); - - r = mkdir(db_env_dir, S_IRWXU | S_IRGRP | S_IXGRP 
| S_IROTH | S_IXOTH); assert(r == 0); - - // create and open the env - DB_ENV *db_env = NULL; - r = db_env_create(&db_env, 0); assert(r == 0); - if (!do_txn) - db_env_open_flags &= ~(DB_INIT_TXN | DB_INIT_LOG); - if (cachesize) { - const u_int64_t gig = 1 << 30; - r = db_env->set_cachesize(db_env, cachesize / gig, cachesize % gig, 1); assert(r == 0); - } -#if defined(TOKUDB) - r = db_env->set_generate_row_callback_for_put(db_env, my_generate_row_for_put); assert(r == 0); -#endif - r = db_env->open(db_env, db_env_dir, db_env_open_flags, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); assert(r == 0); -#if defined(TOKUDB) - if (checkpoint_period) { - r = db_env->checkpointing_set_period(db_env, checkpoint_period); assert(r == 0); - u_int32_t period; - r = db_env->checkpointing_get_period(db_env, &period); assert(r == 0 && period == checkpoint_period); - } -#endif - - - // create the db - DB *dbs[ndbs]; - for (i = 0; i < ndbs; i++) { - DB *db = NULL; - r = db_create(&db, db_env, 0); assert(r == 0); - DB_TXN *create_txn = NULL; - if (do_txn) { - r = db_env->txn_begin(db_env, NULL, &create_txn, 0); assert(r == 0); - } - if (pagesize) { - r = db->set_pagesize(db, pagesize); assert(r == 0); - } - char db_filename[32]; sprintf(db_filename, "test%d", i); - r = db->open(db, create_txn, db_filename, NULL, DB_BTREE, DB_CREATE, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); assert(r == 0); - -#if defined(TOKUDB) - DESCRIPTOR_S new_descriptor; - int index_num = htonl(i); - new_descriptor.dbt.data = &index_num; - new_descriptor.dbt.size = sizeof i; - r = db->change_descriptor(db, create_txn, &new_descriptor.dbt, 0); assert(r == 0); -#else - db->app_private = (void *) (intptr_t) i; - if (i > 0) { - r = dbs[0]->associate(dbs[0], create_txn, db, my_secondary_key, 0); assert(r == 0); - } -#endif - if (do_txn) { - r = create_txn->commit(create_txn, 0); assert(r == 0); - } - dbs[i] = db; - } - - // insert all rows - struct table table; - table_init(&table, ndbs, dbs, 4 * 8, 4 * 8); - - 
insert_all(db_env, &table, rows, rows_per_txn, key_range, rows_per_report, do_txn); - - table_destroy(&table); - - // shutdown - for (i = 0; i < ndbs; i++) { - DB *db = dbs[i]; - r = db->close(db, 0); assert(r == 0); db = NULL; - } - r = db_env->close(db_env, 0); assert(r == 0); db_env = NULL; - - return 0; -} diff --git a/examples/db-insert.c b/examples/db-insert.c deleted file mode 100644 index 87cd9d35e21..00000000000 --- a/examples/db-insert.c +++ /dev/null @@ -1,610 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). - - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. 
- -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. - - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. 
If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
- -#include -#include -#include -// Define BDB if you want to compile this to use Berkeley DB -#include -#include -#ifdef BDB -#include -#include -#define DIRSUF bdb -#else -#include -#define DIRSUF tokudb -#endif - -#include -#include -#include -#include -#include - -static inline float toku_tdiff (struct timeval *a, struct timeval *b) { - return (a->tv_sec - b->tv_sec) +1e-6*(a->tv_usec - b->tv_usec); -} - -#if !defined(DB_PRELOCKED_WRITE) -#define NO_DB_PRELOCKED -#define DB_PRELOCKED_WRITE 0 -#endif - -int verbose=1; - -enum { SERIAL_SPACING = 1<<6 }; -enum { DEFAULT_ITEMS_TO_INSERT_PER_ITERATION = 1<<20 }; -enum { DEFAULT_ITEMS_PER_TRANSACTION = 1<<14 }; - -static void insert (long long v); -#define CKERR(r) ({ int __r = r; if (__r!=0) fprintf(stderr, "%s:%d error %d %s\n", __FILE__, __LINE__, __r, db_strerror(r)); assert(__r==0); }) -#define CKERR2(r,rexpect) if (r!=rexpect) fprintf(stderr, "%s:%d error %d %s\n", __FILE__, __LINE__, r, db_strerror(r)); assert(r==rexpect); - -/* default test parameters */ -int keysize = sizeof (long long); -int valsize = sizeof (long long); -int pagesize = 0; -long long cachesize = 1000000000; // 1GB -int dupflags = 0; -int noserial = 0; // Don't do the serial stuff -int norandom = 0; // Don't do the random stuff -int prelock = 0; -int prelockflag = 0; -int items_per_transaction = DEFAULT_ITEMS_PER_TRANSACTION; -int items_per_iteration = DEFAULT_ITEMS_TO_INSERT_PER_ITERATION; -int finish_child_first = 0; // Commit or abort child first (before doing so to the parent). No effect if child does not exist. 
-int singlex_child = 0; // Do a single transaction, but do all work with a child -int singlex = 0; // Do a single transaction -int singlex_create = 0; // Create the db using the single transaction (only valid if singlex) -int insert1first = 0; // insert 1 before doing the rest -int do_transactions = 0; -int if_transactions_do_logging = DB_INIT_LOG; // set this to zero if we want no logging when transactions are used -int do_abort = 0; -int n_insertions_since_txn_began=0; -int env_open_flags = DB_CREATE|DB_PRIVATE|DB_INIT_MPOOL; -u_int32_t put_flags = 0; -double compressibility = -1; // -1 means make it very compressible. 1 means use random bits everywhere. 2 means half the bits are random. -int do_append = 0; -u_int32_t checkpoint_period = 60; - -static void do_prelock(DB* db, DB_TXN* txn) { - if (prelock) { -#if !defined(NO_DB_PRELOCKED) - int r = db->pre_acquire_table_lock(db, txn); - assert(r==0); -#else - (void) db; (void) txn; -#endif - } -} - -#define STRINGIFY2(s) #s -#define STRINGIFY(s) STRINGIFY2(s) -const char *dbdir = "./bench." 
STRINGIFY(DIRSUF); -char *dbfilename = "bench.db"; -char *dbname; - -DB_ENV *dbenv; -DB *db; -DB_TXN *parenttid=0; -DB_TXN *tid=0; - - -static void benchmark_setup (void) { - int r; - - if (!do_append) { - char unlink_cmd[strlen(dbdir) + strlen("rm -rf ") + 1]; - snprintf(unlink_cmd, sizeof(unlink_cmd), "rm -rf %s", dbdir); - //printf("unlink_cmd=%s\n", unlink_cmd); - system(unlink_cmd); - - if (strcmp(dbdir, ".") != 0) { - r = mkdir(dbdir,S_IRWXU|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH); - assert(r == 0); - } - } - - r = db_env_create(&dbenv, 0); - assert(r == 0); - -#if !defined(TOKUDB) -#if DB_VERSION_MAJOR == 4 && DB_VERSION_MINOR <= 4 - if (dbenv->set_lk_max) { - r = dbenv->set_lk_max(dbenv, items_per_transaction*2); - assert(r==0); - } -#elif (DB_VERSION_MAJOR == 4 && DB_VERSION_MINOR <= 7) || DB_VERSION_MAJOR >= 5 - if (dbenv->set_lk_max_locks) { - r = dbenv->set_lk_max_locks(dbenv, items_per_transaction*2); - assert(r==0); - } - if (dbenv->set_lk_max_lockers) { - r = dbenv->set_lk_max_lockers(dbenv, items_per_transaction*2); - assert(r==0); - } - if (dbenv->set_lk_max_objects) { - r = dbenv->set_lk_max_objects(dbenv, items_per_transaction*2); - assert(r==0); - } -#else -#error -#endif -#endif - - if (dbenv->set_cachesize) { - r = dbenv->set_cachesize(dbenv, cachesize / (1024*1024*1024), cachesize % (1024*1024*1024), 1); - if (r != 0) - printf("WARNING: set_cachesize %d\n", r); - } - { - r = dbenv->open(dbenv, dbdir, env_open_flags, S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH); - assert(r == 0); - } - -#if defined(TOKUDB) - if (checkpoint_period) { - printf("set checkpoint_period %u\n", checkpoint_period); - r = dbenv->checkpointing_set_period(dbenv, checkpoint_period); assert(r == 0); - u_int32_t period; - r = dbenv->checkpointing_get_period(dbenv, &period); assert(r == 0 && period == checkpoint_period); - } -#endif - - r = db_create(&db, dbenv, 0); - assert(r == 0); - - if (do_transactions) { - r=dbenv->txn_begin(dbenv, 0, &tid, 0); CKERR(r); - } - if (pagesize && 
db->set_pagesize) { - r = db->set_pagesize(db, pagesize); - assert(r == 0); - } - if (dupflags) { - r = db->set_flags(db, dupflags); - assert(r == 0); - } - r = db->open(db, tid, dbfilename, NULL, DB_BTREE, DB_CREATE, S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH); - if (r!=0) fprintf(stderr, "errno=%d, %s\n", errno, strerror(errno)); - assert(r == 0); - if (insert1first) { - if (do_transactions) { - r=tid->commit(tid, 0); - assert(r==0); - tid = NULL; - r=dbenv->txn_begin(dbenv, 0, &tid, 0); CKERR(r); - } - insert(-1); - if (singlex) { - r=tid->commit(tid, 0); - assert(r==0); - tid = NULL; - r=dbenv->txn_begin(dbenv, 0, &tid, 0); CKERR(r); - } - } - else if (singlex && !singlex_create) { - r=tid->commit(tid, 0); - assert(r==0); - tid = NULL; - r=dbenv->txn_begin(dbenv, 0, &tid, 0); CKERR(r); - } - if (do_transactions) { - if (singlex) - do_prelock(db, tid); - else { - r=tid->commit(tid, 0); - assert(r==0); - tid = NULL; - } - } - if (singlex_child) { - parenttid = tid; - tid = NULL; - r=dbenv->txn_begin(dbenv, parenttid, &tid, 0); CKERR(r); - } - -} - -static void benchmark_shutdown (void) { - int r; - - if (do_transactions && singlex && !insert1first && (singlex_create || prelock)) { -#if defined(TOKUDB) - //There should be a single 'truncate' in the rollback instead of many 'insert' entries. - struct txn_stat *s; - r = tid->txn_stat(tid, &s); - assert(r==0); - //TODO: #1125 Always do the test after performance testing is done. - if (singlex_child) fprintf(stderr, "SKIPPED 'small rollback' test for child txn\n"); - else - assert(s->rollback_raw_count < 100); // gross test, not worth investigating details - free(s); - //system("ls -l bench.tokudb"); -#endif - } - if (do_transactions && singlex) { - if (!singlex_child || finish_child_first) { - assert(tid); - r = (do_abort ? tid->abort(tid) : tid->commit(tid, 0)); assert(r==0); - tid = NULL; - } - if (singlex_child) { - assert(parenttid); - r = (do_abort ? 
parenttid->abort(parenttid) : parenttid->commit(parenttid, 0)); assert(r==0); - parenttid = NULL; - } - else - assert(!parenttid); - } - assert(!tid); - assert(!parenttid); - - r = db->close(db, 0); - assert(r == 0); - r = dbenv->close(dbenv, 0); - assert(r == 0); -} - -static void long_long_to_array (unsigned char *a, int array_size, unsigned long long l) { - int i; - for (i=0; i<8 && i>(56-8*i))&0xff; -} - -static DBT *fill_dbt(DBT *dbt, const void *data, int size) { - memset(dbt, 0, sizeof *dbt); - dbt->size = size; - dbt->data = (void *) data; - return dbt; -} - -// Fill array with 0's if compressibilty==-1, otherwise fill array with data that is likely to compress by a factor of compressibility. -static void fill_array (unsigned char *data, int size) { - memset(data, 0, size); - if (compressibility>0) { - int i; - for (i=0; iput(db, tid, fill_dbt(&kt, kc, keysize), fill_dbt(&vt, vc, valsize), put_flags); - CKERR(r); - if (do_transactions) { - if (n_insertions_since_txn_began>=items_per_transaction && !singlex) { - n_insertions_since_txn_began=0; - r = tid->commit(tid, 0); assert(r==0); - tid = NULL; - r=dbenv->txn_begin(dbenv, 0, &tid, 0); assert(r==0); - do_prelock(db, tid); - n_insertions_since_txn_began=0; - } - n_insertions_since_txn_began++; - } -} - -static void serial_insert_from (long long from) { - long long i; - if (do_transactions && !singlex) { - int r = dbenv->txn_begin(dbenv, 0, &tid, 0); assert(r==0); - do_prelock(db, tid); - { - DBT k,v; - r=db->put(db, tid, fill_dbt(&k, "a", 1), fill_dbt(&v, "b", 1), put_flags); - CKERR(r); - } - } - for (i=0; icommit(tid, 0); assert(r==0); - tid=NULL; - } -} - -static long long llrandom (void) { - return (((long long)(random()))<<32) + random(); -} - -static void random_insert_below (long long below) { - long long i; - if (do_transactions && !singlex) { - int r = dbenv->txn_begin(dbenv, 0, &tid, 0); assert(r==0); - do_prelock(db, tid); - } - for (i=0; icommit(tid, 0); assert(r==0); - tid=NULL; - } -} - 
-static void biginsert (long long n_elements, struct timeval *starttime) { - long long i; - struct timeval t1,t2; - int iteration; - for (i=0, iteration=0; i= argc) return print_usage(argv[0]); - items_per_transaction = strtoll(argv[++i], &endptr, 10); assert(*endptr == 0); - } else if (strcmp(arg, "--abort") == 0) { - do_abort = 1; - } else if (strcmp(arg, "--periter") == 0) { - if (i+1 >= argc) return print_usage(argv[0]); - items_per_iteration = strtoll(argv[++i], &endptr, 10); assert(*endptr == 0); - } else if (strcmp(arg, "--cachesize") == 0) { - if (i+1 >= argc) return print_usage(argv[0]); - cachesize = strtoll(argv[++i], &endptr, 10); assert(*endptr == 0); - } else if (strcmp(arg, "--keysize") == 0) { - if (i+1 >= argc) return print_usage(argv[0]); - keysize = atoi(argv[++i]); - } else if (strcmp(arg, "--valsize") == 0) { - if (i+1 >= argc) return print_usage(argv[0]); - valsize = atoi(argv[++i]); - } else if (strcmp(arg, "--pagesize") == 0) { - if (i+1 >= argc) return print_usage(argv[0]); - pagesize = atoi(argv[++i]); - } else if (strcmp(arg, "--env") == 0) { - if (i+1 >= argc) return print_usage(argv[0]); - dbdir = argv[++i]; - } else if (strcmp(arg, "--prelock") == 0) { - prelock=1; - } else if (strcmp(arg, "--prelockflag") == 0) { - prelock=1; - prelockflag=1; - } else if (strcmp(arg, "--srandom") == 0) { - if (i+1 >= argc) return print_usage(argv[0]); - srandom(atoi(argv[++i])); - } else if (strcmp(arg, "--append") == 0) { - do_append = 1; - } else if (strcmp(arg, "--checkpoint-period") == 0) { - if (i+1 >= argc) return print_usage(argv[9]); - checkpoint_period = (u_int32_t) atoi(argv[++i]); - } else if (strcmp(arg, "--unique_checks") == 0) { - if (i+1 >= argc) return print_usage(argv[0]); - int unique_checks = atoi(argv[++i]); - if (unique_checks) - put_flags = DB_NOOVERWRITE; - else - put_flags = 0; - } else { - return print_usage(argv[0]); - } - } - if (do_transactions) { - env_open_flags |= DB_INIT_TXN | if_transactions_do_logging | DB_INIT_LOCK; 
- } - if (do_transactions && prelockflag) { - put_flags |= DB_PRELOCKED_WRITE; - } - if (i -#include -#include -#include -#include -#ifdef BDB -#include -#define DIRSUF bdb -#else -#include -#define DIRSUF tokudb -#endif -#include -#include -#include -#include -#include -#include -#include - -static const char *pname; -static enum run_mode { RUN_HWC, RUN_LWC, RUN_VERIFY, RUN_RANGE} run_mode = RUN_HWC; -static int do_txns=1, prelock=0, prelockflag=0; -static u_int32_t lock_flag = 0; -static long limitcount=-1; -static u_int32_t cachesize = 127*1024*1024; -static u_int64_t start_range = 0, end_range = 0; -static int n_experiments = 2; -static int bulk_fetch = 1; - -static int print_usage (const char *argv0) { - fprintf(stderr, "Usage:\n%s [--verify-lwc | --lwc | --nohwc] [--prelock] [--prelockflag] [--prelockwriteflag] [--env DIR]\n", argv0); - fprintf(stderr, " --verify-lwc means to run the light weight cursor and the heavyweight cursor to verify that they get the same answer.\n"); - fprintf(stderr, " --lwc run light weight cursors instead of heavy weight cursors\n"); - fprintf(stderr, " --prelock acquire a read lock on the entire table before running\n"); - fprintf(stderr, " --prelockflag pass DB_PRELOCKED to the the cursor get operation whenever the locks have been acquired\n"); - fprintf(stderr, " --prelockwriteflag pass DB_PRELOCKED_WRITE to the cursor get operation\n"); - fprintf(stderr, " --nox no transactions (no locking)\n"); - fprintf(stderr, " --count COUNT read the first COUNT rows and then stop.\n"); - fprintf(stderr, " --cachesize N set the env cachesize to N bytes\n"); - fprintf(stderr, " --srandom N srandom(N)\n"); - fprintf(stderr, " --env DIR put db files in DIR instead of default\n"); - fprintf(stderr, " --bulk_fetch 0|1 do bulk fetch on lwc operations (default: 1)\n"); - return 1; -} - -static DB_ENV *env; -static DB *db; -static DB_TXN *tid=0; - -#define STRINGIFY2(s) #s -#define STRINGIFY(s) STRINGIFY2(s) -static const char *dbdir = "./bench." 
STRINGIFY(DIRSUF); /* DIRSUF is passed in as a -D argument to the compiler. */ -static int env_open_flags_yesx = DB_CREATE|DB_PRIVATE|DB_INIT_MPOOL|DB_INIT_TXN|DB_INIT_LOG|DB_INIT_LOCK; -static int env_open_flags_nox = DB_CREATE|DB_PRIVATE|DB_INIT_MPOOL; -static char *dbfilename = "bench.db"; - - -static void parse_args (int argc, const char *argv[]) { - pname=argv[0]; - argc--; argv++; - int specified_run_mode=0; - while (argc>0) { - if (strcmp(*argv,"--verify-lwc")==0) { - if (specified_run_mode && run_mode!=RUN_VERIFY) { two_modes: fprintf(stderr, "You specified two run modes\n"); exit(1); } - run_mode = RUN_VERIFY; - } else if (strcmp(*argv, "--lwc")==0) { - if (specified_run_mode && run_mode!=RUN_LWC) goto two_modes; - run_mode = RUN_LWC; - } else if (strcmp(*argv, "--hwc")==0) { - if (specified_run_mode && run_mode!=RUN_VERIFY) goto two_modes; - run_mode = RUN_HWC; - } else if (strcmp(*argv, "--prelock")==0) prelock=1; -#ifdef TOKUDB - else if (strcmp(*argv, "--prelockflag")==0) { prelockflag=1; lock_flag = DB_PRELOCKED; } - else if (strcmp(*argv, "--prelockwriteflag")==0) { prelockflag=1; lock_flag = DB_PRELOCKED_WRITE; } -#endif - else if (strcmp(*argv, "--nox")==0) { do_txns=0; } - else if (strcmp(*argv, "--count")==0) { - char *end; - argc--; argv++; - errno=0; limitcount=strtol(*argv, &end, 10); assert(errno==0); - printf("Limiting count to %ld\n", limitcount); - } else if (strcmp(*argv, "--cachesize")==0 && argc>0) { - char *end; - argc--; argv++; - cachesize=(u_int32_t)strtol(*argv, &end, 10); - } else if (strcmp(*argv, "--env") == 0) { - argc--; argv++; - if (argc==0) exit(print_usage(pname)); - dbdir = *argv; - } else if (strcmp(*argv, "--range") == 0 && argc > 2) { - run_mode = RUN_RANGE; - argc--; argv++; - start_range = strtoll(*argv, NULL, 10); - argc--; argv++; - end_range = strtoll(*argv, NULL, 10); - } else if (strcmp(*argv, "--experiments") == 0 && argc > 1) { - argc--; argv++; - n_experiments = strtol(*argv, NULL, 10); - } else if 
(strcmp(*argv, "--srandom") == 0 && argc > 1) { - argc--; argv++; - srandom(atoi(*argv)); - } else if (strcmp(*argv, "--bulk_fetch") == 0 && argc > 1) { - argc--; argv++; - bulk_fetch = atoi(*argv); - } else { - exit(print_usage(pname)); - } - argc--; argv++; - } - //Prelocking is meaningless without transactions - if (do_txns==0) { - prelockflag=0; - lock_flag=0; - prelock=0; - } -} - -static void scanscan_setup (void) { - int r; - r = db_env_create(&env, 0); assert(r==0); - r = env->set_cachesize(env, 0, cachesize, 1); assert(r==0); - r = env->open(env, dbdir, do_txns? env_open_flags_yesx : env_open_flags_nox, S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH); assert(r==0); - r = db_create(&db, env, 0); assert(r==0); - if (do_txns) { - r = env->txn_begin(env, 0, &tid, 0); assert(r==0); - } - r = db->open(db, tid, dbfilename, NULL, DB_BTREE, 0, S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH); assert(r==0); -#ifdef TOKUDB - if (prelock) { - r = db->pre_acquire_table_lock(db, tid); - assert(r==0); - } -#endif -} - -static void scanscan_shutdown (void) { - int r; - r = db->close(db, 0); assert(r==0); - if (do_txns) { - r = tid->commit(tid, 0); assert(r==0); - } - r = env->close(env, 0); assert(r==0); -} - -static double gettime (void) { - struct timeval tv; - int r = gettimeofday(&tv, 0); - assert(r==0); - return tv.tv_sec + 1e-6*tv.tv_usec; -} - -static void scanscan_hwc (void) { - int r; - int counter=0; - for (counter=0; countercursor(db, tid, &dbc, 0); assert(r==0); - memset(&k, 0, sizeof(k)); - memset(&v, 0, sizeof(v)); - u_int32_t c_get_flags = DB_NEXT; - if (prelockflag && (counter || prelock)) { - c_get_flags |= lock_flag; - } - while (0 == (r = dbc->c_get(dbc, &k, &v, c_get_flags))) { - - //printf("r=%d\n", r); - - totalbytes += k.size + v.size; - rowcounter++; - if (limitcount>0 && rowcounter>=limitcount) break; - } - assert(r==DB_NOTFOUND); - r = dbc->c_close(dbc); assert(r==0); - double thistime = gettime(); - double tdiff = thistime-prevtime; - printf("Scan %lld bytes (%d rows) in 
%9.6fs at %9fMB/s\n", totalbytes, rowcounter, tdiff, 1e-6*totalbytes/tdiff); - } -} - -#ifdef TOKUDB - -struct extra_count { - long long totalbytes; - int rowcounter; -}; - -static int counttotalbytes (DBT const *key, DBT const *data, void *extrav) { - struct extra_count *e=extrav; - e->totalbytes += key->size + data->size; - e->rowcounter++; - return bulk_fetch ? TOKUDB_CURSOR_CONTINUE : 0; -} - -static void scanscan_lwc (void) { - int r; - int counter=0; - for (counter=0; countercursor(db, tid, &dbc, 0); assert(r==0); - u_int32_t f_flags = 0; - if (prelockflag && (counter || prelock)) { - f_flags |= lock_flag; - } - long rowcounter=0; - while (0 == (r = dbc->c_getf_next(dbc, f_flags, counttotalbytes, &e))) { - rowcounter++; - if (limitcount>0 && rowcounter>=limitcount) break; - } - r = dbc->c_close(dbc); assert(r==0); - double thistime = gettime(); - double tdiff = thistime-prevtime; - printf("LWC Scan %lld bytes (%d rows) in %9.6fs at %9fMB/s\n", e.totalbytes, e.rowcounter, tdiff, 1e-6*e.totalbytes/tdiff); - } -} -#endif - -static void scanscan_range (void) { - int r; - - double texperiments[n_experiments]; - u_int64_t k = 0; - char kv[8]; - DBT key, val; - - int counter; - for (counter = 0; counter < n_experiments; counter++) { - - if (1) { //if ((counter&1) == 0) { - makekey: - // generate a random key in the key range - k = (start_range + (random() % (end_range - start_range))) * (1<<6); - int i; - for (i = 0; i < 8; i++) - kv[i] = k >> (56-8*i); - } - memset(&key, 0, sizeof key); key.data = &kv, key.size = sizeof kv; - memset(&val, 0, sizeof val); - - double tstart = gettime(); - - DBC *dbc; - r = db->cursor(db, tid, &dbc, 0); assert(r==0); - - // set the cursor to the random key - r = dbc->c_get(dbc, &key, &val, DB_SET_RANGE+lock_flag); - if (r != 0) { - assert(r == DB_NOTFOUND); - printf("%s:%d %" PRIu64 "\n", __FUNCTION__, __LINE__, k); - goto makekey; - } - -#ifdef TOKUDB - // do the range scan - long rowcounter = 0; - struct extra_count e = {0,0}; - 
while (limitcount > 0 && rowcounter < limitcount) { - r = dbc->c_getf_next(dbc, prelockflag ? lock_flag : 0, counttotalbytes, &e); - if (r != 0) - break; - rowcounter++; - } -#endif - - r = dbc->c_close(dbc); - assert(r==0); - - texperiments[counter] = gettime() - tstart; - printf("%" PRIu64 " %f\n", k, texperiments[counter]); fflush(stdout); - } - - // print the times - double tsum = 0.0, tmin = 0.0, tmax = 0.0; - for (counter = 0; counter < n_experiments; counter++) { - if (counter==0 || texperiments[counter] < tmin) - tmin = texperiments[counter]; - if (counter==0 || texperiments[counter] > tmax) - tmax = texperiments[counter]; - tsum += texperiments[counter]; - } - printf("%f %f %f/%d = %f\n", tmin, tmax, tsum, n_experiments, tsum / n_experiments); -} - -#ifdef TOKUDB - -struct extra_verify { - long long totalbytes; - int rowcounter; - DBT k,v; // the k and v are gotten using the old cursor -}; - -static int -checkbytes (DBT const *key, DBT const *data, void *extrav) { - struct extra_verify *e=extrav; - e->totalbytes += key->size + data->size; - e->rowcounter++; - assert(e->k.size == key->size); - assert(e->v.size == data->size); - assert(memcmp(e->k.data, key->data, key->size)==0); - assert(memcmp(e->v.data, data->data, data->size)==0); - assert(e->k.data != key->data); - assert(e->v.data != data->data); - return 0; -} - - -static void scanscan_verify (void) { - int r; - int counter=0; - for (counter=0; countercursor(db, tid, &dbc1, 0); assert(r==0); - r = db->cursor(db, tid, &dbc2, 0); assert(r==0); - memset(&v.k, 0, sizeof(v.k)); - memset(&v.v, 0, sizeof(v.v)); - u_int32_t f_flags = 0; - u_int32_t c_get_flags = DB_NEXT; - if (prelockflag && (counter || prelock)) { - f_flags |= lock_flag; - c_get_flags |= lock_flag; - } - while (1) { - int r1,r2; - r2 = dbc1->c_get(dbc1, &v.k, &v.v, c_get_flags); - r1 = dbc2->c_getf_next(dbc2, f_flags, checkbytes, &v); - assert(r1==r2); - if (r1) break; - } - r = dbc1->c_close(dbc1); assert(r==0); - r = dbc2->c_close(dbc2); 
assert(r==0); - double thistime = gettime(); - double tdiff = thistime-prevtime; - printf("verify %lld bytes (%d rows) in %9.6fs at %9fMB/s\n", v.totalbytes, v.rowcounter, tdiff, 1e-6*v.totalbytes/tdiff); - } -} - -#endif - -int main (int argc, const char *argv[]) { - - parse_args(argc,argv); - - scanscan_setup(); - switch (run_mode) { - case RUN_HWC: scanscan_hwc(); break; -#ifdef TOKUDB - case RUN_LWC: scanscan_lwc(); break; - case RUN_VERIFY: scanscan_verify(); break; -#endif - case RUN_RANGE: scanscan_range(); break; - default: assert(0); break; - } - scanscan_shutdown(); - - return 0; -} diff --git a/examples/db-update.c b/examples/db-update.c deleted file mode 100644 index e2ab1ecdce4..00000000000 --- a/examples/db-update.c +++ /dev/null @@ -1,379 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). - - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. 
- -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. - - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. 
If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
- -// measure the performance of a simulated "insert on duplicate key update" operation -// the table schema is t(a int, b int, c int, d int, primary key(a, b)) -// a and b are random -// c is the sum of the observations -// d is the first observation - -#include -#include -#include -#include -#include -#include -#include -#include -#include "db.h" - -static size_t key_size = 8; -static size_t val_size = 8; -static int verbose = 0; - -static void db_error(const DB_ENV *env, const char *prefix, const char *msg) { - printf("%s: %p %s %s\n", __FUNCTION__, env, prefix, msg); -} - -static int get_int(void *p) { - int v; - memcpy(&v, p, sizeof v); - return htonl(v); -} - -#if defined(TOKUDB) -static int my_update_callback(DB *db, const DBT *key, const DBT *old_val, const DBT *extra, void (*set_val)(const DBT *new_val, void *set_extra), void *set_extra) { - assert(db); - assert(key); - if (old_val == NULL) { - // insert new_val = extra - set_val(extra, set_extra); - } else { - if (verbose) printf("u"); - // update new_val = old_val + extra - assert(old_val->size == val_size && extra->size == val_size); - char new_val_buffer[val_size]; - memcpy(new_val_buffer, old_val->data, sizeof new_val_buffer); - int newc = htonl(get_int(old_val->data) + get_int(extra->data)); // newc = oldc + newc - memcpy(new_val_buffer, &newc, sizeof newc); - DBT new_val = { .data = new_val_buffer, .size = sizeof new_val_buffer }; - set_val(&new_val, set_extra); - } - return 0; -} -#endif - -static void insert_and_update(DB *db, DB_TXN *txn, int a, int b, int c, int d, bool do_update_callback) { -#if !defined(TOKUDB) - assert(!do_update_callback); -#endif - int r; - - // generate the key - assert(key_size >= 8); - char key_buffer[key_size]; - int newa = htonl(a); - memcpy(key_buffer, &newa, sizeof newa); - int newb = htonl(b); - memcpy(key_buffer+4, &newb, sizeof newb); - - // generate the value - assert(val_size >= 8); - char val_buffer[val_size]; - int newc = htonl(c); - memcpy(val_buffer, &newc, 
sizeof newc); - int newd = htonl(d); - memcpy(val_buffer+4, &newd, sizeof newd); - -#if defined(TOKUDB) - if (do_update_callback) { - // extra = value_buffer, implicit combine column c update function - DBT key = { .data = key_buffer, .size = sizeof key_buffer }; - DBT extra = { .data = val_buffer, .size = sizeof val_buffer }; - r = db->update(db, txn, &key, &extra, 0); assert(r == 0); - } else -#endif - { - DBT key = { .data = key_buffer, .size = sizeof key_buffer }; - DBT value = { .data = val_buffer, .size = sizeof val_buffer }; - DBT oldvalue = { }; - r = db->get(db, txn, &key, &oldvalue, 0); - assert(r == 0 || r == DB_NOTFOUND); - if (r == 0) { - // update it - if (verbose) printf("U"); - int oldc = get_int(oldvalue.data); - newc = htonl(oldc + c); // newc = oldc + newc - memcpy(val_buffer, &newc, sizeof newc); - r = db->put(db, txn, &key, &value, 0); - assert(r == 0); - } else if (r == DB_NOTFOUND) { - r = db->put(db, txn, &key, &value, 0); - assert(r == 0); - } - } -} - -static inline float tdiff (struct timeval *a, struct timeval *b) { - return (a->tv_sec - b->tv_sec) +1e-6*(a->tv_usec - b->tv_usec); -} - -static void insert_and_update_all(DB_ENV *db_env, DB *db, long nrows, long max_rows_per_txn, int key_range, long rows_per_report, bool do_update_callback, bool do_txn) { - int r; - struct timeval tstart; - r = gettimeofday(&tstart, NULL); assert(r == 0); - struct timeval tlast = tstart; - DB_TXN *txn = NULL; - if (do_txn) { - r = db_env->txn_begin(db_env, NULL, &txn, 0); assert(r == 0); - } - long n_rows_per_txn = 0; - long rowi; - for (rowi = 0; rowi < nrows; rowi++) { - int a = random() % key_range; - int b = random() % key_range; - int c = 1; - int d = 0; // timestamp - insert_and_update(db, txn, a, b, c, d, do_update_callback); - n_rows_per_txn++; - - // maybe commit - if (do_txn && n_rows_per_txn == max_rows_per_txn) { - r = txn->commit(txn, 0); assert(r == 0); - r = db_env->txn_begin(db_env, NULL, &txn, 0); assert(r == 0); - n_rows_per_txn = 0; - } 
- - // maybe report performance - if (((rowi + 1) % rows_per_report) == 0) { - struct timeval tnow; - r = gettimeofday(&tnow, NULL); assert(r == 0); - float last_time = tdiff(&tnow, &tlast); - float total_time = tdiff(&tnow, &tstart); - printf("%ld %.3f %.0f/s %.0f/s\n", rowi + 1, last_time, rows_per_report/last_time, rowi/total_time); fflush(stdout); - tlast = tnow; - } - } - - if (do_txn) { - r = txn->commit(txn, 0); assert(r == 0); - } - struct timeval tnow; - r = gettimeofday(&tnow, NULL); assert(r == 0); - printf("total %ld %.3f %.0f/s\n", nrows, tdiff(&tnow, &tstart), nrows/tdiff(&tnow, &tstart)); fflush(stdout); -} - -int main(int argc, char *argv[]) { -#if defined(TOKUDB) - char *db_env_dir = "update.env.tokudb"; -#else - char *db_env_dir = "update.env.bdb"; -#endif - int db_env_open_flags = DB_CREATE | DB_PRIVATE | DB_INIT_MPOOL | DB_INIT_TXN | DB_INIT_LOCK | DB_INIT_LOG; - char *db_filename = "update.db"; - long rows = 1000000000; - long rows_per_txn = 100; - long rows_per_report = 100000; - int key_range = 1000000; -#if defined(TOKUDB) - bool do_update_callback = true; -#else - bool do_update_callback = false; -#endif - bool do_txn = false; - u_int64_t cachesize = 1000000000; - u_int32_t pagesize = 0; -#if defined(TOKUDB) - u_int32_t checkpoint_period = 60; -#endif - - int i; - for (i = 1; i < argc; i++) { - char *arg = argv[i]; - if (strcmp(arg, "--verbose") == 0) { - verbose++; - continue; - } - if (strcmp(arg, "--rows") == 0 && i+1 < argc) { - rows = atol(argv[++i]); - continue; - } - if (strcmp(arg, "--rows_per_txn") == 0 && i+1 < argc) { - rows_per_txn = atol(argv[++i]); - continue; - } - if (strcmp(arg, "--rows_per_report") == 0 && i+1 < argc) { - rows_per_report = atol(argv[++i]); - continue; - } - if (strcmp(arg, "--key_range") == 0 && i+1 < argc) { - key_range = atol(argv[++i]); - continue; - } - if (strcmp(arg, "--txn") == 0 && i+1 < argc) { - do_txn = atoi(argv[++i]) != 0; - continue; - } - if (strcmp(arg, "--pagesize") == 0 && i+1 < argc) { - 
pagesize = atoi(argv[++i]); - continue; - } - if (strcmp(arg, "--cachesize") == 0 && i+1 < argc) { - cachesize = atol(argv[++i]); - continue; - } - if (strcmp(arg, "--update_callback") == 0 && i+1 < argc) { - do_update_callback = atoi(argv[++i]) != 0; - continue; - } - if (strcmp(arg, "--key_size") == 0 && i+1 < argc) { - key_size = atoi(argv[++i]); - continue; - } - if (strcmp(arg, "--val_size") == 0 && i+1 < argc) { - val_size = atoi(argv[++i]); - continue; - } -#if defined(TOKUDB) - if (strcmp(arg, "--checkpoint_period") == 0 && i+1 < argc) { - checkpoint_period = atoi(argv[++i]); - continue; - } -#endif - - assert(0); - } - - int r; - char rm_cmd[strlen(db_env_dir) + strlen("rm -rf ") + 1]; - snprintf(rm_cmd, sizeof(rm_cmd), "rm -rf %s", db_env_dir); - r = system(rm_cmd); assert(r == 0); - - r = mkdir(db_env_dir, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH); assert(r == 0); - - // create and open the env - DB_ENV *db_env = NULL; - r = db_env_create(&db_env, 0); assert(r == 0); -#if defined(TOKUDB) - db_env->set_update(db_env, my_update_callback); -#endif - if (cachesize) { - if (verbose) printf("cachesize %llu\n", (unsigned long long)cachesize); - const u_int64_t gig = 1 << 30; - r = db_env->set_cachesize(db_env, cachesize / gig, cachesize % gig, 1); assert(r == 0); - } - if (!do_txn) - db_env_open_flags &= ~(DB_INIT_TXN | DB_INIT_LOG); - db_env->set_errcall(db_env, db_error); - if (verbose) printf("env %s\n", db_env_dir); - r = db_env->open(db_env, db_env_dir, db_env_open_flags, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); assert(r == 0); -#if defined(TOKUDB) - if (checkpoint_period) { - r = db_env->checkpointing_set_period(db_env, checkpoint_period); assert(r == 0); - u_int32_t period; - r = db_env->checkpointing_get_period(db_env, &period); assert(r == 0 && period == checkpoint_period); - } -#endif - - // create the db - DB *db = NULL; - r = db_create(&db, db_env, 0); assert(r == 0); - DB_TXN *create_txn = NULL; - if (do_txn) { - r = db_env->txn_begin(db_env, 
NULL, &create_txn, 0); assert(r == 0); - } - if (pagesize) { - r = db->set_pagesize(db, pagesize); assert(r == 0); - } - r = db->open(db, create_txn, db_filename, NULL, DB_BTREE, DB_CREATE, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); assert(r == 0); - if (do_txn) { - r = create_txn->commit(create_txn, 0); assert(r == 0); - } - - // insert on duplicate key update - insert_and_update_all(db_env, db, rows, rows_per_txn, key_range, rows_per_report, do_update_callback, do_txn); - - // shutdown - r = db->close(db, 0); assert(r == 0); db = NULL; - r = db_env->close(db_env, 0); assert(r == 0); db_env = NULL; - - return 0; -} From c81c06bda4ce91f56885959f81229d11ab6624ab Mon Sep 17 00:00:00 2001 From: John Esmet Date: Thu, 3 Jul 2014 12:36:25 -0400 Subject: [PATCH 075/190] FT-292 Fix leak in comparator-test --- ft/tests/comparator-test.cc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ft/tests/comparator-test.cc b/ft/tests/comparator-test.cc index 8c5bac55f5d..55804db0af8 100644 --- a/ft/tests/comparator-test.cc +++ b/ft/tests/comparator-test.cc @@ -122,6 +122,7 @@ static void test_desc(void) { expected_desc = &d2; c = cmp(&dbt_a, &dbt_b); invariant(c == MAGIC); + cmp2.destroy(); // go back to using d1, but using the create_from API toku::comparator cmp3, cmp4; @@ -132,6 +133,8 @@ static void test_desc(void) { invariant(c == MAGIC); c = cmp4(&dbt_a, &dbt_b); invariant(c == MAGIC); + cmp3.destroy(); + cmp4.destroy(); cmp.destroy(); } From 84c5d22e29486537e8c97f2b3db122ddfa196324 Mon Sep 17 00:00:00 2001 From: John Esmet Date: Thu, 3 Jul 2014 12:36:25 -0400 Subject: [PATCH 076/190] FT-291 Remove the remaining cilk artifacts --- scripts/tokucilkscreen | 19 ------------------- src/tests/test.h | 11 ++--------- util/sort.h | 16 ++-------------- 3 files changed, 4 insertions(+), 42 deletions(-) delete mode 100755 scripts/tokucilkscreen diff --git a/scripts/tokucilkscreen b/scripts/tokucilkscreen deleted file mode 100755 index 91a63ec66ef..00000000000 --- 
a/scripts/tokucilkscreen +++ /dev/null @@ -1,19 +0,0 @@ -#!/usr/bin/env bash - -# exit 1 if cilkscreen finds errors - -function cleanup() { - if [ "$logfile" != "" ] ; then rm $logfile; logfile=; fi -} - -trap cleanup SIGINT -logfile=$(mktemp /tmp/toku_cilkscreen.XXXXXXXX) -cilkscreen $* 2>$logfile -exitcode=$? -if [ $exitcode = 0 ] ; then - cat $logfile >>/dev/fd/2 - grep "No errors found by Cilkscreen" $logfile >/dev/null 2>&1 - exitcode=$? -fi -rm $logfile -exit $exitcode \ No newline at end of file diff --git a/src/tests/test.h b/src/tests/test.h index db1ef1e4012..80f9ee37215 100644 --- a/src/tests/test.h +++ b/src/tests/test.h @@ -494,15 +494,8 @@ static int env_del_multiple_test_no_array( { int chk_r = (txn)->abort(txn); CKERR(chk_r); } \ }) - -int test_main (int argc, char * const argv[]); -int -#if defined(__cilkplusplus) -cilk_main(int argc, char *argv[]) -#else -main(int argc, char * const argv[]) -#endif -{ +int test_main(int argc, char *const argv[]); +int main(int argc, char *const argv[]) { int r; toku_os_initialize_settings(1); r = test_main(argc, argv); diff --git a/util/sort.h b/util/sort.h index d3dd2459ee5..d597b4d7a8d 100644 --- a/util/sort.h +++ b/util/sort.h @@ -94,16 +94,6 @@ PATENT RIGHTS GRANT: #include #include -#if defined(HAVE_CILK) -#include -#define cilk_worker_count (__cilkrts_get_nworkers()) -#else -#define cilk_spawn -#define cilk_sync -#define cilk_for for -#define cilk_worker_count 1 -#endif - namespace toku { template @@ -147,9 +137,8 @@ namespace toku { } const int mid = n / 2; sortdata_t *right_as[2] = { &(as[0])[mid], &(as[1])[mid] }; - const int r1 = cilk_spawn mergesort_internal(as, which, mid, extra); + const int r1 = mergesort_internal(as, which, mid, extra); const int r2 = mergesort_internal(right_as, which, n - mid, extra); - cilk_sync; if (r1 != r2) { // move everything to the same place (r2) memcpy(as[r2], as[r1], mid * (sizeof as[r2][0])); @@ -221,9 +210,8 @@ namespace toku { const int a2 = an / 2; const sortdata_t 
*akey = &a[a2]; const int b2 = binsearch(*akey, b, bn, 0, extra); - cilk_spawn merge(dest, a, a2, b, b2, extra); + merge(dest, a, a2, b, b2, extra); merge(&dest[a2 + b2], akey, an - a2, &b[b2], bn - b2, extra); - cilk_sync; } } From 46ab99301c6cb8a19b30c83e44263853e8f56852 Mon Sep 17 00:00:00 2001 From: John Esmet Date: Thu, 3 Jul 2014 12:57:00 -0400 Subject: [PATCH 077/190] FT-276 Remove alignment from toku_mempool_malloc API --- ft/bndata.cc | 14 +++++++------- util/dmt.cc | 14 +++++++------- util/mempool.cc | 14 +++++--------- util/mempool.h | 4 ++-- 4 files changed, 21 insertions(+), 25 deletions(-) diff --git a/ft/bndata.cc b/ft/bndata.cc index b95e02cda10..9841d7ef2ed 100644 --- a/ft/bndata.cc +++ b/ft/bndata.cc @@ -140,7 +140,7 @@ void bn_data::initialize_from_separate_keys_and_vals(uint32_t num_entries, struc rbuf_literal_bytes(rb, &vals_src, val_data_size); if (num_entries > 0) { - void *vals_dest = toku_mempool_malloc(&this->m_buffer_mempool, val_data_size, 1); + void *vals_dest = toku_mempool_malloc(&this->m_buffer_mempool, val_data_size); paranoid_invariant_notnull(vals_dest); memcpy(vals_dest, vals_src, val_data_size); } @@ -384,7 +384,7 @@ struct dmt_compressor_state { static int move_it (const uint32_t, klpair_struct *klpair, const uint32_t idx UU(), struct dmt_compressor_state * const oc) { LEAFENTRY old_le = oc->bd->get_le_from_klpair(klpair); uint32_t size = leafentry_memsize(old_le); - void* newdata = toku_mempool_malloc(oc->new_kvspace, size, 1); + void* newdata = toku_mempool_malloc(oc->new_kvspace, size); paranoid_invariant_notnull(newdata); // we do this on a fresh mempool, so nothing bad should happen memcpy(newdata, old_le, size); klpair->le_offset = toku_mempool_get_offset_from_pointer_and_base(oc->new_kvspace, newdata); @@ -411,7 +411,7 @@ void bn_data::dmt_compress_kvspace(size_t added_size, void **maybe_free, bool fo } else { toku_mempool_construct(&new_kvspace, total_size_needed); size_t old_offset_limit = 
toku_mempool_get_offset_limit(&m_buffer_mempool); - void *new_mempool_base = toku_mempool_malloc(&new_kvspace, old_offset_limit, 1); + void *new_mempool_base = toku_mempool_malloc(&new_kvspace, old_offset_limit); memcpy(new_mempool_base, old_mempool_base, old_offset_limit); } @@ -428,10 +428,10 @@ void bn_data::dmt_compress_kvspace(size_t added_size, void **maybe_free, bool fo // If MAYBE_FREE is nullptr then free the old mempool's space. // Otherwise, store the old mempool's space in maybe_free. LEAFENTRY bn_data::mempool_malloc_and_update_dmt(size_t size, void **maybe_free) { - void *v = toku_mempool_malloc(&m_buffer_mempool, size, 1); + void *v = toku_mempool_malloc(&m_buffer_mempool, size); if (v == nullptr) { dmt_compress_kvspace(size, maybe_free, false); - v = toku_mempool_malloc(&m_buffer_mempool, size, 1); + v = toku_mempool_malloc(&m_buffer_mempool, size); paranoid_invariant_notnull(v); } return (LEAFENTRY)v; @@ -506,7 +506,7 @@ class split_klpairs_extra { LEAFENTRY old_le = m_left_bn->get_le_from_klpair(&klpair); size_t le_size = leafentry_memsize(old_le); - void *new_le = toku_mempool_malloc(dest_mp, le_size, 1); + void *new_le = toku_mempool_malloc(dest_mp, le_size); paranoid_invariant_notnull(new_le); memcpy(new_le, old_le, le_size); size_t le_offset = toku_mempool_get_offset_from_pointer_and_base(dest_mp, new_le); @@ -659,7 +659,7 @@ void bn_data::set_contents_as_clone_of_sorted_array( dmt_builder.create(num_les, total_key_size); for (uint32_t idx = 0; idx < num_les; idx++) { - void* new_le = toku_mempool_malloc(&m_buffer_mempool, le_sizes[idx], 1); + void* new_le = toku_mempool_malloc(&m_buffer_mempool, le_sizes[idx]); paranoid_invariant_notnull(new_le); memcpy(new_le, old_les[idx], le_sizes[idx]); size_t le_offset = toku_mempool_get_offset_from_pointer_and_base(&m_buffer_mempool, new_le); diff --git a/util/dmt.cc b/util/dmt.cc index 8d10c5b921c..87b06bf5696 100644 --- a/util/dmt.cc +++ b/util/dmt.cc @@ -130,7 +130,7 @@ void 
dmt::create_from_sorted_memory_of_fix toku_mempool_construct(&this->mp, aligned_memsize); if (aligned_memsize > 0) { paranoid_invariant(numvalues > 0); - void *ptr = toku_mempool_malloc(&this->mp, aligned_memsize, 1); + void *ptr = toku_mempool_malloc(&this->mp, aligned_memsize); paranoid_invariant_notnull(ptr); uint8_t * const CAST_FROM_VOIDP(dest, ptr); const uint8_t * const CAST_FROM_VOIDP(src, mem); @@ -261,7 +261,7 @@ dmtdata_t * dmt::alloc_array_value_end(voi paranoid_invariant(this->values_same_size); this->d.a.num_values++; - void *ptr = toku_mempool_malloc(&this->mp, align(this->value_length), 1); + void *ptr = toku_mempool_malloc(&this->mp, align(this->value_length)); paranoid_invariant_notnull(ptr); paranoid_invariant(reinterpret_cast(ptr) % ALIGNMENT == 0); dmtdata_t *CAST_FROM_VOIDP(n, ptr); @@ -302,7 +302,7 @@ void dmt::maybe_resize_array_for_insert(vo paranoid_invariant(copy_bytes <= toku_mempool_get_used_size(&this->mp)); // Copy over to new mempool if (this->d.a.num_values > 0) { - void* dest = toku_mempool_malloc(&new_kvspace, copy_bytes, 1); + void* dest = toku_mempool_malloc(&new_kvspace, copy_bytes); invariant(dest!=nullptr); memcpy(dest, get_array_value(0), copy_bytes); } @@ -344,7 +344,7 @@ void dmt::convert_from_tree_to_array(void) const uint32_t fixed_aligned_len = align(this->value_length); size_t mem_needed = num_values * fixed_aligned_len; toku_mempool_construct(&new_mp, mem_needed); - uint8_t* CAST_FROM_VOIDP(dest, toku_mempool_malloc(&new_mp, mem_needed, 1)); + uint8_t* CAST_FROM_VOIDP(dest, toku_mempool_malloc(&new_mp, mem_needed)); paranoid_invariant_notnull(dest); for (uint32_t i = 0; i < num_values; i++) { const dmt_node &n = get_node(tmp_array[i]); @@ -588,7 +588,7 @@ node_offset dmt::node_malloc_and_set_value size_t val_size = value.get_size(); size_t size_to_alloc = __builtin_offsetof(dmt_node, value) + val_size; size_to_alloc = align(size_to_alloc); - void* np = toku_mempool_malloc(&this->mp, size_to_alloc, 1); + void* np = 
toku_mempool_malloc(&this->mp, size_to_alloc); paranoid_invariant_notnull(np); dmt_node *CAST_FROM_VOIDP(n, np); node_set_value(n, value); @@ -645,7 +645,7 @@ void dmt::maybe_resize_tree(const dmtwrite dmt_node &node = get_node(tmp_array[i]); const size_t bytes_to_copy = __builtin_offsetof(dmt_node, value) + node.value_length; const size_t bytes_to_alloc = align(bytes_to_copy); - void* newdata = toku_mempool_malloc(&new_kvspace, bytes_to_alloc, 1); + void* newdata = toku_mempool_malloc(&new_kvspace, bytes_to_alloc); memcpy(newdata, &node, bytes_to_copy); tmp_array[i] = toku_mempool_get_offset_from_pointer_and_base(&new_kvspace, newdata); } @@ -1251,7 +1251,7 @@ void dmt::builder::build(dmttemp.mp)); struct mempool new_mp; toku_mempool_construct(&new_mp, used); - void * newbase = toku_mempool_malloc(&new_mp, used, 1); + void * newbase = toku_mempool_malloc(&new_mp, used); invariant_notnull(newbase); memcpy(newbase, toku_mempool_get_base(&this->temp.mp), used); toku_mempool_destroy(&this->temp.mp); diff --git a/util/mempool.cc b/util/mempool.cc index 9eea03338ad..ffd900580b1 100644 --- a/util/mempool.cc +++ b/util/mempool.cc @@ -207,24 +207,20 @@ size_t toku_mempool_get_allocated_size(const struct mempool *mp) { return mp->free_offset; } -void *toku_mempool_malloc(struct mempool *mp, size_t size, int alignment) { +void *toku_mempool_malloc(struct mempool *mp, size_t size) { paranoid_invariant(size < (1U<<31)); paranoid_invariant(mp->size < (1U<<31)); paranoid_invariant(mp->free_offset < (1U<<31)); paranoid_invariant(mp->free_offset <= mp->size); void *vp; - size_t offset = (mp->free_offset + (alignment-1)) & ~(alignment-1); - //printf("mempool_malloc size=%ld base=%p free_offset=%ld mp->size=%ld offset=%ld\n", size, mp->base, mp->free_offset, mp->size, offset); - if (offset + size > mp->size) { - vp = 0; + if (mp->free_offset + size > mp->size) { + vp = nullptr; } else { - vp = (char *)mp->base + offset; - mp->free_offset = offset + size; + vp = 
reinterpret_cast(mp->base) + mp->free_offset; + mp->free_offset += size; } paranoid_invariant(mp->free_offset <= mp->size); - paranoid_invariant(((long)vp & (alignment-1)) == 0); paranoid_invariant(vp == 0 || toku_mempool_inrange(mp, vp, size)); - //printf("mempool returning %p\n", vp); return vp; } diff --git a/util/mempool.h b/util/mempool.h index ee6d6bc5a08..8b52f095b22 100644 --- a/util/mempool.h +++ b/util/mempool.h @@ -163,8 +163,8 @@ size_t toku_mempool_get_free_size(const struct mempool *mp); /* get the amount of space that has been allocated for use (wasted or not) */ size_t toku_mempool_get_allocated_size(const struct mempool *mp); -/* allocate a chunk of memory from the memory pool suitably aligned */ -void *toku_mempool_malloc(struct mempool *mp, size_t size, int alignment); +/* allocate a chunk of memory from the memory pool */ +void *toku_mempool_malloc(struct mempool *mp, size_t size); /* free a previously allocated chunk of memory. the free only updates a count of the amount of free space in the memory pool. the memory From 0acaea986bd41ac9ce15b001311f29e793904016 Mon Sep 17 00:00:00 2001 From: John Esmet Date: Thu, 3 Jul 2014 15:19:58 -0400 Subject: [PATCH 078/190] FT-302 Add block allocation strategy to the block allocator. Default to the one and only strategy so far - first fit. 
--- ft/serialize/block_allocator.cc | 100 ++++++++++++++++++++------------ ft/serialize/block_allocator.h | 12 ++++ 2 files changed, 74 insertions(+), 38 deletions(-) diff --git a/ft/serialize/block_allocator.cc b/ft/serialize/block_allocator.cc index aa11ac8cb91..44f75e1a144 100644 --- a/ft/serialize/block_allocator.cc +++ b/ft/serialize/block_allocator.cc @@ -119,6 +119,7 @@ void block_allocator::create(uint64_t reserve_at_beginning, uint64_t alignment) _blocks_array_size = 1; XMALLOC_N(_blocks_array_size, _blocks_array); _n_bytes_in_use = reserve_at_beginning; + _strategy = BA_STRATEGY_FIRST_FIT; VALIDATE(); } @@ -127,6 +128,10 @@ void block_allocator::destroy() { toku_free(_blocks_array); } +void block_allocator::set_strategy(enum allocation_strategy strategy) { + _strategy = strategy; +} + void block_allocator::grow_blocks_array_by(uint64_t n_to_add) { if (_n_blocks + n_to_add > _blocks_array_size) { uint64_t new_size = _n_blocks + n_to_add; @@ -221,6 +226,34 @@ static inline uint64_t align(uint64_t value, uint64_t ba_alignment) { return ((value + ba_alignment - 1) / ba_alignment) * ba_alignment; } +static struct block_allocator::blockpair * +choose_block_first_fit_strategy(struct block_allocator::blockpair *blocks_array, + uint64_t n_blocks, uint64_t size, + uint64_t alignment) { + // Implement first fit. + for (uint64_t blocknum = 0; blocknum + 1 < n_blocks; blocknum++) { + // Consider the space after blocknum + struct block_allocator::blockpair *bp = &blocks_array[blocknum]; + uint64_t possible_offset = align(bp->offset + bp->size, alignment); + if (possible_offset + size <= bp[1].offset) { + return bp; + } + } + return nullptr; +} + +// TODO: other strategies +// TODO: Put strategies in their own file, ft/serialize/block_allocator_strategy.{cc,h}? 
+ +struct block_allocator::blockpair *block_allocator::choose_block_to_alloc_after(size_t size) { + switch (_strategy) { + case BA_STRATEGY_FIRST_FIT: + return choose_block_first_fit_strategy(_blocks_array, _n_blocks, size, _alignment); + default: + abort(); + } +} + // Effect: Allocate a block. The resulting block must be aligned on the ba->alignment (which to make direct_io happy must be a positive multiple of 512). void block_allocator::alloc_block(uint64_t size, uint64_t *offset) { // Allocator does not support size 0 blocks. See block_allocator_free_block. @@ -228,6 +261,8 @@ void block_allocator::alloc_block(uint64_t size, uint64_t *offset) { grow_blocks_array(); _n_bytes_in_use += size; + + // First and only block if (_n_blocks == 0) { assert(_n_bytes_in_use == _reserve_at_beginning + size); // we know exactly how many are in use _blocks_array[0].offset = align(_reserve_at_beginning, _alignment); @@ -237,50 +272,39 @@ void block_allocator::alloc_block(uint64_t size, uint64_t *offset) { return; } - // Implement first fit. - { - uint64_t end_of_reserve = align(_reserve_at_beginning, _alignment); - if (end_of_reserve + size <= _blocks_array[0].offset ) { - // Check to see if the space immediately after the reserve is big enough to hold the new block. - struct blockpair *bp = &_blocks_array[0]; - memmove(bp + 1, bp, _n_blocks * sizeof(*bp)); - bp[0].offset = end_of_reserve; - bp[0].size = size; - _n_blocks++; - *offset = end_of_reserve; - VALIDATE(); - return; - } - } - - for (uint64_t blocknum = 0; blocknum + 1 < _n_blocks; blocknum++) { - // Consider the space after blocknum - struct blockpair *bp = &_blocks_array[blocknum]; - uint64_t this_offset = bp[0].offset; - uint64_t this_size = bp[0].size; - uint64_t answer_offset = align(this_offset + this_size, _alignment); - if (answer_offset + size > bp[1].offset) { - continue; // The block we want doesn't fit after this block. - } - - // It fits, so allocate it here. 
- memmove(bp + 2, bp + 1, (_n_blocks - blocknum - 1) * sizeof(*bp)); - bp[1].offset = answer_offset; - bp[1].size = size; + // Check to see if the space immediately after the reserve is big enough to hold the new block. + uint64_t end_of_reserve = align(_reserve_at_beginning, _alignment); + if (end_of_reserve + size <= _blocks_array[0].offset ) { + struct blockpair *bp = &_blocks_array[0]; + memmove(bp + 1, bp, _n_blocks * sizeof(*bp)); + bp[0].offset = end_of_reserve; + bp[0].size = size; _n_blocks++; - *offset = answer_offset; + *offset = end_of_reserve; VALIDATE(); return; } - // It didn't fit anywhere, so fit it on the end. - assert(_n_blocks < _blocks_array_size); - struct blockpair *bp = &_blocks_array[_n_blocks]; - uint64_t answer_offset = align(bp[-1].offset + bp[-1].size, _alignment); - bp->offset = answer_offset; - bp->size = size; + struct blockpair *bp = choose_block_first_fit_strategy(_blocks_array, _n_blocks, size, _alignment); + if (bp != nullptr) { + // our allocation strategy chose the space after `bp' to fit the new block + uint64_t answer_offset = align(bp->offset + bp->size, _alignment); + uint64_t blocknum = bp - _blocks_array; + assert(&_blocks_array[blocknum] == bp); + memmove(bp + 2, bp + 1, (_n_blocks - blocknum - 1) * sizeof(*bp)); + bp[1].offset = answer_offset; + bp[1].size = size; + *offset = answer_offset; + } else { + // It didn't fit anywhere, so fit it on the end. 
+ assert(_n_blocks < _blocks_array_size); + bp = &_blocks_array[_n_blocks]; + uint64_t answer_offset = align(bp[-1].offset + bp[-1].size, _alignment); + bp->offset = answer_offset; + bp->size = size; + *offset = answer_offset; + } _n_blocks++; - *offset = answer_offset; VALIDATE(); } diff --git a/ft/serialize/block_allocator.h b/ft/serialize/block_allocator.h index 5c1bc75a504..4a4222f466c 100644 --- a/ft/serialize/block_allocator.h +++ b/ft/serialize/block_allocator.h @@ -124,7 +124,12 @@ public: static const size_t BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE = BLOCK_ALLOCATOR_HEADER_RESERVE * 2; + enum allocation_strategy { + BA_STRATEGY_FIRST_FIT = 1 + }; + // Effect: Create a block allocator, in which the first RESERVE_AT_BEGINNING bytes are not put into a block. + // The default allocation strategy is first fit (BA_STRATEGY_FIRST_FIT) // All blocks be start on a multiple of ALIGNMENT. // Aborts if we run out of memory. // Parameters @@ -135,6 +140,10 @@ public: // Effect: Destroy this block allocator void destroy(); + // Effect: Set the allocation strategy that the allocator should use + // Requires: No other threads are operating on this block allocator + void set_strategy(enum allocation_strategy strategy); + // Effect: Allocate a block of the specified size at a particular offset. // Aborts if anything goes wrong. // The performance of this function may be as bad as Theta(N), where N is the number of blocks currently in use. 
@@ -219,6 +228,7 @@ private: void grow_blocks_array_by(uint64_t n_to_add); void grow_blocks_array(); int64_t find_block(uint64_t offset); + struct blockpair *choose_block_to_alloc_after(size_t size); static int compare_blockpairs(const void *av, const void *bv); @@ -234,4 +244,6 @@ private: struct blockpair *_blocks_array; // Including the reserve_at_beginning uint64_t _n_bytes_in_use; + // The allocation strategy are we using + enum allocation_strategy _strategy; }; From 5883b9a280da26442c179baec1a16b8e96488d99 Mon Sep 17 00:00:00 2001 From: John Esmet Date: Thu, 3 Jul 2014 15:27:26 -0400 Subject: [PATCH 079/190] FT-302 Correctly use the new interface (oops) --- ft/serialize/block_allocator.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ft/serialize/block_allocator.cc b/ft/serialize/block_allocator.cc index 44f75e1a144..06400de0415 100644 --- a/ft/serialize/block_allocator.cc +++ b/ft/serialize/block_allocator.cc @@ -285,7 +285,7 @@ void block_allocator::alloc_block(uint64_t size, uint64_t *offset) { return; } - struct blockpair *bp = choose_block_first_fit_strategy(_blocks_array, _n_blocks, size, _alignment); + struct blockpair *bp = choose_block_to_alloc_after(size); if (bp != nullptr) { // our allocation strategy chose the space after `bp' to fit the new block uint64_t answer_offset = align(bp->offset + bp->size, _alignment); From dc07bf756f40f5b39cbcb8e065676f359bb34297 Mon Sep 17 00:00:00 2001 From: John Esmet Date: Thu, 3 Jul 2014 15:51:23 -0400 Subject: [PATCH 080/190] FT-302 Organize allocation strategy code into its own header / source file. 
--- ft/CMakeLists.txt | 1 + ft/serialize/block_allocator.cc | 29 +----- ft/serialize/block_allocator_strategy.cc | 104 ++++++++++++++++++++++ ft/serialize/block_allocator_strategy.h | 108 +++++++++++++++++++++++ 4 files changed, 217 insertions(+), 25 deletions(-) create mode 100644 ft/serialize/block_allocator_strategy.cc create mode 100644 ft/serialize/block_allocator_strategy.h diff --git a/ft/CMakeLists.txt b/ft/CMakeLists.txt index 5bea203a36b..9b2e4905612 100644 --- a/ft/CMakeLists.txt +++ b/ft/CMakeLists.txt @@ -51,6 +51,7 @@ set(FT_SOURCES node pivotkeys serialize/block_allocator + serialize/block_allocator_strategy serialize/block_table serialize/compress serialize/ft_node-serialize diff --git a/ft/serialize/block_allocator.cc b/ft/serialize/block_allocator.cc index 06400de0415..06fbe58e91e 100644 --- a/ft/serialize/block_allocator.cc +++ b/ft/serialize/block_allocator.cc @@ -98,10 +98,7 @@ PATENT RIGHTS GRANT: #include "portability/toku_stdlib.h" #include "ft/serialize/block_allocator.h" - -// Here's a very simple implementation. -// It's not very fast at allocating or freeing. -// Previous implementation used next_fit, but now use first_fit since we are moving blocks around to reduce file size. +#include "ft/serialize/block_allocator_strategy.h" #if 0 #define VALIDATE() validate() @@ -226,29 +223,11 @@ static inline uint64_t align(uint64_t value, uint64_t ba_alignment) { return ((value + ba_alignment - 1) / ba_alignment) * ba_alignment; } -static struct block_allocator::blockpair * -choose_block_first_fit_strategy(struct block_allocator::blockpair *blocks_array, - uint64_t n_blocks, uint64_t size, - uint64_t alignment) { - // Implement first fit. 
- for (uint64_t blocknum = 0; blocknum + 1 < n_blocks; blocknum++) { - // Consider the space after blocknum - struct block_allocator::blockpair *bp = &blocks_array[blocknum]; - uint64_t possible_offset = align(bp->offset + bp->size, alignment); - if (possible_offset + size <= bp[1].offset) { - return bp; - } - } - return nullptr; -} - -// TODO: other strategies -// TODO: Put strategies in their own file, ft/serialize/block_allocator_strategy.{cc,h}? - -struct block_allocator::blockpair *block_allocator::choose_block_to_alloc_after(size_t size) { +struct block_allocator::blockpair * +block_allocator::choose_block_to_alloc_after(size_t size) { switch (_strategy) { case BA_STRATEGY_FIRST_FIT: - return choose_block_first_fit_strategy(_blocks_array, _n_blocks, size, _alignment); + return block_allocator_strategy::first_fit(_blocks_array, _n_blocks, size, _alignment); default: abort(); } diff --git a/ft/serialize/block_allocator_strategy.cc b/ft/serialize/block_allocator_strategy.cc new file mode 100644 index 00000000000..620ab24a5bd --- /dev/null +++ b/ft/serialize/block_allocator_strategy.cc @@ -0,0 +1,104 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: + +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). 
+ + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuDB, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. 
+ + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. 
+*/ + +#include "ft/serialize/block_allocator_strategy.h" + +// First fit block allocation +struct block_allocator::blockpair * +block_allocator_strategy::first_fit(struct block_allocator::blockpair *blocks_array, + uint64_t n_blocks, uint64_t size, uint64_t alignment) { + for (uint64_t blocknum = 0; blocknum + 1 < n_blocks; blocknum++) { + // Consider the space after blocknum + struct block_allocator::blockpair *bp = &blocks_array[blocknum]; + uint64_t possible_offset = _align(bp->offset + bp->size, alignment); + if (possible_offset + size <= bp[1].offset) { + return bp; + } + } + return nullptr; +} diff --git a/ft/serialize/block_allocator_strategy.h b/ft/serialize/block_allocator_strategy.h new file mode 100644 index 00000000000..5023a8d740c --- /dev/null +++ b/ft/serialize/block_allocator_strategy.h @@ -0,0 +1,108 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: + +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuDB, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. 
+ + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#pragma once + +#include + +#include "ft/serialize/block_allocator.h" + +// Block allocation strategy implementations + +class block_allocator_strategy { +public: + static struct block_allocator::blockpair * + first_fit(struct block_allocator::blockpair *blocks_array, + uint64_t n_blocks, uint64_t size, uint64_t alignment); + +private: + // Effect: align a value by rounding up. 
+ static inline uint64_t _align(uint64_t value, uint64_t ba_alignment) { + return ((value + ba_alignment - 1) / ba_alignment) * ba_alignment; + } +}; From c5361547e08fa572cac4e77eeda4dbc5f259690d Mon Sep 17 00:00:00 2001 From: Leif Walsh Date: Sat, 5 Jul 2014 00:49:10 -0400 Subject: [PATCH 081/190] updated branding to tokuft --- README.md | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index 72b8988165a..12d6e6e4eed 100644 --- a/README.md +++ b/README.md @@ -1,16 +1,16 @@ -TokuKV +TokuFT ====== -TokuKV is a high-performance, transactional key-value store, used in the +TokuFT is a high-performance, transactional key-value store, used in the TokuDB storage engine for MySQL and MariaDB and in TokuMX, the high-performance MongoDB distribution. -TokuKV is provided as a shared library with an interface similar to +TokuFT is provided as a shared library with an interface similar to Berkeley DB. To build the full MySQL product, see the instructions for [Tokutek/ft-engine][ft-engine]. To build TokuMX, see the instructions -for [Tokutek/mongo][mongo]. This document covers TokuKV only. +for [Tokutek/mongo][mongo]. This document covers TokuFT only. [ft-engine]: https://github.com/Tokutek/ft-engine [mongo]: https://github.com/Tokutek/mongo @@ -19,7 +19,7 @@ for [Tokutek/mongo][mongo]. This document covers TokuKV only. Building -------- -TokuKV is built using CMake >= 2.8.9. Out-of-source builds are +TokuFT is built using CMake >= 2.8.9. Out-of-source builds are recommended. You need a C++11 compiler, though only GCC >= 4.7 and Apple's Clang are tested. You also need zlib development packages (`yum install zlib-devel` or `apt-get install zlib1g-dev`). @@ -50,14 +50,14 @@ to that if you are planning to run benchmarks or in production. ### Platforms -TokuKV is supported on 64-bit Centos, should work on other 64-bit linux -distributions, and may work on OSX 10.8 and FreeBSD. 
TokuKV is not +TokuFT is supported on 64-bit Centos, should work on other 64-bit linux +distributions, and may work on OSX 10.8 and FreeBSD. TokuFT is not supported on 32-bit systems. [Transparent hugepages][transparent-hugepages] is a feature in newer linux kernel versions that causes problems for the memory usage tracking -calculations in TokuKV and can lead to memory overcommit. If you have -this feature enabled, TokuKV will not start, and you should turn it off. +calculations in TokuFT and can lead to memory overcommit. If you have +this feature enabled, TokuFT will not start, and you should turn it off. If you want to run with transparent hugepages on, you can set an environment variable `TOKU_HUGE_PAGES_OK=1`, but only do this for testing, and only with a small cache size. @@ -68,23 +68,23 @@ and only with a small cache size. Examples -------- -There are some sample programs that can use either TokuKV or Berkeley DB +There are some sample programs that can use either TokuFT or Berkeley DB in the `examples/` directory. Follow the above instructions to build and -install TokuKV, and then look in the installed `examples/` directory for +install TokuFT, and then look in the installed `examples/` directory for instructions on building and running them. Testing ------- -TokuKV uses CTest for testing. The CDash testing dashboard is not +TokuFT uses CTest for testing. The CDash testing dashboard is not currently public, but you can run the tests without submitting them. There are some large data files not stored in the git repository, that will be made available soon. For now, the tests that use these files will not run. -Many of the tests are linked with both TokuKV and Berkeley DB, as a sanity +Many of the tests are linked with both TokuFT and Berkeley DB, as a sanity check on the tests themselves. To build these tests, you will need Berkeley DB and its header files installed. If you do not have Berkeley DB installed, just don't pass `USE_BDB=ON`. 
@@ -103,7 +103,7 @@ ctest -D ExperimentalStart \ Contributing ------------ -Please report bugs in TokuKV here on github. +Please report bugs in TokuFT here on github. We have two publicly accessible mailing lists for TokuDB: @@ -125,7 +125,7 @@ We are also available on IRC on freenode.net, in the #tokutek channel. License ------- -TokuKV is available under the GPL version 2, with slight modifications. +TokuFT is available under the GPL version 2, with slight modifications. See [README-TOKUDB][license]. [license]: http://github.com/Tokutek/ft-index/blob/master/README-TOKUDB From 77fb45af2353deed1a6ebaaa992010a66fe38e33 Mon Sep 17 00:00:00 2001 From: John Esmet Date: Sat, 5 Jul 2014 14:06:29 -0400 Subject: [PATCH 082/190] FT-300 Add simple, compile-time enabled stderr tracing for each block allocator. --- ft/serialize/block_allocator.cc | 53 ++++++++++++++++++------ ft/serialize/block_allocator_strategy.cc | 2 +- ft/serialize/block_allocator_strategy.h | 2 +- 3 files changed, 42 insertions(+), 15 deletions(-) diff --git a/ft/serialize/block_allocator.cc b/ft/serialize/block_allocator.cc index 06fbe58e91e..f4611c5ca61 100644 --- a/ft/serialize/block_allocator.cc +++ b/ft/serialize/block_allocator.cc @@ -106,6 +106,14 @@ PATENT RIGHTS GRANT: #define VALIDATE() #endif +static inline bool ba_trace_enabled() { +#if 0 + return true; +#else + return false; +#endif +} + void block_allocator::create(uint64_t reserve_at_beginning, uint64_t alignment) { // the alignment must be at least 512 and aligned with 512 to work with direct I/O assert(alignment >= 512 && (alignment % 512) == 0); @@ -119,10 +127,18 @@ void block_allocator::create(uint64_t reserve_at_beginning, uint64_t alignment) _strategy = BA_STRATEGY_FIRST_FIT; VALIDATE(); + + if (ba_trace_enabled()) { + fprintf(stderr, "ba_trace_create %p", this); + } } void block_allocator::destroy() { toku_free(_blocks_array); + + if (ba_trace_enabled()) { + fprintf(stderr, "ba_trace_destroy %p", this); + } } void 
block_allocator::set_strategy(enum allocation_strategy strategy) { @@ -235,36 +251,34 @@ block_allocator::choose_block_to_alloc_after(size_t size) { // Effect: Allocate a block. The resulting block must be aligned on the ba->alignment (which to make direct_io happy must be a positive multiple of 512). void block_allocator::alloc_block(uint64_t size, uint64_t *offset) { + struct blockpair *bp; + // Allocator does not support size 0 blocks. See block_allocator_free_block. invariant(size > 0); grow_blocks_array(); _n_bytes_in_use += size; - // First and only block + uint64_t end_of_reserve = align(_reserve_at_beginning, _alignment); + if (_n_blocks == 0) { + // First and only block assert(_n_bytes_in_use == _reserve_at_beginning + size); // we know exactly how many are in use _blocks_array[0].offset = align(_reserve_at_beginning, _alignment); _blocks_array[0].size = size; *offset = _blocks_array[0].offset; - _n_blocks++; - return; - } - - // Check to see if the space immediately after the reserve is big enough to hold the new block. - uint64_t end_of_reserve = align(_reserve_at_beginning, _alignment); - if (end_of_reserve + size <= _blocks_array[0].offset ) { - struct blockpair *bp = &_blocks_array[0]; + goto done; + } else if (end_of_reserve + size <= _blocks_array[0].offset ) { + // Check to see if the space immediately after the reserve is big enough to hold the new block. 
+ bp = &_blocks_array[0]; memmove(bp + 1, bp, _n_blocks * sizeof(*bp)); bp[0].offset = end_of_reserve; bp[0].size = size; - _n_blocks++; *offset = end_of_reserve; - VALIDATE(); - return; + goto done; } - struct blockpair *bp = choose_block_to_alloc_after(size); + bp = choose_block_to_alloc_after(size); if (bp != nullptr) { // our allocation strategy chose the space after `bp' to fit the new block uint64_t answer_offset = align(bp->offset + bp->size, _alignment); @@ -283,8 +297,15 @@ void block_allocator::alloc_block(uint64_t size, uint64_t *offset) { bp->size = size; *offset = answer_offset; } + +done: _n_blocks++; VALIDATE(); + + if (ba_trace_enabled()) { + fprintf(stderr, "ba_trace_alloc %p %lu %lu\n", + this, static_cast(size), static_cast(*offset)); + } } // Find the index in the blocks array that has a particular offset. Requires that the block exist. @@ -326,6 +347,12 @@ void block_allocator::free_block(uint64_t offset) { (_n_blocks - bn - 1) * sizeof(struct blockpair)); _n_blocks--; VALIDATE(); + + if (ba_trace_enabled()) { + fprintf(stderr, "ba_trace_free %p %lu\n", + this, static_cast(offset)); + + } } uint64_t block_allocator::block_size(uint64_t offset) { diff --git a/ft/serialize/block_allocator_strategy.cc b/ft/serialize/block_allocator_strategy.cc index 620ab24a5bd..5108f151248 100644 --- a/ft/serialize/block_allocator_strategy.cc +++ b/ft/serialize/block_allocator_strategy.cc @@ -30,7 +30,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. + Copyright (C) 2007-2014 Tokutek, Inc. DISCLAIMER: diff --git a/ft/serialize/block_allocator_strategy.h b/ft/serialize/block_allocator_strategy.h index 5023a8d740c..3dc5fca9813 100644 --- a/ft/serialize/block_allocator_strategy.h +++ b/ft/serialize/block_allocator_strategy.h @@ -30,7 +30,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. 
+ Copyright (C) 2007-2014 Tokutek, Inc. DISCLAIMER: From a6a936b16937f28f13015adb3fefc14db6162f61 Mon Sep 17 00:00:00 2001 From: Leif Walsh Date: Mon, 7 Jul 2014 13:34:13 -0400 Subject: [PATCH 083/190] remove gcc 4.7 artifacts from nightly scripts --- scripts/run-nightly-coverage-tests.bash | 2 +- scripts/run-nightly-drd-tests.bash | 2 +- scripts/run-nightly-release-tests.bash | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/run-nightly-coverage-tests.bash b/scripts/run-nightly-coverage-tests.bash index 42af1748766..c91bdb7b9a0 100755 --- a/scripts/run-nightly-coverage-tests.bash +++ b/scripts/run-nightly-coverage-tests.bash @@ -12,7 +12,7 @@ cd $tokudbdir if [ ! -d build ] ; then mkdir build pushd build - CC=gcc47 CXX=g++47 cmake \ + cmake \ -D CMAKE_BUILD_TYPE=Debug \ -D USE_VALGRIND=ON \ -D TOKU_DEBUG_PARANOID=ON \ diff --git a/scripts/run-nightly-drd-tests.bash b/scripts/run-nightly-drd-tests.bash index 467c47b9cd2..4a99b40262e 100755 --- a/scripts/run-nightly-drd-tests.bash +++ b/scripts/run-nightly-drd-tests.bash @@ -12,7 +12,7 @@ cd $tokudbdir if [ ! -d build ] ; then mkdir build pushd build - CC=gcc47 CXX=g++47 cmake \ + cmake \ -D CMAKE_BUILD_TYPE=drd \ -D USE_VALGRIND=ON \ -D TOKU_DEBUG_PARANOID=ON \ diff --git a/scripts/run-nightly-release-tests.bash b/scripts/run-nightly-release-tests.bash index 5ac3e62b216..e5767d10893 100755 --- a/scripts/run-nightly-release-tests.bash +++ b/scripts/run-nightly-release-tests.bash @@ -12,7 +12,7 @@ cd $tokudbdir if [ ! 
-d build ] ; then mkdir build pushd build - CC=gcc47 CXX=g++47 cmake \ + cmake \ -D CMAKE_BUILD_TYPE=Release \ -D USE_VALGRIND=ON \ -D TOKU_DEBUG_PARANOID=OFF \ From 982fc1c267826c4ba1b0820a13b6490e89143a34 Mon Sep 17 00:00:00 2001 From: Leif Walsh Date: Mon, 7 Jul 2014 16:27:58 -0400 Subject: [PATCH 084/190] moved to jira for issues --- README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 12d6e6e4eed..eaded7c4287 100644 --- a/README.md +++ b/README.md @@ -103,7 +103,7 @@ ctest -D ExperimentalStart \ Contributing ------------ -Please report bugs in TokuFT here on github. +Please report bugs in TokuFT to the [issue tracker][jira]. We have two publicly accessible mailing lists for TokuDB: @@ -121,6 +121,8 @@ and two for TokuMX: We are also available on IRC on freenode.net, in the #tokutek channel. +[jira]: https://tokutek.atlassian.net/browse/FT/ + License ------- From 1cd4af2b821a23c486dcc14bddf8647a5ec84442 Mon Sep 17 00:00:00 2001 From: Rich Prohaska Date: Tue, 8 Jul 2014 09:08:45 -0400 Subject: [PATCH 085/190] #259 add TOKUDB_CACHETABLE_SIZE_CLONED variable --- .../suite/tokudb/r/information-schema-global-status.result | 1 + 1 file changed, 1 insertion(+) diff --git a/mysql-test/suite/tokudb/r/information-schema-global-status.result b/mysql-test/suite/tokudb/r/information-schema-global-status.result index 438d10ac282..369c14fe4fe 100644 --- a/mysql-test/suite/tokudb/r/information-schema-global-status.result +++ b/mysql-test/suite/tokudb/r/information-schema-global-status.result @@ -45,6 +45,7 @@ TOKUDB_CACHETABLE_MISS TOKUDB_CACHETABLE_MISS_TIME TOKUDB_CACHETABLE_PREFETCHES TOKUDB_CACHETABLE_SIZE_CACHEPRESSURE +TOKUDB_CACHETABLE_SIZE_CLONED TOKUDB_CACHETABLE_SIZE_CURRENT TOKUDB_CACHETABLE_SIZE_LEAF TOKUDB_CACHETABLE_SIZE_LIMIT From e9de0225e445a96c68d9114210ae70fd0f11b49d Mon Sep 17 00:00:00 2001 From: John Esmet Date: Tue, 8 Jul 2014 13:32:51 -0400 Subject: [PATCH 086/190] FT-306 Fix logger long wait status 
text --- ft/logger/logger.cc | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/ft/logger/logger.cc b/ft/logger/logger.cc index 48844fd334c..a2e5a257456 100644 --- a/ft/logger/logger.cc +++ b/ft/logger/logger.cc @@ -1392,11 +1392,11 @@ status_init(void) { // Note, this function initializes the keyname, type, and legend fields. // Value fields are initialized to zero by compiler. STATUS_INIT(LOGGER_NEXT_LSN, nullptr, UINT64, "next LSN", TOKU_ENGINE_STATUS); - STATUS_INIT(LOGGER_NUM_WRITES, LOGGER_WRITES, UINT64, "writes", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); - STATUS_INIT(LOGGER_BYTES_WRITTEN, LOGGER_WRITES_BYTES, UINT64, "writes (bytes)", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); - STATUS_INIT(LOGGER_UNCOMPRESSED_BYTES_WRITTEN, LOGGER_WRITES_UNCOMPRESSED_BYTES, UINT64, "writes (uncompressed bytes)", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); - STATUS_INIT(LOGGER_TOKUTIME_WRITES, LOGGER_WRITES_SECONDS, TOKUTIME, "writes (seconds)", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); - STATUS_INIT(LOGGER_WAIT_BUF_LONG, LOGGER_WAIT_LONG, UINT64, "count", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); + STATUS_INIT(LOGGER_NUM_WRITES, LOGGER_WRITES, UINT64, "writes", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); + STATUS_INIT(LOGGER_BYTES_WRITTEN, LOGGER_WRITES_BYTES, UINT64, "writes (bytes)", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); + STATUS_INIT(LOGGER_UNCOMPRESSED_BYTES_WRITTEN, LOGGER_WRITES_UNCOMPRESSED_BYTES, UINT64, "writes (uncompressed bytes)", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); + STATUS_INIT(LOGGER_TOKUTIME_WRITES, LOGGER_WRITES_SECONDS, TOKUTIME, "writes (seconds)", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); + STATUS_INIT(LOGGER_WAIT_BUF_LONG, LOGGER_WAIT_LONG, UINT64, "number of long logger write operations", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); logger_status.initialized = true; } #undef STATUS_INIT From 17c5b4d906abde0ae54f78ba3d8628572a4ac261 Mon Sep 17 00:00:00 2001 From: John Esmet Date: Tue, 8 Jul 2014 15:15:10 -0400 Subject: [PATCH 087/190] FT-300 
Block allocator trace replay tool --- ft/serialize/block_allocator.cc | 12 +- ft/serialize/block_allocator.h | 8 + tools/CMakeLists.txt | 2 +- tools/ba_replay.cc | 365 ++++++++++++++++++++++++++++++++ 4 files changed, 384 insertions(+), 3 deletions(-) create mode 100644 tools/ba_replay.cc diff --git a/ft/serialize/block_allocator.cc b/ft/serialize/block_allocator.cc index f4611c5ca61..4229427bb3f 100644 --- a/ft/serialize/block_allocator.cc +++ b/ft/serialize/block_allocator.cc @@ -129,7 +129,7 @@ void block_allocator::create(uint64_t reserve_at_beginning, uint64_t alignment) VALIDATE(); if (ba_trace_enabled()) { - fprintf(stderr, "ba_trace_create %p", this); + fprintf(stderr, "ba_trace_create %p\n", this); } } @@ -137,7 +137,7 @@ void block_allocator::destroy() { toku_free(_blocks_array); if (ba_trace_enabled()) { - fprintf(stderr, "ba_trace_destroy %p", this); + fprintf(stderr, "ba_trace_destroy %p\n", this); } } @@ -460,6 +460,14 @@ void block_allocator::get_unused_statistics(TOKU_DB_FRAGMENTATION report) { } } +void block_allocator::get_statistics(TOKU_DB_FRAGMENTATION report) { + report->data_bytes = _n_bytes_in_use; + report->data_blocks = _n_blocks; + report->file_size_bytes = 0; + report->checkpoint_bytes_additional = 0; + get_unused_statistics(report); +} + void block_allocator::validate() const { uint64_t n_bytes_in_use = _reserve_at_beginning; for (uint64_t i = 0; i < _n_blocks; i++) { diff --git a/ft/serialize/block_allocator.h b/ft/serialize/block_allocator.h index 4a4222f466c..056be17b4dc 100644 --- a/ft/serialize/block_allocator.h +++ b/ft/serialize/block_allocator.h @@ -213,6 +213,14 @@ public: // report->checkpoint_bytes_additional is filled in void get_unused_statistics(TOKU_DB_FRAGMENTATION report); + // Effect: Fill in report->data_bytes with the number of bytes in use + // Fill in report->data_blocks with the number of blockpairs in use + // Fill in unused statistics using this->get_unused_statistics() + // Requires: + // report->file_size is 
ignored on return + // report->checkpoint_bytes_additional is ignored on return + void get_statistics(TOKU_DB_FRAGMENTATION report); + // Effect: Merge dst[d] and src[s] into dst[d+s], merging in place. // Initially dst and src hold sorted arrays (sorted by increasing offset). // Finally dst contains all d+s elements sorted in order. diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt index 4e513eae1fc..50bc5b9e05a 100644 --- a/tools/CMakeLists.txt +++ b/tools/CMakeLists.txt @@ -1,6 +1,6 @@ set_property(DIRECTORY APPEND PROPERTY COMPILE_DEFINITIONS _GNU_SOURCE DONT_DEPRECATE_ERRNO) -set(tools tokudb_dump tokuftdump tdb_logprint tdb-recover ftverify) +set(tools tokudb_dump tokuftdump tdb_logprint tdb-recover ftverify ba_replay) foreach(tool ${tools}) add_executable(${tool} ${tool}) add_dependencies(${tool} install_tdb_h) diff --git a/tools/ba_replay.cc b/tools/ba_replay.cc new file mode 100644 index 00000000000..c2a912880c0 --- /dev/null +++ b/tools/ba_replay.cc @@ -0,0 +1,365 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: + +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuDB, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. 
+ + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. 
+*/ + +// Replay a block allocator trace against different strategies and compare +// the results + +#include + +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "ft/serialize/block_allocator.h" + +using std::map; +using std::set; +using std::string; +using std::vector; + +static void ba_replay_assert(bool pred, const char *msg, const char *line) { + if (!pred) { + fprintf(stderr, "%s, line: %s\n", msg, line); + abort(); + } +} + +// return line with whitespace skipped, and any newline replaced with a null byte +static char *tidy_line(char *line) { + // skip leading whitespace + while (isspace(*line)) { + line++; + } + char *ptr = strchr(line, '\n'); + if (ptr != nullptr) { + *ptr = '\0'; + } + return line; +} + +static int64_t parse_number(char **ptr, int base) { + *ptr = tidy_line(*ptr); + + char *new_ptr; + int64_t n = strtoll(*ptr, &new_ptr, base); + ba_replay_assert(n >= 0, "malformed trace", *ptr); + *ptr = new_ptr; + return n; +} + +static uint64_t parse_uint64(char **ptr) { + int64_t n = parse_number(ptr, 10); + ba_replay_assert(n >= 0, "malformed trace", *ptr); + // we happen to know that the uint64's we deal with will + // take less than 63 bits (they come from pointers) + return static_cast(n); +} + +static string parse_token(char **ptr) { + char *line = *ptr; + + // parse the first token, which represents the traced function + char token[64]; + int r = sscanf(line, "%64s", token); + ba_replay_assert(r == 1, "malformed trace", line); + *ptr += strlen(token); + return string(token); +} + +static vector canonicalize_trace_from(FILE *file) { + // new trace, canonicalized from a raw trace + vector canonicalized_trace; + + // raw trace offset `result' -> canonical allocation id, generated in sequence + // + // keeps track of which allocation results map to a specific allocation event + // later, when we write free()s to the trace, we'll need to translate + // the offset to the allocation seq 
num + map offset_to_seq_num; + uint64_t allocation_seq_num = 0; + + // maps raw allocator id to canonical allocator id, generated in sequence + // + // keeps track of allocators that were created as part of the trace, + // and therefore will be part of the canonicalized trace. + map allocator_ids; + uint64_t allocator_id_seq_num = 0; + + const int max_line = 512; + char line[max_line]; + while (fgets(line, max_line, file) != nullptr) { + // removes leading whitespace and trailing newline + char *ptr = tidy_line(line); + + string fn = parse_token(&ptr); + int64_t allocator_id = parse_number(&ptr, 16); + + std::stringstream ss; + if (fn == "ba_trace_create") { + // only allocators created in the raw traec will be part of the + // canonical trace, so save the next canonical allocator id here. + ba_replay_assert(allocator_ids.count(allocator_id) == 0, "corrupted trace: double create", line); + allocator_ids[allocator_id] = allocator_id_seq_num; + ss << fn << ' ' << allocator_id_seq_num << ' ' << std::endl; + allocator_id_seq_num++; + } else if (allocator_ids.count(allocator_id) > 0) { + // this allocator is part of the canonical trace + uint64_t canonical_allocator_id = allocator_ids[allocator_id]; + if (fn == "ba_trace_alloc") { + const uint64_t size = parse_uint64(&ptr); + const uint64_t offset = parse_uint64(&ptr); + ba_replay_assert(offset_to_seq_num.count(offset) == 0, "corrupted trace: double alloc", line); + + // remember that an allocation at `offset' has the current alloc seq num + offset_to_seq_num[offset] = allocation_seq_num; + + // translate `offset = alloc(size)' to `asn = alloc(size)' + ss << fn << ' ' << canonical_allocator_id << ' ' << size << ' ' << allocation_seq_num << std::endl; + allocation_seq_num++; + } else if (fn == "ba_trace_free") { + const uint64_t offset = parse_uint64(&ptr); + ba_replay_assert(offset_to_seq_num.count(offset) != 0, "corrupted trace: invalid free", line); + + // get the alloc seq num for an allcation that occurred at 
`offset' + const uint64_t asn = offset_to_seq_num[offset]; + + // translate `free(offset)' to `free(asn)' + ss << fn << ' ' << canonical_allocator_id << ' ' << asn << std::endl; + } else if (fn == "ba_trace_destroy") { + allocator_ids.erase(allocator_id); + + // translate `destroy(ptr_id) to destroy(canonical_id)' + ss << fn << ' ' << canonical_allocator_id << ' ' << std::endl; + } + } else { + // traced an alloc/free for an allocator not created as part of this trace, skip + continue; + } + canonicalized_trace.push_back(ss.str()); + } + + return canonicalized_trace; +} + +static void replay_canonicalized_trace(const vector &canonicalized_trace, + block_allocator::allocation_strategy strategy, + map *allocator_map) { + // maps allocation seq num to allocated offset + map seq_num_to_offset; + + for (vector::const_iterator it = canonicalized_trace.begin(); + it != canonicalized_trace.end(); it++) { + char *line = toku_strdup(it->c_str()); + + printf("playing canonical trace line: %s", line); + char *ptr = tidy_line(line); + + // canonical allocator id is in base 10, not 16 + string fn = parse_token(&ptr); + int64_t allocator_id = parse_number(&ptr, 10); + + if (fn == "ba_trace_create") { + ba_replay_assert(allocator_map->count(allocator_id) == 0, + "corrupted canonical trace: double create", ptr); + + block_allocator *ba = new block_allocator(); + ba->create(8096, 4096); // header reserve, alignment - taken from block_table.cc + ba->set_strategy(strategy); + + // caller owns the allocator_map and its contents + (*allocator_map)[allocator_id] = ba; + } else { + ba_replay_assert(allocator_map->count(allocator_id) > 0, + "corrupted canonical trace: no such allocator", line); + + block_allocator *ba = (*allocator_map)[allocator_id]; + if (fn == "ba_trace_alloc") { + const uint64_t size = parse_uint64(&ptr); + const uint64_t asn = parse_uint64(&ptr); + ba_replay_assert(seq_num_to_offset.count(asn) == 0, + "corrupted canonical trace: double alloc", line); + + uint64_t 
offset; + ba->alloc_block(size, &offset); + seq_num_to_offset[asn] = offset; + } else if (fn == "ba_trace_free") { + const uint64_t asn = parse_uint64(&ptr); + ba_replay_assert(seq_num_to_offset.count(asn) == 1, + "corrupted canonical trace: double free", line); + + uint64_t offset = seq_num_to_offset[asn]; + ba->free_block(offset); + seq_num_to_offset.erase(asn); + } else if (fn == "ba_trace_destroy") { + allocator_map->erase(allocator_id); + } else { + ba_replay_assert(false, "corrupted canonical trace: bad fn", line); + } + } + + toku_free(line); + } +} + +// TODO: Put this in the allocation strategy class +static const char *strategy_str(block_allocator::allocation_strategy strategy) { + switch (strategy) { + case block_allocator::allocation_strategy::BA_STRATEGY_FIRST_FIT: + return "first-fit"; + default: + abort(); + } +} + +static void print_result(uint64_t allocator_id, + block_allocator::allocation_strategy strategy, + TOKU_DB_FRAGMENTATION report) { + uint64_t total_bytes = report->data_bytes + report->unused_bytes; + uint64_t total_blocks = report->data_blocks + report->unused_blocks; + printf("\n"); + printf("allocator_id: %20ld\n", allocator_id); + printf("strategy: %20s\n", strategy_str(strategy)); + + // byte statistics + printf("total bytes: %20ld\n", total_bytes); + printf("used bytes: %20ld (%.3lf)\n", report->data_bytes, + static_cast(report->data_bytes) / total_bytes); + printf("unused bytes: %20ld (%.3lf)\n", report->unused_bytes, + static_cast(report->unused_bytes) / total_bytes); + + // block statistics + printf("total blocks: %20ld\n", total_blocks); + printf("used blocks: %20ld (%.3lf)\n", report->data_blocks, + static_cast(report->data_blocks) / total_blocks); + printf("unused blocks: %20ld (%.3lf)\n", report->unused_blocks, + static_cast(report->unused_blocks) / total_blocks); + + // misc + printf("largest unused: %20ld\n", report->largest_unused_block); +} + +int main(void) { + // Read the raw trace from stdin + vector 
canonicalized_trace = canonicalize_trace_from(stdin); + + vector candidate_strategies; + candidate_strategies.push_back(block_allocator::allocation_strategy::BA_STRATEGY_FIRST_FIT); + + for (vector::const_iterator it = candidate_strategies.begin(); + it != candidate_strategies.end(); it++) { + const block_allocator::allocation_strategy strategy(*it); + + // replay the canonicalized trace against the current strategy. + // + // we provided the allocator map so we can gather statistics later + map allocator_map; + replay_canonicalized_trace(canonicalized_trace, strategy, &allocator_map); + + for (map::iterator al = allocator_map.begin(); + al != allocator_map.end(); al++) { + block_allocator *ba = al->second; + + TOKU_DB_FRAGMENTATION_S report; + ba->get_statistics(&report); + ba->destroy(); + + print_result(al->first, strategy,&report); + } + } + + return 0; +} From 2548a42617959ffc0e87a0d308a37a9abad93123 Mon Sep 17 00:00:00 2001 From: John Esmet Date: Tue, 8 Jul 2014 15:37:35 -0400 Subject: [PATCH 088/190] FT-307 Add a 'memcmp magic' API, where users can specify that a particular value for the first byte of a key means it may be compared with memcmp and a length check. 
FT-50 Implement toku_builtin_cmp_fun with memcmp and a length check --- buildheader/make_tdb.cc | 1 + ft/comparator.h | 32 ++++++++++++++++++--------- ft/ft-internal.h | 1 + ft/ft-ops.cc | 49 ++++++++++++++++++++++------------------- ft/ft-ops.h | 1 + ft/ft.cc | 5 +++-- src/ydb.cc | 25 ++------------------- src/ydb_db.cc | 37 +++++++++++++++++++++++++++++-- src/ydb_db.h | 1 + 9 files changed, 92 insertions(+), 60 deletions(-) diff --git a/buildheader/make_tdb.cc b/buildheader/make_tdb.cc index bd0e01e2960..91e26f93c2a 100644 --- a/buildheader/make_tdb.cc +++ b/buildheader/make_tdb.cc @@ -545,6 +545,7 @@ static void print_db_struct (void) { "int (*change_fanout)(DB *db, uint32_t fanout)", "int (*get_fanout)(DB *db, uint32_t *fanout)", "int (*set_fanout)(DB *db, uint32_t fanout)", + "int (*set_memcmp_magic)(DB *db, uint8_t magic)", "int (*set_indexer)(DB*, DB_INDEXER*)", "void (*get_indexer)(DB*, DB_INDEXER**)", "int (*verify_with_progress)(DB *, int (*progress_callback)(void *progress_extra, float progress), void *progress_extra, int verbose, int keep_going)", diff --git a/ft/comparator.h b/ft/comparator.h index 85a95819d2c..e468e1fe82b 100644 --- a/ft/comparator.h +++ b/ft/comparator.h @@ -110,12 +110,16 @@ namespace toku { // that points may be positive or negative infinity. 
class comparator { - public: - void create(ft_compare_func cmp, DESCRIPTOR desc) { + void init(ft_compare_func cmp, DESCRIPTOR desc, uint8_t memcmp_magic) { _cmp = cmp; - XCALLOC(_fake_db); _fake_db->cmp_descriptor = desc; - _builtin = _cmp == &toku_builtin_compare_fun; + _memcmp_magic = memcmp_magic; + } + + public: + void create(ft_compare_func cmp, DESCRIPTOR desc, uint8_t memcmp_magic = 0) { + XCALLOC(_fake_db); + init(cmp, desc, memcmp_magic); } // inherit the attributes of another comparator, but keep our own @@ -124,9 +128,7 @@ namespace toku { invariant_notnull(_fake_db); invariant_notnull(cmp._cmp); invariant_notnull(cmp._fake_db); - _cmp = cmp._cmp; - _fake_db->cmp_descriptor = cmp._fake_db->cmp_descriptor; - _builtin = cmp._builtin; + init(cmp._cmp, cmp._fake_db->cmp_descriptor, cmp._memcmp_magic); } // like inherit, but doesn't require that the this comparator @@ -148,14 +150,24 @@ namespace toku { return _cmp; } + uint8_t get_memcmp_magic() const { + return _memcmp_magic; + } + bool valid() const { return _cmp != nullptr; } + inline bool dbt_has_memcmp_magic(const DBT *dbt) const { + return *reinterpret_cast(dbt->data) == _memcmp_magic; + } + int operator()(const DBT *a, const DBT *b) const { - if (__builtin_expect(!!(toku_dbt_is_infinite(a) || toku_dbt_is_infinite(b)), 0)) { + if (__builtin_expect(toku_dbt_is_infinite(a) || toku_dbt_is_infinite(b), 0)) { return toku_dbt_infinite_compare(a, b); - } else if (_builtin) { + } else if (_memcmp_magic && dbt_has_memcmp_magic(a) + // At this point we expect b to also have the memcmp magic + && __builtin_expect(dbt_has_memcmp_magic(b), 1)) { return toku_builtin_compare_fun(nullptr, a, b); } else { // yikes, const sadness here @@ -166,7 +178,7 @@ namespace toku { private: DB *_fake_db; ft_compare_func _cmp; - bool _builtin; + uint8_t _memcmp_magic; }; } /* namespace toku */ diff --git a/ft/ft-internal.h b/ft/ft-internal.h index 87abf06752e..89d98f1c676 100644 --- a/ft/ft-internal.h +++ b/ft/ft-internal.h @@ 
-278,6 +278,7 @@ struct ft_options { enum toku_compression_method compression_method; unsigned int fanout; unsigned int flags; + uint8_t memcmp_magic; ft_compare_func compare_fun; ft_update_func update_fun; }; diff --git a/ft/ft-ops.cc b/ft/ft-ops.cc index 1f045bd8dda..bb8ee27b9a8 100644 --- a/ft/ft-ops.cc +++ b/ft/ft-ops.cc @@ -2887,6 +2887,17 @@ toku_ft_handle_get_fanout(FT_HANDLE ft_handle, unsigned int *fanout) *fanout = ft_handle->options.fanout; } } + +void toku_ft_handle_set_memcmp_magic(FT_HANDLE ft_handle, uint8_t magic) { + invariant(magic != 0); + if (ft_handle->ft) { + // handle is already open, application bug if memcmp magic changes + invariant(ft_handle->ft->cmp.get_memcmp_magic() == magic); + } else { + ft_handle->options.memcmp_magic = magic; + } +} + static int verify_builtin_comparisons_consistent(FT_HANDLE t, uint32_t flags) { if ((flags & TOKU_DB_KEYCMP_BUILTIN) && (t->options.compare_fun != toku_builtin_compare_fun)) @@ -2955,6 +2966,7 @@ toku_ft_handle_inherit_options(FT_HANDLE t, FT ft) { .compression_method = ft->h->compression_method, .fanout = ft->h->fanout, .flags = ft->h->flags, + .memcmp_magic = ft->cmp.get_memcmp_magic(), .compare_fun = ft->cmp.get_compare_func(), .update_fun = ft->update_fun }; @@ -4721,32 +4733,23 @@ int toku_ft_strerror_r(int error, char *buf, size_t buflen) } } -// when a and b are chars, return a-b is safe here because return type is int. No over/underflow possible. -int toku_keycompare (const void *key1, uint32_t key1len, const void *key2, uint32_t key2len) { - int comparelen = key1len4; - k1+=4, k2+=4, comparelen-=4) { - { int v1=k1[0], v2=k2[0]; if (v1!=v2) return v1-v2; } - { int v1=k1[1], v2=k2[1]; if (v1!=v2) return v1-v2; } - { int v1=k1[2], v2=k2[2]; if (v1!=v2) return v1-v2; } - { int v1=k1[3], v2=k2[3]; if (v1!=v2) return v1-v2; } +int toku_keycompare(const void *key1, uint32_t key1len, const void *key2, uint32_t key2len) { + int comparelen = key1len < key2len ? 
key1len : key2len; + int c = memcmp(key1, key2, comparelen); + if (__builtin_expect(c != 0, 1)) { + return c; + } else { + if (key1len < key2len) { + return -1; + } else if (key1len > key2len) { + return 1; + } else { + return 0; + } } - for (; - comparelen>0; - k1++, k2++, comparelen--) { - if (*k1 != *k2) { - return (int)*k1-(int)*k2; - } - } - if (key1lenkey2len) return 1; - return 0; } -int toku_builtin_compare_fun (DB *db __attribute__((__unused__)), const DBT *a, const DBT*b) { +int toku_builtin_compare_fun(DB *db __attribute__((__unused__)), const DBT *a, const DBT*b) { return toku_keycompare(a->data, a->size, b->data, b->size); } diff --git a/ft/ft-ops.h b/ft/ft-ops.h index 3565eedcb1b..bdd9d33988c 100644 --- a/ft/ft-ops.h +++ b/ft/ft-ops.h @@ -126,6 +126,7 @@ void toku_ft_handle_set_compression_method(FT_HANDLE, enum toku_compression_meth void toku_ft_handle_get_compression_method(FT_HANDLE, enum toku_compression_method *); void toku_ft_handle_set_fanout(FT_HANDLE, unsigned int fanout); void toku_ft_handle_get_fanout(FT_HANDLE, unsigned int *fanout); +void toku_ft_handle_set_memcmp_magic(FT_HANDLE, uint8_t magic); void toku_ft_set_bt_compare(FT_HANDLE ft_handle, ft_compare_func cmp_func); const toku::comparator &toku_ft_get_comparator(FT_HANDLE ft_handle); diff --git a/ft/ft.cc b/ft/ft.cc index 5c43a5efd50..9eafc756ce1 100644 --- a/ft/ft.cc +++ b/ft/ft.cc @@ -384,7 +384,7 @@ static void ft_init(FT ft, FT_OPTIONS options, CACHEFILE cf) { toku_list_init(&ft->live_ft_handles); // intuitively, the comparator points to the FT's cmp descriptor - ft->cmp.create(options->compare_fun, &ft->cmp_descriptor); + ft->cmp.create(options->compare_fun, &ft->cmp_descriptor, options->memcmp_magic); ft->update_fun = options->update_fun; if (ft->cf != NULL) { @@ -486,7 +486,7 @@ int toku_read_ft_and_store_in_cachefile (FT_HANDLE ft_handle, CACHEFILE cf, LSN invariant_notnull(ft); // intuitively, the comparator points to the FT's cmp descriptor - 
ft->cmp.create(ft_handle->options.compare_fun, &ft->cmp_descriptor); + ft->cmp.create(ft_handle->options.compare_fun, &ft->cmp_descriptor, ft_handle->options.memcmp_magic); ft->update_fun = ft_handle->options.update_fun; ft->cf = cf; toku_cachefile_set_userdata(cf, @@ -611,6 +611,7 @@ toku_ft_init(FT ft, .compression_method = compression_method, .fanout = fanout, .flags = 0, + .memcmp_magic = 0, .compare_fun = NULL, .update_fun = NULL }; diff --git a/src/ydb.cc b/src/ydb.cc index 4eeadfefc6f..9cc82949d80 100644 --- a/src/ydb.cc +++ b/src/ydb.cc @@ -465,27 +465,6 @@ needs_recovery (DB_ENV *env) { static int toku_env_txn_checkpoint(DB_ENV * env, uint32_t kbyte, uint32_t min, uint32_t flags); -// Instruct db to use the default (built-in) key comparison function -// by setting the flag bits in the db and ft structs -static int -db_use_builtin_key_cmp(DB *db) { - HANDLE_PANICKED_DB(db); - int r = 0; - if (db_opened(db)) - r = toku_ydb_do_error(db->dbenv, EINVAL, "Comparison functions cannot be set after DB open.\n"); - else if (db->i->key_compare_was_set) - r = toku_ydb_do_error(db->dbenv, EINVAL, "Key comparison function already set.\n"); - else { - uint32_t tflags; - toku_ft_get_flags(db->i->ft_handle, &tflags); - - tflags |= TOKU_DB_KEYCMP_BUILTIN; - toku_ft_set_flags(db->i->ft_handle, tflags); - db->i->key_compare_was_set = true; - } - return r; -} - // Keys used in persistent environment dictionary: // Following keys added in version 12 static const char * orig_env_ver_key = "original_version"; @@ -1025,7 +1004,7 @@ env_open(DB_ENV * env, const char *home, uint32_t flags, int mode) { { r = toku_db_create(&env->i->persistent_environment, env, 0); assert_zero(r); - r = db_use_builtin_key_cmp(env->i->persistent_environment); + r = toku_db_use_builtin_key_cmp(env->i->persistent_environment); assert_zero(r); r = toku_db_open_iname(env->i->persistent_environment, txn, toku_product_name_strings.environmentdictionary, DB_CREATE, mode); if (r != 0) { @@ -1063,7 +1042,7 @@ 
env_open(DB_ENV * env, const char *home, uint32_t flags, int mode) { { r = toku_db_create(&env->i->directory, env, 0); assert_zero(r); - r = db_use_builtin_key_cmp(env->i->directory); + r = toku_db_use_builtin_key_cmp(env->i->directory); assert_zero(r); r = toku_db_open_iname(env->i->directory, txn, toku_product_name_strings.fileopsdirectory, DB_CREATE, mode); if (r != 0) { diff --git a/src/ydb_db.cc b/src/ydb_db.cc index f33d965c9b9..ad49376a39d 100644 --- a/src/ydb_db.cc +++ b/src/ydb_db.cc @@ -431,8 +431,27 @@ void toku_db_lt_on_destroy_callback(toku::locktree *lt) { toku_ft_handle_close(ft_handle); } -int -toku_db_open_iname(DB * db, DB_TXN * txn, const char *iname_in_env, uint32_t flags, int mode) { +// Instruct db to use the default (built-in) key comparison function +// by setting the flag bits in the db and ft structs +int toku_db_use_builtin_key_cmp(DB *db) { + HANDLE_PANICKED_DB(db); + int r = 0; + if (db_opened(db)) { + r = toku_ydb_do_error(db->dbenv, EINVAL, "Comparison functions cannot be set after DB open.\n"); + } else if (db->i->key_compare_was_set) { + r = toku_ydb_do_error(db->dbenv, EINVAL, "Key comparison function already set.\n"); + } else { + uint32_t tflags; + toku_ft_get_flags(db->i->ft_handle, &tflags); + + tflags |= TOKU_DB_KEYCMP_BUILTIN; + toku_ft_set_flags(db->i->ft_handle, tflags); + db->i->key_compare_was_set = true; + } + return r; +} + +int toku_db_open_iname(DB * db, DB_TXN * txn, const char *iname_in_env, uint32_t flags, int mode) { //Set comparison functions if not yet set. 
HANDLE_READ_ONLY_TXN(txn); if (!db->i->key_compare_was_set && db->dbenv->i->bt_compare) { @@ -704,6 +723,19 @@ toku_db_get_fanout(DB *db, unsigned int *fanout) { return 0; } +static int +toku_db_set_memcmp_magic(DB *db, uint8_t magic) { + HANDLE_PANICKED_DB(db); + if (db_opened(db)) { + return EINVAL; + } + if (magic == 0) { + return EINVAL; + } + toku_ft_handle_set_memcmp_magic(db->i->ft_handle, magic); + return 0; +} + static int toku_db_get_fractal_tree_info64(DB *db, uint64_t *num_blocks_allocated, uint64_t *num_blocks_in_use, uint64_t *size_allocated, uint64_t *size_in_use) { HANDLE_PANICKED_DB(db); @@ -1101,6 +1133,7 @@ toku_db_create(DB ** db, DB_ENV * env, uint32_t flags) { USDB(change_compression_method); USDB(set_fanout); USDB(get_fanout); + USDB(set_memcmp_magic); USDB(change_fanout); USDB(set_flags); USDB(get_flags); diff --git a/src/ydb_db.h b/src/ydb_db.h index d8bc0223e0e..9fac85f9ffb 100644 --- a/src/ydb_db.h +++ b/src/ydb_db.h @@ -131,6 +131,7 @@ static inline const toku::comparator &toku_db_get_comparator(DB *db) { return toku_ft_get_comparator(db->i->ft_handle); } +int toku_db_use_builtin_key_cmp(DB *db); int toku_db_pre_acquire_fileops_lock(DB *db, DB_TXN *txn); int toku_db_open_iname(DB * db, DB_TXN * txn, const char *iname, uint32_t flags, int mode); int toku_db_pre_acquire_table_lock(DB *db, DB_TXN *txn); From 8aff914d395698fbe3121ddef270958e8547056a Mon Sep 17 00:00:00 2001 From: John Esmet Date: Tue, 8 Jul 2014 16:08:40 -0400 Subject: [PATCH 089/190] FT-300 Fix OSX build --- tools/ba_replay.cc | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tools/ba_replay.cc b/tools/ba_replay.cc index c2a912880c0..f9052f07fdc 100644 --- a/tools/ba_replay.cc +++ b/tools/ba_replay.cc @@ -311,25 +311,25 @@ static void print_result(uint64_t allocator_id, uint64_t total_bytes = report->data_bytes + report->unused_bytes; uint64_t total_blocks = report->data_blocks + report->unused_blocks; printf("\n"); - printf("allocator_id: 
%20ld\n", allocator_id); + printf("allocator_id: %20" PRId64 "\n", allocator_id); printf("strategy: %20s\n", strategy_str(strategy)); // byte statistics - printf("total bytes: %20ld\n", total_bytes); - printf("used bytes: %20ld (%.3lf)\n", report->data_bytes, + printf("total bytes: %20" PRId64 "\n", total_bytes); + printf("used bytes: %20" PRId64 " (%.3lf)\n", report->data_bytes, static_cast(report->data_bytes) / total_bytes); - printf("unused bytes: %20ld (%.3lf)\n", report->unused_bytes, + printf("unused bytes: %20" PRId64 " (%.3lf)\n", report->unused_bytes, static_cast(report->unused_bytes) / total_bytes); // block statistics - printf("total blocks: %20ld\n", total_blocks); - printf("used blocks: %20ld (%.3lf)\n", report->data_blocks, + printf("total blocks: %20" PRId64 "\n", total_blocks); + printf("used blocks: %20" PRId64 " (%.3lf)\n", report->data_blocks, static_cast(report->data_blocks) / total_blocks); - printf("unused blocks: %20ld (%.3lf)\n", report->unused_blocks, + printf("unused blocks: %20" PRId64 " (%.3lf)\n", report->unused_blocks, static_cast(report->unused_blocks) / total_blocks); // misc - printf("largest unused: %20ld\n", report->largest_unused_block); + printf("largest unused: %20" PRId64 "\n", report->largest_unused_block); } int main(void) { From 2bb482bb4e6a95f95625052ad5937f227ebd5c7c Mon Sep 17 00:00:00 2001 From: Rich Prohaska Date: Wed, 9 Jul 2014 15:39:57 -0400 Subject: [PATCH 090/190] FT-308 support deferred XA recovery with txn discard and dirty environment shutdown --- buildheader/make_tdb.cc | 2 + ft/ft.cc | 16 +-- ft/logger/logger.cc | 26 ++-- ft/logger/logger.h | 1 + ft/txn/rollback-apply.cc | 18 +-- ft/txn/rollback-apply.h | 1 + ft/txn/txn.cc | 7 +- ft/txn/txn.h | 2 + src/tests/xa-dirty-commit.cc | 193 ++++++++++++++++++++++++++++ src/tests/xa-dirty-rollback.cc | 193 ++++++++++++++++++++++++++++ src/tests/xa-txn-discard-abort.cc | 195 ++++++++++++++++++++++++++++ src/tests/xa-txn-discard-commit.cc | 196 
+++++++++++++++++++++++++++++ src/ydb.cc | 39 +++--- src/ydb_txn.cc | 46 +++++-- 14 files changed, 889 insertions(+), 46 deletions(-) create mode 100644 src/tests/xa-dirty-commit.cc create mode 100644 src/tests/xa-dirty-rollback.cc create mode 100644 src/tests/xa-txn-discard-abort.cc create mode 100644 src/tests/xa-txn-discard-commit.cc diff --git a/buildheader/make_tdb.cc b/buildheader/make_tdb.cc index 91e26f93c2a..7ddcd161989 100644 --- a/buildheader/make_tdb.cc +++ b/buildheader/make_tdb.cc @@ -291,6 +291,7 @@ static void print_defines (void) { printf("#define DB_IS_HOT_INDEX 0x00100000\n"); // private tokudb printf("#define DBC_DISABLE_PREFETCHING 0x20000000\n"); // private tokudb printf("#define DB_UPDATE_CMP_DESCRIPTOR 0x40000000\n"); // private tokudb + printf("#define TOKUFT_DIRTY_SHUTDOWN %x\n", 1<<31); { //dbt flags @@ -572,6 +573,7 @@ static void print_db_txn_struct (void) { STRUCT_SETUP(DB_TXN, api_internal,"void *%s"); STRUCT_SETUP(DB_TXN, commit, "int (*%s) (DB_TXN*, uint32_t)"); STRUCT_SETUP(DB_TXN, prepare, "int (*%s) (DB_TXN*, uint8_t gid[DB_GID_SIZE])"); + STRUCT_SETUP(DB_TXN, discard, "int (*%s) (DB_TXN*, uint32_t)"); STRUCT_SETUP(DB_TXN, id, "uint32_t (*%s) (DB_TXN *)"); STRUCT_SETUP(DB_TXN, mgrp, "DB_ENV *%s /*In TokuDB, mgrp is a DB_ENV not a DB_TXNMGR*/"); STRUCT_SETUP(DB_TXN, parent, "DB_TXN *%s"); diff --git a/ft/ft.cc b/ft/ft.cc index 9eafc756ce1..4c358f95a55 100644 --- a/ft/ft.cc +++ b/ft/ft.cc @@ -313,14 +313,16 @@ static void ft_close(CACHEFILE cachefile, int fd, void *header_v, bool oplsn_val } } if (ft->h->dirty) { // this is the only place this bit is tested (in currentheader) - if (logger) { //Rollback cachefile MUST NOT BE CLOSED DIRTY - //It can be checkpointed only via 'checkpoint' - assert(logger->rollback_cachefile != cachefile); + bool do_checkpoint = true; + if (logger && logger->rollback_cachefile == cachefile) { + do_checkpoint = false; + } + if (do_checkpoint) { + ft_begin_checkpoint(lsn, header_v); + 
ft_checkpoint(cachefile, fd, ft); + ft_end_checkpoint(cachefile, fd, header_v); + assert(!ft->h->dirty); // dirty bit should be cleared by begin_checkpoint and never set again (because we're closing the dictionary) } - ft_begin_checkpoint(lsn, header_v); - ft_checkpoint(cachefile, fd, ft); - ft_end_checkpoint(cachefile, fd, header_v); - assert(!ft->h->dirty); // dirty bit should be cleared by begin_checkpoint and never set again (because we're closing the dictionary) } } diff --git a/ft/logger/logger.cc b/ft/logger/logger.cc index a2e5a257456..cf078d7a680 100644 --- a/ft/logger/logger.cc +++ b/ft/logger/logger.cc @@ -304,26 +304,30 @@ toku_logger_open_rollback(TOKULOGGER logger, CACHETABLE cachetable, bool create) // so it will always be clean (!h->dirty) when about to be closed. // Rollback log can only be closed when there are no open transactions, // so it will always be empty (no data blocks) when about to be closed. -void toku_logger_close_rollback(TOKULOGGER logger) { +void toku_logger_close_rollback_check_empty(TOKULOGGER logger, bool clean_shutdown) { CACHEFILE cf = logger->rollback_cachefile; // stored in logger at rollback cachefile open if (cf) { FT_HANDLE ft_to_close; { //Find "ft_to_close" logger->rollback_cache.destroy(); FT CAST_FROM_VOIDP(ft, toku_cachefile_get_userdata(cf)); - //Verify it is safe to close it. - assert(!ft->h->dirty); //Must not be dirty. - toku_free_unused_blocknums(ft->blocktable, ft->h->root_blocknum); - //Must have no data blocks (rollback logs or otherwise). - toku_block_verify_no_data_blocks_except_root(ft->blocktable, ft->h->root_blocknum); - assert(!ft->h->dirty); + if (clean_shutdown) { + //Verify it is safe to close it. + assert(!ft->h->dirty); //Must not be dirty. + toku_free_unused_blocknums(ft->blocktable, ft->h->root_blocknum); + //Must have no data blocks (rollback logs or otherwise). 
+ toku_block_verify_no_data_blocks_except_root(ft->blocktable, ft->h->root_blocknum); + assert(!ft->h->dirty); + } else { + ft->h->dirty = 0; + } ft_to_close = toku_ft_get_only_existing_ft_handle(ft); - { + if (clean_shutdown) { bool is_empty; is_empty = toku_ft_is_empty_fast(ft_to_close); assert(is_empty); + assert(!ft->h->dirty); // it should not have been dirtied by the toku_ft_is_empty test. } - assert(!ft->h->dirty); // it should not have been dirtied by the toku_ft_is_empty test. } toku_ft_handle_close(ft_to_close); @@ -332,6 +336,10 @@ void toku_logger_close_rollback(TOKULOGGER logger) { } } +void toku_logger_close_rollback(TOKULOGGER logger) { + toku_logger_close_rollback_check_empty(logger, true); +} + // No locks held on entry // No locks held on exit. // No locks are needed, since you cannot legally close the log concurrently with doing anything else. diff --git a/ft/logger/logger.h b/ft/logger/logger.h index 4068e802705..9ef62f9b596 100644 --- a/ft/logger/logger.h +++ b/ft/logger/logger.h @@ -115,6 +115,7 @@ int toku_logger_close(TOKULOGGER *loggerp); void toku_logger_initialize_rollback_cache(TOKULOGGER logger, struct ft *ft); int toku_logger_open_rollback(TOKULOGGER logger, struct cachetable *ct, bool create); void toku_logger_close_rollback(TOKULOGGER logger); +void toku_logger_close_rollback_check_empty(TOKULOGGER logger, bool clean_shutdown); bool toku_logger_rollback_is_open (TOKULOGGER); // return true iff the rollback is open. 
void toku_logger_fsync (TOKULOGGER logger); diff --git a/ft/txn/rollback-apply.cc b/ft/txn/rollback-apply.cc index 5bd53193642..2f0239bb175 100644 --- a/ft/txn/rollback-apply.cc +++ b/ft/txn/rollback-apply.cc @@ -92,8 +92,7 @@ PATENT RIGHTS GRANT: #include "ft/logger/log-internal.h" #include "ft/txn/rollback-apply.h" -static void -poll_txn_progress_function(TOKUTXN txn, uint8_t is_commit, uint8_t stall_for_checkpoint) { +static void poll_txn_progress_function(TOKUTXN txn, uint8_t is_commit, uint8_t stall_for_checkpoint) { if (txn->progress_poll_fun) { TOKU_TXN_PROGRESS_S progress = { .entries_total = txn->roll_info.num_rollentries, @@ -124,17 +123,14 @@ int toku_abort_rollback_item (TOKUTXN txn, struct roll_entry *item, LSN lsn) { return r; } -int -note_ft_used_in_txns_parent(const FT &ft, uint32_t UU(index), TOKUTXN const child); -int -note_ft_used_in_txns_parent(const FT &ft, uint32_t UU(index), TOKUTXN const child) { +int note_ft_used_in_txns_parent(const FT &ft, uint32_t UU(index), TOKUTXN const child); +int note_ft_used_in_txns_parent(const FT &ft, uint32_t UU(index), TOKUTXN const child) { TOKUTXN parent = child->parent; toku_txn_maybe_note_ft(parent, ft); return 0; } -static int -apply_txn(TOKUTXN txn, LSN lsn, apply_rollback_item func) { +static int apply_txn(TOKUTXN txn, LSN lsn, apply_rollback_item func) { int r = 0; // do the commit/abort calls and free everything // we do the commit/abort calls in reverse order too. 
@@ -302,3 +298,9 @@ int toku_rollback_abort(TOKUTXN txn, LSN lsn) { assert(r==0); return r; } + +int toku_rollback_discard(TOKUTXN txn) { + txn->roll_info.current_rollback = ROLLBACK_NONE; + return 0; +} + diff --git a/ft/txn/rollback-apply.h b/ft/txn/rollback-apply.h index af93d62cebe..2ddd24563fe 100644 --- a/ft/txn/rollback-apply.h +++ b/ft/txn/rollback-apply.h @@ -98,3 +98,4 @@ int toku_abort_rollback_item (TOKUTXN txn, struct roll_entry *item, LSN lsn); int toku_rollback_commit(TOKUTXN txn, LSN lsn); int toku_rollback_abort(TOKUTXN txn, LSN lsn); +int toku_rollback_discard(TOKUTXN txn); diff --git a/ft/txn/txn.cc b/ft/txn/txn.cc index 29890ad1816..2654aafab26 100644 --- a/ft/txn/txn.cc +++ b/ft/txn/txn.cc @@ -627,7 +627,7 @@ void toku_txn_complete_txn(TOKUTXN txn) { assert(txn->roll_info.spilled_rollback_tail.b == ROLLBACK_NONE.b); assert(txn->roll_info.current_rollback.b == ROLLBACK_NONE.b); assert(txn->num_pin == 0); - assert(txn->state == TOKUTXN_COMMITTING || txn->state == TOKUTXN_ABORTING); + assert(txn->state == TOKUTXN_COMMITTING || txn->state == TOKUTXN_ABORTING || txn->state == TOKUTXN_PREPARING); if (txn->parent) { toku_txn_manager_handle_snapshot_destroy_for_child_txn( txn, @@ -800,6 +800,11 @@ int toku_txn_reads_txnid(TXNID txnid, TOKUTXN txn) { return r; } +int toku_txn_discard_txn(TOKUTXN txn) { + int r = toku_rollback_discard(txn); + return r; +} + #include void __attribute__((__constructor__)) toku_txn_status_helgrind_ignore(void); void toku_txn_status_helgrind_ignore(void) { diff --git a/ft/txn/txn.h b/ft/txn/txn.h index b96e9b8f1fe..f8d78eb956c 100644 --- a/ft/txn/txn.h +++ b/ft/txn/txn.h @@ -300,6 +300,8 @@ int toku_txn_abort_txn(struct tokutxn *txn, int toku_txn_abort_with_lsn(struct tokutxn *txn, LSN oplsn, TXN_PROGRESS_POLL_FUNCTION poll, void *poll_extra); +int toku_txn_discard_txn(struct tokutxn *txn); + void toku_txn_prepare_txn (struct tokutxn *txn, TOKU_XA_XID *xid); // Effect: Do the internal work of preparing a transaction (does 
not log the prepare record). diff --git a/src/tests/xa-dirty-commit.cc b/src/tests/xa-dirty-commit.cc new file mode 100644 index 00000000000..03850b2b026 --- /dev/null +++ b/src/tests/xa-dirty-commit.cc @@ -0,0 +1,193 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: +#ident "$Id$" +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuDB, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. 
+ +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. + + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. 
If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." +#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." +#include "test.h" + +// Verify that a commit of a prepared txn in recovery retains a db that was created by it. +// The rollback file is dirty when the environment is closed. 
+ +static void create_foo(DB_ENV *env, DB_TXN *txn) { + int r; + DB *db; + r = db_create(&db, env, 0); + CKERR(r); + r = db->open(db, txn, "foo.db", 0, DB_BTREE, DB_CREATE, S_IRWXU+S_IRWXG+S_IRWXO); + CKERR(r); + r = db->close(db, 0); + CKERR(r); +} + +static void check_foo(DB_ENV *env) { + int r; + DB *db; + r = db_create(&db, env, 0); + CKERR(r); + r = db->open(db, nullptr, "foo.db", 0, DB_BTREE, 0, 0); + CKERR(r); + r = db->close(db, 0); + CKERR(r); +} + +static void create_prepared_txn(void) { + int r; + + DB_ENV *env = nullptr; + r = db_env_create(&env, 0); + CKERR(r); + r = env->open(env, TOKU_TEST_FILENAME, + DB_INIT_MPOOL|DB_CREATE|DB_THREAD |DB_INIT_LOCK|DB_INIT_LOG|DB_INIT_TXN|DB_PRIVATE, + S_IRWXU+S_IRWXG+S_IRWXO); + CKERR(r); + + DB_TXN *txn = nullptr; + r = env->txn_begin(env, nullptr, &txn, 0); + CKERR(r); + + create_foo(env, txn); + + TOKU_XA_XID xid = { 0x1234, 8, 9 }; + for (int i = 0; i < 8+9; i++) { + xid.data[i] = i; + } + r = txn->xa_prepare(txn, &xid); + CKERR(r); + + // discard the txn so that we can close the env and run xa recovery later + r = txn->discard(txn, 0); + CKERR(r); + + r = env->close(env, TOKUFT_DIRTY_SHUTDOWN); + CKERR(r); +} + +static void run_xa_recovery(void) { + int r; + + DB_ENV *env; + r = db_env_create(&env, 0); + CKERR(r); + r = env->open(env, TOKU_TEST_FILENAME, + DB_INIT_MPOOL|DB_CREATE|DB_THREAD |DB_INIT_LOCK|DB_INIT_LOG|DB_INIT_TXN|DB_PRIVATE | DB_RECOVER, + S_IRWXU+S_IRWXG+S_IRWXO); + CKERR(r); + + // get prepared xid + long count; + TOKU_XA_XID xid; + r = env->txn_xa_recover(env, &xid, 1, &count, DB_FIRST); + CKERR(r); + + // commit it + DB_TXN *txn = nullptr; + r = env->get_txn_from_xid(env, &xid, &txn); + CKERR(r); + r = txn->commit(txn, 0); + CKERR(r); + + check_foo(env); + + r = env->close(env, 0); + CKERR(r); +} + +int test_main (int argc, char *const argv[]) { + default_parse_args(argc, argv); + + // init the env directory + toku_os_recursive_delete(TOKU_TEST_FILENAME); + int r = 
toku_os_mkdir(TOKU_TEST_FILENAME, S_IRWXU+S_IRWXG+S_IRWXO); + CKERR(r); + + // run the test + create_prepared_txn(); + run_xa_recovery(); + + return 0; +} diff --git a/src/tests/xa-dirty-rollback.cc b/src/tests/xa-dirty-rollback.cc new file mode 100644 index 00000000000..8d28e8a762f --- /dev/null +++ b/src/tests/xa-dirty-rollback.cc @@ -0,0 +1,193 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: +#ident "$Id$" +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuDB, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. 
+ +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. + + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. 
If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." +#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." +#include "test.h" + +// Verify that an abort of a prepared txn in recovery deletes a db created by it. +// The rollback file is dirty when the environment is closed. 
+ +static void create_foo(DB_ENV *env, DB_TXN *txn) { + int r; + DB *db; + r = db_create(&db, env, 0); + CKERR(r); + r = db->open(db, txn, "foo.db", 0, DB_BTREE, DB_CREATE, S_IRWXU+S_IRWXG+S_IRWXO); + CKERR(r); + r = db->close(db, 0); + CKERR(r); +} + +static void check_foo(DB_ENV *env) { + int r; + DB *db; + r = db_create(&db, env, 0); + CKERR(r); + r = db->open(db, nullptr, "foo.db", 0, DB_BTREE, 0, 0); + CKERR2(r, ENOENT); + r = db->close(db, 0); + CKERR(r); +} + +static void create_prepared_txn(void) { + int r; + + DB_ENV *env = nullptr; + r = db_env_create(&env, 0); + CKERR(r); + r = env->open(env, TOKU_TEST_FILENAME, + DB_INIT_MPOOL|DB_CREATE|DB_THREAD |DB_INIT_LOCK|DB_INIT_LOG|DB_INIT_TXN|DB_PRIVATE, + S_IRWXU+S_IRWXG+S_IRWXO); + CKERR(r); + + DB_TXN *txn = nullptr; + r = env->txn_begin(env, nullptr, &txn, 0); + CKERR(r); + + create_foo(env, txn); + + TOKU_XA_XID xid = { 0x1234, 8, 9 }; + for (int i = 0; i < 8+9; i++) { + xid.data[i] = i; + } + r = txn->xa_prepare(txn, &xid); + CKERR(r); + + // discard the txn so that we can close the env and run xa recovery later + r = txn->discard(txn, 0); + CKERR(r); + + r = env->close(env, TOKUFT_DIRTY_SHUTDOWN); + CKERR(r); +} + +static void run_xa_recovery(void) { + int r; + + DB_ENV *env; + r = db_env_create(&env, 0); + CKERR(r); + r = env->open(env, TOKU_TEST_FILENAME, + DB_INIT_MPOOL|DB_CREATE|DB_THREAD |DB_INIT_LOCK|DB_INIT_LOG|DB_INIT_TXN|DB_PRIVATE | DB_RECOVER, + S_IRWXU+S_IRWXG+S_IRWXO); + CKERR(r); + + // get prepared xid + long count; + TOKU_XA_XID xid; + r = env->txn_xa_recover(env, &xid, 1, &count, DB_FIRST); + CKERR(r); + + // abort it + DB_TXN *txn = nullptr; + r = env->get_txn_from_xid(env, &xid, &txn); + CKERR(r); + r = txn->abort(txn); + CKERR(r); + + check_foo(env); + + r = env->close(env, 0); + CKERR(r); +} + +int test_main (int argc, char *const argv[]) { + default_parse_args(argc, argv); + + // init the env directory + toku_os_recursive_delete(TOKU_TEST_FILENAME); + int r = 
toku_os_mkdir(TOKU_TEST_FILENAME, S_IRWXU+S_IRWXG+S_IRWXO); + CKERR(r); + + // run the test + create_prepared_txn(); + run_xa_recovery(); + + return 0; +} diff --git a/src/tests/xa-txn-discard-abort.cc b/src/tests/xa-txn-discard-abort.cc new file mode 100644 index 00000000000..3b71f807d44 --- /dev/null +++ b/src/tests/xa-txn-discard-abort.cc @@ -0,0 +1,195 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: +#ident "$Id$" +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuDB, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. 
+ +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. + + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. 
If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." +#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." +#include "test.h" + +// Verify that an abort of a prepared txn in recovery removes a db created by it. +// A checkpoint is taken between the db creation and the txn prepare. 
+ +static void create_foo(DB_ENV *env, DB_TXN *txn) { + int r; + DB *db; + r = db_create(&db, env, 0); + CKERR(r); + r = db->open(db, txn, "foo.db", 0, DB_BTREE, DB_CREATE, S_IRWXU+S_IRWXG+S_IRWXO); + CKERR(r); + r = db->close(db, 0); + CKERR(r); +} + +static void check_foo(DB_ENV *env) { + int r; + DB *db; + r = db_create(&db, env, 0); + CKERR(r); + r = db->open(db, nullptr, "foo.db", 0, DB_BTREE, 0, 0); + CKERR2(r, ENOENT); + r = db->close(db, 0); + CKERR(r); +} + +static void create_prepared_txn(void) { + int r; + + DB_ENV *env = nullptr; + r = db_env_create(&env, 0); + CKERR(r); + r = env->open(env, TOKU_TEST_FILENAME, + DB_INIT_MPOOL|DB_CREATE|DB_THREAD |DB_INIT_LOCK|DB_INIT_LOG|DB_INIT_TXN|DB_PRIVATE, + S_IRWXU+S_IRWXG+S_IRWXO); + CKERR(r); + + DB_TXN *txn = nullptr; + r = env->txn_begin(env, nullptr, &txn, 0); + CKERR(r); + + create_foo(env, txn); + r = env->txn_checkpoint(env, 0, 0, 0); + CKERR(r); + + TOKU_XA_XID xid = { 0x1234, 8, 9 }; + for (int i = 0; i < 8+9; i++) { + xid.data[i] = i; + } + r = txn->xa_prepare(txn, &xid); + CKERR(r); + + // discard the txn so that we can close the env and run xa recovery later + r = txn->discard(txn, 0); + CKERR(r); + + r = env->close(env, TOKUFT_DIRTY_SHUTDOWN); + CKERR(r); +} + +static void run_xa_recovery(void) { + int r; + + DB_ENV *env; + r = db_env_create(&env, 0); + CKERR(r); + r = env->open(env, TOKU_TEST_FILENAME, + DB_INIT_MPOOL|DB_CREATE|DB_THREAD |DB_INIT_LOCK|DB_INIT_LOG|DB_INIT_TXN|DB_PRIVATE | DB_RECOVER, + S_IRWXU+S_IRWXG+S_IRWXO); + CKERR(r); + + // get prepared xid + long count; + TOKU_XA_XID xid; + r = env->txn_xa_recover(env, &xid, 1, &count, DB_FIRST); + CKERR(r); + + // abort it + DB_TXN *txn = nullptr; + r = env->get_txn_from_xid(env, &xid, &txn); + CKERR(r); + r = txn->abort(txn); + CKERR(r); + + check_foo(env); + + r = env->close(env, 0); + CKERR(r); +} + +int test_main (int argc, char *const argv[]) { + default_parse_args(argc, argv); + + // init the env directory + 
toku_os_recursive_delete(TOKU_TEST_FILENAME); + int r = toku_os_mkdir(TOKU_TEST_FILENAME, S_IRWXU+S_IRWXG+S_IRWXO); + CKERR(r); + + // run the test + create_prepared_txn(); + run_xa_recovery(); + + return 0; +} diff --git a/src/tests/xa-txn-discard-commit.cc b/src/tests/xa-txn-discard-commit.cc new file mode 100644 index 00000000000..51b2d0670cd --- /dev/null +++ b/src/tests/xa-txn-discard-commit.cc @@ -0,0 +1,196 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: +#ident "$Id$" +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuDB, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. 
+ +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. + + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. 
If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." +#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." +#include "test.h" + +// Verify that a commit of a prepared txn in recovery retains a db created by it. +// A checkpoint is taken between the db creation and the txn prepare. 
+ +static void create_foo(DB_ENV *env, DB_TXN *txn) { + int r; + DB *db; + r = db_create(&db, env, 0); + CKERR(r); + r = db->open(db, txn, "foo.db", 0, DB_BTREE, DB_CREATE, S_IRWXU+S_IRWXG+S_IRWXO); + CKERR(r); + r = db->close(db, 0); + CKERR(r); +} + +static void check_foo(DB_ENV *env) { + int r; + DB *db; + r = db_create(&db, env, 0); + CKERR(r); + r = db->open(db, nullptr, "foo.db", 0, DB_BTREE, 0, 0); + CKERR(r); + r = db->close(db, 0); + CKERR(r); +} + +static void create_prepared_txn(void) { + int r; + + DB_ENV *env = nullptr; + r = db_env_create(&env, 0); + CKERR(r); + r = env->open(env, TOKU_TEST_FILENAME, + DB_INIT_MPOOL|DB_CREATE|DB_THREAD |DB_INIT_LOCK|DB_INIT_LOG|DB_INIT_TXN|DB_PRIVATE, + S_IRWXU+S_IRWXG+S_IRWXO); + CKERR(r); + + DB_TXN *txn = nullptr; + r = env->txn_begin(env, nullptr, &txn, 0); + CKERR(r); + + create_foo(env, txn); + + r = env->txn_checkpoint(env, 0, 0, 0); + CKERR(r); + + TOKU_XA_XID xid = { 0x1234, 8, 9 }; + for (int i = 0; i < 8+9; i++) { + xid.data[i] = i; + } + r = txn->xa_prepare(txn, &xid); + CKERR(r); + + // discard the txn so that we can close the env and run xa recovery later + r = txn->discard(txn, 0); + CKERR(r); + + r = env->close(env, TOKUFT_DIRTY_SHUTDOWN); + CKERR(r); +} + +static void run_xa_recovery(void) { + int r; + + DB_ENV *env; + r = db_env_create(&env, 0); + CKERR(r); + r = env->open(env, TOKU_TEST_FILENAME, + DB_INIT_MPOOL|DB_CREATE|DB_THREAD |DB_INIT_LOCK|DB_INIT_LOG|DB_INIT_TXN|DB_PRIVATE | DB_RECOVER, + S_IRWXU+S_IRWXG+S_IRWXO); + CKERR(r); + + // get prepared xid + long count; + TOKU_XA_XID xid; + r = env->txn_xa_recover(env, &xid, 1, &count, DB_FIRST); + CKERR(r); + + // commit it + DB_TXN *txn = nullptr; + r = env->get_txn_from_xid(env, &xid, &txn); + CKERR(r); + r = txn->commit(txn, 0); + CKERR(r); + + check_foo(env); + + r = env->close(env, 0); + CKERR(r); +} + +int test_main (int argc, char *const argv[]) { + default_parse_args(argc, argv); + + // init the env directory + 
toku_os_recursive_delete(TOKU_TEST_FILENAME); + int r = toku_os_mkdir(TOKU_TEST_FILENAME, S_IRWXU+S_IRWXG+S_IRWXO); + CKERR(r); + + // run the test + create_prepared_txn(); + run_xa_recovery(); + + return 0; +} diff --git a/src/ydb.cc b/src/ydb.cc index 9cc82949d80..ed5eb8bfae9 100644 --- a/src/ydb.cc +++ b/src/ydb.cc @@ -1092,6 +1092,12 @@ static int env_close(DB_ENV * env, uint32_t flags) { int r = 0; const char * err_msg = NULL; + bool clean_shutdown = true; + + if (flags & TOKUFT_DIRTY_SHUTDOWN) { + clean_shutdown = false; + flags &= ~TOKUFT_DIRTY_SHUTDOWN; + } most_recent_env = NULL; // Set most_recent_env to NULL so that we don't have a dangling pointer (and if there's an error, the toku assert code would try to look at the env.) @@ -1132,22 +1138,27 @@ env_close(DB_ENV * env, uint32_t flags) { if (env->i->cachetable) { toku_cachetable_minicron_shutdown(env->i->cachetable); if (env->i->logger) { - CHECKPOINTER cp = toku_cachetable_get_checkpointer(env->i->cachetable); - r = toku_checkpoint(cp, env->i->logger, NULL, NULL, NULL, NULL, SHUTDOWN_CHECKPOINT); - if (r) { - err_msg = "Cannot close environment (error during checkpoint)\n"; - toku_ydb_do_error(env, r, "%s", err_msg); - goto panic_and_quit_early; + CHECKPOINTER cp = nullptr; + if (clean_shutdown) { + cp = toku_cachetable_get_checkpointer(env->i->cachetable); + r = toku_checkpoint(cp, env->i->logger, NULL, NULL, NULL, NULL, SHUTDOWN_CHECKPOINT); + if (r) { + err_msg = "Cannot close environment (error during checkpoint)\n"; + toku_ydb_do_error(env, r, "%s", err_msg); + goto panic_and_quit_early; + } } - toku_logger_close_rollback(env->i->logger); - //Do a second checkpoint now that the rollback cachefile is closed. 
- r = toku_checkpoint(cp, env->i->logger, NULL, NULL, NULL, NULL, SHUTDOWN_CHECKPOINT); - if (r) { - err_msg = "Cannot close environment (error during checkpoint)\n"; - toku_ydb_do_error(env, r, "%s", err_msg); - goto panic_and_quit_early; + toku_logger_close_rollback_check_empty(env->i->logger, clean_shutdown); + if (clean_shutdown) { + //Do a second checkpoint now that the rollback cachefile is closed. + r = toku_checkpoint(cp, env->i->logger, NULL, NULL, NULL, NULL, SHUTDOWN_CHECKPOINT); + if (r) { + err_msg = "Cannot close environment (error during checkpoint)\n"; + toku_ydb_do_error(env, r, "%s", err_msg); + goto panic_and_quit_early; + } + toku_logger_shutdown(env->i->logger); } - toku_logger_shutdown(env->i->logger); } toku_cachetable_close(&env->i->cachetable); } diff --git a/src/ydb_txn.cc b/src/ydb_txn.cc index e513f0da0e5..1cccb91f330 100644 --- a/src/ydb_txn.cc +++ b/src/ydb_txn.cc @@ -207,12 +207,6 @@ cleanup: return r; } -static uint32_t toku_txn_id(DB_TXN * txn) { - HANDLE_PANICKED_ENV(txn->mgrp); - abort(); - return (uint32_t) -1; -} - static int toku_txn_abort(DB_TXN * txn, TXN_PROGRESS_POLL_FUNCTION poll, void *poll_extra) { HANDLE_PANICKED_ENV(txn->mgrp); @@ -389,6 +383,44 @@ static uint64_t locked_txn_get_client_id(DB_TXN *txn) { return toku_txn_get_client_id(db_txn_struct_i(txn)->tokutxn); } +static int toku_txn_discard(DB_TXN *txn, uint32_t flags) { + // check parameters + if (flags != 0) + return EINVAL; + TOKUTXN ttxn = db_txn_struct_i(txn)->tokutxn; + if (toku_txn_get_state(ttxn) != TOKUTXN_PREPARING) + return EINVAL; + + bool low_priority; + if (toku_is_big_tokutxn(ttxn)) { + low_priority = true; + toku_low_priority_multi_operation_client_lock(); + } else { + low_priority = false; + toku_multi_operation_client_lock(); + } + + // discard + toku_txn_discard_txn(ttxn); + + // complete + toku_txn_complete_txn(ttxn); + + // release locks + toku_txn_release_locks(txn); + + if (low_priority) { + toku_low_priority_multi_operation_client_unlock(); 
+ } else { + toku_multi_operation_client_unlock(); + } + + // destroy + toku_txn_destroy(txn); + + return 0; +} + static inline void txn_func_init(DB_TXN *txn) { #define STXN(name) txn->name = locked_txn_ ## name STXN(abort); @@ -402,8 +434,8 @@ static inline void txn_func_init(DB_TXN *txn) { #define SUTXN(name) txn->name = toku_txn_ ## name SUTXN(prepare); SUTXN(xa_prepare); + SUTXN(discard); #undef SUTXN - txn->id = toku_txn_id; txn->id64 = toku_txn_id64; } From 48a6883b3414089de7ededd092c0560d41d2c519 Mon Sep 17 00:00:00 2001 From: Rich Prohaska Date: Wed, 9 Jul 2014 15:50:15 -0400 Subject: [PATCH 091/190] #258 support deferred XA recovery with discard of prepared txns and dirty shutdown of the FT environment --- storage/tokudb/hatoku_hton.cc | 43 ++++++++++++++++++++++++++++------- 1 file changed, 35 insertions(+), 8 deletions(-) diff --git a/storage/tokudb/hatoku_hton.cc b/storage/tokudb/hatoku_hton.cc index 276fc096d3f..b9970f3377d 100644 --- a/storage/tokudb/hatoku_hton.cc +++ b/storage/tokudb/hatoku_hton.cc @@ -336,7 +336,7 @@ static int tokudb_init_func(void *p) { #if TOKUDB_CHECK_JEMALLOC if (tokudb_check_jemalloc && dlsym(RTLD_DEFAULT, "mallctl") == NULL) { - sql_print_error("%s not initialized because jemalloc is not loaded", tokudb_hton_name); + sql_print_error("%s is not initialized because jemalloc is not loaded", tokudb_hton_name); goto error; } #endif @@ -597,8 +597,35 @@ int tokudb_end(handlerton * hton, ha_panic_function type) { if (db_env) { if (tokudb_init_flags & DB_INIT_LOG) tokudb_cleanup_log_files(); - error = db_env->close(db_env, 0); // Error is logged - assert(error==0); +#if TOKU_INCLUDE_XA + long total_prepared = 0; // count the total number of prepared txn's that we discard + while (1) { + // get xid's + const long n_xid = 1; + TOKU_XA_XID xids[n_xid]; + long n_prepared = 0; + error = db_env->txn_xa_recover(db_env, xids, n_xid, &n_prepared, total_prepared == 0 ? 
DB_FIRST : DB_NEXT); + assert(error == 0); + if (n_prepared == 0) + break; + // discard xid's + for (long i = 0; i < n_xid; i++) { + DB_TXN *txn = NULL; + error = db_env->get_txn_from_xid(db_env, &xids[i], &txn); + assert(error == 0); + error = txn->discard(txn, 0); + assert(error == 0); + } + total_prepared += n_prepared; + } +#endif + error = db_env->close(db_env, total_prepared > 0 ? TOKUFT_DIRTY_SHUTDOWN : 0); +#if TOKU_INCLUDE_XA + if (error != 0 && total_prepared > 0) { + sql_print_error("%s: %ld prepared txns still live, please shutdown, error %d", tokudb_hton_name, total_prepared, error); + } else +#endif + assert(error == 0); db_env = NULL; } @@ -690,7 +717,7 @@ static void commit_txn_with_progress(DB_TXN* txn, uint32_t flags, THD* thd) { info.thd = thd; int r = txn->commit_with_progress(txn, flags, txn_progress_func, &info); if (r != 0) { - sql_print_error("tried committing transaction %p and got error code %d", txn, r); + sql_print_error("%s: tried committing transaction %p and got error code %d", tokudb_hton_name, txn, r); } assert(r == 0); thd_proc_info(thd, orig_proc_info); @@ -702,7 +729,7 @@ static void abort_txn_with_progress(DB_TXN* txn, THD* thd) { info.thd = thd; int r = txn->abort_with_progress(txn, txn_progress_func, &info); if (r != 0) { - sql_print_error("tried aborting transaction %p and got error code %d", txn, r); + sql_print_error("%s: tried aborting transaction %p and got error code %d", tokudb_hton_name, txn, r); } assert(r == 0); thd_proc_info(thd, orig_proc_info); @@ -801,7 +828,7 @@ static int tokudb_xa_prepare(handlerton* hton, THD* thd, bool all) { TOKUDB_DBUG_RETURN(r); } -static int tokudb_xa_recover(handlerton* hton, XID* xid_list, uint len) { +static int tokudb_xa_recover(handlerton* hton, XID* xid_list, uint len) { TOKUDB_DBUG_ENTER(""); int r = 0; if (len == 0 || xid_list == NULL) { @@ -1202,7 +1229,7 @@ static void tokudb_handle_fatal_signal(handlerton *hton __attribute__ ((__unused #endif static void 
tokudb_print_error(const DB_ENV * db_env, const char *db_errpfx, const char *buffer) { - sql_print_error("%s: %s", db_errpfx, buffer); + sql_print_error("%s: %s", db_errpfx, buffer); } static void tokudb_cleanup_log_files(void) { @@ -1955,7 +1982,7 @@ static void tokudb_lock_timeout_callback(DB *db, uint64_t requesting_txnid, cons } // dump to stderr if (lock_timeout_debug & 2) { - TOKUDB_TRACE("%s", log_str.c_ptr()); + sql_print_error("%s: %s", tokudb_hton_name, log_str.c_ptr()); } } } From 79411796ce9cdcf6604054114c693648f17f7d6f Mon Sep 17 00:00:00 2001 From: Rich Prohaska Date: Tue, 15 Jul 2014 13:13:30 -0400 Subject: [PATCH 092/190] #262 enable bulk fetch for create select sql commands --- storage/tokudb/ha_tokudb.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/storage/tokudb/ha_tokudb.cc b/storage/tokudb/ha_tokudb.cc index 43de1c05aa5..1be821bfe7e 100644 --- a/storage/tokudb/ha_tokudb.cc +++ b/storage/tokudb/ha_tokudb.cc @@ -4371,7 +4371,7 @@ static bool index_key_is_null(TABLE *table, uint keynr, const uchar *key, uint k // error otherwise // int ha_tokudb::prepare_index_key_scan(const uchar * key, uint key_len) { - TOKUDB_HANDLER_DBUG_ENTER(""); + TOKUDB_HANDLER_DBUG_ENTER("%p %u", key, key_len); int error = 0; DBT start_key, end_key; THD* thd = ha_thd(); @@ -4395,7 +4395,7 @@ int ha_tokudb::prepare_index_key_scan(const uchar * key, uint key_len) { range_lock_grabbed = true; range_lock_grabbed_null = index_key_is_null(table, tokudb_active_index, key, key_len); - doing_bulk_fetch = (thd_sql_command(thd) == SQLCOM_SELECT); + doing_bulk_fetch = thd_sql_command(thd) == SQLCOM_SELECT || thd_sql_command(thd) == SQLCOM_CREATE_TABLE; bulk_fetch_iteration = 0; rows_fetched_using_bulk_fetch = 0; error = 0; @@ -5698,7 +5698,7 @@ int ha_tokudb::prelock_range( const key_range *start_key, const key_range *end_k // at this point, determine if we will be doing bulk fetch // as of now, only do it if we are doing a select // - doing_bulk_fetch = 
(thd_sql_command(thd) == SQLCOM_SELECT); + doing_bulk_fetch = thd_sql_command(thd) == SQLCOM_SELECT || thd_sql_command(thd) == SQLCOM_CREATE_TABLE; bulk_fetch_iteration = 0; rows_fetched_using_bulk_fetch = 0; From 53231b13411db24b910b6079d7cba9a60deb9865 Mon Sep 17 00:00:00 2001 From: Rich Prohaska Date: Tue, 15 Jul 2014 15:45:48 -0400 Subject: [PATCH 093/190] #263 enable bulk fetch for insert select sql commands --- storage/tokudb/ha_tokudb.cc | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/storage/tokudb/ha_tokudb.cc b/storage/tokudb/ha_tokudb.cc index 1be821bfe7e..989fbf560c7 100644 --- a/storage/tokudb/ha_tokudb.cc +++ b/storage/tokudb/ha_tokudb.cc @@ -4395,7 +4395,7 @@ int ha_tokudb::prepare_index_key_scan(const uchar * key, uint key_len) { range_lock_grabbed = true; range_lock_grabbed_null = index_key_is_null(table, tokudb_active_index, key, key_len); - doing_bulk_fetch = thd_sql_command(thd) == SQLCOM_SELECT || thd_sql_command(thd) == SQLCOM_CREATE_TABLE; + doing_bulk_fetch = thd_sql_command(thd) == SQLCOM_SELECT || thd_sql_command(thd) == SQLCOM_CREATE_TABLE || thd_sql_command(thd) == SQLCOM_INSERT_SELECT; bulk_fetch_iteration = 0; rows_fetched_using_bulk_fetch = 0; error = 0; @@ -5694,11 +5694,8 @@ int ha_tokudb::prelock_range( const key_range *start_key, const key_range *end_k goto cleanup; } - // // at this point, determine if we will be doing bulk fetch - // as of now, only do it if we are doing a select - // - doing_bulk_fetch = thd_sql_command(thd) == SQLCOM_SELECT || thd_sql_command(thd) == SQLCOM_CREATE_TABLE; + doing_bulk_fetch = thd_sql_command(thd) == SQLCOM_SELECT || thd_sql_command(thd) == SQLCOM_CREATE_TABLE || thd_sql_command(thd) == SQLCOM_INSERT_SELECT; bulk_fetch_iteration = 0; rows_fetched_using_bulk_fetch = 0; From 36a5eadbace230f4bc8512dff6801b6b951d0855 Mon Sep 17 00:00:00 2001 From: Leif Walsh Date: Wed, 16 Jul 2014 07:39:11 -0400 Subject: [PATCH 094/190] temporary fix for gcov build --- ft/CMakeLists.txt | 2 
++ 1 file changed, 2 insertions(+) diff --git a/ft/CMakeLists.txt b/ft/CMakeLists.txt index 9b2e4905612..a433c7fc3a7 100644 --- a/ft/CMakeLists.txt +++ b/ft/CMakeLists.txt @@ -9,6 +9,8 @@ set_source_files_properties( add_executable(logformat logger/logformat.cc) target_link_libraries(logformat ${LIBTOKUPORTABILITY}_static) +add_space_separated_property(TARGET logformat LINK_FLAGS --coverage) + add_custom_command( OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/log_code.cc" From 862d17b3614fd7469f2618d47eb8636d539fe9d0 Mon Sep 17 00:00:00 2001 From: Rich Prohaska Date: Wed, 16 Jul 2014 10:03:00 -0400 Subject: [PATCH 095/190] #261 debug prelocking for index scans --- storage/tokudb/ha_tokudb.cc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/storage/tokudb/ha_tokudb.cc b/storage/tokudb/ha_tokudb.cc index 989fbf560c7..4ebae49946b 100644 --- a/storage/tokudb/ha_tokudb.cc +++ b/storage/tokudb/ha_tokudb.cc @@ -5628,8 +5628,8 @@ cleanup: TOKUDB_HANDLER_DBUG_RETURN(error); } -int ha_tokudb::prelock_range( const key_range *start_key, const key_range *end_key) { - TOKUDB_HANDLER_DBUG_ENTER(""); +int ha_tokudb::prelock_range(const key_range *start_key, const key_range *end_key) { + TOKUDB_HANDLER_DBUG_ENTER("%p %p", start_key, end_key); THD* thd = ha_thd(); int error = 0; @@ -5710,7 +5710,7 @@ cleanup: // Forward scans use read_range_first()/read_range_next(). 
// int ha_tokudb::prepare_range_scan( const key_range *start_key, const key_range *end_key) { - TOKUDB_HANDLER_DBUG_ENTER(""); + TOKUDB_HANDLER_DBUG_ENTER("%p %p", start_key, end_key); int error = prelock_range(start_key, end_key); if (!error) { range_lock_grabbed = true; @@ -5724,7 +5724,7 @@ int ha_tokudb::read_range_first( bool eq_range, bool sorted) { - TOKUDB_HANDLER_DBUG_ENTER(""); + TOKUDB_HANDLER_DBUG_ENTER("%p %p %u %u", start_key, end_key, eq_range, sorted); int error = prelock_range(start_key, end_key); if (error) { goto cleanup; } range_lock_grabbed = true; From c1685957ecee1968226a24234f5950536916cc08 Mon Sep 17 00:00:00 2001 From: Rich Prohaska Date: Thu, 17 Jul 2014 14:50:13 -0400 Subject: [PATCH 096/190] update to 7.1.7 --- README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 1deb3699c5a..e49e26f118d 100644 --- a/README.md +++ b/README.md @@ -24,14 +24,14 @@ working MySQL or MariaDB with Tokutek patches, and with the TokuDB storage engine, called `make.mysql.bash`. This script will download copies of the needed source code from github and build everything. 
-To build MySQL 5.5.37 with TokuDB 7.1.6: +To build MySQL 5.5.38 with TokuDB 7.1.7: ```sh -scripts/make.mysql.bash --mysqlbuild=mysql-5.5.37-tokudb-7.1.6-linux-x86_64 +scripts/make.mysql.bash --mysqlbuild=mysql-5.5.38-tokudb-7.1.7-linux-x86_64 ``` -To build MariaDB 5.5.37 with TokuDB 7.1.6: +To build MariaDB 5.5.38 with TokuDB 7.1.7: ```sh -scripts/make.mysql.bash --mysqlbuild=mariadb-5.5.37-tokudb-7.1.6-linux-x86_64 +scripts/make.mysql.bash --mysqlbuild=mariadb-5.5.38-tokudb-7.1.7-linux-x86_64 ``` Before you start, make sure you have a C++11-compatible compiler (GCC >= From 030487487c56fe2549f9acf30d0d633fe77e3ce0 Mon Sep 17 00:00:00 2001 From: Rich Prohaska Date: Fri, 18 Jul 2014 06:39:07 -0400 Subject: [PATCH 097/190] #261 auto detect index scans to fix perf problem with partitions --- .../tokudb.bugs/r/part_index_scan.result | 86 +++++++++ .../suite/tokudb.bugs/t/part_index_scan.test | 114 ++++++++++++ storage/tokudb/ha_tokudb.cc | 170 +++++++++--------- storage/tokudb/ha_tokudb.h | 1 + 4 files changed, 291 insertions(+), 80 deletions(-) create mode 100644 mysql-test/suite/tokudb.bugs/r/part_index_scan.result create mode 100644 mysql-test/suite/tokudb.bugs/t/part_index_scan.test diff --git a/mysql-test/suite/tokudb.bugs/r/part_index_scan.result b/mysql-test/suite/tokudb.bugs/r/part_index_scan.result new file mode 100644 index 00000000000..3ede692cc78 --- /dev/null +++ b/mysql-test/suite/tokudb.bugs/r/part_index_scan.result @@ -0,0 +1,86 @@ +set default_storage_engine='tokudb'; +drop table if exists t,t1,t2,t3; +CREATE TABLE `t` ( +`num` int(10) unsigned auto_increment NOT NULL, +`val` varchar(32) DEFAULT NULL, +PRIMARY KEY (`num`) +); +INSERT INTO t values (null,null); +INSERT INTO t SELECT null,null FROM t; +INSERT INTO t SELECT null,null FROM t; +INSERT INTO t SELECT null,null FROM t; +INSERT INTO t SELECT null,null FROM t; +INSERT INTO t SELECT null,null FROM t; +INSERT INTO t SELECT null,null FROM t; +INSERT INTO t SELECT null,null FROM t; +INSERT INTO t 
SELECT null,null FROM t; +INSERT INTO t SELECT null,null FROM t; +INSERT INTO t SELECT null,null FROM t; +INSERT INTO t SELECT null,null FROM t; +INSERT INTO t SELECT null,null FROM t; +INSERT INTO t SELECT null,null FROM t; +INSERT INTO t SELECT null,null FROM t; +INSERT INTO t SELECT null,null FROM t; +INSERT INTO t SELECT null,null FROM t; +INSERT INTO t SELECT null,null FROM t; +INSERT INTO t SELECT null,null FROM t; +INSERT INTO t SELECT null,null FROM t; +INSERT INTO t SELECT null,null FROM t; +INSERT INTO t SELECT null,null FROM t; +INSERT INTO t SELECT null,null FROM t; +INSERT INTO t SELECT null,null FROM t; +SELECT count(*) FROM t; +count(*) +8388608 +CREATE TABLE `t1` ( +`num` int(10) unsigned NOT NULL, +`val` varchar(32) DEFAULT NULL, +PRIMARY KEY (`num`) +); +CREATE TABLE `t2` ( +`num` int(10) unsigned NOT NULL, +`val` varchar(32) DEFAULT NULL, +PRIMARY KEY (`num`) +) +PARTITION BY HASH (num) PARTITIONS 10; +CREATE TABLE `t3` ( +`num` int(10) unsigned NOT NULL, +`val` varchar(32) DEFAULT NULL, +PRIMARY KEY (`num`) +) +PARTITION BY RANGE (num) +(PARTITION p0 VALUES LESS THAN (1000000), +PARTITION p1 VALUES LESS THAN (2000000), +PARTITION p2 VALUES LESS THAN (3000000), +PARTITION p3 VALUES LESS THAN (4000000), +PARTITION p4 VALUES LESS THAN (5000000), +PARTITION p5 VALUES LESS THAN (6000000), +PARTITION p6 VALUES LESS THAN (7000000), +PARTITION p7 VALUES LESS THAN (8000000), +PARTITION px VALUES LESS THAN MAXVALUE); +insert into t1 select * from t; +insert into t2 select * from t; +insert into t3 select * from t; +select count(*) from t1; +count(*) +8388608 +select count(*) from t2; +count(*) +8388608 +1 +select count(*) from t3; +count(*) +8388608 +1 +select count(*) from t1 where num>7000000; +count(*) +1847274 +select count(*) from t2 where num>7000000; +count(*) +1847274 +1 +select count(*) from t3 where num>7000000; +count(*) +1847274 +1 +drop table if exists t,t1,t2,t3; diff --git a/mysql-test/suite/tokudb.bugs/t/part_index_scan.test 
b/mysql-test/suite/tokudb.bugs/t/part_index_scan.test new file mode 100644 index 00000000000..23d797af92f --- /dev/null +++ b/mysql-test/suite/tokudb.bugs/t/part_index_scan.test @@ -0,0 +1,114 @@ +# verify that index scans on partitions are not slow +# due to tokudb bulk fetch not being used +source include/have_tokudb.inc; +set default_storage_engine='tokudb'; +disable_warnings; +drop table if exists t,t1,t2,t3; + +CREATE TABLE `t` ( + `num` int(10) unsigned auto_increment NOT NULL, + `val` varchar(32) DEFAULT NULL, + PRIMARY KEY (`num`) +); + +# put 8M rows into t +INSERT INTO t values (null,null); +INSERT INTO t SELECT null,null FROM t; +INSERT INTO t SELECT null,null FROM t; +INSERT INTO t SELECT null,null FROM t; +INSERT INTO t SELECT null,null FROM t; +INSERT INTO t SELECT null,null FROM t; +INSERT INTO t SELECT null,null FROM t; +INSERT INTO t SELECT null,null FROM t; +INSERT INTO t SELECT null,null FROM t; +INSERT INTO t SELECT null,null FROM t; +INSERT INTO t SELECT null,null FROM t; +INSERT INTO t SELECT null,null FROM t; +INSERT INTO t SELECT null,null FROM t; +INSERT INTO t SELECT null,null FROM t; +INSERT INTO t SELECT null,null FROM t; +INSERT INTO t SELECT null,null FROM t; +INSERT INTO t SELECT null,null FROM t; +INSERT INTO t SELECT null,null FROM t; +INSERT INTO t SELECT null,null FROM t; +INSERT INTO t SELECT null,null FROM t; +INSERT INTO t SELECT null,null FROM t; +INSERT INTO t SELECT null,null FROM t; +INSERT INTO t SELECT null,null FROM t; +INSERT INTO t SELECT null,null FROM t; +SELECT count(*) FROM t; + +CREATE TABLE `t1` ( + `num` int(10) unsigned NOT NULL, + `val` varchar(32) DEFAULT NULL, + PRIMARY KEY (`num`) +); + +CREATE TABLE `t2` ( + `num` int(10) unsigned NOT NULL, + `val` varchar(32) DEFAULT NULL, + PRIMARY KEY (`num`) +) +PARTITION BY HASH (num) PARTITIONS 10; + +CREATE TABLE `t3` ( + `num` int(10) unsigned NOT NULL, + `val` varchar(32) DEFAULT NULL, + PRIMARY KEY (`num`) +) +PARTITION BY RANGE (num) +(PARTITION p0 VALUES LESS 
THAN (1000000), + PARTITION p1 VALUES LESS THAN (2000000), + PARTITION p2 VALUES LESS THAN (3000000), + PARTITION p3 VALUES LESS THAN (4000000), + PARTITION p4 VALUES LESS THAN (5000000), + PARTITION p5 VALUES LESS THAN (6000000), + PARTITION p6 VALUES LESS THAN (7000000), + PARTITION p7 VALUES LESS THAN (8000000), + PARTITION px VALUES LESS THAN MAXVALUE); + +insert into t1 select * from t; +insert into t2 select * from t; +insert into t3 select * from t; + +# verify that full index scans on partitioned tables t2 and t3 are comparable to a non-partitioned table t1 +let $s = `select to_seconds(now())`; +select count(*) from t1; +let $t1 = `select to_seconds(now()) - $s`; +# echo $t1; + +let $s = `select to_seconds(now())`; +select count(*) from t2; +let $t2 = `select to_seconds(now()) - $s`; +# echo $t2; +let $d = `select abs($t2 - $t1) <= $t1`; +echo $d; + +let $s = `select to_seconds(now())`; +select count(*) from t3; +let $t3 = `select to_seconds(now()) - $s`; +# echo $t3; +let $d = `select abs($t3 - $t1) <= $t1`; +echo $d; + +let $s = `select to_seconds(now())`; +select count(*) from t1 where num>7000000; +let $t1 = `select to_seconds(now()) - $s`; +# echo $t1; + +let $s = `select to_seconds(now())`; +select count(*) from t2 where num>7000000; +let $t2 = `select to_seconds(now()) - $s`; +# echo $t2; +let $d = `select abs($t2 - $t1) <= $t1`; +echo $d; + +let $s = `select to_seconds(now())`; +select count(*) from t3 where num>7000000; +let $t3 = `select to_seconds(now()) - $s`; +# echo $t3; +let $d = `select abs($t3 - $t1) <= $t1`; +echo $d; + +enable_warnings; +drop table if exists t,t1,t2,t3; diff --git a/storage/tokudb/ha_tokudb.cc b/storage/tokudb/ha_tokudb.cc index 4ebae49946b..bbb671b135b 100644 --- a/storage/tokudb/ha_tokudb.cc +++ b/storage/tokudb/ha_tokudb.cc @@ -4504,6 +4504,7 @@ int ha_tokudb::index_init(uint keynr, bool sorted) { } invalidate_bulk_fetch(); doing_bulk_fetch = false; + maybe_index_scan = false; error = 0; exit: 
TOKUDB_HANDLER_DBUG_RETURN(error); @@ -5246,86 +5247,91 @@ cleanup: } int ha_tokudb::get_next(uchar* buf, int direction, DBT* key_to_compare, bool do_key_read) { - int error = 0; - uint32_t flags = SET_PRELOCK_FLAG(0); - THD* thd = ha_thd(); - tokudb_trx_data* trx = (tokudb_trx_data *) thd_get_ha_data(thd, tokudb_hton);; - bool need_val; + int error = 0; HANDLE_INVALID_CURSOR(); - // we need to read the val of what we retrieve if - // we do NOT have a covering index AND we are using a clustering secondary - // key - need_val = (do_key_read == 0) && - (tokudb_active_index == primary_key || - key_is_clustering(&table->key_info[tokudb_active_index]) - ); - - if ((bytes_used_in_range_query_buff - curr_range_query_buff_offset) > 0) { - error = read_data_from_range_query_buff(buf, need_val, do_key_read); + if (maybe_index_scan) { + maybe_index_scan = false; + if (!range_lock_grabbed) { + error = prepare_index_scan(); + } } - else if (icp_went_out_of_range) { - icp_went_out_of_range = false; - error = HA_ERR_END_OF_FILE; - } - else { - invalidate_bulk_fetch(); - if (doing_bulk_fetch) { - struct smart_dbt_bf_info bf_info; - bf_info.ha = this; - // you need the val if you have a clustering index and key_read is not 0; - bf_info.direction = direction; - bf_info.thd = ha_thd(); - bf_info.need_val = need_val; - bf_info.buf = buf; - bf_info.key_to_compare = key_to_compare; - // - // call c_getf_next with purpose of filling in range_query_buff - // - rows_fetched_using_bulk_fetch = 0; - // it is expected that we can do ICP in the smart_dbt_bf_callback - // as a result, it's possible we don't return any data because - // none of the rows matched the index condition. Therefore, we need - // this while loop. icp_out_of_range will be set if we hit a row that - // the index condition states is out of our range. 
When that hits, - // we know all the data in the buffer is the last data we will retrieve - while (bytes_used_in_range_query_buff == 0 && !icp_went_out_of_range && error == 0) { - if (direction > 0) { - error = cursor->c_getf_next(cursor, flags, smart_dbt_bf_callback, &bf_info); - } else { - error = cursor->c_getf_prev(cursor, flags, smart_dbt_bf_callback, &bf_info); - } - } - // if there is no data set and we went out of range, - // then there is nothing to return - if (bytes_used_in_range_query_buff == 0 && icp_went_out_of_range) { - icp_went_out_of_range = false; - error = HA_ERR_END_OF_FILE; - } - if (bulk_fetch_iteration < HA_TOKU_BULK_FETCH_ITERATION_MAX) { - bulk_fetch_iteration++; - } + + if (!error) { + uint32_t flags = SET_PRELOCK_FLAG(0); - error = handle_cursor_error(error, HA_ERR_END_OF_FILE,tokudb_active_index); - if (error) { goto cleanup; } - - // - // now that range_query_buff is filled, read an element - // + // we need to read the val of what we retrieve if + // we do NOT have a covering index AND we are using a clustering secondary + // key + bool need_val = (do_key_read == 0) && + (tokudb_active_index == primary_key || key_is_clustering(&table->key_info[tokudb_active_index])); + + if ((bytes_used_in_range_query_buff - curr_range_query_buff_offset) > 0) { error = read_data_from_range_query_buff(buf, need_val, do_key_read); } + else if (icp_went_out_of_range) { + icp_went_out_of_range = false; + error = HA_ERR_END_OF_FILE; + } else { - struct smart_dbt_info info; - info.ha = this; - info.buf = buf; - info.keynr = tokudb_active_index; + invalidate_bulk_fetch(); + if (doing_bulk_fetch) { + struct smart_dbt_bf_info bf_info; + bf_info.ha = this; + // you need the val if you have a clustering index and key_read is not 0; + bf_info.direction = direction; + bf_info.thd = ha_thd(); + bf_info.need_val = need_val; + bf_info.buf = buf; + bf_info.key_to_compare = key_to_compare; + // + // call c_getf_next with purpose of filling in range_query_buff + // + 
rows_fetched_using_bulk_fetch = 0; + // it is expected that we can do ICP in the smart_dbt_bf_callback + // as a result, it's possible we don't return any data because + // none of the rows matched the index condition. Therefore, we need + // this while loop. icp_out_of_range will be set if we hit a row that + // the index condition states is out of our range. When that hits, + // we know all the data in the buffer is the last data we will retrieve + while (bytes_used_in_range_query_buff == 0 && !icp_went_out_of_range && error == 0) { + if (direction > 0) { + error = cursor->c_getf_next(cursor, flags, smart_dbt_bf_callback, &bf_info); + } else { + error = cursor->c_getf_prev(cursor, flags, smart_dbt_bf_callback, &bf_info); + } + } + // if there is no data set and we went out of range, + // then there is nothing to return + if (bytes_used_in_range_query_buff == 0 && icp_went_out_of_range) { + icp_went_out_of_range = false; + error = HA_ERR_END_OF_FILE; + } + if (bulk_fetch_iteration < HA_TOKU_BULK_FETCH_ITERATION_MAX) { + bulk_fetch_iteration++; + } - if (direction > 0) { - error = cursor->c_getf_next(cursor, flags, SMART_DBT_CALLBACK(do_key_read), &info); - } else { - error = cursor->c_getf_prev(cursor, flags, SMART_DBT_CALLBACK(do_key_read), &info); + error = handle_cursor_error(error, HA_ERR_END_OF_FILE,tokudb_active_index); + if (error) { goto cleanup; } + + // + // now that range_query_buff is filled, read an element + // + error = read_data_from_range_query_buff(buf, need_val, do_key_read); + } + else { + struct smart_dbt_info info; + info.ha = this; + info.buf = buf; + info.keynr = tokudb_active_index; + + if (direction > 0) { + error = cursor->c_getf_next(cursor, flags, SMART_DBT_CALLBACK(do_key_read), &info); + } else { + error = cursor->c_getf_prev(cursor, flags, SMART_DBT_CALLBACK(do_key_read), &info); + } + error = handle_cursor_error(error, HA_ERR_END_OF_FILE, tokudb_active_index); } - error = handle_cursor_error(error, HA_ERR_END_OF_FILE, 
tokudb_active_index); } } @@ -5337,12 +5343,15 @@ int ha_tokudb::get_next(uchar* buf, int direction, DBT* key_to_compare, bool do_ // read the full row by doing a point query into the // main table. // - if (!error && !do_key_read && (tokudb_active_index != primary_key) && !key_is_clustering(&table->key_info[tokudb_active_index])) { error = read_full_row(buf); } - trx->stmt_progress.queried++; - track_progress(thd); + + if (!error) { + tokudb_trx_data* trx = (tokudb_trx_data *) thd_get_ha_data(ha_thd(), tokudb_hton); + trx->stmt_progress.queried++; + track_progress(ha_thd()); + } cleanup: return error; } @@ -5411,8 +5420,7 @@ int ha_tokudb::index_first(uchar * buf) { info.buf = buf; info.keynr = tokudb_active_index; - error = cursor->c_getf_first(cursor, flags, - SMART_DBT_CALLBACK(key_read), &info); + error = cursor->c_getf_first(cursor, flags, SMART_DBT_CALLBACK(key_read), &info); error = handle_cursor_error(error,HA_ERR_END_OF_FILE,tokudb_active_index); // @@ -5422,9 +5430,11 @@ int ha_tokudb::index_first(uchar * buf) { if (!error && !key_read && (tokudb_active_index != primary_key) && !key_is_clustering(&table->key_info[tokudb_active_index])) { error = read_full_row(buf); } - trx->stmt_progress.queried++; + if (trx) { + trx->stmt_progress.queried++; + } track_progress(thd); - + maybe_index_scan = true; cleanup: TOKUDB_HANDLER_DBUG_RETURN(error); } @@ -5454,8 +5464,7 @@ int ha_tokudb::index_last(uchar * buf) { info.buf = buf; info.keynr = tokudb_active_index; - error = cursor->c_getf_last(cursor, flags, - SMART_DBT_CALLBACK(key_read), &info); + error = cursor->c_getf_last(cursor, flags, SMART_DBT_CALLBACK(key_read), &info); error = handle_cursor_error(error,HA_ERR_END_OF_FILE,tokudb_active_index); // // still need to get entire contents of the row if operation done on @@ -5469,6 +5478,7 @@ int ha_tokudb::index_last(uchar * buf) { trx->stmt_progress.queried++; } track_progress(thd); + maybe_index_scan = true; cleanup: TOKUDB_HANDLER_DBUG_RETURN(error); } diff 
--git a/storage/tokudb/ha_tokudb.h b/storage/tokudb/ha_tokudb.h index f75d75bee4c..d9a98a825d9 100644 --- a/storage/tokudb/ha_tokudb.h +++ b/storage/tokudb/ha_tokudb.h @@ -251,6 +251,7 @@ private: uint64_t bulk_fetch_iteration; uint64_t rows_fetched_using_bulk_fetch; bool doing_bulk_fetch; + bool maybe_index_scan; // // buffer used to temporarily store a "packed key" From 40573588dbc50d81126dc8c2b2ddb008641d7eb1 Mon Sep 17 00:00:00 2001 From: John Esmet Date: Fri, 18 Jul 2014 14:06:44 -0400 Subject: [PATCH 098/190] FT-304 Move the blocktable into its own class, fix tests. --- ft/ft-cachetable-wrappers.cc | 18 +- ft/ft-flusher.cc | 4 +- ft/ft-internal.h | 4 +- ft/ft-ops.cc | 38 +- ft/ft.cc | 40 +- ft/ft.h | 3 + ft/logger/logger.cc | 36 +- ft/serialize/block_allocator.cc | 75 +- ft/serialize/block_allocator.h | 60 +- ft/serialize/block_table.cc | 1045 ++++++++++-------------- ft/serialize/block_table.h | 280 +++++-- ft/serialize/ft-serialize.cc | 33 +- ft/serialize/ft_node-serialize.cc | 71 +- ft/tests/block_allocator_test.cc | 26 - ft/tests/ft-bfe-query.cc | 12 +- ft/tests/ft-clock-test.cc | 24 +- ft/tests/ft-serialize-benchmark.cc | 24 +- ft/tests/ft-serialize-test.cc | 84 +- ft/tests/test_block_allocator_merge.cc | 236 ------ ft/txn/rollback-ct-callbacks.cc | 3 +- ft/txn/rollback.cc | 4 +- tools/ftverify.cc | 22 +- tools/tokuftdump.cc | 20 +- 23 files changed, 904 insertions(+), 1258 deletions(-) delete mode 100644 ft/tests/test_block_allocator_merge.cc diff --git a/ft/ft-cachetable-wrappers.cc b/ft/ft-cachetable-wrappers.cc index 02589ef5c56..685de99fec2 100644 --- a/ft/ft-cachetable-wrappers.cc +++ b/ft/ft-cachetable-wrappers.cc @@ -105,10 +105,10 @@ ftnode_get_key_and_fullhash( void* extra) { FT ft = (FT) extra; - BLOCKNUM name; - toku_allocate_blocknum(ft->blocktable, &name, ft); - *cachekey = name; - *fullhash = toku_cachetable_hash(ft->cf, name); + BLOCKNUM blocknum; + ft->blocktable.allocate_blocknum(&blocknum, ft); + *cachekey = blocknum; + *fullhash = 
toku_cachetable_hash(ft->cf, blocknum); } void @@ -116,7 +116,7 @@ cachetable_put_empty_node_with_dep_nodes( FT ft, uint32_t num_dependent_nodes, FTNODE* dependent_nodes, - BLOCKNUM* name, //output + BLOCKNUM* blocknum, //output uint32_t* fullhash, //output FTNODE* result) { @@ -138,7 +138,7 @@ cachetable_put_empty_node_with_dep_nodes( num_dependent_nodes, dependent_pairs, dependent_dirty_bits, - name, + blocknum, fullhash, toku_ftnode_save_ct_pair); *result = new_node; @@ -154,13 +154,13 @@ create_new_ftnode_with_dep_nodes( FTNODE* dependent_nodes) { uint32_t fullhash = 0; - BLOCKNUM name; + BLOCKNUM blocknum; cachetable_put_empty_node_with_dep_nodes( ft, num_dependent_nodes, dependent_nodes, - &name, + &blocknum, &fullhash, result); @@ -171,7 +171,7 @@ create_new_ftnode_with_dep_nodes( toku_initialize_empty_ftnode( *result, - name, + blocknum, height, n_children, ft->h->layout_version, diff --git a/ft/ft-flusher.cc b/ft/ft-flusher.cc index 10264133728..e1d76a5d8f4 100644 --- a/ft/ft-flusher.cc +++ b/ft/ft-flusher.cc @@ -1338,7 +1338,7 @@ maybe_merge_pinned_nodes( static void merge_remove_key_callback(BLOCKNUM *bp, bool for_checkpoint, void *extra) { FT ft = (FT) extra; - toku_free_blocknum(ft->blocktable, bp, ft, for_checkpoint); + ft->blocktable.free_blocknum(bp, ft, for_checkpoint); } // @@ -1517,7 +1517,7 @@ void toku_ft_flush_some_child(FT ft, FTNODE parent, struct flusher_advice *fa) // get the child into memory BLOCKNUM targetchild = BP_BLOCKNUM(parent, childnum); - toku_verify_blocknum_allocated(ft->blocktable, targetchild); + ft->blocktable.verify_blocknum_allocated(targetchild); uint32_t childfullhash = compute_child_fullhash(ft->cf, parent, childnum); FTNODE child; struct ftnode_fetch_extra bfe; diff --git a/ft/ft-internal.h b/ft/ft-internal.h index 89d98f1c676..e6214df4d9b 100644 --- a/ft/ft-internal.h +++ b/ft/ft-internal.h @@ -101,6 +101,7 @@ PATENT RIGHTS GRANT: #include "ft/ft.h" #include "ft/ft-ops.h" #include "ft/node.h" +#include 
"ft/serialize/block_table.h" #include "ft/txn/rollback.h" // Symbol TOKUDB_REVISION is not defined by fractal-tree makefiles, so @@ -111,7 +112,6 @@ PATENT RIGHTS GRANT: #error #endif -struct block_table; struct ft_search; enum { FT_DEFAULT_FANOUT = 16 }; @@ -225,7 +225,7 @@ struct ft { // These are not read-only: // protected by blocktable lock - struct block_table *blocktable; + struct block_table blocktable; // protected by atomic builtins STAT64INFO_S in_memory_stats; diff --git a/ft/ft-ops.cc b/ft/ft-ops.cc index bb8ee27b9a8..6844ca3478b 100644 --- a/ft/ft-ops.cc +++ b/ft/ft-ops.cc @@ -1927,7 +1927,7 @@ static void push_something_in_subtree( { const BLOCKNUM child_blocknum = BP_BLOCKNUM(subtree_root, childnum); - toku_verify_blocknum_allocated(ft->blocktable, child_blocknum); + ft->blocktable.verify_blocknum_allocated(child_blocknum); const uint32_t child_fullhash = toku_cachetable_hash(ft->cf, child_blocknum); FTNODE child; @@ -3088,10 +3088,11 @@ ft_handle_open(FT_HANDLE ft_h, const char *fname_in_env, int is_create, int only toku_txn_maybe_note_ft(txn, ft); } - //Opening an ft may restore to previous checkpoint. Truncate if necessary. + // Opening an ft may restore to previous checkpoint. + // Truncate if necessary. 
{ int fd = toku_cachefile_get_fd (ft->cf); - toku_maybe_truncate_file_on_open(ft->blocktable, fd); + ft->blocktable.maybe_truncate_file_on_open(fd); } r = 0; @@ -4528,17 +4529,15 @@ toku_dump_ftnode (FILE *file, FT_HANDLE ft_handle, BLOCKNUM blocknum, int depth, return result; } -int toku_dump_ft (FILE *f, FT_HANDLE ft_handle) { - int r; - assert(ft_handle->ft); - toku_dump_translation_table(f, ft_handle->ft->blocktable); - { - uint32_t fullhash = 0; - CACHEKEY root_key; - toku_calculate_root_offset_pointer(ft_handle->ft, &root_key, &fullhash); - r = toku_dump_ftnode(f, ft_handle, root_key, 0, 0, 0); - } - return r; +int toku_dump_ft(FILE *f, FT_HANDLE ft_handle) { + FT ft = ft_handle->ft; + invariant_notnull(ft); + ft->blocktable.dump_translation_table(f); + + uint32_t fullhash = 0; + CACHEKEY root_key; + toku_calculate_root_offset_pointer(ft_handle->ft, &root_key, &fullhash); + return toku_dump_ftnode(f, ft_handle, root_key, 0, 0, 0); } int toku_ft_layer_init(void) { @@ -4630,18 +4629,15 @@ void toku_ft_unlink(FT_HANDLE handle) { toku_cachefile_unlink_on_close(cf); } -int -toku_ft_get_fragmentation(FT_HANDLE ft_handle, TOKU_DB_FRAGMENTATION report) { - int r; - +int toku_ft_get_fragmentation(FT_HANDLE ft_handle, TOKU_DB_FRAGMENTATION report) { int fd = toku_cachefile_get_fd(ft_handle->ft->cf); toku_ft_lock(ft_handle->ft); int64_t file_size; - r = toku_os_get_file_size(fd, &file_size); - if (r==0) { + int r = toku_os_get_file_size(fd, &file_size); + if (r == 0) { report->file_size_bytes = file_size; - toku_block_table_get_fragmentation_unlocked(ft_handle->ft->blocktable, report); + ft_handle->ft->blocktable.get_fragmentation_unlocked(report); } toku_ft_unlock(ft_handle->ft); return r; diff --git a/ft/ft.cc b/ft/ft.cc index 4c358f95a55..7430606758a 100644 --- a/ft/ft.cc +++ b/ft/ft.cc @@ -121,7 +121,7 @@ ft_destroy(FT ft) { //header and checkpoint_header have same Blocktable pointer //cannot destroy since it is still in use by CURRENT assert(ft->h->type == 
FT_CURRENT); - toku_blocktable_destroy(&ft->blocktable); + ft->blocktable.destroy(); ft->cmp.destroy(); toku_destroy_dbt(&ft->descriptor.dbt); toku_destroy_dbt(&ft->cmp_descriptor.dbt); @@ -203,7 +203,7 @@ static void ft_begin_checkpoint (LSN checkpoint_lsn, void *header_v) { assert(ft->checkpoint_header == NULL); ft_copy_for_checkpoint_unlocked(ft, checkpoint_lsn); ft->h->dirty = 0; // this is only place this bit is cleared (in currentheader) - toku_block_translation_note_start_checkpoint_unlocked(ft->blocktable); + ft->blocktable.note_start_checkpoint_unlocked(); toku_ft_unlock (ft); } @@ -239,8 +239,6 @@ ft_hack_highest_unused_msn_for_upgrade_for_checkpoint(FT ft) { static void ft_checkpoint (CACHEFILE cf, int fd, void *header_v) { FT ft = (FT) header_v; FT_HEADER ch = ft->checkpoint_header; - //printf("%s:%d allocated_limit=%lu writing queue to %lu\n", __FILE__, __LINE__, - // block_allocator_allocated_limit(h->block_allocator), h->unused_blocks.b*h->nodesize); assert(ch); assert(ch->type == FT_CHECKPOINT_INPROGRESS); if (ch->dirty) { // this is only place this bit is tested (in checkpoint_header) @@ -255,16 +253,15 @@ static void ft_checkpoint (CACHEFILE cf, int fd, void *header_v) { ft_hack_highest_unused_msn_for_upgrade_for_checkpoint(ft); // write translation and header to disk (or at least to OS internal buffer) - toku_serialize_ft_to(fd, ch, ft->blocktable, ft->cf); + toku_serialize_ft_to(fd, ch, &ft->blocktable, ft->cf); ch->dirty = 0; // this is only place this bit is cleared (in checkpoint_header) // fsync the cachefile toku_cachefile_fsync(cf); ft->h->checkpoint_count++; // checkpoint succeeded, next checkpoint will save to alternate header location ft->h->checkpoint_lsn = ch->checkpoint_lsn; //Header updated. 
- } - else { - toku_block_translation_note_skipped_checkpoint(ft->blocktable); + } else { + ft->blocktable.note_skipped_checkpoint(); } } @@ -272,14 +269,12 @@ static void ft_checkpoint (CACHEFILE cf, int fd, void *header_v) { // free unused disk space // (i.e. tell BlockAllocator to liberate blocks used by previous checkpoint). // Must have access to fd (protected) -static void ft_end_checkpoint (CACHEFILE UU(cachefile), int fd, void *header_v) { +static void ft_end_checkpoint(CACHEFILE UU(cf), int fd, void *header_v) { FT ft = (FT) header_v; assert(ft->h->type == FT_CURRENT); - toku_block_translation_note_end_checkpoint(ft->blocktable, fd); - if (ft->checkpoint_header) { - toku_free(ft->checkpoint_header); - ft->checkpoint_header = NULL; - } + ft->blocktable.note_end_checkpoint(fd); + toku_free(ft->checkpoint_header); + ft->checkpoint_header = nullptr; } // maps to cf->close_userdata @@ -407,7 +402,7 @@ static void ft_init(FT ft, FT_OPTIONS options, CACHEFILE cf) { ft_note_pin_by_checkpoint, ft_note_unpin_by_checkpoint); - toku_block_verify_no_free_blocknums(ft->blocktable); + ft->blocktable.verify_no_free_blocknums(); } @@ -456,8 +451,8 @@ void toku_ft_create(FT *ftp, FT_OPTIONS options, CACHEFILE cf, TOKUTXN txn) { toku_ft_init_reflock(ft); // Assign blocknum for root block, also dirty the header - toku_blocktable_create_new(&ft->blocktable); - toku_allocate_blocknum(ft->blocktable, &ft->h->root_blocknum, ft); + ft->blocktable.create(); + ft->blocktable.allocate_blocknum(&ft->h->root_blocknum, ft); ft_init(ft, options, cf); @@ -875,14 +870,13 @@ toku_ft_stat64 (FT ft, struct ftstat64_s *s) { s->verify_time_sec = ft->h->time_of_last_verification; } -void -toku_ft_get_fractal_tree_info64(FT ft, struct ftinfo64 *s) { - toku_blocktable_get_info64(ft->blocktable, s); +void toku_ft_get_fractal_tree_info64(FT ft, struct ftinfo64 *info) { + ft->blocktable.get_info64(info); } int toku_ft_iterate_fractal_tree_block_map(FT ft, int 
(*iter)(uint64_t,int64_t,int64_t,int64_t,int64_t,void*), void *iter_extra) { uint64_t this_checkpoint_count = ft->h->checkpoint_count; - return toku_blocktable_iterate_translation_tables(ft->blocktable, this_checkpoint_count, iter, iter_extra); + return ft->blocktable.iterate_translation_tables(this_checkpoint_count, iter, iter_extra); } void @@ -908,7 +902,7 @@ toku_ft_update_descriptor_with_fd(FT ft, DESCRIPTOR desc, int fd) { // make space for the new descriptor and write it out to disk DISKOFF offset, size; size = toku_serialize_descriptor_size(desc) + 4; - toku_realloc_descriptor_on_disk(ft->blocktable, size, &offset, ft, fd); + ft->blocktable.realloc_descriptor_on_disk(size, &offset, ft, fd); toku_serialize_descriptor_contents_to_fd(fd, desc, offset); // cleanup the old descriptor and set the in-memory descriptor to the new one @@ -1086,7 +1080,7 @@ void toku_ft_get_garbage(FT ft, uint64_t *total_space, uint64_t *used_space) { .total_space = 0, .used_space = 0 }; - toku_blocktable_iterate(ft->blocktable, TRANSLATION_CHECKPOINTED, garbage_helper, &info, true, true); + ft->blocktable.iterate(block_table::TRANSLATION_CHECKPOINTED, garbage_helper, &info, true, true); *total_space = info.total_space; *used_space = info.used_space; } diff --git a/ft/ft.h b/ft/ft.h index 4df7ed9cc9e..73f52dea990 100644 --- a/ft/ft.h +++ b/ft/ft.h @@ -111,6 +111,9 @@ void toku_ft_destroy_reflock(FT ft); void toku_ft_grab_reflock(FT ft); void toku_ft_release_reflock(FT ft); +void toku_ft_lock(struct ft *ft); +void toku_ft_unlock(struct ft *ft); + void toku_ft_create(FT *ftp, FT_OPTIONS options, CACHEFILE cf, TOKUTXN txn); void toku_ft_free (FT ft); diff --git a/ft/logger/logger.cc b/ft/logger/logger.cc index cf078d7a680..f00044b75a2 100644 --- a/ft/logger/logger.cc +++ b/ft/logger/logger.cc @@ -269,32 +269,30 @@ bool toku_logger_rollback_is_open (TOKULOGGER logger) { #define MAX_CACHED_ROLLBACK_NODES 4096 -void -toku_logger_initialize_rollback_cache(TOKULOGGER logger, FT ft) { - 
toku_free_unused_blocknums(ft->blocktable, ft->h->root_blocknum); +void toku_logger_initialize_rollback_cache(TOKULOGGER logger, FT ft) { + ft->blocktable.free_unused_blocknums(ft->h->root_blocknum); logger->rollback_cache.init(MAX_CACHED_ROLLBACK_NODES); } -int -toku_logger_open_rollback(TOKULOGGER logger, CACHETABLE cachetable, bool create) { +int toku_logger_open_rollback(TOKULOGGER logger, CACHETABLE cachetable, bool create) { assert(logger->is_open); assert(!logger->rollback_cachefile); - FT_HANDLE t = NULL; // Note, there is no DB associated with this FT. - toku_ft_handle_create(&t); - int r = toku_ft_handle_open(t, toku_product_name_strings.rollback_cachefile, create, create, cachetable, nullptr); + FT_HANDLE ft_handle = nullptr; // Note, there is no DB associated with this FT. + toku_ft_handle_create(&ft_handle); + int r = toku_ft_handle_open(ft_handle, toku_product_name_strings.rollback_cachefile, create, create, cachetable, nullptr); if (r == 0) { - logger->rollback_cachefile = t->ft->cf; - toku_logger_initialize_rollback_cache(logger, t->ft); + FT ft = ft_handle->ft; + logger->rollback_cachefile = ft->cf; + toku_logger_initialize_rollback_cache(logger, ft_handle->ft); - //Verify it is empty - //Must have no data blocks (rollback logs or otherwise). - toku_block_verify_no_data_blocks_except_root(t->ft->blocktable, t->ft->h->root_blocknum); - bool is_empty; - is_empty = toku_ft_is_empty_fast(t); + // Verify it is empty + // Must have no data blocks (rollback logs or otherwise). + ft->blocktable.verify_no_data_blocks_except_root(ft->h->root_blocknum); + bool is_empty = toku_ft_is_empty_fast(ft_handle); assert(is_empty); } else { - toku_ft_handle_close(t); + toku_ft_handle_close(ft_handle); } return r; } @@ -314,9 +312,9 @@ void toku_logger_close_rollback_check_empty(TOKULOGGER logger, bool clean_shutdo if (clean_shutdown) { //Verify it is safe to close it. assert(!ft->h->dirty); //Must not be dirty. 
- toku_free_unused_blocknums(ft->blocktable, ft->h->root_blocknum); - //Must have no data blocks (rollback logs or otherwise). - toku_block_verify_no_data_blocks_except_root(ft->blocktable, ft->h->root_blocknum); + ft->blocktable.free_unused_blocknums(ft->h->root_blocknum); + // Must have no data blocks (rollback logs or otherwise). + ft->blocktable.verify_no_data_blocks_except_root(ft->h->root_blocknum); assert(!ft->h->dirty); } else { ft->h->dirty = 0; diff --git a/ft/serialize/block_allocator.cc b/ft/serialize/block_allocator.cc index 4229427bb3f..a8fb88dbbef 100644 --- a/ft/serialize/block_allocator.cc +++ b/ft/serialize/block_allocator.cc @@ -114,7 +114,7 @@ static inline bool ba_trace_enabled() { #endif } -void block_allocator::create(uint64_t reserve_at_beginning, uint64_t alignment) { +void block_allocator::_create_internal(uint64_t reserve_at_beginning, uint64_t alignment) { // the alignment must be at least 512 and aligned with 512 to work with direct I/O assert(alignment >= 512 && (alignment % 512) == 0); @@ -127,7 +127,10 @@ void block_allocator::create(uint64_t reserve_at_beginning, uint64_t alignment) _strategy = BA_STRATEGY_FIRST_FIT; VALIDATE(); +} +void block_allocator::create(uint64_t reserve_at_beginning, uint64_t alignment) { + _create_internal(reserve_at_beginning, alignment); if (ba_trace_enabled()) { fprintf(stderr, "ba_trace_create %p\n", this); } @@ -161,41 +164,6 @@ void block_allocator::grow_blocks_array() { grow_blocks_array_by(1); } -void block_allocator::merge_blockpairs_into(uint64_t d, struct blockpair dst[], - uint64_t s, const struct blockpair src[]) -{ - uint64_t tail = d+s; - while (d > 0 && s > 0) { - struct blockpair *dp = &dst[d - 1]; - struct blockpair const *sp = &src[s - 1]; - struct blockpair *tp = &dst[tail - 1]; - assert(tail > 0); - if (dp->offset > sp->offset) { - *tp = *dp; - d--; - tail--; - } else { - *tp = *sp; - s--; - tail--; - } - } - while (d > 0) { - struct blockpair *dp = &dst[d - 1]; - struct blockpair *tp = 
&dst[tail - 1]; - *tp = *dp; - d--; - tail--; - } - while (s > 0) { - struct blockpair const *sp = &src[s - 1]; - struct blockpair *tp = &dst[tail - 1]; - *tp = *sp; - s--; - tail--; - } -} - int block_allocator::compare_blockpairs(const void *av, const void *bv) { const struct blockpair *a = (const struct blockpair *) av; const struct blockpair *b = (const struct blockpair *) bv; @@ -208,32 +176,27 @@ int block_allocator::compare_blockpairs(const void *av, const void *bv) { } } -// See the documentation in block_allocator.h -void block_allocator::alloc_blocks_at(uint64_t n_blocks, struct blockpair pairs[]) { - VALIDATE(); - qsort(pairs, n_blocks, sizeof(*pairs), compare_blockpairs); - for (uint64_t i = 0; i < n_blocks; i++) { - assert(pairs[i].offset >= _reserve_at_beginning); - assert(pairs[i].offset % _alignment == 0); - _n_bytes_in_use += pairs[i].size; +void block_allocator::create_from_blockpairs(uint64_t reserve_at_beginning, uint64_t alignment, + struct blockpair *pairs, uint64_t n_blocks) { + _create_internal(reserve_at_beginning, alignment); + + for (uint64_t i = 0; i < _n_blocks; i++) { // Allocator does not support size 0 blocks. See block_allocator_free_block. invariant(pairs[i].size > 0); + invariant(pairs[i].offset >= _reserve_at_beginning); + invariant(pairs[i].offset % _alignment == 0); + + _n_bytes_in_use += pairs[i].size; } - grow_blocks_array_by(n_blocks); - merge_blockpairs_into(_n_blocks, _blocks_array, n_blocks, pairs); - _n_blocks += n_blocks; + _n_blocks = n_blocks; + + grow_blocks_array_by(_n_blocks); + memcpy(_blocks_array, pairs, _n_blocks * sizeof(struct blockpair)); + qsort(_blocks_array, _n_blocks, sizeof(struct blockpair), compare_blockpairs); + VALIDATE(); } -void block_allocator::alloc_block_at(uint64_t size, uint64_t offset) { - struct blockpair p(offset, size); - - // Just do a linear search for the block. 
- // This data structure is a sorted array (no gaps or anything), so the search isn't really making this any slower than the insertion. - // To speed up the insertion when opening a file, we provide the block_allocator_alloc_blocks_at function. - alloc_blocks_at(1, &p); -} - // Effect: align a value by rounding up. static inline uint64_t align(uint64_t value, uint64_t ba_alignment) { return ((value + ba_alignment - 1) / ba_alignment) * ba_alignment; diff --git a/ft/serialize/block_allocator.h b/ft/serialize/block_allocator.h index 056be17b4dc..b79e5c4eb56 100644 --- a/ft/serialize/block_allocator.h +++ b/ft/serialize/block_allocator.h @@ -128,6 +128,14 @@ public: BA_STRATEGY_FIRST_FIT = 1 }; + struct blockpair { + uint64_t offset; + uint64_t size; + blockpair(uint64_t o, uint64_t s) : + offset(o), size(s) { + } + }; + // Effect: Create a block allocator, in which the first RESERVE_AT_BEGINNING bytes are not put into a block. // The default allocation strategy is first fit (BA_STRATEGY_FIRST_FIT) // All blocks be start on a multiple of ALIGNMENT. @@ -137,6 +145,19 @@ public: // alignment (IN) Block alignment. void create(uint64_t reserve_at_beginning, uint64_t alignment); + // Effect: Create a block allocator, in which the first RESERVE_AT_BEGINNING bytes are not put into a block. + // The default allocation strategy is first fit (BA_STRATEGY_FIRST_FIT) + // The allocator is initialized to contain `n_blocks' of blockpairs, taken from `pairs' + // All blocks be start on a multiple of ALIGNMENT. + // Aborts if we run out of memory. + // Parameters + // pairs, unowned array of pairs to copy + // n_blocks, Size of pairs array + // reserve_at_beginning (IN) Size of reserved block at beginning. This size does not have to be aligned. + // alignment (IN) Block alignment. 
+ void create_from_blockpairs(uint64_t reserve_at_beginning, uint64_t alignment, + struct blockpair *pairs, uint64_t n_blocks); + // Effect: Destroy this block allocator void destroy(); @@ -144,35 +165,10 @@ public: // Requires: No other threads are operating on this block allocator void set_strategy(enum allocation_strategy strategy); - // Effect: Allocate a block of the specified size at a particular offset. - // Aborts if anything goes wrong. - // The performance of this function may be as bad as Theta(N), where N is the number of blocks currently in use. - // Usage note: To allocate several blocks (e.g., when opening a FT), use block_allocator_alloc_blocks_at(). - // Requires: The resulting block may not overlap any other allocated block. - // And the offset must be a multiple of the block alignment. - // Parameters: - // size (IN): The size of the block. - // offset (IN): The location of the block. - void alloc_block_at(uint64_t size, uint64_t offset); - - struct blockpair { - uint64_t offset; - uint64_t size; - blockpair(uint64_t o, uint64_t s) : - offset(o), size(s) { - } - }; - - // Effect: Take pairs in any order, and add them all, as if we did block_allocator_alloc_block() on each pair. - // This should run in time O(N + M log M) where N is the number of blocks in ba, and M is the number of new blocks. - // Modifies: pairs (sorts them). - void alloc_blocks_at(uint64_t n_blocks, blockpair *pairs); - // Effect: Allocate a block of the specified size at an address chosen by the allocator. // Aborts if anything goes wrong. // The block address will be a multiple of the alignment. // Parameters: - // ba (IN/OUT): The block allocator. (Modifies ba.) // size (IN): The size of the block. (The size does not have to be aligned.) // offset (OUT): The location of the block. void alloc_block(uint64_t size, uint64_t *offset); @@ -180,14 +176,12 @@ public: // Effect: Free the block at offset. // Requires: There must be a block currently allocated at that offset. 
// Parameters: - // ba (IN/OUT): The block allocator. (Modifies ba.) // offset (IN): The offset of the block. void free_block(uint64_t offset); // Effect: Return the size of the block that starts at offset. // Requires: There must be a block currently allocated at that offset. // Parameters: - // ba (IN/OUT): The block allocator. (Modifies ba.) // offset (IN): The offset of the block. uint64_t block_size(uint64_t offset); @@ -221,18 +215,8 @@ public: // report->checkpoint_bytes_additional is ignored on return void get_statistics(TOKU_DB_FRAGMENTATION report); - // Effect: Merge dst[d] and src[s] into dst[d+s], merging in place. - // Initially dst and src hold sorted arrays (sorted by increasing offset). - // Finally dst contains all d+s elements sorted in order. - // Requires: - // dst and src are sorted. - // dst must be large enough (sizeof(dst) >= d && sizeof(src) >= s) - // No blocks may overlap. - // Rationale: This is exposed so it can be tested by a glass box tester. - static void merge_blockpairs_into(uint64_t d, struct blockpair dst[], - uint64_t s, const struct blockpair src[]); - private: + void _create_internal(uint64_t reserve_at_beginning, uint64_t alignment); void grow_blocks_array_by(uint64_t n_to_add); void grow_blocks_array(); int64_t find_block(uint64_t offset); diff --git a/ft/serialize/block_table.cc b/ft/serialize/block_table.cc index 630dc28200c..39d4ac7346b 100644 --- a/ft/serialize/block_table.cc +++ b/ft/serialize/block_table.cc @@ -89,13 +89,16 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
-#include "portability/toku_portability.h" #include "portability/memory.h" #include "portability/toku_assert.h" +#include "portability/toku_portability.h" #include "portability/toku_pthread.h" -#include "ft/ft-internal.h" // ugly but pragmatic, need access to dirty bits while holding translation lock -// TODO: reorganize this dependency +// ugly but pragmatic, need access to dirty bits while holding translation lock +// TODO: Refactor this (possibly with FT-301) +#include "ft/ft-internal.h" + +// TODO: reorganize this dependency (FT-303) #include "ft/ft-ops.h" // for toku_maybe_truncate_file #include "ft/serialize/block_table.h" #include "ft/serialize/rbuf.h" @@ -103,124 +106,164 @@ PATENT RIGHTS GRANT: #include "ft/serialize/block_allocator.h" #include "util/nb_mutex.h" +#include "util/scoped_malloc.h" -//When the translation (btt) is stored on disk: -// In Header: -// size_on_disk -// location_on_disk -// In block translation table (in order): -// smallest_never_used_blocknum -// blocknum_freelist_head -// array -// a checksum -struct translation { //This is the BTT (block translation table) - enum translation_type type; - int64_t length_of_array; //Number of elements in array (block_translation). always >= smallest_never_used_blocknum - BLOCKNUM smallest_never_used_blocknum; - BLOCKNUM blocknum_freelist_head; // next (previously used) unused blocknum (free list) - struct block_translation_pair *block_translation; +// indicates the end of a freelist +static const BLOCKNUM freelist_null = { -1 }; - // Where and how big is the block translation vector stored on disk. 
- // size_on_disk is stored in block_translation[RESERVED_BLOCKNUM_TRANSLATION].size - // location_on is stored in block_translation[RESERVED_BLOCKNUM_TRANSLATION].u.diskoff -}; +// value of block_translation_pair.size if blocknum is unused +static const DISKOFF size_is_free = (DISKOFF) -1; -static const BLOCKNUM freelist_null = {-1}; // in a freelist, this indicates end of list -static const DISKOFF size_is_free = (DISKOFF)-1; // value of block_translation_pair.size if blocknum is unused -static const DISKOFF diskoff_unused = (DISKOFF)-2; // value of block_translation_pair.u.diskoff if blocknum is used but does not yet have a diskblock +// value of block_translation_pair.u.diskoff if blocknum is used but does not yet have a diskblock +static const DISKOFF diskoff_unused = (DISKOFF) -2; -/******** - * There are three copies of the translation table (btt) in the block table: - * - * checkpointed Is initialized by deserializing from disk, - * and is the only version ever read from disk. - * When read from disk it is copied to current. - * It is immutable. It can be replaced by an inprogress btt. - * - * inprogress Is only filled by copying from current, - * and is the only version ever serialized to disk. - * (It is serialized to disk on checkpoint and clean shutdown.) - * At end of checkpoint it replaces 'checkpointed'. - * During a checkpoint, any 'pending' dirty writes will update - * inprogress. - * - * current Is initialized by copying from checkpointed, - * is the only version ever modified while the database is in use, - * and is the only version ever copied to inprogress. - * It is never stored on disk. - ********/ +void block_table::_mutex_lock() { + toku_mutex_lock(&_mutex); +} +void block_table::_mutex_unlock() { + toku_mutex_unlock(&_mutex); +} -struct block_table { - struct translation current; // The current translation is the one used by client threads. It is not represented on disk. 
- struct translation inprogress; // the translation used by the checkpoint currently in progress. If the checkpoint thread allocates a block, it must also update the current translation. - struct translation checkpointed; // the translation for the data that shall remain inviolate on disk until the next checkpoint finishes, after which any blocks used only in this translation can be freed. +// TODO: Move lock to FT +void toku_ft_lock(FT ft) { + block_table *bt = &ft->blocktable; + bt->_mutex_lock(); +} - // The in-memory data structure for block allocation. There is no on-disk data structure for block allocation. - // Note: This is *allocation* not *translation*. The bt_block_allocator is unaware of which blocks are used for which translation, but simply allocates and deallocates blocks. - block_allocator bt_block_allocator; - toku_mutex_t mutex; - struct nb_mutex safe_file_size_lock; - bool checkpoint_skipped; - uint64_t safe_file_size; -}; +// TODO: Move lock to FT +void toku_ft_unlock(FT ft) { + block_table *bt = &ft->blocktable; + toku_mutex_assert_locked(&bt->_mutex); + bt->_mutex_unlock(); +} -//forward decls -static int64_t calculate_size_on_disk (struct translation *t); -static inline bool translation_prevents_freeing (struct translation *t, BLOCKNUM b, struct block_translation_pair *old_pair); -static inline void lock_for_blocktable (BLOCK_TABLE bt); -static inline void unlock_for_blocktable (BLOCK_TABLE bt); +// There are two headers: the reserve must fit them both and be suitably aligned. 
+static_assert(block_allocator::BLOCK_ALLOCATOR_HEADER_RESERVE % + block_allocator::BLOCK_ALLOCATOR_ALIGNMENT == 0, + "Block allocator's header reserve must be suitibly aligned"); +static_assert(block_allocator::BLOCK_ALLOCATOR_HEADER_RESERVE * 2 == + block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE, + "Block allocator's total header reserve must exactly fit two headers"); +// does NOT initialize the block allocator: the caller is responsible +void block_table::_create_internal() { + memset(&_mutex, 0, sizeof(_mutex)); + toku_mutex_init(&_mutex, nullptr); + nb_mutex_init(&_safe_file_size_lock); - -static void -ft_set_dirty(FT ft, bool for_checkpoint){ - toku_mutex_assert_locked(&ft->blocktable->mutex); - paranoid_invariant(ft->h->type == FT_CURRENT); - if (for_checkpoint) { - paranoid_invariant(ft->checkpoint_header->type == FT_CHECKPOINT_INPROGRESS); - ft->checkpoint_header->dirty = 1; + _checkpointed.type = TRANSLATION_CHECKPOINTED; + _checkpointed.smallest_never_used_blocknum = make_blocknum(RESERVED_BLOCKNUMS); + _checkpointed.length_of_array = _checkpointed.smallest_never_used_blocknum.b; + _checkpointed.blocknum_freelist_head = freelist_null; + XMALLOC_N(_checkpointed.length_of_array, _checkpointed.block_translation); + for (int64_t i = 0; i < _checkpointed.length_of_array; i++) { + _checkpointed.block_translation[i].size = 0; + _checkpointed.block_translation[i].u.diskoff = diskoff_unused; } - else { + + // we just created a default checkpointed, now copy it to current. + _copy_translation(&_current, &_checkpointed, TRANSLATION_CURRENT); +} + +// Fill in the checkpointed translation from buffer, and copy checkpointed to current. +// The one read from disk is the last known checkpointed one, so we are keeping it in +// place and then setting current (which is never stored on disk) for current use. +// The translation_buffer has translation only, we create the rest of the block_table. 
+int block_table::create_from_buffer(int fd, + DISKOFF location_on_disk, //Location of translation_buffer + DISKOFF size_on_disk, + unsigned char *translation_buffer) { + // Does not initialize the block allocator + _create_internal(); + + // Deserialize the translation and copy it to current + int r = _translation_deserialize_from_buffer(&_checkpointed, + location_on_disk, size_on_disk, + translation_buffer); + if (r != 0) { + return r; + } + _copy_translation(&_current, &_checkpointed, TRANSLATION_CURRENT); + + // Determine the file size + int64_t file_size; + r = toku_os_get_file_size(fd, &file_size); + lazy_assert_zero(r); + invariant(file_size >= 0); + _safe_file_size = file_size; + + // Gather the non-empty translations and use them to create the block allocator + toku::scoped_malloc pairs_buf(_checkpointed.smallest_never_used_blocknum.b * + sizeof(struct block_allocator::blockpair)); + struct block_allocator::blockpair *CAST_FROM_VOIDP(pairs, pairs_buf.get()); + uint64_t n_pairs = 0; + for (int64_t i = 0; i < _checkpointed.smallest_never_used_blocknum.b; i++) { + struct block_translation_pair pair = _checkpointed.block_translation[i]; + if (pair.size > 0) { + invariant(pair.u.diskoff != diskoff_unused); + pairs[n_pairs++] = block_allocator::blockpair(pair.u.diskoff, pair.size); + } + } + + _bt_block_allocator.create_from_blockpairs(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE, + block_allocator::BLOCK_ALLOCATOR_ALIGNMENT, + pairs, n_pairs); + + return 0; +} + +void block_table::create() { + // Does not initialize the block allocator + _create_internal(); + + // Create an empty block allocator. 
+ _bt_block_allocator.create(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE, + block_allocator::BLOCK_ALLOCATOR_ALIGNMENT); +} + +// TODO: Refactor with FT-303 +static void ft_set_dirty(FT ft, bool for_checkpoint) { + invariant(ft->h->type == FT_CURRENT); + if (for_checkpoint) { + invariant(ft->checkpoint_header->type == FT_CHECKPOINT_INPROGRESS); + ft->checkpoint_header->dirty = 1; + } else { ft->h->dirty = 1; } } -static void -maybe_truncate_file(BLOCK_TABLE bt, int fd, uint64_t size_needed_before) { - toku_mutex_assert_locked(&bt->mutex); - uint64_t new_size_needed = bt->bt_block_allocator.allocated_limit(); +void block_table::_maybe_truncate_file(int fd, uint64_t size_needed_before) { + toku_mutex_assert_locked(&_mutex); + uint64_t new_size_needed = _bt_block_allocator.allocated_limit(); //Save a call to toku_os_get_file_size (kernel call) if unlikely to be useful. - if (new_size_needed < size_needed_before && new_size_needed < bt->safe_file_size) { - nb_mutex_lock(&bt->safe_file_size_lock, &bt->mutex); + if (new_size_needed < size_needed_before && new_size_needed < _safe_file_size) { + nb_mutex_lock(&_safe_file_size_lock, &_mutex); - // Must hold safe_file_size_lock to change safe_file_size. - if (new_size_needed < bt->safe_file_size) { - int64_t safe_file_size_before = bt->safe_file_size; + // Must hold _safe_file_size_lock to change _safe_file_size. + if (new_size_needed < _safe_file_size) { + int64_t safe_file_size_before = _safe_file_size; // Not safe to use the 'to-be-truncated' portion until truncate is done. 
- bt->safe_file_size = new_size_needed; - unlock_for_blocktable(bt); + _safe_file_size = new_size_needed; + _mutex_unlock(); uint64_t size_after; toku_maybe_truncate_file(fd, new_size_needed, safe_file_size_before, &size_after); - lock_for_blocktable(bt); + _mutex_lock(); - bt->safe_file_size = size_after; + _safe_file_size = size_after; } - nb_mutex_unlock(&bt->safe_file_size_lock); + nb_mutex_unlock(&_safe_file_size_lock); } } -void -toku_maybe_truncate_file_on_open(BLOCK_TABLE bt, int fd) { - lock_for_blocktable(bt); - maybe_truncate_file(bt, fd, bt->safe_file_size); - unlock_for_blocktable(bt); +void block_table::maybe_truncate_file_on_open(int fd) { + _mutex_lock(); + _maybe_truncate_file(fd, _safe_file_size); + _mutex_unlock(); } - -static void -copy_translation(struct translation * dst, struct translation * src, enum translation_type newtype) { +void block_table::_copy_translation(struct translation * dst, struct translation * src, enum translation_type newtype) { paranoid_invariant(src->length_of_array >= src->smallest_never_used_blocknum.b); //verify invariant paranoid_invariant(newtype==TRANSLATION_DEBUG || (src->type == TRANSLATION_CURRENT && newtype == TRANSLATION_INPROGRESS) || @@ -239,10 +282,9 @@ copy_translation(struct translation * dst, struct translation * src, enum transl dst->block_translation[RESERVED_BLOCKNUM_TRANSLATION].u.diskoff = diskoff_unused; } -int64_t -toku_block_get_blocks_in_use_unlocked(BLOCK_TABLE bt) { +int64_t block_table::get_blocks_in_use_unlocked() { BLOCKNUM b; - struct translation *t = &bt->current; + struct translation *t = &_current; int64_t num_blocks = 0; { //Reserved blocknums do not get upgraded; They are part of the header. 
@@ -255,8 +297,7 @@ toku_block_get_blocks_in_use_unlocked(BLOCK_TABLE bt) { return num_blocks; } -static void -maybe_optimize_translation(struct translation *t) { +void block_table::_maybe_optimize_translation(struct translation *t) { //Reduce 'smallest_never_used_blocknum.b' (completely free blocknums instead of just //on a free list. Doing so requires us to regenerate the free list. //This is O(n) work, so do it only if you're already doing that. @@ -295,25 +336,24 @@ maybe_optimize_translation(struct translation *t) { } // block table must be locked by caller of this function -void -toku_block_translation_note_start_checkpoint_unlocked (BLOCK_TABLE bt) { - toku_mutex_assert_locked(&bt->mutex); +void block_table::note_start_checkpoint_unlocked() { + toku_mutex_assert_locked(&_mutex); // Copy current translation to inprogress translation. - paranoid_invariant(bt->inprogress.block_translation == NULL); + paranoid_invariant(_inprogress.block_translation == NULL); //We're going to do O(n) work to copy the translation, so we //can afford to do O(n) work by optimizing the translation - maybe_optimize_translation(&bt->current); - copy_translation(&bt->inprogress, &bt->current, TRANSLATION_INPROGRESS); + _maybe_optimize_translation(&_current); + _copy_translation(&_inprogress, &_current, TRANSLATION_INPROGRESS); - bt->checkpoint_skipped = false; + _checkpoint_skipped = false; } -void toku_block_translation_note_skipped_checkpoint (BLOCK_TABLE bt) { +void block_table::note_skipped_checkpoint() { //Purpose, alert block translation that the checkpoint was skipped, e.x. 
for a non-dirty header - lock_for_blocktable(bt); - paranoid_invariant_notnull(bt->inprogress.block_translation); - bt->checkpoint_skipped = true; - unlock_for_blocktable(bt); + _mutex_lock(); + paranoid_invariant_notnull(_inprogress.block_translation); + _checkpoint_skipped = true; + _mutex_unlock(); } // Purpose: free any disk space used by previous checkpoint that isn't in use by either @@ -326,143 +366,92 @@ void toku_block_translation_note_skipped_checkpoint (BLOCK_TABLE bt) { // free (offset,len) from checkpoint // move inprogress to checkpoint (resetting type) // inprogress = NULL -void -toku_block_translation_note_end_checkpoint (BLOCK_TABLE bt, int fd) { +void block_table::note_end_checkpoint(int fd) { // Free unused blocks - lock_for_blocktable(bt); - uint64_t allocated_limit_at_start = bt->bt_block_allocator.allocated_limit(); - paranoid_invariant_notnull(bt->inprogress.block_translation); - if (bt->checkpoint_skipped) { - toku_free(bt->inprogress.block_translation); - memset(&bt->inprogress, 0, sizeof(bt->inprogress)); + _mutex_lock(); + uint64_t allocated_limit_at_start = _bt_block_allocator.allocated_limit(); + paranoid_invariant_notnull(_inprogress.block_translation); + if (_checkpoint_skipped) { + toku_free(_inprogress.block_translation); + memset(&_inprogress, 0, sizeof(_inprogress)); goto end; } //Make certain inprogress was allocated space on disk - assert(bt->inprogress.block_translation[RESERVED_BLOCKNUM_TRANSLATION].size > 0); - assert(bt->inprogress.block_translation[RESERVED_BLOCKNUM_TRANSLATION].u.diskoff > 0); + assert(_inprogress.block_translation[RESERVED_BLOCKNUM_TRANSLATION].size > 0); + assert(_inprogress.block_translation[RESERVED_BLOCKNUM_TRANSLATION].u.diskoff > 0); { - int64_t i; - struct translation *t = &bt->checkpointed; - - for (i = 0; i < t->length_of_array; i++) { + struct translation *t = &_checkpointed; + for (int64_t i = 0; i < t->length_of_array; i++) { struct block_translation_pair *pair = &t->block_translation[i]; - if 
(pair->size > 0 && !translation_prevents_freeing(&bt->inprogress, make_blocknum(i), pair)) { - assert(!translation_prevents_freeing(&bt->current, make_blocknum(i), pair)); - bt->bt_block_allocator.free_block(pair->u.diskoff); + if (pair->size > 0 && !_translation_prevents_freeing(&_inprogress, make_blocknum(i), pair)) { + assert(!_translation_prevents_freeing(&_current, make_blocknum(i), pair)); + _bt_block_allocator.free_block(pair->u.diskoff); } } - toku_free(bt->checkpointed.block_translation); - bt->checkpointed = bt->inprogress; - bt->checkpointed.type = TRANSLATION_CHECKPOINTED; - memset(&bt->inprogress, 0, sizeof(bt->inprogress)); - maybe_truncate_file(bt, fd, allocated_limit_at_start); + toku_free(_checkpointed.block_translation); + _checkpointed = _inprogress; + _checkpointed.type = TRANSLATION_CHECKPOINTED; + memset(&_inprogress, 0, sizeof(_inprogress)); + _maybe_truncate_file(fd, allocated_limit_at_start); } end: - unlock_for_blocktable(bt); + _mutex_unlock(); } -__attribute__((nonnull,const)) -static inline bool -is_valid_blocknum(struct translation *t, BLOCKNUM b) { - //Sanity check: Verify invariant - paranoid_invariant(t->length_of_array >= t->smallest_never_used_blocknum.b); +bool block_table::_is_valid_blocknum(struct translation *t, BLOCKNUM b) { + invariant(t->length_of_array >= t->smallest_never_used_blocknum.b); return b.b >= 0 && b.b < t->smallest_never_used_blocknum.b; } -static inline void -verify_valid_blocknum (struct translation *UU(t), BLOCKNUM UU(b)) { - paranoid_invariant(is_valid_blocknum(t, b)); +void block_table::_verify_valid_blocknum(struct translation *UU(t), BLOCKNUM UU(b)) { + invariant(_is_valid_blocknum(t, b)); } -__attribute__((nonnull,const)) -static inline bool -is_valid_freeable_blocknum(struct translation *t, BLOCKNUM b) { - //Sanity check: Verify invariant - paranoid_invariant(t->length_of_array >= t->smallest_never_used_blocknum.b); +bool block_table::_is_valid_freeable_blocknum(struct translation *t, BLOCKNUM b) { + 
invariant(t->length_of_array >= t->smallest_never_used_blocknum.b); return b.b >= RESERVED_BLOCKNUMS && b.b < t->smallest_never_used_blocknum.b; } -//Can be freed -static inline void -verify_valid_freeable_blocknum (struct translation *UU(t), BLOCKNUM UU(b)) { - paranoid_invariant(is_valid_freeable_blocknum(t, b)); -} - -static void -blocktable_lock_init (BLOCK_TABLE bt) { - memset(&bt->mutex, 0, sizeof(bt->mutex)); - toku_mutex_init(&bt->mutex, NULL); -} - -static void -blocktable_lock_destroy (BLOCK_TABLE bt) { - toku_mutex_destroy(&bt->mutex); -} - -static inline void -lock_for_blocktable (BLOCK_TABLE bt) { - // Locks the blocktable_mutex. - toku_mutex_lock(&bt->mutex); -} - -static inline void -unlock_for_blocktable (BLOCK_TABLE bt) { - toku_mutex_unlock(&bt->mutex); -} - -void -toku_ft_lock (FT ft) { - BLOCK_TABLE bt = ft->blocktable; - lock_for_blocktable(bt); -} - -void -toku_ft_unlock (FT ft) { - BLOCK_TABLE bt = ft->blocktable; - toku_mutex_assert_locked(&bt->mutex); - unlock_for_blocktable(bt); +// should be freeable +void block_table::_verify_valid_freeable_blocknum(struct translation *UU(t), BLOCKNUM UU(b)) { + invariant(_is_valid_freeable_blocknum(t, b)); } // Also used only in ft-serialize-test. 
-void -toku_block_free(BLOCK_TABLE bt, uint64_t offset) { - lock_for_blocktable(bt); - bt->bt_block_allocator.free_block(offset); - unlock_for_blocktable(bt); +void block_table::block_free(uint64_t offset) { + _mutex_lock(); + _bt_block_allocator.free_block(offset); + _mutex_unlock(); } -static int64_t -calculate_size_on_disk (struct translation *t) { - int64_t r = (8 + // smallest_never_used_blocknum - 8 + // blocknum_freelist_head - t->smallest_never_used_blocknum.b * 16 + // Array - 4); // 4 for checksum - return r; +int64_t block_table::_calculate_size_on_disk(struct translation *t) { + return 8 + // smallest_never_used_blocknum + 8 + // blocknum_freelist_head + t->smallest_never_used_blocknum.b * 16 + // Array + 4; // 4 for checksum } // We cannot free the disk space allocated to this blocknum if it is still in use by the given translation table. -static inline bool -translation_prevents_freeing(struct translation *t, BLOCKNUM b, struct block_translation_pair *old_pair) { - return (t->block_translation && - b.b < t->smallest_never_used_blocknum.b && - old_pair->u.diskoff == t->block_translation[b.b].u.diskoff); +bool block_table::_translation_prevents_freeing(struct translation *t, BLOCKNUM b, struct block_translation_pair *old_pair) { + return t->block_translation && + b.b < t->smallest_never_used_blocknum.b && + old_pair->u.diskoff == t->block_translation[b.b].u.diskoff; } -static void -blocknum_realloc_on_disk_internal (BLOCK_TABLE bt, BLOCKNUM b, DISKOFF size, DISKOFF *offset, FT ft, bool for_checkpoint) { - toku_mutex_assert_locked(&bt->mutex); +void block_table::_realloc_on_disk_internal(BLOCKNUM b, DISKOFF size, DISKOFF *offset, FT ft, bool for_checkpoint) { + toku_mutex_assert_locked(&_mutex); ft_set_dirty(ft, for_checkpoint); - struct translation *t = &bt->current; + struct translation *t = &_current; struct block_translation_pair old_pair = t->block_translation[b.b]; //Free the old block if it is not still in use by the checkpoint in progress or the 
previous checkpoint bool cannot_free = (bool) - ((!for_checkpoint && translation_prevents_freeing(&bt->inprogress, b, &old_pair)) || - translation_prevents_freeing(&bt->checkpointed, b, &old_pair)); + ((!for_checkpoint && _translation_prevents_freeing(&_inprogress, b, &old_pair)) || + _translation_prevents_freeing(&_checkpointed, b, &old_pair)); if (!cannot_free && old_pair.u.diskoff!=diskoff_unused) { - bt->bt_block_allocator.free_block(old_pair.u.diskoff); + _bt_block_allocator.free_block(old_pair.u.diskoff); } uint64_t allocator_offset = diskoff_unused; @@ -470,90 +459,84 @@ blocknum_realloc_on_disk_internal (BLOCK_TABLE bt, BLOCKNUM b, DISKOFF size, DIS if (size > 0) { // Allocate a new block if the size is greater than 0, // if the size is just 0, offset will be set to diskoff_unused - bt->bt_block_allocator.alloc_block(size, &allocator_offset); + _bt_block_allocator.alloc_block(size, &allocator_offset); } t->block_translation[b.b].u.diskoff = allocator_offset; *offset = allocator_offset; //Update inprogress btt if appropriate (if called because Pending bit is set). if (for_checkpoint) { - paranoid_invariant(b.b < bt->inprogress.length_of_array); - bt->inprogress.block_translation[b.b] = t->block_translation[b.b]; + paranoid_invariant(b.b < _inprogress.length_of_array); + _inprogress.block_translation[b.b] = t->block_translation[b.b]; } } -static void -ensure_safe_write_unlocked(BLOCK_TABLE bt, int fd, DISKOFF block_size, DISKOFF block_offset) { - // Requires: holding bt->mutex +void block_table::_ensure_safe_write_unlocked(int fd, DISKOFF block_size, DISKOFF block_offset) { + // Requires: holding _mutex uint64_t size_needed = block_size + block_offset; - if (size_needed > bt->safe_file_size) { - // Must hold safe_file_size_lock to change safe_file_size. 
- nb_mutex_lock(&bt->safe_file_size_lock, &bt->mutex); - if (size_needed > bt->safe_file_size) { - unlock_for_blocktable(bt); + if (size_needed > _safe_file_size) { + // Must hold _safe_file_size_lock to change _safe_file_size. + nb_mutex_lock(&_safe_file_size_lock, &_mutex); + if (size_needed > _safe_file_size) { + _mutex_unlock(); int64_t size_after; - toku_maybe_preallocate_in_file(fd, size_needed, bt->safe_file_size, &size_after); + toku_maybe_preallocate_in_file(fd, size_needed, _safe_file_size, &size_after); - lock_for_blocktable(bt); - bt->safe_file_size = size_after; + _mutex_lock(); + _safe_file_size = size_after; } - nb_mutex_unlock(&bt->safe_file_size_lock); + nb_mutex_unlock(&_safe_file_size_lock); } } -void -toku_blocknum_realloc_on_disk (BLOCK_TABLE bt, BLOCKNUM b, DISKOFF size, DISKOFF *offset, FT ft, int fd, bool for_checkpoint) { - lock_for_blocktable(bt); - struct translation *t = &bt->current; - verify_valid_freeable_blocknum(t, b); - blocknum_realloc_on_disk_internal(bt, b, size, offset, ft, for_checkpoint); +void block_table::realloc_on_disk(BLOCKNUM b, DISKOFF size, DISKOFF *offset, FT ft, int fd, bool for_checkpoint) { + _mutex_lock(); + struct translation *t = &_current; + _verify_valid_freeable_blocknum(t, b); + _realloc_on_disk_internal(b, size, offset, ft, for_checkpoint); - ensure_safe_write_unlocked(bt, fd, size, *offset); - unlock_for_blocktable(bt); + _ensure_safe_write_unlocked(fd, size, *offset); + _mutex_unlock(); } -__attribute__((nonnull,const)) -static inline bool -pair_is_unallocated(struct block_translation_pair *pair) { +bool block_table::_pair_is_unallocated(struct block_translation_pair *pair) { return pair->size == 0 && pair->u.diskoff == diskoff_unused; } -static void blocknum_alloc_translation_on_disk_unlocked(BLOCK_TABLE bt) // Effect: figure out where to put the inprogress btt on disk, allocate space for it there. // The space must be 512-byte aligned (both the starting address and the size). 
// As a result, the allcoated space may be a little bit bigger (up to the next 512-byte boundary) than the actual btt. -{ - toku_mutex_assert_locked(&bt->mutex); +void block_table::_alloc_inprogress_translation_on_disk_unlocked() { + toku_mutex_assert_locked(&_mutex); - struct translation *t = &bt->inprogress; + struct translation *t = &_inprogress; paranoid_invariant_notnull(t->block_translation); BLOCKNUM b = make_blocknum(RESERVED_BLOCKNUM_TRANSLATION); //Each inprogress is allocated only once - paranoid_invariant(pair_is_unallocated(&t->block_translation[b.b])); + paranoid_invariant(_pair_is_unallocated(&t->block_translation[b.b])); //Allocate a new block - int64_t size = calculate_size_on_disk(t); + int64_t size = _calculate_size_on_disk(t); uint64_t offset; - bt->bt_block_allocator.alloc_block(size, &offset); + _bt_block_allocator.alloc_block(size, &offset); t->block_translation[b.b].u.diskoff = offset; t->block_translation[b.b].size = size; } -void toku_serialize_translation_to_wbuf(BLOCK_TABLE bt, int fd, struct wbuf *w, - int64_t *address, int64_t *size) -// Effect: Fills wbuf (which starts uninitialized) with bt +// Effect: Serializes the blocktable to a wbuf (which starts uninitialized) // A clean shutdown runs checkpoint start so that current and inprogress are copies. // The resulting wbuf buffer is guaranteed to be be 512-byte aligned and the total length is a multiple of 512 (so we pad with zeros at the end if needd) // The address is guaranteed to be 512-byte aligned, but the size is not guaranteed. 
// It *is* guaranteed that we can read up to the next 512-byte boundary, however -{ - lock_for_blocktable(bt); - struct translation *t = &bt->inprogress; +void block_table::serialize_translation_to_wbuf(int fd, struct wbuf *w, + int64_t *address, int64_t *size) { + _mutex_lock(); + struct translation *t = &_inprogress; BLOCKNUM b = make_blocknum(RESERVED_BLOCKNUM_TRANSLATION); - blocknum_alloc_translation_on_disk_unlocked(bt); // The allocated block must be 512-byte aligned to make O_DIRECT happy. - uint64_t size_translation = calculate_size_on_disk(t); + _alloc_inprogress_translation_on_disk_unlocked(); // The allocated block must be 512-byte aligned to make O_DIRECT happy. + uint64_t size_translation = _calculate_size_on_disk(t); uint64_t size_aligned = roundup_to_multiple(512, size_translation); assert((int64_t)size_translation==t->block_translation[b.b].size); { @@ -579,33 +562,33 @@ void toku_serialize_translation_to_wbuf(BLOCK_TABLE bt, int fd, struct wbuf *w, *size = size_translation; assert((*address)%512 == 0); - ensure_safe_write_unlocked(bt, fd, size_aligned, *address); - unlock_for_blocktable(bt); -} - - -// Perhaps rename: purpose is get disk address of a block, given its blocknum (blockid?) -static void -translate_blocknum_to_offset_size_unlocked(BLOCK_TABLE bt, BLOCKNUM b, DISKOFF *offset, DISKOFF *size) { - struct translation *t = &bt->current; - verify_valid_blocknum(t, b); - if (offset) *offset = t->block_translation[b.b].u.diskoff; - if (size) *size = t->block_translation[b.b].size; + _ensure_safe_write_unlocked(fd, size_aligned, *address); + _mutex_unlock(); } // Perhaps rename: purpose is get disk address of a block, given its blocknum (blockid?) 
-void -toku_translate_blocknum_to_offset_size(BLOCK_TABLE bt, BLOCKNUM b, DISKOFF *offset, DISKOFF *size) { - lock_for_blocktable(bt); - translate_blocknum_to_offset_size_unlocked(bt, b, offset, size); - unlock_for_blocktable(bt); +void block_table::_translate_blocknum_to_offset_size_unlocked(BLOCKNUM b, DISKOFF *offset, DISKOFF *size) { + struct translation *t = &_current; + _verify_valid_blocknum(t, b); + if (offset) { + *offset = t->block_translation[b.b].u.diskoff; + } + if (size) { + *size = t->block_translation[b.b].size; + } } -//Only called by toku_allocate_blocknum -static void -maybe_expand_translation (struct translation *t) { +// Perhaps rename: purpose is get disk address of a block, given its blocknum (blockid?) +void block_table::translate_blocknum_to_offset_size(BLOCKNUM b, DISKOFF *offset, DISKOFF *size) { + _mutex_lock(); + _translate_blocknum_to_offset_size_unlocked(b, offset, size); + _mutex_unlock(); +} + +// Only called by toku_allocate_blocknum // Effect: expand the array to maintain size invariant // given that one more never-used blocknum will soon be used. 
+void block_table::_maybe_expand_translation(struct translation *t) { if (t->length_of_array <= t->smallest_never_used_blocknum.b) { //expansion is necessary uint64_t new_length = t->smallest_never_used_blocknum.b * 2; @@ -619,15 +602,14 @@ maybe_expand_translation (struct translation *t) { } } -void -toku_allocate_blocknum_unlocked(BLOCK_TABLE bt, BLOCKNUM *res, FT ft) { - toku_mutex_assert_locked(&bt->mutex); +void block_table::_allocate_blocknum_unlocked(BLOCKNUM *res, FT ft) { + toku_mutex_assert_locked(&_mutex); BLOCKNUM result; - struct translation * t = &bt->current; + struct translation *t = &_current; if (t->blocknum_freelist_head.b == freelist_null.b) { // no previously used blocknums are available // use a never used blocknum - maybe_expand_translation(t); //Ensure a never used blocknums is available + _maybe_expand_translation(t); //Ensure a never used blocknums is available result = t->smallest_never_used_blocknum; t->smallest_never_used_blocknum.b++; } else { // reuse a previously used blocknum @@ -640,22 +622,19 @@ toku_allocate_blocknum_unlocked(BLOCK_TABLE bt, BLOCKNUM *res, FT ft) { //blocknum is not free anymore t->block_translation[result.b].u.diskoff = diskoff_unused; t->block_translation[result.b].size = 0; - verify_valid_freeable_blocknum(t, result); + _verify_valid_freeable_blocknum(t, result); *res = result; ft_set_dirty(ft, false); } -void -toku_allocate_blocknum(BLOCK_TABLE bt, BLOCKNUM *res, FT ft) { - lock_for_blocktable(bt); - toku_allocate_blocknum_unlocked(bt, res, ft); - unlock_for_blocktable(bt); +void block_table::allocate_blocknum(BLOCKNUM *res, FT ft) { + _mutex_lock(); + _allocate_blocknum_unlocked(res, ft); + _mutex_unlock(); } -static void -free_blocknum_in_translation(struct translation *t, BLOCKNUM b) -{ - verify_valid_freeable_blocknum(t, b); +void block_table::_free_blocknum_in_translation(struct translation *t, BLOCKNUM b) { + _verify_valid_freeable_blocknum(t, b); paranoid_invariant(t->block_translation[b.b].size != 
size_is_free); t->block_translation[b.b].size = size_is_free; @@ -663,30 +642,29 @@ free_blocknum_in_translation(struct translation *t, BLOCKNUM b) t->blocknum_freelist_head = b; } -static void -free_blocknum_unlocked(BLOCK_TABLE bt, BLOCKNUM *bp, FT ft, bool for_checkpoint) { // Effect: Free a blocknum. // If the blocknum holds the only reference to a block on disk, free that block - toku_mutex_assert_locked(&bt->mutex); +void block_table::_free_blocknum_unlocked(BLOCKNUM *bp, FT ft, bool for_checkpoint) { + toku_mutex_assert_locked(&_mutex); BLOCKNUM b = *bp; bp->b = 0; //Remove caller's reference. - struct block_translation_pair old_pair = bt->current.block_translation[b.b]; + struct block_translation_pair old_pair = _current.block_translation[b.b]; - free_blocknum_in_translation(&bt->current, b); + _free_blocknum_in_translation(&_current, b); if (for_checkpoint) { paranoid_invariant(ft->checkpoint_header->type == FT_CHECKPOINT_INPROGRESS); - free_blocknum_in_translation(&bt->inprogress, b); + _free_blocknum_in_translation(&_inprogress, b); } //If the size is 0, no disk block has ever been assigned to this blocknum. 
if (old_pair.size > 0) { //Free the old block if it is not still in use by the checkpoint in progress or the previous checkpoint bool cannot_free = (bool) - (translation_prevents_freeing(&bt->inprogress, b, &old_pair) || - translation_prevents_freeing(&bt->checkpointed, b, &old_pair)); + (_translation_prevents_freeing(&_inprogress, b, &old_pair) || + _translation_prevents_freeing(&_checkpointed, b, &old_pair)); if (!cannot_free) { - bt->bt_block_allocator.free_block(old_pair.u.diskoff); + _bt_block_allocator.free_block(old_pair.u.diskoff); } } else { @@ -696,91 +674,80 @@ free_blocknum_unlocked(BLOCK_TABLE bt, BLOCKNUM *bp, FT ft, bool for_checkpoint) ft_set_dirty(ft, for_checkpoint); } -void -toku_free_blocknum(BLOCK_TABLE bt, BLOCKNUM *bp, FT ft, bool for_checkpoint) { - lock_for_blocktable(bt); - free_blocknum_unlocked(bt, bp, ft, for_checkpoint); - unlock_for_blocktable(bt); +void block_table::free_blocknum(BLOCKNUM *bp, FT ft, bool for_checkpoint) { + _mutex_lock(); + _free_blocknum_unlocked(bp, ft, for_checkpoint); + _mutex_unlock(); } -//Verify there are no free blocks. -void -toku_block_verify_no_free_blocknums(BLOCK_TABLE UU(bt)) { - paranoid_invariant(bt->current.blocknum_freelist_head.b == freelist_null.b); +// Verify there are no free blocks. 
+void block_table::verify_no_free_blocknums() { + invariant(_current.blocknum_freelist_head.b == freelist_null.b); } // Frees blocknums that have a size of 0 and unused diskoff // Currently used for eliminating unused cached rollback log nodes -void -toku_free_unused_blocknums(BLOCK_TABLE bt, BLOCKNUM root) { - lock_for_blocktable(bt); - int64_t smallest = bt->current.smallest_never_used_blocknum.b; +void block_table::free_unused_blocknums(BLOCKNUM root) { + _mutex_lock(); + int64_t smallest = _current.smallest_never_used_blocknum.b; for (int64_t i=RESERVED_BLOCKNUMS; i < smallest; i++) { if (i == root.b) { continue; } BLOCKNUM b = make_blocknum(i); - if (bt->current.block_translation[b.b].size == 0) { - invariant(bt->current.block_translation[b.b].u.diskoff == diskoff_unused); - free_blocknum_in_translation(&bt->current, b); + if (_current.block_translation[b.b].size == 0) { + invariant(_current.block_translation[b.b].u.diskoff == diskoff_unused); + _free_blocknum_in_translation(&_current, b); } } - unlock_for_blocktable(bt); + _mutex_unlock(); } -__attribute__((nonnull,const,unused)) -static inline bool -no_data_blocks_except_root(BLOCK_TABLE bt, BLOCKNUM root) { +bool block_table::_no_data_blocks_except_root(BLOCKNUM root) { bool ok = true; - lock_for_blocktable(bt); - int64_t smallest = bt->current.smallest_never_used_blocknum.b; + _mutex_lock(); + int64_t smallest = _current.smallest_never_used_blocknum.b; if (root.b < RESERVED_BLOCKNUMS) { ok = false; goto cleanup; } - int64_t i; - for (i=RESERVED_BLOCKNUMS; i < smallest; i++) { + for (int64_t i = RESERVED_BLOCKNUMS; i < smallest; i++) { if (i == root.b) { continue; } BLOCKNUM b = make_blocknum(i); - if (bt->current.block_translation[b.b].size != size_is_free) { + if (_current.block_translation[b.b].size != size_is_free) { ok = false; goto cleanup; } } cleanup: - unlock_for_blocktable(bt); + _mutex_unlock(); return ok; } -//Verify there are no data blocks except root. 
+// Verify there are no data blocks except root. // TODO(leif): This actually takes a lock, but I don't want to fix all the callers right now. -void -toku_block_verify_no_data_blocks_except_root(BLOCK_TABLE UU(bt), BLOCKNUM UU(root)) { - paranoid_invariant(no_data_blocks_except_root(bt, root)); +void block_table::verify_no_data_blocks_except_root(BLOCKNUM UU(root)) { + paranoid_invariant(_no_data_blocks_except_root(root)); } -__attribute__((nonnull,const,unused)) -static inline bool -blocknum_allocated(BLOCK_TABLE bt, BLOCKNUM b) { - lock_for_blocktable(bt); - struct translation *t = &bt->current; - verify_valid_blocknum(t, b); +bool block_table::_blocknum_allocated(BLOCKNUM b) { + _mutex_lock(); + struct translation *t = &_current; + _verify_valid_blocknum(t, b); bool ok = t->block_translation[b.b].size != size_is_free; - unlock_for_blocktable(bt); + _mutex_unlock(); return ok; } -//Verify a blocknum is currently allocated. -void -toku_verify_blocknum_allocated(BLOCK_TABLE UU(bt), BLOCKNUM UU(b)) { - paranoid_invariant(blocknum_allocated(bt, b)); +// Verify a blocknum is currently allocated. 
+void block_table::verify_blocknum_allocated(BLOCKNUM UU(b)) { + paranoid_invariant(_blocknum_allocated(b)); } -//Only used by toku_dump_translation table (debug info) -static void -dump_translation(FILE *f, struct translation *t) { +// Only used by toku_dump_translation table (debug info) +void block_table::_dump_translation_internal(FILE *f, struct translation *t) { if (t->block_translation) { BLOCKNUM b = make_blocknum(RESERVED_BLOCKNUM_TRANSLATION); fprintf(f, " length_of_array[%" PRId64 "]", t->length_of_array); @@ -793,230 +760,128 @@ dump_translation(FILE *f, struct translation *t) { fprintf(f, " %" PRId64 ": %" PRId64 " %" PRId64 "\n", i, t->block_translation[i].u.diskoff, t->block_translation[i].size); } fprintf(f, "\n"); + } else { + fprintf(f, " does not exist\n"); } - else fprintf(f, " does not exist\n"); } -//Only used by toku_ft_dump which is only for debugging purposes +// Only used by toku_ft_dump which is only for debugging purposes // "pretty" just means we use tabs so we can parse output easier later -void -toku_dump_translation_table_pretty(FILE *f, BLOCK_TABLE bt) { - lock_for_blocktable(bt); - struct translation *t = &bt->checkpointed; +void block_table::dump_translation_table_pretty(FILE *f) { + _mutex_lock(); + struct translation *t = &_checkpointed; assert(t->block_translation != nullptr); for (int64_t i = 0; i < t->length_of_array; ++i) { fprintf(f, "%" PRId64 "\t%" PRId64 "\t%" PRId64 "\n", i, t->block_translation[i].u.diskoff, t->block_translation[i].size); } - unlock_for_blocktable(bt); + _mutex_unlock(); } -//Only used by toku_ft_dump which is only for debugging purposes -void -toku_dump_translation_table(FILE *f, BLOCK_TABLE bt) { - lock_for_blocktable(bt); +// Only used by toku_ft_dump which is only for debugging purposes +void block_table::dump_translation_table(FILE *f) { + _mutex_lock(); fprintf(f, "Current block translation:"); - dump_translation(f, &bt->current); + _dump_translation_internal(f, &_current); fprintf(f, "Checkpoint 
in progress block translation:"); - dump_translation(f, &bt->inprogress); + _dump_translation_internal(f, &_inprogress); fprintf(f, "Checkpointed block translation:"); - dump_translation(f, &bt->checkpointed); - unlock_for_blocktable(bt); + _dump_translation_internal(f, &_checkpointed); + _mutex_unlock(); } -//Only used by ftdump -void -toku_blocknum_dump_translation(BLOCK_TABLE bt, BLOCKNUM b) { - lock_for_blocktable(bt); +// Only used by ftdump +void block_table::blocknum_dump_translation(BLOCKNUM b) { + _mutex_lock(); - struct translation *t = &bt->current; + struct translation *t = &_current; if (b.b < t->length_of_array) { struct block_translation_pair *bx = &t->block_translation[b.b]; printf("%" PRId64 ": %" PRId64 " %" PRId64 "\n", b.b, bx->u.diskoff, bx->size); } - unlock_for_blocktable(bt); + _mutex_unlock(); } +// Must not call this function when anything else is using the blocktable. +// No one may use the blocktable afterwards. +void block_table::destroy(void) { + // TODO: translation.destroy(); + toku_free(_current.block_translation); + toku_free(_inprogress.block_translation); + toku_free(_checkpointed.block_translation); -//Must not call this function when anything else is using the blocktable. -//No one may use the blocktable afterwards. 
-void -toku_blocktable_destroy(BLOCK_TABLE *btp) { - BLOCK_TABLE bt = *btp; - *btp = NULL; - if (bt->current.block_translation) toku_free(bt->current.block_translation); - if (bt->inprogress.block_translation) toku_free(bt->inprogress.block_translation); - if (bt->checkpointed.block_translation) toku_free(bt->checkpointed.block_translation); - - bt->bt_block_allocator.destroy(); - blocktable_lock_destroy(bt); - nb_mutex_destroy(&bt->safe_file_size_lock); - toku_free(bt); + _bt_block_allocator.destroy(); + toku_mutex_destroy(&_mutex); + nb_mutex_destroy(&_safe_file_size_lock); } - -static BLOCK_TABLE -blocktable_create_internal (void) { -// Effect: Fill it in, including the translation table, which is uninitialized - BLOCK_TABLE XCALLOC(bt); - blocktable_lock_init(bt); - nb_mutex_init(&bt->safe_file_size_lock); - - //There are two headers, so we reserve space for two. - uint64_t reserve_per_header = block_allocator::BLOCK_ALLOCATOR_HEADER_RESERVE; - - //Must reserve in multiples of BLOCK_ALLOCATOR_ALIGNMENT - //Round up the per-header usage if necessary. - //We want each header aligned. 
- uint64_t remainder = block_allocator::BLOCK_ALLOCATOR_HEADER_RESERVE % block_allocator::BLOCK_ALLOCATOR_ALIGNMENT; - if (remainder != 0) { - reserve_per_header += block_allocator::BLOCK_ALLOCATOR_ALIGNMENT; - reserve_per_header -= remainder; - } - assert(2 * reserve_per_header == block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); - bt->bt_block_allocator.create(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE, block_allocator::BLOCK_ALLOCATOR_ALIGNMENT); - return bt; -} - - - -static void -translation_default(struct translation *t) { // destination into which to create a default translation - t->type = TRANSLATION_CHECKPOINTED; - t->smallest_never_used_blocknum = make_blocknum(RESERVED_BLOCKNUMS); - t->length_of_array = t->smallest_never_used_blocknum.b; - t->blocknum_freelist_head = freelist_null; - XMALLOC_N(t->length_of_array, t->block_translation); - int64_t i; - for (i = 0; i < t->length_of_array; i++) { - t->block_translation[i].size = 0; - t->block_translation[i].u.diskoff = diskoff_unused; - } -} - - -static int -translation_deserialize_from_buffer(struct translation *t, // destination into which to deserialize - DISKOFF location_on_disk, //Location of translation_buffer - uint64_t size_on_disk, - unsigned char * translation_buffer) { // buffer with serialized translation +int block_table::_translation_deserialize_from_buffer(struct translation *t, + DISKOFF location_on_disk, + uint64_t size_on_disk, + // out: buffer with serialized translation + unsigned char *translation_buffer) { int r = 0; - assert(location_on_disk!=0); + assert(location_on_disk != 0); t->type = TRANSLATION_CHECKPOINTED; - { - // check the checksum - uint32_t x1764 = toku_x1764_memory(translation_buffer, size_on_disk - 4); - uint64_t offset = size_on_disk - 4; - //printf("%s:%d read from %ld (x1764 offset=%ld) size=%ld\n", __FILE__, __LINE__, block_translation_address_on_disk, offset, block_translation_size_on_disk); - uint32_t stored_x1764 = 
toku_dtoh32(*(int*)(translation_buffer + offset)); - if (x1764 != stored_x1764) { - fprintf(stderr, "Translation table checksum failure: calc=0x%08x read=0x%08x\n", x1764, stored_x1764); - r = TOKUDB_BAD_CHECKSUM; - goto exit; - } - } - struct rbuf rt; - rt.buf = translation_buffer; - rt.ndone = 0; - rt.size = size_on_disk-4;//4==checksum - t->smallest_never_used_blocknum = rbuf_blocknum(&rt); - t->length_of_array = t->smallest_never_used_blocknum.b; - assert(t->smallest_never_used_blocknum.b >= RESERVED_BLOCKNUMS); - t->blocknum_freelist_head = rbuf_blocknum(&rt); - XMALLOC_N(t->length_of_array, t->block_translation); - for (int64_t i = 0; i < t->length_of_array; i++) { - t->block_translation[i].u.diskoff = rbuf_DISKOFF(&rt); - t->block_translation[i].size = rbuf_DISKOFF(&rt); - } - assert(calculate_size_on_disk(t) == (int64_t)size_on_disk); - assert(t->block_translation[RESERVED_BLOCKNUM_TRANSLATION].size == (int64_t)size_on_disk); - assert(t->block_translation[RESERVED_BLOCKNUM_TRANSLATION].u.diskoff == location_on_disk); -exit: - return r; -} - -// We just initialized a translation, inform block allocator to reserve space for each blocknum in use. -static void blocktable_note_translation(block_allocator *ba, struct translation *t) { - //This is where the space for them will be reserved (in addition to normal blocks). - //See RESERVED_BLOCKNUMS - - // Previously this added blocks one at a time. Now we make an array and pass it in so it can be sorted and merged. See #3218. 
- struct block_allocator::blockpair *XMALLOC_N(t->smallest_never_used_blocknum.b, pairs); - uint64_t n_pairs = 0; - for (int64_t i=0; ismallest_never_used_blocknum.b; i++) { - struct block_translation_pair pair = t->block_translation[i]; - if (pair.size > 0) { - paranoid_invariant(pair.u.diskoff != diskoff_unused); - int cur_pair = n_pairs++; - pairs[cur_pair] = block_allocator::blockpair(pair.u.diskoff, pair.size); - } - } - ba->alloc_blocks_at(n_pairs, pairs); - toku_free(pairs); -} - - -// Fill in the checkpointed translation from buffer, and copy checkpointed to current. -// The one read from disk is the last known checkpointed one, so we are keeping it in -// place and then setting current (which is never stored on disk) for current use. -// The translation_buffer has translation only, we create the rest of the block_table. -int -toku_blocktable_create_from_buffer(int fd, - BLOCK_TABLE *btp, - DISKOFF location_on_disk, //Location of translation_buffer - DISKOFF size_on_disk, - unsigned char *translation_buffer) { - BLOCK_TABLE bt = blocktable_create_internal(); - int r = translation_deserialize_from_buffer(&bt->checkpointed, location_on_disk, size_on_disk, translation_buffer); - if (r != 0) { + // check the checksum + uint32_t x1764 = toku_x1764_memory(translation_buffer, size_on_disk - 4); + uint64_t offset = size_on_disk - 4; + uint32_t stored_x1764 = toku_dtoh32(*(int*)(translation_buffer + offset)); + if (x1764 != stored_x1764) { + fprintf(stderr, "Translation table checksum failure: calc=0x%08x read=0x%08x\n", x1764, stored_x1764); + r = TOKUDB_BAD_CHECKSUM; goto exit; } - blocktable_note_translation(&bt->bt_block_allocator, &bt->checkpointed); - // we just filled in checkpointed, now copy it to current. 
- copy_translation(&bt->current, &bt->checkpointed, TRANSLATION_CURRENT); - int64_t file_size; - r = toku_os_get_file_size(fd, &file_size); - lazy_assert_zero(r); - invariant(file_size >= 0); - bt->safe_file_size = file_size; + struct rbuf rb; + rb.buf = translation_buffer; + rb.ndone = 0; + rb.size = size_on_disk-4;//4==checksum + + t->smallest_never_used_blocknum = rbuf_blocknum(&rb); + t->length_of_array = t->smallest_never_used_blocknum.b; + invariant(t->smallest_never_used_blocknum.b >= RESERVED_BLOCKNUMS); + t->blocknum_freelist_head = rbuf_blocknum(&rb); + XMALLOC_N(t->length_of_array, t->block_translation); + for (int64_t i = 0; i < t->length_of_array; i++) { + t->block_translation[i].u.diskoff = rbuf_DISKOFF(&rb); + t->block_translation[i].size = rbuf_DISKOFF(&rb); + } + invariant(_calculate_size_on_disk(t) == (int64_t) size_on_disk); + invariant(t->block_translation[RESERVED_BLOCKNUM_TRANSLATION].size == (int64_t) size_on_disk); + invariant(t->block_translation[RESERVED_BLOCKNUM_TRANSLATION].u.diskoff == location_on_disk); - *btp = bt; exit: return r; } - -void -toku_blocktable_create_new(BLOCK_TABLE *btp) { - BLOCK_TABLE bt = blocktable_create_internal(); - translation_default(&bt->checkpointed); // create default btt (empty except for reserved blocknums) - blocktable_note_translation(&bt->bt_block_allocator, &bt->checkpointed); - // we just created a default checkpointed, now copy it to current. 
- copy_translation(&bt->current, &bt->checkpointed, TRANSLATION_CURRENT); - - *btp = bt; -} - -int -toku_blocktable_iterate (BLOCK_TABLE bt, enum translation_type type, BLOCKTABLE_CALLBACK f, void *extra, bool data_only, bool used_only) { +int block_table::iterate(enum translation_type type, + BLOCKTABLE_CALLBACK f, void *extra, bool data_only, bool used_only) { struct translation *src; int r = 0; switch (type) { - case TRANSLATION_CURRENT: src = &bt->current; break; - case TRANSLATION_INPROGRESS: src = &bt->inprogress; break; - case TRANSLATION_CHECKPOINTED: src = &bt->checkpointed; break; - default: r = EINVAL; break; + case TRANSLATION_CURRENT: + src = &_current; + break; + case TRANSLATION_INPROGRESS: + src = &_inprogress; + break; + case TRANSLATION_CHECKPOINTED: + src = &_checkpointed; + break; + default: + r = EINVAL; } + struct translation fakecurrent; struct translation *t = &fakecurrent; - if (r==0) { - lock_for_blocktable(bt); - copy_translation(t, src, TRANSLATION_DEBUG); + if (r == 0) { + _mutex_lock(); + _copy_translation(t, src, TRANSLATION_DEBUG); t->block_translation[RESERVED_BLOCKNUM_TRANSLATION] = - src->block_translation[RESERVED_BLOCKNUM_TRANSLATION]; - unlock_for_blocktable(bt); + src->block_translation[RESERVED_BLOCKNUM_TRANSLATION]; + _mutex_unlock(); int64_t i; for (i=0; ismallest_never_used_blocknum.b; i++) { struct block_translation_pair pair = t->block_translation[i]; @@ -1035,8 +900,7 @@ typedef struct { int64_t total_space; } frag_extra; -static int -frag_helper(BLOCKNUM UU(b), int64_t size, int64_t address, void *extra) { +static int frag_helper(BLOCKNUM UU(b), int64_t size, int64_t address, void *extra) { frag_extra *info = (frag_extra *) extra; if (size + address > info->total_space) @@ -1045,96 +909,86 @@ frag_helper(BLOCKNUM UU(b), int64_t size, int64_t address, void *extra) { return 0; } -void -toku_blocktable_internal_fragmentation (BLOCK_TABLE bt, int64_t *total_sizep, int64_t *used_sizep) { - frag_extra info = {0,0}; - int r = 
toku_blocktable_iterate(bt, TRANSLATION_CHECKPOINTED, frag_helper, &info, false, true); +void block_table::internal_fragmentation(int64_t *total_sizep, int64_t *used_sizep) { + frag_extra info = { 0, 0 }; + int r = iterate(TRANSLATION_CHECKPOINTED, frag_helper, &info, false, true); assert_zero(r); if (total_sizep) *total_sizep = info.total_space; if (used_sizep) *used_sizep = info.used_space; } -void -toku_realloc_descriptor_on_disk_unlocked(BLOCK_TABLE bt, DISKOFF size, DISKOFF *offset, FT ft) { - toku_mutex_assert_locked(&bt->mutex); +void block_table::_realloc_descriptor_on_disk_unlocked(DISKOFF size, DISKOFF *offset, FT ft) { + toku_mutex_assert_locked(&_mutex); BLOCKNUM b = make_blocknum(RESERVED_BLOCKNUM_DESCRIPTOR); - blocknum_realloc_on_disk_internal(bt, b, size, offset, ft, false); + _realloc_on_disk_internal(b, size, offset, ft, false); } -void -toku_realloc_descriptor_on_disk(BLOCK_TABLE bt, DISKOFF size, DISKOFF *offset, FT ft, int fd) { - lock_for_blocktable(bt); - toku_realloc_descriptor_on_disk_unlocked(bt, size, offset, ft); - - ensure_safe_write_unlocked(bt, fd, size, *offset); - unlock_for_blocktable(bt); +void block_table::realloc_descriptor_on_disk(DISKOFF size, DISKOFF *offset, FT ft, int fd) { + _mutex_lock(); + _realloc_descriptor_on_disk_unlocked(size, offset, ft); + _ensure_safe_write_unlocked(fd, size, *offset); + _mutex_unlock(); } -void -toku_get_descriptor_offset_size(BLOCK_TABLE bt, DISKOFF *offset, DISKOFF *size) { - lock_for_blocktable(bt); +void block_table::get_descriptor_offset_size(DISKOFF *offset, DISKOFF *size) { + _mutex_lock(); BLOCKNUM b = make_blocknum(RESERVED_BLOCKNUM_DESCRIPTOR); - translate_blocknum_to_offset_size_unlocked(bt, b, offset, size); - unlock_for_blocktable(bt); + _translate_blocknum_to_offset_size_unlocked(b, offset, size); + _mutex_unlock(); } -void -toku_block_table_get_fragmentation_unlocked(BLOCK_TABLE bt, TOKU_DB_FRAGMENTATION report) { - //Requires: blocktable lock is held. 
- //Requires: report->file_size_bytes is already filled in. +void block_table::get_fragmentation_unlocked(TOKU_DB_FRAGMENTATION report) { + // Requires: blocktable lock is held. + // Requires: report->file_size_bytes is already filled in. - //Count the headers. - report->data_bytes = block_allocator::BLOCK_ALLOCATOR_HEADER_RESERVE; - report->data_blocks = 1; - report->checkpoint_bytes_additional = block_allocator::BLOCK_ALLOCATOR_HEADER_RESERVE; + // Count the headers. + report->data_bytes = block_allocator::BLOCK_ALLOCATOR_HEADER_RESERVE; + report->data_blocks = 1; + report->checkpoint_bytes_additional = block_allocator::BLOCK_ALLOCATOR_HEADER_RESERVE; report->checkpoint_blocks_additional = 1; - struct translation *current = &bt->current; - int64_t i; - for (i = 0; i < current->length_of_array; i++) { + struct translation *current = &_current; + for (int64_t i = 0; i < current->length_of_array; i++) { struct block_translation_pair *pair = ¤t->block_translation[i]; if (pair->size > 0) { report->data_bytes += pair->size; report->data_blocks++; } } - struct translation *checkpointed = &bt->checkpointed; - for (i = 0; i < checkpointed->length_of_array; i++) { - struct block_translation_pair *pair = &checkpointed->block_translation[i]; - if (pair->size > 0 && - !(i < current->length_of_array && - current->block_translation[i].size > 0 && - current->block_translation[i].u.diskoff == pair->u.diskoff) - ) { - report->checkpoint_bytes_additional += pair->size; - report->checkpoint_blocks_additional++; - } - } - struct translation *inprogress = &bt->inprogress; - for (i = 0; i < inprogress->length_of_array; i++) { - struct block_translation_pair *pair = &inprogress->block_translation[i]; - if (pair->size > 0 && - !(i < current->length_of_array && - current->block_translation[i].size > 0 && - current->block_translation[i].u.diskoff == pair->u.diskoff) && - !(i < checkpointed->length_of_array && - checkpointed->block_translation[i].size > 0 && - 
checkpointed->block_translation[i].u.diskoff == pair->u.diskoff) - ) { + + struct translation *checkpointed = &_checkpointed; + for (int64_t i = 0; i < checkpointed->length_of_array; i++) { + struct block_translation_pair *pair = &_checkpointed.block_translation[i]; + if (pair->size > 0 && !(i < current->length_of_array && + current->block_translation[i].size > 0 && + current->block_translation[i].u.diskoff == pair->u.diskoff)) { report->checkpoint_bytes_additional += pair->size; report->checkpoint_blocks_additional++; } } - bt->bt_block_allocator.get_unused_statistics(report); + struct translation *inprogress = &_inprogress; + for (int64_t i = 0; i < inprogress->length_of_array; i++) { + struct block_translation_pair *pair = &_inprogress.block_translation[i]; + if (pair->size > 0 && !(i < current->length_of_array && + current->block_translation[i].size > 0 && + current->block_translation[i].u.diskoff == pair->u.diskoff) && + !(i < checkpointed->length_of_array && + checkpointed->block_translation[i].size > 0 && + checkpointed->block_translation[i].u.diskoff == pair->u.diskoff)) { + report->checkpoint_bytes_additional += pair->size; + report->checkpoint_blocks_additional++; + } + } + + _bt_block_allocator.get_unused_statistics(report); } -void -toku_blocktable_get_info64(BLOCK_TABLE bt, struct ftinfo64 *s) { - lock_for_blocktable(bt); +void block_table::get_info64(struct ftinfo64 *s) { + _mutex_lock(); - struct translation *current = &bt->current; + struct translation *current = &_current; s->num_blocks_allocated = current->length_of_array; s->num_blocks_in_use = 0; s->size_allocated = 0; @@ -1154,31 +1008,30 @@ toku_blocktable_get_info64(BLOCK_TABLE bt, struct ftinfo64 *s) { } } - unlock_for_blocktable(bt); + _mutex_unlock(); } -int -toku_blocktable_iterate_translation_tables(BLOCK_TABLE bt, uint64_t checkpoint_count, - int (*iter)(uint64_t checkpoint_count, - int64_t total_num_rows, - int64_t blocknum, - int64_t diskoff, - int64_t size, - void *extra), - void 
*iter_extra) { +int block_table::iterate_translation_tables(uint64_t checkpoint_count, + int (*iter)(uint64_t checkpoint_count, + int64_t total_num_rows, + int64_t blocknum, + int64_t diskoff, + int64_t size, + void *extra), + void *iter_extra) { int error = 0; - lock_for_blocktable(bt); + _mutex_lock(); - int64_t total_num_rows = bt->current.length_of_array + bt->checkpointed.length_of_array; - for (int64_t i = 0; error == 0 && i < bt->current.length_of_array; ++i) { - struct block_translation_pair *block = &bt->current.block_translation[i]; + int64_t total_num_rows = _current.length_of_array + _checkpointed.length_of_array; + for (int64_t i = 0; error == 0 && i < _current.length_of_array; ++i) { + struct block_translation_pair *block = &_current.block_translation[i]; error = iter(checkpoint_count, total_num_rows, i, block->u.diskoff, block->size, iter_extra); } - for (int64_t i = 0; error == 0 && i < bt->checkpointed.length_of_array; ++i) { - struct block_translation_pair *block = &bt->checkpointed.block_translation[i]; + for (int64_t i = 0; error == 0 && i < _checkpointed.length_of_array; ++i) { + struct block_translation_pair *block = &_checkpointed.block_translation[i]; error = iter(checkpoint_count - 1, total_num_rows, i, block->u.diskoff, block->size, iter_extra); } - unlock_for_blocktable(bt); + _mutex_unlock(); return error; } diff --git a/ft/serialize/block_table.h b/ft/serialize/block_table.h index cb0f50f51f3..8e9e2279526 100644 --- a/ft/serialize/block_table.h +++ b/ft/serialize/block_table.h @@ -1,6 +1,6 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ident "$Id$" + /* COPYING CONDITIONS NOTICE: @@ -30,7 +30,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. + Copyright (C) 2007-2014 Tokutek, Inc. 
DISCLAIMER: @@ -88,98 +88,25 @@ PATENT RIGHTS GRANT: #pragma once -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." +#ident "Copyright (c) 2007-2014 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." #include #include "portability/toku_stdint.h" +#include "portability/toku_pthread.h" + +#include "ft/serialize/block_allocator.h" +#include "util/nb_mutex.h" struct ft; -typedef struct block_table *BLOCK_TABLE; - typedef struct blocknum_s { int64_t b; } BLOCKNUM; -static inline BLOCKNUM make_blocknum(int64_t b) { - BLOCKNUM result = { .b = b }; - return result; -} -static const BLOCKNUM ROLLBACK_NONE = { .b = 0 }; - // Offset in a disk. -1 is the 'null' pointer. typedef int64_t DISKOFF; -// Needed by tests, ftdump -struct block_translation_pair { - union { // If in the freelist, use next_free_blocknum, otherwise diskoff. 
- DISKOFF diskoff; - BLOCKNUM next_free_blocknum; - } u; - DISKOFF size; // set to 0xFFFFFFFFFFFFFFFF for free -}; - -void toku_blocktable_create_new(BLOCK_TABLE *btp); -int toku_blocktable_create_from_buffer(int fd, BLOCK_TABLE *btp, DISKOFF location_on_disk, DISKOFF size_on_disk, unsigned char *translation_buffer); -void toku_blocktable_destroy(BLOCK_TABLE *btp); - -void toku_ft_lock(struct ft *ft); -void toku_ft_unlock(struct ft *ft); - -void toku_block_translation_note_start_checkpoint_unlocked(BLOCK_TABLE bt); -void toku_block_translation_note_end_checkpoint(BLOCK_TABLE bt, int fd); -void toku_block_translation_note_skipped_checkpoint(BLOCK_TABLE bt); -void toku_maybe_truncate_file_on_open(BLOCK_TABLE bt, int fd); - -//Blocknums -void toku_allocate_blocknum(BLOCK_TABLE bt, BLOCKNUM *res, struct ft *ft); -void toku_allocate_blocknum_unlocked(BLOCK_TABLE bt, BLOCKNUM *res, struct ft *ft); -void toku_free_blocknum(BLOCK_TABLE bt, BLOCKNUM *b, struct ft *ft, bool for_checkpoint); -void toku_verify_blocknum_allocated(BLOCK_TABLE bt, BLOCKNUM b); -void toku_block_verify_no_data_blocks_except_root(BLOCK_TABLE bt, BLOCKNUM root); -void toku_free_unused_blocknums(BLOCK_TABLE bt, BLOCKNUM root); -void toku_block_verify_no_free_blocknums(BLOCK_TABLE bt); -void toku_realloc_descriptor_on_disk(BLOCK_TABLE bt, DISKOFF size, DISKOFF *offset, struct ft *ft, int fd); -void toku_realloc_descriptor_on_disk_unlocked(BLOCK_TABLE bt, DISKOFF size, DISKOFF *offset, struct ft *ft); -void toku_get_descriptor_offset_size(BLOCK_TABLE bt, DISKOFF *offset, DISKOFF *size); - -//Blocks and Blocknums -void toku_blocknum_realloc_on_disk(BLOCK_TABLE bt, BLOCKNUM b, DISKOFF size, DISKOFF *offset, struct ft *ft, int fd, bool for_checkpoint); -void toku_translate_blocknum_to_offset_size(BLOCK_TABLE bt, BLOCKNUM b, DISKOFF *offset, DISKOFF *size); - -//Serialization -void toku_serialize_translation_to_wbuf(BLOCK_TABLE bt, int fd, struct wbuf *w, int64_t *address, int64_t *size); -void 
toku_block_table_swap_for_redirect(BLOCK_TABLE old_bt, BLOCK_TABLE new_bt); - -//DEBUG ONLY (ftdump included), tests included -void toku_blocknum_dump_translation(BLOCK_TABLE bt, BLOCKNUM b); -void toku_dump_translation_table_pretty(FILE *f, BLOCK_TABLE bt); -void toku_dump_translation_table(FILE *f, BLOCK_TABLE bt); -void toku_block_free(BLOCK_TABLE bt, uint64_t offset); -typedef int (*BLOCKTABLE_CALLBACK)(BLOCKNUM b, int64_t size, int64_t address, void *extra); - -enum translation_type { - TRANSLATION_NONE = 0, - TRANSLATION_CURRENT, - TRANSLATION_INPROGRESS, - TRANSLATION_CHECKPOINTED, - TRANSLATION_DEBUG -}; - -int toku_blocktable_iterate(BLOCK_TABLE bt, enum translation_type type, BLOCKTABLE_CALLBACK f, void *extra, bool data_only, bool used_only); -void toku_blocktable_internal_fragmentation(BLOCK_TABLE bt, int64_t *total_sizep, int64_t *used_sizep); - -void toku_block_table_get_fragmentation_unlocked(BLOCK_TABLE bt, TOKU_DB_FRAGMENTATION report); -//Requires: blocktable lock is held. -//Requires: report->file_size_bytes is already filled in. - -int64_t toku_block_get_blocks_in_use_unlocked(BLOCK_TABLE bt); - -void toku_blocktable_get_info64(BLOCK_TABLE, struct ftinfo64 *); - -int toku_blocktable_iterate_translation_tables(BLOCK_TABLE, uint64_t, int (*)(uint64_t, int64_t, int64_t, int64_t, int64_t, void *), void *); - -//Unmovable reserved first, then reallocable. +// Unmovable reserved first, then reallocable. // We reserve one blocknum for the translation table itself. 
enum { RESERVED_BLOCKNUM_NULL = 0, @@ -188,6 +115,197 @@ enum { RESERVED_BLOCKNUMS }; +typedef int (*BLOCKTABLE_CALLBACK)(BLOCKNUM b, int64_t size, int64_t address, void *extra); + +static inline BLOCKNUM make_blocknum(int64_t b) { + BLOCKNUM result = { .b = b }; + return result; +} +static const BLOCKNUM ROLLBACK_NONE = { .b = 0 }; + +/** + * There are three copies of the translation table (btt) in the block table: + * + * checkpointed Is initialized by deserializing from disk, + * and is the only version ever read from disk. + * When read from disk it is copied to current. + * It is immutable. It can be replaced by an inprogress btt. + * + * inprogress Is only filled by copying from current, + * and is the only version ever serialized to disk. + * (It is serialized to disk on checkpoint and clean shutdown.) + * At end of checkpoint it replaces 'checkpointed'. + * During a checkpoint, any 'pending' dirty writes will update + * inprogress. + * + * current Is initialized by copying from checkpointed, + * is the only version ever modified while the database is in use, + * and is the only version ever copied to inprogress. + * It is never stored on disk. 
+ */ +class block_table { +public: + enum translation_type { + TRANSLATION_NONE = 0, + TRANSLATION_CURRENT, + TRANSLATION_INPROGRESS, + TRANSLATION_CHECKPOINTED, + TRANSLATION_DEBUG + }; + + void create(); + + int create_from_buffer(int fd, DISKOFF location_on_disk, DISKOFF size_on_disk, unsigned char *translation_buffer); + + void destroy(); + + // Checkpointing + void note_start_checkpoint_unlocked(); + void note_end_checkpoint(int fd); + void note_skipped_checkpoint(); + void maybe_truncate_file_on_open(int fd); + + // Blocknums + void allocate_blocknum(BLOCKNUM *res, struct ft *ft); + void realloc_on_disk(BLOCKNUM b, DISKOFF size, DISKOFF *offset, struct ft *ft, int fd, bool for_checkpoint); + void free_blocknum(BLOCKNUM *b, struct ft *ft, bool for_checkpoint); + void translate_blocknum_to_offset_size(BLOCKNUM b, DISKOFF *offset, DISKOFF *size); + void free_unused_blocknums(BLOCKNUM root); + void realloc_descriptor_on_disk(DISKOFF size, DISKOFF *offset, struct ft *ft, int fd); + void get_descriptor_offset_size(DISKOFF *offset, DISKOFF *size); + + // External verfication + void verify_blocknum_allocated(BLOCKNUM b); + void verify_no_data_blocks_except_root(BLOCKNUM root); + void verify_no_free_blocknums(); + + // Serialization + void serialize_translation_to_wbuf(int fd, struct wbuf *w, int64_t *address, int64_t *size); + + // DEBUG ONLY (ftdump included), tests included + void blocknum_dump_translation(BLOCKNUM b); + void dump_translation_table_pretty(FILE *f); + void dump_translation_table(FILE *f); + void block_free(uint64_t offset); + + int iterate(enum translation_type type, BLOCKTABLE_CALLBACK f, void *extra, bool data_only, bool used_only); + void internal_fragmentation(int64_t *total_sizep, int64_t *used_sizep); + + // Requires: blocktable lock is held. + // Requires: report->file_size_bytes is already filled in. 
+ void get_fragmentation_unlocked(TOKU_DB_FRAGMENTATION report); + + int64_t get_blocks_in_use_unlocked(); + + void get_info64(struct ftinfo64 *); + + int iterate_translation_tables(uint64_t, int (*)(uint64_t, int64_t, int64_t, int64_t, int64_t, void *), void *); + +private: + struct block_translation_pair { + // If in the freelist, use next_free_blocknum, otherwise diskoff. + union { + DISKOFF diskoff; + BLOCKNUM next_free_blocknum; + } u; + + // Set to 0xFFFFFFFFFFFFFFFF for free + DISKOFF size; + }; + + // This is the BTT (block translation table) + // When the translation (btt) is stored on disk: + // In Header: + // size_on_disk + // location_on_disk + // In block translation table (in order): + // smallest_never_used_blocknum + // blocknum_freelist_head + // array + // a checksum + struct translation { + enum translation_type type; + + // Number of elements in array (block_translation). always >= smallest_never_used_blocknum + int64_t length_of_array; + BLOCKNUM smallest_never_used_blocknum; + + // Next (previously used) unused blocknum (free list) + BLOCKNUM blocknum_freelist_head; + struct block_translation_pair *block_translation; + + // size_on_disk is stored in block_translation[RESERVED_BLOCKNUM_TRANSLATION].size + // location_on is stored in block_translation[RESERVED_BLOCKNUM_TRANSLATION].u.diskoff + }; + + void _create_internal(); + int _translation_deserialize_from_buffer(struct translation *t, // destination into which to deserialize + DISKOFF location_on_disk, // location of translation_buffer + uint64_t size_on_disk, + unsigned char * translation_buffer); // buffer with serialized translation + + void _copy_translation(struct translation *dst, struct translation *src, enum translation_type newtype); + void _maybe_optimize_translation(struct translation *t); + void _maybe_expand_translation(struct translation *t); + bool _translation_prevents_freeing(struct translation *t, BLOCKNUM b, struct block_translation_pair *old_pair); + void 
_free_blocknum_in_translation(struct translation *t, BLOCKNUM b); + int64_t _calculate_size_on_disk(struct translation *t); + bool _pair_is_unallocated(struct block_translation_pair *pair); + void _alloc_inprogress_translation_on_disk_unlocked(); + void _dump_translation_internal(FILE *f, struct translation *t); + + // Blocknum management + void _allocate_blocknum_unlocked(BLOCKNUM *res, struct ft *ft); + void _free_blocknum_unlocked(BLOCKNUM *bp, struct ft *ft, bool for_checkpoint); + void _realloc_descriptor_on_disk_unlocked(DISKOFF size, DISKOFF *offset, struct ft *ft); + void _realloc_on_disk_internal(BLOCKNUM b, DISKOFF size, DISKOFF *offset, struct ft *ft, bool for_checkpoint); + void _translate_blocknum_to_offset_size_unlocked(BLOCKNUM b, DISKOFF *offset, DISKOFF *size); + + // File management + void _maybe_truncate_file(int fd, uint64_t size_needed_before); + void _ensure_safe_write_unlocked(int fd, DISKOFF block_size, DISKOFF block_offset); + + // Verification + bool _is_valid_blocknum(struct translation *t, BLOCKNUM b); + void _verify_valid_blocknum(struct translation *t, BLOCKNUM b); + bool _is_valid_freeable_blocknum(struct translation *t, BLOCKNUM b); + void _verify_valid_freeable_blocknum(struct translation *t, BLOCKNUM b); + bool _no_data_blocks_except_root(BLOCKNUM root); + bool _blocknum_allocated(BLOCKNUM b); + + // Locking + // + // TODO: Move the lock to the FT + void _mutex_lock(); + void _mutex_unlock(); + + // The current translation is the one used by client threads. + // It is not represented on disk. + struct translation _current; + + // The translation used by the checkpoint currently in progress. + // If the checkpoint thread allocates a block, it must also update the current translation. + struct translation _inprogress; + + // The translation for the data that shall remain inviolate on disk until the next checkpoint finishes, + // after which any blocks used only in this translation can be freed. 
+ struct translation _checkpointed; + + // The in-memory data structure for block allocation. + // There is no on-disk data structure for block allocation. + // Note: This is *allocation* not *translation* - the block allocator is unaware of which + // blocks are used for which translation, but simply allocates and deallocates blocks. + block_allocator _bt_block_allocator; + toku_mutex_t _mutex; + struct nb_mutex _safe_file_size_lock; + bool _checkpoint_skipped; + uint64_t _safe_file_size; + + // Because the lock is in a weird place right now + friend void toku_ft_lock(struct ft *ft); + friend void toku_ft_unlock(struct ft *ft); +}; + // For serialize / deserialize #include "ft/serialize/wbuf.h" diff --git a/ft/serialize/ft-serialize.cc b/ft/serialize/ft-serialize.cc index 0badabdb42b..e6bd84733a2 100644 --- a/ft/serialize/ft-serialize.cc +++ b/ft/serialize/ft-serialize.cc @@ -161,12 +161,12 @@ deserialize_descriptor_from_rbuf(struct rbuf *rb, DESCRIPTOR desc, int layout_ve } static int -deserialize_descriptor_from(int fd, BLOCK_TABLE bt, DESCRIPTOR desc, int layout_version) { +deserialize_descriptor_from(int fd, block_table *bt, DESCRIPTOR desc, int layout_version) { int r = 0; DISKOFF offset; DISKOFF size; - unsigned char *dbuf = NULL; - toku_get_descriptor_offset_size(bt, &offset, &size); + unsigned char *dbuf = nullptr; + bt->get_descriptor_offset_size(&offset, &size); memset(desc, 0, sizeof(*desc)); if (size > 0) { lazy_assert(size>=4); //4 for checksum @@ -274,11 +274,10 @@ int deserialize_ft_versioned(int fd, struct rbuf *rb, FT *ftp, uint32_t version) assert(readsz <= (ssize_t)size_to_read); } // Create table and read in data. 
- r = toku_blocktable_create_from_buffer(fd, - &ft->blocktable, - translation_address_on_disk, - translation_size_on_disk, - tbuf); + r = ft->blocktable.create_from_buffer(fd, + translation_address_on_disk, + translation_size_on_disk, + tbuf); toku_free(tbuf); if (r != 0) { goto exit; @@ -426,7 +425,7 @@ int deserialize_ft_versioned(int fd, struct rbuf *rb, FT *ftp, uint32_t version) } invariant((uint32_t) ft->layout_version_read_from_disk == version); - r = deserialize_descriptor_from(fd, ft->blocktable, &ft->descriptor, version); + r = deserialize_descriptor_from(fd, &ft->blocktable, &ft->descriptor, version); if (r != 0) { goto exit; } @@ -804,18 +803,20 @@ void toku_serialize_ft_to_wbuf ( lazy_assert(wbuf->ndone == wbuf->size); } -void toku_serialize_ft_to (int fd, FT_HEADER h, BLOCK_TABLE blocktable, CACHEFILE cf) { +void toku_serialize_ft_to(int fd, FT_HEADER h, block_table *bt, CACHEFILE cf) { lazy_assert(h->type==FT_CHECKPOINT_INPROGRESS); struct wbuf w_translation; int64_t size_translation; int64_t address_translation; - //Must serialize translation first, to get address,size for header. - toku_serialize_translation_to_wbuf(blocktable, fd, &w_translation, - &address_translation, - &size_translation); - assert(size_translation == w_translation.ndone); // the bytes written are the size - assert(w_translation.size % 512 == 0); // the number of bytes available in the buffer is 0 mod 512, and those last bytes are all initialized. + // Must serialize translation first, to get address,size for header. + bt->serialize_translation_to_wbuf(fd, &w_translation, + &address_translation, + &size_translation); + assert(size_translation == w_translation.ndone); + + // the number of bytes available in the buffer is 0 mod 512, and those last bytes are all initialized. 
+ assert(w_translation.size % 512 == 0); struct wbuf w_main; size_t size_main = toku_serialize_ft_size(h); diff --git a/ft/serialize/ft_node-serialize.cc b/ft/serialize/ft_node-serialize.cc index 65eb8c74154..285acc188e0 100644 --- a/ft/serialize/ft_node-serialize.cc +++ b/ft/serialize/ft_node-serialize.cc @@ -845,8 +845,8 @@ toku_serialize_ftnode_to (int fd, BLOCKNUM blocknum, FTNODE node, FTNODE_DISK_DA invariant(blocknum.b>=0); DISKOFF offset; - toku_blocknum_realloc_on_disk(ft->blocktable, blocknum, n_to_write, &offset, - ft, fd, for_checkpoint); //dirties h + ft->blocktable.realloc_on_disk(blocknum, n_to_write, &offset, + ft, fd, for_checkpoint); //dirties h tokutime_t t0 = toku_time_now(); toku_os_full_pwrite(fd, compressed_buf, n_to_write, offset); @@ -1085,7 +1085,7 @@ void read_block_from_fd_into_rbuf( { // get the file offset and block size for the block DISKOFF offset, size; - toku_translate_blocknum_to_offset_size(ft->blocktable, blocknum, &offset, &size); + ft->blocktable.translate_blocknum_to_offset_size(blocknum, &offset, &size); DISKOFF size_aligned = roundup_to_multiple(512, size); uint8_t *XMALLOC_N_ALIGNED(512, size_aligned, raw_block); rbuf_init(rb, raw_block, size); @@ -1101,11 +1101,12 @@ static const int read_header_heuristic_max = 32*1024; #define MIN(a,b) (((a)>(b)) ? (b) : (a)) #endif -static void read_ftnode_header_from_fd_into_rbuf_if_small_enough (int fd, BLOCKNUM blocknum, FT ft, struct rbuf *rb, struct ftnode_fetch_extra *bfe) // Effect: If the header part of the node is small enough, then read it into the rbuf. The rbuf will be allocated to be big enough in any case. 
-{ +static void read_ftnode_header_from_fd_into_rbuf_if_small_enough(int fd, BLOCKNUM blocknum, + FT ft, struct rbuf *rb, + struct ftnode_fetch_extra *bfe) { DISKOFF offset, size; - toku_translate_blocknum_to_offset_size(ft->blocktable, blocknum, &offset, &size); + ft->blocktable.translate_blocknum_to_offset_size(blocknum, &offset, &size); DISKOFF read_size = roundup_to_multiple(512, MIN(read_header_heuristic_max, size)); uint8_t *XMALLOC_N_ALIGNED(512, roundup_to_multiple(512, size), raw_block); rbuf_init(rb, raw_block, read_size); @@ -1937,10 +1938,8 @@ deserialize_and_upgrade_ftnode(FTNODE node, // we read the different sub-sections. // get the file offset and block size for the block DISKOFF offset, size; - toku_translate_blocknum_to_offset_size(bfe->ft->blocktable, - blocknum, - &offset, - &size); + bfe->ft->blocktable.translate_blocknum_to_offset_size(blocknum, &offset, &size); + struct rbuf rb; r = read_and_decompress_block_from_fd_into_rbuf(fd, blocknum, @@ -2218,16 +2217,13 @@ toku_deserialize_bp_from_disk(FTNODE node, FTNODE_DISK_DATA ndd, int childnum, i // // get the file offset and block size for the block DISKOFF node_offset, total_node_disk_size; - toku_translate_blocknum_to_offset_size( - bfe->ft->blocktable, - node->blocknum, - &node_offset, - &total_node_disk_size - ); + bfe->ft->blocktable.translate_blocknum_to_offset_size(node->blocknum, &node_offset, &total_node_disk_size); uint32_t curr_offset = BP_START(ndd, childnum); - uint32_t curr_size = BP_SIZE (ndd, childnum); - struct rbuf rb = {.buf = NULL, .size = 0, .ndone = 0}; + uint32_t curr_size = BP_SIZE (ndd, childnum); + + struct rbuf rb; + rbuf_init(&rb, nullptr, 0); uint32_t pad_at_beginning = (node_offset+curr_offset)%512; uint32_t padded_size = roundup_to_multiple(512, pad_at_beginning + curr_size); @@ -2530,20 +2526,22 @@ toku_serialize_rollback_log_to (int fd, ROLLBACK_LOG_NODE log, SERIALIZED_ROLLBA serialized_log = &serialized_local; 
toku_serialize_rollback_log_to_memory_uncompressed(log, serialized_log); } + BLOCKNUM blocknum = serialized_log->blocknum; + invariant(blocknum.b >= 0); - //Compress and malloc buffer to write + // Compress and malloc buffer to write serialize_uncompressed_block_to_memory(serialized_log->data, - serialized_log->n_sub_blocks, serialized_log->sub_block, - ft->h->compression_method, &n_to_write, &compressed_buf); + serialized_log->n_sub_blocks, + serialized_log->sub_block, + ft->h->compression_method, + &n_to_write, &compressed_buf); - { - lazy_assert(blocknum.b>=0); - DISKOFF offset; - toku_blocknum_realloc_on_disk(ft->blocktable, blocknum, n_to_write, &offset, - ft, fd, for_checkpoint); //dirties h - toku_os_full_pwrite(fd, compressed_buf, n_to_write, offset); - } + // Dirties the ft + DISKOFF offset; + ft->blocktable.realloc_on_disk(blocknum, n_to_write, &offset, + ft, fd, for_checkpoint); + toku_os_full_pwrite(fd, compressed_buf, n_to_write, offset); toku_free(compressed_buf); if (!is_serialized) { toku_static_serialized_rollback_log_destroy(&serialized_local); @@ -2803,16 +2801,19 @@ cleanup: return r; } -// Read rollback log node from file into struct. Perform version upgrade if necessary. -int -toku_deserialize_rollback_log_from (int fd, BLOCKNUM blocknum, ROLLBACK_LOG_NODE *logp, FT ft) { +// Read rollback log node from file into struct. +// Perform version upgrade if necessary. 
+int toku_deserialize_rollback_log_from(int fd, BLOCKNUM blocknum, ROLLBACK_LOG_NODE *logp, FT ft) { int layout_version = 0; int r; - struct rbuf rb = {.buf = NULL, .size = 0, .ndone = 0}; + + struct rbuf rb; + rbuf_init(&rb, nullptr, 0); // get the file offset and block size for the block DISKOFF offset, size; - toku_translate_blocknum_to_offset_size(ft->blocktable, blocknum, &offset, &size); + ft->blocktable.translate_blocknum_to_offset_size(blocknum, &offset, &size); + // if the size is 0, then the blocknum is unused if (size == 0) { // blocknum is unused, just create an empty one and get out @@ -2838,7 +2839,9 @@ toku_deserialize_rollback_log_from (int fd, BLOCKNUM blocknum, ROLLBACK_LOG_NODE r = deserialize_rollback_log_from_rbuf_versioned(layout_version, blocknum, logp, &rb); cleanup: - if (rb.buf) toku_free(rb.buf); + if (rb.buf) { + toku_free(rb.buf); + } return r; } diff --git a/ft/tests/block_allocator_test.cc b/ft/tests/block_allocator_test.cc index a7bb4f6641d..308663d7a12 100644 --- a/ft/tests/block_allocator_test.cc +++ b/ft/tests/block_allocator_test.cc @@ -90,12 +90,6 @@ PATENT RIGHTS GRANT: #include "test.h" -static void ba_alloc_at(block_allocator *ba, uint64_t size, uint64_t offset) { - ba->validate(); - ba->alloc_block_at(size * 512, offset * 512); - ba->validate(); -} - static void ba_alloc(block_allocator *ba, uint64_t size, uint64_t *answer) { ba->validate(); uint64_t actual_answer; @@ -133,28 +127,8 @@ static void test_ba0 (void) { block_allocator allocator; block_allocator *ba = &allocator; - uint64_t b0, b1; ba->create(100*512, 1*512); assert(ba->allocated_limit()==100*512); - ba_alloc_at(ba, 50, 100); - assert(ba->allocated_limit()==150*512); - ba_alloc_at(ba, 25, 150); - ba_alloc (ba, 10, &b0); - ba_check_l (ba, 0, 0, 100); - ba_check_l (ba, 1, 100, 50); - ba_check_l (ba, 2, 150, 25); - ba_check_l (ba, 3, b0, 10); - ba_check_none (ba, 4); - assert(b0==175); - ba_free(ba, 150); - ba_alloc_at(ba, 10, 150); - ba_alloc(ba, 10, &b0); - 
assert(b0==160); - ba_alloc(ba, 10, &b0); - ba_alloc(ba, 113, &b1); - assert(113*512==ba->block_size(b1 *512)); - assert(10 *512==ba->block_size(b0 *512)); - assert(50 *512==ba->block_size(100*512)); uint64_t b2, b3, b4, b5, b6, b7; ba_alloc(ba, 100, &b2); diff --git a/ft/tests/ft-bfe-query.cc b/ft/tests/ft-bfe-query.cc index 30d09344cd6..2cf2741bd8e 100644 --- a/ft/tests/ft-bfe-query.cc +++ b/ft/tests/ft-bfe-query.cc @@ -422,22 +422,22 @@ test_prefetching(void) { 16); ft_h->cmp.create(int64_key_cmp, nullptr); ft->ft = ft_h; - toku_blocktable_create_new(&ft_h->blocktable); + ft_h->blocktable.create(); { int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); } //Want to use block #20 BLOCKNUM b = make_blocknum(0); while (b.b < 20) { - toku_allocate_blocknum(ft_h->blocktable, &b, ft_h); + ft_h->blocktable.allocate_blocknum(&b, ft_h); } assert(b.b == 20); { DISKOFF offset; DISKOFF size; - toku_blocknum_realloc_on_disk(ft_h->blocktable, b, 100, &offset, ft_h, fd, false); + ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false); assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); - toku_translate_blocknum_to_offset_size(ft_h->blocktable, b, &offset, &size); + ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size); assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); assert(size == 100); } @@ -450,8 +450,8 @@ test_prefetching(void) { toku_destroy_ftnode_internals(&sn); - toku_block_free(ft_h->blocktable, block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); - toku_blocktable_destroy(&ft_h->blocktable); + ft_h->blocktable.block_free(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.destroy(); ft_h->cmp.destroy(); toku_free(ft_h->h); toku_free(ft_h); diff --git a/ft/tests/ft-clock-test.cc b/ft/tests/ft-clock-test.cc index 641d33dcf42..4660b9f7e11 100644 --- a/ft/tests/ft-clock-test.cc +++ b/ft/tests/ft-clock-test.cc @@ -358,22 +358,22 @@ 
test_serialize_nonleaf(void) { ft_h->cmp.create(string_key_cmp, nullptr); ft->ft = ft_h; - toku_blocktable_create_new(&ft_h->blocktable); + ft_h->blocktable.create(); { int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); } //Want to use block #20 BLOCKNUM b = make_blocknum(0); while (b.b < 20) { - toku_allocate_blocknum(ft_h->blocktable, &b, ft_h); + ft_h->blocktable.allocate_blocknum(&b, ft_h); } assert(b.b == 20); { DISKOFF offset; DISKOFF size; - toku_blocknum_realloc_on_disk(ft_h->blocktable, b, 100, &offset, ft_h, fd, false); + ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false); assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); - toku_translate_blocknum_to_offset_size(ft_h->blocktable, b, &offset, &size); + ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size); assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); assert(size == 100); } @@ -387,8 +387,8 @@ test_serialize_nonleaf(void) { toku_destroy_ftnode_internals(&sn); toku_free(ndd); - toku_block_free(ft_h->blocktable, block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); - toku_blocktable_destroy(&ft_h->blocktable); + ft_h->blocktable.block_free(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.destroy(); toku_free(ft_h->h); ft_h->cmp.destroy(); toku_free(ft_h); @@ -438,22 +438,22 @@ test_serialize_leaf(void) { 16); ft->ft = ft_h; - toku_blocktable_create_new(&ft_h->blocktable); + ft_h->blocktable.create(); { int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); } //Want to use block #20 BLOCKNUM b = make_blocknum(0); while (b.b < 20) { - toku_allocate_blocknum(ft_h->blocktable, &b, ft_h); + ft_h->blocktable.allocate_blocknum(&b, ft_h); } assert(b.b == 20); { DISKOFF offset; DISKOFF size; - toku_blocknum_realloc_on_disk(ft_h->blocktable, b, 100, &offset, ft_h, fd, false); + ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false); 
assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); - toku_translate_blocknum_to_offset_size(ft_h->blocktable, b, &offset, &size); + ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size); assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); assert(size == 100); } @@ -466,8 +466,8 @@ test_serialize_leaf(void) { toku_destroy_ftnode_internals(&sn); - toku_block_free(ft_h->blocktable, block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); - toku_blocktable_destroy(&ft_h->blocktable); + ft_h->blocktable.block_free(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.destroy(); toku_free(ft_h->h); toku_free(ft_h); toku_free(ft); diff --git a/ft/tests/ft-serialize-benchmark.cc b/ft/tests/ft-serialize-benchmark.cc index 4c1e3b8cbdb..437ca6d9505 100644 --- a/ft/tests/ft-serialize-benchmark.cc +++ b/ft/tests/ft-serialize-benchmark.cc @@ -198,22 +198,22 @@ test_serialize_leaf(int valsize, int nelts, double entropy, int ser_runs, int de ft_h->cmp.create(long_key_cmp, nullptr); ft->ft = ft_h; - toku_blocktable_create_new(&ft_h->blocktable); + ft_h->blocktable.create(); { int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); } //Want to use block #20 BLOCKNUM b = make_blocknum(0); while (b.b < 20) { - toku_allocate_blocknum(ft_h->blocktable, &b, ft_h); + ft_h->blocktable.allocate_blocknum(&b, ft_h); } assert(b.b == 20); { DISKOFF offset; DISKOFF size; - toku_blocknum_realloc_on_disk(ft_h->blocktable, b, 100, &offset, ft_h, fd, false); + ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false); assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); - toku_translate_blocknum_to_offset_size(ft_h->blocktable, b, &offset, &size); + ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size); assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); assert(size == 100); } @@ -277,8 +277,8 @@ test_serialize_leaf(int 
valsize, int nelts, double entropy, int ser_runs, int de toku_ftnode_free(&sn); - toku_block_free(ft_h->blocktable, block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); - toku_blocktable_destroy(&ft_h->blocktable); + ft_h->blocktable.block_free(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.destroy(); ft_h->cmp.destroy(); toku_free(ft_h->h); toku_free(ft_h); @@ -361,22 +361,22 @@ test_serialize_nonleaf(int valsize, int nelts, double entropy, int ser_runs, int ft_h->cmp.create(long_key_cmp, nullptr); ft->ft = ft_h; - toku_blocktable_create_new(&ft_h->blocktable); + ft_h->blocktable.create(); { int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); } //Want to use block #20 BLOCKNUM b = make_blocknum(0); while (b.b < 20) { - toku_allocate_blocknum(ft_h->blocktable, &b, ft_h); + ft_h->blocktable.allocate_blocknum(&b, ft_h); } assert(b.b == 20); { DISKOFF offset; DISKOFF size; - toku_blocknum_realloc_on_disk(ft_h->blocktable, b, 100, &offset, ft_h, fd, false); + ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false); assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); - toku_translate_blocknum_to_offset_size(ft_h->blocktable, b, &offset, &size); + ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size); assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); assert(size == 100); } @@ -412,8 +412,8 @@ test_serialize_nonleaf(int valsize, int nelts, double entropy, int ser_runs, int toku_ftnode_free(&dn); toku_destroy_ftnode_internals(&sn); - toku_block_free(ft_h->blocktable, block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); - toku_blocktable_destroy(&ft_h->blocktable); + ft_h->blocktable.block_free(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.destroy(); toku_free(ft_h->h); ft_h->cmp.destroy(); toku_free(ft_h); diff --git a/ft/tests/ft-serialize-test.cc b/ft/tests/ft-serialize-test.cc index d82a42b6e08..80640a4b5ef 100644 --- 
a/ft/tests/ft-serialize-test.cc +++ b/ft/tests/ft-serialize-test.cc @@ -301,23 +301,23 @@ test_serialize_leaf_check_msn(enum ftnode_verify_type bft, bool do_clone) { TOKU_DEFAULT_COMPRESSION_METHOD, 16); ft->ft = ft_h; - toku_blocktable_create_new(&ft_h->blocktable); + ft_h->blocktable.create(); { int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); } //Want to use block #20 BLOCKNUM b = make_blocknum(0); while (b.b < 20) { - toku_allocate_blocknum(ft_h->blocktable, &b, ft_h); + ft_h->blocktable.allocate_blocknum(&b, ft_h); } assert(b.b == 20); { DISKOFF offset; DISKOFF size; - toku_blocknum_realloc_on_disk(ft_h->blocktable, b, 100, &offset, ft_h, fd, false); + ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false); assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); - toku_translate_blocknum_to_offset_size(ft_h->blocktable, b, &offset, &size); + ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size); assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); assert(size == 100); } @@ -373,8 +373,8 @@ test_serialize_leaf_check_msn(enum ftnode_verify_type bft, bool do_clone) { toku_ftnode_free(&dn); toku_destroy_ftnode_internals(&sn); - toku_block_free(ft_h->blocktable, block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); - toku_blocktable_destroy(&ft_h->blocktable); + ft_h->blocktable.block_free(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.destroy(); toku_free(ft_h->h); toku_free(ft_h); toku_free(ft); @@ -435,22 +435,22 @@ test_serialize_leaf_with_large_pivots(enum ftnode_verify_type bft, bool do_clone TOKU_DEFAULT_COMPRESSION_METHOD, 16); ft->ft = ft_h; - toku_blocktable_create_new(&ft_h->blocktable); + ft_h->blocktable.create(); { int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); } //Want to use block #20 BLOCKNUM b = make_blocknum(0); while (b.b < 20) { - toku_allocate_blocknum(ft_h->blocktable, &b, ft_h); + ft_h->blocktable.allocate_blocknum(&b, 
ft_h); } assert(b.b == 20); { DISKOFF offset; DISKOFF size; - toku_blocknum_realloc_on_disk(ft_h->blocktable, b, 100, &offset, ft_h, fd, false); + ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false); assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); - toku_translate_blocknum_to_offset_size(ft_h->blocktable, b, &offset, &size); + ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size); assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); assert(size == 100); } @@ -508,8 +508,8 @@ test_serialize_leaf_with_large_pivots(enum ftnode_verify_type bft, bool do_clone toku_ftnode_free(&dn); toku_destroy_ftnode_internals(&sn); - toku_block_free(ft_h->blocktable, block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); - toku_blocktable_destroy(&ft_h->blocktable); + ft_h->blocktable.block_free(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.destroy(); toku_free(ft_h->h); toku_free(ft_h); toku_free(ft); @@ -561,22 +561,22 @@ test_serialize_leaf_with_many_rows(enum ftnode_verify_type bft, bool do_clone) { 16); ft->ft = ft_h; - toku_blocktable_create_new(&ft_h->blocktable); + ft_h->blocktable.create(); { int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); } //Want to use block #20 BLOCKNUM b = make_blocknum(0); while (b.b < 20) { - toku_allocate_blocknum(ft_h->blocktable, &b, ft_h); + ft_h->blocktable.allocate_blocknum(&b, ft_h); } assert(b.b == 20); { DISKOFF offset; DISKOFF size; - toku_blocknum_realloc_on_disk(ft_h->blocktable, b, 100, &offset, ft_h, fd, false); + ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false); assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); - toku_translate_blocknum_to_offset_size(ft_h->blocktable, b, &offset, &size); + ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size); assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); assert(size == 100); } 
@@ -636,8 +636,8 @@ test_serialize_leaf_with_many_rows(enum ftnode_verify_type bft, bool do_clone) { toku_ftnode_free(&dn); toku_destroy_ftnode_internals(&sn); - toku_block_free(ft_h->blocktable, block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); - toku_blocktable_destroy(&ft_h->blocktable); + ft_h->blocktable.block_free(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.destroy(); toku_free(ft_h->h); toku_free(ft_h); toku_free(ft); @@ -696,22 +696,22 @@ test_serialize_leaf_with_large_rows(enum ftnode_verify_type bft, bool do_clone) 16); ft->ft = ft_h; - toku_blocktable_create_new(&ft_h->blocktable); + ft_h->blocktable.create(); { int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); } //Want to use block #20 BLOCKNUM b = make_blocknum(0); while (b.b < 20) { - toku_allocate_blocknum(ft_h->blocktable, &b, ft_h); + ft_h->blocktable.allocate_blocknum(&b, ft_h); } assert(b.b == 20); { DISKOFF offset; DISKOFF size; - toku_blocknum_realloc_on_disk(ft_h->blocktable, b, 100, &offset, ft_h, fd, false); + ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false); assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); - toku_translate_blocknum_to_offset_size(ft_h->blocktable, b, &offset, &size); + ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size); assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); assert(size == 100); } @@ -773,8 +773,8 @@ test_serialize_leaf_with_large_rows(enum ftnode_verify_type bft, bool do_clone) toku_ftnode_free(&dn); toku_destroy_ftnode_internals(&sn); - toku_block_free(ft_h->blocktable, block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); - toku_blocktable_destroy(&ft_h->blocktable); + ft_h->blocktable.block_free(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.destroy(); toku_free(ft_h->h); toku_free(ft_h); toku_free(ft); @@ -832,22 +832,22 @@ test_serialize_leaf_with_empty_basement_nodes(enum 
ftnode_verify_type bft, bool 16); ft->ft = ft_h; - toku_blocktable_create_new(&ft_h->blocktable); + ft_h->blocktable.create(); { int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); } //Want to use block #20 BLOCKNUM b = make_blocknum(0); while (b.b < 20) { - toku_allocate_blocknum(ft_h->blocktable, &b, ft_h); + ft_h->blocktable.allocate_blocknum(&b, ft_h); } assert(b.b == 20); { DISKOFF offset; DISKOFF size; - toku_blocknum_realloc_on_disk(ft_h->blocktable, b, 100, &offset, ft_h, fd, false); + ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false); assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); - toku_translate_blocknum_to_offset_size(ft_h->blocktable, b, &offset, &size); + ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size); assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); assert(size == 100); } @@ -901,8 +901,8 @@ test_serialize_leaf_with_empty_basement_nodes(enum ftnode_verify_type bft, bool toku_ftnode_free(&dn); toku_destroy_ftnode_internals(&sn); - toku_block_free(ft_h->blocktable, block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); - toku_blocktable_destroy(&ft_h->blocktable); + ft_h->blocktable.block_free(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.destroy(); toku_free(ft_h->h); toku_free(ft_h); toku_free(ft); @@ -952,22 +952,22 @@ test_serialize_leaf_with_multiple_empty_basement_nodes(enum ftnode_verify_type b 16); ft->ft = ft_h; - toku_blocktable_create_new(&ft_h->blocktable); + ft_h->blocktable.create(); { int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); } //Want to use block #20 BLOCKNUM b = make_blocknum(0); while (b.b < 20) { - toku_allocate_blocknum(ft_h->blocktable, &b, ft_h); + ft_h->blocktable.allocate_blocknum(&b, ft_h); } assert(b.b == 20); { DISKOFF offset; DISKOFF size; - toku_blocknum_realloc_on_disk(ft_h->blocktable, b, 100, &offset, ft_h, fd, false); + ft_h->blocktable.realloc_on_disk(b, 100, &offset, 
ft_h, fd, false); assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); - toku_translate_blocknum_to_offset_size(ft_h->blocktable, b, &offset, &size); + ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size); assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); assert(size == 100); } @@ -1000,8 +1000,8 @@ test_serialize_leaf_with_multiple_empty_basement_nodes(enum ftnode_verify_type b toku_ftnode_free(&dn); toku_destroy_ftnode_internals(&sn); - toku_block_free(ft_h->blocktable, block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); - toku_blocktable_destroy(&ft_h->blocktable); + ft_h->blocktable.block_free(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.destroy(); toku_free(ft_h->h); toku_free(ft_h); toku_free(ft); @@ -1075,22 +1075,22 @@ test_serialize_nonleaf(enum ftnode_verify_type bft, bool do_clone) { ft_h->cmp.create(string_key_cmp, nullptr); ft->ft = ft_h; - toku_blocktable_create_new(&ft_h->blocktable); + ft_h->blocktable.create(); { int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); } //Want to use block #20 BLOCKNUM b = make_blocknum(0); while (b.b < 20) { - toku_allocate_blocknum(ft_h->blocktable, &b, ft_h); + ft_h->blocktable.allocate_blocknum(&b, ft_h); } assert(b.b == 20); { DISKOFF offset; DISKOFF size; - toku_blocknum_realloc_on_disk(ft_h->blocktable, b, 100, &offset, ft_h, fd, false); + ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false); assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); - toku_translate_blocknum_to_offset_size(ft_h->blocktable, b, &offset, &size); + ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size); assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); assert(size == 100); } @@ -1123,8 +1123,8 @@ test_serialize_nonleaf(enum ftnode_verify_type bft, bool do_clone) { toku_ftnode_free(&dn); toku_destroy_ftnode_internals(&sn); - 
toku_block_free(ft_h->blocktable, block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); - toku_blocktable_destroy(&ft_h->blocktable); + ft_h->blocktable.block_free(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.destroy(); ft_h->cmp.destroy(); toku_free(ft_h->h); toku_free(ft_h); diff --git a/ft/tests/test_block_allocator_merge.cc b/ft/tests/test_block_allocator_merge.cc deleted file mode 100644 index e0cd6ca1e15..00000000000 --- a/ft/tests/test_block_allocator_merge.cc +++ /dev/null @@ -1,236 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). - - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. 
- -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. - - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. 
If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2009-2013 Tokutek Inc. All rights reserved." -#include "ft/serialize/block_allocator.h" -#include -#include -// Test the merger. - -int verbose = 0; - -static void -print_array (uint64_t n, const struct block_allocator::blockpair a[/*n*/]) { - printf("{"); - for (uint64_t i=0; ioffset < b->offset) return -1; - if (a->offset > b->offset) return +1; - return 0; -} - -static void -test_merge (uint64_t an, const struct block_allocator::blockpair a[/*an*/], - uint64_t bn, const struct block_allocator::blockpair b[/*bn*/]) { - if (verbose>1) { printf("a:"); print_array(an, a); } - if (verbose>1) { printf("b:"); print_array(bn, b); } - struct block_allocator::blockpair *MALLOC_N(an+bn, q); - struct block_allocator::blockpair *MALLOC_N(an+bn, m); - if (q==0 || m==0) { - fprintf(stderr, "malloc failed, continuing\n"); - goto malloc_failed; - } - for (uint64_t i=0; i1) { printf("q:"); print_array(an+bn, q); } - if (verbose) printf("merge\n"); - block_allocator::merge_blockpairs_into(an, m, bn, b); - if (verbose) printf("compare\n"); - if (verbose>1) { printf("m:"); print_array(an+bn, m); } - for (uint64_t i=0; iblocktable, logname, 0, &offset, - ft, fd, for_checkpoint); + ft->blocktable.realloc_on_disk(logname, 0, &offset, ft, fd, for_checkpoint); } if (!keep_me && !is_clone) { toku_free(log); diff --git a/ft/txn/rollback.cc 
b/ft/txn/rollback.cc index 15cd9054ddd..ffd2fc93a7b 100644 --- a/ft/txn/rollback.cc +++ b/ft/txn/rollback.cc @@ -98,7 +98,7 @@ PATENT RIGHTS GRANT: static void rollback_unpin_remove_callback(CACHEKEY* cachekey, bool for_checkpoint, void* extra) { FT CAST_FROM_VOIDP(ft, extra); - toku_free_blocknum(ft->blocktable, cachekey, ft, for_checkpoint); + ft->blocktable.free_blocknum(cachekey, ft, for_checkpoint); } void toku_rollback_log_unpin_and_remove(TOKUTXN txn, ROLLBACK_LOG_NODE log) { @@ -216,7 +216,7 @@ static void rollback_log_create ( CACHEFILE cf = txn->logger->rollback_cachefile; FT CAST_FROM_VOIDP(ft, toku_cachefile_get_userdata(cf)); rollback_initialize_for_txn(log, txn, previous); - toku_allocate_blocknum(ft->blocktable, &log->blocknum, ft); + ft->blocktable.allocate_blocknum(&log->blocknum, ft); const uint32_t hash = toku_cachetable_hash(ft->cf, log->blocknum); *result = log; toku_cachetable_put(cf, log->blocknum, hash, diff --git a/tools/ftverify.cc b/tools/ftverify.cc index 89de2d6b12b..1365f34a320 100644 --- a/tools/ftverify.cc +++ b/tools/ftverify.cc @@ -412,10 +412,8 @@ cleanup: // Passes our check_block() function to be called as we iterate over // the block table. This will print any interesting failures and // update us on our progress. 
-static void -check_block_table(int fd, BLOCK_TABLE bt, struct ft *h) -{ - int64_t num_blocks = toku_block_get_blocks_in_use_unlocked(bt); +static void check_block_table(int fd, block_table *bt, struct ft *h) { + int64_t num_blocks = bt->get_blocks_in_use_unlocked(); printf("Starting verification of checkpoint containing"); printf(" %" PRId64 " blocks.\n", num_blocks); fflush(stdout); @@ -425,13 +423,11 @@ check_block_table(int fd, BLOCK_TABLE bt, struct ft *h) .blocks_failed = 0, .total_blocks = num_blocks, .h = h }; - int r = 0; - r = toku_blocktable_iterate(bt, - TRANSLATION_CURRENT, - check_block, - &extra, - true, - true); + int r = bt->iterate(block_table::TRANSLATION_CURRENT, + check_block, + &extra, + true, + true); if (r != 0) { // We can print more information here if necessary. } @@ -493,11 +489,11 @@ main(int argc, char const * const argv[]) // walk over the block table and check blocks if (h1) { printf("Checking dictionary from header 1.\n"); - check_block_table(dictfd, h1->blocktable, h1); + check_block_table(dictfd, &h1->blocktable, h1); } if (h2) { printf("Checking dictionary from header 2.\n"); - check_block_table(dictfd, h2->blocktable, h2); + check_block_table(dictfd, &h2->blocktable, h2); } if (h1 == NULL && h2 == NULL) { printf("Both headers have a corruption and could not be used.\n"); diff --git a/tools/tokuftdump.cc b/tools/tokuftdump.cc index f6385b4f725..8b3f48e7f19 100644 --- a/tools/tokuftdump.cc +++ b/tools/tokuftdump.cc @@ -237,7 +237,7 @@ static void dump_node(int fd, BLOCKNUM blocknum, FT ft) { assert(n!=0); printf("ftnode\n"); DISKOFF disksize, diskoffset; - toku_translate_blocknum_to_offset_size(ft->blocktable, blocknum, &diskoffset, &disksize); + ft->blocktable.translate_blocknum_to_offset_size(blocknum, &diskoffset, &disksize); printf(" diskoffset =%" PRId64 "\n", diskoffset); printf(" disksize =%" PRId64 "\n", disksize); printf(" serialize_size =%u\n", toku_serialize_ftnode_size(n)); @@ -334,13 +334,13 @@ ok: } static void 
dump_block_translation(FT ft, uint64_t offset) { - toku_blocknum_dump_translation(ft->blocktable, make_blocknum(offset)); + ft->blocktable.blocknum_dump_translation(make_blocknum(offset)); } static void dump_fragmentation(int UU(f), FT ft, int tsv) { int64_t used_space; int64_t total_space; - toku_blocktable_internal_fragmentation(ft->blocktable, &total_space, &used_space); + ft->blocktable.internal_fragmentation(&total_space, &used_space); int64_t fragsizes = total_space - used_space; if (tsv) { @@ -386,8 +386,8 @@ static void dump_nodesizes(int fd, FT ft) { memset(&info, 0, sizeof(info)); info.fd = fd; info.ft = ft; - toku_blocktable_iterate(ft->blocktable, TRANSLATION_CHECKPOINTED, - nodesizes_helper, &info, true, true); + ft->blocktable.iterate(block_table::TRANSLATION_CHECKPOINTED, + nodesizes_helper, &info, true, true); printf("leafblocks\t%" PRIu64 "\n", info.leafblocks); printf("blocksizes\t%" PRIu64 "\n", info.blocksizes); printf("leafsizes\t%" PRIu64 "\n", info.leafsizes); @@ -476,7 +476,7 @@ static void verify_block(unsigned char *cp, uint64_t file_offset, uint64_t size) static void dump_block(int fd, BLOCKNUM blocknum, FT ft) { DISKOFF offset, size; - toku_translate_blocknum_to_offset_size(ft->blocktable, blocknum, &offset, &size); + ft->blocktable.translate_blocknum_to_offset_size(blocknum, &offset, &size); printf("%" PRId64 " at %" PRId64 " size %" PRId64 "\n", blocknum.b, offset, size); unsigned char *CAST_FROM_VOIDP(vp, toku_malloc(size)); @@ -688,22 +688,22 @@ int main (int argc, const char *const argv[]) { dump_fragmentation(fd, ft, do_tsv); } if (do_translation_table) { - toku_dump_translation_table_pretty(stdout, ft->blocktable); + ft->blocktable.dump_translation_table_pretty(stdout); } if (do_garbage) { dump_garbage_stats(fd, ft); } if (!do_header && !do_rootnode && !do_fragmentation && !do_translation_table && !do_garbage) { printf("Block translation:"); - toku_dump_translation_table(stdout, ft->blocktable); + 
ft->blocktable.dump_translation_table(stdout); dump_header(ft); struct __dump_node_extra info; info.fd = fd; info.ft = ft; - toku_blocktable_iterate(ft->blocktable, TRANSLATION_CHECKPOINTED, - dump_node_wrapper, &info, true, true); + ft->blocktable.iterate(block_table::TRANSLATION_CHECKPOINTED, + dump_node_wrapper, &info, true, true); } } toku_cachefile_close(&cf, false, ZERO_LSN); From 54f982815ea7398997bc6e61023a2379cec81acb Mon Sep 17 00:00:00 2001 From: John Esmet Date: Fri, 18 Jul 2014 14:50:29 -0400 Subject: [PATCH 099/190] FT-304 Remove inconsistent 'struct' keyword which made the osx build sad. --- ft/ft-internal.h | 2 +- ft/serialize/ft-serialize.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ft/ft-internal.h b/ft/ft-internal.h index e6214df4d9b..fb04cd1bd90 100644 --- a/ft/ft-internal.h +++ b/ft/ft-internal.h @@ -225,7 +225,7 @@ struct ft { // These are not read-only: // protected by blocktable lock - struct block_table blocktable; + block_table blocktable; // protected by atomic builtins STAT64INFO_S in_memory_stats; diff --git a/ft/serialize/ft-serialize.h b/ft/serialize/ft-serialize.h index 856d32d549d..05957d70f69 100644 --- a/ft/serialize/ft-serialize.h +++ b/ft/serialize/ft-serialize.h @@ -92,7 +92,7 @@ PATENT RIGHTS GRANT: #include "ft/serialize/block_table.h" size_t toku_serialize_ft_size(struct ft_header *h); -void toku_serialize_ft_to(int fd, struct ft_header *h, struct block_table *blocktable, CACHEFILE cf); +void toku_serialize_ft_to(int fd, struct ft_header *h, block_table *bt, CACHEFILE cf); void toku_serialize_ft_to_wbuf(struct wbuf *wbuf, struct ft_header *h, DISKOFF translation_location_on_disk, DISKOFF translation_size_on_disk); void toku_serialize_descriptor_contents_to_fd(int fd, DESCRIPTOR desc, DISKOFF offset); void toku_serialize_descriptor_contents_to_wbuf(struct wbuf *wb, DESCRIPTOR desc); From 7c03c6483363ea8f152169da3822c9fbd9455540 Mon Sep 17 00:00:00 2001 From: John Esmet Date: Tue, 22 Jul 2014 08:57:17 
-0400 Subject: [PATCH 100/190] FT-304 Add stress test coverage for db->get_fragmentation(db) --- src/tests/test_stress7.cc | 10 ++++++---- src/tests/threaded_stress_test_helpers.h | 10 ++++++++++ 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/src/tests/test_stress7.cc b/src/tests/test_stress7.cc index 5db318521ee..e1e477c234a 100644 --- a/src/tests/test_stress7.cc +++ b/src/tests/test_stress7.cc @@ -108,7 +108,7 @@ stress_table(DB_ENV *env, DB **dbp, struct cli_args *cli_args) { // if (verbose) printf("starting creation of pthreads\n"); - const int num_threads = 4 + cli_args->num_update_threads + cli_args->num_ptquery_threads; + const int num_threads = 5 + cli_args->num_update_threads + cli_args->num_ptquery_threads; struct arg myargs[num_threads]; for (int i = 0; i < num_threads; i++) { arg_init(&myargs[i], dbp, env, cli_args); @@ -129,19 +129,21 @@ stress_table(DB_ENV *env, DB **dbp, struct cli_args *cli_args) { myargs[1].operation_extra = &soe[1]; myargs[1].operation = scan_op; - // make the guy that runs HOT in the background + // make the guys that run hot optimize, keyrange, and frag stats in the background myargs[2].operation = hot_op; myargs[3].operation = keyrange_op; + myargs[4].operation = frag_op; + myargs[4].sleep_ms = 100; struct update_op_args uoe = get_update_op_args(cli_args, NULL); // make the guy that updates the db - for (int i = 4; i < 4 + cli_args->num_update_threads; ++i) { + for (int i = 5; i < 5 + cli_args->num_update_threads; ++i) { myargs[i].operation_extra = &uoe; myargs[i].operation = update_op; } // make the guy that does point queries - for (int i = 4 + cli_args->num_update_threads; i < num_threads; i++) { + for (int i = 5 + cli_args->num_update_threads; i < num_threads; i++) { myargs[i].operation = ptquery_op; } run_workers(myargs, num_threads, cli_args->num_seconds, false, cli_args); diff --git a/src/tests/threaded_stress_test_helpers.h b/src/tests/threaded_stress_test_helpers.h index 0fda39a50aa..0269867744e 100644 
--- a/src/tests/threaded_stress_test_helpers.h +++ b/src/tests/threaded_stress_test_helpers.h @@ -1077,6 +1077,16 @@ static int UU() keyrange_op(DB_TXN *txn, ARG arg, void* UU(operation_extra), voi return r; } +static int UU() frag_op(DB_TXN *UU(txn), ARG arg, void* UU(operation_extra), void *UU(stats_extra)) { + int db_index = myrandom_r(arg->random_data)%arg->cli->num_DBs; + DB *db = arg->dbp[db_index]; + + TOKU_DB_FRAGMENTATION_S frag; + int r = db->get_fragmentation(db, &frag); + invariant_zero(r); + return r; +} + static void UU() get_key_after_bytes_callback(const DBT *UU(end_key), uint64_t UU(skipped), void *UU(extra)) { // nothing } From a0252cbe20794b332b96db749fc093d1c720ce53 Mon Sep 17 00:00:00 2001 From: John Esmet Date: Tue, 22 Jul 2014 08:57:22 -0400 Subject: [PATCH 101/190] FT-304 Fix an oops in the blocktable, first exposed in a MySQl test and now by src/tests/stress_test7.cc --- ft/serialize/block_allocator.cc | 19 +++++++++---------- ft/serialize/block_table.cc | 4 ++-- 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/ft/serialize/block_allocator.cc b/ft/serialize/block_allocator.cc index a8fb88dbbef..ff726da2ebd 100644 --- a/ft/serialize/block_allocator.cc +++ b/ft/serialize/block_allocator.cc @@ -100,7 +100,7 @@ PATENT RIGHTS GRANT: #include "ft/serialize/block_allocator.h" #include "ft/serialize/block_allocator_strategy.h" -#if 0 +#if TOKU_DEBUG_PARANOID #define VALIDATE() validate() #else #define VALIDATE() @@ -180,19 +180,18 @@ void block_allocator::create_from_blockpairs(uint64_t reserve_at_beginning, uint struct blockpair *pairs, uint64_t n_blocks) { _create_internal(reserve_at_beginning, alignment); - for (uint64_t i = 0; i < _n_blocks; i++) { - // Allocator does not support size 0 blocks. See block_allocator_free_block. 
- invariant(pairs[i].size > 0); - invariant(pairs[i].offset >= _reserve_at_beginning); - invariant(pairs[i].offset % _alignment == 0); - - _n_bytes_in_use += pairs[i].size; - } _n_blocks = n_blocks; - grow_blocks_array_by(_n_blocks); memcpy(_blocks_array, pairs, _n_blocks * sizeof(struct blockpair)); qsort(_blocks_array, _n_blocks, sizeof(struct blockpair), compare_blockpairs); + for (uint64_t i = 0; i < _n_blocks; i++) { + // Allocator does not support size 0 blocks. See block_allocator_free_block. + invariant(_blocks_array[i].size > 0); + invariant(_blocks_array[i].offset >= _reserve_at_beginning); + invariant(_blocks_array[i].offset % _alignment == 0); + + _n_bytes_in_use += _blocks_array[i].size; + } VALIDATE(); } diff --git a/ft/serialize/block_table.cc b/ft/serialize/block_table.cc index 39d4ac7346b..698752a5b06 100644 --- a/ft/serialize/block_table.cc +++ b/ft/serialize/block_table.cc @@ -959,7 +959,7 @@ void block_table::get_fragmentation_unlocked(TOKU_DB_FRAGMENTATION report) { struct translation *checkpointed = &_checkpointed; for (int64_t i = 0; i < checkpointed->length_of_array; i++) { - struct block_translation_pair *pair = &_checkpointed.block_translation[i]; + struct block_translation_pair *pair = &checkpointed->block_translation[i]; if (pair->size > 0 && !(i < current->length_of_array && current->block_translation[i].size > 0 && current->block_translation[i].u.diskoff == pair->u.diskoff)) { @@ -970,7 +970,7 @@ void block_table::get_fragmentation_unlocked(TOKU_DB_FRAGMENTATION report) { struct translation *inprogress = &_inprogress; for (int64_t i = 0; i < inprogress->length_of_array; i++) { - struct block_translation_pair *pair = &_inprogress.block_translation[i]; + struct block_translation_pair *pair = &inprogress->block_translation[i]; if (pair->size > 0 && !(i < current->length_of_array && current->block_translation[i].size > 0 && current->block_translation[i].u.diskoff == pair->u.diskoff) && From 1f9a77d340dfac1dabf4c4df9bdc918c54ec80a4 Mon Sep 
17 00:00:00 2001 From: John Esmet Date: Tue, 22 Jul 2014 08:57:48 -0400 Subject: [PATCH 102/190] FT-300 Use an environment variable to determine which file the block allocator trace gets written to --- ft/serialize/block_allocator.cc | 56 +++++++++++++++++++++---------- ft/serialize/block_allocator.h | 8 +++++ ft/serialize/ft_node-serialize.cc | 14 ++++---- 3 files changed, 54 insertions(+), 24 deletions(-) diff --git a/ft/serialize/block_allocator.cc b/ft/serialize/block_allocator.cc index ff726da2ebd..ec5c94fe4c9 100644 --- a/ft/serialize/block_allocator.cc +++ b/ft/serialize/block_allocator.cc @@ -106,12 +106,32 @@ PATENT RIGHTS GRANT: #define VALIDATE() #endif -static inline bool ba_trace_enabled() { -#if 0 - return true; -#else - return false; -#endif +static FILE *ba_trace_file = nullptr; + +void block_allocator::maybe_initialize_trace(void) { + const char *ba_trace_path = getenv("TOKU_BA_TRACE_PATH"); + if (ba_trace_path != nullptr) { + ba_trace_file = toku_os_fopen(ba_trace_path, "w"); + if (ba_trace_file == nullptr) { + fprintf(stderr, "tokuft: error: block allocator trace path found in environment (%s), " + "but it could not be opened for writing (errno %d)\n", + ba_trace_path, get_maybe_error_errno()); + } else { + fprintf(stderr, "tokuft: block allocator tracing enabled, path: %s\n", ba_trace_path); + } + } +} + +void block_allocator::maybe_close_trace() { + if (ba_trace_file != nullptr) { + int r = toku_os_fclose(ba_trace_file); + if (r != 0) { + fprintf(stderr, "tokuft: error: block allocator trace file did not close properly (r %d, errno %d)\n", + r, get_maybe_error_errno()); + } else { + fprintf(stderr, "tokuft: block allocator tracing finished, file closed successfully\n"); + } + } } void block_allocator::_create_internal(uint64_t reserve_at_beginning, uint64_t alignment) { @@ -131,16 +151,16 @@ void block_allocator::_create_internal(uint64_t reserve_at_beginning, uint64_t a void block_allocator::create(uint64_t reserve_at_beginning, uint64_t 
alignment) { _create_internal(reserve_at_beginning, alignment); - if (ba_trace_enabled()) { - fprintf(stderr, "ba_trace_create %p\n", this); + if (ba_trace_file != nullptr) { + fprintf(ba_trace_file, "ba_trace_create %p\n", this); } } void block_allocator::destroy() { toku_free(_blocks_array); - if (ba_trace_enabled()) { - fprintf(stderr, "ba_trace_destroy %p\n", this); + if (ba_trace_file != nullptr) { + fprintf(ba_trace_file, "ba_trace_destroy %p\n", this); } } @@ -264,8 +284,8 @@ done: _n_blocks++; VALIDATE(); - if (ba_trace_enabled()) { - fprintf(stderr, "ba_trace_alloc %p %lu %lu\n", + if (ba_trace_file != nullptr) { + fprintf(ba_trace_file, "ba_trace_alloc %p %lu %lu\n", this, static_cast(size), static_cast(*offset)); } } @@ -282,11 +302,11 @@ int64_t block_allocator::find_block(uint64_t offset) { uint64_t lo = 0; uint64_t hi = _n_blocks; while (1) { - assert(lo offset) { hi = mid; } else { @@ -305,13 +325,13 @@ void block_allocator::free_block(uint64_t offset) { int64_t bn = find_block(offset); assert(bn >= 0); // we require that there is a block with that offset. _n_bytes_in_use -= _blocks_array[bn].size; - memmove(&_blocks_array[bn], &_blocks_array[bn +1 ], + memmove(&_blocks_array[bn], &_blocks_array[bn + 1], (_n_blocks - bn - 1) * sizeof(struct blockpair)); _n_blocks--; VALIDATE(); - if (ba_trace_enabled()) { - fprintf(stderr, "ba_trace_free %p %lu\n", + if (ba_trace_file != nullptr) { + fprintf(ba_trace_file, "ba_trace_free %p %lu\n", this, static_cast(offset)); } diff --git a/ft/serialize/block_allocator.h b/ft/serialize/block_allocator.h index b79e5c4eb56..43472e623fe 100644 --- a/ft/serialize/block_allocator.h +++ b/ft/serialize/block_allocator.h @@ -215,6 +215,14 @@ public: // report->checkpoint_bytes_additional is ignored on return void get_statistics(TOKU_DB_FRAGMENTATION report); + // Block allocator tracing. + // - Enabled by setting TOKU_BA_TRACE_PATH to the file that the trace file + // should be written to. 
+ // - Trace may be replayed by ba_trace_replay tool in tools/ directory + // eg: "cat mytracefile | ba_trace_replay" + static void maybe_initialize_trace(); + static void maybe_close_trace(); + private: void _create_internal(uint64_t reserve_at_beginning, uint64_t alignment); void grow_blocks_array_by(uint64_t n_to_add); diff --git a/ft/serialize/ft_node-serialize.cc b/ft/serialize/ft_node-serialize.cc index 285acc188e0..9aeb2aaeedd 100644 --- a/ft/serialize/ft_node-serialize.cc +++ b/ft/serialize/ft_node-serialize.cc @@ -97,6 +97,7 @@ PATENT RIGHTS GRANT: #include "ft/node.h" #include "ft/logger/log-internal.h" #include "ft/txn/rollback.h" +#include "ft/serialize/block_allocator.h" #include "ft/serialize/block_table.h" #include "ft/serialize/compress.h" #include "ft/serialize/ft_node-serialize.h" @@ -141,18 +142,19 @@ struct toku_thread_pool *get_ft_pool(void) { return ft_pool; } -void -toku_ft_serialize_layer_init(void) { +void toku_ft_serialize_layer_init(void) { num_cores = toku_os_get_number_active_processors(); - int r = toku_thread_pool_create(&ft_pool, num_cores); lazy_assert_zero(r); + int r = toku_thread_pool_create(&ft_pool, num_cores); + lazy_assert_zero(r); + block_allocator::maybe_initialize_trace(); } -void -toku_ft_serialize_layer_destroy(void) { +void toku_ft_serialize_layer_destroy(void) { toku_thread_pool_destroy(&ft_pool); + block_allocator::maybe_close_trace(); } -enum {FILE_CHANGE_INCREMENT = (16<<20)}; +enum { FILE_CHANGE_INCREMENT = (16 << 20) }; static inline uint64_t alignup64(uint64_t a, uint64_t b) { From 6d2968d04d2fabdbe186b644d58d06029fa47c09 Mon Sep 17 00:00:00 2001 From: John Esmet Date: Tue, 22 Jul 2014 08:57:48 -0400 Subject: [PATCH 103/190] FT-300 Fix a bunch of issues with the replay tool, including: - Failure to handle multiple allocators at once (oops) - Failure to handle a clean shutdown where allocators are destroyed by the trace gracefully and so we can't get stats for them after the run (need to clean this up eventually) 
- Added line numbers to error messages so debugging is easier --- tools/ba_replay.cc | 103 +++++++++++++++++++++++++++------------------ 1 file changed, 62 insertions(+), 41 deletions(-) diff --git a/tools/ba_replay.cc b/tools/ba_replay.cc index f9052f07fdc..8d18b1718af 100644 --- a/tools/ba_replay.cc +++ b/tools/ba_replay.cc @@ -111,9 +111,9 @@ using std::set; using std::string; using std::vector; -static void ba_replay_assert(bool pred, const char *msg, const char *line) { +static void ba_replay_assert(bool pred, const char *msg, const char *line, int line_num) { if (!pred) { - fprintf(stderr, "%s, line: %s\n", msg, line); + fprintf(stderr, "%s, line (#%d): %s\n", msg, line_num, line); abort(); } } @@ -131,31 +131,31 @@ static char *tidy_line(char *line) { return line; } -static int64_t parse_number(char **ptr, int base) { +static int64_t parse_number(char **ptr, int line_num, int base) { *ptr = tidy_line(*ptr); char *new_ptr; int64_t n = strtoll(*ptr, &new_ptr, base); - ba_replay_assert(n >= 0, "malformed trace", *ptr); + ba_replay_assert(n >= 0, "malformed trace", *ptr, line_num); *ptr = new_ptr; return n; } -static uint64_t parse_uint64(char **ptr) { - int64_t n = parse_number(ptr, 10); - ba_replay_assert(n >= 0, "malformed trace", *ptr); +static uint64_t parse_uint64(char **ptr, int line_num) { + int64_t n = parse_number(ptr, line_num, 10); + ba_replay_assert(n >= 0, "malformed trace", *ptr, line_num); // we happen to know that the uint64's we deal with will // take less than 63 bits (they come from pointers) return static_cast(n); } -static string parse_token(char **ptr) { +static string parse_token(char **ptr, int line_num) { char *line = *ptr; // parse the first token, which represents the traced function char token[64]; int r = sscanf(line, "%64s", token); - ba_replay_assert(r == 1, "malformed trace", line); + ba_replay_assert(r == 1, "malformed trace", line, line_num); *ptr += strlen(token); return string(token); } @@ -164,63 +164,73 @@ static vector 
canonicalize_trace_from(FILE *file) { // new trace, canonicalized from a raw trace vector canonicalized_trace; - // raw trace offset `result' -> canonical allocation id, generated in sequence - // - // keeps track of which allocation results map to a specific allocation event - // later, when we write free()s to the trace, we'll need to translate - // the offset to the allocation seq num - map offset_to_seq_num; - uint64_t allocation_seq_num = 0; - - // maps raw allocator id to canonical allocator id, generated in sequence + // raw allocator id -> canonical allocator id // // keeps track of allocators that were created as part of the trace, // and therefore will be part of the canonicalized trace. - map allocator_ids; uint64_t allocator_id_seq_num = 0; + map allocator_ids; + // allocated offset -> allocation seq num + // + uint64_t allocation_seq_num = 0; + typedef map offset_seq_map; + + // raw allocator id -> offset_seq_map that tracks its allocations + map offset_to_seq_num_maps; + + int line_num = 0; const int max_line = 512; char line[max_line]; while (fgets(line, max_line, file) != nullptr) { + line_num++; + // removes leading whitespace and trailing newline char *ptr = tidy_line(line); - string fn = parse_token(&ptr); - int64_t allocator_id = parse_number(&ptr, 16); + string fn = parse_token(&ptr, line_num); + int64_t allocator_id = parse_number(&ptr, line_num, 16); std::stringstream ss; if (fn == "ba_trace_create") { // only allocators created in the raw traec will be part of the // canonical trace, so save the next canonical allocator id here. 
- ba_replay_assert(allocator_ids.count(allocator_id) == 0, "corrupted trace: double create", line); + ba_replay_assert(allocator_ids.count(allocator_id) == 0, "corrupted trace: double create", line, line_num); allocator_ids[allocator_id] = allocator_id_seq_num; ss << fn << ' ' << allocator_id_seq_num << ' ' << std::endl; allocator_id_seq_num++; } else if (allocator_ids.count(allocator_id) > 0) { // this allocator is part of the canonical trace uint64_t canonical_allocator_id = allocator_ids[allocator_id]; + + // this is the map that tracks allocations for this allocator + offset_seq_map *map = &offset_to_seq_num_maps[allocator_id]; + if (fn == "ba_trace_alloc") { - const uint64_t size = parse_uint64(&ptr); - const uint64_t offset = parse_uint64(&ptr); - ba_replay_assert(offset_to_seq_num.count(offset) == 0, "corrupted trace: double alloc", line); + const uint64_t size = parse_uint64(&ptr, line_num); + const uint64_t offset = parse_uint64(&ptr, line_num); + ba_replay_assert(map->count(offset) == 0, "corrupted trace: double alloc", line, line_num); // remember that an allocation at `offset' has the current alloc seq num - offset_to_seq_num[offset] = allocation_seq_num; + (*map)[offset] = allocation_seq_num; // translate `offset = alloc(size)' to `asn = alloc(size)' ss << fn << ' ' << canonical_allocator_id << ' ' << size << ' ' << allocation_seq_num << std::endl; allocation_seq_num++; } else if (fn == "ba_trace_free") { - const uint64_t offset = parse_uint64(&ptr); - ba_replay_assert(offset_to_seq_num.count(offset) != 0, "corrupted trace: invalid free", line); + const uint64_t offset = parse_uint64(&ptr, line_num); + ba_replay_assert(map->count(offset) != 0, "corrupted trace: invalid free", line, line_num); // get the alloc seq num for an allcation that occurred at `offset' - const uint64_t asn = offset_to_seq_num[offset]; + const uint64_t asn = (*map)[offset]; + map->erase(offset); // translate `free(offset)' to `free(asn)' ss << fn << ' ' << canonical_allocator_id 
<< ' ' << asn << std::endl; } else if (fn == "ba_trace_destroy") { + // Remove this allocator from both maps allocator_ids.erase(allocator_id); + offset_to_seq_num_maps.erase(allocator_id); // translate `destroy(ptr_id) to destroy(canonical_id)' ss << fn << ' ' << canonical_allocator_id << ' ' << std::endl; @@ -241,20 +251,23 @@ static void replay_canonicalized_trace(const vector &canonicalized_trace // maps allocation seq num to allocated offset map seq_num_to_offset; + int line_num = 0; for (vector::const_iterator it = canonicalized_trace.begin(); it != canonicalized_trace.end(); it++) { + line_num++; + char *line = toku_strdup(it->c_str()); - printf("playing canonical trace line: %s", line); + printf("playing canonical trace line #%d: %s", line_num, line); char *ptr = tidy_line(line); // canonical allocator id is in base 10, not 16 - string fn = parse_token(&ptr); - int64_t allocator_id = parse_number(&ptr, 10); + string fn = parse_token(&ptr, line_num); + int64_t allocator_id = parse_number(&ptr, line_num, 10); if (fn == "ba_trace_create") { ba_replay_assert(allocator_map->count(allocator_id) == 0, - "corrupted canonical trace: double create", ptr); + "corrupted canonical trace: double create", line, line_num); block_allocator *ba = new block_allocator(); ba->create(8096, 4096); // header reserve, alignment - taken from block_table.cc @@ -264,30 +277,33 @@ static void replay_canonicalized_trace(const vector &canonicalized_trace (*allocator_map)[allocator_id] = ba; } else { ba_replay_assert(allocator_map->count(allocator_id) > 0, - "corrupted canonical trace: no such allocator", line); + "corrupted canonical trace: no such allocator", line, line_num); block_allocator *ba = (*allocator_map)[allocator_id]; if (fn == "ba_trace_alloc") { - const uint64_t size = parse_uint64(&ptr); - const uint64_t asn = parse_uint64(&ptr); + const uint64_t size = parse_uint64(&ptr, line_num); + const uint64_t asn = parse_uint64(&ptr, line_num); 
ba_replay_assert(seq_num_to_offset.count(asn) == 0, - "corrupted canonical trace: double alloc", line); + "corrupted canonical trace: double alloc (asn in use)", line, line_num); uint64_t offset; ba->alloc_block(size, &offset); seq_num_to_offset[asn] = offset; } else if (fn == "ba_trace_free") { - const uint64_t asn = parse_uint64(&ptr); + const uint64_t asn = parse_uint64(&ptr, line_num); ba_replay_assert(seq_num_to_offset.count(asn) == 1, - "corrupted canonical trace: double free", line); + "corrupted canonical trace: double free (asn unused)", line, line_num); uint64_t offset = seq_num_to_offset[asn]; ba->free_block(offset); seq_num_to_offset.erase(asn); } else if (fn == "ba_trace_destroy") { - allocator_map->erase(allocator_id); + // TODO: Clean this up - we won't be able to catch no such allocator errors + // if we don't actually not the destroy. We only do it here so that the caller + // can gather statistics on all closed allocators at the end of the run. + // allocator_map->erase(allocator_id); } else { - ba_replay_assert(false, "corrupted canonical trace: bad fn", line); + ba_replay_assert(false, "corrupted canonical trace: bad fn", line, line_num); } } @@ -310,6 +326,11 @@ static void print_result(uint64_t allocator_id, TOKU_DB_FRAGMENTATION report) { uint64_t total_bytes = report->data_bytes + report->unused_bytes; uint64_t total_blocks = report->data_blocks + report->unused_blocks; + if (total_bytes < 32UL * 1024 * 1024) { + printf("skipping allocator_id %" PRId64 " (total bytes < 32mb)\n", allocator_id); + return; + } + printf("\n"); printf("allocator_id: %20" PRId64 "\n", allocator_id); printf("strategy: %20s\n", strategy_str(strategy)); From d4a3c6f63a632a4f1257561db600db0566a49a5c Mon Sep 17 00:00:00 2001 From: John Esmet Date: Tue, 22 Jul 2014 08:57:48 -0400 Subject: [PATCH 104/190] FT-300 Add best-fit strategy for replay testing --- ft/serialize/block_allocator.cc | 7 ++++++- ft/serialize/block_allocator.h | 3 ++- 
ft/serialize/block_allocator_strategy.cc | 22 ++++++++++++++++++++++ ft/serialize/block_allocator_strategy.h | 4 ++++ tools/ba_replay.cc | 2 ++ 5 files changed, 36 insertions(+), 2 deletions(-) diff --git a/ft/serialize/block_allocator.cc b/ft/serialize/block_allocator.cc index ec5c94fe4c9..fcec2b70815 100644 --- a/ft/serialize/block_allocator.cc +++ b/ft/serialize/block_allocator.cc @@ -153,6 +153,7 @@ void block_allocator::create(uint64_t reserve_at_beginning, uint64_t alignment) _create_internal(reserve_at_beginning, alignment); if (ba_trace_file != nullptr) { fprintf(ba_trace_file, "ba_trace_create %p\n", this); + fflush(ba_trace_file); } } @@ -161,6 +162,7 @@ void block_allocator::destroy() { if (ba_trace_file != nullptr) { fprintf(ba_trace_file, "ba_trace_destroy %p\n", this); + fflush(ba_trace_file); } } @@ -226,6 +228,8 @@ block_allocator::choose_block_to_alloc_after(size_t size) { switch (_strategy) { case BA_STRATEGY_FIRST_FIT: return block_allocator_strategy::first_fit(_blocks_array, _n_blocks, size, _alignment); + case BA_STRATEGY_BEST_FIT: + return block_allocator_strategy::best_fit(_blocks_array, _n_blocks, size, _alignment); default: abort(); } @@ -287,6 +291,7 @@ done: if (ba_trace_file != nullptr) { fprintf(ba_trace_file, "ba_trace_alloc %p %lu %lu\n", this, static_cast(size), static_cast(*offset)); + fflush(ba_trace_file); } } @@ -333,7 +338,7 @@ void block_allocator::free_block(uint64_t offset) { if (ba_trace_file != nullptr) { fprintf(ba_trace_file, "ba_trace_free %p %lu\n", this, static_cast(offset)); - + fflush(ba_trace_file); } } diff --git a/ft/serialize/block_allocator.h b/ft/serialize/block_allocator.h index 43472e623fe..32d97c000a2 100644 --- a/ft/serialize/block_allocator.h +++ b/ft/serialize/block_allocator.h @@ -125,7 +125,8 @@ public: static const size_t BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE = BLOCK_ALLOCATOR_HEADER_RESERVE * 2; enum allocation_strategy { - BA_STRATEGY_FIRST_FIT = 1 + BA_STRATEGY_FIRST_FIT = 1, + BA_STRATEGY_BEST_FIT 
}; struct blockpair { diff --git a/ft/serialize/block_allocator_strategy.cc b/ft/serialize/block_allocator_strategy.cc index 5108f151248..5c60b094573 100644 --- a/ft/serialize/block_allocator_strategy.cc +++ b/ft/serialize/block_allocator_strategy.cc @@ -102,3 +102,25 @@ block_allocator_strategy::first_fit(struct block_allocator::blockpair *blocks_ar } return nullptr; } + +// Best fit block allocation +struct block_allocator::blockpair * +block_allocator_strategy::best_fit(struct block_allocator::blockpair *blocks_array, + uint64_t n_blocks, uint64_t size, uint64_t alignment) { + struct block_allocator::blockpair *best_bp = nullptr; + uint64_t best_hole_size = 0; + for (uint64_t blocknum = 0; blocknum + 1 < n_blocks; blocknum++) { + // Consider the space after blocknum + struct block_allocator::blockpair *bp = &blocks_array[blocknum]; + uint64_t possible_offset = _align(bp->offset + bp->size, alignment); + if (possible_offset + size <= bp[1].offset) { + // It fits here. Is it the best fit? + uint64_t hole_size = (bp[1].offset - possible_offset) + size; + if (best_bp == nullptr || hole_size < best_hole_size) { + best_hole_size = hole_size; + best_bp = bp; + } + } + } + return best_bp; +} diff --git a/ft/serialize/block_allocator_strategy.h b/ft/serialize/block_allocator_strategy.h index 3dc5fca9813..bc8d3642034 100644 --- a/ft/serialize/block_allocator_strategy.h +++ b/ft/serialize/block_allocator_strategy.h @@ -100,6 +100,10 @@ public: first_fit(struct block_allocator::blockpair *blocks_array, uint64_t n_blocks, uint64_t size, uint64_t alignment); + static struct block_allocator::blockpair * + best_fit(struct block_allocator::blockpair *blocks_array, + uint64_t n_blocks, uint64_t size, uint64_t alignment); + private: // Effect: align a value by rounding up. 
static inline uint64_t _align(uint64_t value, uint64_t ba_alignment) { diff --git a/tools/ba_replay.cc b/tools/ba_replay.cc index 8d18b1718af..c1b491538a3 100644 --- a/tools/ba_replay.cc +++ b/tools/ba_replay.cc @@ -316,6 +316,8 @@ static const char *strategy_str(block_allocator::allocation_strategy strategy) { switch (strategy) { case block_allocator::allocation_strategy::BA_STRATEGY_FIRST_FIT: return "first-fit"; + case block_allocator::allocation_strategy::BA_STRATEGY_BEST_FIT: + return "best-fit"; default: abort(); } From 465cca901097fe5f3cfc64fa1774245199cdf708 Mon Sep 17 00:00:00 2001 From: John Esmet Date: Tue, 22 Jul 2014 09:36:36 -0400 Subject: [PATCH 105/190] FT-304 Fix a memory leak by further separating initialization paths in the block table --- ft/serialize/block_table.cc | 67 ++++++++++++++++++++----------------- 1 file changed, 37 insertions(+), 30 deletions(-) diff --git a/ft/serialize/block_table.cc b/ft/serialize/block_table.cc index 698752a5b06..10fd85dccd9 100644 --- a/ft/serialize/block_table.cc +++ b/ft/serialize/block_table.cc @@ -148,22 +148,12 @@ static_assert(block_allocator::BLOCK_ALLOCATOR_HEADER_RESERVE * 2 == // does NOT initialize the block allocator: the caller is responsible void block_table::_create_internal() { + memset(&_current, 0, sizeof(struct translation)); + memset(&_inprogress, 0, sizeof(struct translation)); + memset(&_checkpointed, 0, sizeof(struct translation)); memset(&_mutex, 0, sizeof(_mutex)); toku_mutex_init(&_mutex, nullptr); nb_mutex_init(&_safe_file_size_lock); - - _checkpointed.type = TRANSLATION_CHECKPOINTED; - _checkpointed.smallest_never_used_blocknum = make_blocknum(RESERVED_BLOCKNUMS); - _checkpointed.length_of_array = _checkpointed.smallest_never_used_blocknum.b; - _checkpointed.blocknum_freelist_head = freelist_null; - XMALLOC_N(_checkpointed.length_of_array, _checkpointed.block_translation); - for (int64_t i = 0; i < _checkpointed.length_of_array; i++) { - _checkpointed.block_translation[i].size = 0; - 
_checkpointed.block_translation[i].u.diskoff = diskoff_unused; - } - - // we just created a default checkpointed, now copy it to current. - _copy_translation(&_current, &_checkpointed, TRANSLATION_CURRENT); } // Fill in the checkpointed translation from buffer, and copy checkpointed to current. @@ -217,6 +207,19 @@ void block_table::create() { // Does not initialize the block allocator _create_internal(); + _checkpointed.type = TRANSLATION_CHECKPOINTED; + _checkpointed.smallest_never_used_blocknum = make_blocknum(RESERVED_BLOCKNUMS); + _checkpointed.length_of_array = _checkpointed.smallest_never_used_blocknum.b; + _checkpointed.blocknum_freelist_head = freelist_null; + XMALLOC_N(_checkpointed.length_of_array, _checkpointed.block_translation); + for (int64_t i = 0; i < _checkpointed.length_of_array; i++) { + _checkpointed.block_translation[i].size = 0; + _checkpointed.block_translation[i].u.diskoff = diskoff_unused; + } + + // we just created a default checkpointed, now copy it to current. + _copy_translation(&_current, &_checkpointed, TRANSLATION_CURRENT); + // Create an empty block allocator. 
_bt_block_allocator.create(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE, block_allocator::BLOCK_ALLOCATOR_ALIGNMENT); @@ -263,22 +266,25 @@ void block_table::maybe_truncate_file_on_open(int fd) { _mutex_unlock(); } -void block_table::_copy_translation(struct translation * dst, struct translation * src, enum translation_type newtype) { - paranoid_invariant(src->length_of_array >= src->smallest_never_used_blocknum.b); //verify invariant - paranoid_invariant(newtype==TRANSLATION_DEBUG || - (src->type == TRANSLATION_CURRENT && newtype == TRANSLATION_INPROGRESS) || - (src->type == TRANSLATION_CHECKPOINTED && newtype == TRANSLATION_CURRENT)); +void block_table::_copy_translation(struct translation *dst, struct translation *src, enum translation_type newtype) { + // We intend to malloc a fresh block, so the incoming translation should be empty + invariant_null(dst->block_translation); + + invariant(src->length_of_array >= src->smallest_never_used_blocknum.b); + invariant(newtype == TRANSLATION_DEBUG || + (src->type == TRANSLATION_CURRENT && newtype == TRANSLATION_INPROGRESS) || + (src->type == TRANSLATION_CHECKPOINTED && newtype == TRANSLATION_CURRENT)); dst->type = newtype; dst->smallest_never_used_blocknum = src->smallest_never_used_blocknum; - dst->blocknum_freelist_head = src->blocknum_freelist_head; - // destination btt is of fixed size. Allocate+memcpy the exact length necessary. - dst->length_of_array = dst->smallest_never_used_blocknum.b; + dst->blocknum_freelist_head = src->blocknum_freelist_head; + + // destination btt is of fixed size. Allocate + memcpy the exact length necessary. + dst->length_of_array = dst->smallest_never_used_blocknum.b; XMALLOC_N(dst->length_of_array, dst->block_translation); - memcpy(dst->block_translation, - src->block_translation, - dst->length_of_array * sizeof(*dst->block_translation)); - //New version of btt is not yet stored on disk. 
- dst->block_translation[RESERVED_BLOCKNUM_TRANSLATION].size = 0; + memcpy(dst->block_translation, src->block_translation, dst->length_of_array * sizeof(*dst->block_translation)); + + // New version of btt is not yet stored on disk. + dst->block_translation[RESERVED_BLOCKNUM_TRANSLATION].size = 0; dst->block_translation[RESERVED_BLOCKNUM_TRANSLATION].u.diskoff = diskoff_unused; } @@ -338,11 +344,12 @@ void block_table::_maybe_optimize_translation(struct translation *t) { // block table must be locked by caller of this function void block_table::note_start_checkpoint_unlocked() { toku_mutex_assert_locked(&_mutex); - // Copy current translation to inprogress translation. - paranoid_invariant(_inprogress.block_translation == NULL); - //We're going to do O(n) work to copy the translation, so we - //can afford to do O(n) work by optimizing the translation + + // We're going to do O(n) work to copy the translation, so we + // can afford to do O(n) work by optimizing the translation _maybe_optimize_translation(&_current); + + // Copy current translation to inprogress translation. 
_copy_translation(&_inprogress, &_current, TRANSLATION_INPROGRESS); _checkpoint_skipped = false; From a1680150cf5a632dc9825b0669656bf1379ca01c Mon Sep 17 00:00:00 2001 From: John Esmet Date: Tue, 22 Jul 2014 10:17:25 -0400 Subject: [PATCH 106/190] FT-304 Fix a maybe-uninitialized warning found by gcc 4.9 --- ft/serialize/block_table.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ft/serialize/block_table.cc b/ft/serialize/block_table.cc index 10fd85dccd9..1ae38d7dcba 100644 --- a/ft/serialize/block_table.cc +++ b/ft/serialize/block_table.cc @@ -882,6 +882,8 @@ int block_table::iterate(enum translation_type type, } struct translation fakecurrent; + memset(&fakecurrent, 0, sizeof(struct translation)); + struct translation *t = &fakecurrent; if (r == 0) { _mutex_lock(); From 2df69f5258188e96b97dbe9c72cf2d9626257745 Mon Sep 17 00:00:00 2001 From: Rich Prohaska Date: Tue, 22 Jul 2014 11:03:08 -0400 Subject: [PATCH 107/190] #261 Tokutek/mariadb-5.5#69 run part_index_scan test on mariadb --- mysql-test/suite/tokudb.bugs/t/part_index_scan.test | 1 + 1 file changed, 1 insertion(+) diff --git a/mysql-test/suite/tokudb.bugs/t/part_index_scan.test b/mysql-test/suite/tokudb.bugs/t/part_index_scan.test index 23d797af92f..77bae5cc90c 100644 --- a/mysql-test/suite/tokudb.bugs/t/part_index_scan.test +++ b/mysql-test/suite/tokudb.bugs/t/part_index_scan.test @@ -1,6 +1,7 @@ # verify that index scans on parititions are not slow # due totokudb bulk fetch not being used source include/have_tokudb.inc; +source include/have_partition.inc; set default_storage_engine='tokudb'; disable_warnings; drop table if exists t,t1,t2,t3; From 3d4e10eeebe564cf13df5c80c7e1994af8b0509f Mon Sep 17 00:00:00 2001 From: Rich Prohaska Date: Wed, 23 Jul 2014 10:00:13 -0400 Subject: [PATCH 108/190] #267 fix mdev5932 test and result --- mysql-test/suite/tokudb.bugs/r/mdev5932.result | 1 + mysql-test/suite/tokudb.bugs/t/mdev5932.test | 1 + 2 files changed, 2 insertions(+) diff --git 
a/mysql-test/suite/tokudb.bugs/r/mdev5932.result b/mysql-test/suite/tokudb.bugs/r/mdev5932.result index 2aaa321fed6..f179ee36f95 100644 --- a/mysql-test/suite/tokudb.bugs/r/mdev5932.result +++ b/mysql-test/suite/tokudb.bugs/r/mdev5932.result @@ -1,5 +1,6 @@ drop table if exists t1,t2; drop table if exists t1i,t2i; +drop table if exists tsub,t3; CREATE TABLE t1 (a CHAR(3), INDEX(a)) ENGINE=TokuDB; INSERT INTO t1 VALUES ('foo'),( NULL); SELECT * FROM t1 WHERE 'bar' NOT IN ( SELECT t1_1.a FROM t1 AS t1_1, t1 AS t1_2 ); diff --git a/mysql-test/suite/tokudb.bugs/t/mdev5932.test b/mysql-test/suite/tokudb.bugs/t/mdev5932.test index 34a1f7eab82..879b57ade63 100644 --- a/mysql-test/suite/tokudb.bugs/t/mdev5932.test +++ b/mysql-test/suite/tokudb.bugs/t/mdev5932.test @@ -4,6 +4,7 @@ source include/have_innodb.inc; disable_warnings; drop table if exists t1,t2; drop table if exists t1i,t2i; +drop table if exists tsub,t3; enable_warnings; CREATE TABLE t1 (a CHAR(3), INDEX(a)) ENGINE=TokuDB; From f17d5c171b4ff78e802193eb28ad5a13d80e6758 Mon Sep 17 00:00:00 2001 From: Rich Prohaska Date: Wed, 23 Jul 2014 13:50:34 -0400 Subject: [PATCH 109/190] #261 run multiple queries in part_index_scan --- .../tokudb.bugs/r/part_index_scan.result | 162 ++++++++++++++++++ .../suite/tokudb.bugs/t/part_index_scan.test | 73 ++++++-- 2 files changed, 218 insertions(+), 17 deletions(-) diff --git a/mysql-test/suite/tokudb.bugs/r/part_index_scan.result b/mysql-test/suite/tokudb.bugs/r/part_index_scan.result index 3ede692cc78..0d416c734b7 100644 --- a/mysql-test/suite/tokudb.bugs/r/part_index_scan.result +++ b/mysql-test/suite/tokudb.bugs/r/part_index_scan.result @@ -64,6 +64,60 @@ insert into t3 select * from t; select count(*) from t1; count(*) 8388608 +select count(*) from t1; +count(*) +8388608 +select count(*) from t1; +count(*) +8388608 +select count(*) from t1; +count(*) +8388608 +select count(*) from t1; +count(*) +8388608 +select count(*) from t1; +count(*) +8388608 +select count(*) from t1; 
+count(*) +8388608 +select count(*) from t1; +count(*) +8388608 +select count(*) from t1; +count(*) +8388608 +select count(*) from t1; +count(*) +8388608 +select count(*) from t2; +count(*) +8388608 +select count(*) from t2; +count(*) +8388608 +select count(*) from t2; +count(*) +8388608 +select count(*) from t2; +count(*) +8388608 +select count(*) from t2; +count(*) +8388608 +select count(*) from t2; +count(*) +8388608 +select count(*) from t2; +count(*) +8388608 +select count(*) from t2; +count(*) +8388608 +select count(*) from t2; +count(*) +8388608 select count(*) from t2; count(*) 8388608 @@ -71,10 +125,91 @@ count(*) select count(*) from t3; count(*) 8388608 +select count(*) from t3; +count(*) +8388608 +select count(*) from t3; +count(*) +8388608 +select count(*) from t3; +count(*) +8388608 +select count(*) from t3; +count(*) +8388608 +select count(*) from t3; +count(*) +8388608 +select count(*) from t3; +count(*) +8388608 +select count(*) from t3; +count(*) +8388608 +select count(*) from t3; +count(*) +8388608 +select count(*) from t3; +count(*) +8388608 1 select count(*) from t1 where num>7000000; count(*) 1847274 +select count(*) from t1 where num>7000000; +count(*) +1847274 +select count(*) from t1 where num>7000000; +count(*) +1847274 +select count(*) from t1 where num>7000000; +count(*) +1847274 +select count(*) from t1 where num>7000000; +count(*) +1847274 +select count(*) from t1 where num>7000000; +count(*) +1847274 +select count(*) from t1 where num>7000000; +count(*) +1847274 +select count(*) from t1 where num>7000000; +count(*) +1847274 +select count(*) from t1 where num>7000000; +count(*) +1847274 +select count(*) from t1 where num>7000000; +count(*) +1847274 +select count(*) from t2 where num>7000000; +count(*) +1847274 +select count(*) from t2 where num>7000000; +count(*) +1847274 +select count(*) from t2 where num>7000000; +count(*) +1847274 +select count(*) from t2 where num>7000000; +count(*) +1847274 +select count(*) from t2 where 
num>7000000; +count(*) +1847274 +select count(*) from t2 where num>7000000; +count(*) +1847274 +select count(*) from t2 where num>7000000; +count(*) +1847274 +select count(*) from t2 where num>7000000; +count(*) +1847274 +select count(*) from t2 where num>7000000; +count(*) +1847274 select count(*) from t2 where num>7000000; count(*) 1847274 @@ -82,5 +217,32 @@ count(*) select count(*) from t3 where num>7000000; count(*) 1847274 +select count(*) from t3 where num>7000000; +count(*) +1847274 +select count(*) from t3 where num>7000000; +count(*) +1847274 +select count(*) from t3 where num>7000000; +count(*) +1847274 +select count(*) from t3 where num>7000000; +count(*) +1847274 +select count(*) from t3 where num>7000000; +count(*) +1847274 +select count(*) from t3 where num>7000000; +count(*) +1847274 +select count(*) from t3 where num>7000000; +count(*) +1847274 +select count(*) from t3 where num>7000000; +count(*) +1847274 +select count(*) from t3 where num>7000000; +count(*) +1847274 1 drop table if exists t,t1,t2,t3; diff --git a/mysql-test/suite/tokudb.bugs/t/part_index_scan.test b/mysql-test/suite/tokudb.bugs/t/part_index_scan.test index 77bae5cc90c..be60fca3af3 100644 --- a/mysql-test/suite/tokudb.bugs/t/part_index_scan.test +++ b/mysql-test/suite/tokudb.bugs/t/part_index_scan.test @@ -1,10 +1,13 @@ # verify that index scans on parititions are not slow -# due totokudb bulk fetch not being used +# due to tokudb bulk fetch not being used source include/have_tokudb.inc; -source include/have_partition.inc; set default_storage_engine='tokudb'; disable_warnings; drop table if exists t,t1,t2,t3; +enable_warnings; + +let $debug = 0; +let $maxq = 10; CREATE TABLE `t` ( `num` int(10) unsigned auto_increment NOT NULL, @@ -13,6 +16,7 @@ CREATE TABLE `t` ( ); # put 8M rows into t +if ($debug) { let $ts = `select now()`; echo "start $ts"; } INSERT INTO t values (null,null); INSERT INTO t SELECT null,null FROM t; INSERT INTO t SELECT null,null FROM t; @@ -37,7 +41,9 @@ 
INSERT INTO t SELECT null,null FROM t; INSERT INTO t SELECT null,null FROM t; INSERT INTO t SELECT null,null FROM t; INSERT INTO t SELECT null,null FROM t; +if ($debug) { let $ts = `select now()`; echo "select $ts"; } SELECT count(*) FROM t; +if ($debug) { let $ts = `select now()`; echo "select done $ts"; } CREATE TABLE `t1` ( `num` int(10) unsigned NOT NULL, @@ -68,48 +74,81 @@ PARTITION BY RANGE (num) PARTITION p7 VALUES LESS THAN (8000000), PARTITION px VALUES LESS THAN MAXVALUE); +if ($debug) { let $ts = `select now()`; echo "insert t1 $ts"; } insert into t1 select * from t; +if ($debug) { let $ts = `select now()`; echo "insert t2 $ts"; } insert into t2 select * from t; +if ($debug) { let $ts = `select now()`; echo "insert t3 $ts"; } insert into t3 select * from t; +if ($debug) { let $ts = `select now()`; echo "select t1 $ts"; } # verify that full index scans on partitioned tables t2 and t3 are comparable to a non-partitioned table t1 let $s = `select to_seconds(now())`; -select count(*) from t1; +let $i = 0; +while ($i < $maxq) { + select count(*) from t1; + inc $i; +} let $t1 = `select to_seconds(now()) - $s`; -# echo $t1; + +if ($debug) { let $ts = `select now()`; echo "select t2 $ts"; } let $s = `select to_seconds(now())`; -select count(*) from t2; +let $i = 0; +while ($i < $maxq) { + select count(*) from t2; + inc $i; +} let $t2 = `select to_seconds(now()) - $s`; -# echo $t2; let $d = `select abs($t2 - $t1) <= $t1`; echo $d; +if ($debug) { let $ts = `select now()`; echo "select t3 $ts"; } + let $s = `select to_seconds(now())`; -select count(*) from t3; +let $i = 0; +while ($i < $maxq) { + select count(*) from t3; + inc $i; +} let $t3 = `select to_seconds(now()) - $s`; -# echo $t3; let $d = `select abs($t3 - $t1) <= $t1`; echo $d; -let $s = `select to_seconds(now())`; -select count(*) from t1 where num>7000000; -let $t1 = `select to_seconds(now()) - $s`; -# echo $t1; +if ($debug) { let $ts = `select now()`; echo "select t1 $ts"; } let $s = `select 
to_seconds(now())`; -select count(*) from t2 where num>7000000; +let $i = 0; +while ($i < $maxq) { + select count(*) from t1 where num>7000000; + inc $i; +} +let $t1 = `select to_seconds(now()) - $s`; + +if ($debug) { let $ts = `select now()`; echo "select t2 $ts"; } + +let $s = `select to_seconds(now())`; +let $i = 0; +while ($i < $maxq) { + select count(*) from t2 where num>7000000; + inc $i; +} let $t2 = `select to_seconds(now()) - $s`; -# echo $t2; let $d = `select abs($t2 - $t1) <= $t1`; echo $d; +if ($debug) { let $ts = `select now()`; echo "select t3 $ts"; } + let $s = `select to_seconds(now())`; -select count(*) from t3 where num>7000000; +let $i = 0; +while ($i < $maxq) { + select count(*) from t3 where num>7000000; + inc $i; +} let $t3 = `select to_seconds(now()) - $s`; -# echo $t3; let $d = `select abs($t3 - $t1) <= $t1`; echo $d; -enable_warnings; +if ($debug) { let $ts = `select now()`; echo "done $ts"; } + drop table if exists t,t1,t2,t3; From 82a4d5d11a6e63e23386ae318a6635ded53d789f Mon Sep 17 00:00:00 2001 From: RIch Prohaska Date: Wed, 23 Jul 2014 13:55:22 -0400 Subject: [PATCH 110/190] Update README.md --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index e49e26f118d..ff1773fc2b0 100644 --- a/README.md +++ b/README.md @@ -1,17 +1,17 @@ TokuDB ====== -TokuDB is a high-performance, transactional storage engine for MySQL and +TokuDB is a high-performance, write optimized, transactional storage engine for MySQL and MariaDB. For more details, see our [product page][products]. -This repository contains the MySQL plugin that uses the [TokuKV][tokukv] +This repository contains the MySQL plugin that uses the [TokuFT][tokuft] core. There are also patches to the MySQL and MariaDB kernels, available in our forks of [mysql][mysql] and [mariadb][mariadb]. 
[products]: http://www.tokutek.com/products/tokudb-for-mysql/ -[tokukv]: http://github.com/Tokutek/ft-index +[tokuft]: http://github.com/Tokutek/ft-index [mysql]: http://github.com/Tokutek/mysql [mariadb]: http://github.com/Tokutek/mariadb From 3e8a29888bdf62c11e20c1dccb2fe180025b61b0 Mon Sep 17 00:00:00 2001 From: John Esmet Date: Thu, 24 Jul 2014 13:19:30 -0400 Subject: [PATCH 111/190] FT-300 Add 'heat' to the block allocator API, which is a hint for how likely the allocation will need to move again at the next checkpoint (we pass the node height for this value). The new heat zone allocation strategy uses the heat value to put nonleaf nodes towards the end of the file and leaf nodes towards the beginning. --- ft/serialize/block_allocator.cc | 33 ++++------ ft/serialize/block_allocator.h | 17 ++++-- ft/serialize/block_allocator_strategy.cc | 77 +++++++++++++++++++++--- ft/serialize/block_allocator_strategy.h | 11 ++-- ft/serialize/block_table.cc | 12 ++-- ft/serialize/block_table.h | 4 +- ft/serialize/ft_node-serialize.cc | 12 +++- ft/tests/block_allocator_test.cc | 2 +- ft/tests/ft-bfe-query.cc | 2 +- ft/tests/ft-clock-test.cc | 4 +- ft/tests/ft-serialize-benchmark.cc | 4 +- ft/tests/ft-serialize-test.cc | 14 ++--- ft/txn/rollback-ct-callbacks.cc | 2 +- tools/ba_replay.cc | 10 ++- 14 files changed, 140 insertions(+), 64 deletions(-) diff --git a/ft/serialize/block_allocator.cc b/ft/serialize/block_allocator.cc index fcec2b70815..97b7f777621 100644 --- a/ft/serialize/block_allocator.cc +++ b/ft/serialize/block_allocator.cc @@ -89,8 +89,9 @@ PATENT RIGHTS GRANT: #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
#ident "$Id$" -#include -#include +#include + +#include #include "portability/memory.h" #include "portability/toku_assert.h" @@ -186,18 +187,6 @@ void block_allocator::grow_blocks_array() { grow_blocks_array_by(1); } -int block_allocator::compare_blockpairs(const void *av, const void *bv) { - const struct blockpair *a = (const struct blockpair *) av; - const struct blockpair *b = (const struct blockpair *) bv; - if (a->offset < b->offset) { - return -1; - } else if (a->offset > b->offset) { - return 1; - } else { - return 0; - } -} - void block_allocator::create_from_blockpairs(uint64_t reserve_at_beginning, uint64_t alignment, struct blockpair *pairs, uint64_t n_blocks) { _create_internal(reserve_at_beginning, alignment); @@ -205,7 +194,7 @@ void block_allocator::create_from_blockpairs(uint64_t reserve_at_beginning, uint _n_blocks = n_blocks; grow_blocks_array_by(_n_blocks); memcpy(_blocks_array, pairs, _n_blocks * sizeof(struct blockpair)); - qsort(_blocks_array, _n_blocks, sizeof(struct blockpair), compare_blockpairs); + std::sort(_blocks_array, _blocks_array + _n_blocks); for (uint64_t i = 0; i < _n_blocks; i++) { // Allocator does not support size 0 blocks. See block_allocator_free_block. invariant(_blocks_array[i].size > 0); @@ -224,19 +213,21 @@ static inline uint64_t align(uint64_t value, uint64_t ba_alignment) { } struct block_allocator::blockpair * -block_allocator::choose_block_to_alloc_after(size_t size) { +block_allocator::choose_block_to_alloc_after(size_t size, uint64_t heat) { switch (_strategy) { case BA_STRATEGY_FIRST_FIT: return block_allocator_strategy::first_fit(_blocks_array, _n_blocks, size, _alignment); case BA_STRATEGY_BEST_FIT: return block_allocator_strategy::best_fit(_blocks_array, _n_blocks, size, _alignment); + case BA_STRATEGY_HEAT_ZONE: + return block_allocator_strategy::heat_zone(_blocks_array, _n_blocks, size, _alignment, heat); default: abort(); } } // Effect: Allocate a block. 
The resulting block must be aligned on the ba->alignment (which to make direct_io happy must be a positive multiple of 512). -void block_allocator::alloc_block(uint64_t size, uint64_t *offset) { +void block_allocator::alloc_block(uint64_t size, uint64_t heat, uint64_t *offset) { struct blockpair *bp; // Allocator does not support size 0 blocks. See block_allocator_free_block. @@ -264,7 +255,7 @@ void block_allocator::alloc_block(uint64_t size, uint64_t *offset) { goto done; } - bp = choose_block_to_alloc_after(size); + bp = choose_block_to_alloc_after(size, heat); if (bp != nullptr) { // our allocation strategy chose the space after `bp' to fit the new block uint64_t answer_offset = align(bp->offset + bp->size, _alignment); @@ -289,8 +280,10 @@ done: VALIDATE(); if (ba_trace_file != nullptr) { - fprintf(ba_trace_file, "ba_trace_alloc %p %lu %lu\n", - this, static_cast(size), static_cast(*offset)); + fprintf(ba_trace_file, "ba_trace_alloc %p %lu %lu %lu\n", + this, static_cast(size), + static_cast(heat), + static_cast(*offset)); fflush(ba_trace_file); } } diff --git a/ft/serialize/block_allocator.h b/ft/serialize/block_allocator.h index 32d97c000a2..e7791006a1f 100644 --- a/ft/serialize/block_allocator.h +++ b/ft/serialize/block_allocator.h @@ -126,7 +126,8 @@ public: enum allocation_strategy { BA_STRATEGY_FIRST_FIT = 1, - BA_STRATEGY_BEST_FIT + BA_STRATEGY_BEST_FIT, + BA_STRATEGY_HEAT_ZONE }; struct blockpair { @@ -135,6 +136,12 @@ public: blockpair(uint64_t o, uint64_t s) : offset(o), size(s) { } + int operator<(const struct blockpair &rhs) { + return offset < rhs.offset; + } + int operator<(const uint64_t &o) { + return offset < o; + } }; // Effect: Create a block allocator, in which the first RESERVE_AT_BEGINNING bytes are not put into a block. @@ -172,7 +179,9 @@ public: // Parameters: // size (IN): The size of the block. (The size does not have to be aligned.) // offset (OUT): The location of the block. 
- void alloc_block(uint64_t size, uint64_t *offset); + // heat (IN): A higher heat means we should be prepared to free this block soon (perhaps in the next checkpoint) + // Heat values are lexiographically ordered (like integers), but their specific values are arbitrary + void alloc_block(uint64_t size, uint64_t heat, uint64_t *offset); // Effect: Free the block at offset. // Requires: There must be a block currently allocated at that offset. @@ -229,9 +238,7 @@ private: void grow_blocks_array_by(uint64_t n_to_add); void grow_blocks_array(); int64_t find_block(uint64_t offset); - struct blockpair *choose_block_to_alloc_after(size_t size); - - static int compare_blockpairs(const void *av, const void *bv); + struct blockpair *choose_block_to_alloc_after(size_t size, uint64_t heat); // How much to reserve at the beginning uint64_t _reserve_at_beginning; diff --git a/ft/serialize/block_allocator_strategy.cc b/ft/serialize/block_allocator_strategy.cc index 5c60b094573..c5575cef164 100644 --- a/ft/serialize/block_allocator_strategy.cc +++ b/ft/serialize/block_allocator_strategy.cc @@ -86,13 +86,31 @@ PATENT RIGHTS GRANT: under this License. 
*/ +#include + +#include "portability/toku_assert.h" + #include "ft/serialize/block_allocator_strategy.h" +static uint64_t _align(uint64_t value, uint64_t ba_alignment) { + return ((value + ba_alignment - 1) / ba_alignment) * ba_alignment; +} + // First fit block allocation -struct block_allocator::blockpair * -block_allocator_strategy::first_fit(struct block_allocator::blockpair *blocks_array, - uint64_t n_blocks, uint64_t size, uint64_t alignment) { - for (uint64_t blocknum = 0; blocknum + 1 < n_blocks; blocknum++) { +static struct block_allocator::blockpair * +_first_fit(struct block_allocator::blockpair *blocks_array, + uint64_t n_blocks, uint64_t size, uint64_t alignment, + bool forward) { + if (n_blocks == 1) { + // won't enter loop, can't underflow the direction < 0 case + return nullptr; + } + + for (uint64_t n_spaces_to_check = n_blocks - 1, + blocknum = forward ? 0 : n_blocks - 2; + n_spaces_to_check > 0; + n_spaces_to_check--, forward ? blocknum++ : blocknum--) { + invariant(blocknum < n_blocks); // Consider the space after blocknum struct block_allocator::blockpair *bp = &blocks_array[blocknum]; uint64_t possible_offset = _align(bp->offset + bp->size, alignment); @@ -103,19 +121,26 @@ block_allocator_strategy::first_fit(struct block_allocator::blockpair *blocks_ar return nullptr; } +struct block_allocator::blockpair * +block_allocator_strategy::first_fit(struct block_allocator::blockpair *blocks_array, + uint64_t n_blocks, uint64_t size, uint64_t alignment) { + return _first_fit(blocks_array, n_blocks, size, alignment, true); +} + // Best fit block allocation struct block_allocator::blockpair * block_allocator_strategy::best_fit(struct block_allocator::blockpair *blocks_array, - uint64_t n_blocks, uint64_t size, uint64_t alignment) { + uint64_t n_blocks, uint64_t size, uint64_t alignment) { struct block_allocator::blockpair *best_bp = nullptr; uint64_t best_hole_size = 0; for (uint64_t blocknum = 0; blocknum + 1 < n_blocks; blocknum++) { // Consider the 
space after blocknum struct block_allocator::blockpair *bp = &blocks_array[blocknum]; uint64_t possible_offset = _align(bp->offset + bp->size, alignment); - if (possible_offset + size <= bp[1].offset) { + uint64_t possible_end_offset = possible_offset + size; + if (possible_end_offset <= bp[1].offset) { // It fits here. Is it the best fit? - uint64_t hole_size = (bp[1].offset - possible_offset) + size; + uint64_t hole_size = bp[1].offset - possible_end_offset; if (best_bp == nullptr || hole_size < best_hole_size) { best_hole_size = hole_size; best_bp = bp; @@ -124,3 +149,41 @@ block_allocator_strategy::best_fit(struct block_allocator::blockpair *blocks_arr } return best_bp; } + +struct block_allocator::blockpair * +block_allocator_strategy::heat_zone(struct block_allocator::blockpair *blocks_array, + uint64_t n_blocks, uint64_t size, uint64_t alignment, + uint64_t heat) { + if (heat > 0) { + const double hot_zone_threshold = 0.85; + + // Hot allocation. Find the beginning of the hot zone. + struct block_allocator::blockpair *bp = &blocks_array[n_blocks - 1]; + uint64_t highest_offset = _align(bp->offset + bp->size, alignment); + uint64_t hot_zone_offset = static_cast(hot_zone_threshold * highest_offset); + + bp = std::lower_bound(blocks_array, blocks_array + n_blocks, hot_zone_offset); + uint64_t blocks_in_zone = (blocks_array + n_blocks) - bp; + uint64_t blocks_outside_zone = bp - blocks_array; + invariant(blocks_in_zone + blocks_outside_zone == n_blocks); + + if (blocks_in_zone > 0) { + // Find the first fit in the hot zone, going forward. + bp = _first_fit(bp, blocks_in_zone, size, alignment, true); + if (bp != nullptr) { + return bp; + } + } + if (blocks_outside_zone > 0) { + // Find the first fit in the cold zone, going backwards. + bp = _first_fit(bp, blocks_outside_zone, size, alignment, false); + if (bp != nullptr) { + return bp; + } + } + } else { + // Cold allocations are simply first-fit from the beginning. 
+ return _first_fit(blocks_array, n_blocks, size, alignment, true); + } + return nullptr; +} diff --git a/ft/serialize/block_allocator_strategy.h b/ft/serialize/block_allocator_strategy.h index bc8d3642034..ffa6412659f 100644 --- a/ft/serialize/block_allocator_strategy.h +++ b/ft/serialize/block_allocator_strategy.h @@ -102,11 +102,10 @@ public: static struct block_allocator::blockpair * best_fit(struct block_allocator::blockpair *blocks_array, - uint64_t n_blocks, uint64_t size, uint64_t alignment); + uint64_t n_blocks, uint64_t size, uint64_t alignment); -private: - // Effect: align a value by rounding up. - static inline uint64_t _align(uint64_t value, uint64_t ba_alignment) { - return ((value + ba_alignment - 1) / ba_alignment) * ba_alignment; - } + static struct block_allocator::blockpair * + heat_zone(struct block_allocator::blockpair *blocks_array, + uint64_t n_blocks, uint64_t size, uint64_t alignment, + uint64_t heat); }; diff --git a/ft/serialize/block_table.cc b/ft/serialize/block_table.cc index 1ae38d7dcba..4bbe709934d 100644 --- a/ft/serialize/block_table.cc +++ b/ft/serialize/block_table.cc @@ -447,7 +447,7 @@ bool block_table::_translation_prevents_freeing(struct translation *t, BLOCKNUM old_pair->u.diskoff == t->block_translation[b.b].u.diskoff; } -void block_table::_realloc_on_disk_internal(BLOCKNUM b, DISKOFF size, DISKOFF *offset, FT ft, bool for_checkpoint) { +void block_table::_realloc_on_disk_internal(BLOCKNUM b, DISKOFF size, DISKOFF *offset, FT ft, bool for_checkpoint, uint64_t heat) { toku_mutex_assert_locked(&_mutex); ft_set_dirty(ft, for_checkpoint); @@ -466,7 +466,7 @@ void block_table::_realloc_on_disk_internal(BLOCKNUM b, DISKOFF size, DISKOFF *o if (size > 0) { // Allocate a new block if the size is greater than 0, // if the size is just 0, offset will be set to diskoff_unused - _bt_block_allocator.alloc_block(size, &allocator_offset); + _bt_block_allocator.alloc_block(size, heat, &allocator_offset); } 
t->block_translation[b.b].u.diskoff = allocator_offset; *offset = allocator_offset; @@ -497,11 +497,11 @@ void block_table::_ensure_safe_write_unlocked(int fd, DISKOFF block_size, DISKOF } } -void block_table::realloc_on_disk(BLOCKNUM b, DISKOFF size, DISKOFF *offset, FT ft, int fd, bool for_checkpoint) { +void block_table::realloc_on_disk(BLOCKNUM b, DISKOFF size, DISKOFF *offset, FT ft, int fd, bool for_checkpoint, uint64_t heat) { _mutex_lock(); struct translation *t = &_current; _verify_valid_freeable_blocknum(t, b); - _realloc_on_disk_internal(b, size, offset, ft, for_checkpoint); + _realloc_on_disk_internal(b, size, offset, ft, for_checkpoint, heat); _ensure_safe_write_unlocked(fd, size, *offset); _mutex_unlock(); @@ -526,7 +526,7 @@ void block_table::_alloc_inprogress_translation_on_disk_unlocked() { //Allocate a new block int64_t size = _calculate_size_on_disk(t); uint64_t offset; - _bt_block_allocator.alloc_block(size, &offset); + _bt_block_allocator.alloc_block(size, 0, &offset); t->block_translation[b.b].u.diskoff = offset; t->block_translation[b.b].size = size; } @@ -930,7 +930,7 @@ void block_table::internal_fragmentation(int64_t *total_sizep, int64_t *used_siz void block_table::_realloc_descriptor_on_disk_unlocked(DISKOFF size, DISKOFF *offset, FT ft) { toku_mutex_assert_locked(&_mutex); BLOCKNUM b = make_blocknum(RESERVED_BLOCKNUM_DESCRIPTOR); - _realloc_on_disk_internal(b, size, offset, ft, false); + _realloc_on_disk_internal(b, size, offset, ft, false, 0); } void block_table::realloc_descriptor_on_disk(DISKOFF size, DISKOFF *offset, FT ft, int fd) { diff --git a/ft/serialize/block_table.h b/ft/serialize/block_table.h index 8e9e2279526..280c8be23fa 100644 --- a/ft/serialize/block_table.h +++ b/ft/serialize/block_table.h @@ -167,7 +167,7 @@ public: // Blocknums void allocate_blocknum(BLOCKNUM *res, struct ft *ft); - void realloc_on_disk(BLOCKNUM b, DISKOFF size, DISKOFF *offset, struct ft *ft, int fd, bool for_checkpoint); + void 
realloc_on_disk(BLOCKNUM b, DISKOFF size, DISKOFF *offset, struct ft *ft, int fd, bool for_checkpoint, uint64_t heat); void free_blocknum(BLOCKNUM *b, struct ft *ft, bool for_checkpoint); void translate_blocknum_to_offset_size(BLOCKNUM b, DISKOFF *offset, DISKOFF *size); void free_unused_blocknums(BLOCKNUM root); @@ -258,7 +258,7 @@ private: void _allocate_blocknum_unlocked(BLOCKNUM *res, struct ft *ft); void _free_blocknum_unlocked(BLOCKNUM *bp, struct ft *ft, bool for_checkpoint); void _realloc_descriptor_on_disk_unlocked(DISKOFF size, DISKOFF *offset, struct ft *ft); - void _realloc_on_disk_internal(BLOCKNUM b, DISKOFF size, DISKOFF *offset, struct ft *ft, bool for_checkpoint); + void _realloc_on_disk_internal(BLOCKNUM b, DISKOFF size, DISKOFF *offset, struct ft *ft, bool for_checkpoint, uint64_t heat); void _translate_blocknum_to_offset_size_unlocked(BLOCKNUM b, DISKOFF *offset, DISKOFF *size); // File management diff --git a/ft/serialize/ft_node-serialize.cc b/ft/serialize/ft_node-serialize.cc index 9aeb2aaeedd..d1900b4be41 100644 --- a/ft/serialize/ft_node-serialize.cc +++ b/ft/serialize/ft_node-serialize.cc @@ -847,8 +847,12 @@ toku_serialize_ftnode_to (int fd, BLOCKNUM blocknum, FTNODE node, FTNODE_DISK_DA invariant(blocknum.b>=0); DISKOFF offset; + // Dirties the ft ft->blocktable.realloc_on_disk(blocknum, n_to_write, &offset, - ft, fd, for_checkpoint); //dirties h + ft, fd, for_checkpoint, + // Allocations for nodes high in the tree are considered 'hot', + // as they are likely to move again in the next checkpoint. 
+ node->height); tokutime_t t0 = toku_time_now(); toku_os_full_pwrite(fd, compressed_buf, n_to_write, offset); @@ -2542,7 +2546,11 @@ toku_serialize_rollback_log_to (int fd, ROLLBACK_LOG_NODE log, SERIALIZED_ROLLBA // Dirties the ft DISKOFF offset; ft->blocktable.realloc_on_disk(blocknum, n_to_write, &offset, - ft, fd, for_checkpoint); + ft, fd, for_checkpoint, + // We consider rollback log flushing the hottest possible allocation, + // since rollback logs are short-lived compared to FT nodes. + INT_MAX); + toku_os_full_pwrite(fd, compressed_buf, n_to_write, offset); toku_free(compressed_buf); if (!is_serialized) { diff --git a/ft/tests/block_allocator_test.cc b/ft/tests/block_allocator_test.cc index 308663d7a12..b15f115d268 100644 --- a/ft/tests/block_allocator_test.cc +++ b/ft/tests/block_allocator_test.cc @@ -93,7 +93,7 @@ PATENT RIGHTS GRANT: static void ba_alloc(block_allocator *ba, uint64_t size, uint64_t *answer) { ba->validate(); uint64_t actual_answer; - ba->alloc_block(512 * size, &actual_answer); + ba->alloc_block(512 * size, 0, &actual_answer); ba->validate(); assert(actual_answer%512==0); diff --git a/ft/tests/ft-bfe-query.cc b/ft/tests/ft-bfe-query.cc index 2cf2741bd8e..3eb9ff7129b 100644 --- a/ft/tests/ft-bfe-query.cc +++ b/ft/tests/ft-bfe-query.cc @@ -434,7 +434,7 @@ test_prefetching(void) { { DISKOFF offset; DISKOFF size; - ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false); + ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0); assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size); diff --git a/ft/tests/ft-clock-test.cc b/ft/tests/ft-clock-test.cc index 4660b9f7e11..e3d8d2bedab 100644 --- a/ft/tests/ft-clock-test.cc +++ b/ft/tests/ft-clock-test.cc @@ -370,7 +370,7 @@ test_serialize_nonleaf(void) { { DISKOFF offset; DISKOFF size; - ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false); + 
ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0); assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size); @@ -450,7 +450,7 @@ test_serialize_leaf(void) { { DISKOFF offset; DISKOFF size; - ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false); + ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0); assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size); diff --git a/ft/tests/ft-serialize-benchmark.cc b/ft/tests/ft-serialize-benchmark.cc index 437ca6d9505..75f425f4859 100644 --- a/ft/tests/ft-serialize-benchmark.cc +++ b/ft/tests/ft-serialize-benchmark.cc @@ -210,7 +210,7 @@ test_serialize_leaf(int valsize, int nelts, double entropy, int ser_runs, int de { DISKOFF offset; DISKOFF size; - ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false); + ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0); assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size); @@ -373,7 +373,7 @@ test_serialize_nonleaf(int valsize, int nelts, double entropy, int ser_runs, int { DISKOFF offset; DISKOFF size; - ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false); + ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0); assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size); diff --git a/ft/tests/ft-serialize-test.cc b/ft/tests/ft-serialize-test.cc index 80640a4b5ef..d97c174d71a 100644 --- a/ft/tests/ft-serialize-test.cc +++ b/ft/tests/ft-serialize-test.cc @@ -314,7 +314,7 @@ test_serialize_leaf_check_msn(enum ftnode_verify_type bft, bool do_clone) { { DISKOFF offset; DISKOFF size; - 
ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false); + ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0); assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size); @@ -447,7 +447,7 @@ test_serialize_leaf_with_large_pivots(enum ftnode_verify_type bft, bool do_clone { DISKOFF offset; DISKOFF size; - ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false); + ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0); assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size); @@ -573,7 +573,7 @@ test_serialize_leaf_with_many_rows(enum ftnode_verify_type bft, bool do_clone) { { DISKOFF offset; DISKOFF size; - ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false); + ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0); assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size); @@ -708,7 +708,7 @@ test_serialize_leaf_with_large_rows(enum ftnode_verify_type bft, bool do_clone) { DISKOFF offset; DISKOFF size; - ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false); + ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0); assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size); @@ -844,7 +844,7 @@ test_serialize_leaf_with_empty_basement_nodes(enum ftnode_verify_type bft, bool { DISKOFF offset; DISKOFF size; - ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false); + ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0); assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size); 
@@ -964,7 +964,7 @@ test_serialize_leaf_with_multiple_empty_basement_nodes(enum ftnode_verify_type b { DISKOFF offset; DISKOFF size; - ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false); + ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0); assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size); @@ -1087,7 +1087,7 @@ test_serialize_nonleaf(enum ftnode_verify_type bft, bool do_clone) { { DISKOFF offset; DISKOFF size; - ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false); + ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0); assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size); diff --git a/ft/txn/rollback-ct-callbacks.cc b/ft/txn/rollback-ct-callbacks.cc index df98665b209..27ccfa4bc11 100644 --- a/ft/txn/rollback-ct-callbacks.cc +++ b/ft/txn/rollback-ct-callbacks.cc @@ -126,7 +126,7 @@ toku_rollback_flush_unused_log( { if (write_me) { DISKOFF offset; - ft->blocktable.realloc_on_disk(logname, 0, &offset, ft, fd, for_checkpoint); + ft->blocktable.realloc_on_disk(logname, 0, &offset, ft, fd, for_checkpoint, INT_MAX); } if (!keep_me && !is_clone) { toku_free(log); diff --git a/tools/ba_replay.cc b/tools/ba_replay.cc index c1b491538a3..062feee9de1 100644 --- a/tools/ba_replay.cc +++ b/tools/ba_replay.cc @@ -208,6 +208,7 @@ static vector canonicalize_trace_from(FILE *file) { if (fn == "ba_trace_alloc") { const uint64_t size = parse_uint64(&ptr, line_num); + const uint64_t heat = parse_uint64(&ptr, line_num); const uint64_t offset = parse_uint64(&ptr, line_num); ba_replay_assert(map->count(offset) == 0, "corrupted trace: double alloc", line, line_num); @@ -215,7 +216,7 @@ static vector canonicalize_trace_from(FILE *file) { (*map)[offset] = allocation_seq_num; // translate `offset = alloc(size)' to `asn = alloc(size)' 
- ss << fn << ' ' << canonical_allocator_id << ' ' << size << ' ' << allocation_seq_num << std::endl; + ss << fn << ' ' << canonical_allocator_id << ' ' << size << ' ' << heat << ' ' << allocation_seq_num << std::endl; allocation_seq_num++; } else if (fn == "ba_trace_free") { const uint64_t offset = parse_uint64(&ptr, line_num); @@ -282,12 +283,13 @@ static void replay_canonicalized_trace(const vector &canonicalized_trace block_allocator *ba = (*allocator_map)[allocator_id]; if (fn == "ba_trace_alloc") { const uint64_t size = parse_uint64(&ptr, line_num); + const uint64_t heat = parse_uint64(&ptr, line_num); const uint64_t asn = parse_uint64(&ptr, line_num); ba_replay_assert(seq_num_to_offset.count(asn) == 0, "corrupted canonical trace: double alloc (asn in use)", line, line_num); uint64_t offset; - ba->alloc_block(size, &offset); + ba->alloc_block(size, heat, &offset); seq_num_to_offset[asn] = offset; } else if (fn == "ba_trace_free") { const uint64_t asn = parse_uint64(&ptr, line_num); @@ -318,6 +320,8 @@ static const char *strategy_str(block_allocator::allocation_strategy strategy) { return "first-fit"; case block_allocator::allocation_strategy::BA_STRATEGY_BEST_FIT: return "best-fit"; + case block_allocator::allocation_strategy::BA_STRATEGY_HEAT_ZONE: + return "heat-zone"; default: abort(); } @@ -361,6 +365,8 @@ int main(void) { vector candidate_strategies; candidate_strategies.push_back(block_allocator::allocation_strategy::BA_STRATEGY_FIRST_FIT); + candidate_strategies.push_back(block_allocator::allocation_strategy::BA_STRATEGY_BEST_FIT); + candidate_strategies.push_back(block_allocator::allocation_strategy::BA_STRATEGY_HEAT_ZONE); for (vector::const_iterator it = candidate_strategies.begin(); it != candidate_strategies.end(); it++) { From 45794eac7fb63f38b84be6c12bd5020fd1d84387 Mon Sep 17 00:00:00 2001 From: John Esmet Date: Thu, 24 Jul 2014 15:38:12 -0400 Subject: [PATCH 112/190] FT-279 Clean up ftnode_fetch_extra struct and, most importantly, its 
initialization code --- ft/ft-cachetable-wrappers.cc | 6 +- ft/ft-cachetable-wrappers.h | 6 +- ft/ft-flusher.cc | 20 +- ft/ft-hot-flusher.cc | 4 +- ft/ft-internal.h | 134 ++++--- ft/ft-ops.cc | 370 ++++++++----------- ft/ft-test-helpers.cc | 16 +- ft/ft-verify.cc | 4 +- ft/ft.cc | 4 +- ft/node.h | 67 ---- ft/serialize/ft_node-serialize.cc | 56 ++- ft/serialize/ft_node-serialize.h | 6 +- ft/tests/ft-bfe-query.cc | 33 +- ft/tests/ft-clock-test.cc | 18 +- ft/tests/ft-serialize-benchmark.cc | 8 +- ft/tests/ft-serialize-test.cc | 12 +- ft/tests/test-checkpoint-during-flush.cc | 6 +- ft/tests/test-checkpoint-during-merge.cc | 6 +- ft/tests/test-checkpoint-during-rebalance.cc | 6 +- ft/tests/test-checkpoint-during-split.cc | 6 +- ft/tests/test-dirty-flushes-on-cleaner.cc | 10 +- ft/tests/test-flushes-on-cleaner.cc | 12 +- ft/tests/test-hot-with-bounds.cc | 6 +- ft/tests/test-merges-on-cleaner.cc | 6 +- ft/tests/test-oldest-referenced-xid-flush.cc | 4 +- ft/tests/test3856.cc | 2 +- ft/tests/test3884.cc | 2 +- ft/tests/test4244.cc | 4 +- ft/tests/test_rightmost_leaf_split_merge.cc | 4 +- tools/tokuftdump.cc | 10 +- 30 files changed, 387 insertions(+), 461 deletions(-) diff --git a/ft/ft-cachetable-wrappers.cc b/ft/ft-cachetable-wrappers.cc index 685de99fec2..d80dea884cd 100644 --- a/ft/ft-cachetable-wrappers.cc +++ b/ft/ft-cachetable-wrappers.cc @@ -209,7 +209,7 @@ toku_pin_ftnode_for_query( UNLOCKERS unlockers, ANCESTORS ancestors, const pivot_bounds &bounds, - FTNODE_FETCH_EXTRA bfe, + ftnode_fetch_extra *bfe, bool apply_ancestor_messages, // this bool is probably temporary, for #3972, once we know how range query estimates work, will revisit this FTNODE *node_p, bool* msgs_applied) @@ -322,7 +322,7 @@ toku_pin_ftnode_with_dep_nodes( FT ft, BLOCKNUM blocknum, uint32_t fullhash, - FTNODE_FETCH_EXTRA bfe, + ftnode_fetch_extra *bfe, pair_lock_type lock_type, uint32_t num_dependent_nodes, FTNODE *dependent_nodes, @@ -364,7 +364,7 @@ toku_pin_ftnode_with_dep_nodes( void 
toku_pin_ftnode(FT ft, BLOCKNUM blocknum, uint32_t fullhash, - FTNODE_FETCH_EXTRA bfe, + ftnode_fetch_extra *bfe, pair_lock_type lock_type, FTNODE *node_p, bool move_messages) { diff --git a/ft/ft-cachetable-wrappers.h b/ft/ft-cachetable-wrappers.h index 5af425e18ff..d65ab083efd 100644 --- a/ft/ft-cachetable-wrappers.h +++ b/ft/ft-cachetable-wrappers.h @@ -148,7 +148,7 @@ toku_pin_ftnode_for_query( UNLOCKERS unlockers, ANCESTORS ancestors, const pivot_bounds &bounds, - FTNODE_FETCH_EXTRA bfe, + ftnode_fetch_extra *bfe, bool apply_ancestor_messages, // this bool is probably temporary, for #3972, once we know how range query estimates work, will revisit this FTNODE *node_p, bool* msgs_applied @@ -159,7 +159,7 @@ void toku_pin_ftnode( FT ft, BLOCKNUM blocknum, uint32_t fullhash, - FTNODE_FETCH_EXTRA bfe, + ftnode_fetch_extra *bfe, pair_lock_type lock_type, FTNODE *node_p, bool move_messages @@ -171,7 +171,7 @@ void toku_pin_ftnode_with_dep_nodes( FT ft, BLOCKNUM blocknum, uint32_t fullhash, - FTNODE_FETCH_EXTRA bfe, + ftnode_fetch_extra *bfe, pair_lock_type lock_type, uint32_t num_dependent_nodes, FTNODE *dependent_nodes, diff --git a/ft/ft-flusher.cc b/ft/ft-flusher.cc index e1d76a5d8f4..f9b342da3cd 100644 --- a/ft/ft-flusher.cc +++ b/ft/ft-flusher.cc @@ -491,8 +491,8 @@ ct_maybe_merge_child(struct flusher_advice *fa, uint32_t fullhash; CACHEKEY root; toku_calculate_root_offset_pointer(ft, &root, &fullhash); - struct ftnode_fetch_extra bfe; - fill_bfe_for_full_read(&bfe, ft); + ftnode_fetch_extra bfe; + bfe.create_for_full_read(ft); toku_pin_ftnode(ft, root, fullhash, &bfe, PL_WRITE_EXPENSIVE, &root_node, true); toku_ftnode_assert_fully_in_memory(root_node); } @@ -1075,8 +1075,8 @@ ft_split_child( static void bring_node_fully_into_memory(FTNODE node, FT ft) { if (!toku_ftnode_fully_in_memory(node)) { - struct ftnode_fetch_extra bfe; - fill_bfe_for_full_read(&bfe, ft); + ftnode_fetch_extra bfe; + bfe.create_for_full_read(ft); toku_cachetable_pf_pinned_pair( node, 
toku_ftnode_pf_callback, @@ -1379,8 +1379,8 @@ ft_merge_child( FTNODE childa, childb; { uint32_t childfullhash = compute_child_fullhash(ft->cf, node, childnuma); - struct ftnode_fetch_extra bfe; - fill_bfe_for_full_read(&bfe, ft); + ftnode_fetch_extra bfe; + bfe.create_for_full_read(ft); toku_pin_ftnode_with_dep_nodes(ft, BP_BLOCKNUM(node, childnuma), childfullhash, &bfe, PL_WRITE_EXPENSIVE, 1, &node, &childa, true); } // for test @@ -1390,8 +1390,8 @@ ft_merge_child( dep_nodes[0] = node; dep_nodes[1] = childa; uint32_t childfullhash = compute_child_fullhash(ft->cf, node, childnumb); - struct ftnode_fetch_extra bfe; - fill_bfe_for_full_read(&bfe, ft); + ftnode_fetch_extra bfe; + bfe.create_for_full_read(ft); toku_pin_ftnode_with_dep_nodes(ft, BP_BLOCKNUM(node, childnumb), childfullhash, &bfe, PL_WRITE_EXPENSIVE, 2, dep_nodes, &childb, true); } @@ -1520,10 +1520,10 @@ void toku_ft_flush_some_child(FT ft, FTNODE parent, struct flusher_advice *fa) ft->blocktable.verify_blocknum_allocated(targetchild); uint32_t childfullhash = compute_child_fullhash(ft->cf, parent, childnum); FTNODE child; - struct ftnode_fetch_extra bfe; + ftnode_fetch_extra bfe; // Note that we don't read the entire node into memory yet. // The idea is let's try to do the minimum work before releasing the parent lock - fill_bfe_for_min_read(&bfe, ft); + bfe.create_for_min_read(ft); toku_pin_ftnode_with_dep_nodes(ft, targetchild, childfullhash, &bfe, PL_WRITE_EXPENSIVE, 1, &parent, &child, true); // for test diff --git a/ft/ft-hot-flusher.cc b/ft/ft-hot-flusher.cc index aa695185838..f1e6f1a93de 100644 --- a/ft/ft-hot-flusher.cc +++ b/ft/ft-hot-flusher.cc @@ -328,8 +328,8 @@ toku_ft_hot_optimize(FT_HANDLE ft_handle, DBT* left, DBT* right, // Get root node (the first parent of each successive HOT // call.) 
toku_calculate_root_offset_pointer(ft_handle->ft, &root_key, &fullhash); - struct ftnode_fetch_extra bfe; - fill_bfe_for_full_read(&bfe, ft_handle->ft); + ftnode_fetch_extra bfe; + bfe.create_for_full_read(ft_handle->ft); toku_pin_ftnode(ft_handle->ft, (BLOCKNUM) root_key, fullhash, diff --git a/ft/ft-internal.h b/ft/ft-internal.h index fb04cd1bd90..571e696484c 100644 --- a/ft/ft-internal.h +++ b/ft/ft-internal.h @@ -298,6 +298,92 @@ struct ft_handle { PAIR_ATTR make_ftnode_pair_attr(FTNODE node); PAIR_ATTR make_invalid_pair_attr(void); +// +// Field in ftnode_fetch_extra that tells the +// partial fetch callback what piece of the node +// is needed by the ydb +// +enum ftnode_fetch_type { + ftnode_fetch_none = 1, // no partitions needed. + ftnode_fetch_subset, // some subset of partitions needed + ftnode_fetch_prefetch, // this is part of a prefetch call + ftnode_fetch_all, // every partition is needed + ftnode_fetch_keymatch, // one child is needed if it holds both keys +}; + +// Info passed to cachetable fetch callbacks to say which parts of a node +// should be fetched (perhaps a subset, perhaps the whole thing, depending +// on operation) +class ftnode_fetch_extra { +public: + // Used when the whole node must be in memory, such as for flushes. + void create_for_full_read(FT ft); + + // A subset of children are necessary. Used by point queries. + void create_for_subset_read(FT ft, ft_search *search, const DBT *left, const DBT *right, + bool left_is_neg_infty, bool right_is_pos_infty, + bool disable_prefetching, bool read_all_partitions); + + // No partitions are necessary - only pivots and/or subtree estimates. + // Currently used for stat64. + void create_for_min_read(FT ft); + + // Used to prefetch partitions that fall within the bounds given by the cursor. + void create_for_prefetch(FT ft, struct ft_cursor *cursor); + + // Only a portion of the node (within a keyrange) is required. 
+ // Used by keysrange when the left and right key are in the same basement node. + void create_for_keymatch(FT ft, const DBT *left, const DBT *right, + bool disable_prefetching, bool read_all_partitions); + + void destroy(void); + + // return: true if a specific childnum is required to be in memory + bool wants_child_available(int childnum) const; + + // return: the childnum of the leftmost child that is required to be in memory + int leftmost_child_wanted(FTNODE node) const; + + // return: the childnum of the rightmost child that is required to be in memory + int rightmost_child_wanted(FTNODE node) const; + + // needed for reading a node off disk + FT ft; + + enum ftnode_fetch_type type; + + // used in the case where type == ftnode_fetch_subset + // parameters needed to find out which child needs to be decompressed (so it can be read) + ft_search *search; + DBT range_lock_left_key, range_lock_right_key; + bool left_is_neg_infty, right_is_pos_infty; + + // states if we should try to aggressively fetch basement nodes + // that are not specifically needed for current query, + // but may be needed for other cursor operations user is doing + // For example, if we have not disabled prefetching, + // and the user is doing a dictionary wide scan, then + // even though a query may only want one basement node, + // we fetch all basement nodes in a leaf node. + bool disable_prefetching; + + // this value will be set during the fetch_callback call by toku_ftnode_fetch_callback or toku_ftnode_pf_req_callback + // thi callbacks need to evaluate this anyway, so we cache it here so the search code does not reevaluate it + int child_to_read; + + // when we read internal nodes, we want to read all the data off disk in one I/O + // then we'll treat it as normal and only decompress the needed partitions etc. + bool read_all_partitions; + + // Accounting: How many bytes were read, and how much time did we spend doing I/O? 
+ uint64_t bytes_read; + tokutime_t io_time; + tokutime_t decompress_time; + tokutime_t deserialize_time; + +private: + void _create_internal(FT ft_); +}; // Only exported for tests. // Cachetable callbacks for ftnodes. @@ -333,47 +419,6 @@ STAT64INFO_S toku_get_and_clear_basement_stats(FTNODE leafnode); void toku_verify_or_set_counts(FTNODE); -// -// Helper function to fill a ftnode_fetch_extra with data -// that will tell the fetch callback that the entire node is -// necessary. Used in cases where the entire node -// is required, such as for flushes. -// -void fill_bfe_for_full_read(struct ftnode_fetch_extra *bfe, FT ft); - -// -// Helper function to fill a ftnode_fetch_extra with data -// that will tell the fetch callback that an explicit range of children is -// necessary. Used in cases where the portion of the node that is required -// is known in advance, e.g. for keysrange when the left and right key -// are in the same basement node. -// -void fill_bfe_for_keymatch(struct ftnode_fetch_extra *bfe, FT ft, - const DBT *left, const DBT *right, - bool disable_prefetching, bool read_all_partitions); -// -// Helper function to fill a ftnode_fetch_extra with data -// that will tell the fetch callback that some subset of the node -// necessary. Used in cases where some of the node is required -// such as for a point query. -// -void fill_bfe_for_subset_read(struct ftnode_fetch_extra *bfe, FT ft, ft_search *search, - const DBT *left, const DBT *right, - bool left_is_neg_infty, bool right_is_pos_infty, - bool disable_prefetching, bool read_all_partitions); - -// -// Helper function to fill a ftnode_fetch_extra with data -// that will tell the fetch callback that no partitions are -// necessary, only the pivots and/or subtree estimates. -// Currently used for stat64. 
-// -void fill_bfe_for_min_read(struct ftnode_fetch_extra *bfe, FT ft); - -void fill_bfe_for_prefetch(struct ftnode_fetch_extra *bfe, FT ft, struct ft_cursor *cursor); - -void destroy_bfe_for_prefetch(struct ftnode_fetch_extra *bfe); - // TODO: consider moving this to ft/pivotkeys.cc class pivot_bounds { public: @@ -396,11 +441,6 @@ private: const DBT _upper_bound_inclusive; }; -// TODO: move into the ftnode_fetch_extra class -bool toku_bfe_wants_child_available (struct ftnode_fetch_extra* bfe, int childnum); -int toku_bfe_leftmost_child_wanted(struct ftnode_fetch_extra *bfe, FTNODE node); -int toku_bfe_rightmost_child_wanted(struct ftnode_fetch_extra *bfe, FTNODE node); - // allocate a block number // allocate and initialize a ftnode // put the ftnode into the cache table @@ -584,7 +624,7 @@ typedef struct { TOKU_ENGINE_STATUS_ROW_S status[FT_STATUS_NUM_ROWS]; } FT_STATUS_S, *FT_STATUS; -void toku_ft_status_update_pivot_fetch_reason(struct ftnode_fetch_extra *bfe); +void toku_ft_status_update_pivot_fetch_reason(ftnode_fetch_extra *bfe); void toku_ft_status_update_flush_reason(FTNODE node, uint64_t uncompressed_bytes_flushed, uint64_t bytes_written, tokutime_t write_time, bool for_checkpoint); void toku_ft_status_update_serialize_times(FTNODE node, tokutime_t serialize_time, tokutime_t compress_time); void toku_ft_status_update_deserialize_times(FTNODE node, tokutime_t deserialize_time, tokutime_t decompress_time); diff --git a/ft/ft-ops.cc b/ft/ft-ops.cc index 6844ca3478b..502aca43adc 100644 --- a/ft/ft-ops.cc +++ b/ft/ft-ops.cc @@ -608,42 +608,123 @@ next_dict_id(void) { return d; } -// -// Given a bfe and a childnum, returns whether the query that constructed the bfe -// wants the child available. 
-// Requires: bfe->child_to_read to have been set -// -bool -toku_bfe_wants_child_available (struct ftnode_fetch_extra* bfe, int childnum) -{ - return bfe->type == ftnode_fetch_all || - (bfe->child_to_read == childnum && - (bfe->type == ftnode_fetch_subset || bfe->type == ftnode_fetch_keymatch)); +// TODO: This isn't so pretty +void ftnode_fetch_extra::_create_internal(FT ft_) { + ft = ft_; + + toku_init_dbt(&range_lock_left_key); + toku_init_dbt(&range_lock_right_key); + left_is_neg_infty = false; + right_is_pos_infty = false; + child_to_read = -1; + disable_prefetching = false; + read_all_partitions = false; + bytes_read = 0; + io_time = 0; + deserialize_time = 0; + decompress_time = 0; } -int -toku_bfe_leftmost_child_wanted(struct ftnode_fetch_extra *bfe, FTNODE node) -{ - paranoid_invariant(bfe->type == ftnode_fetch_subset || bfe->type == ftnode_fetch_prefetch || bfe->type == ftnode_fetch_keymatch); - if (bfe->left_is_neg_infty) { +void ftnode_fetch_extra::create_for_full_read(FT ft_) { + _create_internal(ft_); + + type = ftnode_fetch_all; +} + +void ftnode_fetch_extra::create_for_keymatch(FT ft_, + const DBT *left, const DBT *right, + bool disable_prefetching_, bool read_all_partitions_) { + _create_internal(ft_); + invariant(ft->h->type == FT_CURRENT); + + type = ftnode_fetch_keymatch; + if (left != nullptr) { + toku_copyref_dbt(&range_lock_left_key, *left); + } + if (right != nullptr) { + toku_copyref_dbt(&range_lock_right_key, *right); + } + disable_prefetching = disable_prefetching_; + read_all_partitions = read_all_partitions_; +} + +void ftnode_fetch_extra::create_for_subset_read(FT ft_, ft_search *search_, + const DBT *left, const DBT *right, + bool left_is_neg_infty_, bool right_is_pos_infty_, + bool disable_prefetching_, bool read_all_partitions_) { + _create_internal(ft_); + invariant(ft->h->type == FT_CURRENT); + + type = ftnode_fetch_subset; + search = search_; + if (left != nullptr) { + toku_copyref_dbt(&range_lock_left_key, *left); + } + if 
(right != nullptr) { + toku_copyref_dbt(&range_lock_right_key, *right); + } + left_is_neg_infty = left_is_neg_infty_; + right_is_pos_infty = right_is_pos_infty_; + disable_prefetching = disable_prefetching_; + read_all_partitions = read_all_partitions_; +} + +void ftnode_fetch_extra::create_for_min_read(FT ft_) { + _create_internal(ft_); + invariant(ft->h->type == FT_CURRENT); + + type = ftnode_fetch_none; +} + +void ftnode_fetch_extra::create_for_prefetch(FT ft_, struct ft_cursor *cursor) { + _create_internal(ft_); + invariant(ft->h->type == FT_CURRENT); + + type = ftnode_fetch_prefetch; + const DBT *left = &cursor->range_lock_left_key; + if (left->data) { + toku_clone_dbt(&range_lock_left_key, *left); + } + const DBT *right = &cursor->range_lock_right_key; + if (right->data) { + toku_clone_dbt(&range_lock_right_key, *right); + } + left_is_neg_infty = cursor->left_is_neg_infty; + right_is_pos_infty = cursor->right_is_pos_infty; + disable_prefetching = cursor->disable_prefetching; +} + +void ftnode_fetch_extra::destroy(void) { + toku_destroy_dbt(&range_lock_left_key); + toku_destroy_dbt(&range_lock_right_key); +} + +// Requires: child_to_read to have been set +bool ftnode_fetch_extra::wants_child_available(int childnum) const { + return type == ftnode_fetch_all || + (child_to_read == childnum && + (type == ftnode_fetch_subset || type == ftnode_fetch_keymatch)); +} + +int ftnode_fetch_extra::leftmost_child_wanted(FTNODE node) const { + paranoid_invariant(type == ftnode_fetch_subset || type == ftnode_fetch_prefetch || type == ftnode_fetch_keymatch); + if (left_is_neg_infty) { return 0; - } else if (bfe->range_lock_left_key.data == nullptr) { + } else if (range_lock_left_key.data == nullptr) { return -1; } else { - return toku_ftnode_which_child(node, &bfe->range_lock_left_key, bfe->ft->cmp); + return toku_ftnode_which_child(node, &range_lock_left_key, ft->cmp); } } -int -toku_bfe_rightmost_child_wanted(struct ftnode_fetch_extra *bfe, FTNODE node) -{ - 
paranoid_invariant(bfe->type == ftnode_fetch_subset || bfe->type == ftnode_fetch_prefetch || bfe->type == ftnode_fetch_keymatch); - if (bfe->right_is_pos_infty) { +int ftnode_fetch_extra::rightmost_child_wanted(FTNODE node) const { + paranoid_invariant(type == ftnode_fetch_subset || type == ftnode_fetch_prefetch || type == ftnode_fetch_keymatch); + if (right_is_pos_infty) { return node->n_children - 1; - } else if (bfe->range_lock_right_key.data == nullptr) { + } else if (range_lock_right_key.data == nullptr) { return -1; } else { - return toku_ftnode_which_child(node, &bfe->range_lock_right_key, bfe->ft->cmp); + return toku_ftnode_which_child(node, &range_lock_right_key, ft->cmp); } } @@ -843,7 +924,7 @@ void toku_ftnode_flush_callback( } void -toku_ft_status_update_pivot_fetch_reason(struct ftnode_fetch_extra *bfe) +toku_ft_status_update_pivot_fetch_reason(ftnode_fetch_extra *bfe) { if (bfe->type == ftnode_fetch_prefetch) { STATUS_INC(FT_NUM_PIVOTS_FETCHED_PREFETCH, 1); @@ -865,7 +946,7 @@ int toku_ftnode_fetch_callback (CACHEFILE UU(cachefile), PAIR p, int fd, BLOCKNU assert(extraargs); assert(*ftnode_pv == NULL); FTNODE_DISK_DATA* ndd = (FTNODE_DISK_DATA*)disk_data; - struct ftnode_fetch_extra *bfe = (struct ftnode_fetch_extra *)extraargs; + ftnode_fetch_extra *bfe = (ftnode_fetch_extra *)extraargs; FTNODE *node=(FTNODE*)ftnode_pv; // deserialize the node, must pass the bfe in because we cannot // evaluate what piece of the the node is necessary until we get it at @@ -1125,7 +1206,7 @@ bool toku_ftnode_pf_req_callback(void* ftnode_pv, void* read_extraargs) { // placeholder for now bool retval = false; FTNODE node = (FTNODE) ftnode_pv; - struct ftnode_fetch_extra *bfe = (struct ftnode_fetch_extra *) read_extraargs; + ftnode_fetch_extra *bfe = (ftnode_fetch_extra *) read_extraargs; // // The three types of fetches that the ft layer may request are: // - ftnode_fetch_none: no partitions are necessary (example use: stat64) @@ -1169,8 +1250,8 @@ bool 
toku_ftnode_pf_req_callback(void* ftnode_pv, void* read_extraargs) { // makes no sense to have prefetching disabled // and still call this function paranoid_invariant(!bfe->disable_prefetching); - int lc = toku_bfe_leftmost_child_wanted(bfe, node); - int rc = toku_bfe_rightmost_child_wanted(bfe, node); + int lc = bfe->leftmost_child_wanted(node); + int rc = bfe->rightmost_child_wanted(node); for (int i = lc; i <= rc; ++i) { if (BP_STATE(node, i) != PT_AVAIL) { retval = true; @@ -1183,8 +1264,8 @@ bool toku_ftnode_pf_req_callback(void* ftnode_pv, void* read_extraargs) { // we find out what basement node the query cares about // and check if it is available if (node->height == 0) { - int left_child = toku_bfe_leftmost_child_wanted(bfe, node); - int right_child = toku_bfe_rightmost_child_wanted(bfe, node); + int left_child = bfe->leftmost_child_wanted(node); + int right_child = bfe->rightmost_child_wanted(node); if (left_child == right_child) { bfe->child_to_read = left_child; unsafe_touch_clock(node,bfe->child_to_read); @@ -1201,7 +1282,7 @@ bool toku_ftnode_pf_req_callback(void* ftnode_pv, void* read_extraargs) { static void ft_status_update_partial_fetch_reason( - struct ftnode_fetch_extra* bfe, + ftnode_fetch_extra *bfe, int childnum, enum pt_state state, bool is_leaf @@ -1334,7 +1415,7 @@ int toku_ftnode_pf_callback(void* ftnode_pv, void* disk_data, void* read_extraar int r = 0; FTNODE node = (FTNODE) ftnode_pv; FTNODE_DISK_DATA ndd = (FTNODE_DISK_DATA) disk_data; - struct ftnode_fetch_extra *bfe = (struct ftnode_fetch_extra *) read_extraargs; + ftnode_fetch_extra *bfe = (ftnode_fetch_extra *) read_extraargs; // there must be a reason this is being called. 
If we get a garbage type or the type is ftnode_fetch_none, // then something went wrong assert((bfe->type == ftnode_fetch_subset) || (bfe->type == ftnode_fetch_all) || (bfe->type == ftnode_fetch_prefetch) || (bfe->type == ftnode_fetch_keymatch)); @@ -1344,8 +1425,8 @@ int toku_ftnode_pf_callback(void* ftnode_pv, void* disk_data, void* read_extraar (bfe->type == ftnode_fetch_subset || bfe->type == ftnode_fetch_prefetch) ) { - lc = toku_bfe_leftmost_child_wanted(bfe, node); - rc = toku_bfe_rightmost_child_wanted(bfe, node); + lc = bfe->leftmost_child_wanted(node); + rc = bfe->rightmost_child_wanted(node); } else { lc = -1; rc = -1; @@ -1354,7 +1435,7 @@ int toku_ftnode_pf_callback(void* ftnode_pv, void* disk_data, void* read_extraar if (BP_STATE(node,i) == PT_AVAIL) { continue; } - if ((lc <= i && i <= rc) || toku_bfe_wants_child_available(bfe, i)) { + if ((lc <= i && i <= rc) || bfe->wants_child_available(i)) { enum pt_state state = BP_STATE(node, i); if (state == PT_COMPRESSED) { r = toku_deserialize_bp_from_compressed(node, i, bfe); @@ -1388,127 +1469,6 @@ int toku_msg_leafval_heaviside(DBT const &kdbt, const struct toku_msg_leafval_he return be.cmp(&kdbt, be.key); } -void fill_bfe_for_full_read(struct ftnode_fetch_extra *bfe, FT ft) { - bfe->type = ftnode_fetch_all; - bfe->ft = ft; - bfe->search = nullptr; - toku_init_dbt(&bfe->range_lock_left_key); - toku_init_dbt(&bfe->range_lock_right_key); - bfe->left_is_neg_infty = false; - bfe->right_is_pos_infty = false; - bfe->child_to_read = -1; - bfe->disable_prefetching = false; - bfe->read_all_partitions = false; - bfe->bytes_read = 0; - bfe->io_time = 0; - bfe->deserialize_time = 0; - bfe->decompress_time = 0; -} - -void fill_bfe_for_keymatch(struct ftnode_fetch_extra *bfe, FT ft, - const DBT *left, const DBT *right, - bool disable_prefetching, bool read_all_partitions) { - paranoid_invariant(ft->h->type == FT_CURRENT); - bfe->type = ftnode_fetch_keymatch; - bfe->ft = ft; - bfe->search = nullptr; - 
toku_init_dbt(&bfe->range_lock_left_key); - toku_init_dbt(&bfe->range_lock_right_key); - if (left) { - toku_copyref_dbt(&bfe->range_lock_left_key, *left); - } - - if (right) { - toku_copyref_dbt(&bfe->range_lock_right_key, *right); - } - bfe->left_is_neg_infty = left == nullptr; - bfe->right_is_pos_infty = right == nullptr; - bfe->child_to_read = -1; - bfe->disable_prefetching = disable_prefetching; - bfe->read_all_partitions = read_all_partitions; - bfe->bytes_read = 0; - bfe->io_time = 0; - bfe->deserialize_time = 0; - bfe->decompress_time = 0; -} - -void fill_bfe_for_subset_read(struct ftnode_fetch_extra *bfe, FT ft, ft_search *search, - const DBT *left, const DBT *right, - bool left_is_neg_infty, bool right_is_pos_infty, - bool disable_prefetching, bool read_all_partitions) { - paranoid_invariant(ft->h->type == FT_CURRENT); - bfe->type = ftnode_fetch_subset; - bfe->ft = ft; - bfe->search = search; - toku_init_dbt(&bfe->range_lock_left_key); - toku_init_dbt(&bfe->range_lock_right_key); - if (left) { - toku_copyref_dbt(&bfe->range_lock_left_key, *left); - } - if (right) { - toku_copyref_dbt(&bfe->range_lock_right_key, *right); - } - bfe->left_is_neg_infty = left_is_neg_infty; - bfe->right_is_pos_infty = right_is_pos_infty; - bfe->child_to_read = -1; - bfe->disable_prefetching = disable_prefetching; - bfe->read_all_partitions = read_all_partitions; - bfe->bytes_read = 0; - bfe->io_time = 0; - bfe->deserialize_time = 0; - bfe->decompress_time = 0; -} - -void fill_bfe_for_min_read(struct ftnode_fetch_extra *bfe, FT ft) { - paranoid_invariant(ft->h->type == FT_CURRENT); - bfe->type = ftnode_fetch_none; - bfe->ft = ft; - bfe->search = nullptr; - toku_init_dbt(&bfe->range_lock_left_key); - toku_init_dbt(&bfe->range_lock_right_key); - bfe->left_is_neg_infty = false; - bfe->right_is_pos_infty = false; - bfe->child_to_read = -1; - bfe->disable_prefetching = false; - bfe->read_all_partitions = false; - bfe->bytes_read = 0; - bfe->io_time = 0; - bfe->deserialize_time = 0; - 
bfe->decompress_time = 0; -} - -void fill_bfe_for_prefetch(struct ftnode_fetch_extra *bfe, FT ft, struct ft_cursor *cursor) { - paranoid_invariant(ft->h->type == FT_CURRENT); - bfe->type = ftnode_fetch_prefetch; - bfe->ft = ft; - bfe->search = nullptr; - toku_init_dbt(&bfe->range_lock_left_key); - toku_init_dbt(&bfe->range_lock_right_key); - const DBT *left = &cursor->range_lock_left_key; - if (left->data) { - toku_clone_dbt(&bfe->range_lock_left_key, *left); - } - const DBT *right = &cursor->range_lock_right_key; - if (right->data) { - toku_clone_dbt(&bfe->range_lock_right_key, *right); - } - bfe->left_is_neg_infty = cursor->left_is_neg_infty; - bfe->right_is_pos_infty = cursor->right_is_pos_infty; - bfe->child_to_read = -1; - bfe->disable_prefetching = cursor->disable_prefetching; - bfe->read_all_partitions = false; - bfe->bytes_read = 0; - bfe->io_time = 0; - bfe->deserialize_time = 0; - bfe->decompress_time = 0; -} - -void destroy_bfe_for_prefetch(struct ftnode_fetch_extra *bfe) { - paranoid_invariant(bfe->type == ftnode_fetch_prefetch); - toku_destroy_dbt(&bfe->range_lock_left_key); - toku_destroy_dbt(&bfe->range_lock_right_key); -} - static void ft_init_new_root(FT ft, FTNODE oldroot, FTNODE *newrootp) // Effect: Create a new root node whose two children are the split of oldroot. 
@@ -1567,8 +1527,8 @@ ft_init_new_root(FT ft, FTNODE oldroot, FTNODE *newrootp) // ft_split_child released locks on newroot // and oldroot, so now we repin and // return to caller - struct ftnode_fetch_extra bfe; - fill_bfe_for_full_read(&bfe, ft); + ftnode_fetch_extra bfe; + bfe.create_for_full_read(ft); toku_pin_ftnode( ft, old_blocknum, @@ -1702,8 +1662,8 @@ static bool process_maybe_reactive_child(FT ft, FTNODE parent, FTNODE child, int int parent_n_children = parent->n_children; toku_unpin_ftnode_read_only(ft, child); toku_unpin_ftnode_read_only(ft, parent); - struct ftnode_fetch_extra bfe; - fill_bfe_for_full_read(&bfe, ft); + ftnode_fetch_extra bfe; + bfe.create_for_full_read(ft); FTNODE newparent, newchild; toku_pin_ftnode(ft, parent_blocknum, parent_fullhash, &bfe, PL_WRITE_CHEAP, &newparent, true); if (newparent->height != parent_height || newparent->n_children != parent_n_children || @@ -1754,8 +1714,8 @@ static bool process_maybe_reactive_child(FT ft, FTNODE parent, FTNODE child, int uint32_t parent_fullhash = toku_cachetable_hash(ft->cf, parent_blocknum); toku_unpin_ftnode_read_only(ft, child); toku_unpin_ftnode_read_only(ft, parent); - struct ftnode_fetch_extra bfe; - fill_bfe_for_full_read(&bfe, ft); + ftnode_fetch_extra bfe; + bfe.create_for_full_read(ft); FTNODE newparent, newchild; toku_pin_ftnode(ft, parent_blocknum, parent_fullhash, &bfe, PL_WRITE_CHEAP, &newparent, true); if (newparent->height != parent_height || childnum >= newparent->n_children) { @@ -1796,8 +1756,8 @@ static void inject_message_at_this_blocknum(FT ft, CACHEKEY cachekey, uint32_t f { toku::context inject_ctx(CTX_MESSAGE_INJECTION); FTNODE node; - struct ftnode_fetch_extra bfe; - fill_bfe_for_full_read(&bfe, ft); + ftnode_fetch_extra bfe; + bfe.create_for_full_read(ft); toku_pin_ftnode(ft, cachekey, fullhash, &bfe, PL_WRITE_CHEAP, &node, true); toku_ftnode_assert_fully_in_memory(node); paranoid_invariant(node->fullhash==fullhash); @@ -1945,8 +1905,8 @@ static void 
push_something_in_subtree( // promote and we're in the top two levels of the // tree, don't stop just because someone else has the // node locked. - struct ftnode_fetch_extra bfe; - fill_bfe_for_full_read(&bfe, ft); + ftnode_fetch_extra bfe; + bfe.create_for_full_read(ft); if (lock_type == PL_WRITE_CHEAP) { // We intend to take the write lock for message injection toku::context inject_ctx(CTX_MESSAGE_INJECTION); @@ -1985,8 +1945,8 @@ static void push_something_in_subtree( if (did_split_or_merge) { // Need to re-pin this node and try at this level again. FTNODE newparent; - struct ftnode_fetch_extra bfe; - fill_bfe_for_full_read(&bfe, ft); // should be fully in memory, we just split it + ftnode_fetch_extra bfe; + bfe.create_for_full_read(ft); // should be fully in memory, we just split it toku_pin_ftnode(ft, subtree_root_blocknum, subtree_root_fullhash, &bfe, PL_READ, &newparent, true); push_something_in_subtree(ft, newparent, -1, msg, flow_deltas, gc_info, depth, loc, true); return; @@ -2072,8 +2032,8 @@ void toku_ft_root_put_msg( uint32_t fullhash; CACHEKEY root_key; toku_calculate_root_offset_pointer(ft, &root_key, &fullhash); - struct ftnode_fetch_extra bfe; - fill_bfe_for_full_read(&bfe, ft); + ftnode_fetch_extra bfe; + bfe.create_for_full_read(ft); size_t flow_deltas[] = { message_buffer::msg_memsize_in_buffer(msg), 0 }; @@ -2324,8 +2284,8 @@ static int ft_maybe_insert_into_rightmost_leaf(FT ft, DBT *key, DBT *val, XIDS m // Pin the rightmost leaf with a write lock. 
rightmost_fullhash = toku_cachetable_hash(ft->cf, rightmost_blocknum); - struct ftnode_fetch_extra bfe; - fill_bfe_for_full_read(&bfe, ft); + ftnode_fetch_extra bfe; + bfe.create_for_full_read(ft); toku_pin_ftnode(ft, rightmost_blocknum, rightmost_fullhash, &bfe, PL_WRITE_CHEAP, &rightmost_leaf, true); // The rightmost blocknum never chances once it is initialized to something @@ -3482,9 +3442,9 @@ static int ftnode_fetch_callback_and_free_bfe(CACHEFILE cf, PAIR p, int fd, BLOCKNUM blocknum, uint32_t fullhash, void **ftnode_pv, void** UU(disk_data), PAIR_ATTR *sizep, int *dirtyp, void *extraargs) { int r = toku_ftnode_fetch_callback(cf, p, fd, blocknum, fullhash, ftnode_pv, disk_data, sizep, dirtyp, extraargs); - struct ftnode_fetch_extra *CAST_FROM_VOIDP(ffe, extraargs); - destroy_bfe_for_prefetch(ffe); - toku_free(ffe); + ftnode_fetch_extra *CAST_FROM_VOIDP(bfe, extraargs); + bfe->destroy(); + toku_free(bfe); return r; } @@ -3492,9 +3452,9 @@ static int ftnode_pf_callback_and_free_bfe(void *ftnode_pv, void* disk_data, void *read_extraargs, int fd, PAIR_ATTR *sizep) { int r = toku_ftnode_pf_callback(ftnode_pv, disk_data, read_extraargs, fd, sizep); - struct ftnode_fetch_extra *CAST_FROM_VOIDP(ffe, read_extraargs); - destroy_bfe_for_prefetch(ffe); - toku_free(ffe); + ftnode_fetch_extra *CAST_FROM_VOIDP(bfe, read_extraargs); + bfe->destroy(); + toku_free(bfe); return r; } @@ -3522,8 +3482,8 @@ ft_node_maybe_prefetch(FT_HANDLE ft_handle, FTNODE node, int childnum, FT_CURSOR for (int i = childnum + 1; (i <= childnum + num_nodes_to_prefetch) && (i <= rc); i++) { BLOCKNUM nextchildblocknum = BP_BLOCKNUM(node, i); uint32_t nextfullhash = compute_child_fullhash(ft_handle->ft->cf, node, i); - struct ftnode_fetch_extra *MALLOC(bfe); - fill_bfe_for_prefetch(bfe, ft_handle->ft, ftcursor); + ftnode_fetch_extra *XCALLOC(bfe); + bfe->create_for_prefetch(ft_handle->ft, ftcursor); bool doing_prefetch = false; toku_cachefile_prefetch( ft_handle->ft->cf, @@ -3537,7 +3497,7 @@ 
ft_node_maybe_prefetch(FT_HANDLE ft_handle, FTNODE node, int childnum, FT_CURSOR &doing_prefetch ); if (!doing_prefetch) { - destroy_bfe_for_prefetch(bfe); + bfe->destroy(); toku_free(bfe); } *doprefetch = false; @@ -3583,9 +3543,8 @@ ft_search_child(FT_HANDLE ft_handle, FTNODE node, int childnum, ft_search *searc // If the current node's height is greater than 1, then its child is an internal node. // Therefore, to warm the cache better (#5798), we want to read all the partitions off disk in one shot. bool read_all_partitions = node->height > 1; - struct ftnode_fetch_extra bfe; - fill_bfe_for_subset_read( - &bfe, + ftnode_fetch_extra bfe; + bfe.create_for_subset_read( ft_handle->ft, search, &ftcursor->range_lock_left_key, @@ -3879,9 +3838,8 @@ try_again: // - At this point, toku_ftnode_pin_holding_lock has returned, with bfe.child_to_read set, // - ft_search_node is called, assuming that the node and its relevant partition are in memory. // - struct ftnode_fetch_extra bfe; - fill_bfe_for_subset_read( - &bfe, + ftnode_fetch_extra bfe; + bfe.create_for_subset_read( ft, search, &ftcursor->range_lock_left_key, @@ -4068,8 +4026,8 @@ toku_ft_keysrange_internal (FT_HANDLE ft_handle, FTNODE node, uint64_t* less, uint64_t* equal_left, uint64_t* middle, uint64_t* equal_right, uint64_t* greater, bool* single_basement_node, uint64_t estimated_num_rows, - struct ftnode_fetch_extra *min_bfe, // set up to read a minimal read. - struct ftnode_fetch_extra *match_bfe, // set up to read a basement node iff both keys in it + ftnode_fetch_extra *min_bfe, // set up to read a minimal read. + ftnode_fetch_extra *match_bfe, // set up to read a basement node iff both keys in it struct unlockers *unlockers, ANCESTORS ancestors, const pivot_bounds &bounds) // Implementation note: Assign values to less, equal, and greater, and then on the way out (returning up the stack) we add more values in. 
{ @@ -4166,10 +4124,10 @@ void toku_ft_keysrange(FT_HANDLE ft_handle, DBT* key_left, DBT* key_right, uint6 return; } paranoid_invariant(!(!key_left && key_right)); - struct ftnode_fetch_extra min_bfe; - struct ftnode_fetch_extra match_bfe; - fill_bfe_for_min_read(&min_bfe, ft_handle->ft); // read pivot keys but not message buffers - fill_bfe_for_keymatch(&match_bfe, ft_handle->ft, key_left, key_right, false, false); // read basement node only if both keys in it. + ftnode_fetch_extra min_bfe; + ftnode_fetch_extra match_bfe; + min_bfe.create_for_min_read(ft_handle->ft); // read pivot keys but not message buffers + match_bfe.create_for_keymatch(ft_handle->ft, key_left, key_right, false, false); // read basement node only if both keys in it. try_again: { uint64_t less = 0, equal_left = 0, middle = 0, equal_right = 0, greater = 0; @@ -4304,9 +4262,9 @@ static int get_key_after_bytes_in_basementnode(FT ft, BASEMENTNODE bn, const DBT return r; } -static int get_key_after_bytes_in_subtree(FT_HANDLE ft_h, FT ft, FTNODE node, UNLOCKERS unlockers, ANCESTORS ancestors, const pivot_bounds &bounds, FTNODE_FETCH_EXTRA bfe, ft_search *search, uint64_t subtree_bytes, const DBT *start_key, uint64_t skip_len, void (*callback)(const DBT *, uint64_t, void *), void *cb_extra, uint64_t *skipped); +static int get_key_after_bytes_in_subtree(FT_HANDLE ft_h, FT ft, FTNODE node, UNLOCKERS unlockers, ANCESTORS ancestors, const pivot_bounds &bounds, ftnode_fetch_extra *bfe, ft_search *search, uint64_t subtree_bytes, const DBT *start_key, uint64_t skip_len, void (*callback)(const DBT *, uint64_t, void *), void *cb_extra, uint64_t *skipped); -static int get_key_after_bytes_in_child(FT_HANDLE ft_h, FT ft, FTNODE node, UNLOCKERS unlockers, ANCESTORS ancestors, const pivot_bounds &bounds, FTNODE_FETCH_EXTRA bfe, ft_search *search, int childnum, uint64_t subtree_bytes, const DBT *start_key, uint64_t skip_len, void (*callback)(const DBT *, uint64_t, void *), void *cb_extra, uint64_t *skipped) { 
+static int get_key_after_bytes_in_child(FT_HANDLE ft_h, FT ft, FTNODE node, UNLOCKERS unlockers, ANCESTORS ancestors, const pivot_bounds &bounds, ftnode_fetch_extra *bfe, ft_search *search, int childnum, uint64_t subtree_bytes, const DBT *start_key, uint64_t skip_len, void (*callback)(const DBT *, uint64_t, void *), void *cb_extra, uint64_t *skipped) { int r; struct ancestors next_ancestors = {node, childnum, ancestors}; BLOCKNUM childblocknum = BP_BLOCKNUM(node, childnum); @@ -4325,7 +4283,7 @@ static int get_key_after_bytes_in_child(FT_HANDLE ft_h, FT ft, FTNODE node, UNLO return get_key_after_bytes_in_subtree(ft_h, ft, child, &next_unlockers, &next_ancestors, next_bounds, bfe, search, subtree_bytes, start_key, skip_len, callback, cb_extra, skipped); } -static int get_key_after_bytes_in_subtree(FT_HANDLE ft_h, FT ft, FTNODE node, UNLOCKERS unlockers, ANCESTORS ancestors, const pivot_bounds &bounds, FTNODE_FETCH_EXTRA bfe, ft_search *search, uint64_t subtree_bytes, const DBT *start_key, uint64_t skip_len, void (*callback)(const DBT *, uint64_t, void *), void *cb_extra, uint64_t *skipped) { +static int get_key_after_bytes_in_subtree(FT_HANDLE ft_h, FT ft, FTNODE node, UNLOCKERS unlockers, ANCESTORS ancestors, const pivot_bounds &bounds, ftnode_fetch_extra *bfe, ft_search *search, uint64_t subtree_bytes, const DBT *start_key, uint64_t skip_len, void (*callback)(const DBT *, uint64_t, void *), void *cb_extra, uint64_t *skipped) { int r; int childnum = toku_ft_search_which_child(ft->cmp, node, search); const uint64_t child_subtree_bytes = subtree_bytes / node->n_children; @@ -4389,8 +4347,8 @@ int toku_ft_get_key_after_bytes(FT_HANDLE ft_h, const DBT *start_key, uint64_t s // an error code otherwise { FT ft = ft_h->ft; - struct ftnode_fetch_extra bfe; - fill_bfe_for_min_read(&bfe, ft); + ftnode_fetch_extra bfe; + bfe.create_for_min_read(ft); while (true) { FTNODE root; { @@ -4453,8 +4411,8 @@ toku_dump_ftnode (FILE *file, FT_HANDLE ft_handle, BLOCKNUM blocknum, int 
depth, toku_get_node_for_verify(blocknum, ft_handle, &node); result=toku_verify_ftnode(ft_handle, ft_handle->ft->h->max_msn_in_ft, ft_handle->ft->h->max_msn_in_ft, false, node, -1, lorange, hirange, NULL, NULL, 0, 1, 0); uint32_t fullhash = toku_cachetable_hash(ft_handle->ft->cf, blocknum); - struct ftnode_fetch_extra bfe; - fill_bfe_for_full_read(&bfe, ft_handle->ft); + ftnode_fetch_extra bfe; + bfe.create_for_full_read(ft_handle->ft); toku_pin_ftnode( ft_handle->ft, blocknum, @@ -4653,8 +4611,8 @@ static bool is_empty_fast_iter (FT_HANDLE ft_handle, FTNODE node) { { BLOCKNUM childblocknum = BP_BLOCKNUM(node,childnum); uint32_t fullhash = compute_child_fullhash(ft_handle->ft->cf, node, childnum); - struct ftnode_fetch_extra bfe; - fill_bfe_for_full_read(&bfe, ft_handle->ft); + ftnode_fetch_extra bfe; + bfe.create_for_full_read(ft_handle->ft); // don't need to pass in dependent nodes as we are not // modifying nodes we are pinning toku_pin_ftnode( @@ -4692,8 +4650,8 @@ bool toku_ft_is_empty_fast (FT_HANDLE ft_handle) { CACHEKEY root_key; toku_calculate_root_offset_pointer(ft_handle->ft, &root_key, &fullhash); - struct ftnode_fetch_extra bfe; - fill_bfe_for_full_read(&bfe, ft_handle->ft); + ftnode_fetch_extra bfe; + bfe.create_for_full_read(ft_handle->ft); toku_pin_ftnode( ft_handle->ft, root_key, diff --git a/ft/ft-test-helpers.cc b/ft/ft-test-helpers.cc index 769965686c1..2eabaafb62a 100644 --- a/ft/ft-test-helpers.cc +++ b/ft/ft-test-helpers.cc @@ -177,8 +177,8 @@ int toku_testsetup_get_sersize(FT_HANDLE ft_handle, BLOCKNUM diskoff) // Return { assert(testsetup_initialized); void *node_v; - struct ftnode_fetch_extra bfe; - fill_bfe_for_full_read(&bfe, ft_handle->ft); + ftnode_fetch_extra bfe; + bfe.create_for_full_read(ft_handle->ft); int r = toku_cachetable_get_and_pin( ft_handle->ft->cf, diskoff, toku_cachetable_hash(ft_handle->ft->cf, diskoff), @@ -204,8 +204,8 @@ int toku_testsetup_insert_to_leaf (FT_HANDLE ft_handle, BLOCKNUM blocknum, const 
assert(testsetup_initialized); - struct ftnode_fetch_extra bfe; - fill_bfe_for_full_read(&bfe, ft_handle->ft); + ftnode_fetch_extra bfe; + bfe.create_for_full_read(ft_handle->ft); r = toku_cachetable_get_and_pin( ft_handle->ft->cf, blocknum, @@ -258,8 +258,8 @@ testhelper_string_key_cmp(DB *UU(e), const DBT *a, const DBT *b) void toku_pin_node_with_min_bfe(FTNODE* node, BLOCKNUM b, FT_HANDLE t) { - struct ftnode_fetch_extra bfe; - fill_bfe_for_min_read(&bfe, t->ft); + ftnode_fetch_extra bfe; + bfe.create_for_min_read(t->ft); toku_pin_ftnode( t->ft, b, @@ -277,8 +277,8 @@ int toku_testsetup_insert_to_nonleaf (FT_HANDLE ft_handle, BLOCKNUM blocknum, en assert(testsetup_initialized); - struct ftnode_fetch_extra bfe; - fill_bfe_for_full_read(&bfe, ft_handle->ft); + ftnode_fetch_extra bfe; + bfe.create_for_full_read(ft_handle->ft); r = toku_cachetable_get_and_pin( ft_handle->ft->cf, blocknum, diff --git a/ft/ft-verify.cc b/ft/ft-verify.cc index 28dc8eef17c..d21f4d1d805 100644 --- a/ft/ft-verify.cc +++ b/ft/ft-verify.cc @@ -288,8 +288,8 @@ toku_get_node_for_verify( ) { uint32_t fullhash = toku_cachetable_hash(ft_handle->ft->cf, blocknum); - struct ftnode_fetch_extra bfe; - fill_bfe_for_full_read(&bfe, ft_handle->ft); + ftnode_fetch_extra bfe; + bfe.create_for_full_read(ft_handle->ft); toku_pin_ftnode( ft_handle->ft, blocknum, diff --git a/ft/ft.cc b/ft/ft.cc index 7430606758a..f41853f76c1 100644 --- a/ft/ft.cc +++ b/ft/ft.cc @@ -1045,8 +1045,8 @@ garbage_helper(BLOCKNUM blocknum, int64_t UU(size), int64_t UU(address), void *e struct garbage_helper_extra *CAST_FROM_VOIDP(info, extra); FTNODE node; FTNODE_DISK_DATA ndd; - struct ftnode_fetch_extra bfe; - fill_bfe_for_full_read(&bfe, info->ft); + ftnode_fetch_extra bfe; + bfe.create_for_full_read(info->ft); int fd = toku_cachefile_get_fd(info->ft->cf); int r = toku_deserialize_ftnode_from(fd, blocknum, 0, &node, &ndd, &bfe); if (r != 0) { diff --git a/ft/node.h b/ft/node.h index 1c77ff95eca..460d7876db8 100644 --- 
a/ft/node.h +++ b/ft/node.h @@ -366,73 +366,6 @@ void toku_initialize_empty_ftnode(FTNODE node, BLOCKNUM blocknum, int height, in int toku_ftnode_which_child(FTNODE node, const DBT *k, const toku::comparator &cmp); void toku_ftnode_save_ct_pair(CACHEKEY key, void *value_data, PAIR p); -// -// Field in ftnode_fetch_extra that tells the -// partial fetch callback what piece of the node -// is needed by the ydb -// -enum ftnode_fetch_type { - ftnode_fetch_none=1, // no partitions needed. - ftnode_fetch_subset, // some subset of partitions needed - ftnode_fetch_prefetch, // this is part of a prefetch call - ftnode_fetch_all, // every partition is needed - ftnode_fetch_keymatch, // one child is needed if it holds both keys -}; - -static bool is_valid_ftnode_fetch_type(enum ftnode_fetch_type type) UU(); -static bool is_valid_ftnode_fetch_type(enum ftnode_fetch_type type) { - switch (type) { - case ftnode_fetch_none: - case ftnode_fetch_subset: - case ftnode_fetch_prefetch: - case ftnode_fetch_all: - case ftnode_fetch_keymatch: - return true; - default: - return false; - } -} - -// -// An extra parameter passed to cachetable functions -// That is used in all types of fetch callbacks. -// The contents help the partial fetch and fetch -// callbacks retrieve the pieces of a node necessary -// for the ensuing operation (flush, query, ...) 
-// -struct ft_search; -struct ftnode_fetch_extra { - enum ftnode_fetch_type type; - // needed for reading a node off disk - FT ft; - // used in the case where type == ftnode_fetch_subset - // parameters needed to find out which child needs to be decompressed (so it can be read) - ft_search *search; - DBT range_lock_left_key, range_lock_right_key; - bool left_is_neg_infty, right_is_pos_infty; - // states if we should try to aggressively fetch basement nodes - // that are not specifically needed for current query, - // but may be needed for other cursor operations user is doing - // For example, if we have not disabled prefetching, - // and the user is doing a dictionary wide scan, then - // even though a query may only want one basement node, - // we fetch all basement nodes in a leaf node. - bool disable_prefetching; - // this value will be set during the fetch_callback call by toku_ftnode_fetch_callback or toku_ftnode_pf_req_callback - // thi callbacks need to evaluate this anyway, so we cache it here so the search code does not reevaluate it - int child_to_read; - // when we read internal nodes, we want to read all the data off disk in one I/O - // then we'll treat it as normal and only decompress the needed partitions etc. - - bool read_all_partitions; - // Accounting: How many bytes were read, and how much time did we spend doing I/O? - uint64_t bytes_read; - tokutime_t io_time; - tokutime_t decompress_time; - tokutime_t deserialize_time; -}; -typedef struct ftnode_fetch_extra *FTNODE_FETCH_EXTRA; - // // TODO: put the heaviside functions into their respective 'struct .*extra;' namespaces // diff --git a/ft/serialize/ft_node-serialize.cc b/ft/serialize/ft_node-serialize.cc index d1900b4be41..ff18fb12ba8 100644 --- a/ft/serialize/ft_node-serialize.cc +++ b/ft/serialize/ft_node-serialize.cc @@ -1110,7 +1110,7 @@ static const int read_header_heuristic_max = 32*1024; // Effect: If the header part of the node is small enough, then read it into the rbuf. 
The rbuf will be allocated to be big enough in any case. static void read_ftnode_header_from_fd_into_rbuf_if_small_enough(int fd, BLOCKNUM blocknum, FT ft, struct rbuf *rb, - struct ftnode_fetch_extra *bfe) { + ftnode_fetch_extra *bfe) { DISKOFF offset, size; ft->blocktable.translate_blocknum_to_offset_size(blocknum, &offset, &size); DISKOFF read_size = roundup_to_multiple(512, MIN(read_header_heuristic_max, size)); @@ -1277,7 +1277,7 @@ setup_available_ftnode_partition(FTNODE node, int i) { // Assign the child_to_read member of the bfe from the given ftnode // that has been brought into memory. static void -update_bfe_using_ftnode(FTNODE node, struct ftnode_fetch_extra *bfe) +update_bfe_using_ftnode(FTNODE node, ftnode_fetch_extra *bfe) { if (bfe->type == ftnode_fetch_subset && bfe->search != NULL) { // we do not take into account prefetching yet @@ -1297,8 +1297,8 @@ update_bfe_using_ftnode(FTNODE node, struct ftnode_fetch_extra *bfe) // we find out what basement node the query cares about // and check if it is available if (node->height == 0) { - int left_child = toku_bfe_leftmost_child_wanted(bfe, node); - int right_child = toku_bfe_rightmost_child_wanted(bfe, node); + int left_child = bfe->leftmost_child_wanted(node); + int right_child = bfe->rightmost_child_wanted(node); if (left_child == right_child) { bfe->child_to_read = left_child; } @@ -1310,14 +1310,14 @@ update_bfe_using_ftnode(FTNODE node, struct ftnode_fetch_extra *bfe) // initialize all of the given ftnode's partitions. static void setup_partitions_using_bfe(FTNODE node, - struct ftnode_fetch_extra *bfe, + ftnode_fetch_extra *bfe, bool data_in_memory) { // Leftmost and Rightmost Child bounds. 
int lc, rc; if (bfe->type == ftnode_fetch_subset || bfe->type == ftnode_fetch_prefetch) { - lc = toku_bfe_leftmost_child_wanted(bfe, node); - rc = toku_bfe_rightmost_child_wanted(bfe, node); + lc = bfe->leftmost_child_wanted(node); + rc = bfe->rightmost_child_wanted(node); } else { lc = -1; rc = -1; @@ -1330,7 +1330,7 @@ setup_partitions_using_bfe(FTNODE node, for (int i = 0; i < node->n_children; i++) { BP_INIT_UNTOUCHED_CLOCK(node,i); if (data_in_memory) { - BP_STATE(node, i) = ((toku_bfe_wants_child_available(bfe, i) || (lc <= i && i <= rc)) + BP_STATE(node, i) = ((bfe->wants_child_available(i) || (lc <= i && i <= rc)) ? PT_AVAIL : PT_COMPRESSED); } else { BP_STATE(node, i) = PT_ON_DISK; @@ -1354,7 +1354,7 @@ setup_partitions_using_bfe(FTNODE node, } } -static void setup_ftnode_partitions(FTNODE node, struct ftnode_fetch_extra* bfe, bool data_in_memory) +static void setup_ftnode_partitions(FTNODE node, ftnode_fetch_extra *bfe, bool data_in_memory) // Effect: Used when reading a ftnode into main memory, this sets up the partitions. // We set bfe->child_to_read as well as the BP_STATE and the data pointers (e.g., with set_BSB or set_BNULL or other set_ operations). // Arguments: Node: the node to set up. @@ -1473,7 +1473,7 @@ deserialize_ftnode_header_from_rbuf_if_small_enough (FTNODE *ftnode, FTNODE_DISK_DATA* ndd, BLOCKNUM blocknum, uint32_t fullhash, - struct ftnode_fetch_extra *bfe, + ftnode_fetch_extra *bfe, struct rbuf *rb, int fd) // If we have enough information in the rbuf to construct a header, then do so. @@ -1604,7 +1604,6 @@ deserialize_ftnode_header_from_rbuf_if_small_enough (FTNODE *ftnode, // rbuf, so we might be able to store the compressed data for some // objects. // We can proceed to deserialize the individual subblocks. 
- paranoid_invariant(is_valid_ftnode_fetch_type(bfe->type)); // setup the memory of the partitions // for partitions being decompressed, create either message buffer or basement node @@ -1627,7 +1626,7 @@ deserialize_ftnode_header_from_rbuf_if_small_enough (FTNODE *ftnode, // handle clock for (int i = 0; i < node->n_children; i++) { - if (toku_bfe_wants_child_available(bfe, i)) { + if (bfe->wants_child_available(i)) { paranoid_invariant(BP_STATE(node,i) == PT_AVAIL); BP_TOUCH_CLOCK(node,i); } @@ -1660,7 +1659,7 @@ cleanup: static int deserialize_and_upgrade_internal_node(FTNODE node, struct rbuf *rb, - struct ftnode_fetch_extra* bfe, + ftnode_fetch_extra *bfe, STAT64INFO info) { int version = node->layout_version_read_from_disk; @@ -1719,8 +1718,8 @@ deserialize_and_upgrade_internal_node(FTNODE node, // sure we properly intitialize our partitions before filling them // in from our soon-to-be-upgraded node. update_bfe_using_ftnode(node, bfe); - struct ftnode_fetch_extra temp_bfe; - temp_bfe.type = ftnode_fetch_all; + ftnode_fetch_extra temp_bfe; + temp_bfe.create_for_full_read(nullptr); setup_partitions_using_bfe(node, &temp_bfe, true); // Cache the highest MSN generated for the message buffers. This @@ -1780,7 +1779,7 @@ deserialize_and_upgrade_internal_node(FTNODE node, static int deserialize_and_upgrade_leaf_node(FTNODE node, struct rbuf *rb, - struct ftnode_fetch_extra* bfe, + ftnode_fetch_extra *bfe, STAT64INFO info) { int r = 0; @@ -1821,8 +1820,8 @@ deserialize_and_upgrade_leaf_node(FTNODE node, // Create one basement node to contain all the leaf entries by // setting up the single partition and updating the bfe. update_bfe_using_ftnode(node, bfe); - struct ftnode_fetch_extra temp_bfe; - fill_bfe_for_full_read(&temp_bfe, bfe->ft); + ftnode_fetch_extra temp_bfe; + temp_bfe.create_for_full_read(bfe->ft); setup_partitions_using_bfe(node, &temp_bfe, true); // 11. 
Deserialize the partition maps, though they are not used in the @@ -1933,7 +1932,7 @@ static int deserialize_and_upgrade_ftnode(FTNODE node, FTNODE_DISK_DATA* ndd, BLOCKNUM blocknum, - struct ftnode_fetch_extra* bfe, + ftnode_fetch_extra *bfe, STAT64INFO info, int fd) { @@ -2023,7 +2022,7 @@ deserialize_ftnode_from_rbuf( FTNODE_DISK_DATA* ndd, BLOCKNUM blocknum, uint32_t fullhash, - struct ftnode_fetch_extra* bfe, + ftnode_fetch_extra *bfe, STAT64INFO info, struct rbuf *rb, int fd @@ -2120,7 +2119,6 @@ deserialize_ftnode_from_rbuf( // now that the node info has been deserialized, we can proceed to deserialize // the individual sub blocks - paranoid_invariant(is_valid_ftnode_fetch_type(bfe->type)); // setup the memory of the partitions // for partitions being decompressed, create either message buffer or basement node @@ -2207,7 +2205,7 @@ cleanup: } int -toku_deserialize_bp_from_disk(FTNODE node, FTNODE_DISK_DATA ndd, int childnum, int fd, struct ftnode_fetch_extra* bfe) { +toku_deserialize_bp_from_disk(FTNODE node, FTNODE_DISK_DATA ndd, int childnum, int fd, ftnode_fetch_extra *bfe) { int r = 0; assert(BP_STATE(node,childnum) == PT_ON_DISK); assert(node->bp[childnum].ptr.tag == BCT_NULL); @@ -2287,7 +2285,7 @@ toku_deserialize_bp_from_disk(FTNODE node, FTNODE_DISK_DATA ndd, int childnum, i // Take a ftnode partition that is in the compressed state, and make it avail int -toku_deserialize_bp_from_compressed(FTNODE node, int childnum, struct ftnode_fetch_extra *bfe) { +toku_deserialize_bp_from_compressed(FTNODE node, int childnum, ftnode_fetch_extra *bfe) { int r = 0; assert(BP_STATE(node, childnum) == PT_COMPRESSED); SUB_BLOCK curr_sb = BSB(node, childnum); @@ -2332,7 +2330,7 @@ deserialize_ftnode_from_fd(int fd, uint32_t fullhash, FTNODE *ftnode, FTNODE_DISK_DATA *ndd, - struct ftnode_fetch_extra *bfe, + ftnode_fetch_extra *bfe, STAT64INFO info) { struct rbuf rb = RBUF_INITIALIZER; @@ -2361,7 +2359,7 @@ toku_deserialize_ftnode_from (int fd, uint32_t fullhash, 
FTNODE *ftnode, FTNODE_DISK_DATA* ndd, - struct ftnode_fetch_extra* bfe + ftnode_fetch_extra *bfe ) // Effect: Read a node in. If possible, read just the header. { @@ -2864,8 +2862,8 @@ toku_upgrade_subtree_estimates_to_stat64info(int fd, FT ft) FTNODE unused_node = NULL; FTNODE_DISK_DATA unused_ndd = NULL; - struct ftnode_fetch_extra bfe; - fill_bfe_for_min_read(&bfe, ft); + ftnode_fetch_extra bfe; + bfe.create_for_min_read(ft); r = deserialize_ftnode_from_fd(fd, ft->h->root_blocknum, 0, &unused_node, &unused_ndd, &bfe, &ft->h->on_disk_stats); ft->in_memory_stats = ft->h->on_disk_stats; @@ -2888,8 +2886,8 @@ toku_upgrade_msn_from_root_to_header(int fd, FT ft) FTNODE node; FTNODE_DISK_DATA ndd; - struct ftnode_fetch_extra bfe; - fill_bfe_for_min_read(&bfe, ft); + ftnode_fetch_extra bfe; + bfe.create_for_min_read(ft); r = deserialize_ftnode_from_fd(fd, ft->h->root_blocknum, 0, &node, &ndd, &bfe, nullptr); if (r != 0) { goto exit; diff --git a/ft/serialize/ft_node-serialize.h b/ft/serialize/ft_node-serialize.h index 14b6e307415..b9b87c58a3a 100644 --- a/ft/serialize/ft_node-serialize.h +++ b/ft/serialize/ft_node-serialize.h @@ -108,9 +108,9 @@ int toku_serialize_rollback_log_to(int fd, ROLLBACK_LOG_NODE log, SERIALIZED_ROL void toku_serialize_rollback_log_to_memory_uncompressed(ROLLBACK_LOG_NODE log, SERIALIZED_ROLLBACK_LOG_NODE serialized); int toku_deserialize_rollback_log_from(int fd, BLOCKNUM blocknum, ROLLBACK_LOG_NODE *logp, FT ft); -int toku_deserialize_bp_from_disk(FTNODE node, FTNODE_DISK_DATA ndd, int childnum, int fd, struct ftnode_fetch_extra *bfe); -int toku_deserialize_bp_from_compressed(FTNODE node, int childnum, struct ftnode_fetch_extra *bfe); -int toku_deserialize_ftnode_from(int fd, BLOCKNUM off, uint32_t fullhash, FTNODE *node, FTNODE_DISK_DATA *ndd, struct ftnode_fetch_extra *bfe); +int toku_deserialize_bp_from_disk(FTNODE node, FTNODE_DISK_DATA ndd, int childnum, int fd, ftnode_fetch_extra *bfe); +int toku_deserialize_bp_from_compressed(FTNODE 
node, int childnum, ftnode_fetch_extra *bfe); +int toku_deserialize_ftnode_from(int fd, BLOCKNUM off, uint32_t fullhash, FTNODE *node, FTNODE_DISK_DATA *ndd, ftnode_fetch_extra *bfe); // used by nonleaf node partial eviction void toku_create_compressed_partition_from_available(FTNODE node, int childnum, diff --git a/ft/tests/ft-bfe-query.cc b/ft/tests/ft-bfe-query.cc index 3eb9ff7129b..357d9fa4b87 100644 --- a/ft/tests/ft-bfe-query.cc +++ b/ft/tests/ft-bfe-query.cc @@ -114,12 +114,12 @@ test_prefetch_read(int fd, FT_HANDLE UU(ft), FT ft_h) { cursor->right_is_pos_infty = true; cursor->disable_prefetching = false; - struct ftnode_fetch_extra bfe; + ftnode_fetch_extra bfe; // quick test to see that we have the right behavior when we set // disable_prefetching to true cursor->disable_prefetching = true; - fill_bfe_for_prefetch(&bfe, ft_h, cursor); + bfe.create_for_prefetch( ft_h, cursor); FTNODE_DISK_DATA ndd = NULL; r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd, &bfe); assert(r==0); @@ -131,14 +131,14 @@ test_prefetch_read(int fd, FT_HANDLE UU(ft), FT ft_h) { assert(BP_STATE(dn,0) == PT_ON_DISK); assert(BP_STATE(dn,1) == PT_ON_DISK); assert(BP_STATE(dn,2) == PT_ON_DISK); - destroy_bfe_for_prefetch(&bfe); + bfe.destroy(); toku_ftnode_free(&dn); toku_free(ndd); // now enable prefetching again cursor->disable_prefetching = false; - fill_bfe_for_prefetch(&bfe, ft_h, cursor); + bfe.create_for_prefetch( ft_h, cursor); r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd, &bfe); assert(r==0); assert(dn->n_children == 3); @@ -153,14 +153,14 @@ test_prefetch_read(int fd, FT_HANDLE UU(ft), FT ft_h) { assert(BP_STATE(dn,0) == PT_AVAIL); assert(BP_STATE(dn,1) == PT_AVAIL); assert(BP_STATE(dn,2) == PT_AVAIL); - destroy_bfe_for_prefetch(&bfe); + bfe.destroy(); toku_ftnode_free(&dn); toku_free(ndd); uint64_t left_key = 150; toku_fill_dbt(&cursor->range_lock_left_key, &left_key, sizeof(uint64_t)); 
cursor->left_is_neg_infty = false; - fill_bfe_for_prefetch(&bfe, ft_h, cursor); + bfe.create_for_prefetch( ft_h, cursor); r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd, &bfe); assert(r==0); assert(dn->n_children == 3); @@ -175,14 +175,14 @@ test_prefetch_read(int fd, FT_HANDLE UU(ft), FT ft_h) { assert(BP_STATE(dn,0) == PT_ON_DISK); assert(BP_STATE(dn,1) == PT_AVAIL); assert(BP_STATE(dn,2) == PT_AVAIL); - destroy_bfe_for_prefetch(&bfe); + bfe.destroy(); toku_ftnode_free(&dn); toku_free(ndd); uint64_t right_key = 151; toku_fill_dbt(&cursor->range_lock_right_key, &right_key, sizeof(uint64_t)); cursor->right_is_pos_infty = false; - fill_bfe_for_prefetch(&bfe, ft_h, cursor); + bfe.create_for_prefetch( ft_h, cursor); r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd, &bfe); assert(r==0); assert(dn->n_children == 3); @@ -197,13 +197,13 @@ test_prefetch_read(int fd, FT_HANDLE UU(ft), FT ft_h) { assert(BP_STATE(dn,0) == PT_ON_DISK); assert(BP_STATE(dn,1) == PT_AVAIL); assert(BP_STATE(dn,2) == PT_ON_DISK); - destroy_bfe_for_prefetch(&bfe); + bfe.destroy(); toku_ftnode_free(&dn); toku_free(ndd); left_key = 100000; right_key = 100000; - fill_bfe_for_prefetch(&bfe, ft_h, cursor); + bfe.create_for_prefetch( ft_h, cursor); r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd, &bfe); assert(r==0); assert(dn->n_children == 3); @@ -218,13 +218,13 @@ test_prefetch_read(int fd, FT_HANDLE UU(ft), FT ft_h) { assert(BP_STATE(dn,0) == PT_ON_DISK); assert(BP_STATE(dn,1) == PT_ON_DISK); assert(BP_STATE(dn,2) == PT_AVAIL); - destroy_bfe_for_prefetch(&bfe); + bfe.destroy(); toku_free(ndd); toku_ftnode_free(&dn); left_key = 100; right_key = 100; - fill_bfe_for_prefetch(&bfe, ft_h, cursor); + bfe.create_for_prefetch( ft_h, cursor); r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd, &bfe); assert(r==0); assert(dn->n_children == 3); 
@@ -239,7 +239,7 @@ test_prefetch_read(int fd, FT_HANDLE UU(ft), FT ft_h) { assert(BP_STATE(dn,0) == PT_AVAIL); assert(BP_STATE(dn,1) == PT_ON_DISK); assert(BP_STATE(dn,2) == PT_ON_DISK); - destroy_bfe_for_prefetch(&bfe); + bfe.destroy(); toku_ftnode_free(&dn); toku_free(ndd); @@ -260,15 +260,14 @@ test_subset_read(int fd, FT_HANDLE UU(ft), FT ft_h) { cursor->left_is_neg_infty = true; cursor->right_is_pos_infty = true; - struct ftnode_fetch_extra bfe; - uint64_t left_key = 150; uint64_t right_key = 151; DBT left, right; toku_fill_dbt(&left, &left_key, sizeof(left_key)); toku_fill_dbt(&right, &right_key, sizeof(right_key)); - fill_bfe_for_subset_read( - &bfe, + + ftnode_fetch_extra bfe; + bfe.create_for_subset_read( ft_h, NULL, &left, diff --git a/ft/tests/ft-clock-test.cc b/ft/tests/ft-clock-test.cc index e3d8d2bedab..18ac7cdc011 100644 --- a/ft/tests/ft-clock-test.cc +++ b/ft/tests/ft-clock-test.cc @@ -146,8 +146,8 @@ le_malloc(bn_data* bn, uint32_t idx, const char *key, const char *val) static void test1(int fd, FT ft_h, FTNODE *dn) { int r; - struct ftnode_fetch_extra bfe_all; - fill_bfe_for_full_read(&bfe_all, ft_h); + ftnode_fetch_extra bfe_all; + bfe_all.create_for_full_read(ft_h); FTNODE_DISK_DATA ndd = NULL; r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, dn, &ndd, &bfe_all); bool is_leaf = ((*dn)->height == 0); @@ -217,7 +217,6 @@ static int search_cmp(const struct ft_search& UU(so), const DBT* UU(key)) { static void test2(int fd, FT ft_h, FTNODE *dn) { - struct ftnode_fetch_extra bfe_subset; DBT left, right; DB dummy_db; memset(&dummy_db, 0, sizeof(dummy_db)); @@ -225,8 +224,8 @@ test2(int fd, FT ft_h, FTNODE *dn) { memset(&right, 0, sizeof(right)); ft_search search; - fill_bfe_for_subset_read( - &bfe_subset, + ftnode_fetch_extra bfe_subset; + bfe_subset.create_for_subset_read( ft_h, ft_search_init(&search, search_cmp, FT_SEARCH_LEFT, nullptr, nullptr, nullptr), &left, @@ -236,6 +235,7 @@ test2(int fd, FT ft_h, FTNODE *dn) 
{ false, false ); + FTNODE_DISK_DATA ndd = NULL; int r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, dn, &ndd, &bfe_subset); assert(r==0); @@ -270,17 +270,15 @@ test2(int fd, FT ft_h, FTNODE *dn) { static void test3_leaf(int fd, FT ft_h, FTNODE *dn) { - struct ftnode_fetch_extra bfe_min; DBT left, right; DB dummy_db; memset(&dummy_db, 0, sizeof(dummy_db)); memset(&left, 0, sizeof(left)); memset(&right, 0, sizeof(right)); - fill_bfe_for_min_read( - &bfe_min, - ft_h - ); + ftnode_fetch_extra bfe_min; + bfe_min.create_for_min_read(ft_h); + FTNODE_DISK_DATA ndd = NULL; int r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, dn, &ndd, &bfe_min); assert(r==0); diff --git a/ft/tests/ft-serialize-benchmark.cc b/ft/tests/ft-serialize-benchmark.cc index 75f425f4859..089da09038b 100644 --- a/ft/tests/ft-serialize-benchmark.cc +++ b/ft/tests/ft-serialize-benchmark.cc @@ -247,9 +247,9 @@ test_serialize_leaf(int valsize, int nelts, double entropy, int ser_runs, int de total_start.tv_sec = total_start.tv_usec = 0; total_end.tv_sec = total_end.tv_usec = 0; - struct ftnode_fetch_extra bfe; + ftnode_fetch_extra bfe; for (int i = 0; i < deser_runs; i++) { - fill_bfe_for_full_read(&bfe, ft_h); + bfe.create_for_full_read(ft_h); gettimeofday(&t[0], NULL); FTNODE_DISK_DATA ndd2 = NULL; r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd2, &bfe); @@ -392,8 +392,8 @@ test_serialize_nonleaf(int valsize, int nelts, double entropy, int ser_runs, int dt *= 1000; printf("serialize nonleaf(ms): %0.05lf (IGNORED RUNS=%d)\n", dt, ser_runs); - struct ftnode_fetch_extra bfe; - fill_bfe_for_full_read(&bfe, ft_h); + ftnode_fetch_extra bfe; + bfe.create_for_full_read(ft_h); gettimeofday(&t[0], NULL); FTNODE_DISK_DATA ndd2 = NULL; r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd2, &bfe); diff --git a/ft/tests/ft-serialize-test.cc b/ft/tests/ft-serialize-test.cc 
index d97c174d71a..cc66054459a 100644 --- a/ft/tests/ft-serialize-test.cc +++ b/ft/tests/ft-serialize-test.cc @@ -165,14 +165,14 @@ static void setup_dn(enum ftnode_verify_type bft, int fd, FT ft_h, FTNODE *dn, FTNODE_DISK_DATA* ndd) { int r; if (bft == read_all) { - struct ftnode_fetch_extra bfe; - fill_bfe_for_full_read(&bfe, ft_h); + ftnode_fetch_extra bfe; + bfe.create_for_full_read(ft_h); r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, dn, ndd, &bfe); assert(r==0); } else if (bft == read_compressed || bft == read_none) { - struct ftnode_fetch_extra bfe; - fill_bfe_for_min_read(&bfe, ft_h); + ftnode_fetch_extra bfe; + bfe.create_for_min_read(ft_h); r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, dn, ndd, &bfe); assert(r==0); // assert all bp's are compressed or on disk. @@ -199,7 +199,7 @@ setup_dn(enum ftnode_verify_type bft, int fd, FT ft_h, FTNODE *dn, FTNODE_DISK_D // that it is available // then run partial eviction to get it compressed PAIR_ATTR attr; - fill_bfe_for_full_read(&bfe, ft_h); + bfe.create_for_full_read(ft_h); assert(toku_ftnode_pf_req_callback(*dn, &bfe)); r = toku_ftnode_pf_callback(*dn, *ndd, &bfe, fd, &attr); assert(r==0); @@ -221,7 +221,7 @@ setup_dn(enum ftnode_verify_type bft, int fd, FT ft_h, FTNODE *dn, FTNODE_DISK_D } } // now decompress them - fill_bfe_for_full_read(&bfe, ft_h); + bfe.create_for_full_read(ft_h); assert(toku_ftnode_pf_req_callback(*dn, &bfe)); PAIR_ATTR attr; r = toku_ftnode_pf_callback(*dn, *ndd, &bfe, fd, &attr); diff --git a/ft/tests/test-checkpoint-during-flush.cc b/ft/tests/test-checkpoint-during-flush.cc index 59f9748c4c9..49541ae8a6d 100644 --- a/ft/tests/test-checkpoint-during-flush.cc +++ b/ft/tests/test-checkpoint-during-flush.cc @@ -227,8 +227,8 @@ doit (bool after_child_pin) { ); FTNODE node = NULL; - struct ftnode_fetch_extra bfe; - fill_bfe_for_min_read(&bfe, t->ft); + ftnode_fetch_extra bfe; + bfe.create_for_min_read(t->ft); 
toku_pin_ftnode( t->ft, node_root, @@ -282,7 +282,7 @@ doit (bool after_child_pin) { // // now pin the root, verify that we have a message in there, and that it is clean // - fill_bfe_for_full_read(&bfe, c_ft->ft); + bfe.create_for_full_read(c_ft->ft); toku_pin_ftnode( c_ft->ft, node_root, diff --git a/ft/tests/test-checkpoint-during-merge.cc b/ft/tests/test-checkpoint-during-merge.cc index 2c1f8838614..cf8b8e57fae 100644 --- a/ft/tests/test-checkpoint-during-merge.cc +++ b/ft/tests/test-checkpoint-during-merge.cc @@ -245,8 +245,8 @@ doit (int state) { toku_unpin_ftnode(t->ft, node); - struct ftnode_fetch_extra bfe; - fill_bfe_for_min_read(&bfe, t->ft); + ftnode_fetch_extra bfe; + bfe.create_for_min_read(t->ft); toku_pin_ftnode_with_dep_nodes( t->ft, node_root, @@ -305,7 +305,7 @@ doit (int state) { // // now pin the root, verify that the state is what we expect // - fill_bfe_for_full_read(&bfe, c_ft->ft); + bfe.create_for_full_read(c_ft->ft); toku_pin_ftnode_with_dep_nodes( c_ft->ft, node_root, diff --git a/ft/tests/test-checkpoint-during-rebalance.cc b/ft/tests/test-checkpoint-during-rebalance.cc index 6446c26777b..98c4ab5a6c1 100644 --- a/ft/tests/test-checkpoint-during-rebalance.cc +++ b/ft/tests/test-checkpoint-during-rebalance.cc @@ -265,8 +265,8 @@ doit (int state) { toku_unpin_ftnode(t->ft, node); - struct ftnode_fetch_extra bfe; - fill_bfe_for_min_read(&bfe, t->ft); + ftnode_fetch_extra bfe; + bfe.create_for_min_read(t->ft); toku_pin_ftnode( t->ft, node_root, @@ -321,7 +321,7 @@ doit (int state) { // // now pin the root, verify that the state is what we expect // - fill_bfe_for_full_read(&bfe, c_ft->ft); + bfe.create_for_full_read(c_ft->ft); toku_pin_ftnode( c_ft->ft, node_root, diff --git a/ft/tests/test-checkpoint-during-split.cc b/ft/tests/test-checkpoint-during-split.cc index 94f17403336..d25e81b0dcc 100644 --- a/ft/tests/test-checkpoint-during-split.cc +++ b/ft/tests/test-checkpoint-during-split.cc @@ -241,8 +241,8 @@ doit (bool after_split) { ); 
FTNODE node = NULL; - struct ftnode_fetch_extra bfe; - fill_bfe_for_min_read(&bfe, t->ft); + ftnode_fetch_extra bfe; + bfe.create_for_min_read(t->ft); toku_pin_ftnode( t->ft, node_root, @@ -297,7 +297,7 @@ doit (bool after_split) { // // now pin the root, verify that we have a message in there, and that it is clean // - fill_bfe_for_full_read(&bfe, c_ft->ft); + bfe.create_for_full_read(c_ft->ft); toku_pin_ftnode( c_ft->ft, node_root, diff --git a/ft/tests/test-dirty-flushes-on-cleaner.cc b/ft/tests/test-dirty-flushes-on-cleaner.cc index 2c5b97a48da..55aa6c969bc 100644 --- a/ft/tests/test-dirty-flushes-on-cleaner.cc +++ b/ft/tests/test-dirty-flushes-on-cleaner.cc @@ -237,8 +237,8 @@ doit (void) { // now lock and release the leaf node to make sure it is what we expect it to be. FTNODE node = NULL; - struct ftnode_fetch_extra bfe; - fill_bfe_for_min_read(&bfe, ft->ft); + ftnode_fetch_extra bfe; + bfe.create_for_min_read(ft->ft); toku_pin_ftnode_with_dep_nodes( ft->ft, node_leaf, @@ -268,7 +268,7 @@ doit (void) { // node is in memory and another is // on disk // - fill_bfe_for_min_read(&bfe, ft->ft); + bfe.create_for_min_read(ft->ft); toku_pin_ftnode_with_dep_nodes( ft->ft, node_leaf, @@ -289,7 +289,7 @@ doit (void) { // // now let us induce a clean on the internal node // - fill_bfe_for_min_read(&bfe, ft->ft); + bfe.create_for_min_read(ft->ft); toku_pin_ftnode_with_dep_nodes( ft->ft, node_internal, @@ -314,7 +314,7 @@ doit (void) { ); // verify that node_internal's buffer is empty - fill_bfe_for_min_read(&bfe, ft->ft); + bfe.create_for_min_read(ft->ft); toku_pin_ftnode_with_dep_nodes( ft->ft, node_internal, diff --git a/ft/tests/test-flushes-on-cleaner.cc b/ft/tests/test-flushes-on-cleaner.cc index aba314efb0f..bef3286291f 100644 --- a/ft/tests/test-flushes-on-cleaner.cc +++ b/ft/tests/test-flushes-on-cleaner.cc @@ -243,8 +243,8 @@ doit (bool keep_other_bn_in_memory) { assert_zero(r); // now lock and release the leaf node to make sure it is what we expect it to be. 
FTNODE node = NULL; - struct ftnode_fetch_extra bfe; - fill_bfe_for_min_read(&bfe, ft->ft); + ftnode_fetch_extra bfe; + bfe.create_for_min_read(ft->ft); toku_pin_ftnode( ft->ft, node_leaf, @@ -280,7 +280,7 @@ doit (bool keep_other_bn_in_memory) { // but only one should have broadcast message // applied. // - fill_bfe_for_full_read(&bfe, ft->ft); + bfe.create_for_full_read(ft->ft); } else { // @@ -289,7 +289,7 @@ doit (bool keep_other_bn_in_memory) { // node is in memory and another is // on disk // - fill_bfe_for_min_read(&bfe, ft->ft); + bfe.create_for_min_read(ft->ft); } toku_pin_ftnode( ft->ft, @@ -314,7 +314,7 @@ doit (bool keep_other_bn_in_memory) { // // now let us induce a clean on the internal node // - fill_bfe_for_min_read(&bfe, ft->ft); + bfe.create_for_min_read(ft->ft); toku_pin_ftnode( ft->ft, node_internal, @@ -337,7 +337,7 @@ doit (bool keep_other_bn_in_memory) { ); // verify that node_internal's buffer is empty - fill_bfe_for_min_read(&bfe, ft->ft); + bfe.create_for_min_read(ft->ft); toku_pin_ftnode( ft->ft, node_internal, diff --git a/ft/tests/test-hot-with-bounds.cc b/ft/tests/test-hot-with-bounds.cc index 5966fe207b1..efd48b7172e 100644 --- a/ft/tests/test-hot-with-bounds.cc +++ b/ft/tests/test-hot-with-bounds.cc @@ -180,8 +180,8 @@ doit (void) { // the root, one in each buffer, let's verify this. FTNODE node = NULL; - struct ftnode_fetch_extra bfe; - fill_bfe_for_min_read(&bfe, t->ft); + ftnode_fetch_extra bfe; + bfe.create_for_min_read(t->ft); toku_pin_ftnode( t->ft, node_root, @@ -210,7 +210,7 @@ doit (void) { // at this point, we have should have flushed // only the middle buffer, let's verify this. 
node = NULL; - fill_bfe_for_min_read(&bfe, t->ft); + bfe.create_for_min_read(t->ft); toku_pin_ftnode( t->ft, node_root, diff --git a/ft/tests/test-merges-on-cleaner.cc b/ft/tests/test-merges-on-cleaner.cc index 437f33fcd7b..532625b4342 100644 --- a/ft/tests/test-merges-on-cleaner.cc +++ b/ft/tests/test-merges-on-cleaner.cc @@ -229,8 +229,8 @@ doit (void) { r = toku_ft_lookup(ft, toku_fill_dbt(&k, "a", 2), lookup_checkf, &pair); assert(r==0); - struct ftnode_fetch_extra bfe; - fill_bfe_for_min_read(&bfe, ft->ft); + ftnode_fetch_extra bfe; + bfe.create_for_min_read(ft->ft); toku_pin_ftnode( ft->ft, node_internal, @@ -252,7 +252,7 @@ doit (void) { ); // verify that node_internal's buffer is empty - fill_bfe_for_min_read(&bfe, ft->ft); + bfe.create_for_min_read(ft->ft); toku_pin_ftnode( ft->ft, node_internal, diff --git a/ft/tests/test-oldest-referenced-xid-flush.cc b/ft/tests/test-oldest-referenced-xid-flush.cc index c223d12ea71..ef6143dba21 100644 --- a/ft/tests/test-oldest-referenced-xid-flush.cc +++ b/ft/tests/test-oldest-referenced-xid-flush.cc @@ -167,8 +167,8 @@ static void test_oldest_referenced_xid_gets_propogated(void) { // first verify the child FTNODE node = NULL; - struct ftnode_fetch_extra bfe; - fill_bfe_for_min_read(&bfe, t->ft); + ftnode_fetch_extra bfe; + bfe.create_for_min_read(t->ft); toku_pin_ftnode( t->ft, child_nonleaf_blocknum, diff --git a/ft/tests/test3856.cc b/ft/tests/test3856.cc index 4d601ec1c88..e892c334242 100644 --- a/ft/tests/test3856.cc +++ b/ft/tests/test3856.cc @@ -90,7 +90,7 @@ PATENT RIGHTS GRANT: // it used to be the case that we copied the left and right keys of a // range to be prelocked but never freed them, this test checks that they -// are freed (as of this time, this happens in destroy_bfe_for_prefetch) +// are freed (as of this time, this happens in ftnode_fetch_extra::destroy()) #include "test.h" diff --git a/ft/tests/test3884.cc b/ft/tests/test3884.cc index 602e89c57ca..e79c28325f9 100644 --- a/ft/tests/test3884.cc +++ 
b/ft/tests/test3884.cc @@ -90,7 +90,7 @@ PATENT RIGHTS GRANT: // it used to be the case that we copied the left and right keys of a // range to be prelocked but never freed them, this test checks that they -// are freed (as of this time, this happens in destroy_bfe_for_prefetch) +// are freed (as of this time, this happens in ftnode_fetch_extra::destroy()) #include "test.h" diff --git a/ft/tests/test4244.cc b/ft/tests/test4244.cc index e21b86fec9b..8f484129dd0 100644 --- a/ft/tests/test4244.cc +++ b/ft/tests/test4244.cc @@ -147,8 +147,8 @@ doit (void) { // then node_internal should be huge // we pin it and verify that it is not FTNODE node; - struct ftnode_fetch_extra bfe; - fill_bfe_for_full_read(&bfe, t->ft); + ftnode_fetch_extra bfe; + bfe.create_for_full_read(t->ft); toku_pin_ftnode( t->ft, node_internal, diff --git a/ft/tests/test_rightmost_leaf_split_merge.cc b/ft/tests/test_rightmost_leaf_split_merge.cc index caa01d83f0c..854bc9d4609 100644 --- a/ft/tests/test_rightmost_leaf_split_merge.cc +++ b/ft/tests/test_rightmost_leaf_split_merge.cc @@ -143,8 +143,8 @@ static void test_split_merge(void) { BLOCKNUM root_blocknum = ft->h->root_blocknum; FTNODE root_node; - struct ftnode_fetch_extra bfe; - fill_bfe_for_full_read(&bfe, ft); + ftnode_fetch_extra bfe; + bfe.create_for_full_read(ft); toku_pin_ftnode(ft, root_blocknum, toku_cachetable_hash(ft->cf, ft->h->root_blocknum), &bfe, PL_WRITE_EXPENSIVE, &root_node, true); diff --git a/tools/tokuftdump.cc b/tools/tokuftdump.cc index 8b3f48e7f19..a6df16af197 100644 --- a/tools/tokuftdump.cc +++ b/tools/tokuftdump.cc @@ -229,9 +229,9 @@ static int print_le(const void* key, const uint32_t keylen, const LEAFENTRY &le, static void dump_node(int fd, BLOCKNUM blocknum, FT ft) { FTNODE n; - struct ftnode_fetch_extra bfe; - FTNODE_DISK_DATA ndd = NULL; - fill_bfe_for_full_read(&bfe, ft); + FTNODE_DISK_DATA ndd = nullptr; + ftnode_fetch_extra bfe; + bfe.create_for_full_read(ft); int r = toku_deserialize_ftnode_from (fd, 
blocknum, 0 /*pass zero for hash, it doesn't matter*/, &n, &ndd, &bfe); assert_zero(r); assert(n!=0); @@ -366,8 +366,8 @@ static int nodesizes_helper(BLOCKNUM b, int64_t size, int64_t UU(address), void frag_help_extra *CAST_FROM_VOIDP(info, extra); FTNODE n; FTNODE_DISK_DATA ndd = NULL; - struct ftnode_fetch_extra bfe; - fill_bfe_for_full_read(&bfe, info->ft); + ftnode_fetch_extra bfe; + bfe.create_for_full_read(info->ft); int r = toku_deserialize_ftnode_from(info->fd, b, 0 /*pass zero for hash, it doesn't matter*/, &n, &ndd, &bfe); if (r==0) { info->blocksizes += size; From cc4d84af0b9f9b101daa927d29ea99eb1d74e713 Mon Sep 17 00:00:00 2001 From: John Esmet Date: Thu, 24 Jul 2014 16:30:16 -0400 Subject: [PATCH 113/190] FT-279 Constify the comparison operators for cleaniless (and to please the osx build) --- ft/serialize/block_allocator.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ft/serialize/block_allocator.h b/ft/serialize/block_allocator.h index e7791006a1f..ff4f9173500 100644 --- a/ft/serialize/block_allocator.h +++ b/ft/serialize/block_allocator.h @@ -136,10 +136,10 @@ public: blockpair(uint64_t o, uint64_t s) : offset(o), size(s) { } - int operator<(const struct blockpair &rhs) { + int operator<(const struct blockpair &rhs) const { return offset < rhs.offset; } - int operator<(const uint64_t &o) { + int operator<(const uint64_t &o) const { return offset < o; } }; From b95aca7ef3d6c43a25eb91d2aec370e955f506a9 Mon Sep 17 00:00:00 2001 From: John Esmet Date: Fri, 25 Jul 2014 10:06:39 -0400 Subject: [PATCH 114/190] FT-279 Fix a few missed initializers found by src/tests/keyrange.tdb --get 0 test failure Also, tidy up some indentation that got messed up during the refactor. 
--- ft/ft-ops.cc | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/ft/ft-ops.cc b/ft/ft-ops.cc index 502aca43adc..6ba766f5771 100644 --- a/ft/ft-ops.cc +++ b/ft/ft-ops.cc @@ -611,14 +611,19 @@ next_dict_id(void) { // TODO: This isn't so pretty void ftnode_fetch_extra::_create_internal(FT ft_) { ft = ft_; + type = ftnode_fetch_none; + search = nullptr; toku_init_dbt(&range_lock_left_key); toku_init_dbt(&range_lock_right_key); left_is_neg_infty = false; right_is_pos_infty = false; + + // -1 means 'unknown', which is the correct default state child_to_read = -1; disable_prefetching = false; read_all_partitions = false; + bytes_read = 0; io_time = 0; deserialize_time = 0; @@ -631,9 +636,8 @@ void ftnode_fetch_extra::create_for_full_read(FT ft_) { type = ftnode_fetch_all; } -void ftnode_fetch_extra::create_for_keymatch(FT ft_, - const DBT *left, const DBT *right, - bool disable_prefetching_, bool read_all_partitions_) { +void ftnode_fetch_extra::create_for_keymatch(FT ft_, const DBT *left, const DBT *right, + bool disable_prefetching_, bool read_all_partitions_) { _create_internal(ft_); invariant(ft->h->type == FT_CURRENT); @@ -644,14 +648,16 @@ void ftnode_fetch_extra::create_for_keymatch(FT ft_, if (right != nullptr) { toku_copyref_dbt(&range_lock_right_key, *right); } + left_is_neg_infty = left == nullptr; + right_is_pos_infty = right == nullptr; disable_prefetching = disable_prefetching_; read_all_partitions = read_all_partitions_; } void ftnode_fetch_extra::create_for_subset_read(FT ft_, ft_search *search_, - const DBT *left, const DBT *right, - bool left_is_neg_infty_, bool right_is_pos_infty_, - bool disable_prefetching_, bool read_all_partitions_) { + const DBT *left, const DBT *right, + bool left_is_neg_infty_, bool right_is_pos_infty_, + bool disable_prefetching_, bool read_all_partitions_) { _create_internal(ft_); invariant(ft->h->type == FT_CURRENT); @@ -707,7 +713,9 @@ bool 
ftnode_fetch_extra::wants_child_available(int childnum) const { } int ftnode_fetch_extra::leftmost_child_wanted(FTNODE node) const { - paranoid_invariant(type == ftnode_fetch_subset || type == ftnode_fetch_prefetch || type == ftnode_fetch_keymatch); + paranoid_invariant(type == ftnode_fetch_subset || + type == ftnode_fetch_prefetch || + type == ftnode_fetch_keymatch); if (left_is_neg_infty) { return 0; } else if (range_lock_left_key.data == nullptr) { @@ -718,7 +726,9 @@ int ftnode_fetch_extra::leftmost_child_wanted(FTNODE node) const { } int ftnode_fetch_extra::rightmost_child_wanted(FTNODE node) const { - paranoid_invariant(type == ftnode_fetch_subset || type == ftnode_fetch_prefetch || type == ftnode_fetch_keymatch); + paranoid_invariant(type == ftnode_fetch_subset || + type == ftnode_fetch_prefetch || + type == ftnode_fetch_keymatch); if (right_is_pos_infty) { return node->n_children - 1; } else if (range_lock_right_key.data == nullptr) { From 0b8ad621d18fc0ee608a064957d33d87d4802ee5 Mon Sep 17 00:00:00 2001 From: John Esmet Date: Wed, 30 Jul 2014 14:12:25 -0400 Subject: [PATCH 115/190] FT-309 Add padded-fit allocation strategy --- ft/serialize/block_allocator.h | 1 + ft/serialize/block_allocator_strategy.cc | 35 ++++++++++++++++++++---- ft/serialize/block_allocator_strategy.h | 4 +++ tools/ba_replay.cc | 1 + 4 files changed, 35 insertions(+), 6 deletions(-) diff --git a/ft/serialize/block_allocator.h b/ft/serialize/block_allocator.h index ff4f9173500..2ba5e350c1c 100644 --- a/ft/serialize/block_allocator.h +++ b/ft/serialize/block_allocator.h @@ -127,6 +127,7 @@ public: enum allocation_strategy { BA_STRATEGY_FIRST_FIT = 1, BA_STRATEGY_BEST_FIT, + BA_STRATEGY_PADDED_FIT, BA_STRATEGY_HEAT_ZONE }; diff --git a/ft/serialize/block_allocator_strategy.cc b/ft/serialize/block_allocator_strategy.cc index c5575cef164..e4d8f184b61 100644 --- a/ft/serialize/block_allocator_strategy.cc +++ b/ft/serialize/block_allocator_strategy.cc @@ -96,11 +96,20 @@ static uint64_t 
_align(uint64_t value, uint64_t ba_alignment) { return ((value + ba_alignment - 1) / ba_alignment) * ba_alignment; } +static uint64_t _next_power_of_two(uint64_t value) { + uint64_t r = 4096; + while (r < value) { + r *= 2; + invariant(r > 0); + } + return r; +} + // First fit block allocation static struct block_allocator::blockpair * _first_fit(struct block_allocator::blockpair *blocks_array, uint64_t n_blocks, uint64_t size, uint64_t alignment, - bool forward) { + bool forward, uint64_t max_padding) { if (n_blocks == 1) { // won't enter loop, can't underflow the direction < 0 case return nullptr; @@ -113,7 +122,8 @@ _first_fit(struct block_allocator::blockpair *blocks_array, invariant(blocknum < n_blocks); // Consider the space after blocknum struct block_allocator::blockpair *bp = &blocks_array[blocknum]; - uint64_t possible_offset = _align(bp->offset + bp->size, alignment); + uint64_t padded_alignment = max_padding != 0 ? _align(max_padding, alignment) : alignment; + uint64_t possible_offset = _align(bp->offset + bp->size, padded_alignment); if (possible_offset + size <= bp[1].offset) { return bp; } @@ -124,7 +134,7 @@ _first_fit(struct block_allocator::blockpair *blocks_array, struct block_allocator::blockpair * block_allocator_strategy::first_fit(struct block_allocator::blockpair *blocks_array, uint64_t n_blocks, uint64_t size, uint64_t alignment) { - return _first_fit(blocks_array, n_blocks, size, alignment, true); + return _first_fit(blocks_array, n_blocks, size, alignment, true, 0); } // Best fit block allocation @@ -150,6 +160,19 @@ block_allocator_strategy::best_fit(struct block_allocator::blockpair *blocks_arr return best_bp; } +// First fit into a block that is oversized by up to max_padding. +// The hope is that if we purposefully waste a bit of space at allocation +// time we'll be more likely to reuse this block later. 
+struct block_allocator::blockpair * +block_allocator_strategy::padded_fit(struct block_allocator::blockpair *blocks_array, + uint64_t n_blocks, uint64_t size, uint64_t alignment) { + static const uint64_t absolute_max_padding = 128 * 1024; + static const uint64_t desired_fragmentation_divisor = 10; + uint64_t desired_padding = size / desired_fragmentation_divisor; + desired_padding = std::min(_next_power_of_two(desired_padding), absolute_max_padding); + return _first_fit(blocks_array, n_blocks, size, alignment, true, desired_padding); +} + struct block_allocator::blockpair * block_allocator_strategy::heat_zone(struct block_allocator::blockpair *blocks_array, uint64_t n_blocks, uint64_t size, uint64_t alignment, @@ -169,21 +192,21 @@ block_allocator_strategy::heat_zone(struct block_allocator::blockpair *blocks_ar if (blocks_in_zone > 0) { // Find the first fit in the hot zone, going forward. - bp = _first_fit(bp, blocks_in_zone, size, alignment, true); + bp = _first_fit(bp, blocks_in_zone, size, alignment, true, 0); if (bp != nullptr) { return bp; } } if (blocks_outside_zone > 0) { // Find the first fit in the cold zone, going backwards. - bp = _first_fit(bp, blocks_outside_zone, size, alignment, false); + bp = _first_fit(bp, blocks_outside_zone, size, alignment, false, 0); if (bp != nullptr) { return bp; } } } else { // Cold allocations are simply first-fit from the beginning. 
- return _first_fit(blocks_array, n_blocks, size, alignment, true); + return _first_fit(blocks_array, n_blocks, size, alignment, true, 0); } return nullptr; } diff --git a/ft/serialize/block_allocator_strategy.h b/ft/serialize/block_allocator_strategy.h index ffa6412659f..924142a89b4 100644 --- a/ft/serialize/block_allocator_strategy.h +++ b/ft/serialize/block_allocator_strategy.h @@ -104,6 +104,10 @@ public: best_fit(struct block_allocator::blockpair *blocks_array, uint64_t n_blocks, uint64_t size, uint64_t alignment); + static struct block_allocator::blockpair * + padded_fit(struct block_allocator::blockpair *blocks_array, + uint64_t n_blocks, uint64_t size, uint64_t alignment); + static struct block_allocator::blockpair * heat_zone(struct block_allocator::blockpair *blocks_array, uint64_t n_blocks, uint64_t size, uint64_t alignment, diff --git a/tools/ba_replay.cc b/tools/ba_replay.cc index 062feee9de1..776896f5d07 100644 --- a/tools/ba_replay.cc +++ b/tools/ba_replay.cc @@ -366,6 +366,7 @@ int main(void) { vector candidate_strategies; candidate_strategies.push_back(block_allocator::allocation_strategy::BA_STRATEGY_FIRST_FIT); candidate_strategies.push_back(block_allocator::allocation_strategy::BA_STRATEGY_BEST_FIT); + candidate_strategies.push_back(block_allocator::allocation_strategy::BA_STRATEGY_PADDED_FIT); candidate_strategies.push_back(block_allocator::allocation_strategy::BA_STRATEGY_HEAT_ZONE); for (vector::const_iterator it = candidate_strategies.begin(); From ff647c8780461fe3155427006401838d2151c80d Mon Sep 17 00:00:00 2001 From: John Esmet Date: Wed, 30 Jul 2014 15:44:11 -0400 Subject: [PATCH 116/190] FT-272 Simple block allocator strategy unit test, improved base unit test --- ft/tests/block_allocator_strategy_test.cc | 176 ++++++++++++++++++++++ ft/tests/block_allocator_test.cc | 56 ++++--- 2 files changed, 210 insertions(+), 22 deletions(-) create mode 100644 ft/tests/block_allocator_strategy_test.cc diff --git 
a/ft/tests/block_allocator_strategy_test.cc b/ft/tests/block_allocator_strategy_test.cc new file mode 100644 index 00000000000..1b26f82ff9e --- /dev/null +++ b/ft/tests/block_allocator_strategy_test.cc @@ -0,0 +1,176 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: + +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuDB, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. 
+ +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. + + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. 
If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#include "ft/tests/test.h" + +#include "ft/serialize/block_allocator_strategy.h" + +static const uint64_t alignment = 4096; + +static void test_first_vs_best_fit(void) { + struct block_allocator::blockpair pairs[] = { + block_allocator::blockpair(1 * alignment, 6 * alignment), + // hole between 7x align -> 8x align + block_allocator::blockpair(8 * alignment, 4 * alignment), + // hole between 12x align -> 16x align + block_allocator::blockpair(16 * alignment, 1 * alignment), + block_allocator::blockpair(17 * alignment, 2 * alignment), + // hole between 19 align -> 21x align + block_allocator::blockpair(21 * alignment, 2 * alignment), + }; + const uint64_t n_blocks = sizeof(pairs) / sizeof(pairs[0]); + + block_allocator::blockpair *bp; + + // first fit + bp = block_allocator_strategy::first_fit(pairs, n_blocks, 100, alignment); + assert(bp == &pairs[0]); + bp = block_allocator_strategy::first_fit(pairs, n_blocks, 4096, alignment); + assert(bp == &pairs[0]); + bp = block_allocator_strategy::first_fit(pairs, n_blocks, 3 * 4096, alignment); + assert(bp == &pairs[1]); + bp = block_allocator_strategy::first_fit(pairs, n_blocks, 5 * 4096, alignment); + assert(bp == nullptr); + + // best fit + bp = block_allocator_strategy::best_fit(pairs, n_blocks, 100, alignment); + assert(bp == &pairs[0]); + bp = block_allocator_strategy::best_fit(pairs, n_blocks, 4100, 
alignment); + assert(bp == &pairs[3]); + bp = block_allocator_strategy::best_fit(pairs, n_blocks, 3 * 4096, alignment); + assert(bp == &pairs[1]); + bp = block_allocator_strategy::best_fit(pairs, n_blocks, 5 * 4096, alignment); + assert(bp == nullptr); +} + +static void test_padded_fit(void) { + struct block_allocator::blockpair pairs[] = { + block_allocator::blockpair(1 * alignment, 1 * alignment), + // 4096 byte hole after bp[0] + block_allocator::blockpair(3 * alignment, 1 * alignment), + // 8192 byte hole after bp[1] + block_allocator::blockpair(6 * alignment, 1 * alignment), + // 16384 byte hole after bp[2] + block_allocator::blockpair(11 * alignment, 1 * alignment), + // 32768 byte hole after bp[3] + block_allocator::blockpair(17 * alignment, 1 * alignment), + // 116kb hole after bp[4] + block_allocator::blockpair(113 * alignment, 1 * alignment), + // 256kb hole after bp[5] + block_allocator::blockpair(371 * alignment, 1 * alignment), + }; + const uint64_t n_blocks = sizeof(pairs) / sizeof(pairs[0]); + + block_allocator::blockpair *bp; + + // padding for a 100 byte allocation will be < than standard alignment, + // so it should fit in the first 4096 byte hole. 
+ bp = block_allocator_strategy::padded_fit(pairs, n_blocks, 4000, alignment); + assert(bp == &pairs[0]); + + // Even padded, a 12kb alloc will fit in a 16kb hole + bp = block_allocator_strategy::padded_fit(pairs, n_blocks, 3 * alignment, alignment); + assert(bp == &pairs[2]); + + // would normally fit in the 116kb hole but the padding will bring it over + bp = block_allocator_strategy::padded_fit(pairs, n_blocks, 116 * alignment, alignment); + assert(bp == &pairs[5]); + + bp = block_allocator_strategy::padded_fit(pairs, n_blocks, 127 * alignment, alignment); + assert(bp == &pairs[5]); +} + +int test_main(int argc, const char *argv[]) { + (void) argc; + (void) argv; + + test_first_vs_best_fit(); + test_padded_fit(); + + return 0; +} diff --git a/ft/tests/block_allocator_test.cc b/ft/tests/block_allocator_test.cc index b15f115d268..81dcc739bcd 100644 --- a/ft/tests/block_allocator_test.cc +++ b/ft/tests/block_allocator_test.cc @@ -93,7 +93,8 @@ PATENT RIGHTS GRANT: static void ba_alloc(block_allocator *ba, uint64_t size, uint64_t *answer) { ba->validate(); uint64_t actual_answer; - ba->alloc_block(512 * size, 0, &actual_answer); + const uint64_t heat = random() % 2; + ba->alloc_block(512 * size, heat, &actual_answer); ba->validate(); assert(actual_answer%512==0); @@ -123,11 +124,11 @@ static void ba_check_none(block_allocator *ba, uint64_t blocknum_in_layout_order // Simple block allocator test -static void -test_ba0 (void) { +static void test_ba0(block_allocator::allocation_strategy strategy) { block_allocator allocator; block_allocator *ba = &allocator; ba->create(100*512, 1*512); + ba->set_strategy(strategy); assert(ba->allocated_limit()==100*512); uint64_t b2, b3, b4, b5, b6, b7; @@ -160,22 +161,23 @@ test_ba0 (void) { // Manually to get coverage of all the code in the block allocator. 
static void -test_ba1 (int n_initial) { +test_ba1(block_allocator::allocation_strategy strategy, int n_initial) { block_allocator allocator; block_allocator *ba = &allocator; ba->create(0*512, 1*512); - int i; + ba->set_strategy(strategy); + int n_blocks=0; uint64_t blocks[1000]; - for (i=0; i<1000; i++) { - if (i0) { + if (n_blocks > 0) { int blocknum = random()%n_blocks; //printf("F[%d]%ld\n", blocknum, blocks[blocknum]); ba_free(ba, blocks[blocknum]); @@ -197,7 +199,9 @@ test_ba2 (void) uint64_t b[6]; enum { BSIZE = 1024 }; ba->create(100*512, BSIZE*512); + ba->set_strategy(block_allocator::BA_STRATEGY_FIRST_FIT); assert(ba->allocated_limit()==100*512); + ba_check_l (ba, 0, 0, 100); ba_check_none (ba, 1); @@ -206,16 +210,16 @@ test_ba2 (void) ba_check_l (ba, 1, BSIZE, 100); ba_check_none (ba, 2); - ba_alloc (ba, BSIZE+100, &b[1]); + ba_alloc (ba, BSIZE + 100, &b[1]); ba_check_l (ba, 0, 0, 100); ba_check_l (ba, 1, BSIZE, 100); - ba_check_l (ba, 2, 2*BSIZE, BSIZE+100); + ba_check_l (ba, 2, 2*BSIZE, BSIZE + 100); ba_check_none (ba, 3); ba_alloc (ba, 100, &b[2]); ba_check_l (ba, 0, 0, 100); ba_check_l (ba, 1, BSIZE, 100); - ba_check_l (ba, 2, 2*BSIZE, BSIZE+100); + ba_check_l (ba, 2, 2*BSIZE, BSIZE + 100); ba_check_l (ba, 3, 4*BSIZE, 100); ba_check_none (ba, 4); @@ -224,7 +228,7 @@ test_ba2 (void) ba_alloc (ba, 100, &b[5]); ba_check_l (ba, 0, 0, 100); ba_check_l (ba, 1, BSIZE, 100); - ba_check_l (ba, 2, 2*BSIZE, BSIZE+100); + ba_check_l (ba, 2, 2*BSIZE, BSIZE + 100); ba_check_l (ba, 3, 4*BSIZE, 100); ba_check_l (ba, 4, 5*BSIZE, 100); ba_check_l (ba, 5, 6*BSIZE, 100); @@ -234,7 +238,7 @@ test_ba2 (void) ba_free (ba, 4*BSIZE); ba_check_l (ba, 0, 0, 100); ba_check_l (ba, 1, BSIZE, 100); - ba_check_l (ba, 2, 2*BSIZE, BSIZE+100); + ba_check_l (ba, 2, 2*BSIZE, BSIZE + 100); ba_check_l (ba, 3, 5*BSIZE, 100); ba_check_l (ba, 4, 6*BSIZE, 100); ba_check_l (ba, 5, 7*BSIZE, 100); @@ -245,7 +249,7 @@ test_ba2 (void) assert(b2==4*BSIZE); ba_check_l (ba, 0, 0, 100); ba_check_l 
(ba, 1, BSIZE, 100); - ba_check_l (ba, 2, 2*BSIZE, BSIZE+100); + ba_check_l (ba, 2, 2*BSIZE, BSIZE + 100); ba_check_l (ba, 3, 4*BSIZE, 100); ba_check_l (ba, 4, 5*BSIZE, 100); ba_check_l (ba, 5, 6*BSIZE, 100); @@ -255,7 +259,7 @@ test_ba2 (void) ba_free (ba, BSIZE); ba_free (ba, 5*BSIZE); ba_check_l (ba, 0, 0, 100); - ba_check_l (ba, 1, 2*BSIZE, BSIZE+100); + ba_check_l (ba, 1, 2*BSIZE, BSIZE + 100); ba_check_l (ba, 2, 4*BSIZE, 100); ba_check_l (ba, 3, 6*BSIZE, 100); ba_check_l (ba, 4, 7*BSIZE, 100); @@ -273,7 +277,7 @@ test_ba2 (void) assert(b5==5*BSIZE); ba_check_l (ba, 0, 0, 100); ba_check_l (ba, 1, BSIZE, 100); - ba_check_l (ba, 2, 2*BSIZE, BSIZE+100); + ba_check_l (ba, 2, 2*BSIZE, BSIZE + 100); ba_check_l (ba, 3, 4*BSIZE, 100); ba_check_l (ba, 4, 5*BSIZE, 100); ba_check_l (ba, 5, 6*BSIZE, 100); @@ -290,7 +294,7 @@ test_ba2 (void) assert(b8==10*BSIZE); ba_check_l (ba, 0, 0, 100); ba_check_l (ba, 1, BSIZE, 100); - ba_check_l (ba, 2, 2*BSIZE, BSIZE+100); + ba_check_l (ba, 2, 2*BSIZE, BSIZE + 100); ba_check_l (ba, 3, 4*BSIZE, 100); ba_check_l (ba, 4, 5*BSIZE, 100); ba_check_l (ba, 5, 6*BSIZE, 100); @@ -321,10 +325,18 @@ test_ba2 (void) int test_main (int argc __attribute__((__unused__)), const char *argv[] __attribute__((__unused__))) { - test_ba0(); - test_ba1(0); - test_ba1(10); - test_ba1(20); + enum block_allocator::allocation_strategy strategies[] = { + block_allocator::BA_STRATEGY_FIRST_FIT, + block_allocator::BA_STRATEGY_BEST_FIT, + block_allocator::BA_STRATEGY_PADDED_FIT, + block_allocator::BA_STRATEGY_HEAT_ZONE, + }; + for (size_t i = 0; i < sizeof(strategies) / sizeof(strategies[0]); i++) { + test_ba0(strategies[i]); + test_ba1(strategies[i], 0); + test_ba1(strategies[i], 10); + test_ba1(strategies[i], 20); + } test_ba2(); return 0; } From f680b2ccba899aff8af4e195e05903feb11304c1 Mon Sep 17 00:00:00 2001 From: John Esmet Date: Wed, 30 Jul 2014 16:12:42 -0400 Subject: [PATCH 117/190] FT-309 Fix replay tool to not crash when replaying padded-fit --- 
tools/ba_replay.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/ba_replay.cc b/tools/ba_replay.cc index 776896f5d07..b7ca63a5a8d 100644 --- a/tools/ba_replay.cc +++ b/tools/ba_replay.cc @@ -322,6 +322,8 @@ static const char *strategy_str(block_allocator::allocation_strategy strategy) { return "best-fit"; case block_allocator::allocation_strategy::BA_STRATEGY_HEAT_ZONE: return "heat-zone"; + case block_allocator::allocation_strategy::BA_STRATEGY_PADDED_FIT: + return "padded-fit"; default: abort(); } From 59e976de665599863ba7592153212245c4112c1f Mon Sep 17 00:00:00 2001 From: Rich Prohaska Date: Thu, 31 Jul 2014 11:04:49 -0400 Subject: [PATCH 118/190] FT-311 create data and log files with the execute bit OFF --- ft/ft-ops.cc | 13 ++++++------- ft/loader/loader.cc | 2 +- ft/logger/logger.cc | 2 +- 3 files changed, 8 insertions(+), 9 deletions(-) diff --git a/ft/ft-ops.cc b/ft/ft-ops.cc index 6ba766f5771..8a311d312ee 100644 --- a/ft/ft-ops.cc +++ b/ft/ft-ops.cc @@ -2774,19 +2774,20 @@ static inline int ft_open_maybe_direct(const char *filename, int oflag, int mode } } +static const mode_t file_mode = S_IRUSR+S_IWUSR+S_IRGRP+S_IWGRP+S_IROTH+S_IWOTH; + // open a file for use by the ft // Requires: File does not exist. static int ft_create_file(FT_HANDLE UU(ft_handle), const char *fname, int *fdp) { - mode_t mode = S_IRWXU|S_IRWXG|S_IRWXO; int r; int fd; int er; - fd = ft_open_maybe_direct(fname, O_RDWR | O_BINARY, mode); + fd = ft_open_maybe_direct(fname, O_RDWR | O_BINARY, file_mode); assert(fd==-1); if ((er = get_maybe_error_errno()) != ENOENT) { return er; } - fd = ft_open_maybe_direct(fname, O_RDWR | O_CREAT | O_BINARY, mode); + fd = ft_open_maybe_direct(fname, O_RDWR | O_CREAT | O_BINARY, file_mode); if (fd==-1) { r = get_error_errno(); return r; @@ -2804,9 +2805,8 @@ static int ft_create_file(FT_HANDLE UU(ft_handle), const char *fname, int *fdp) // open a file for use by the ft. 
if the file does not exist, error static int ft_open_file(const char *fname, int *fdp) { - mode_t mode = S_IRWXU|S_IRWXG|S_IRWXO; int fd; - fd = ft_open_maybe_direct(fname, O_RDWR | O_BINARY, mode); + fd = ft_open_maybe_direct(fname, O_RDWR | O_BINARY, file_mode); if (fd==-1) { return get_error_errno(); } @@ -2976,13 +2976,12 @@ ft_handle_open(FT_HANDLE ft_h, const char *fname_in_env, int is_create, int only } if (r==ENOENT && is_create) { did_create = true; - mode_t mode = S_IRWXU|S_IRWXG|S_IRWXO; if (txn) { BYTESTRING bs = { .len=(uint32_t) strlen(fname_in_env), .data = (char*)fname_in_env }; toku_logger_save_rollback_fcreate(txn, reserved_filenum, &bs); // bs is a copy of the fname relative to the environment } txn_created = (bool)(txn!=NULL); - toku_logger_log_fcreate(txn, fname_in_env, reserved_filenum, mode, ft_h->options.flags, ft_h->options.nodesize, ft_h->options.basementnodesize, ft_h->options.compression_method); + toku_logger_log_fcreate(txn, fname_in_env, reserved_filenum, file_mode, ft_h->options.flags, ft_h->options.nodesize, ft_h->options.basementnodesize, ft_h->options.compression_method); r = ft_create_file(ft_h, fname_in_cwd, &fd); if (r) { goto exit; } } diff --git a/ft/loader/loader.cc b/ft/loader/loader.cc index db3d3ae225c..8c508265357 100644 --- a/ft/loader/loader.cc +++ b/ft/loader/loader.cc @@ -2731,7 +2731,7 @@ static int loader_do_i (FTLOADER bl, if (r) goto error; { - mode_t mode = S_IRWXU|S_IRWXG|S_IRWXO; + mode_t mode = S_IRUSR+S_IWUSR + S_IRGRP+S_IWGRP; int fd = toku_os_open(new_fname, O_RDWR| O_CREAT | O_BINARY, mode); // #2621 if (fd < 0) { r = get_error_errno(); goto error; diff --git a/ft/logger/logger.cc b/ft/logger/logger.cc index f00044b75a2..3ce3a7cc2a6 100644 --- a/ft/logger/logger.cc +++ b/ft/logger/logger.cc @@ -723,7 +723,7 @@ static int open_logfile (TOKULOGGER logger) snprintf(fname, fnamelen, "%s/log%012lld.tokulog%d", logger->directory, logger->next_log_file_number, TOKU_LOG_VERSION); long long index = 
logger->next_log_file_number; if (logger->write_log_files) { - logger->fd = open(fname, O_CREAT+O_WRONLY+O_TRUNC+O_EXCL+O_BINARY, S_IRWXU); + logger->fd = open(fname, O_CREAT+O_WRONLY+O_TRUNC+O_EXCL+O_BINARY, S_IRUSR+S_IWUSR); if (logger->fd==-1) { return get_error_errno(); } From 0b49d864f985b6f6886a94557caf4ee57b5f6c93 Mon Sep 17 00:00:00 2001 From: Rich Prohaska Date: Thu, 31 Jul 2014 12:23:23 -0400 Subject: [PATCH 119/190] #269 use bulk fetch for replace select --- storage/tokudb/ha_tokudb.cc | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/storage/tokudb/ha_tokudb.cc b/storage/tokudb/ha_tokudb.cc index bbb671b135b..76b4b6a0bdc 100644 --- a/storage/tokudb/ha_tokudb.cc +++ b/storage/tokudb/ha_tokudb.cc @@ -4363,6 +4363,19 @@ static bool index_key_is_null(TABLE *table, uint keynr, const uchar *key, uint k return key_can_be_null && key_len > 0 && key[0] != 0; } +// Return true if bulk fetch can be used +static bool tokudb_do_bulk_fetch(THD *thd) { + switch (thd_sql_command(thd)) { + case SQLCOM_SELECT: + case SQLCOM_CREATE_TABLE: + case SQLCOM_INSERT_SELECT: + case SQLCOM_REPLACE_SELECT: + return true; + default: + return false; + } +} + // // Notification that a range query getting all elements that equal a key // to take place. 
Will pre acquire read lock @@ -4395,7 +4408,7 @@ int ha_tokudb::prepare_index_key_scan(const uchar * key, uint key_len) { range_lock_grabbed = true; range_lock_grabbed_null = index_key_is_null(table, tokudb_active_index, key, key_len); - doing_bulk_fetch = thd_sql_command(thd) == SQLCOM_SELECT || thd_sql_command(thd) == SQLCOM_CREATE_TABLE || thd_sql_command(thd) == SQLCOM_INSERT_SELECT; + doing_bulk_fetch = tokudb_do_bulk_fetch(thd); bulk_fetch_iteration = 0; rows_fetched_using_bulk_fetch = 0; error = 0; @@ -5705,7 +5718,7 @@ int ha_tokudb::prelock_range(const key_range *start_key, const key_range *end_ke } // at this point, determine if we will be doing bulk fetch - doing_bulk_fetch = thd_sql_command(thd) == SQLCOM_SELECT || thd_sql_command(thd) == SQLCOM_CREATE_TABLE || thd_sql_command(thd) == SQLCOM_INSERT_SELECT; + doing_bulk_fetch = tokudb_do_bulk_fetch(thd); bulk_fetch_iteration = 0; rows_fetched_using_bulk_fetch = 0; From 4dbb45fd990c2b3299e7ca87634fd172f79f8703 Mon Sep 17 00:00:00 2001 From: John Esmet Date: Thu, 31 Jul 2014 18:17:31 -0400 Subject: [PATCH 120/190] FT-309 Fix a bug in heat zone, pretty-up the replay's output --- ft/serialize/block_allocator.cc | 5 +- ft/serialize/block_allocator_strategy.cc | 27 +++++----- tools/ba_replay.cc | 64 +++++++++++++++++++----- 3 files changed, 68 insertions(+), 28 deletions(-) diff --git a/ft/serialize/block_allocator.cc b/ft/serialize/block_allocator.cc index 97b7f777621..d9501e8cb4c 100644 --- a/ft/serialize/block_allocator.cc +++ b/ft/serialize/block_allocator.cc @@ -221,6 +221,8 @@ block_allocator::choose_block_to_alloc_after(size_t size, uint64_t heat) { return block_allocator_strategy::best_fit(_blocks_array, _n_blocks, size, _alignment); case BA_STRATEGY_HEAT_ZONE: return block_allocator_strategy::heat_zone(_blocks_array, _n_blocks, size, _alignment, heat); + case BA_STRATEGY_PADDED_FIT: + return block_allocator_strategy::padded_fit(_blocks_array, _n_blocks, size, _alignment); default: abort(); } @@ -260,7 
+262,8 @@ void block_allocator::alloc_block(uint64_t size, uint64_t heat, uint64_t *offset // our allocation strategy chose the space after `bp' to fit the new block uint64_t answer_offset = align(bp->offset + bp->size, _alignment); uint64_t blocknum = bp - _blocks_array; - assert(&_blocks_array[blocknum] == bp); + invariant(&_blocks_array[blocknum] == bp); + invariant(blocknum < _n_blocks); memmove(bp + 2, bp + 1, (_n_blocks - blocknum - 1) * sizeof(*bp)); bp[1].offset = answer_offset; bp[1].size = size; diff --git a/ft/serialize/block_allocator_strategy.cc b/ft/serialize/block_allocator_strategy.cc index e4d8f184b61..fd86f802871 100644 --- a/ft/serialize/block_allocator_strategy.cc +++ b/ft/serialize/block_allocator_strategy.cc @@ -115,16 +115,14 @@ _first_fit(struct block_allocator::blockpair *blocks_array, return nullptr; } - for (uint64_t n_spaces_to_check = n_blocks - 1, - blocknum = forward ? 0 : n_blocks - 2; - n_spaces_to_check > 0; - n_spaces_to_check--, forward ? blocknum++ : blocknum--) { - invariant(blocknum < n_blocks); - // Consider the space after blocknum - struct block_allocator::blockpair *bp = &blocks_array[blocknum]; + struct block_allocator::blockpair *bp = forward ? &blocks_array[0] : &blocks_array[-1]; + for (uint64_t n_spaces_to_check = n_blocks - 1; n_spaces_to_check > 0; + n_spaces_to_check--, forward ? bp++ : bp--) { + // Consider the space after bp uint64_t padded_alignment = max_padding != 0 ? _align(max_padding, alignment) : alignment; uint64_t possible_offset = _align(bp->offset + bp->size, padded_alignment); if (possible_offset + size <= bp[1].offset) { + invariant((forward ? 
bp - blocks_array : blocks_array - bp) < (int64_t) n_blocks); return bp; } } @@ -178,28 +176,29 @@ block_allocator_strategy::heat_zone(struct block_allocator::blockpair *blocks_ar uint64_t n_blocks, uint64_t size, uint64_t alignment, uint64_t heat) { if (heat > 0) { + struct block_allocator::blockpair *bp, *boundary_bp; const double hot_zone_threshold = 0.85; // Hot allocation. Find the beginning of the hot zone. - struct block_allocator::blockpair *bp = &blocks_array[n_blocks - 1]; - uint64_t highest_offset = _align(bp->offset + bp->size, alignment); + boundary_bp = &blocks_array[n_blocks - 1]; + uint64_t highest_offset = _align(boundary_bp->offset + boundary_bp->size, alignment); uint64_t hot_zone_offset = static_cast(hot_zone_threshold * highest_offset); - bp = std::lower_bound(blocks_array, blocks_array + n_blocks, hot_zone_offset); - uint64_t blocks_in_zone = (blocks_array + n_blocks) - bp; - uint64_t blocks_outside_zone = bp - blocks_array; + boundary_bp = std::lower_bound(blocks_array, blocks_array + n_blocks, hot_zone_offset); + uint64_t blocks_in_zone = (blocks_array + n_blocks) - boundary_bp; + uint64_t blocks_outside_zone = boundary_bp - blocks_array; invariant(blocks_in_zone + blocks_outside_zone == n_blocks); if (blocks_in_zone > 0) { // Find the first fit in the hot zone, going forward. - bp = _first_fit(bp, blocks_in_zone, size, alignment, true, 0); + bp = _first_fit(boundary_bp, blocks_in_zone, size, alignment, true, 0); if (bp != nullptr) { return bp; } } if (blocks_outside_zone > 0) { // Find the first fit in the cold zone, going backwards. 
- bp = _first_fit(bp, blocks_outside_zone, size, alignment, false, 0); + bp = _first_fit(boundary_bp, blocks_outside_zone, size, alignment, false, 0); if (bp != nullptr) { return bp; } diff --git a/tools/ba_replay.cc b/tools/ba_replay.cc index b7ca63a5a8d..eab4c16e635 100644 --- a/tools/ba_replay.cc +++ b/tools/ba_replay.cc @@ -111,6 +111,8 @@ using std::set; using std::string; using std::vector; +static bool verbose = false; + static void ba_replay_assert(bool pred, const char *msg, const char *line, int line_num) { if (!pred) { fprintf(stderr, "%s, line (#%d): %s\n", msg, line_num, line); @@ -259,7 +261,10 @@ static void replay_canonicalized_trace(const vector &canonicalized_trace char *line = toku_strdup(it->c_str()); - printf("playing canonical trace line #%d: %s", line_num, line); + if (verbose) { + printf("playing canonical trace line #%d: %s", line_num, line); + } + char *ptr = tidy_line(line); // canonical allocator id is in base 10, not 16 @@ -335,30 +340,43 @@ static void print_result(uint64_t allocator_id, uint64_t total_bytes = report->data_bytes + report->unused_bytes; uint64_t total_blocks = report->data_blocks + report->unused_blocks; if (total_bytes < 32UL * 1024 * 1024) { - printf("skipping allocator_id %" PRId64 " (total bytes < 32mb)\n", allocator_id); + printf(" ...skipping allocator_id %" PRId64 " (total bytes < 32mb)\n", allocator_id); + printf("\n"); return; } - printf("\n"); - printf("allocator_id: %20" PRId64 "\n", allocator_id); - printf("strategy: %20s\n", strategy_str(strategy)); + printf(" allocator_id: %20" PRId64 "\n", allocator_id); + printf(" strategy: %20s\n", strategy_str(strategy)); // byte statistics - printf("total bytes: %20" PRId64 "\n", total_bytes); - printf("used bytes: %20" PRId64 " (%.3lf)\n", report->data_bytes, + printf(" total bytes: %20" PRId64 "\n", total_bytes); + printf(" used bytes: %20" PRId64 " (%.3lf)\n", report->data_bytes, static_cast(report->data_bytes) / total_bytes); - printf("unused bytes: %20" PRId64 " 
(%.3lf)\n", report->unused_bytes, + printf(" unused bytes: %20" PRId64 " (%.3lf)\n", report->unused_bytes, static_cast(report->unused_bytes) / total_bytes); // block statistics - printf("total blocks: %20" PRId64 "\n", total_blocks); - printf("used blocks: %20" PRId64 " (%.3lf)\n", report->data_blocks, + printf(" total blocks: %20" PRId64 "\n", total_blocks); + printf(" used blocks: %20" PRId64 " (%.3lf)\n", report->data_blocks, static_cast(report->data_blocks) / total_blocks); - printf("unused blocks: %20" PRId64 " (%.3lf)\n", report->unused_blocks, + printf(" unused blocks: %20" PRId64 " (%.3lf)\n", report->unused_blocks, static_cast(report->unused_blocks) / total_blocks); // misc - printf("largest unused: %20" PRId64 "\n", report->largest_unused_block); + printf(" largest unused: %20" PRId64 "\n", report->largest_unused_block); + printf("\n"); +} + +static void merge_fragmentation_reports(TOKU_DB_FRAGMENTATION dst, + TOKU_DB_FRAGMENTATION src) { + dst->file_size_bytes += src->file_size_bytes; + dst->data_bytes += src->data_bytes; + dst->data_blocks += src->data_blocks; + dst->checkpoint_bytes_additional += src->checkpoint_bytes_additional; + dst->checkpoint_blocks_additional += src->checkpoint_blocks_additional; + dst->unused_bytes += src->unused_bytes; + dst->unused_blocks += src->unused_blocks; + dst->largest_unused_block += src->largest_unused_block; } int main(void) { @@ -371,6 +389,11 @@ int main(void) { candidate_strategies.push_back(block_allocator::allocation_strategy::BA_STRATEGY_PADDED_FIT); candidate_strategies.push_back(block_allocator::allocation_strategy::BA_STRATEGY_HEAT_ZONE); + printf("\n"); + printf("Individual reports, by allocator:\n"); + printf("\n"); + + map reports_by_strategy; for (vector::const_iterator it = candidate_strategies.begin(); it != candidate_strategies.end(); it++) { const block_allocator::allocation_strategy strategy(*it); @@ -381,16 +404,31 @@ int main(void) { map allocator_map; 
replay_canonicalized_trace(canonicalized_trace, strategy, &allocator_map); + TOKU_DB_FRAGMENTATION_S aggregate_report; + memset(&aggregate_report, 0, sizeof(aggregate_report)); for (map::iterator al = allocator_map.begin(); al != allocator_map.end(); al++) { block_allocator *ba = al->second; TOKU_DB_FRAGMENTATION_S report; + memset(&report, 0, sizeof(report)); ba->get_statistics(&report); ba->destroy(); - print_result(al->first, strategy,&report); + merge_fragmentation_reports(&aggregate_report, &report); + print_result(al->first, strategy, &report); } + reports_by_strategy[strategy] = aggregate_report; + } + + printf("\n"); + printf("Aggregate reports, by strategy:\n"); + printf("\n"); + + for (map::iterator it = reports_by_strategy.begin(); + it != reports_by_strategy.end(); it++) { + TOKU_DB_FRAGMENTATION report = &it->second; + print_result(0, it->first, report); } return 0; From 4a152ecfff78b0e47f41ca7a6b443d2bf3b184e8 Mon Sep 17 00:00:00 2001 From: John Esmet Date: Thu, 31 Jul 2014 18:17:31 -0400 Subject: [PATCH 121/190] FT-309 Fix up tracing code, add a lock (to extend fprintf atomicity to multiple calls), support create_from_blockpairs in the allocator and trace tool --- ft/serialize/block_allocator.cc | 95 +++++++++++++++++++++++++-------- ft/serialize/block_allocator.h | 9 ++++ tools/ba_replay.cc | 88 ++++++++++++++++++++++-------- 3 files changed, 149 insertions(+), 43 deletions(-) diff --git a/ft/serialize/block_allocator.cc b/ft/serialize/block_allocator.cc index d9501e8cb4c..c752ad546b7 100644 --- a/ft/serialize/block_allocator.cc +++ b/ft/serialize/block_allocator.cc @@ -147,24 +147,21 @@ void block_allocator::_create_internal(uint64_t reserve_at_beginning, uint64_t a _n_bytes_in_use = reserve_at_beginning; _strategy = BA_STRATEGY_FIRST_FIT; + memset(&_trace_lock, 0, sizeof(toku_mutex_t)); + toku_mutex_init(&_trace_lock, nullptr); + VALIDATE(); } void block_allocator::create(uint64_t reserve_at_beginning, uint64_t alignment) { 
_create_internal(reserve_at_beginning, alignment); - if (ba_trace_file != nullptr) { - fprintf(ba_trace_file, "ba_trace_create %p\n", this); - fflush(ba_trace_file); - } + _trace_create(); } void block_allocator::destroy() { toku_free(_blocks_array); - - if (ba_trace_file != nullptr) { - fprintf(ba_trace_file, "ba_trace_destroy %p\n", this); - fflush(ba_trace_file); - } + _trace_destroy(); + toku_mutex_destroy(&_trace_lock); } void block_allocator::set_strategy(enum allocation_strategy strategy) { @@ -205,6 +202,8 @@ void block_allocator::create_from_blockpairs(uint64_t reserve_at_beginning, uint } VALIDATE(); + + _trace_create_from_blockpairs(); } // Effect: align a value by rounding up. @@ -282,13 +281,7 @@ done: _n_blocks++; VALIDATE(); - if (ba_trace_file != nullptr) { - fprintf(ba_trace_file, "ba_trace_alloc %p %lu %lu %lu\n", - this, static_cast(size), - static_cast(heat), - static_cast(*offset)); - fflush(ba_trace_file); - } + _trace_alloc(size, heat, *offset); } // Find the index in the blocks array that has a particular offset. Requires that the block exist. 
@@ -330,12 +323,8 @@ void block_allocator::free_block(uint64_t offset) { (_n_blocks - bn - 1) * sizeof(struct blockpair)); _n_blocks--; VALIDATE(); - - if (ba_trace_file != nullptr) { - fprintf(ba_trace_file, "ba_trace_free %p %lu\n", - this, static_cast(offset)); - fflush(ba_trace_file); - } + + _trace_free(offset); } uint64_t block_allocator::block_size(uint64_t offset) { @@ -462,3 +451,65 @@ void block_allocator::validate() const { } assert(n_bytes_in_use == _n_bytes_in_use); } + +// Tracing + +void block_allocator::_trace_create(void) { + if (ba_trace_file != nullptr) { + toku_mutex_lock(&_trace_lock); + fprintf(ba_trace_file, "ba_trace_create %p %lu %lu\n", this, + _reserve_at_beginning, _alignment); + toku_mutex_unlock(&_trace_lock); + + fflush(ba_trace_file); + } +} + +void block_allocator::_trace_create_from_blockpairs(void) { + if (ba_trace_file != nullptr) { + toku_mutex_lock(&_trace_lock); + fprintf(ba_trace_file, "ba_trace_create_from_blockpairs %p %lu %lu ", this, + _reserve_at_beginning, _alignment); + for (uint64_t i = 0; i < _n_blocks; i++) { + fprintf(ba_trace_file, "[%lu %lu] ", _blocks_array[i].offset, _blocks_array[i].size); + } + fprintf(ba_trace_file, "\n"); + toku_mutex_unlock(&_trace_lock); + + fflush(ba_trace_file); + } +} + +void block_allocator::_trace_destroy(void) { + if (ba_trace_file != nullptr) { + toku_mutex_lock(&_trace_lock); + fprintf(ba_trace_file, "ba_trace_destroy %p\n", this); + toku_mutex_unlock(&_trace_lock); + + fflush(ba_trace_file); + } +} + +void block_allocator::_trace_alloc(uint64_t size, uint64_t heat, uint64_t offset) { + if (ba_trace_file != nullptr) { + toku_mutex_lock(&_trace_lock); + fprintf(ba_trace_file, "ba_trace_alloc %p %lu %lu %lu\n", this, + static_cast(size), + static_cast(heat), + static_cast(offset)); + toku_mutex_unlock(&_trace_lock); + + fflush(ba_trace_file); + } +} + +void block_allocator::_trace_free(uint64_t offset) { + if (ba_trace_file != nullptr) { + toku_mutex_lock(&_trace_lock); + 
fprintf(ba_trace_file, "ba_trace_free %p %lu\n", this, + static_cast(offset)); + toku_mutex_unlock(&_trace_lock); + + fflush(ba_trace_file); + } +} diff --git a/ft/serialize/block_allocator.h b/ft/serialize/block_allocator.h index 2ba5e350c1c..d849b656dfc 100644 --- a/ft/serialize/block_allocator.h +++ b/ft/serialize/block_allocator.h @@ -94,6 +94,7 @@ PATENT RIGHTS GRANT: #include +#include "portability/toku_pthread.h" #include "portability/toku_stdint.h" // Block allocator. @@ -241,6 +242,14 @@ private: int64_t find_block(uint64_t offset); struct blockpair *choose_block_to_alloc_after(size_t size, uint64_t heat); + // Tracing + toku_mutex_t _trace_lock; + void _trace_create(void); + void _trace_create_from_blockpairs(void); + void _trace_destroy(void); + void _trace_alloc(uint64_t size, uint64_t heat, uint64_t offset); + void _trace_free(uint64_t offset); + // How much to reserve at the beginning uint64_t _reserve_at_beginning; // Block alignment diff --git a/tools/ba_replay.cc b/tools/ba_replay.cc index eab4c16e635..68890c116ff 100644 --- a/tools/ba_replay.cc +++ b/tools/ba_replay.cc @@ -120,21 +120,16 @@ static void ba_replay_assert(bool pred, const char *msg, const char *line, int l } } -// return line with whitespace skipped, and any newline replaced with a null byte -static char *tidy_line(char *line) { +static char *trim_whitespace(char *line) { // skip leading whitespace while (isspace(*line)) { line++; } - char *ptr = strchr(line, '\n'); - if (ptr != nullptr) { - *ptr = '\0'; - } return line; } static int64_t parse_number(char **ptr, int line_num, int base) { - *ptr = tidy_line(*ptr); + *ptr = trim_whitespace(*ptr); char *new_ptr; int64_t n = strtoll(*ptr, &new_ptr, base); @@ -152,7 +147,7 @@ static uint64_t parse_uint64(char **ptr, int line_num) { } static string parse_token(char **ptr, int line_num) { - char *line = *ptr; + char *line = trim_whitespace(*ptr); // parse the first token, which represents the traced function char token[64]; @@ -162,6 
+157,44 @@ static string parse_token(char **ptr, int line_num) { return string(token); } +static block_allocator::blockpair parse_blockpair(char **ptr, int line_num) { + char *line = trim_whitespace(*ptr); + uint64_t offset, size; + int bytes_read; + int r = sscanf(line, "[%" PRIu64 " %" PRIu64 "]%n", &offset, &size, &bytes_read); + ba_replay_assert(r == 3, "malformed trace", line, line_num); + *ptr += bytes_read; + return block_allocator::blockpair(offset, size); +} + +static char *strip_newline(char *line, bool *found) { + char *ptr = strchr(line, '\n'); + if (ptr != nullptr) { + *found = true; + *ptr = '\0'; + } + return line; +} + +static char *read_trace_line(FILE *file) { + const int buf_size = 4096; + char buf[buf_size]; + std::stringstream ss; + while (true) { + if (fgets(buf, buf_size, file) == nullptr) { + break; + } + bool has_newline = false; + ss << strip_newline(buf, &has_newline); + if (has_newline) { + // end of the line, we're done out + break; + } + } + std::string s = ss.str(); + return toku_strdup(s.c_str()); +} + static vector canonicalize_trace_from(FILE *file) { // new trace, canonicalized from a raw trace vector canonicalized_trace; @@ -182,24 +215,22 @@ static vector canonicalize_trace_from(FILE *file) { map offset_to_seq_num_maps; int line_num = 0; - const int max_line = 512; - char line[max_line]; - while (fgets(line, max_line, file) != nullptr) { + char *line; + while ((line = read_trace_line(file)) != nullptr) { line_num++; - - // removes leading whitespace and trailing newline - char *ptr = tidy_line(line); + char *ptr = line; string fn = parse_token(&ptr, line_num); int64_t allocator_id = parse_number(&ptr, line_num, 16); std::stringstream ss; - if (fn == "ba_trace_create") { - // only allocators created in the raw traec will be part of the - // canonical trace, so save the next canonical allocator id here. + if (fn.find("ba_trace_create") != string::npos) { + // either a create or a create_from_blockpairs. 
either way, + // we only convert the allocator_id to an allocator_id_seq_num + // in the canonical trace and leave the rest of the line as-is. ba_replay_assert(allocator_ids.count(allocator_id) == 0, "corrupted trace: double create", line, line_num); allocator_ids[allocator_id] = allocator_id_seq_num; - ss << fn << ' ' << allocator_id_seq_num << ' ' << std::endl; + ss << fn << ' ' << allocator_id_seq_num << ' ' << trim_whitespace(ptr) << std::endl; allocator_id_seq_num++; } else if (allocator_ids.count(allocator_id) > 0) { // this allocator is part of the canonical trace @@ -243,6 +274,8 @@ static vector canonicalize_trace_from(FILE *file) { continue; } canonicalized_trace.push_back(ss.str()); + + toku_free(line); } return canonicalized_trace; @@ -265,18 +298,31 @@ static void replay_canonicalized_trace(const vector &canonicalized_trace printf("playing canonical trace line #%d: %s", line_num, line); } - char *ptr = tidy_line(line); + char *ptr = trim_whitespace(line); // canonical allocator id is in base 10, not 16 string fn = parse_token(&ptr, line_num); int64_t allocator_id = parse_number(&ptr, line_num, 10); - if (fn == "ba_trace_create") { + if (fn.find("ba_trace_create") != string::npos) { + const uint64_t reserve_at_beginning = parse_uint64(&ptr, line_num); + const uint64_t alignment = parse_uint64(&ptr, line_num); ba_replay_assert(allocator_map->count(allocator_id) == 0, "corrupted canonical trace: double create", line, line_num); block_allocator *ba = new block_allocator(); - ba->create(8096, 4096); // header reserve, alignment - taken from block_table.cc + if (fn == "ba_trace_create") { + ba->create(reserve_at_beginning, alignment); + } else { + ba_replay_assert(fn == "ba_trace_create_from_blockpairs", + "corrupted canonical trace: bad create fn", line, line_num); + vector pairs; + while (*trim_whitespace(ptr) != '\0') { + block_allocator::blockpair bp = parse_blockpair(&ptr, line_num); + pairs.push_back(bp); + } + 
ba->create_from_blockpairs(reserve_at_beginning, alignment, &pairs[0], pairs.size()); + } ba->set_strategy(strategy); // caller owns the allocator_map and its contents From f1141955423e92c306522c6a5b3fa8e5d25c582d Mon Sep 17 00:00:00 2001 From: John Esmet Date: Thu, 31 Jul 2014 19:40:28 -0400 Subject: [PATCH 122/190] FT-300 Fix an issue where `free' calls on blocks created during create_from_blockpairs would crash the replay. Also fix parsing bugs. --- tools/ba_replay.cc | 59 ++++++++++++++++++++++++++++++++++++---------- 1 file changed, 46 insertions(+), 13 deletions(-) diff --git a/tools/ba_replay.cc b/tools/ba_replay.cc index 68890c116ff..8b9bcaa4fda 100644 --- a/tools/ba_replay.cc +++ b/tools/ba_replay.cc @@ -130,10 +130,11 @@ static char *trim_whitespace(char *line) { static int64_t parse_number(char **ptr, int line_num, int base) { *ptr = trim_whitespace(*ptr); + char *line = *ptr; char *new_ptr; - int64_t n = strtoll(*ptr, &new_ptr, base); - ba_replay_assert(n >= 0, "malformed trace", *ptr, line_num); + int64_t n = strtoll(line, &new_ptr, base); + ba_replay_assert(n >= 0, "malformed trace", line, line_num); *ptr = new_ptr; return n; } @@ -147,22 +148,25 @@ static uint64_t parse_uint64(char **ptr, int line_num) { } static string parse_token(char **ptr, int line_num) { - char *line = trim_whitespace(*ptr); + *ptr = trim_whitespace(*ptr); + char *line = *ptr; // parse the first token, which represents the traced function char token[64]; - int r = sscanf(line, "%64s", token); + int r = sscanf(*ptr, "%64s", token); ba_replay_assert(r == 1, "malformed trace", line, line_num); *ptr += strlen(token); return string(token); } static block_allocator::blockpair parse_blockpair(char **ptr, int line_num) { - char *line = trim_whitespace(*ptr); + *ptr = trim_whitespace(*ptr); + char *line = *ptr; + uint64_t offset, size; int bytes_read; int r = sscanf(line, "[%" PRIu64 " %" PRIu64 "]%n", &offset, &size, &bytes_read); - ba_replay_assert(r == 3, "malformed trace", line, 
line_num); + ba_replay_assert(r == 2, "malformed trace", line, line_num); *ptr += bytes_read; return block_allocator::blockpair(offset, size); } @@ -170,7 +174,9 @@ static block_allocator::blockpair parse_blockpair(char **ptr, int line_num) { static char *strip_newline(char *line, bool *found) { char *ptr = strchr(line, '\n'); if (ptr != nullptr) { - *found = true; + if (found != nullptr) { + *found = true; + } *ptr = '\0'; } return line; @@ -192,7 +198,7 @@ static char *read_trace_line(FILE *file) { } } std::string s = ss.str(); - return toku_strdup(s.c_str()); + return s.size() ? toku_strdup(s.c_str()) : nullptr; } static vector canonicalize_trace_from(FILE *file) { @@ -209,6 +215,7 @@ static vector canonicalize_trace_from(FILE *file) { // allocated offset -> allocation seq num // uint64_t allocation_seq_num = 0; + static const uint64_t ASN_NONE = (uint64_t) -1; typedef map offset_seq_map; // raw allocator id -> offset_seq_map that tracks its allocations @@ -232,6 +239,19 @@ static vector canonicalize_trace_from(FILE *file) { allocator_ids[allocator_id] = allocator_id_seq_num; ss << fn << ' ' << allocator_id_seq_num << ' ' << trim_whitespace(ptr) << std::endl; allocator_id_seq_num++; + + // For each blockpair created by this traceline, add its offset to the offset seq map + // with asn ASN_NONE so that later canonicalizations of `free' know whether to write + // down the asn or the raw offset. + // + // First, read passed the reserve / alignment values. 
+ (void) parse_uint64(&ptr, line_num); + (void) parse_uint64(&ptr, line_num); + offset_seq_map *map = &offset_to_seq_num_maps[allocator_id]; + while (*trim_whitespace(ptr) != '\0') { + const block_allocator::blockpair bp = parse_blockpair(&ptr, line_num); + (*map)[bp.offset] = ASN_NONE; + } } else if (allocator_ids.count(allocator_id) > 0) { // this allocator is part of the canonical trace uint64_t canonical_allocator_id = allocator_ids[allocator_id]; @@ -259,8 +279,14 @@ static vector canonicalize_trace_from(FILE *file) { const uint64_t asn = (*map)[offset]; map->erase(offset); - // translate `free(offset)' to `free(asn)' - ss << fn << ' ' << canonical_allocator_id << ' ' << asn << std::endl; + // if there's an asn, then a corresponding ba_trace_alloc occurred and we should + // write `free(asn)'. otherwise, the blockpair was initialized from create_from_blockpairs + // and we write the original offset. + if (asn != ASN_NONE) { + ss << "ba_trace_free_asn" << ' ' << canonical_allocator_id << ' ' << asn << std::endl; + } else { + ss << "ba_trace_free_offset" << ' ' << canonical_allocator_id << ' ' << offset << std::endl; + } } else if (fn == "ba_trace_destroy") { // Remove this allocator from both maps allocator_ids.erase(allocator_id); @@ -293,6 +319,7 @@ static void replay_canonicalized_trace(const vector &canonicalized_trace line_num++; char *line = toku_strdup(it->c_str()); + line = strip_newline(line, nullptr); if (verbose) { printf("playing canonical trace line #%d: %s", line_num, line); @@ -318,7 +345,7 @@ static void replay_canonicalized_trace(const vector &canonicalized_trace "corrupted canonical trace: bad create fn", line, line_num); vector pairs; while (*trim_whitespace(ptr) != '\0') { - block_allocator::blockpair bp = parse_blockpair(&ptr, line_num); + const block_allocator::blockpair bp = parse_blockpair(&ptr, line_num); pairs.push_back(bp); } ba->create_from_blockpairs(reserve_at_beginning, alignment, &pairs[0], pairs.size()); @@ -333,6 +360,7 @@ 
static void replay_canonicalized_trace(const vector &canonicalized_trace block_allocator *ba = (*allocator_map)[allocator_id]; if (fn == "ba_trace_alloc") { + // replay an `alloc' whose result will be associated with a certain asn const uint64_t size = parse_uint64(&ptr, line_num); const uint64_t heat = parse_uint64(&ptr, line_num); const uint64_t asn = parse_uint64(&ptr, line_num); @@ -342,14 +370,19 @@ static void replay_canonicalized_trace(const vector &canonicalized_trace uint64_t offset; ba->alloc_block(size, heat, &offset); seq_num_to_offset[asn] = offset; - } else if (fn == "ba_trace_free") { + } else if (fn == "ba_trace_free_asn") { + // replay a `free' on a block whose offset is the result of an alloc with an asn const uint64_t asn = parse_uint64(&ptr, line_num); ba_replay_assert(seq_num_to_offset.count(asn) == 1, "corrupted canonical trace: double free (asn unused)", line, line_num); - uint64_t offset = seq_num_to_offset[asn]; + const uint64_t offset = seq_num_to_offset[asn]; ba->free_block(offset); seq_num_to_offset.erase(asn); + } else if (fn == "ba_trace_free_offset") { + // replay a `free' on a block whose offset was explicitly set during a create_from_blockpairs + const uint64_t offset = parse_uint64(&ptr, line_num); + ba->free_block(offset); } else if (fn == "ba_trace_destroy") { // TODO: Clean this up - we won't be able to catch no such allocator errors // if we don't actually not the destroy. 
We only do it here so that the caller From bb0b14b5688bc8cbbda295da8f7d40f5a3e28e81 Mon Sep 17 00:00:00 2001 From: John Esmet Date: Thu, 31 Jul 2014 20:10:47 -0400 Subject: [PATCH 123/190] FT-309 Control heat zone and padded fit size via environment variables --- ft/serialize/block_allocator_strategy.cc | 44 ++++++++++++++++++++++-- 1 file changed, 42 insertions(+), 2 deletions(-) diff --git a/ft/serialize/block_allocator_strategy.cc b/ft/serialize/block_allocator_strategy.cc index fd86f802871..6aca323ef1a 100644 --- a/ft/serialize/block_allocator_strategy.cc +++ b/ft/serialize/block_allocator_strategy.cc @@ -158,6 +158,27 @@ block_allocator_strategy::best_fit(struct block_allocator::blockpair *blocks_arr return best_bp; } +static uint64_t desired_fragmentation_divisor = 10; + +// TODO: These compiler specific directives should be abstracted in a portability header +// portability/toku_compiler.h? +__attribute__((__constructor__)) +static void determine_padded_fit_divisor_from_env(void) { + // TODO: Should be in portability as 'toku_os_getenv()?' + const char *s = getenv("TOKU_BA_PADDED_FIT_DIVISOR"); + if (s != nullptr) { + const int64_t divisor = strtoll(s, nullptr, 10); + if (divisor < 0) { + fprintf(stderr, "tokuft: error: block allocator padded fit divisor found in environment (%s), " + "but it's out of range (should be an integer > 0). defaulting to 10\n", s); + desired_fragmentation_divisor = 10; + } else { + fprintf(stderr, "tokuft: setting block allocator padded fit divisor to %s\n", s); + desired_fragmentation_divisor = divisor; + } + } +} + // First fit into a block that is oversized by up to max_padding. // The hope is that if we purposefully waste a bit of space at allocation // time we'll be more likely to reuse this block later. 
@@ -165,19 +186,38 @@ struct block_allocator::blockpair * block_allocator_strategy::padded_fit(struct block_allocator::blockpair *blocks_array, uint64_t n_blocks, uint64_t size, uint64_t alignment) { static const uint64_t absolute_max_padding = 128 * 1024; - static const uint64_t desired_fragmentation_divisor = 10; uint64_t desired_padding = size / desired_fragmentation_divisor; desired_padding = std::min(_next_power_of_two(desired_padding), absolute_max_padding); return _first_fit(blocks_array, n_blocks, size, alignment, true, desired_padding); } +static double hot_zone_threshold = 0.85; + +// TODO: These compiler specific directives should be abstracted in a portability header +// portability/toku_compiler.h? +__attribute__((__constructor__)) +static void determine_hot_zone_threshold_from_env(void) { + // TODO: Should be in portability as 'toku_os_getenv()?' + const char *s = getenv("TOKU_BA_HOT_ZONE_THRESHOLD"); + if (s != nullptr) { + const double hot_zone = strtod(s, nullptr); + if (hot_zone < 1 || hot_zone > 99) { + fprintf(stderr, "tokuft: error: block allocator hot zone threshold found in environment (%s), " + "but it's out of range (should be an integer 1 through 99). defaulting to 85\n", s); + hot_zone_threshold = 85 / 100; + } else { + fprintf(stderr, "tokuft: setting block allocator hot zone threshold to %s\n", s); + hot_zone_threshold = hot_zone / 100; + } + } +} + struct block_allocator::blockpair * block_allocator_strategy::heat_zone(struct block_allocator::blockpair *blocks_array, uint64_t n_blocks, uint64_t size, uint64_t alignment, uint64_t heat) { if (heat > 0) { struct block_allocator::blockpair *bp, *boundary_bp; - const double hot_zone_threshold = 0.85; // Hot allocation. Find the beginning of the hot zone. 
boundary_bp = &blocks_array[n_blocks - 1]; From b5c0a601fd36030af5986ab1a83cc9f5a5f75cbe Mon Sep 17 00:00:00 2001 From: John Esmet Date: Thu, 31 Jul 2014 20:37:35 -0400 Subject: [PATCH 124/190] FT-300 Gather simple trace stats during the first canonical trace replay --- tools/ba_replay.cc | 45 ++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 40 insertions(+), 5 deletions(-) diff --git a/tools/ba_replay.cc b/tools/ba_replay.cc index 8b9bcaa4fda..684b4bd1436 100644 --- a/tools/ba_replay.cc +++ b/tools/ba_replay.cc @@ -307,16 +307,31 @@ static vector canonicalize_trace_from(FILE *file) { return canonicalized_trace; } +struct canonical_trace_stats { + uint64_t n_lines_replayed; + + uint64_t n_create; + uint64_t n_create_from_blockpairs; + uint64_t n_alloc_hot; + uint64_t n_alloc_cold; + uint64_t n_free; + uint64_t n_destroy; + + canonical_trace_stats() { + memset(this, 0, sizeof(*this)); + } +}; + static void replay_canonicalized_trace(const vector &canonicalized_trace, block_allocator::allocation_strategy strategy, - map *allocator_map) { + map *allocator_map, + struct canonical_trace_stats *stats) { // maps allocation seq num to allocated offset map seq_num_to_offset; - int line_num = 0; for (vector::const_iterator it = canonicalized_trace.begin(); it != canonicalized_trace.end(); it++) { - line_num++; + const int line_num = stats->n_lines_replayed++; char *line = toku_strdup(it->c_str()); line = strip_newline(line, nullptr); @@ -340,6 +355,7 @@ static void replay_canonicalized_trace(const vector &canonicalized_trace block_allocator *ba = new block_allocator(); if (fn == "ba_trace_create") { ba->create(reserve_at_beginning, alignment); + stats->n_create++; } else { ba_replay_assert(fn == "ba_trace_create_from_blockpairs", "corrupted canonical trace: bad create fn", line, line_num); @@ -349,6 +365,7 @@ static void replay_canonicalized_trace(const vector &canonicalized_trace pairs.push_back(bp); } ba->create_from_blockpairs(reserve_at_beginning, alignment, 
&pairs[0], pairs.size()); + stats->n_create_from_blockpairs++; } ba->set_strategy(strategy); @@ -370,6 +387,7 @@ static void replay_canonicalized_trace(const vector &canonicalized_trace uint64_t offset; ba->alloc_block(size, heat, &offset); seq_num_to_offset[asn] = offset; + heat ? stats->n_alloc_hot++ : stats->n_alloc_cold++; } else if (fn == "ba_trace_free_asn") { // replay a `free' on a block whose offset is the result of an alloc with an asn const uint64_t asn = parse_uint64(&ptr, line_num); @@ -379,15 +397,18 @@ static void replay_canonicalized_trace(const vector &canonicalized_trace const uint64_t offset = seq_num_to_offset[asn]; ba->free_block(offset); seq_num_to_offset.erase(asn); + stats->n_free++; } else if (fn == "ba_trace_free_offset") { // replay a `free' on a block whose offset was explicitly set during a create_from_blockpairs const uint64_t offset = parse_uint64(&ptr, line_num); ba->free_block(offset); + stats->n_free++; } else if (fn == "ba_trace_destroy") { // TODO: Clean this up - we won't be able to catch no such allocator errors // if we don't actually not the destroy. We only do it here so that the caller // can gather statistics on all closed allocators at the end of the run. // allocator_map->erase(allocator_id); + stats->n_destroy++; } else { ba_replay_assert(false, "corrupted canonical trace: bad fn", line, line_num); } @@ -472,6 +493,7 @@ int main(void) { printf("Individual reports, by allocator:\n"); printf("\n"); + struct canonical_trace_stats stats; map reports_by_strategy; for (vector::const_iterator it = candidate_strategies.begin(); it != candidate_strategies.end(); it++) { @@ -480,8 +502,11 @@ int main(void) { // replay the canonicalized trace against the current strategy. 
// // we provided the allocator map so we can gather statistics later + struct canonical_trace_stats dummy_stats; map allocator_map; - replay_canonicalized_trace(canonicalized_trace, strategy, &allocator_map); + replay_canonicalized_trace(canonicalized_trace, strategy, &allocator_map, + // Only need to gather canonical trace stats once + it == candidate_strategies.begin() ? &stats : &dummy_stats); TOKU_DB_FRAGMENTATION_S aggregate_report; memset(&aggregate_report, 0, sizeof(aggregate_report)); @@ -500,7 +525,6 @@ int main(void) { reports_by_strategy[strategy] = aggregate_report; } - printf("\n"); printf("Aggregate reports, by strategy:\n"); printf("\n"); @@ -510,5 +534,16 @@ int main(void) { print_result(0, it->first, report); } + printf("Overall trace stats:\n"); + printf("\n"); + printf(" n_lines_played: %9" PRIu64 "\n", stats.n_lines_replayed); + printf(" n_create: %9" PRIu64 "\n", stats.n_create); + printf(" n_create_from_blockpairs: %9" PRIu64 "\n", stats.n_create_from_blockpairs); + printf(" n_alloc_hot: %9" PRIu64 "\n", stats.n_alloc_hot); + printf(" n_alloc_cold: %9" PRIu64 "\n", stats.n_alloc_cold); + printf(" n_free: %9" PRIu64 "\n", stats.n_free); + printf(" n_destroy: %9" PRIu64 "\n", stats.n_destroy); + printf("\n"); + return 0; } From 600a8647d73dcf157020a9a060da3aaa45faadf3 Mon Sep 17 00:00:00 2001 From: John Esmet Date: Thu, 31 Jul 2014 20:57:41 -0400 Subject: [PATCH 125/190] FT-300 Print terse results when verbose = false --- tools/ba_replay.cc | 33 ++++++++++++++++++++++++++++----- 1 file changed, 28 insertions(+), 5 deletions(-) diff --git a/tools/ba_replay.cc b/tools/ba_replay.cc index 684b4bd1436..ad1b62cf4fc 100644 --- a/tools/ba_replay.cc +++ b/tools/ba_replay.cc @@ -111,6 +111,7 @@ using std::set; using std::string; using std::vector; +static bool debug = false; static bool verbose = false; static void ba_replay_assert(bool pred, const char *msg, const char *line, int line_num) { @@ -336,7 +337,7 @@ static void 
replay_canonicalized_trace(const vector &canonicalized_trace char *line = toku_strdup(it->c_str()); line = strip_newline(line, nullptr); - if (verbose) { + if (debug) { printf("playing canonical trace line #%d: %s", line_num, line); } @@ -434,14 +435,13 @@ static const char *strategy_str(block_allocator::allocation_strategy strategy) { } } -static void print_result(uint64_t allocator_id, - block_allocator::allocation_strategy strategy, - TOKU_DB_FRAGMENTATION report) { +static void print_result_verbose(uint64_t allocator_id, + block_allocator::allocation_strategy strategy, + TOKU_DB_FRAGMENTATION report) { uint64_t total_bytes = report->data_bytes + report->unused_bytes; uint64_t total_blocks = report->data_blocks + report->unused_blocks; if (total_bytes < 32UL * 1024 * 1024) { printf(" ...skipping allocator_id %" PRId64 " (total bytes < 32mb)\n", allocator_id); - printf("\n"); return; } @@ -467,6 +467,26 @@ static void print_result(uint64_t allocator_id, printf("\n"); } +static void print_result(uint64_t allocator_id, + block_allocator::allocation_strategy strategy, + TOKU_DB_FRAGMENTATION report) { + uint64_t total_bytes = report->data_bytes + report->unused_bytes; + if (total_bytes < 32UL * 1024 * 1024) { + if (verbose) { + printf(" ...skipping allocator_id %" PRId64 " (total bytes < 32mb)\n", allocator_id); + printf("\n"); + } + return; + } + if (verbose) { + print_result_verbose(allocator_id, strategy, report); + } else { + printf(" %-15s: allocator %" PRId64 ", %.3lf used bytes\n", + strategy_str(strategy), allocator_id, + static_cast(report->data_bytes) / total_bytes); + } +} + static void merge_fragmentation_reports(TOKU_DB_FRAGMENTATION dst, TOKU_DB_FRAGMENTATION src) { dst->file_size_bytes += src->file_size_bytes; @@ -523,8 +543,10 @@ int main(void) { print_result(al->first, strategy, &report); } reports_by_strategy[strategy] = aggregate_report; + printf("\n"); } + printf("\n"); printf("Aggregate reports, by strategy:\n"); printf("\n"); @@ -534,6 +556,7 
@@ int main(void) { print_result(0, it->first, report); } + printf("\n"); printf("Overall trace stats:\n"); printf("\n"); printf(" n_lines_played: %9" PRIu64 "\n", stats.n_lines_replayed); From 30f97ccdfda4e5a4530215b3bb0876bd0dfa288d Mon Sep 17 00:00:00 2001 From: John Esmet Date: Thu, 31 Jul 2014 22:43:37 -0400 Subject: [PATCH 126/190] FT-300 Add mean / stddev tracking for block allocation sizes to ba_replay --- tools/ba_replay.cc | 44 +++++++++++++++++++++++++++++++++++++------- 1 file changed, 37 insertions(+), 7 deletions(-) diff --git a/tools/ba_replay.cc b/tools/ba_replay.cc index ad1b62cf4fc..bd7e8610d54 100644 --- a/tools/ba_replay.cc +++ b/tools/ba_replay.cc @@ -91,6 +91,7 @@ PATENT RIGHTS GRANT: #include +#include #include #include @@ -308,6 +309,26 @@ static vector canonicalize_trace_from(FILE *file) { return canonicalized_trace; } +struct streaming_variance_calculator { + int64_t n_samples; + int64_t mean; + int64_t variance; + + // math credit: AoCP, Donald Knuth, '62 + void add_sample(int64_t x) { + n_samples++; + if (n_samples == 1) { + mean = x; + variance = 0; + } else { + int64_t old_mean = mean; + mean = old_mean + ((x - old_mean) / n_samples); + variance = (((n_samples - 1) * variance) + + ((x - old_mean) * (x - mean))) / n_samples; + } + } +}; + struct canonical_trace_stats { uint64_t n_lines_replayed; @@ -318,6 +339,9 @@ struct canonical_trace_stats { uint64_t n_free; uint64_t n_destroy; + struct streaming_variance_calculator alloc_hot_bytes; + struct streaming_variance_calculator alloc_cold_bytes; + canonical_trace_stats() { memset(this, 0, sizeof(*this)); } @@ -389,6 +413,7 @@ static void replay_canonicalized_trace(const vector &canonicalized_trace ba->alloc_block(size, heat, &offset); seq_num_to_offset[asn] = offset; heat ? stats->n_alloc_hot++ : stats->n_alloc_cold++; + heat ? 
stats->alloc_hot_bytes.add_sample(size) : stats->alloc_cold_bytes.add_sample(size); } else if (fn == "ba_trace_free_asn") { // replay a `free' on a block whose offset is the result of an alloc with an asn const uint64_t asn = parse_uint64(&ptr, line_num); @@ -559,13 +584,18 @@ int main(void) { printf("\n"); printf("Overall trace stats:\n"); printf("\n"); - printf(" n_lines_played: %9" PRIu64 "\n", stats.n_lines_replayed); - printf(" n_create: %9" PRIu64 "\n", stats.n_create); - printf(" n_create_from_blockpairs: %9" PRIu64 "\n", stats.n_create_from_blockpairs); - printf(" n_alloc_hot: %9" PRIu64 "\n", stats.n_alloc_hot); - printf(" n_alloc_cold: %9" PRIu64 "\n", stats.n_alloc_cold); - printf(" n_free: %9" PRIu64 "\n", stats.n_free); - printf(" n_destroy: %9" PRIu64 "\n", stats.n_destroy); + printf(" n_lines_played: %15" PRIu64 "\n", stats.n_lines_replayed); + printf(" n_create: %15" PRIu64 "\n", stats.n_create); + printf(" n_create_from_blockpairs: %15" PRIu64 "\n", stats.n_create_from_blockpairs); + printf(" n_alloc_hot: %15" PRIu64 "\n", stats.n_alloc_hot); + printf(" n_alloc_cold: %15" PRIu64 "\n", stats.n_alloc_cold); + printf(" n_free: %15" PRIu64 "\n", stats.n_free); + printf(" n_destroy: %15" PRIu64 "\n", stats.n_destroy); + printf("\n"); + printf(" avg_alloc_hot: %15" PRIu64 "\n", stats.alloc_hot_bytes.mean); + printf(" stddev_alloc_hot: %15" PRIu64 "\n", (uint64_t) sqrt(stats.alloc_hot_bytes.variance)); + printf(" avg_alloc_cold: %15" PRIu64 "\n", stats.alloc_cold_bytes.mean); + printf(" stddev_alloc_cold: %15" PRIu64 "\n", (uint64_t) sqrt(stats.alloc_cold_bytes.variance)); printf("\n"); return 0; From 66fb8ffe3c1205009e3d6cd6de073d2c36d20e3c Mon Sep 17 00:00:00 2001 From: John Esmet Date: Fri, 1 Aug 2014 11:29:37 -0400 Subject: [PATCH 127/190] FT-309 Change the way padded-fit allocation alignment works --- ft/serialize/block_allocator_strategy.cc | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git 
a/ft/serialize/block_allocator_strategy.cc b/ft/serialize/block_allocator_strategy.cc index 6aca323ef1a..2120baa233f 100644 --- a/ft/serialize/block_allocator_strategy.cc +++ b/ft/serialize/block_allocator_strategy.cc @@ -158,23 +158,23 @@ block_allocator_strategy::best_fit(struct block_allocator::blockpair *blocks_arr return best_bp; } -static uint64_t desired_fragmentation_divisor = 10; +static uint64_t padded_fit_alignment = 64 * 1024; // TODO: These compiler specific directives should be abstracted in a portability header // portability/toku_compiler.h? __attribute__((__constructor__)) -static void determine_padded_fit_divisor_from_env(void) { +static void determine_padded_fit_alignment_from_env(void) { // TODO: Should be in portability as 'toku_os_getenv()?' - const char *s = getenv("TOKU_BA_PADDED_FIT_DIVISOR"); + const char *s = getenv("TOKU_BA_PADDED_FIT_ALIGNMENT"); if (s != nullptr) { - const int64_t divisor = strtoll(s, nullptr, 10); - if (divisor < 0) { - fprintf(stderr, "tokuft: error: block allocator padded fit divisor found in environment (%s), " + const int64_t alignment = strtoll(s, nullptr, 10); + if (alignment < 0) { + fprintf(stderr, "tokuft: error: block allocator padded fit alignment found in environment (%s), " "but it's out of range (should be an integer > 0). 
defaulting to 10\n", s); - desired_fragmentation_divisor = 10; + padded_fit_alignment = 64 * 1024; } else { - fprintf(stderr, "tokuft: setting block allocator padded fit divisor to %s\n", s); - desired_fragmentation_divisor = divisor; + padded_fit_alignment = _next_power_of_two(alignment); + fprintf(stderr, "tokuft: setting block allocator padded fit alignment to %" PRIu64 "\n", padded_fit_alignment); } } } @@ -185,10 +185,7 @@ static void determine_padded_fit_divisor_from_env(void) { struct block_allocator::blockpair * block_allocator_strategy::padded_fit(struct block_allocator::blockpair *blocks_array, uint64_t n_blocks, uint64_t size, uint64_t alignment) { - static const uint64_t absolute_max_padding = 128 * 1024; - uint64_t desired_padding = size / desired_fragmentation_divisor; - desired_padding = std::min(_next_power_of_two(desired_padding), absolute_max_padding); - return _first_fit(blocks_array, n_blocks, size, alignment, true, desired_padding); + return _first_fit(blocks_array, n_blocks, size, alignment, true, padded_fit_alignment); } static double hot_zone_threshold = 0.85; From 20a2dd6672f997fc0f90dfb220ab658d46887fe1 Mon Sep 17 00:00:00 2001 From: John Esmet Date: Fri, 1 Aug 2014 12:38:57 -0400 Subject: [PATCH 128/190] FT-300 Use portable printf format strings in the block allocator's tracing code --- ft/serialize/block_allocator.cc | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/ft/serialize/block_allocator.cc b/ft/serialize/block_allocator.cc index c752ad546b7..c81799d6746 100644 --- a/ft/serialize/block_allocator.cc +++ b/ft/serialize/block_allocator.cc @@ -457,8 +457,8 @@ void block_allocator::validate() const { void block_allocator::_trace_create(void) { if (ba_trace_file != nullptr) { toku_mutex_lock(&_trace_lock); - fprintf(ba_trace_file, "ba_trace_create %p %lu %lu\n", this, - _reserve_at_beginning, _alignment); + fprintf(ba_trace_file, "ba_trace_create %p %" PRIu64 " %" PRIu64 "\n", + this, 
_reserve_at_beginning, _alignment); toku_mutex_unlock(&_trace_lock); fflush(ba_trace_file); @@ -468,10 +468,11 @@ void block_allocator::_trace_create(void) { void block_allocator::_trace_create_from_blockpairs(void) { if (ba_trace_file != nullptr) { toku_mutex_lock(&_trace_lock); - fprintf(ba_trace_file, "ba_trace_create_from_blockpairs %p %lu %lu ", this, - _reserve_at_beginning, _alignment); + fprintf(ba_trace_file, "ba_trace_create_from_blockpairs %p %" PRIu64 " %" PRIu64 " ", + this, _reserve_at_beginning, _alignment); for (uint64_t i = 0; i < _n_blocks; i++) { - fprintf(ba_trace_file, "[%lu %lu] ", _blocks_array[i].offset, _blocks_array[i].size); + fprintf(ba_trace_file, "[%" PRIu64 " %" PRIu64 "] ", + _blocks_array[i].offset, _blocks_array[i].size); } fprintf(ba_trace_file, "\n"); toku_mutex_unlock(&_trace_lock); @@ -493,10 +494,8 @@ void block_allocator::_trace_destroy(void) { void block_allocator::_trace_alloc(uint64_t size, uint64_t heat, uint64_t offset) { if (ba_trace_file != nullptr) { toku_mutex_lock(&_trace_lock); - fprintf(ba_trace_file, "ba_trace_alloc %p %lu %lu %lu\n", this, - static_cast(size), - static_cast(heat), - static_cast(offset)); + fprintf(ba_trace_file, "ba_trace_alloc %p %" PRIu64 " %" PRIu64 " %" PRIu64 "\n", + this, size, heat, offset); toku_mutex_unlock(&_trace_lock); fflush(ba_trace_file); @@ -506,8 +505,7 @@ void block_allocator::_trace_alloc(uint64_t size, uint64_t heat, uint64_t offset void block_allocator::_trace_free(uint64_t offset) { if (ba_trace_file != nullptr) { toku_mutex_lock(&_trace_lock); - fprintf(ba_trace_file, "ba_trace_free %p %lu\n", this, - static_cast(offset)); + fprintf(ba_trace_file, "ba_trace_free %p %" PRIu64 "\n", this, offset); toku_mutex_unlock(&_trace_lock); fflush(ba_trace_file); From 214f66232eb931118d3f08838ff75d22c436554b Mon Sep 17 00:00:00 2001 From: John Esmet Date: Fri, 1 Aug 2014 12:59:48 -0400 Subject: [PATCH 129/190] FT-300 Report before and after fragmentation reports so we can see how 
fragmentation changed --- tools/ba_replay.cc | 154 ++++++++++++++++++++++++++------------------- 1 file changed, 90 insertions(+), 64 deletions(-) diff --git a/tools/ba_replay.cc b/tools/ba_replay.cc index bd7e8610d54..e964c8263d6 100644 --- a/tools/ba_replay.cc +++ b/tools/ba_replay.cc @@ -306,6 +306,9 @@ static vector canonicalize_trace_from(FILE *file) { toku_free(line); } + ba_replay_assert(allocator_ids.size() == 0, + "corrupted trace: leaked allocators", "(no specific line)", -1); + return canonicalized_trace; } @@ -347,10 +350,35 @@ struct canonical_trace_stats { } }; +struct fragmentation_report { + TOKU_DB_FRAGMENTATION_S beginning; + TOKU_DB_FRAGMENTATION_S end; + fragmentation_report() { + memset(this, 0, sizeof(*this)); + } + void merge(const struct fragmentation_report &src_report) { + for (int i = 0; i < 2; i++) { + TOKU_DB_FRAGMENTATION_S *dst = i == 0 ? &beginning : &end; + const TOKU_DB_FRAGMENTATION_S *src = i == 0 ? &src_report.beginning : &src_report.end; + dst->file_size_bytes += src->file_size_bytes; + dst->data_bytes += src->data_bytes; + dst->data_blocks += src->data_blocks; + dst->checkpoint_bytes_additional += src->checkpoint_bytes_additional; + dst->checkpoint_blocks_additional += src->checkpoint_blocks_additional; + dst->unused_bytes += src->unused_bytes; + dst->unused_blocks += src->unused_blocks; + dst->largest_unused_block += src->largest_unused_block; + } + } +}; + static void replay_canonicalized_trace(const vector &canonicalized_trace, block_allocator::allocation_strategy strategy, - map *allocator_map, + map *reports, struct canonical_trace_stats *stats) { + // maps an allocator id to its block allocator + map allocator_map; + // maps allocation seq num to allocated offset map seq_num_to_offset; @@ -374,7 +402,7 @@ static void replay_canonicalized_trace(const vector &canonicalized_trace if (fn.find("ba_trace_create") != string::npos) { const uint64_t reserve_at_beginning = parse_uint64(&ptr, line_num); const uint64_t alignment = 
parse_uint64(&ptr, line_num); - ba_replay_assert(allocator_map->count(allocator_id) == 0, + ba_replay_assert(allocator_map.count(allocator_id) == 0, "corrupted canonical trace: double create", line, line_num); block_allocator *ba = new block_allocator(); @@ -394,13 +422,15 @@ static void replay_canonicalized_trace(const vector &canonicalized_trace } ba->set_strategy(strategy); - // caller owns the allocator_map and its contents - (*allocator_map)[allocator_id] = ba; + TOKU_DB_FRAGMENTATION_S report; + ba->get_statistics(&report); + (*reports)[allocator_id].beginning = report; + allocator_map[allocator_id] = ba; } else { - ba_replay_assert(allocator_map->count(allocator_id) > 0, + ba_replay_assert(allocator_map.count(allocator_id) > 0, "corrupted canonical trace: no such allocator", line, line_num); - block_allocator *ba = (*allocator_map)[allocator_id]; + block_allocator *ba = allocator_map[allocator_id]; if (fn == "ba_trace_alloc") { // replay an `alloc' whose result will be associated with a certain asn const uint64_t size = parse_uint64(&ptr, line_num); @@ -430,10 +460,11 @@ static void replay_canonicalized_trace(const vector &canonicalized_trace ba->free_block(offset); stats->n_free++; } else if (fn == "ba_trace_destroy") { - // TODO: Clean this up - we won't be able to catch no such allocator errors - // if we don't actually not the destroy. We only do it here so that the caller - // can gather statistics on all closed allocators at the end of the run. 
- // allocator_map->erase(allocator_id); + TOKU_DB_FRAGMENTATION_S report; + ba->get_statistics(&report); + ba->destroy(); + (*reports)[allocator_id].end = report; + allocator_map.erase(allocator_id); stats->n_destroy++; } else { ba_replay_assert(false, "corrupted canonical trace: bad fn", line, line_num); @@ -442,6 +473,9 @@ static void replay_canonicalized_trace(const vector &canonicalized_trace toku_free(line); } + + ba_replay_assert(allocator_map.size() == 0, + "corrupted canonical trace: leaked allocators", "(no specific line", -1); } // TODO: Put this in the allocation strategy class @@ -462,10 +496,8 @@ static const char *strategy_str(block_allocator::allocation_strategy strategy) { static void print_result_verbose(uint64_t allocator_id, block_allocator::allocation_strategy strategy, - TOKU_DB_FRAGMENTATION report) { - uint64_t total_bytes = report->data_bytes + report->unused_bytes; - uint64_t total_blocks = report->data_blocks + report->unused_blocks; - if (total_bytes < 32UL * 1024 * 1024) { + const struct fragmentation_report &report) { + if (report.end.data_bytes + report.end.unused_bytes < 32UL * 1024 * 1024) { printf(" ...skipping allocator_id %" PRId64 " (total bytes < 32mb)\n", allocator_id); return; } @@ -473,30 +505,42 @@ static void print_result_verbose(uint64_t allocator_id, printf(" allocator_id: %20" PRId64 "\n", allocator_id); printf(" strategy: %20s\n", strategy_str(strategy)); - // byte statistics - printf(" total bytes: %20" PRId64 "\n", total_bytes); - printf(" used bytes: %20" PRId64 " (%.3lf)\n", report->data_bytes, - static_cast(report->data_bytes) / total_bytes); - printf(" unused bytes: %20" PRId64 " (%.3lf)\n", report->unused_bytes, - static_cast(report->unused_bytes) / total_bytes); + for (int i = 0; i < 2; i++) { + const TOKU_DB_FRAGMENTATION_S *r = i == 0 ? &report.beginning : &report.end; + printf("%s\n", i == 0 ? 
"BEFORE" : "AFTER"); - // block statistics - printf(" total blocks: %20" PRId64 "\n", total_blocks); - printf(" used blocks: %20" PRId64 " (%.3lf)\n", report->data_blocks, - static_cast(report->data_blocks) / total_blocks); - printf(" unused blocks: %20" PRId64 " (%.3lf)\n", report->unused_blocks, - static_cast(report->unused_blocks) / total_blocks); + uint64_t total_bytes = r->data_bytes + r->unused_bytes; + uint64_t total_blocks = r->data_blocks + r->unused_blocks; - // misc - printf(" largest unused: %20" PRId64 "\n", report->largest_unused_block); - printf("\n"); + // byte statistics + printf(" total bytes: %20" PRId64 "\n", total_bytes); + printf(" used bytes: %20" PRId64 " (%.3lf)\n", r->data_bytes, + static_cast(r->data_bytes) / total_bytes); + printf(" unused bytes: %20" PRId64 " (%.3lf)\n", r->unused_bytes, + static_cast(r->unused_bytes) / total_bytes); + + // block statistics + printf(" total blocks: %20" PRId64 "\n", total_blocks); + printf(" used blocks: %20" PRId64 " (%.3lf)\n", r->data_blocks, + static_cast(r->data_blocks) / total_blocks); + printf(" unused blocks: %20" PRId64 " (%.3lf)\n", r->unused_blocks, + static_cast(r->unused_blocks) / total_blocks); + + // misc + printf(" largest unused: %20" PRId64 "\n", r->largest_unused_block); + printf("\n"); + } } static void print_result(uint64_t allocator_id, block_allocator::allocation_strategy strategy, - TOKU_DB_FRAGMENTATION report) { - uint64_t total_bytes = report->data_bytes + report->unused_bytes; - if (total_bytes < 32UL * 1024 * 1024) { + const struct fragmentation_report &report) { + const TOKU_DB_FRAGMENTATION_S *beginning = &report.beginning; + const TOKU_DB_FRAGMENTATION_S *end = &report.end; + + uint64_t total_beginning_bytes = beginning->data_bytes + beginning->unused_bytes; + uint64_t total_end_bytes = end->data_bytes + end->unused_bytes; + if (total_end_bytes < 32UL * 1024 * 1024) { if (verbose) { printf(" ...skipping allocator_id %" PRId64 " (total bytes < 32mb)\n", allocator_id); 
printf("\n"); @@ -506,24 +550,13 @@ static void print_result(uint64_t allocator_id, if (verbose) { print_result_verbose(allocator_id, strategy, report); } else { - printf(" %-15s: allocator %" PRId64 ", %.3lf used bytes\n", + printf(" %-15s: allocator %" PRId64 ", %.3lf used bytes (%.3lf before)\n", strategy_str(strategy), allocator_id, - static_cast(report->data_bytes) / total_bytes); + static_cast(report.end.data_bytes) / total_end_bytes, + static_cast(report.beginning.data_bytes) / total_beginning_bytes); } } -static void merge_fragmentation_reports(TOKU_DB_FRAGMENTATION dst, - TOKU_DB_FRAGMENTATION src) { - dst->file_size_bytes += src->file_size_bytes; - dst->data_bytes += src->data_bytes; - dst->data_blocks += src->data_blocks; - dst->checkpoint_bytes_additional += src->checkpoint_bytes_additional; - dst->checkpoint_blocks_additional += src->checkpoint_blocks_additional; - dst->unused_bytes += src->unused_bytes; - dst->unused_blocks += src->unused_blocks; - dst->largest_unused_block += src->largest_unused_block; -} - int main(void) { // Read the raw trace from stdin vector canonicalized_trace = canonicalize_trace_from(stdin); @@ -539,7 +572,7 @@ int main(void) { printf("\n"); struct canonical_trace_stats stats; - map reports_by_strategy; + map reports_by_strategy; for (vector::const_iterator it = candidate_strategies.begin(); it != candidate_strategies.end(); it++) { const block_allocator::allocation_strategy strategy(*it); @@ -548,24 +581,18 @@ int main(void) { // // we provided the allocator map so we can gather statistics later struct canonical_trace_stats dummy_stats; - map allocator_map; - replay_canonicalized_trace(canonicalized_trace, strategy, &allocator_map, + map reports; + replay_canonicalized_trace(canonicalized_trace, strategy, &reports, // Only need to gather canonical trace stats once it == candidate_strategies.begin() ? 
&stats : &dummy_stats); - TOKU_DB_FRAGMENTATION_S aggregate_report; + struct fragmentation_report aggregate_report; memset(&aggregate_report, 0, sizeof(aggregate_report)); - for (map::iterator al = allocator_map.begin(); - al != allocator_map.end(); al++) { - block_allocator *ba = al->second; - - TOKU_DB_FRAGMENTATION_S report; - memset(&report, 0, sizeof(report)); - ba->get_statistics(&report); - ba->destroy(); - - merge_fragmentation_reports(&aggregate_report, &report); - print_result(al->first, strategy, &report); + for (map::iterator rp = reports.begin(); + rp != reports.end(); rp++) { + const struct fragmentation_report &report = rp->second; + aggregate_report.merge(report); + print_result(rp->first, strategy, report); } reports_by_strategy[strategy] = aggregate_report; printf("\n"); @@ -575,10 +602,9 @@ int main(void) { printf("Aggregate reports, by strategy:\n"); printf("\n"); - for (map::iterator it = reports_by_strategy.begin(); + for (map::iterator it = reports_by_strategy.begin(); it != reports_by_strategy.end(); it++) { - TOKU_DB_FRAGMENTATION report = &it->second; - print_result(0, it->first, report); + print_result(0, it->first, it->second); } printf("\n"); From bade886e9f0f52e4ef66e1b92e7da08406d71dda Mon Sep 17 00:00:00 2001 From: John Esmet Date: Fri, 1 Aug 2014 13:29:00 -0400 Subject: [PATCH 130/190] FT-300 Proceed with leaked allocators, so that we can partially anaylze a running trace (that is, a trace that is still getting written to by some process) --- tools/ba_replay.cc | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tools/ba_replay.cc b/tools/ba_replay.cc index e964c8263d6..ca4b6daf8e8 100644 --- a/tools/ba_replay.cc +++ b/tools/ba_replay.cc @@ -306,8 +306,9 @@ static vector canonicalize_trace_from(FILE *file) { toku_free(line); } - ba_replay_assert(allocator_ids.size() == 0, - "corrupted trace: leaked allocators", "(no specific line)", -1); + if (allocator_ids.size() != 0) { + fprintf(stderr, "warning: 
leaked allocators. this is ok if the trace is still live"); + } return canonicalized_trace; } @@ -473,9 +474,6 @@ static void replay_canonicalized_trace(const vector &canonicalized_trace toku_free(line); } - - ba_replay_assert(allocator_map.size() == 0, - "corrupted canonical trace: leaked allocators", "(no specific line", -1); } // TODO: Put this in the allocation strategy class @@ -497,7 +495,9 @@ static const char *strategy_str(block_allocator::allocation_strategy strategy) { static void print_result_verbose(uint64_t allocator_id, block_allocator::allocation_strategy strategy, const struct fragmentation_report &report) { - if (report.end.data_bytes + report.end.unused_bytes < 32UL * 1024 * 1024) { + if (report.end.data_bytes + report.end.unused_bytes + + report.beginning.data_bytes + report.beginning.unused_bytes + < 32UL * 1024 * 1024) { printf(" ...skipping allocator_id %" PRId64 " (total bytes < 32mb)\n", allocator_id); return; } @@ -540,7 +540,7 @@ static void print_result(uint64_t allocator_id, uint64_t total_beginning_bytes = beginning->data_bytes + beginning->unused_bytes; uint64_t total_end_bytes = end->data_bytes + end->unused_bytes; - if (total_end_bytes < 32UL * 1024 * 1024) { + if (total_end_bytes + total_beginning_bytes < 32UL * 1024 * 1024) { if (verbose) { printf(" ...skipping allocator_id %" PRId64 " (total bytes < 32mb)\n", allocator_id); printf("\n"); From ba4cc58dead9deccc90cdad950d673906fedcf8b Mon Sep 17 00:00:00 2001 From: Rich Prohaska Date: Mon, 4 Aug 2014 15:49:28 -0400 Subject: [PATCH 131/190] #271 turn off FT debug flags by default. 
can be overridden by cmake arguments --- storage/tokudb/CMakeLists.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/storage/tokudb/CMakeLists.txt b/storage/tokudb/CMakeLists.txt index dfca3c67ded..c7c3f164eed 100644 --- a/storage/tokudb/CMakeLists.txt +++ b/storage/tokudb/CMakeLists.txt @@ -14,6 +14,10 @@ IF(NOT TOKUDB_OK) RETURN() ENDIF() +SET(BUILD_TESTING OFF CACHE BOOL "") +SET(USE_VALGRIND OFF CACHE BOOL "") +SET(TOKU_DEBUG_PARANOID OFF CACHE BOOL "") + IF(NOT DEFINED TOKUDB_VERSION) IF(DEFINED ENV{TOKUDB_VERSION}) SET(TOKUDB_VERSION $ENV{TOKUDB_VERSION}) From f94e36a5af887775c7c5623983be38c3e1eeaf7a Mon Sep 17 00:00:00 2001 From: John Esmet Date: Tue, 5 Aug 2014 10:28:32 -0400 Subject: [PATCH 132/190] FT-309 Default padded fit alignment should be 4096 --- ft/serialize/block_allocator_strategy.cc | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/ft/serialize/block_allocator_strategy.cc b/ft/serialize/block_allocator_strategy.cc index 2120baa233f..b6354966e3b 100644 --- a/ft/serialize/block_allocator_strategy.cc +++ b/ft/serialize/block_allocator_strategy.cc @@ -88,6 +88,8 @@ PATENT RIGHTS GRANT: #include +#include + #include "portability/toku_assert.h" #include "ft/serialize/block_allocator_strategy.h" @@ -96,7 +98,7 @@ static uint64_t _align(uint64_t value, uint64_t ba_alignment) { return ((value + ba_alignment - 1) / ba_alignment) * ba_alignment; } -static uint64_t _next_power_of_two(uint64_t value) { +static uint64_t _roundup_to_power_of_two(uint64_t value) { uint64_t r = 4096; while (r < value) { r *= 2; @@ -158,7 +160,7 @@ block_allocator_strategy::best_fit(struct block_allocator::blockpair *blocks_arr return best_bp; } -static uint64_t padded_fit_alignment = 64 * 1024; +static uint64_t padded_fit_alignment = 4096; // TODO: These compiler specific directives should be abstracted in a portability header // portability/toku_compiler.h? 
@@ -166,15 +168,16 @@ __attribute__((__constructor__)) static void determine_padded_fit_alignment_from_env(void) { // TODO: Should be in portability as 'toku_os_getenv()?' const char *s = getenv("TOKU_BA_PADDED_FIT_ALIGNMENT"); - if (s != nullptr) { + if (s != nullptr && strlen(s) > 0) { const int64_t alignment = strtoll(s, nullptr, 10); - if (alignment < 0) { + if (alignment <= 0) { fprintf(stderr, "tokuft: error: block allocator padded fit alignment found in environment (%s), " - "but it's out of range (should be an integer > 0). defaulting to 10\n", s); - padded_fit_alignment = 64 * 1024; + "but it's out of range (should be an integer > 0). defaulting to %" PRIu64 "\n", + s, padded_fit_alignment); } else { - padded_fit_alignment = _next_power_of_two(alignment); - fprintf(stderr, "tokuft: setting block allocator padded fit alignment to %" PRIu64 "\n", padded_fit_alignment); + padded_fit_alignment = _roundup_to_power_of_two(alignment); + fprintf(stderr, "tokuft: setting block allocator padded fit alignment to %" PRIu64 "\n", + padded_fit_alignment); } } } @@ -196,7 +199,7 @@ __attribute__((__constructor__)) static void determine_hot_zone_threshold_from_env(void) { // TODO: Should be in portability as 'toku_os_getenv()?' const char *s = getenv("TOKU_BA_HOT_ZONE_THRESHOLD"); - if (s != nullptr) { + if (s != nullptr && strlen(s) > 0) { const double hot_zone = strtod(s, nullptr); if (hot_zone < 1 || hot_zone > 99) { fprintf(stderr, "tokuft: error: block allocator hot zone threshold found in environment (%s), " From fd25ba710d342ba5c957ec21c1368ef79e796931 Mon Sep 17 00:00:00 2001 From: John Esmet Date: Mon, 11 Aug 2014 15:05:13 -0400 Subject: [PATCH 133/190] FT-300 Add the ability to include/exclude certain strategies and clean up report formatting a bit. Also strengthen malformed trace detection. 
--- tools/ba_replay.cc | 145 ++++++++++++++++++++++++++++++--------------- 1 file changed, 98 insertions(+), 47 deletions(-) diff --git a/tools/ba_replay.cc b/tools/ba_replay.cc index ca4b6daf8e8..b5e5fe7932f 100644 --- a/tools/ba_replay.cc +++ b/tools/ba_replay.cc @@ -91,6 +91,7 @@ PATENT RIGHTS GRANT: #include +#include #include #include #include @@ -112,8 +113,7 @@ using std::set; using std::string; using std::vector; -static bool debug = false; -static bool verbose = false; +static int verbose = false; static void ba_replay_assert(bool pred, const char *msg, const char *line, int line_num) { if (!pred) { @@ -136,14 +136,14 @@ static int64_t parse_number(char **ptr, int line_num, int base) { char *new_ptr; int64_t n = strtoll(line, &new_ptr, base); - ba_replay_assert(n >= 0, "malformed trace", line, line_num); + ba_replay_assert(n >= 0, "malformed trace (bad numeric token)", line, line_num); + ba_replay_assert(new_ptr > *ptr, "malformed trace (missing numeric token)", line, line_num); *ptr = new_ptr; return n; } static uint64_t parse_uint64(char **ptr, int line_num) { int64_t n = parse_number(ptr, line_num, 10); - ba_replay_assert(n >= 0, "malformed trace", *ptr, line_num); // we happen to know that the uint64's we deal with will // take less than 63 bits (they come from pointers) return static_cast(n); @@ -156,7 +156,7 @@ static string parse_token(char **ptr, int line_num) { // parse the first token, which represents the traced function char token[64]; int r = sscanf(*ptr, "%64s", token); - ba_replay_assert(r == 1, "malformed trace", line, line_num); + ba_replay_assert(r == 1, "malformed trace (missing string token)", line, line_num); *ptr += strlen(token); return string(token); } @@ -168,7 +168,7 @@ static block_allocator::blockpair parse_blockpair(char **ptr, int line_num) { uint64_t offset, size; int bytes_read; int r = sscanf(line, "[%" PRIu64 " %" PRIu64 "]%n", &offset, &size, &bytes_read); - ba_replay_assert(r == 2, "malformed trace", line, line_num); + 
ba_replay_assert(r == 2, "malformed trace (bad offset/size pair)", line, line_num); *ptr += bytes_read; return block_allocator::blockpair(offset, size); } @@ -234,28 +234,31 @@ static vector canonicalize_trace_from(FILE *file) { std::stringstream ss; if (fn.find("ba_trace_create") != string::npos) { - // either a create or a create_from_blockpairs. either way, + ba_replay_assert(allocator_ids.count(allocator_id) == 0, "corrupted trace: double create", line, line_num); + ba_replay_assert(fn == "ba_trace_create" || fn == "ba_trace_create_from_blockpairs", + "corrupted trace: bad fn", line, line_num); + // we only convert the allocator_id to an allocator_id_seq_num // in the canonical trace and leave the rest of the line as-is. - ba_replay_assert(allocator_ids.count(allocator_id) == 0, "corrupted trace: double create", line, line_num); allocator_ids[allocator_id] = allocator_id_seq_num; ss << fn << ' ' << allocator_id_seq_num << ' ' << trim_whitespace(ptr) << std::endl; allocator_id_seq_num++; - // For each blockpair created by this traceline, add its offset to the offset seq map - // with asn ASN_NONE so that later canonicalizations of `free' know whether to write - // down the asn or the raw offset. - // // First, read passed the reserve / alignment values. (void) parse_uint64(&ptr, line_num); (void) parse_uint64(&ptr, line_num); - offset_seq_map *map = &offset_to_seq_num_maps[allocator_id]; - while (*trim_whitespace(ptr) != '\0') { - const block_allocator::blockpair bp = parse_blockpair(&ptr, line_num); - (*map)[bp.offset] = ASN_NONE; + if (fn == "ba_trace_create_from_blockpairs") { + // For each blockpair created by this traceline, add its offset to the offset seq map + // with asn ASN_NONE so that later canonicalizations of `free' know whether to write + // down the asn or the raw offset. 
+ offset_seq_map *map = &offset_to_seq_num_maps[allocator_id]; + while (*trim_whitespace(ptr) != '\0') { + const block_allocator::blockpair bp = parse_blockpair(&ptr, line_num); + (*map)[bp.offset] = ASN_NONE; + } } - } else if (allocator_ids.count(allocator_id) > 0) { - // this allocator is part of the canonical trace + } else { + ba_replay_assert(allocator_ids.count(allocator_id) > 0, "corrupted trace: unknown allocator", line, line_num); uint64_t canonical_allocator_id = allocator_ids[allocator_id]; // this is the map that tracks allocations for this allocator @@ -296,10 +299,9 @@ static vector canonicalize_trace_from(FILE *file) { // translate `destroy(ptr_id) to destroy(canonical_id)' ss << fn << ' ' << canonical_allocator_id << ' ' << std::endl; + } else { + ba_replay_assert(false, "corrupted trace: bad fn", line, line_num); } - } else { - // traced an alloc/free for an allocator not created as part of this trace, skip - continue; } canonicalized_trace.push_back(ss.str()); @@ -307,7 +309,7 @@ static vector canonicalize_trace_from(FILE *file) { } if (allocator_ids.size() != 0) { - fprintf(stderr, "warning: leaked allocators. this is ok if the trace is still live"); + fprintf(stderr, "warning: leaked allocators. 
this might be ok if the tracing process is still running"); } return canonicalized_trace; @@ -390,10 +392,6 @@ static void replay_canonicalized_trace(const vector &canonicalized_trace char *line = toku_strdup(it->c_str()); line = strip_newline(line, nullptr); - if (debug) { - printf("playing canonical trace line #%d: %s", line_num, line); - } - char *ptr = trim_whitespace(line); // canonical allocator id is in base 10, not 16 @@ -476,8 +474,7 @@ static void replay_canonicalized_trace(const vector &canonicalized_trace } } -// TODO: Put this in the allocation strategy class -static const char *strategy_str(block_allocator::allocation_strategy strategy) { +static const char *strategy_to_cstring(block_allocator::allocation_strategy strategy) { switch (strategy) { case block_allocator::allocation_strategy::BA_STRATEGY_FIRST_FIT: return "first-fit"; @@ -492,6 +489,23 @@ static const char *strategy_str(block_allocator::allocation_strategy strategy) { } } +static block_allocator::allocation_strategy cstring_to_strategy(const char *str) { + if (strcmp(str, "first-fit") == 0) { + return block_allocator::allocation_strategy::BA_STRATEGY_FIRST_FIT; + } + if (strcmp(str, "best-fit") == 0) { + return block_allocator::allocation_strategy::BA_STRATEGY_BEST_FIT; + } + if (strcmp(str, "heat-zone") == 0) { + return block_allocator::allocation_strategy::BA_STRATEGY_HEAT_ZONE; + } + if (strcmp(str, "padded-fit") != 0) { + fprintf(stderr, "bad strategy string: %s\n", str); + abort(); + } + return block_allocator::allocation_strategy::BA_STRATEGY_PADDED_FIT; +} + static void print_result_verbose(uint64_t allocator_id, block_allocator::allocation_strategy strategy, const struct fragmentation_report &report) { @@ -503,7 +517,7 @@ static void print_result_verbose(uint64_t allocator_id, } printf(" allocator_id: %20" PRId64 "\n", allocator_id); - printf(" strategy: %20s\n", strategy_str(strategy)); + printf(" strategy: %20s\n", strategy_to_cstring(strategy)); for (int i = 0; i < 2; i++) { 
const TOKU_DB_FRAGMENTATION_S *r = i == 0 ? &report.beginning : &report.end; @@ -528,7 +542,6 @@ static void print_result_verbose(uint64_t allocator_id, // misc printf(" largest unused: %20" PRId64 "\n", r->largest_unused_block); - printf("\n"); } } @@ -542,38 +555,76 @@ static void print_result(uint64_t allocator_id, uint64_t total_end_bytes = end->data_bytes + end->unused_bytes; if (total_end_bytes + total_beginning_bytes < 32UL * 1024 * 1024) { if (verbose) { - printf(" ...skipping allocator_id %" PRId64 " (total bytes < 32mb)\n", allocator_id); printf("\n"); + printf(" ...skipping allocator_id %" PRId64 " (total bytes < 32mb)\n", allocator_id); } return; } + printf("\n"); if (verbose) { print_result_verbose(allocator_id, strategy, report); } else { printf(" %-15s: allocator %" PRId64 ", %.3lf used bytes (%.3lf before)\n", - strategy_str(strategy), allocator_id, + strategy_to_cstring(strategy), allocator_id, static_cast(report.end.data_bytes) / total_end_bytes, static_cast(report.beginning.data_bytes) / total_beginning_bytes); } } -int main(void) { - // Read the raw trace from stdin +static int only_aggregate_reports; + +static struct option getopt_options[] = { + { "verbose", no_argument, &verbose, 1 }, + { "only-aggregate-reports", no_argument, &only_aggregate_reports, 1 }, + { "include-strategy", required_argument, nullptr, 'i' }, + { "exclude-strategy", required_argument, nullptr, 'x' }, + { nullptr, 0, nullptr, 0 }, +}; + +int main(int argc, char *argv[]) { + int opt; + set candidate_strategies, excluded_strategies; + while ((opt = getopt_long(argc, argv, "", getopt_options, nullptr)) != -1) { + switch (opt) { + case 0: + break; + case 'i': + candidate_strategies.insert(cstring_to_strategy(optarg)); + break; + case 'x': + excluded_strategies.insert(cstring_to_strategy(optarg)); + break; + case '?': + default: + abort(); + }; + } + // Default to everything if nothing was explicitly included. 
+ if (candidate_strategies.empty()) { + candidate_strategies.insert(block_allocator::allocation_strategy::BA_STRATEGY_FIRST_FIT); + candidate_strategies.insert(block_allocator::allocation_strategy::BA_STRATEGY_BEST_FIT); + candidate_strategies.insert(block_allocator::allocation_strategy::BA_STRATEGY_PADDED_FIT); + candidate_strategies.insert(block_allocator::allocation_strategy::BA_STRATEGY_HEAT_ZONE); + } + // ..but remove anything that was explicitly excluded + for (set::const_iterator it = excluded_strategies.begin(); + it != excluded_strategies.end(); it++) { + candidate_strategies.erase(*it); + } + + // Run the real trace + // + // First, read the raw trace from stdin vector canonicalized_trace = canonicalize_trace_from(stdin); - vector candidate_strategies; - candidate_strategies.push_back(block_allocator::allocation_strategy::BA_STRATEGY_FIRST_FIT); - candidate_strategies.push_back(block_allocator::allocation_strategy::BA_STRATEGY_BEST_FIT); - candidate_strategies.push_back(block_allocator::allocation_strategy::BA_STRATEGY_PADDED_FIT); - candidate_strategies.push_back(block_allocator::allocation_strategy::BA_STRATEGY_HEAT_ZONE); - - printf("\n"); - printf("Individual reports, by allocator:\n"); - printf("\n"); + if (!only_aggregate_reports) { + printf("\n"); + printf("Individual reports, by allocator:\n"); + } struct canonical_trace_stats stats; map reports_by_strategy; - for (vector::const_iterator it = candidate_strategies.begin(); + for (set::const_iterator it = candidate_strategies.begin(); it != candidate_strategies.end(); it++) { const block_allocator::allocation_strategy strategy(*it); @@ -592,15 +643,15 @@ int main(void) { rp != reports.end(); rp++) { const struct fragmentation_report &report = rp->second; aggregate_report.merge(report); - print_result(rp->first, strategy, report); + if (!only_aggregate_reports) { + print_result(rp->first, strategy, report); + } } reports_by_strategy[strategy] = aggregate_report; - printf("\n"); } printf("\n"); 
printf("Aggregate reports, by strategy:\n"); - printf("\n"); for (map::iterator it = reports_by_strategy.begin(); it != reports_by_strategy.end(); it++) { From ec2641f7ca1ac3b74295b62f8642d891c5bc1242 Mon Sep 17 00:00:00 2001 From: Rich Prohaska Date: Mon, 11 Aug 2014 16:46:06 -0400 Subject: [PATCH 134/190] Tokutek/mariadb-5.5#71 run part_index_scan on mariadb --- mysql-test/suite/tokudb.bugs/t/part_index_scan.test | 1 + 1 file changed, 1 insertion(+) diff --git a/mysql-test/suite/tokudb.bugs/t/part_index_scan.test b/mysql-test/suite/tokudb.bugs/t/part_index_scan.test index be60fca3af3..69c4380bf50 100644 --- a/mysql-test/suite/tokudb.bugs/t/part_index_scan.test +++ b/mysql-test/suite/tokudb.bugs/t/part_index_scan.test @@ -1,6 +1,7 @@ # verify that index scans on parititions are not slow # due to tokudb bulk fetch not being used source include/have_tokudb.inc; +source include/have_partition.inc; set default_storage_engine='tokudb'; disable_warnings; drop table if exists t,t1,t2,t3; From 0b72d47cefbad4477f0ec510012e50a7059dd7b1 Mon Sep 17 00:00:00 2001 From: Rich Prohaska Date: Tue, 12 Aug 2014 12:56:15 -0400 Subject: [PATCH 135/190] #272 set TokuDB product name --- storage/tokudb/hatoku_hton.cc | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/storage/tokudb/hatoku_hton.cc b/storage/tokudb/hatoku_hton.cc index b9970f3377d..85bdf72ef9e 100644 --- a/storage/tokudb/hatoku_hton.cc +++ b/storage/tokudb/hatoku_hton.cc @@ -320,6 +320,9 @@ static void handle_ydb_error(int error) { sql_print_error(" "); sql_print_error("************************************************************"); break; + default: + sql_print_error("%s unknown error %d", tokudb_hton_name, error); + break; } } @@ -410,6 +413,12 @@ static int tokudb_init_func(void *p) { tokudb_home = mysql_real_data_home; DBUG_PRINT("info", ("tokudb_home: %s", tokudb_home)); + r = db_env_set_toku_product_name(tokudb_hton_name); + if (r) { + sql_print_error("%s can not set product name error %d", tokudb_hton_name, 
r); + goto error; + } + if ((r = db_env_create(&db_env, 0))) { DBUG_PRINT("info", ("db_env_create %d\n", r)); handle_ydb_error(r); From 880fac82e2c7ee5d57cf0dd5670afb06a9aa8375 Mon Sep 17 00:00:00 2001 From: Rich Prohaska Date: Tue, 12 Aug 2014 14:41:35 -0400 Subject: [PATCH 136/190] #272 set tokudb product name --- storage/tokudb/hatoku_hton.cc | 35 +++++++++++++++++++++++------------ 1 file changed, 23 insertions(+), 12 deletions(-) diff --git a/storage/tokudb/hatoku_hton.cc b/storage/tokudb/hatoku_hton.cc index 85bdf72ef9e..d9f86900907 100644 --- a/storage/tokudb/hatoku_hton.cc +++ b/storage/tokudb/hatoku_hton.cc @@ -92,6 +92,7 @@ PATENT RIGHTS GRANT: #define MYSQL_SERVER 1 #include "hatoku_defines.h" #include +#include #include "stdint.h" #if defined(_WIN32) @@ -326,6 +327,16 @@ static void handle_ydb_error(int error) { } } +static int tokudb_set_product_name(void) { + size_t n = strlen(tokudb_hton_name); + char tokudb_product_name[n+1]; + memset(tokudb_product_name, 0, sizeof tokudb_product_name); + for (size_t i = 0; i < n; i++) + tokudb_product_name[i] = tolower(tokudb_hton_name[i]); + int r = db_env_set_toku_product_name(tokudb_product_name); + return r; +} + static int tokudb_init_func(void *p) { TOKUDB_DBUG_ENTER("%p", p); int r; @@ -344,6 +355,12 @@ static int tokudb_init_func(void *p) { } #endif + r = tokudb_set_product_name(); + if (r) { + sql_print_error("%s can not set product name error %d", tokudb_hton_name, r); + goto error; + } + tokudb_pthread_mutex_init(&tokudb_mutex, MY_MUTEX_INIT_FAST); (void) my_hash_init(&tokudb_open_tables, table_alias_charset, 32, 0, 0, (my_hash_get_key) tokudb_get_key, 0, 0); @@ -413,12 +430,6 @@ static int tokudb_init_func(void *p) { tokudb_home = mysql_real_data_home; DBUG_PRINT("info", ("tokudb_home: %s", tokudb_home)); - r = db_env_set_toku_product_name(tokudb_hton_name); - if (r) { - sql_print_error("%s can not set product name error %d", tokudb_hton_name, r); - goto error; - } - if ((r = db_env_create(&db_env, 0))) 
{ DBUG_PRINT("info", ("db_env_create %d\n", r)); handle_ydb_error(r); @@ -1520,7 +1531,7 @@ static int tokudb_file_map_fill_table(THD *thd, TABLE_LIST *tables, COND *cond) rw_rdlock(&tokudb_hton_initialized_lock); if (!tokudb_hton_initialized) { - my_error(ER_PLUGIN_IS_NOT_LOADED, MYF(0), "TokuDB"); + my_error(ER_PLUGIN_IS_NOT_LOADED, MYF(0), tokudb_hton_name); error = -1; } else { error = tokudb_file_map(table, thd); @@ -1667,7 +1678,7 @@ static int tokudb_fractal_tree_info_fill_table(THD *thd, TABLE_LIST *tables, CON rw_rdlock(&tokudb_hton_initialized_lock); if (!tokudb_hton_initialized) { - my_error(ER_PLUGIN_IS_NOT_LOADED, MYF(0), "TokuDB"); + my_error(ER_PLUGIN_IS_NOT_LOADED, MYF(0), tokudb_hton_name); error = -1; } else { error = tokudb_fractal_tree_info(table, thd); @@ -1879,7 +1890,7 @@ static int tokudb_fractal_tree_block_map_fill_table(THD *thd, TABLE_LIST *tables rw_rdlock(&tokudb_hton_initialized_lock); if (!tokudb_hton_initialized) { - my_error(ER_PLUGIN_IS_NOT_LOADED, MYF(0), "TokuDB"); + my_error(ER_PLUGIN_IS_NOT_LOADED, MYF(0), tokudb_hton_name); error = -1; } else { error = tokudb_fractal_tree_block_map(table, thd); @@ -2030,7 +2041,7 @@ static int tokudb_trx_fill_table(THD *thd, TABLE_LIST *tables, COND *cond) { rw_rdlock(&tokudb_hton_initialized_lock); if (!tokudb_hton_initialized) { - my_error(ER_PLUGIN_IS_NOT_LOADED, MYF(0), "TokuDB"); + my_error(ER_PLUGIN_IS_NOT_LOADED, MYF(0), tokudb_hton_name); error = -1; } else { struct tokudb_trx_extra e = { thd, tables->table }; @@ -2101,7 +2112,7 @@ static int tokudb_lock_waits_fill_table(THD *thd, TABLE_LIST *tables, COND *cond rw_rdlock(&tokudb_hton_initialized_lock); if (!tokudb_hton_initialized) { - my_error(ER_PLUGIN_IS_NOT_LOADED, MYF(0), "TokuDB"); + my_error(ER_PLUGIN_IS_NOT_LOADED, MYF(0), tokudb_hton_name); error = -1; } else { struct tokudb_lock_waits_extra e = { thd, tables->table }; @@ -2178,7 +2189,7 @@ static int tokudb_locks_fill_table(THD *thd, TABLE_LIST *tables, COND *cond) { 
rw_rdlock(&tokudb_hton_initialized_lock); if (!tokudb_hton_initialized) { - my_error(ER_PLUGIN_IS_NOT_LOADED, MYF(0), "TokuDB"); + my_error(ER_PLUGIN_IS_NOT_LOADED, MYF(0), tokudb_hton_name); error = -1; } else { struct tokudb_locks_extra e = { thd, tables->table }; From f011f45b2eb0e7fe46a4a0d9da2da16e64ae7a43 Mon Sep 17 00:00:00 2001 From: joel9001 Date: Tue, 12 Aug 2014 16:03:34 -0400 Subject: [PATCH 137/190] #268 Adding bulk fetch MTR test and result files --- .../r/bf_create_select_hash_part.result | 328 +++++++++++ .../r/bf_create_select_nonpart.result | 242 ++++++++ .../r/bf_create_select_range_part.result | 335 +++++++++++ .../r/bf_create_temp_select_nonpart.result | 242 ++++++++ .../r/bf_insert_select_dup_key_nonpart.result | 207 +++++++ .../r/bf_insert_select_nonpart.result | 205 +++++++ .../r/bf_replace_select_nonpart.result | 367 ++++++++++++ .../suite/tokudb.bugs/r/bf_select_part.result | 543 ++++++++++++++++++ .../t/bf_create_select_hash_part.test | 141 +++++ .../t/bf_create_select_nonpart.test | 116 ++++ .../t/bf_create_select_range_part.test | 136 +++++ .../t/bf_create_temp_select_nonpart.test | 116 ++++ .../t/bf_insert_select_dup_key_nonpart.test | 125 ++++ .../t/bf_insert_select_nonpart.test | 120 ++++ .../t/bf_replace_select_nonpart.test | 188 ++++++ .../suite/tokudb.bugs/t/bf_select_part.test | 218 +++++++ 16 files changed, 3629 insertions(+) create mode 100644 mysql-test/suite/tokudb.bugs/r/bf_create_select_hash_part.result create mode 100644 mysql-test/suite/tokudb.bugs/r/bf_create_select_nonpart.result create mode 100644 mysql-test/suite/tokudb.bugs/r/bf_create_select_range_part.result create mode 100644 mysql-test/suite/tokudb.bugs/r/bf_create_temp_select_nonpart.result create mode 100644 mysql-test/suite/tokudb.bugs/r/bf_insert_select_dup_key_nonpart.result create mode 100644 mysql-test/suite/tokudb.bugs/r/bf_insert_select_nonpart.result create mode 100644 mysql-test/suite/tokudb.bugs/r/bf_replace_select_nonpart.result create mode 100644 
mysql-test/suite/tokudb.bugs/r/bf_select_part.result create mode 100644 mysql-test/suite/tokudb.bugs/t/bf_create_select_hash_part.test create mode 100644 mysql-test/suite/tokudb.bugs/t/bf_create_select_nonpart.test create mode 100644 mysql-test/suite/tokudb.bugs/t/bf_create_select_range_part.test create mode 100644 mysql-test/suite/tokudb.bugs/t/bf_create_temp_select_nonpart.test create mode 100644 mysql-test/suite/tokudb.bugs/t/bf_insert_select_dup_key_nonpart.test create mode 100644 mysql-test/suite/tokudb.bugs/t/bf_insert_select_nonpart.test create mode 100644 mysql-test/suite/tokudb.bugs/t/bf_replace_select_nonpart.test create mode 100644 mysql-test/suite/tokudb.bugs/t/bf_select_part.test diff --git a/mysql-test/suite/tokudb.bugs/r/bf_create_select_hash_part.result b/mysql-test/suite/tokudb.bugs/r/bf_create_select_hash_part.result new file mode 100644 index 00000000000..f8c8e6c596d --- /dev/null +++ b/mysql-test/suite/tokudb.bugs/r/bf_create_select_hash_part.result @@ -0,0 +1,328 @@ +set default_storage_engine='tokudb'; +drop table if exists t,t1,t2,t3; +CREATE TABLE `t` ( +`num` int(10) unsigned auto_increment NOT NULL, +`val` varchar(32) DEFAULT NULL, +PRIMARY KEY (`num`) +); +INSERT INTO t values (null,null); +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT 
null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +SELECT count(*) FROM t; +count(*) +8388608 +CREATE TABLE `t1` ( +`num` int(10) unsigned NOT NULL, +`val` varchar(32) DEFAULT NULL, +PRIMARY KEY (`num`) +) as select * from t; +CREATE TABLE `t2` ( +`num` int(10) unsigned NOT NULL, +`val` varchar(32) DEFAULT NULL, +PRIMARY KEY (`num`) +) PARTITION BY HASH (num) +PARTITIONS 8 as select * from t; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1; +count(*) +8388608 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1; +count(*) +8388608 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1; +count(*) +8388608 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1; +count(*) +8388608 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1; +count(*) +8388608 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1; +count(*) +8388608 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1; +count(*) +8388608 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1; +count(*) +8388608 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1; +count(*) +8388608 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1; +count(*) +8388608 +DROP TABLE t3; +CREATE TABLE t3 AS SELECT count(*) from t2; +DROP TABLE t3; +CREATE TABLE t3 AS SELECT count(*) from t2; +DROP TABLE t3; +CREATE TABLE t3 AS SELECT count(*) from t2; +DROP TABLE t3; +CREATE TABLE t3 AS SELECT count(*) from t2; +DROP TABLE t3; +CREATE TABLE t3 AS SELECT count(*) from t2; +DROP TABLE t3; +CREATE TABLE t3 AS SELECT count(*) from t2; +DROP TABLE t3; +CREATE TABLE t3 AS SELECT count(*) from t2; +DROP TABLE t3; +CREATE TABLE t3 AS SELECT count(*) from t2; +DROP TABLE t3; +CREATE TABLE t3 AS SELECT count(*) from t2; +DROP TABLE t3; +CREATE TABLE t3 AS SELECT 
count(*) from t2; +DROP TABLE t3; +1 +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +DROP TABLE t3; +CREATE TABLE `t3` (`x` 
bigint); +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +DROP TABLE t3; +CREATE TABLE t3 AS SELECT count(*) from t2 where num > 7000000; +DROP TABLE t3; +CREATE TABLE t3 AS SELECT count(*) from t2 where num > 7000000; +DROP TABLE t3; +CREATE TABLE t3 AS SELECT count(*) from t2 where num > 7000000; +DROP TABLE t3; +CREATE TABLE t3 AS SELECT count(*) from t2 where num > 7000000; +DROP TABLE t3; +CREATE TABLE t3 AS SELECT count(*) from t2 where num > 7000000; +DROP TABLE t3; +CREATE TABLE t3 AS SELECT count(*) from t2 where num > 7000000; +DROP TABLE t3; +CREATE TABLE t3 AS SELECT count(*) from t2 where num > 
7000000; +DROP TABLE t3; +CREATE TABLE t3 AS SELECT count(*) from t2 where num > 7000000; +DROP TABLE t3; +CREATE TABLE t3 AS SELECT count(*) from t2 where num > 7000000; +DROP TABLE t3; +CREATE TABLE t3 AS SELECT count(*) from t2 where num > 7000000; +DROP TABLE t3; +CREATE TABLE t3 AS SELECT count(*) from t2 where num > 7000000; +DROP TABLE t3; +CREATE TABLE t3 AS SELECT count(*) from t2 where num > 7000000; +DROP TABLE t3; +CREATE TABLE t3 AS SELECT count(*) from t2 where num > 7000000; +DROP TABLE t3; +CREATE TABLE t3 AS SELECT count(*) from t2 where num > 7000000; +DROP TABLE t3; +CREATE TABLE t3 AS SELECT count(*) from t2 where num > 7000000; +DROP TABLE t3; +CREATE TABLE t3 AS SELECT count(*) from t2 where num > 7000000; +DROP TABLE t3; +CREATE TABLE t3 AS SELECT count(*) from t2 where num > 7000000; +DROP TABLE t3; +CREATE TABLE t3 AS SELECT count(*) from t2 where num > 7000000; +DROP TABLE t3; +CREATE TABLE t3 AS SELECT count(*) from t2 where num > 7000000; +DROP TABLE t3; +CREATE TABLE t3 AS SELECT count(*) from t2 where num > 7000000; +DROP TABLE t3; +CREATE TABLE t3 AS SELECT count(*) from t2 where num > 7000000; +DROP TABLE t3; +CREATE TABLE t3 AS SELECT count(*) from t2 where num > 7000000; +DROP TABLE t3; +CREATE TABLE t3 AS SELECT count(*) from t2 where num > 7000000; +DROP TABLE t3; +CREATE TABLE t3 AS SELECT count(*) from t2 where num > 7000000; +DROP TABLE t3; +CREATE TABLE t3 AS SELECT count(*) from t2 where num > 7000000; +DROP TABLE t3; +CREATE TABLE t3 AS SELECT count(*) from t2 where num > 7000000; +DROP TABLE t3; +CREATE TABLE t3 AS SELECT count(*) from t2 where num > 7000000; +DROP TABLE t3; +CREATE TABLE t3 AS SELECT count(*) from t2 where num > 7000000; +DROP TABLE t3; +CREATE TABLE t3 AS SELECT count(*) from t2 where num > 7000000; +DROP TABLE t3; +CREATE TABLE t3 AS SELECT count(*) from t2 where num > 7000000; +DROP TABLE t3; +1 +drop table t,t1,t2; diff --git a/mysql-test/suite/tokudb.bugs/r/bf_create_select_nonpart.result 
b/mysql-test/suite/tokudb.bugs/r/bf_create_select_nonpart.result new file mode 100644 index 00000000000..adacf1ed6aa --- /dev/null +++ b/mysql-test/suite/tokudb.bugs/r/bf_create_select_nonpart.result @@ -0,0 +1,242 @@ +set default_storage_engine='tokudb'; +drop table if exists t,t1,t2; +CREATE TABLE `t` ( +`num` int(10) unsigned auto_increment NOT NULL, +`val` varchar(32) DEFAULT NULL, +PRIMARY KEY (`num`) +); +INSERT INTO t values (null,null); +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +SELECT count(*) FROM t; +count(*) +8388608 +CREATE TABLE `t1` ( +`num` int(10) unsigned NOT NULL, +`val` varchar(32) DEFAULT NULL, +PRIMARY KEY (`num`) +) as select * from t; +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +CREATE TABLE t2 AS SELECT 
count(*) from t1; +DROP TABLE t2; +CREATE TABLE t2 AS SELECT count(*) from t1; +DROP TABLE t2; +CREATE TABLE t2 AS SELECT count(*) from t1; +DROP TABLE t2; +CREATE TABLE t2 AS SELECT count(*) from t1; +DROP TABLE t2; +CREATE TABLE t2 AS SELECT count(*) from t1; +DROP TABLE t2; +CREATE TABLE t2 AS SELECT count(*) from t1; +DROP TABLE t2; +CREATE TABLE t2 AS SELECT count(*) from t1; +DROP TABLE t2; +CREATE TABLE t2 AS SELECT count(*) from t1; +DROP TABLE t2; +CREATE TABLE t2 AS SELECT count(*) from t1; +DROP TABLE t2; +CREATE TABLE t2 AS SELECT count(*) from t1; +DROP TABLE t2; +1 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; 
+count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +CREATE TABLE t2 AS SELECT count(*) from t1 where num > 7000000; +DROP TABLE t2; +CREATE TABLE t2 AS SELECT count(*) from t1 where num > 7000000; +DROP TABLE t2; +CREATE TABLE t2 AS SELECT count(*) from t1 where num > 7000000; +DROP TABLE t2; +CREATE TABLE t2 AS SELECT count(*) from t1 where num > 7000000; +DROP TABLE t2; +CREATE TABLE t2 AS SELECT count(*) from t1 where num > 7000000; +DROP TABLE t2; +CREATE TABLE t2 AS SELECT count(*) from t1 where num > 7000000; +DROP TABLE t2; +CREATE TABLE t2 AS SELECT count(*) from t1 where num > 7000000; +DROP TABLE t2; +CREATE TABLE t2 AS SELECT count(*) from t1 where num > 7000000; +DROP TABLE t2; +CREATE TABLE t2 AS SELECT count(*) from t1 where num > 7000000; +DROP TABLE t2; +CREATE TABLE t2 AS SELECT count(*) from t1 where num > 7000000; +DROP TABLE t2; +CREATE TABLE t2 AS SELECT count(*) from t1 where num > 7000000; +DROP TABLE t2; +CREATE TABLE t2 AS SELECT count(*) from t1 where num > 7000000; +DROP TABLE t2; +CREATE TABLE t2 AS SELECT count(*) from t1 where num > 7000000; +DROP TABLE t2; +CREATE TABLE t2 AS SELECT count(*) from t1 where num > 7000000; +DROP TABLE t2; +CREATE TABLE t2 AS SELECT count(*) from t1 where num > 7000000; +DROP TABLE t2; +CREATE TABLE t2 AS SELECT count(*) from t1 where num > 7000000; +DROP TABLE t2; +CREATE TABLE t2 AS SELECT count(*) from t1 where num > 7000000; +DROP TABLE t2; +CREATE TABLE t2 AS SELECT count(*) from t1 where num > 7000000; +DROP TABLE t2; 
+CREATE TABLE t2 AS SELECT count(*) from t1 where num > 7000000; +DROP TABLE t2; +CREATE TABLE t2 AS SELECT count(*) from t1 where num > 7000000; +DROP TABLE t2; +CREATE TABLE t2 AS SELECT count(*) from t1 where num > 7000000; +DROP TABLE t2; +CREATE TABLE t2 AS SELECT count(*) from t1 where num > 7000000; +DROP TABLE t2; +CREATE TABLE t2 AS SELECT count(*) from t1 where num > 7000000; +DROP TABLE t2; +CREATE TABLE t2 AS SELECT count(*) from t1 where num > 7000000; +DROP TABLE t2; +CREATE TABLE t2 AS SELECT count(*) from t1 where num > 7000000; +DROP TABLE t2; +CREATE TABLE t2 AS SELECT count(*) from t1 where num > 7000000; +DROP TABLE t2; +CREATE TABLE t2 AS SELECT count(*) from t1 where num > 7000000; +DROP TABLE t2; +CREATE TABLE t2 AS SELECT count(*) from t1 where num > 7000000; +DROP TABLE t2; +CREATE TABLE t2 AS SELECT count(*) from t1 where num > 7000000; +DROP TABLE t2; +CREATE TABLE t2 AS SELECT count(*) from t1 where num > 7000000; +DROP TABLE t2; +1 +drop table t,t1; diff --git a/mysql-test/suite/tokudb.bugs/r/bf_create_select_range_part.result b/mysql-test/suite/tokudb.bugs/r/bf_create_select_range_part.result new file mode 100644 index 00000000000..0e055e76d3f --- /dev/null +++ b/mysql-test/suite/tokudb.bugs/r/bf_create_select_range_part.result @@ -0,0 +1,335 @@ +set default_storage_engine='tokudb'; +drop table if exists t,t1,t2; +CREATE TABLE `t` ( +`num` int(10) unsigned auto_increment NOT NULL, +`val` varchar(32) DEFAULT NULL, +PRIMARY KEY (`num`) +); +INSERT INTO t values (null,null); +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; 
+INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +SELECT count(*) FROM t; +count(*) +8388608 +CREATE TABLE `t1` ( +`num` int(10) unsigned NOT NULL, +`val` varchar(32) DEFAULT NULL, +PRIMARY KEY (`num`) +) as select * from t; +CREATE TABLE `t2` ( +`num` int(10) unsigned NOT NULL, +`val` varchar(32) DEFAULT NULL, +PRIMARY KEY (`num`) +) PARTITION BY RANGE (num) +(PARTITION p0 VALUES LESS THAN (1000000), +PARTITION p1 VALUES LESS THAN (2000000), +PARTITION p2 VALUES LESS THAN (3000000), +PARTITION p3 VALUES LESS THAN (4000000), +PARTITION p4 VALUES LESS THAN (5000000), +PARTITION p5 VALUES LESS THAN (6000000), +PARTITION p6 VALUES LESS THAN (7000000), +PARTITION p7 VALUES LESS THAN MAXVALUE) as select * from t; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1; +count(*) +8388608 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1; +count(*) +8388608 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1; +count(*) +8388608 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1; +count(*) +8388608 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1; +count(*) +8388608 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1; +count(*) +8388608 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1; +count(*) +8388608 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1; +count(*) +8388608 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1; +count(*) +8388608 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from 
t1; +count(*) +8388608 +DROP TABLE t3; +CREATE TABLE t4 AS SELECT count(*) from t2; +DROP TABLE t4; +CREATE TABLE t4 AS SELECT count(*) from t2; +DROP TABLE t4; +CREATE TABLE t4 AS SELECT count(*) from t2; +DROP TABLE t4; +CREATE TABLE t4 AS SELECT count(*) from t2; +DROP TABLE t4; +CREATE TABLE t4 AS SELECT count(*) from t2; +DROP TABLE t4; +CREATE TABLE t4 AS SELECT count(*) from t2; +DROP TABLE t4; +CREATE TABLE t4 AS SELECT count(*) from t2; +DROP TABLE t4; +CREATE TABLE t4 AS SELECT count(*) from t2; +DROP TABLE t4; +CREATE TABLE t4 AS SELECT count(*) from t2; +DROP TABLE t4; +CREATE TABLE t4 AS SELECT count(*) from t2; +DROP TABLE t4; +1 +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1 where num > 7000000; +count(*) 
+1847274 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); 
+SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +DROP TABLE t3; +CREATE TABLE t4 AS SELECT count(*) from t2 where num > 7000000; +DROP TABLE t4; +CREATE TABLE t4 AS SELECT count(*) from t2 where num > 7000000; +DROP TABLE t4; +CREATE TABLE t4 AS SELECT count(*) from t2 where num > 7000000; +DROP TABLE t4; +CREATE TABLE t4 AS SELECT count(*) from t2 where num > 7000000; +DROP TABLE t4; +CREATE TABLE t4 AS SELECT count(*) from t2 where num > 7000000; +DROP TABLE t4; +CREATE TABLE t4 AS SELECT count(*) from t2 where num > 7000000; +DROP TABLE t4; +CREATE TABLE t4 AS SELECT count(*) from t2 where num > 7000000; +DROP TABLE t4; +CREATE TABLE t4 AS SELECT count(*) from t2 where num > 7000000; +DROP TABLE t4; +CREATE TABLE t4 AS SELECT count(*) from t2 where num > 7000000; +DROP TABLE t4; +CREATE TABLE t4 AS SELECT count(*) from t2 where num > 7000000; +DROP TABLE t4; +CREATE TABLE t4 AS SELECT count(*) from t2 where num > 7000000; +DROP TABLE t4; +CREATE TABLE t4 AS SELECT count(*) from t2 where num > 7000000; +DROP TABLE t4; +CREATE TABLE t4 AS SELECT count(*) from t2 where num > 7000000; +DROP TABLE t4; +CREATE TABLE t4 AS SELECT count(*) from t2 where num > 7000000; +DROP TABLE t4; +CREATE TABLE t4 AS SELECT count(*) from t2 where num > 7000000; +DROP TABLE t4; +CREATE TABLE t4 AS SELECT count(*) from t2 where num > 7000000; +DROP TABLE t4; +CREATE TABLE t4 AS SELECT count(*) from t2 where num > 7000000; +DROP TABLE t4; +CREATE TABLE t4 AS SELECT count(*) from t2 where num > 7000000; +DROP TABLE t4; +CREATE TABLE t4 AS SELECT count(*) from t2 where num > 7000000; +DROP TABLE t4; +CREATE TABLE t4 AS SELECT count(*) from t2 where num > 7000000; +DROP TABLE t4; +CREATE TABLE t4 AS SELECT count(*) from t2 where num > 7000000; +DROP TABLE t4; +CREATE TABLE t4 AS SELECT count(*) from t2 where num > 7000000; +DROP TABLE t4; +CREATE TABLE t4 AS SELECT count(*) from t2 where num > 7000000; +DROP TABLE t4; +CREATE TABLE t4 AS SELECT count(*) from t2 where num > 
7000000; +DROP TABLE t4; +CREATE TABLE t4 AS SELECT count(*) from t2 where num > 7000000; +DROP TABLE t4; +CREATE TABLE t4 AS SELECT count(*) from t2 where num > 7000000; +DROP TABLE t4; +CREATE TABLE t4 AS SELECT count(*) from t2 where num > 7000000; +DROP TABLE t4; +CREATE TABLE t4 AS SELECT count(*) from t2 where num > 7000000; +DROP TABLE t4; +CREATE TABLE t4 AS SELECT count(*) from t2 where num > 7000000; +DROP TABLE t4; +CREATE TABLE t4 AS SELECT count(*) from t2 where num > 7000000; +DROP TABLE t4; +1 +drop table t,t1,t2; diff --git a/mysql-test/suite/tokudb.bugs/r/bf_create_temp_select_nonpart.result b/mysql-test/suite/tokudb.bugs/r/bf_create_temp_select_nonpart.result new file mode 100644 index 00000000000..6eddfaa9e82 --- /dev/null +++ b/mysql-test/suite/tokudb.bugs/r/bf_create_temp_select_nonpart.result @@ -0,0 +1,242 @@ +set default_storage_engine='tokudb'; +drop table if exists t,t1,t2; +CREATE TABLE `t` ( +`num` int(10) unsigned auto_increment NOT NULL, +`val` varchar(32) DEFAULT NULL, +PRIMARY KEY (`num`) +); +INSERT INTO t values (null,null); +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +SELECT count(*) FROM t; 
+count(*) +8388608 +CREATE TABLE `t1` ( +`num` int(10) unsigned NOT NULL, +`val` varchar(32) DEFAULT NULL, +PRIMARY KEY (`num`) +) as select * from t; +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +CREATE TEMPORARY TABLE t2 AS SELECT count(*) from t1; +DROP TEMPORARY TABLE t2; +CREATE TEMPORARY TABLE t2 AS SELECT count(*) from t1; +DROP TEMPORARY TABLE t2; +CREATE TEMPORARY TABLE t2 AS SELECT count(*) from t1; +DROP TEMPORARY TABLE t2; +CREATE TEMPORARY TABLE t2 AS SELECT count(*) from t1; +DROP TEMPORARY TABLE t2; +CREATE TEMPORARY TABLE t2 AS SELECT count(*) from t1; +DROP TEMPORARY TABLE t2; +CREATE TEMPORARY TABLE t2 AS SELECT count(*) from t1; +DROP TEMPORARY TABLE t2; +CREATE TEMPORARY TABLE t2 AS SELECT count(*) from t1; +DROP TEMPORARY TABLE t2; +CREATE TEMPORARY TABLE t2 AS SELECT count(*) from t1; +DROP TEMPORARY TABLE t2; +CREATE TEMPORARY TABLE t2 AS SELECT count(*) from t1; +DROP TEMPORARY TABLE t2; +CREATE TEMPORARY TABLE t2 AS SELECT count(*) from t1; +DROP TEMPORARY TABLE t2; +1 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 
+SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +CREATE TEMPORARY TABLE t2 AS SELECT count(*) from t1 where num > 7000000; +DROP TEMPORARY TABLE t2; +CREATE TEMPORARY TABLE t2 AS SELECT count(*) from t1 where num > 7000000; +DROP TEMPORARY TABLE t2; +CREATE TEMPORARY TABLE t2 AS SELECT count(*) from t1 where num > 7000000; +DROP TEMPORARY TABLE t2; +CREATE TEMPORARY TABLE t2 AS SELECT count(*) from t1 where num > 7000000; +DROP TEMPORARY TABLE t2; +CREATE TEMPORARY TABLE t2 AS SELECT count(*) from t1 where num > 7000000; +DROP TEMPORARY TABLE t2; +CREATE TEMPORARY TABLE t2 AS SELECT count(*) from t1 where num > 7000000; +DROP TEMPORARY TABLE t2; +CREATE TEMPORARY TABLE t2 
AS SELECT count(*) from t1 where num > 7000000; +DROP TEMPORARY TABLE t2; +CREATE TEMPORARY TABLE t2 AS SELECT count(*) from t1 where num > 7000000; +DROP TEMPORARY TABLE t2; +CREATE TEMPORARY TABLE t2 AS SELECT count(*) from t1 where num > 7000000; +DROP TEMPORARY TABLE t2; +CREATE TEMPORARY TABLE t2 AS SELECT count(*) from t1 where num > 7000000; +DROP TEMPORARY TABLE t2; +CREATE TEMPORARY TABLE t2 AS SELECT count(*) from t1 where num > 7000000; +DROP TEMPORARY TABLE t2; +CREATE TEMPORARY TABLE t2 AS SELECT count(*) from t1 where num > 7000000; +DROP TEMPORARY TABLE t2; +CREATE TEMPORARY TABLE t2 AS SELECT count(*) from t1 where num > 7000000; +DROP TEMPORARY TABLE t2; +CREATE TEMPORARY TABLE t2 AS SELECT count(*) from t1 where num > 7000000; +DROP TEMPORARY TABLE t2; +CREATE TEMPORARY TABLE t2 AS SELECT count(*) from t1 where num > 7000000; +DROP TEMPORARY TABLE t2; +CREATE TEMPORARY TABLE t2 AS SELECT count(*) from t1 where num > 7000000; +DROP TEMPORARY TABLE t2; +CREATE TEMPORARY TABLE t2 AS SELECT count(*) from t1 where num > 7000000; +DROP TEMPORARY TABLE t2; +CREATE TEMPORARY TABLE t2 AS SELECT count(*) from t1 where num > 7000000; +DROP TEMPORARY TABLE t2; +CREATE TEMPORARY TABLE t2 AS SELECT count(*) from t1 where num > 7000000; +DROP TEMPORARY TABLE t2; +CREATE TEMPORARY TABLE t2 AS SELECT count(*) from t1 where num > 7000000; +DROP TEMPORARY TABLE t2; +CREATE TEMPORARY TABLE t2 AS SELECT count(*) from t1 where num > 7000000; +DROP TEMPORARY TABLE t2; +CREATE TEMPORARY TABLE t2 AS SELECT count(*) from t1 where num > 7000000; +DROP TEMPORARY TABLE t2; +CREATE TEMPORARY TABLE t2 AS SELECT count(*) from t1 where num > 7000000; +DROP TEMPORARY TABLE t2; +CREATE TEMPORARY TABLE t2 AS SELECT count(*) from t1 where num > 7000000; +DROP TEMPORARY TABLE t2; +CREATE TEMPORARY TABLE t2 AS SELECT count(*) from t1 where num > 7000000; +DROP TEMPORARY TABLE t2; +CREATE TEMPORARY TABLE t2 AS SELECT count(*) from t1 where num > 7000000; +DROP TEMPORARY TABLE t2; 
+CREATE TEMPORARY TABLE t2 AS SELECT count(*) from t1 where num > 7000000; +DROP TEMPORARY TABLE t2; +CREATE TEMPORARY TABLE t2 AS SELECT count(*) from t1 where num > 7000000; +DROP TEMPORARY TABLE t2; +CREATE TEMPORARY TABLE t2 AS SELECT count(*) from t1 where num > 7000000; +DROP TEMPORARY TABLE t2; +CREATE TEMPORARY TABLE t2 AS SELECT count(*) from t1 where num > 7000000; +DROP TEMPORARY TABLE t2; +1 +drop table t,t1; diff --git a/mysql-test/suite/tokudb.bugs/r/bf_insert_select_dup_key_nonpart.result b/mysql-test/suite/tokudb.bugs/r/bf_insert_select_dup_key_nonpart.result new file mode 100644 index 00000000000..22e2846d181 --- /dev/null +++ b/mysql-test/suite/tokudb.bugs/r/bf_insert_select_dup_key_nonpart.result @@ -0,0 +1,207 @@ +set default_storage_engine='tokudb'; +drop table if exists t,t1,t2; +CREATE TABLE `t` ( +`num` int(10) unsigned auto_increment NOT NULL, +`val` varchar(32) DEFAULT NULL, +PRIMARY KEY (`num`) +); +INSERT INTO t values (null,null); +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +SELECT count(*) FROM t; +count(*) +8388608 +CREATE TABLE `t1` ( +`num` int(10) unsigned NOT NULL, +`val` varchar(32) DEFAULT 
NULL, +PRIMARY KEY (`num`) +) as select * from t; +CREATE TABLE `t2` ( +`num` int(10) unsigned auto_increment NOT NULL, +`count` bigint(20) NOT NULL, +UNIQUE (num) +) ENGINE=TokuDB DEFAULT CHARSET=latin1; +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +INSERT into t2 (num,count) SELECT NULL,count(*) from t1 on DUPLICATE KEY UPDATE count=count+1; +INSERT into t2 (num,count) SELECT NULL,count(*) from t1 on DUPLICATE KEY UPDATE count=count+1; +INSERT into t2 (num,count) SELECT NULL,count(*) from t1 on DUPLICATE KEY UPDATE count=count+1; +INSERT into t2 (num,count) SELECT NULL,count(*) from t1 on DUPLICATE KEY UPDATE count=count+1; +INSERT into t2 (num,count) SELECT NULL,count(*) from t1 on DUPLICATE KEY UPDATE count=count+1; +INSERT into t2 (num,count) SELECT NULL,count(*) from t1 on DUPLICATE KEY UPDATE count=count+1; +INSERT into t2 (num,count) SELECT NULL,count(*) from t1 on DUPLICATE KEY UPDATE count=count+1; +INSERT into t2 (num,count) SELECT NULL,count(*) from t1 on DUPLICATE KEY UPDATE count=count+1; +INSERT into t2 (num,count) SELECT NULL,count(*) from t1 on DUPLICATE KEY UPDATE count=count+1; +INSERT into t2 (num,count) SELECT NULL,count(*) from t1 on DUPLICATE KEY UPDATE count=count+1; +1 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) 
+1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +INSERT into t2 (num,count) SELECT NULL,count(*) from t1 where num > 7000000 on DUPLICATE KEY UPDATE count=count+1; +INSERT into t2 (num,count) SELECT NULL,count(*) from t1 where num > 7000000 on DUPLICATE KEY UPDATE count=count+1; +INSERT into t2 (num,count) SELECT NULL,count(*) from t1 where num > 7000000 on DUPLICATE KEY UPDATE count=count+1; +INSERT into t2 (num,count) SELECT NULL,count(*) from t1 where num > 7000000 on 
DUPLICATE KEY UPDATE count=count+1; +INSERT into t2 (num,count) SELECT NULL,count(*) from t1 where num > 7000000 on DUPLICATE KEY UPDATE count=count+1; +INSERT into t2 (num,count) SELECT NULL,count(*) from t1 where num > 7000000 on DUPLICATE KEY UPDATE count=count+1; +INSERT into t2 (num,count) SELECT NULL,count(*) from t1 where num > 7000000 on DUPLICATE KEY UPDATE count=count+1; +INSERT into t2 (num,count) SELECT NULL,count(*) from t1 where num > 7000000 on DUPLICATE KEY UPDATE count=count+1; +INSERT into t2 (num,count) SELECT NULL,count(*) from t1 where num > 7000000 on DUPLICATE KEY UPDATE count=count+1; +INSERT into t2 (num,count) SELECT NULL,count(*) from t1 where num > 7000000 on DUPLICATE KEY UPDATE count=count+1; +INSERT into t2 (num,count) SELECT NULL,count(*) from t1 where num > 7000000 on DUPLICATE KEY UPDATE count=count+1; +INSERT into t2 (num,count) SELECT NULL,count(*) from t1 where num > 7000000 on DUPLICATE KEY UPDATE count=count+1; +INSERT into t2 (num,count) SELECT NULL,count(*) from t1 where num > 7000000 on DUPLICATE KEY UPDATE count=count+1; +INSERT into t2 (num,count) SELECT NULL,count(*) from t1 where num > 7000000 on DUPLICATE KEY UPDATE count=count+1; +INSERT into t2 (num,count) SELECT NULL,count(*) from t1 where num > 7000000 on DUPLICATE KEY UPDATE count=count+1; +INSERT into t2 (num,count) SELECT NULL,count(*) from t1 where num > 7000000 on DUPLICATE KEY UPDATE count=count+1; +INSERT into t2 (num,count) SELECT NULL,count(*) from t1 where num > 7000000 on DUPLICATE KEY UPDATE count=count+1; +INSERT into t2 (num,count) SELECT NULL,count(*) from t1 where num > 7000000 on DUPLICATE KEY UPDATE count=count+1; +INSERT into t2 (num,count) SELECT NULL,count(*) from t1 where num > 7000000 on DUPLICATE KEY UPDATE count=count+1; +INSERT into t2 (num,count) SELECT NULL,count(*) from t1 where num > 7000000 on DUPLICATE KEY UPDATE count=count+1; +INSERT into t2 (num,count) SELECT NULL,count(*) from t1 where num > 7000000 on DUPLICATE KEY UPDATE 
count=count+1; +INSERT into t2 (num,count) SELECT NULL,count(*) from t1 where num > 7000000 on DUPLICATE KEY UPDATE count=count+1; +INSERT into t2 (num,count) SELECT NULL,count(*) from t1 where num > 7000000 on DUPLICATE KEY UPDATE count=count+1; +INSERT into t2 (num,count) SELECT NULL,count(*) from t1 where num > 7000000 on DUPLICATE KEY UPDATE count=count+1; +INSERT into t2 (num,count) SELECT NULL,count(*) from t1 where num > 7000000 on DUPLICATE KEY UPDATE count=count+1; +INSERT into t2 (num,count) SELECT NULL,count(*) from t1 where num > 7000000 on DUPLICATE KEY UPDATE count=count+1; +INSERT into t2 (num,count) SELECT NULL,count(*) from t1 where num > 7000000 on DUPLICATE KEY UPDATE count=count+1; +INSERT into t2 (num,count) SELECT NULL,count(*) from t1 where num > 7000000 on DUPLICATE KEY UPDATE count=count+1; +INSERT into t2 (num,count) SELECT NULL,count(*) from t1 where num > 7000000 on DUPLICATE KEY UPDATE count=count+1; +INSERT into t2 (num,count) SELECT NULL,count(*) from t1 where num > 7000000 on DUPLICATE KEY UPDATE count=count+1; +1 +drop table t,t1,t2; diff --git a/mysql-test/suite/tokudb.bugs/r/bf_insert_select_nonpart.result b/mysql-test/suite/tokudb.bugs/r/bf_insert_select_nonpart.result new file mode 100644 index 00000000000..ba7d0f63cd6 --- /dev/null +++ b/mysql-test/suite/tokudb.bugs/r/bf_insert_select_nonpart.result @@ -0,0 +1,205 @@ +set default_storage_engine='tokudb'; +drop table if exists t,t1,t2; +CREATE TABLE `t` ( +`num` int(10) unsigned auto_increment NOT NULL, +`val` varchar(32) DEFAULT NULL, +PRIMARY KEY (`num`) +); +INSERT INTO t values (null,null); +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; 
+INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +SELECT count(*) FROM t; +count(*) +8388608 +CREATE TABLE `t1` ( +`num` int(10) unsigned NOT NULL, +`val` varchar(32) DEFAULT NULL, +PRIMARY KEY (`num`) +) as select * from t; +CREATE TABLE `t2` ( +`count` bigint(20) NOT NULL +) ENGINE=TokuDB DEFAULT CHARSET=latin1; +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +INSERT into t2 SELECT count(*) from t1; +INSERT into t2 SELECT count(*) from t1; +INSERT into t2 SELECT count(*) from t1; +INSERT into t2 SELECT count(*) from t1; +INSERT into t2 SELECT count(*) from t1; +INSERT into t2 SELECT count(*) from t1; +INSERT into t2 SELECT count(*) from t1; +INSERT into t2 SELECT count(*) from t1; +INSERT into t2 SELECT count(*) from t1; +INSERT into t2 SELECT count(*) from t1; +1 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 
7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +INSERT into t2 SELECT count(*) from t1 where num > 7000000; +INSERT into t2 SELECT count(*) from t1 where num > 7000000; +INSERT into t2 SELECT count(*) from t1 where num > 7000000; +INSERT into t2 SELECT count(*) from t1 where num > 7000000; +INSERT into t2 SELECT count(*) from t1 where num > 7000000; +INSERT into t2 SELECT count(*) from t1 where num > 7000000; +INSERT into t2 SELECT count(*) from t1 where 
num > 7000000; +INSERT into t2 SELECT count(*) from t1 where num > 7000000; +INSERT into t2 SELECT count(*) from t1 where num > 7000000; +INSERT into t2 SELECT count(*) from t1 where num > 7000000; +INSERT into t2 SELECT count(*) from t1 where num > 7000000; +INSERT into t2 SELECT count(*) from t1 where num > 7000000; +INSERT into t2 SELECT count(*) from t1 where num > 7000000; +INSERT into t2 SELECT count(*) from t1 where num > 7000000; +INSERT into t2 SELECT count(*) from t1 where num > 7000000; +INSERT into t2 SELECT count(*) from t1 where num > 7000000; +INSERT into t2 SELECT count(*) from t1 where num > 7000000; +INSERT into t2 SELECT count(*) from t1 where num > 7000000; +INSERT into t2 SELECT count(*) from t1 where num > 7000000; +INSERT into t2 SELECT count(*) from t1 where num > 7000000; +INSERT into t2 SELECT count(*) from t1 where num > 7000000; +INSERT into t2 SELECT count(*) from t1 where num > 7000000; +INSERT into t2 SELECT count(*) from t1 where num > 7000000; +INSERT into t2 SELECT count(*) from t1 where num > 7000000; +INSERT into t2 SELECT count(*) from t1 where num > 7000000; +INSERT into t2 SELECT count(*) from t1 where num > 7000000; +INSERT into t2 SELECT count(*) from t1 where num > 7000000; +INSERT into t2 SELECT count(*) from t1 where num > 7000000; +INSERT into t2 SELECT count(*) from t1 where num > 7000000; +INSERT into t2 SELECT count(*) from t1 where num > 7000000; +1 +drop table t,t1,t2; diff --git a/mysql-test/suite/tokudb.bugs/r/bf_replace_select_nonpart.result b/mysql-test/suite/tokudb.bugs/r/bf_replace_select_nonpart.result new file mode 100644 index 00000000000..eab0f103ed6 --- /dev/null +++ b/mysql-test/suite/tokudb.bugs/r/bf_replace_select_nonpart.result @@ -0,0 +1,367 @@ +set default_storage_engine='tokudb'; +drop table if exists t,t1,t2; +CREATE TABLE `t` ( +`num` int(10) unsigned auto_increment NOT NULL, +`val` varchar(32) DEFAULT NULL, +PRIMARY KEY (`num`) +); +INSERT INTO t values (null,null); +INSERT INTO t SELECT 
null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +SELECT count(*) FROM t; +count(*) +8388608 +CREATE TABLE `t1` ( +`num` int(10) unsigned NOT NULL, +`val` varchar(32) DEFAULT NULL, +PRIMARY KEY (`num`) +) as select * from t; +CREATE TABLE `t2` ( +`count` bigint(20) NOT NULL +) ENGINE=TokuDB DEFAULT CHARSET=latin1; +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +REPLACE into t2 SELECT count(*) from t1; +REPLACE into t2 SELECT count(*) from t1; +REPLACE into t2 SELECT count(*) from t1; +REPLACE into t2 SELECT count(*) from t1; +REPLACE into t2 SELECT count(*) from t1; +REPLACE into t2 SELECT count(*) from t1; +REPLACE into t2 SELECT count(*) from t1; +REPLACE into t2 SELECT count(*) from t1; +REPLACE into t2 SELECT count(*) from t1; +REPLACE into t2 SELECT 
count(*) from t1; +1 +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +INSERT IGNORE into t2 SELECT count(*) from t1; +INSERT IGNORE into t2 SELECT count(*) from t1; +INSERT IGNORE into t2 SELECT count(*) from t1; +INSERT IGNORE into t2 SELECT count(*) from t1; +INSERT IGNORE into t2 SELECT count(*) from t1; +INSERT IGNORE into t2 SELECT count(*) from t1; +INSERT IGNORE into t2 SELECT count(*) from t1; +INSERT IGNORE into t2 SELECT count(*) from t1; +INSERT IGNORE into t2 SELECT count(*) from t1; +INSERT IGNORE into t2 SELECT count(*) from t1; +1 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 
+SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +REPLACE into t2 SELECT count(*) from t1 where num > 7000000; +REPLACE into t2 SELECT count(*) from t1 where num > 7000000; +REPLACE into t2 SELECT count(*) from t1 where num > 7000000; +REPLACE into t2 SELECT count(*) from t1 where num > 7000000; +REPLACE into t2 SELECT count(*) from t1 where num > 7000000; +REPLACE into t2 SELECT count(*) from t1 where num > 7000000; +REPLACE into t2 SELECT count(*) from t1 where num > 7000000; +REPLACE into t2 SELECT count(*) from t1 where num > 7000000; +REPLACE into t2 SELECT count(*) from t1 where num > 7000000; +REPLACE into t2 SELECT count(*) from t1 where num > 7000000; +REPLACE into t2 SELECT count(*) from t1 where num > 7000000; +REPLACE into t2 SELECT count(*) from t1 where num > 7000000; +REPLACE into t2 SELECT count(*) from t1 where num > 7000000; +REPLACE into t2 SELECT count(*) from t1 where num > 7000000; +REPLACE into t2 SELECT count(*) from t1 where num > 7000000; +REPLACE into t2 SELECT count(*) from t1 where num > 7000000; +REPLACE into t2 SELECT count(*) from t1 where num > 7000000; +REPLACE into t2 SELECT count(*) 
from t1 where num > 7000000; +REPLACE into t2 SELECT count(*) from t1 where num > 7000000; +REPLACE into t2 SELECT count(*) from t1 where num > 7000000; +REPLACE into t2 SELECT count(*) from t1 where num > 7000000; +REPLACE into t2 SELECT count(*) from t1 where num > 7000000; +REPLACE into t2 SELECT count(*) from t1 where num > 7000000; +REPLACE into t2 SELECT count(*) from t1 where num > 7000000; +REPLACE into t2 SELECT count(*) from t1 where num > 7000000; +REPLACE into t2 SELECT count(*) from t1 where num > 7000000; +REPLACE into t2 SELECT count(*) from t1 where num > 7000000; +REPLACE into t2 SELECT count(*) from t1 where num > 7000000; +REPLACE into t2 SELECT count(*) from t1 where num > 7000000; +REPLACE into t2 SELECT count(*) from t1 where num > 7000000; +1 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; 
+count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +INSERT IGNORE into t2 SELECT count(*) from t1 where num > 7000000; +INSERT IGNORE into t2 SELECT count(*) from t1 where num > 7000000; +INSERT IGNORE into t2 SELECT count(*) from t1 where num > 7000000; +INSERT IGNORE into t2 SELECT count(*) from t1 where num > 7000000; +INSERT IGNORE into t2 SELECT count(*) from t1 where num > 7000000; +INSERT IGNORE into t2 SELECT count(*) from t1 where num > 7000000; +INSERT IGNORE into t2 SELECT count(*) from t1 where num > 7000000; +INSERT IGNORE into t2 SELECT count(*) from t1 where num > 7000000; +INSERT IGNORE into t2 SELECT count(*) from t1 where num > 7000000; +INSERT IGNORE into t2 SELECT count(*) from t1 where num > 7000000; +INSERT IGNORE into t2 SELECT count(*) from t1 where num > 7000000; +INSERT IGNORE into t2 SELECT count(*) from t1 where num > 7000000; +INSERT IGNORE into t2 SELECT count(*) from t1 where num > 7000000; +INSERT IGNORE into t2 SELECT count(*) from t1 where num > 7000000; +INSERT IGNORE into t2 SELECT count(*) from t1 where num > 7000000; +INSERT IGNORE into t2 SELECT count(*) from t1 where num > 7000000; +INSERT IGNORE into t2 SELECT count(*) from t1 where num > 7000000; +INSERT IGNORE into t2 SELECT count(*) from t1 where num > 7000000; +INSERT IGNORE into t2 SELECT count(*) 
from t1 where num > 7000000; +INSERT IGNORE into t2 SELECT count(*) from t1 where num > 7000000; +INSERT IGNORE into t2 SELECT count(*) from t1 where num > 7000000; +INSERT IGNORE into t2 SELECT count(*) from t1 where num > 7000000; +INSERT IGNORE into t2 SELECT count(*) from t1 where num > 7000000; +INSERT IGNORE into t2 SELECT count(*) from t1 where num > 7000000; +INSERT IGNORE into t2 SELECT count(*) from t1 where num > 7000000; +INSERT IGNORE into t2 SELECT count(*) from t1 where num > 7000000; +INSERT IGNORE into t2 SELECT count(*) from t1 where num > 7000000; +INSERT IGNORE into t2 SELECT count(*) from t1 where num > 7000000; +INSERT IGNORE into t2 SELECT count(*) from t1 where num > 7000000; +INSERT IGNORE into t2 SELECT count(*) from t1 where num > 7000000; +1 +drop table t,t1,t2; diff --git a/mysql-test/suite/tokudb.bugs/r/bf_select_part.result b/mysql-test/suite/tokudb.bugs/r/bf_select_part.result new file mode 100644 index 00000000000..f36fc0e1ce3 --- /dev/null +++ b/mysql-test/suite/tokudb.bugs/r/bf_select_part.result @@ -0,0 +1,543 @@ +set default_storage_engine='tokudb'; +drop table if exists t,t1,t2,t3; +CREATE TABLE `t` ( +`num` int(10) unsigned auto_increment NOT NULL, +`val` varchar(32) DEFAULT NULL, +PRIMARY KEY (`num`) +); +INSERT INTO t values (null,null); +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM 
t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +SELECT count(*) FROM t; +count(*) +8388608 +CREATE TABLE `t1` ( +`num` int(10) unsigned NOT NULL, +`val` varchar(32) DEFAULT NULL, +PRIMARY KEY (`num`) +) as select * from t; +CREATE TABLE `t2` ( +`num` int(10) unsigned NOT NULL, +`val` varchar(32) DEFAULT NULL, +PRIMARY KEY (`num`) +) PARTITION BY HASH (num) +PARTITIONS 8 as select * from t; +CREATE TABLE `t3` ( +`num` int(10) unsigned NOT NULL, +`val` varchar(32) DEFAULT NULL, +PRIMARY KEY (`num`) +) PARTITION BY RANGE (num) +(PARTITION p0 VALUES LESS THAN (1000000), +PARTITION p1 VALUES LESS THAN (2000000), +PARTITION p2 VALUES LESS THAN (3000000), +PARTITION p3 VALUES LESS THAN (4000000), +PARTITION p4 VALUES LESS THAN (5000000), +PARTITION p5 VALUES LESS THAN (6000000), +PARTITION p6 VALUES LESS THAN (7000000), +PARTITION p7 VALUES LESS THAN MAXVALUE) as select * from t; +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t2; +count(*) +8388608 +SELECT count(*) from t2; +count(*) +8388608 +SELECT count(*) from t2; +count(*) +8388608 +SELECT count(*) from t2; +count(*) +8388608 +SELECT count(*) from t2; +count(*) +8388608 +SELECT count(*) from t2; +count(*) +8388608 +SELECT count(*) from t2; +count(*) +8388608 +SELECT count(*) from t2; +count(*) +8388608 +SELECT count(*) from t2; +count(*) +8388608 +SELECT count(*) from t2; +count(*) +8388608 +1 +SELECT count(*) from t1; +count(*) +8388608 +SELECT 
count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t3; +count(*) +8388608 +SELECT count(*) from t3; +count(*) +8388608 +SELECT count(*) from t3; +count(*) +8388608 +SELECT count(*) from t3; +count(*) +8388608 +SELECT count(*) from t3; +count(*) +8388608 +SELECT count(*) from t3; +count(*) +8388608 +SELECT count(*) from t3; +count(*) +8388608 +SELECT count(*) from t3; +count(*) +8388608 +SELECT count(*) from t3; +count(*) +8388608 +SELECT count(*) from t3; +count(*) +8388608 +1 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 
7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t2 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t2 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t2 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t2 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t2 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t2 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t2 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t2 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t2 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t2 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t2 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t2 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t2 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t2 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t2 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t2 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t2 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t2 where num > 7000000; +count(*) +1847274 +SELECT count(*) from 
t2 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t2 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t2 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t2 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t2 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t2 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t2 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t2 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t2 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t2 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t2 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t2 where num > 7000000; +count(*) +1847274 +1 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 
+SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t3 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t3 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t3 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t3 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t3 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t3 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t3 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t3 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t3 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t3 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t3 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t3 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t3 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t3 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t3 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t3 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t3 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t3 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t3 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t3 where num > 7000000; 
+count(*) +1847274 +SELECT count(*) from t3 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t3 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t3 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t3 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t3 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t3 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t3 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t3 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t3 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t3 where num > 7000000; +count(*) +1847274 +1 +drop table t,t1,t2,t3; diff --git a/mysql-test/suite/tokudb.bugs/t/bf_create_select_hash_part.test b/mysql-test/suite/tokudb.bugs/t/bf_create_select_hash_part.test new file mode 100644 index 00000000000..1b7f07f2527 --- /dev/null +++ b/mysql-test/suite/tokudb.bugs/t/bf_create_select_hash_part.test @@ -0,0 +1,141 @@ +# Verify that index and range scans are not slow +# on tables during create select statements +# due to tokudb bulk fetch not being used +source include/have_tokudb.inc; +source include/have_partition.inc; +set default_storage_engine='tokudb'; +disable_warnings; +drop table if exists t,t1,t2,t3; +enable_warnings; + +let $maxq = 10; + +CREATE TABLE `t` ( + `num` int(10) unsigned auto_increment NOT NULL, + `val` varchar(32) DEFAULT NULL, + PRIMARY KEY (`num`) +); + +# put 8M rows into t +INSERT INTO t values (null,null); +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t 
SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +SELECT count(*) FROM t; + +# Create base table (control table) from source table t +CREATE TABLE `t1` ( + `num` int(10) unsigned NOT NULL, + `val` varchar(32) DEFAULT NULL, + PRIMARY KEY (`num`) +) as select * from t; + +# Create source hash partitioned table from source table t +CREATE TABLE `t2` ( + `num` int(10) unsigned NOT NULL, + `val` varchar(32) DEFAULT NULL, + PRIMARY KEY (`num`) +) PARTITION BY HASH (num) +PARTITIONS 8 as select * from t; + +let $s = `select to_seconds(now())`; +let $i = 0; +while ($i < $maxq) { + CREATE TABLE `t3` (`x` bigint); + SELECT count(*) from t1; + DROP TABLE t3; + inc $i; +} +let $time_elapsed_select = `select to_seconds(now()) - $s`; + +# The following line can be used to display the time elapsed data +# which could be useful for debugging. +#echo Index scans took $time_elapsed_select seconds.; + +let $s = `select to_seconds(now())`; +let $i = 0; +while ($i < $maxq) { + CREATE TABLE t3 AS SELECT count(*) from t2; + DROP TABLE t3; + inc $i; +} + +let $time_elapsed_create_select = `select to_seconds(now()) - $s`; + +# The following line can be used to display the time elapsed data +# which could be useful for debugging. +#echo Index scans took $time_elapsed_create_select seconds.; + +# This check evaluates whether the time elapsed during the create select statement is on par +# with the select statement, which will confirm that bulk fetch is in fact being used. 
+# Additionally, it is important to note that 1.5 is the multiplier applied to the time_elapsed_select +# value because it appears that MySQL 5.5.39 uses a sorted index scan during the create select statement +# while Percona Server 5.6 uses an unsorted index scan. +# The issue has been resolved in MySQL 5.6 but still persists in Maria 10.0.12 +# in the defect found at https://mariadb.atlassian.net/browse/MDEV-6547. +let $verdict = `select abs($time_elapsed_create_select - $time_elapsed_select) <= 1.5 * $time_elapsed_select`; +echo $verdict; +if (!$verdict) { echo index scan t2 $time_elapsed_create_select $time_elapsed_select; } + +let $maxrq = 30; + +let $s = `select to_seconds(now())`; +let $i = 0; +while ($i < $maxrq) { + CREATE TABLE `t3` (`x` bigint); + SELECT count(*) from t1 where num > 7000000; + DROP TABLE t3; + inc $i; +} +let $time_elapsed_select = `select to_seconds(now()) - $s`; + +# The following line can be used to display the time elapsed data +# which could be useful for debugging. +#echo Index scans took $time_elapsed_select seconds.; + +let $s = `select to_seconds(now())`; +let $i = 0; +while ($i < $maxrq) { + CREATE TABLE t3 AS SELECT count(*) from t2 where num > 7000000; + DROP TABLE t3; + inc $i; +} + +let $time_elapsed_create_select = `select to_seconds(now()) - $s`; + +# The following line can be used to display the time elapsed data +# which could be useful for debugging. +#echo Index scans took $time_elapsed_create_select seconds.; + +# This check evaluates whether the time elapsed during the create select statement is on par +# with the select statement, which will confirm that bulk fetch is in fact being used. +# Additionally, it is important to note that 1.5 is the multiplier applied to the time_elapsed_select +# value because it appears that MySQL 5.5.39 uses a sorted index scan during the create select statement +# while Percona Server 5.6 uses an unsorted index scan. 
+# The issue has been resolved in MySQL 5.6 but still persists in Maria 10.0.12 +# in the defect found at https://mariadb.atlassian.net/browse/MDEV-6547. +let $verdict = `select abs($time_elapsed_create_select - $time_elapsed_select) <= 1.5 * $time_elapsed_select`; +echo $verdict; +if (!$verdict) { echo range scan t2 $time_elapsed_create_select $time_elapsed_select; } + +drop table t,t1,t2; diff --git a/mysql-test/suite/tokudb.bugs/t/bf_create_select_nonpart.test b/mysql-test/suite/tokudb.bugs/t/bf_create_select_nonpart.test new file mode 100644 index 00000000000..7e70eb2da6b --- /dev/null +++ b/mysql-test/suite/tokudb.bugs/t/bf_create_select_nonpart.test @@ -0,0 +1,116 @@ +# Verify that index and range scans are not slow +# on tables during create select statements +# due to tokudb bulk fetch not being used +source include/have_tokudb.inc; +set default_storage_engine='tokudb'; +disable_warnings; +drop table if exists t,t1,t2; +enable_warnings; + +let $maxq = 10; + +CREATE TABLE `t` ( + `num` int(10) unsigned auto_increment NOT NULL, + `val` varchar(32) DEFAULT NULL, + PRIMARY KEY (`num`) +); + +# put 8M rows into t +INSERT INTO t values (null,null); +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT 
INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +SELECT count(*) FROM t; + +# Create first table from source table t +CREATE TABLE `t1` ( + `num` int(10) unsigned NOT NULL, + `val` varchar(32) DEFAULT NULL, + PRIMARY KEY (`num`) +) as select * from t; + +let $s = `select to_seconds(now())`; +let $i = 0; +while ($i < $maxq) { + SELECT count(*) from t1; + inc $i; +} +let $time_elapsed_select = `select to_seconds(now()) - $s`; + +# The following line can be used to display the time elapsed data +# which could be useful for debugging. +#echo Index scans took $time_elapsed_select seconds.; + +let $s = `select to_seconds(now())`; +let $i = 0; +while ($i < $maxq) { + CREATE TABLE t2 AS SELECT count(*) from t1; + DROP TABLE t2; + inc $i; +} + +let $time_elapsed_create_select = `select to_seconds(now()) - $s`; + +# The following line can be used to display the time elapsed data +# which could be useful for debugging. +#echo Index scans took $time_elapsed_create_select seconds.; + +# This check evaluates whether the time elapsed during the create select statement is on par +# with the select statement, which will confirm that bulk fetch is in fact being used. +let $verdict = `select abs($time_elapsed_create_select - $time_elapsed_select) <= $time_elapsed_select`; +echo $verdict; + +let $maxrq = 30; + +let $s = `select to_seconds(now())`; +let $i = 0; +while ($i < $maxrq) { + SELECT count(*) from t1 where num > 7000000; + inc $i; +} +let $time_elapsed_select = `select to_seconds(now()) - $s`; + +# The following line can be used to display the time elapsed data +# which could be useful for debugging. 
+#echo Index scans took $time_elapsed_select seconds.; + +let $s = `select to_seconds(now())`; +let $i = 0; +while ($i < $maxrq) { + CREATE TABLE t2 AS SELECT count(*) from t1 where num > 7000000; + DROP TABLE t2; + inc $i; +} + +let $time_elapsed_create_select = `select to_seconds(now()) - $s`; + +# The following line can be used to display the time elapsed data +# which could be useful for debugging. +#echo Index scans took $time_elapsed_create_select seconds.; + +# This check evaluates whether the time elapsed during the create select statement is on par +# with the select statement, which will confirm that bulk fetch is in fact being used. +let $verdict = `select abs($time_elapsed_create_select - $time_elapsed_select) <= $time_elapsed_select`; +echo $verdict; + +drop table t,t1; diff --git a/mysql-test/suite/tokudb.bugs/t/bf_create_select_range_part.test b/mysql-test/suite/tokudb.bugs/t/bf_create_select_range_part.test new file mode 100644 index 00000000000..5b94f5e97bf --- /dev/null +++ b/mysql-test/suite/tokudb.bugs/t/bf_create_select_range_part.test @@ -0,0 +1,136 @@ +# Verify that index and range scans are not slow +# on tables during create select statements +# due to tokudb bulk fetch not being used +source include/have_tokudb.inc; +source include/have_partition.inc; +set default_storage_engine='tokudb'; +disable_warnings; +drop table if exists t,t1,t2; +enable_warnings; + +let $maxq = 10; + +CREATE TABLE `t` ( + `num` int(10) unsigned auto_increment NOT NULL, + `val` varchar(32) DEFAULT NULL, + PRIMARY KEY (`num`) +); + +# put 8M rows into t +INSERT INTO t values (null,null); +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val 
FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +SELECT count(*) FROM t; + +# Create base table (control table) from source table t +CREATE TABLE `t1` ( + `num` int(10) unsigned NOT NULL, + `val` varchar(32) DEFAULT NULL, + PRIMARY KEY (`num`) +) as select * from t; + +# Create source range partitioned table from source table t +CREATE TABLE `t2` ( + `num` int(10) unsigned NOT NULL, + `val` varchar(32) DEFAULT NULL, + PRIMARY KEY (`num`) +) PARTITION BY RANGE (num) +(PARTITION p0 VALUES LESS THAN (1000000), + PARTITION p1 VALUES LESS THAN (2000000), + PARTITION p2 VALUES LESS THAN (3000000), + PARTITION p3 VALUES LESS THAN (4000000), + PARTITION p4 VALUES LESS THAN (5000000), + PARTITION p5 VALUES LESS THAN (6000000), + PARTITION p6 VALUES LESS THAN (7000000), + PARTITION p7 VALUES LESS THAN MAXVALUE) as select * from t; + +let $s = `select to_seconds(now())`; +let $i = 0; +while ($i < $maxq) { + CREATE TABLE `t3` (`x` bigint); + SELECT count(*) from t1; + DROP TABLE t3; + inc $i; +} +let $time_elapsed_select = `select to_seconds(now()) - $s`; + +# The following line can be used to display the time elapsed data +# which could be useful for debugging. 
+#echo Index scans took $time_elapsed_select seconds.; + +let $s = `select to_seconds(now())`; +let $i = 0; +while ($i < $maxq) { + CREATE TABLE t4 AS SELECT count(*) from t2; + DROP TABLE t4; + inc $i; +} + +let $time_elapsed_create_select = `select to_seconds(now()) - $s`; + +# The following line can be used to display the time elapsed data +# which could be useful for debugging. +#echo Index scans took $time_elapsed_create_select seconds.; + +# This check evaluates whether the time elapsed during the create select statement is on par +# with the select statement, which will confirm that bulk fetch is in fact being used. +let $verdict = `select abs($time_elapsed_create_select - $time_elapsed_select) <= $time_elapsed_select`; +echo $verdict; + +let $maxrq = 30; + +let $s = `select to_seconds(now())`; +let $i = 0; +while ($i < $maxrq) { + CREATE TABLE `t3` (`x` bigint); + SELECT count(*) from t1 where num > 7000000; + DROP TABLE t3; + inc $i; +} +let $time_elapsed_select = `select to_seconds(now()) - $s`; + +# The following line can be used to display the time elapsed data +# which could be useful for debugging. +#echo Index scans took $time_elapsed_select seconds.; + +let $s = `select to_seconds(now())`; +let $i = 0; +while ($i < $maxrq) { + CREATE TABLE t4 AS SELECT count(*) from t2 where num > 7000000; + DROP TABLE t4; + inc $i; +} + +let $time_elapsed_create_select = `select to_seconds(now()) - $s`; + +# The following line can be used to display the time elapsed data +# which could be useful for debugging. +#echo Index scans took $time_elapsed_create_select seconds.; + +# This check evaluates whether the time elapsed during the create select statement is on par +# with the select statement, which will confirm that bulk fetch is in fact being used. 
+let $verdict = `select abs($time_elapsed_create_select - $time_elapsed_select) <= $time_elapsed_select`; +echo $verdict; + +drop table t,t1,t2; diff --git a/mysql-test/suite/tokudb.bugs/t/bf_create_temp_select_nonpart.test b/mysql-test/suite/tokudb.bugs/t/bf_create_temp_select_nonpart.test new file mode 100644 index 00000000000..56e3c91dbdf --- /dev/null +++ b/mysql-test/suite/tokudb.bugs/t/bf_create_temp_select_nonpart.test @@ -0,0 +1,116 @@ +# Verify that index and range scans are not slow +# on temporary tables during create select statements +# due to tokudb bulk fetch not being used +source include/have_tokudb.inc; +set default_storage_engine='tokudb'; +disable_warnings; +drop table if exists t,t1,t2; +enable_warnings; + +let $maxq = 10; + +CREATE TABLE `t` ( + `num` int(10) unsigned auto_increment NOT NULL, + `val` varchar(32) DEFAULT NULL, + PRIMARY KEY (`num`) +); + +# put 8M rows into t +INSERT INTO t values (null,null); +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +SELECT count(*) FROM t; + +# Create first table from source table t +CREATE TABLE `t1` ( + `num` int(10) unsigned NOT NULL, + `val` varchar(32) DEFAULT 
NULL, + PRIMARY KEY (`num`) +) as select * from t; + +let $s = `select to_seconds(now())`; +let $i = 0; +while ($i < $maxq) { + SELECT count(*) from t1; + inc $i; +} +let $time_elapsed_select = `select to_seconds(now()) - $s`; + +# The following line can be used to display the time elapsed data +# which could be useful for debugging. +#echo Index scans took $time_elapsed_select seconds.; + +let $s = `select to_seconds(now())`; +let $i = 0; +while ($i < $maxq) { + CREATE TEMPORARY TABLE t2 AS SELECT count(*) from t1; + DROP TEMPORARY TABLE t2; + inc $i; +} + +let $time_elapsed_create_select = `select to_seconds(now()) - $s`; + +# The following line can be used to display the time elapsed data +# which could be useful for debugging. +#echo Index scans took $time_elapsed_create_select seconds.; + +# This check evaluates whether the time elapsed during the create select statement is on par +# with the select statement, which will confirm that bulk fetch is in fact being used. +let $verdict = `select abs($time_elapsed_create_select - $time_elapsed_select) <= $time_elapsed_select`; +echo $verdict; + +let $maxrq = 30; + +let $s = `select to_seconds(now())`; +let $i = 0; +while ($i < $maxrq) { + SELECT count(*) from t1 where num > 7000000; + inc $i; +} +let $time_elapsed_select = `select to_seconds(now()) - $s`; + +# The following line can be used to display the time elapsed data +# which could be useful for debugging. +#echo Range scans took $time_elapsed_select seconds.; + +let $s = `select to_seconds(now())`; +let $i = 0; +while ($i < $maxrq) { + CREATE TEMPORARY TABLE t2 AS SELECT count(*) from t1 where num > 7000000; + DROP TEMPORARY TABLE t2; + inc $i; +} + +let $time_elapsed_create_select = `select to_seconds(now()) - $s`; + +# The following line can be used to display the time elapsed data +# which could be useful for debugging. 
+#echo Range scans took $time_elapsed_create_select seconds.; + +# This check evaluates whether the time elapsed during the create select statement is on par +# with the select statement, which will confirm that bulk fetch is in fact being used. +let $verdict = `select abs($time_elapsed_create_select - $time_elapsed_select) <= $time_elapsed_select`; +echo $verdict; + +drop table t,t1; diff --git a/mysql-test/suite/tokudb.bugs/t/bf_insert_select_dup_key_nonpart.test b/mysql-test/suite/tokudb.bugs/t/bf_insert_select_dup_key_nonpart.test new file mode 100644 index 00000000000..5774a3c17d7 --- /dev/null +++ b/mysql-test/suite/tokudb.bugs/t/bf_insert_select_dup_key_nonpart.test @@ -0,0 +1,125 @@ +# Verify that index and range scans are not slow +# on tables during insert select on duplicate key statements +# due to tokudb bulk fetch not being used. +# In this test case, the on duplicate key condition does not need to fire +# since the performance of the embedded select statement is all we are measuring. 
+source include/have_tokudb.inc; +set default_storage_engine='tokudb'; +disable_warnings; +drop table if exists t,t1,t2; +enable_warnings; + +let $maxq = 10; + +CREATE TABLE `t` ( + `num` int(10) unsigned auto_increment NOT NULL, + `val` varchar(32) DEFAULT NULL, + PRIMARY KEY (`num`) +); + +# put 8M rows into t +INSERT INTO t values (null,null); +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +SELECT count(*) FROM t; + +# Create first table from source table t +CREATE TABLE `t1` ( + `num` int(10) unsigned NOT NULL, + `val` varchar(32) DEFAULT NULL, + PRIMARY KEY (`num`) +) as select * from t; + +# Create second table t2 that will serve as the target for the insert select statment +CREATE TABLE `t2` ( + `num` int(10) unsigned auto_increment NOT NULL, + `count` bigint(20) NOT NULL, + UNIQUE (num) + ) ENGINE=TokuDB DEFAULT CHARSET=latin1; + + +let $s = `select to_seconds(now())`; +let $i = 0; +while ($i < $maxq) { + SELECT count(*) from t1; + inc $i; +} +let $time_elapsed_select = `select to_seconds(now()) - $s`; + +# The following line can be used to display the time elapsed data +# which could be useful for debugging. 
+#echo Index scans took $time_elapsed_select seconds.; + + +let $s = `select to_seconds(now())`; +let $i = 0; +while ($i < $maxq) { + INSERT into t2 (num,count) SELECT NULL,count(*) from t1 on DUPLICATE KEY UPDATE count=count+1; + inc $i; +} +let $time_elapsed_insert_select = `select to_seconds(now()) - $s`; + +# The following line can be used to display the time elapsed data +# which could be useful for debugging. +#echo Index scans took $time_elapsed_insert_select seconds.; + +# This check evaluates whether the time elapsed during the insert select on duplicate key statement is on par +# with the select statement, which will confirm that bulk fetch is in fact being used. +let $verdict = `select abs($time_elapsed_insert_select - $time_elapsed_select) <= $time_elapsed_select`; +echo $verdict; + +let $maxrq = 30; + +let $s = `select to_seconds(now())`; +let $i = 0; +while ($i < $maxrq) { + SELECT count(*) from t1 where num > 7000000; + inc $i; +} +let $time_elapsed_select = `select to_seconds(now()) - $s`; + +# The following line can be used to display the time elapsed data +# which could be useful for debugging. +#echo Range scans took $time_elapsed_select seconds.; + + +let $s = `select to_seconds(now())`; +let $i = 0; +while ($i < $maxrq) { + INSERT into t2 (num,count) SELECT NULL,count(*) from t1 where num > 7000000 on DUPLICATE KEY UPDATE count=count+1; + inc $i; +} +let $time_elapsed_insert_select = `select to_seconds(now()) - $s`; + +# The following line can be used to display the time elapsed data +# which could be useful for debugging. +#echo Range scans took $time_elapsed_insert_select seconds.; + +# This check evaluates whether the time elapsed during the insert select on duplicate key statement is on par +# with the select statement, which will confirm that bulk fetch is in fact being used. 
+let $verdict = `select abs($time_elapsed_insert_select - $time_elapsed_select) <= $time_elapsed_select`; +echo $verdict; + +enable_warnings; +drop table t,t1,t2; diff --git a/mysql-test/suite/tokudb.bugs/t/bf_insert_select_nonpart.test b/mysql-test/suite/tokudb.bugs/t/bf_insert_select_nonpart.test new file mode 100644 index 00000000000..31399dedae7 --- /dev/null +++ b/mysql-test/suite/tokudb.bugs/t/bf_insert_select_nonpart.test @@ -0,0 +1,120 @@ +# Verify that index and range scans are not slow +# on tables during insert select statements +# due to tokudb bulk fetch not being used +source include/have_tokudb.inc; +set default_storage_engine='tokudb'; +disable_warnings; +drop table if exists t,t1,t2; +enable_warnings; + +let $maxq = 10; + +CREATE TABLE `t` ( + `num` int(10) unsigned auto_increment NOT NULL, + `val` varchar(32) DEFAULT NULL, + PRIMARY KEY (`num`) +); + +# put 8M rows into t +INSERT INTO t values (null,null); +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +SELECT count(*) FROM t; + +# Create first table from source table t +CREATE TABLE `t1` ( + `num` int(10) unsigned NOT NULL, + `val` varchar(32) DEFAULT NULL, + 
PRIMARY KEY (`num`) +) as select * from t; + +# Create second table t2 that will serve as the target for the insert select statment +CREATE TABLE `t2` ( + `count` bigint(20) NOT NULL + ) ENGINE=TokuDB DEFAULT CHARSET=latin1; + + +let $s = `select to_seconds(now())`; +let $i = 0; +while ($i < $maxq) { + SELECT count(*) from t1; + inc $i; +} +let $time_elapsed_select = `select to_seconds(now()) - $s`; + +# The following line can be used to display the time elapsed data +# which could be useful for debugging. +#echo Index scans took $time_elapsed_select seconds.; + + +let $s = `select to_seconds(now())`; +let $i = 0; +while ($i < $maxq) { + INSERT into t2 SELECT count(*) from t1; + inc $i; +} +let $time_elapsed_insert_select = `select to_seconds(now()) - $s`; + +# The following line can be used to display the time elapsed data +# which could be useful for debugging. +#echo Index scans took $time_elapsed_insert_select seconds.; + +# This check evaluates whether the time elapsed during the insert select statement is on par +# with the select statement, which will confirm that bulk fetch is in fact being used. +let $verdict = `select abs($time_elapsed_insert_select - $time_elapsed_select) <= $time_elapsed_select`; +echo $verdict; + +let $maxrq = 30; + +let $s = `select to_seconds(now())`; +let $i = 0; +while ($i < $maxrq) { + SELECT count(*) from t1 where num > 7000000; + inc $i; +} +let $time_elapsed_select = `select to_seconds(now()) - $s`; + +# This check evaluates whether the time elapsed during the insert select statement is on par +# with the select statement, which will confirm that bulk fetch is in fact being used. 
+#echo Range scans took $time_elapsed_select seconds.; + + +let $s = `select to_seconds(now())`; +let $i = 0; +while ($i < $maxrq) { + INSERT into t2 SELECT count(*) from t1 where num > 7000000; + inc $i; +} +let $time_elapsed_insert_select = `select to_seconds(now()) - $s`; + +# The following line can be used to display the time elapsed data +# which could be useful for debugging. +#echo Range scans took $time_elapsed_insert_select seconds.; + +# This check evaluates whether the time elapsed during the insert select statement is on par +# with the select statement, which will confirm that bulk fetch is in fact being used. +let $verdict = `select abs($time_elapsed_insert_select - $time_elapsed_select) <= $time_elapsed_select`; +echo $verdict; + +drop table t,t1,t2; diff --git a/mysql-test/suite/tokudb.bugs/t/bf_replace_select_nonpart.test b/mysql-test/suite/tokudb.bugs/t/bf_replace_select_nonpart.test new file mode 100644 index 00000000000..6e900bcd9d3 --- /dev/null +++ b/mysql-test/suite/tokudb.bugs/t/bf_replace_select_nonpart.test @@ -0,0 +1,188 @@ +# Verify that index and range scans are not slow +# on tables during replace select and insert ignore statements +# due to tokudb bulk fetch not being used +source include/have_tokudb.inc; +set default_storage_engine='tokudb'; +disable_warnings; +drop table if exists t,t1,t2; +enable_warnings; + +let $maxq = 10; +
+CREATE TABLE `t` ( + `num` int(10) unsigned auto_increment NOT NULL, + `val` varchar(32) DEFAULT NULL, + PRIMARY KEY (`num`) +); + +# put 8M rows into t +INSERT INTO t values (null,null); +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t;
+INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +SELECT count(*) FROM t; + +# Create first table from source table t +CREATE TABLE `t1` ( + `num` int(10) unsigned NOT NULL, + `val` varchar(32) DEFAULT NULL, + PRIMARY KEY (`num`) +) as select * from t; + +# Create second table t2 that will serve as the target for the replace select statement +CREATE TABLE `t2` ( + `count` bigint(20) NOT NULL + ) ENGINE=TokuDB DEFAULT CHARSET=latin1; + + +let $s = `select to_seconds(now())`; +let $i = 0; +while ($i < $maxq) { + SELECT count(*) from t1; + inc $i; +} +let $time_elapsed_select = `select to_seconds(now()) - $s`; + +# The following line can be used to display the time elapsed data +# which could be useful for debugging. +#echo Index scans took $time_elapsed_select seconds.; + + +let $s = `select to_seconds(now())`; +let $i = 0; +while ($i < $maxq) { + REPLACE into t2 SELECT count(*) from t1; + inc $i; +} +let $time_elapsed_replace_select = `select to_seconds(now()) - $s`; + +# The following line can be used to display the time elapsed data +# which could be useful for debugging. +#echo Index scans took $time_elapsed_replace_select seconds.; + +# This check evaluates whether the time elapsed during the replace select statement is on par +# with the select statement, which will confirm that bulk fetch is in fact being used.
+let $verdict = `select abs($time_elapsed_replace_select - $time_elapsed_select) <= $time_elapsed_select`; +echo $verdict; + +############################################################## + +let $s = `select to_seconds(now())`; +let $i = 0; +while ($i < $maxq) { + SELECT count(*) from t1; + inc $i; +} +let $time_elapsed_select = `select to_seconds(now()) - $s`; + +# The following line can be used to display the time elapsed data +# which could be useful for debugging. +#echo Index scans took $time_elapsed_select seconds.; + + +let $s = `select to_seconds(now())`; +let $i = 0; +while ($i < $maxq) { + INSERT IGNORE into t2 SELECT count(*) from t1; + inc $i; +} +let $time_elapsed_insert_ignore_select = `select to_seconds(now()) - $s`; + +# The following line can be used to display the time elapsed data +# which could be useful for debugging. +#echo Index scans took $time_elapsed_insert_ignore_select seconds.; + +# This check evaluates whether the time elapsed during the insert ignore select statement is on par +# with the select statement, which will confirm that bulk fetch is in fact being used. +let $verdict = `select abs($time_elapsed_insert_ignore_select - $time_elapsed_select) <= $time_elapsed_select`; +echo $verdict; + +################################################################## + +let $maxrq = 30; + +let $s = `select to_seconds(now())`; +let $i = 0; +while ($i < $maxrq) { + SELECT count(*) from t1 where num > 7000000; + inc $i; +} +let $time_elapsed_select = `select to_seconds(now()) - $s`; + +# The following line can be used to display the time elapsed data +# which could be useful for debugging. 
+#echo Range scans took $time_elapsed_select seconds.; + + +let $s = `select to_seconds(now())`; +let $i = 0; +while ($i < $maxrq) { + REPLACE into t2 SELECT count(*) from t1 where num > 7000000; + inc $i; +} +let $time_elapsed_replace_select = `select to_seconds(now()) - $s`; + +# The following line can be used to display the time elapsed data +# which could be useful for debugging. +#echo Range scans took $time_elapsed_replace_select seconds.; + +# This check evaluates whether the time elapsed during the replace select statement is on par +# with the select statement, which will confirm that bulk fetch is in fact being used. +let $verdict = `select abs($time_elapsed_replace_select - $time_elapsed_select) <= $time_elapsed_select`; +echo $verdict; + +#################################################################### + +let $s = `select to_seconds(now())`; +let $i = 0; +while ($i < $maxrq) { + SELECT count(*) from t1 where num > 7000000; + inc $i; +} +let $time_elapsed_select = `select to_seconds(now()) - $s`; + +# The following line can be used to display the time elapsed data +# which could be useful for debugging. +#echo Range scans took $time_elapsed_select seconds.; + + +let $s = `select to_seconds(now())`; +let $i = 0; +while ($i < $maxrq) { + INSERT IGNORE into t2 SELECT count(*) from t1 where num > 7000000; + inc $i; +} +let $time_elapsed_insert_ignore_select = `select to_seconds(now()) - $s`; + +# The following line can be used to display the time elapsed data +# which could be useful for debugging. +#echo Range scans took $time_elapsed_insert_ignore_select seconds.; + +# This check evaluates whether the time elapsed during the insert ignore select statement is on par +# with the select statement, which will confirm that bulk fetch is in fact being used. 
+let $verdict = `select abs($time_elapsed_insert_ignore_select - $time_elapsed_select) <= $time_elapsed_select`; +echo $verdict; + +######################################################################### + +drop table t,t1,t2; diff --git a/mysql-test/suite/tokudb.bugs/t/bf_select_part.test b/mysql-test/suite/tokudb.bugs/t/bf_select_part.test new file mode 100644 index 00000000000..2e7bfd9a0d2 --- /dev/null +++ b/mysql-test/suite/tokudb.bugs/t/bf_select_part.test @@ -0,0 +1,218 @@ +# Verify that index and range scans are not slow +# on tables during create select statements +# against hash and range partitioned tables +# due to tokudb bulk fetch not being used +source include/have_tokudb.inc; +source include/have_partition.inc; +set default_storage_engine='tokudb'; +disable_warnings; +drop table if exists t,t1,t2,t3; +enable_warnings; + +let $maxq = 10; + +CREATE TABLE `t` ( + `num` int(10) unsigned auto_increment NOT NULL, + `val` varchar(32) DEFAULT NULL, + PRIMARY KEY (`num`) +); + +# put 8M rows into t +INSERT INTO t values (null,null); +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +SELECT count(*) FROM t; + +# Create first 
table from source table t
+CREATE TABLE `t1` (
+ `num` int(10) unsigned NOT NULL,
+ `val` varchar(32) DEFAULT NULL,
+ PRIMARY KEY (`num`)
+) as select * from t;
+
+# Create second table from source table t
+CREATE TABLE `t2` (
+ `num` int(10) unsigned NOT NULL,
+ `val` varchar(32) DEFAULT NULL,
+ PRIMARY KEY (`num`)
+) PARTITION BY HASH (num)
+PARTITIONS 8 as select * from t;
+
+# Create third table from source table t;
+CREATE TABLE `t3` (
+ `num` int(10) unsigned NOT NULL,
+ `val` varchar(32) DEFAULT NULL,
+ PRIMARY KEY (`num`)
+) PARTITION BY RANGE (num)
+(PARTITION p0 VALUES LESS THAN (1000000),
+ PARTITION p1 VALUES LESS THAN (2000000),
+ PARTITION p2 VALUES LESS THAN (3000000),
+ PARTITION p3 VALUES LESS THAN (4000000),
+ PARTITION p4 VALUES LESS THAN (5000000),
+ PARTITION p5 VALUES LESS THAN (6000000),
+ PARTITION p6 VALUES LESS THAN (7000000),
+ PARTITION p7 VALUES LESS THAN MAXVALUE) as select * from t;
+
+
+let $s = `select to_seconds(now())`;
+let $i = 0;
+while ($i < $maxq) {
+ SELECT count(*) from t1;
+ inc $i;
+}
+let $time_elapsed_select = `select to_seconds(now()) - $s`;
+
+# The following line can be used to display the time elapsed data
+# which could be useful for debugging.
+# echo Index scans took $time_elapsed_select seconds.;
+
+let $s = `select to_seconds(now())`;
+let $i = 0;
+while ($i < $maxq) {
+ SELECT count(*) from t2;
+ inc $i;
+}
+
+let $time_elapsed_select_hash = `select to_seconds(now()) - $s`;
+
+# The following line can be used to display the time elapsed data
+# which could be useful for debugging.
+# echo Index scans took $time_elapsed_select_hash seconds.;
+
+# This check evaluates whether the time elapsed during the select statement
+# against a hashed partition table is on par with the select statement
+# against a non-partitioned table, which will confirm that bulk fetch is in fact being used.
+let $verdict = `select abs($time_elapsed_select_hash - $time_elapsed_select) <= $time_elapsed_select`;
+echo $verdict;
+if (!$verdict) { echo index scan t2 $time_elapsed_select_hash $time_elapsed_select; }
+
+######################################################################
+
+let $s = `select to_seconds(now())`;
+let $i = 0;
+while ($i < $maxq) {
+ SELECT count(*) from t1;
+ inc $i;
+}
+let $time_elapsed_select = `select to_seconds(now()) - $s`;
+
+# The following line can be used to display the time elapsed data
+# which could be useful for debugging.
+#echo Index scans took $time_elapsed_select seconds.;
+
+let $s = `select to_seconds(now())`;
+let $i = 0;
+while ($i < $maxq) {
+ SELECT count(*) from t3;
+ inc $i;
+}
+
+let $time_elapsed_select_range = `select to_seconds(now()) - $s`;
+
+# The following line can be used to display the time elapsed data
+# which could be useful for debugging.
+#echo Index scans took $time_elapsed_select_range seconds.;
+
+# This check evaluates whether the time elapsed during the select statement
+# against a range partition table is on par with the select statement
+# against a non-partitioned table, which will confirm that bulk fetch is in fact being used.
+let $verdict = `select abs($time_elapsed_select_range - $time_elapsed_select) <= $time_elapsed_select`;
+echo $verdict;
+if (!$verdict) { echo index scan t3 $time_elapsed_select_range $time_elapsed_select; }
+
+#########################################################################
+
+let $maxrq = 30;
+
+let $s = `select to_seconds(now())`;
+let $i = 0;
+while ($i < $maxrq) {
+ SELECT count(*) from t1 where num > 7000000;
+ inc $i;
+}
+let $time_elapsed_select = `select to_seconds(now()) - $s`;
+
+# The following line can be used to display the time elapsed data
+# which could be useful for debugging.
+#echo Index scans took $time_elapsed_select seconds.;
+
+let $s = `select to_seconds(now())`;
+let $i = 0;
+while ($i < $maxrq) {
+ SELECT count(*) from t2 where num > 7000000;
+ inc $i;
+}
+
+let $time_elapsed_select_hash = `select to_seconds(now()) - $s`;
+
+# The following line can be used to display the time elapsed data
+# which could be useful for debugging.
+#echo Index scans took $time_elapsed_select_hash seconds.;
+
+
+# This check evaluates whether the time elapsed during the select statement
+# against a hash partition table is on par with the select statement
+# against a non-partitioned table, which will confirm that bulk fetch is in fact being used.
+let $verdict = `select abs($time_elapsed_select_hash - $time_elapsed_select) <= $time_elapsed_select`;
+echo $verdict;
+if (!$verdict) { echo range scan t2 $time_elapsed_select_hash $time_elapsed_select; }
+
+#########################################################################
+
+let $maxrq = 30;
+
+let $s = `select to_seconds(now())`;
+let $i = 0;
+while ($i < $maxrq) {
+ SELECT count(*) from t1 where num > 7000000;
+ inc $i;
+}
+let $time_elapsed_select = `select to_seconds(now()) - $s`;
+
+# The following line can be used to display the time elapsed data
+# which could be useful for debugging.
+#echo Index scans took $time_elapsed_select seconds.;
+
+let $s = `select to_seconds(now())`;
+let $i = 0;
+while ($i < $maxrq) {
+ SELECT count(*) from t3 where num > 7000000;
+ inc $i;
+}
+
+let $time_elapsed_select_range = `select to_seconds(now()) - $s`;
+
+# The following line can be used to display the time elapsed data
+# which could be useful for debugging.
+#echo Index scans took $time_elapsed_select_range seconds.;
+
+
+# This check evaluates whether the time elapsed during the select statement
+# against a range partition table is on par with the select statement
+# against a non-partitioned table, which will confirm that bulk fetch is in fact being used.
+let $verdict = `select abs($time_elapsed_select_range - $time_elapsed_select) <= $time_elapsed_select`; +echo $verdict; +if (!$verdict) { echo range scan t3 $time_elapsed_select_range $time_elapsed_select; } + +drop table t,t1,t2,t3; From b16b461d25a529e7818b5623584077f5a3c52366 Mon Sep 17 00:00:00 2001 From: Rich Prohaska Date: Thu, 14 Aug 2014 15:18:27 -0400 Subject: [PATCH 138/190] DB-506 add a session variable to enable/disable bulk fetch default enabled --- storage/tokudb/ha_tokudb.cc | 2 +- storage/tokudb/hatoku_hton.cc | 1 + storage/tokudb/hatoku_hton.h | 2 ++ 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/storage/tokudb/ha_tokudb.cc b/storage/tokudb/ha_tokudb.cc index 76b4b6a0bdc..a53581c8d02 100644 --- a/storage/tokudb/ha_tokudb.cc +++ b/storage/tokudb/ha_tokudb.cc @@ -4370,7 +4370,7 @@ static bool tokudb_do_bulk_fetch(THD *thd) { case SQLCOM_CREATE_TABLE: case SQLCOM_INSERT_SELECT: case SQLCOM_REPLACE_SELECT: - return true; + return THDVAR(thd, bulk_fetch) != 0; default: return false; } diff --git a/storage/tokudb/hatoku_hton.cc b/storage/tokudb/hatoku_hton.cc index d9f86900907..37911a53086 100644 --- a/storage/tokudb/hatoku_hton.cc +++ b/storage/tokudb/hatoku_hton.cc @@ -1428,6 +1428,7 @@ static struct st_mysql_sys_var *tokudb_system_variables[] = { #if TOKUDB_CHECK_JEMALLOC MYSQL_SYSVAR(check_jemalloc), #endif + MYSQL_SYSVAR(bulk_fetch), NULL }; diff --git a/storage/tokudb/hatoku_hton.h b/storage/tokudb/hatoku_hton.h index 6971dce44bf..ac0ddefbe5d 100644 --- a/storage/tokudb/hatoku_hton.h +++ b/storage/tokudb/hatoku_hton.h @@ -515,6 +515,8 @@ static uint tokudb_check_jemalloc; static MYSQL_SYSVAR_UINT(check_jemalloc, tokudb_check_jemalloc, 0, "Check if jemalloc is linked", NULL, NULL, 1, 0, 1, 0); #endif +static MYSQL_THDVAR_BOOL(bulk_fetch, PLUGIN_VAR_THDLOCAL, "enable bulk fetch", NULL /*check*/, NULL /*update*/, true /*default*/); + extern HASH tokudb_open_tables; extern pthread_mutex_t tokudb_mutex; extern uint32_t 
tokudb_write_status_frequency; From 9e079da4cf8f2cafe5611ec93453bfc4997b2211 Mon Sep 17 00:00:00 2001 From: Rich Prohaska Date: Mon, 18 Aug 2014 09:13:29 -0400 Subject: [PATCH 139/190] FT-502 print huge pages guy when env fails to open --- storage/tokudb/hatoku_hton.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/storage/tokudb/hatoku_hton.cc b/storage/tokudb/hatoku_hton.cc index 37911a53086..0f88939fce1 100644 --- a/storage/tokudb/hatoku_hton.cc +++ b/storage/tokudb/hatoku_hton.cc @@ -539,6 +539,7 @@ static int tokudb_init_func(void *p) { if (r) { DBUG_PRINT("info", ("env->open %d", r)); + handle_ydb_error(r); goto error; } From 96fbab5fcb4daf5db5ac9384a75dcf964bb06aec Mon Sep 17 00:00:00 2001 From: Rich Prohaska Date: Mon, 18 Aug 2014 13:38:51 -0400 Subject: [PATCH 140/190] DB-500 allow simple deletes to use bulk fetch --- .../suite/tokudb/r/bf_delete_nonpart.result | 54 ++++++++++++++ .../suite/tokudb/r/bf_delete_trigger.result | 54 ++++++++++++++ .../suite/tokudb/t/bf_delete_nonpart.test | 67 ++++++++++++++++++ .../suite/tokudb/t/bf_delete_trigger.test | 70 +++++++++++++++++++ storage/tokudb/ha_tokudb.cc | 1 + 5 files changed, 246 insertions(+) create mode 100644 mysql-test/suite/tokudb/r/bf_delete_nonpart.result create mode 100644 mysql-test/suite/tokudb/r/bf_delete_trigger.result create mode 100644 mysql-test/suite/tokudb/t/bf_delete_nonpart.test create mode 100644 mysql-test/suite/tokudb/t/bf_delete_trigger.test diff --git a/mysql-test/suite/tokudb/r/bf_delete_nonpart.result b/mysql-test/suite/tokudb/r/bf_delete_nonpart.result new file mode 100644 index 00000000000..e83b150c543 --- /dev/null +++ b/mysql-test/suite/tokudb/r/bf_delete_nonpart.result @@ -0,0 +1,54 @@ +set default_storage_engine='tokudb'; +drop table if exists t; +CREATE TABLE `t` (id bigint not null auto_increment primary key, val bigint not null default 0); +INSERT INTO t (id) values (null); +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t 
SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +SELECT count(*) FROM t; +count(*) +8388608 +set tokudb_bulk_fetch = ON; +delete from t where val > 0; +delete from t where val > 0; +delete from t where val > 0; +delete from t where val > 0; +delete from t where val > 0; +delete from t where val > 0; +delete from t where val > 0; +delete from t where val > 0; +delete from t where val > 0; +delete from t where val > 0; +set tokudb_bulk_fetch = OFF; +delete from t where val > 0; +delete from t where val > 0; +delete from t where val > 0; +delete from t where val > 0; +delete from t where val > 0; +delete from t where val > 0; +delete from t where val > 0; +delete from t where val > 0; +delete from t where val > 0; +delete from t where val > 0; +1 +drop table t; diff --git a/mysql-test/suite/tokudb/r/bf_delete_trigger.result b/mysql-test/suite/tokudb/r/bf_delete_trigger.result new file mode 100644 index 00000000000..78bd8d2fe14 --- /dev/null +++ b/mysql-test/suite/tokudb/r/bf_delete_trigger.result @@ -0,0 +1,54 @@ +set default_storage_engine='tokudb'; +drop table if exists t; +create table t (id bigint not null primary key, x bigint not null); +insert into t values (1,0),(2,0),(3,0),(4,0); +create trigger t_delete before delete 
on t for each row insert into t values (1000000,0); +begin; +delete from t where x=0; +ERROR HY000: Can't update table 't' in stored function/trigger because it is already used by statement which invoked this stored function/trigger. +rollback; +drop trigger t_delete; +create trigger t_delete after delete on t for each row insert into t values (1000000,0); +begin; +delete from t where x=0; +ERROR HY000: Can't update table 't' in stored function/trigger because it is already used by statement which invoked this stored function/trigger. +rollback; +drop trigger t_delete; +create trigger t_delete before delete on t for each row delete from t where id=1000000; +begin; +delete from t where x=0; +ERROR HY000: Can't update table 't' in stored function/trigger because it is already used by statement which invoked this stored function/trigger. +rollback; +drop trigger t_delete; +create trigger t_delete after delete on t for each row delete from t where id=1000000; +begin; +delete from t where x=0; +ERROR HY000: Can't update table 't' in stored function/trigger because it is already used by statement which invoked this stored function/trigger. +rollback; +drop trigger t_delete; +create trigger t_delete before delete on t for each row update t set x=x+1 where id=1000000; +begin; +delete from t where x=0; +ERROR HY000: Can't update table 't' in stored function/trigger because it is already used by statement which invoked this stored function/trigger. +rollback; +drop trigger t_delete; +create trigger t_delete after delete on t for each row update t set x=x+1 where id=10000000; +begin; +delete from t where x=0; +ERROR HY000: Can't update table 't' in stored function/trigger because it is already used by statement which invoked this stored function/trigger. 
+rollback; +drop trigger t_delete; +create table count (count bigint not null); +create trigger t_delete before delete on t for each row insert into count select count(*) from t; +begin; +delete from t where x=0; +select * from count; +count +4 +3 +2 +1 +rollback; +drop trigger t_delete; +drop table count; +drop table t; diff --git a/mysql-test/suite/tokudb/t/bf_delete_nonpart.test b/mysql-test/suite/tokudb/t/bf_delete_nonpart.test new file mode 100644 index 00000000000..b80b3e6c3c9 --- /dev/null +++ b/mysql-test/suite/tokudb/t/bf_delete_nonpart.test @@ -0,0 +1,67 @@ +# Verify that index scans for delete statements use bulk fetch and are +# at least twice as fast + +source include/have_tokudb.inc; +set default_storage_engine='tokudb'; +disable_warnings; +drop table if exists t; +enable_warnings; + +CREATE TABLE `t` (id bigint not null auto_increment primary key, val bigint not null default 0); + +# put 8M rows into t +INSERT INTO t (id) values (null); +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +SELECT count(*) FROM t; + +# run $maxq measurements +let $maxq = 10; + +# measure the time to do $maxq deletes from t that affect no 
rows with bulk fetch ON +set tokudb_bulk_fetch = ON; +let $s = `select to_seconds(now())`; +let $i = 0; +while ($i < $maxq) { + delete from t where val > 0; + inc $i; +} +let $time_elapsed_bf_on = `select to_seconds(now()) - $s`; + +# measure the time to do $maxq deletes from t that affect no rows with bulk fetch OFF +set tokudb_bulk_fetch = OFF; +let $s = `select to_seconds(now())`; +let $i = 0; +while ($i < $maxq) { + delete from t where val > 0; + inc $i; +} +let $time_elapsed_bf_off = `select to_seconds(now()) - $s`; + +# verify that a delete scan with bulk fetch ON is at least 2 times faster than with bulk fetch OFF +let $verdict = `select $time_elapsed_bf_off > $time_elapsed_bf_on && ($time_elapsed_bf_off - $time_elapsed_bf_on) / $time_elapsed_bf_on >= 2`; +echo $verdict; +if (!$verdict) { echo $time_elapsed_bf_on $time_elapsed_bf_off; } + +drop table t; diff --git a/mysql-test/suite/tokudb/t/bf_delete_trigger.test b/mysql-test/suite/tokudb/t/bf_delete_trigger.test new file mode 100644 index 00000000000..031a48feab1 --- /dev/null +++ b/mysql-test/suite/tokudb/t/bf_delete_trigger.test @@ -0,0 +1,70 @@ +# verify that delete triggers can not insert, delete, or update rows in the target table + +source include/have_tokudb.inc; +set default_storage_engine='tokudb'; +disable_warnings; +drop table if exists t; +enable_warnings; + +create table t (id bigint not null primary key, x bigint not null); +insert into t values (1,0),(2,0),(3,0),(4,0); + +# verify that a before delete trigger can not insert into the target table +create trigger t_delete before delete on t for each row insert into t values (1000000,0); +begin; +error 1442; +delete from t where x=0; +rollback; +drop trigger t_delete; + +# verify that an after delete trigger can not insert into the target table +create trigger t_delete after delete on t for each row insert into t values (1000000,0); +begin; +error 1442; +delete from t where x=0; +rollback; +drop trigger t_delete; + +# verify that a before 
delete trigger can not delete from the target table +create trigger t_delete before delete on t for each row delete from t where id=1000000; +begin; +error 1442; +delete from t where x=0; +rollback; +drop trigger t_delete; + +# verify that an after delete trigger can not delete from the target table +create trigger t_delete after delete on t for each row delete from t where id=1000000; +begin; +error 1442; +delete from t where x=0; +rollback; +drop trigger t_delete; + +# verify that a before delete trigger can not update the target table +create trigger t_delete before delete on t for each row update t set x=x+1 where id=1000000; +begin; +error 1442; +delete from t where x=0; +rollback; +drop trigger t_delete; + +# verify that an after delete trigger can not update the target table +create trigger t_delete after delete on t for each row update t set x=x+1 where id=10000000; +begin; +error 1442; +delete from t where x=0; +rollback; +drop trigger t_delete; + +# can execute select on the target table in a delete trigger. it better use a different handler. 
+create table count (count bigint not null); +create trigger t_delete before delete on t for each row insert into count select count(*) from t; +begin; +delete from t where x=0; +select * from count; +rollback; +drop trigger t_delete; +drop table count; + +drop table t; \ No newline at end of file diff --git a/storage/tokudb/ha_tokudb.cc b/storage/tokudb/ha_tokudb.cc index a53581c8d02..178b6a59d7d 100644 --- a/storage/tokudb/ha_tokudb.cc +++ b/storage/tokudb/ha_tokudb.cc @@ -4370,6 +4370,7 @@ static bool tokudb_do_bulk_fetch(THD *thd) { case SQLCOM_CREATE_TABLE: case SQLCOM_INSERT_SELECT: case SQLCOM_REPLACE_SELECT: + case SQLCOM_DELETE: return THDVAR(thd, bulk_fetch) != 0; default: return false; From 0bce0dba3061c154f9e2b1f92aba929db0d29beb Mon Sep 17 00:00:00 2001 From: Rich Prohaska Date: Mon, 18 Aug 2014 14:45:39 -0400 Subject: [PATCH 141/190] DB-504 test insert select with various triggers to make sure that bulk fetch will work --- .../tokudb/r/bf_insert_select_trigger.result | 45 +++++ .../r/bf_insert_select_update_trigger.result | 121 +++++++++++++ .../tokudb/t/bf_insert_select_trigger.test | 65 +++++++ .../t/bf_insert_select_update_trigger.test | 170 ++++++++++++++++++ 4 files changed, 401 insertions(+) create mode 100644 mysql-test/suite/tokudb/r/bf_insert_select_trigger.result create mode 100644 mysql-test/suite/tokudb/r/bf_insert_select_update_trigger.result create mode 100644 mysql-test/suite/tokudb/t/bf_insert_select_trigger.test create mode 100644 mysql-test/suite/tokudb/t/bf_insert_select_update_trigger.test diff --git a/mysql-test/suite/tokudb/r/bf_insert_select_trigger.result b/mysql-test/suite/tokudb/r/bf_insert_select_trigger.result new file mode 100644 index 00000000000..860d26602dd --- /dev/null +++ b/mysql-test/suite/tokudb/r/bf_insert_select_trigger.result @@ -0,0 +1,45 @@ +set default_storage_engine='tokudb'; +drop table if exists s,t; +create table s (id bigint not null primary key, x bigint); +insert into s values (1,0),(2,0),(3,0),(4,0); 
+create table t like s; +begin; +insert into t select * from s; +rollback; +create trigger t_trigger before insert on t for each row insert into s values (1000000,0); +begin; +insert into t select * from s; +ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger. +rollback; +drop trigger t_trigger; +create trigger t_trigger after insert on t for each row insert into s values (1000000,0); +begin; +insert into t select * from s; +ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger. +rollback; +drop trigger t_trigger; +create trigger t_trigger before insert on t for each row delete from s where id=1000000; +begin; +insert into t select * from s; +ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger. +rollback; +drop trigger t_trigger; +create trigger t_trigger after insert on t for each row delete from s where id=1000000; +begin; +insert into t select * from s; +ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger. +rollback; +drop trigger t_trigger; +create trigger t_trigger before insert on t for each row update s set x=x+1 where id=1000000; +begin; +insert into t select * from s; +ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger. +rollback; +drop trigger t_trigger; +create trigger t_trigger after insert on t for each row update s set x=x+1 where id=1000000; +begin; +insert into t select * from s; +ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger. 
+rollback; +drop trigger t_trigger; +drop table s,t; diff --git a/mysql-test/suite/tokudb/r/bf_insert_select_update_trigger.result b/mysql-test/suite/tokudb/r/bf_insert_select_update_trigger.result new file mode 100644 index 00000000000..d7588441d92 --- /dev/null +++ b/mysql-test/suite/tokudb/r/bf_insert_select_update_trigger.result @@ -0,0 +1,121 @@ +set default_storage_engine='tokudb'; +drop table if exists s,t; +create table s (id bigint not null primary key, x bigint); +insert into s values (1,0),(2,0),(3,0),(4,0); +create table t like s; +begin; +insert into t select * from s; +rollback; +create trigger t_trigger before insert on t for each row insert into s values (1000000,0); +begin; +insert into t select * from s on duplicate key update x=t.x+1; +ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger. +rollback; +drop trigger t_trigger; +create trigger t_trigger after insert on t for each row insert into s values (1000000,0); +begin; +insert into t select * from s on duplicate key update x=t.x+1; +ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger. +rollback; +drop trigger t_trigger; +create trigger t_trigger before insert on t for each row delete from s where id=1000000; +begin; +insert into t select * from s on duplicate key update x=t.x+1; +ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger. +rollback; +drop trigger t_trigger; +create trigger t_trigger after insert on t for each row delete from s where id=1000000; +begin; +insert into t select * from s on duplicate key update x=t.x+1; +ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger. 
+rollback; +drop trigger t_trigger; +create trigger t_trigger before insert on t for each row update s set x=x+1 where id=1000000; +begin; +insert into t select * from s on duplicate key update x=t.x+1; +ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger. +rollback; +drop trigger t_trigger; +create trigger t_trigger after insert on t for each row update s set x=x+1 where id=1000000; +begin; +insert into t select * from s on duplicate key update x=t.x+1; +ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger. +rollback; +drop trigger t_trigger; +truncate table t; +insert into t values (1,0); +create trigger t_trigger before insert on t for each row insert into s values (1000000,0); +begin; +insert into t select * from s on duplicate key update x=t.x+1; +ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger. +rollback; +drop trigger t_trigger; +create trigger t_trigger after insert on t for each row insert into s values (1000000,0); +begin; +insert into t select * from s on duplicate key update x=t.x+1; +ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger. +rollback; +drop trigger t_trigger; +create trigger t_trigger before insert on t for each row delete from s where id=1000000; +begin; +insert into t select * from s on duplicate key update x=t.x+1; +ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger. 
+rollback; +drop trigger t_trigger; +create trigger t_trigger after insert on t for each row delete from s where id=1000000; +begin; +insert into t select * from s on duplicate key update x=t.x+1; +ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger. +rollback; +drop trigger t_trigger; +create trigger t_trigger before insert on t for each row update s set x=x+1 where id=1000000; +begin; +insert into t select * from s on duplicate key update x=t.x+1; +ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger. +rollback; +drop trigger t_trigger; +create trigger t_trigger after insert on t for each row update s set x=x+1 where id=1000000; +begin; +insert into t select * from s on duplicate key update x=t.x+1; +ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger. +rollback; +drop trigger t_trigger; +truncate table t; +insert into t values (1,0); +create trigger t_trigger before update on t for each row insert into s values (1000000,0); +begin; +insert into t select * from s on duplicate key update x=t.x+1; +ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger. +rollback; +drop trigger t_trigger; +create trigger t_trigger after update on t for each row insert into s values (1000000,0); +begin; +insert into t select * from s on duplicate key update x=t.x+1; +ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger. 
+rollback; +drop trigger t_trigger; +create trigger t_trigger before update on t for each row delete from s where id=1000000; +begin; +insert into t select * from s on duplicate key update x=t.x+1; +ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger. +rollback; +drop trigger t_trigger; +create trigger t_trigger after update on t for each row delete from s where id=1000000; +begin; +insert into t select * from s on duplicate key update x=t.x+1; +ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger. +rollback; +drop trigger t_trigger; +create trigger t_trigger before update on t for each row update s set x=x+1 where id=1000000; +begin; +insert into t select * from s on duplicate key update x=t.x+1; +ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger. +rollback; +drop trigger t_trigger; +create trigger t_trigger after update on t for each row update s set x=x+1 where id=1000000; +begin; +insert into t select * from s on duplicate key update x=t.x+1; +ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger. 
+rollback; +drop trigger t_trigger; +drop table s,t; diff --git a/mysql-test/suite/tokudb/t/bf_insert_select_trigger.test b/mysql-test/suite/tokudb/t/bf_insert_select_trigger.test new file mode 100644 index 00000000000..337013c9dad --- /dev/null +++ b/mysql-test/suite/tokudb/t/bf_insert_select_trigger.test @@ -0,0 +1,65 @@ +# verify that various insert triggers can not execute on the source table for an insert select statement + +source include/have_tokudb.inc; +set default_storage_engine='tokudb'; +disable_warnings; +drop table if exists s,t; +enable_warnings; + +create table s (id bigint not null primary key, x bigint); +insert into s values (1,0),(2,0),(3,0),(4,0); + +create table t like s; +begin; +insert into t select * from s; +rollback; + +# verify that before insert triggers can not insert into the source table +create trigger t_trigger before insert on t for each row insert into s values (1000000,0); +begin; +error 1442; +insert into t select * from s; +rollback; +drop trigger t_trigger; + +# verify that after insert triggers can not insert into the source table +create trigger t_trigger after insert on t for each row insert into s values (1000000,0); +begin; +error 1442; +insert into t select * from s; +rollback; +drop trigger t_trigger; + +# verify that before insert triggers can not delete from the source table +create trigger t_trigger before insert on t for each row delete from s where id=1000000; +begin; +error 1442; +insert into t select * from s; +rollback; +drop trigger t_trigger; + +# verify that after insert triggers can not delete from the source table +create trigger t_trigger after insert on t for each row delete from s where id=1000000; +begin; +error 1442; +insert into t select * from s; +rollback; +drop trigger t_trigger; + +# verify that before insert triggers can not update the source table +create trigger t_trigger before insert on t for each row update s set x=x+1 where id=1000000; +begin; +error 1442; +insert into t select * from s; 
+rollback; +drop trigger t_trigger; + +# verify that after insert triggers can not update the source table +create trigger t_trigger after insert on t for each row update s set x=x+1 where id=1000000; +begin; +error 1442; +insert into t select * from s; +rollback; +drop trigger t_trigger; + +drop table s,t; diff --git a/mysql-test/suite/tokudb/t/bf_insert_select_update_trigger.test b/mysql-test/suite/tokudb/t/bf_insert_select_update_trigger.test new file mode 100644 index 00000000000..d5addc69076 --- /dev/null +++ b/mysql-test/suite/tokudb/t/bf_insert_select_update_trigger.test @@ -0,0 +1,170 @@ +# verify that various insert triggers can not execute on the source table for an insert select statement + +source include/have_tokudb.inc; +set default_storage_engine='tokudb'; +disable_warnings; +drop table if exists s,t; +enable_warnings; + +create table s (id bigint not null primary key, x bigint); +insert into s values (1,0),(2,0),(3,0),(4,0); + +create table t like s; +begin; +insert into t select * from s; +rollback; +# insert into t values (1,0); + +# verify that before insert triggers can not insert into the source table +create trigger t_trigger before insert on t for each row insert into s values (1000000,0); +begin; +error 1442; +insert into t select * from s on duplicate key update x=t.x+1; +rollback; +drop trigger t_trigger; + +# verify that after insert triggers can not insert into the source table +create trigger t_trigger after insert on t for each row insert into s values (1000000,0); +begin; +error 1442; +insert into t select * from s on duplicate key update x=t.x+1; +rollback; +drop trigger t_trigger; + +# verify that before insert triggers can not delete from the source table +create trigger t_trigger before insert on t for each row delete from s where id=1000000; +begin; +error 1442; +insert into t select * from s on duplicate key update x=t.x+1; +rollback; +drop trigger t_trigger; + +# verify that after insert triggers can not delete from the source 
table +create trigger t_trigger after insert on t for each row delete from s where id=1000000; +begin; +error 1442; +insert into t select * from s on duplicate key update x=t.x+1; +rollback; +drop trigger t_trigger; + +# verify that before insert triggers can not update the source table +create trigger t_trigger before insert on t for each row update s set x=x+1 where id=1000000; +begin; +error 1442; +insert into t select * from s on duplicate key update x=t.x+1; +rollback; +drop trigger t_trigger; + +# verify that after insert triggers can not update the source table +create trigger t_trigger after insert on t for each row update s set x=x+1 where id=1000000; +begin; +error 1442; +insert into t select * from s on duplicate key update x=t.x+1; +rollback; +drop trigger t_trigger; + +# force duplicate keys +truncate table t; +insert into t values (1,0); + +# verify that before insert triggers can not insert into the source table +create trigger t_trigger before insert on t for each row insert into s values (1000000,0); +begin; +error 1442; +insert into t select * from s on duplicate key update x=t.x+1; +rollback; +drop trigger t_trigger; + +# verify that after insert triggers can not insert into the source table +create trigger t_trigger after insert on t for each row insert into s values (1000000,0); +begin; +error 1442; +insert into t select * from s on duplicate key update x=t.x+1; +rollback; +drop trigger t_trigger; + +# verify that before insert triggers can not delete from the source table +create trigger t_trigger before insert on t for each row delete from s where id=1000000; +begin; +error 1442; +insert into t select * from s on duplicate key update x=t.x+1; +rollback; +drop trigger t_trigger; + +# verify that after insert triggers can not delete from the source table +create trigger t_trigger after insert on t for each row delete from s where id=1000000; +begin; +error 1442; +insert into t select * from s on duplicate key update x=t.x+1; +rollback; +drop 
trigger t_trigger; + +# verify that before insert triggers can not update the source table +create trigger t_trigger before insert on t for each row update s set x=x+1 where id=1000000; +begin; +error 1442; +insert into t select * from s on duplicate key update x=t.x+1; +rollback; +drop trigger t_trigger; + +# verify that after insert triggers can not update the source table +create trigger t_trigger after insert on t for each row update s set x=x+1 where id=1000000; +begin; +error 1442; +insert into t select * from s on duplicate key update x=t.x+1; +rollback; +drop trigger t_trigger; + +# force duplicate keys +truncate table t; +insert into t values (1,0); + +# verify that before insert triggers can not insert into the source table +create trigger t_trigger before update on t for each row insert into s values (1000000,0); +begin; +error 1442; +insert into t select * from s on duplicate key update x=t.x+1; +rollback; +drop trigger t_trigger; + +# verify that after insert triggers can not insert into the source table +create trigger t_trigger after update on t for each row insert into s values (1000000,0); +begin; +error 1442; +insert into t select * from s on duplicate key update x=t.x+1; +rollback; +drop trigger t_trigger; + +# verify that before update triggers can not delete from the source table +create trigger t_trigger before update on t for each row delete from s where id=1000000; +begin; +error 1442; +insert into t select * from s on duplicate key update x=t.x+1; +rollback; +drop trigger t_trigger; + +# verify that after insert triggers can not delete from the source table +create trigger t_trigger after update on t for each row delete from s where id=1000000; +begin; +error 1442; +insert into t select * from s on duplicate key update x=t.x+1; +rollback; +drop trigger t_trigger; + +# verify that before update triggers can not update the source table +create trigger t_trigger before update on t for each row update s set x=x+1 where id=1000000; +begin; 
+error 1442; +insert into t select * from s on duplicate key update x=t.x+1; +rollback; +drop trigger t_trigger; + +# verify that after insert triggers can not update the source table +create trigger t_trigger after update on t for each row update s set x=x+1 where id=1000000; +begin; +error 1442; +insert into t select * from s on duplicate key update x=t.x+1; +rollback; +drop trigger t_trigger; + +drop table s,t; From 401e0e5d685d5bf1fb407826a964f3db8b0c731a Mon Sep 17 00:00:00 2001 From: Rich Prohaska Date: Mon, 18 Aug 2014 15:57:12 -0400 Subject: [PATCH 142/190] DB-504 move bulk fetch tests --- .../{tokudb.bugs => tokudb}/r/bf_create_select_hash_part.result | 0 .../{tokudb.bugs => tokudb}/r/bf_create_select_nonpart.result | 0 .../{tokudb.bugs => tokudb}/r/bf_create_select_range_part.result | 0 .../r/bf_create_temp_select_nonpart.result | 0 .../r/bf_insert_select_dup_key_nonpart.result | 0 .../{tokudb.bugs => tokudb}/r/bf_insert_select_nonpart.result | 0 .../{tokudb.bugs => tokudb}/r/bf_replace_select_nonpart.result | 0 mysql-test/suite/{tokudb.bugs => tokudb}/r/bf_select_part.result | 0 .../{tokudb.bugs => tokudb}/t/bf_create_select_hash_part.test | 0 .../suite/{tokudb.bugs => tokudb}/t/bf_create_select_nonpart.test | 0 .../{tokudb.bugs => tokudb}/t/bf_create_select_range_part.test | 0 .../{tokudb.bugs => tokudb}/t/bf_create_temp_select_nonpart.test | 0 .../t/bf_insert_select_dup_key_nonpart.test | 0 .../suite/{tokudb.bugs => tokudb}/t/bf_insert_select_nonpart.test | 0 .../{tokudb.bugs => tokudb}/t/bf_replace_select_nonpart.test | 0 mysql-test/suite/{tokudb.bugs => tokudb}/t/bf_select_part.test | 0 16 files changed, 0 insertions(+), 0 deletions(-) rename mysql-test/suite/{tokudb.bugs => tokudb}/r/bf_create_select_hash_part.result (100%) rename mysql-test/suite/{tokudb.bugs => tokudb}/r/bf_create_select_nonpart.result (100%) rename mysql-test/suite/{tokudb.bugs => tokudb}/r/bf_create_select_range_part.result (100%) rename mysql-test/suite/{tokudb.bugs => 
tokudb}/r/bf_create_temp_select_nonpart.result (100%) rename mysql-test/suite/{tokudb.bugs => tokudb}/r/bf_insert_select_dup_key_nonpart.result (100%) rename mysql-test/suite/{tokudb.bugs => tokudb}/r/bf_insert_select_nonpart.result (100%) rename mysql-test/suite/{tokudb.bugs => tokudb}/r/bf_replace_select_nonpart.result (100%) rename mysql-test/suite/{tokudb.bugs => tokudb}/r/bf_select_part.result (100%) rename mysql-test/suite/{tokudb.bugs => tokudb}/t/bf_create_select_hash_part.test (100%) rename mysql-test/suite/{tokudb.bugs => tokudb}/t/bf_create_select_nonpart.test (100%) rename mysql-test/suite/{tokudb.bugs => tokudb}/t/bf_create_select_range_part.test (100%) rename mysql-test/suite/{tokudb.bugs => tokudb}/t/bf_create_temp_select_nonpart.test (100%) rename mysql-test/suite/{tokudb.bugs => tokudb}/t/bf_insert_select_dup_key_nonpart.test (100%) rename mysql-test/suite/{tokudb.bugs => tokudb}/t/bf_insert_select_nonpart.test (100%) rename mysql-test/suite/{tokudb.bugs => tokudb}/t/bf_replace_select_nonpart.test (100%) rename mysql-test/suite/{tokudb.bugs => tokudb}/t/bf_select_part.test (100%) diff --git a/mysql-test/suite/tokudb.bugs/r/bf_create_select_hash_part.result b/mysql-test/suite/tokudb/r/bf_create_select_hash_part.result similarity index 100% rename from mysql-test/suite/tokudb.bugs/r/bf_create_select_hash_part.result rename to mysql-test/suite/tokudb/r/bf_create_select_hash_part.result diff --git a/mysql-test/suite/tokudb.bugs/r/bf_create_select_nonpart.result b/mysql-test/suite/tokudb/r/bf_create_select_nonpart.result similarity index 100% rename from mysql-test/suite/tokudb.bugs/r/bf_create_select_nonpart.result rename to mysql-test/suite/tokudb/r/bf_create_select_nonpart.result diff --git a/mysql-test/suite/tokudb.bugs/r/bf_create_select_range_part.result b/mysql-test/suite/tokudb/r/bf_create_select_range_part.result similarity index 100% rename from mysql-test/suite/tokudb.bugs/r/bf_create_select_range_part.result rename to 
mysql-test/suite/tokudb/r/bf_create_select_range_part.result diff --git a/mysql-test/suite/tokudb.bugs/r/bf_create_temp_select_nonpart.result b/mysql-test/suite/tokudb/r/bf_create_temp_select_nonpart.result similarity index 100% rename from mysql-test/suite/tokudb.bugs/r/bf_create_temp_select_nonpart.result rename to mysql-test/suite/tokudb/r/bf_create_temp_select_nonpart.result diff --git a/mysql-test/suite/tokudb.bugs/r/bf_insert_select_dup_key_nonpart.result b/mysql-test/suite/tokudb/r/bf_insert_select_dup_key_nonpart.result similarity index 100% rename from mysql-test/suite/tokudb.bugs/r/bf_insert_select_dup_key_nonpart.result rename to mysql-test/suite/tokudb/r/bf_insert_select_dup_key_nonpart.result diff --git a/mysql-test/suite/tokudb.bugs/r/bf_insert_select_nonpart.result b/mysql-test/suite/tokudb/r/bf_insert_select_nonpart.result similarity index 100% rename from mysql-test/suite/tokudb.bugs/r/bf_insert_select_nonpart.result rename to mysql-test/suite/tokudb/r/bf_insert_select_nonpart.result diff --git a/mysql-test/suite/tokudb.bugs/r/bf_replace_select_nonpart.result b/mysql-test/suite/tokudb/r/bf_replace_select_nonpart.result similarity index 100% rename from mysql-test/suite/tokudb.bugs/r/bf_replace_select_nonpart.result rename to mysql-test/suite/tokudb/r/bf_replace_select_nonpart.result diff --git a/mysql-test/suite/tokudb.bugs/r/bf_select_part.result b/mysql-test/suite/tokudb/r/bf_select_part.result similarity index 100% rename from mysql-test/suite/tokudb.bugs/r/bf_select_part.result rename to mysql-test/suite/tokudb/r/bf_select_part.result diff --git a/mysql-test/suite/tokudb.bugs/t/bf_create_select_hash_part.test b/mysql-test/suite/tokudb/t/bf_create_select_hash_part.test similarity index 100% rename from mysql-test/suite/tokudb.bugs/t/bf_create_select_hash_part.test rename to mysql-test/suite/tokudb/t/bf_create_select_hash_part.test diff --git a/mysql-test/suite/tokudb.bugs/t/bf_create_select_nonpart.test 
b/mysql-test/suite/tokudb/t/bf_create_select_nonpart.test similarity index 100% rename from mysql-test/suite/tokudb.bugs/t/bf_create_select_nonpart.test rename to mysql-test/suite/tokudb/t/bf_create_select_nonpart.test diff --git a/mysql-test/suite/tokudb.bugs/t/bf_create_select_range_part.test b/mysql-test/suite/tokudb/t/bf_create_select_range_part.test similarity index 100% rename from mysql-test/suite/tokudb.bugs/t/bf_create_select_range_part.test rename to mysql-test/suite/tokudb/t/bf_create_select_range_part.test diff --git a/mysql-test/suite/tokudb.bugs/t/bf_create_temp_select_nonpart.test b/mysql-test/suite/tokudb/t/bf_create_temp_select_nonpart.test similarity index 100% rename from mysql-test/suite/tokudb.bugs/t/bf_create_temp_select_nonpart.test rename to mysql-test/suite/tokudb/t/bf_create_temp_select_nonpart.test diff --git a/mysql-test/suite/tokudb.bugs/t/bf_insert_select_dup_key_nonpart.test b/mysql-test/suite/tokudb/t/bf_insert_select_dup_key_nonpart.test similarity index 100% rename from mysql-test/suite/tokudb.bugs/t/bf_insert_select_dup_key_nonpart.test rename to mysql-test/suite/tokudb/t/bf_insert_select_dup_key_nonpart.test diff --git a/mysql-test/suite/tokudb.bugs/t/bf_insert_select_nonpart.test b/mysql-test/suite/tokudb/t/bf_insert_select_nonpart.test similarity index 100% rename from mysql-test/suite/tokudb.bugs/t/bf_insert_select_nonpart.test rename to mysql-test/suite/tokudb/t/bf_insert_select_nonpart.test diff --git a/mysql-test/suite/tokudb.bugs/t/bf_replace_select_nonpart.test b/mysql-test/suite/tokudb/t/bf_replace_select_nonpart.test similarity index 100% rename from mysql-test/suite/tokudb.bugs/t/bf_replace_select_nonpart.test rename to mysql-test/suite/tokudb/t/bf_replace_select_nonpart.test diff --git a/mysql-test/suite/tokudb.bugs/t/bf_select_part.test b/mysql-test/suite/tokudb/t/bf_select_part.test similarity index 100% rename from mysql-test/suite/tokudb.bugs/t/bf_select_part.test rename to 
mysql-test/suite/tokudb/t/bf_select_part.test From adf1e56e425ced72321705923ff68a1b2d694f5a Mon Sep 17 00:00:00 2001 From: Rich Prohaska Date: Mon, 18 Aug 2014 15:58:49 -0400 Subject: [PATCH 143/190] DB-504 test replace select triggers for bulk fetch --- .../tokudb/r/bf_replace_select_trigger.result | 121 +++++++++++++ .../tokudb/t/bf_replace_select_trigger.test | 169 ++++++++++++++++++ 2 files changed, 290 insertions(+) create mode 100644 mysql-test/suite/tokudb/r/bf_replace_select_trigger.result create mode 100644 mysql-test/suite/tokudb/t/bf_replace_select_trigger.test diff --git a/mysql-test/suite/tokudb/r/bf_replace_select_trigger.result b/mysql-test/suite/tokudb/r/bf_replace_select_trigger.result new file mode 100644 index 00000000000..acd17170301 --- /dev/null +++ b/mysql-test/suite/tokudb/r/bf_replace_select_trigger.result @@ -0,0 +1,121 @@ +set default_storage_engine='tokudb'; +drop table if exists s,t; +create table s (id bigint not null primary key, x bigint); +insert into s values (1,0),(2,0),(3,0),(4,0); +create table t like s; +begin; +replace into t select * from s; +rollback; +create trigger t_trigger before insert on t for each row replace into s values (1000000,0); +begin; +replace into t select * from s; +ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger. +rollback; +drop trigger t_trigger; +create trigger t_trigger after insert on t for each row replace into s values (1000000,0); +begin; +replace into t select * from s; +ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger. 
+rollback; +drop trigger t_trigger; +create trigger t_trigger before insert on t for each row delete from s where id=1000000; +begin; +replace into t select * from s; +ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger. +rollback; +drop trigger t_trigger; +create trigger t_trigger after insert on t for each row delete from s where id=1000000; +begin; +replace into t select * from s; +ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger. +rollback; +drop trigger t_trigger; +create trigger t_trigger before insert on t for each row update s set x=x+1 where id=1000000; +begin; +replace into t select * from s; +ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger. +rollback; +drop trigger t_trigger; +create trigger t_trigger after insert on t for each row update s set x=x+1 where id=1000000; +begin; +replace into t select * from s; +ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger. +rollback; +drop trigger t_trigger; +truncate table t; +insert into t values (1,1); +create trigger t_trigger before insert on t for each row replace into s values (1000000,0); +begin; +replace into t select * from s; +ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger. +rollback; +drop trigger t_trigger; +create trigger t_trigger after insert on t for each row replace into s values (1000000,0); +begin; +replace into t select * from s; +ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger. 
+rollback; +drop trigger t_trigger; +create trigger t_trigger before insert on t for each row delete from s where id=1000000; +begin; +replace into t select * from s; +ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger. +rollback; +drop trigger t_trigger; +create trigger t_trigger after insert on t for each row delete from s where id=1000000; +begin; +replace into t select * from s; +ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger. +rollback; +drop trigger t_trigger; +create trigger t_trigger before insert on t for each row update s set x=x+1 where id=1000000; +begin; +replace into t select * from s; +ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger. +rollback; +drop trigger t_trigger; +create trigger t_trigger after insert on t for each row update s set x=x+1 where id=1000000; +begin; +replace into t select * from s; +ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger. +rollback; +drop trigger t_trigger; +truncate table t; +insert into t values (1,1); +create trigger t_trigger before delete on t for each row replace into s values (1000000,0); +begin; +replace into t select * from s; +ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger. +rollback; +drop trigger t_trigger; +create trigger t_trigger after delete on t for each row replace into s values (1000000,0); +begin; +replace into t select * from s; +ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger. 
+rollback; +drop trigger t_trigger; +create trigger t_trigger before delete on t for each row delete from s where id=1000000; +begin; +replace into t select * from s; +ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger. +rollback; +drop trigger t_trigger; +create trigger t_trigger after delete on t for each row delete from s where id=1000000; +begin; +replace into t select * from s; +ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger. +rollback; +drop trigger t_trigger; +create trigger t_trigger before delete on t for each row update s set x=x+1 where id=1000000; +begin; +replace into t select * from s; +ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger. +rollback; +drop trigger t_trigger; +create trigger t_trigger after delete on t for each row update s set x=x+1 where id=1000000; +begin; +replace into t select * from s; +ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger. 
+rollback; +drop trigger t_trigger; +drop table s,t; diff --git a/mysql-test/suite/tokudb/t/bf_replace_select_trigger.test b/mysql-test/suite/tokudb/t/bf_replace_select_trigger.test new file mode 100644 index 00000000000..6b098ce04af --- /dev/null +++ b/mysql-test/suite/tokudb/t/bf_replace_select_trigger.test @@ -0,0 +1,169 @@ +# verify that various insert and update triggers can not execute on the source table +# for a replace select statement + +source include/have_tokudb.inc; +set default_storage_engine='tokudb'; +disable_warnings; +drop table if exists s,t; +enable_warnings; + +create table s (id bigint not null primary key, x bigint); +insert into s values (1,0),(2,0),(3,0),(4,0); + +create table t like s; +begin; +replace into t select * from s; +rollback; + +# verify that before insert triggers can not replace into the source table +create trigger t_trigger before insert on t for each row replace into s values (1000000,0); +begin; +error 1442; +replace into t select * from s; +rollback; +drop trigger t_trigger; + +# verify that after insert triggers can not replace into the source table +create trigger t_trigger after insert on t for each row replace into s values (1000000,0); +begin; +error 1442; +replace into t select * from s; +rollback; +drop trigger t_trigger; + +# verify that before insert triggers can not delete from the source table +create trigger t_trigger before insert on t for each row delete from s where id=1000000; +begin; +error 1442; +replace into t select * from s; +rollback; +drop trigger t_trigger; + +# verify that after insert triggers can not delete from the source table +create trigger t_trigger after insert on t for each row delete from s where id=1000000; +begin; +error 1442; +replace into t select * from s; +rollback; +drop trigger t_trigger; + +# verify that before insert triggers can not update the source table +create trigger t_trigger before insert on t for each row update s set x=x+1 where id=1000000; +begin; +error 1442; 
+replace into t select * from s; +rollback; +drop trigger t_trigger; + +# verify that after insert triggers can not update the source table +create trigger t_trigger after insert on t for each row update s set x=x+1 where id=1000000; +begin; +error 1442; +replace into t select * from s; +rollback; +drop trigger t_trigger; + +truncate table t; +insert into t values (1,1); + +# verify that before insert triggers can not replace into the source table +create trigger t_trigger before insert on t for each row replace into s values (1000000,0); +begin; +error 1442; +replace into t select * from s; +rollback; +drop trigger t_trigger; + +# verify that after insert triggers can not replace into the source table +create trigger t_trigger after insert on t for each row replace into s values (1000000,0); +begin; +error 1442; +replace into t select * from s; +rollback; +drop trigger t_trigger; + +# verify that before insert triggers can not delete from the source table +create trigger t_trigger before insert on t for each row delete from s where id=1000000; +begin; +error 1442; +replace into t select * from s; +rollback; +drop trigger t_trigger; + +# verify that after insert triggers can not delete from the source table +create trigger t_trigger after insert on t for each row delete from s where id=1000000; +begin; +error 1442; +replace into t select * from s; +rollback; +drop trigger t_trigger; + +# verify that before insert triggers can not update the source table +create trigger t_trigger before insert on t for each row update s set x=x+1 where id=1000000; +begin; +error 1442; +replace into t select * from s; +rollback; +drop trigger t_trigger; + +# verify that after insert triggers can not update the source table +create trigger t_trigger after insert on t for each row update s set x=x+1 where id=1000000; +begin; +error 1442; +replace into t select * from s; +rollback; +drop trigger t_trigger; + +truncate table t; +insert into t values (1,1); + +# verify that before delete 
triggers can not replace into the source table +create trigger t_trigger before delete on t for each row replace into s values (1000000,0); +begin; +error 1442; +replace into t select * from s; +rollback; +drop trigger t_trigger; + +# verify that after delete triggers can not replace into the source table +create trigger t_trigger after delete on t for each row replace into s values (1000000,0); +begin; +error 1442; +replace into t select * from s; +rollback; +drop trigger t_trigger; + +# verify that before delete triggers can not delete from the source table +create trigger t_trigger before delete on t for each row delete from s where id=1000000; +begin; +error 1442; +replace into t select * from s; +rollback; +drop trigger t_trigger; + +# verify that after delete triggers can not delete from the source table +create trigger t_trigger after delete on t for each row delete from s where id=1000000; +begin; +error 1442; +replace into t select * from s; +rollback; +drop trigger t_trigger; + +# verify that before delete triggers can not update the source table +create trigger t_trigger before delete on t for each row update s set x=x+1 where id=1000000; +begin; +error 1442; +replace into t select * from s; +rollback; +drop trigger t_trigger; + +# verify that after delete triggers can not update the source table +create trigger t_trigger after delete on t for each row update s set x=x+1 where id=1000000; +begin; +error 1442; +replace into t select * from s; +rollback; +drop trigger t_trigger; + + +drop table s,t; From 09223b255954cb37e29c0b289b0153690b535d54 Mon Sep 17 00:00:00 2001 From: Rich Prohaska Date: Tue, 19 Aug 2014 13:07:19 -0400 Subject: [PATCH 144/190] DB-708 move test scripts --- scripts/atc.ontime/atc_ontime_create.sql | 95 ----- .../atc.ontime/atc_ontime_create_covered.sql | 103 ------ scripts/atc.ontime/nodistinct.q8.sql | 2 - scripts/atc.ontime/q0.result | 2 - scripts/atc.ontime/q0.sql | 1 - scripts/atc.ontime/q1.result | 8 - scripts/atc.ontime/q1.sql | 
1 - scripts/atc.ontime/q2.result | 8 - scripts/atc.ontime/q2.sql | 1 - scripts/atc.ontime/q3.result | 11 - scripts/atc.ontime/q3.sql | 1 - scripts/atc.ontime/q4.result | 21 -- scripts/atc.ontime/q4.sql | 1 - scripts/atc.ontime/q5.result | 21 -- scripts/atc.ontime/q5.sql | 1 - scripts/atc.ontime/q6.result | 21 -- scripts/atc.ontime/q6.sql | 1 - scripts/atc.ontime/q7.result | 22 -- scripts/atc.ontime/q7.sql | 1 - scripts/atc.ontime/q8.10y.destcityname.result | 11 - scripts/atc.ontime/q8.10y.destcityname.sql | 1 - scripts/atc.ontime/q8.10y.result | 11 - scripts/atc.ontime/q8.10y.sql | 1 - scripts/atc.ontime/q8.1y.sql | 1 - scripts/atc.ontime/q8.1y.year5.sql | 1 - scripts/atc.ontime/q8.2y.sql | 1 - scripts/atc.ontime/q8.3y.sql | 1 - scripts/atc.ontime/q8.4y.sql | 1 - scripts/atc.ontime/q8.result | 11 - scripts/atc.ontime/q8.sql | 1 - scripts/atc.ontime/q8.year5.sql | 1 - scripts/atc.ontime/q9.result | 22 -- scripts/atc.ontime/q9.sql | 1 - scripts/atc.ontime/qcount.main.sql | 2 - scripts/atc.ontime/qcount.result | 2 - scripts/atc.ontime/qcount.sql | 1 - scripts/atc.readme | 19 - scripts/nightly.mysql.build.and.test.bash | 58 --- scripts/nightly.mysql.build.and.test.my.cnf | 7 - scripts/run.atc.ontime.bash | 267 -------------- scripts/run.iibench.bash | 163 --------- scripts/run.mysql.tests.bash | 196 ---------- scripts/run.sql.bench.bash | 153 -------- scripts/run.tpch.bash | 342 ------------------ scripts/setup.mysql.bash | 231 ------------ scripts/test.mysql.bash | 51 --- scripts/testbuildfromsrc.bash | 32 -- scripts/testsandbox.bash | 32 -- scripts/tpch.readme | 34 -- 49 files changed, 1977 deletions(-) delete mode 100644 scripts/atc.ontime/atc_ontime_create.sql delete mode 100644 scripts/atc.ontime/atc_ontime_create_covered.sql delete mode 100644 scripts/atc.ontime/nodistinct.q8.sql delete mode 100644 scripts/atc.ontime/q0.result delete mode 100644 scripts/atc.ontime/q0.sql delete mode 100644 scripts/atc.ontime/q1.result delete mode 100644 scripts/atc.ontime/q1.sql 
delete mode 100644 scripts/atc.ontime/q2.result delete mode 100644 scripts/atc.ontime/q2.sql delete mode 100644 scripts/atc.ontime/q3.result delete mode 100644 scripts/atc.ontime/q3.sql delete mode 100644 scripts/atc.ontime/q4.result delete mode 100644 scripts/atc.ontime/q4.sql delete mode 100644 scripts/atc.ontime/q5.result delete mode 100644 scripts/atc.ontime/q5.sql delete mode 100644 scripts/atc.ontime/q6.result delete mode 100644 scripts/atc.ontime/q6.sql delete mode 100644 scripts/atc.ontime/q7.result delete mode 100644 scripts/atc.ontime/q7.sql delete mode 100644 scripts/atc.ontime/q8.10y.destcityname.result delete mode 100644 scripts/atc.ontime/q8.10y.destcityname.sql delete mode 100644 scripts/atc.ontime/q8.10y.result delete mode 100644 scripts/atc.ontime/q8.10y.sql delete mode 100644 scripts/atc.ontime/q8.1y.sql delete mode 100644 scripts/atc.ontime/q8.1y.year5.sql delete mode 100644 scripts/atc.ontime/q8.2y.sql delete mode 100644 scripts/atc.ontime/q8.3y.sql delete mode 100644 scripts/atc.ontime/q8.4y.sql delete mode 100644 scripts/atc.ontime/q8.result delete mode 100644 scripts/atc.ontime/q8.sql delete mode 100644 scripts/atc.ontime/q8.year5.sql delete mode 100644 scripts/atc.ontime/q9.result delete mode 100644 scripts/atc.ontime/q9.sql delete mode 100644 scripts/atc.ontime/qcount.main.sql delete mode 100644 scripts/atc.ontime/qcount.result delete mode 100644 scripts/atc.ontime/qcount.sql delete mode 100644 scripts/atc.readme delete mode 100755 scripts/nightly.mysql.build.and.test.bash delete mode 100644 scripts/nightly.mysql.build.and.test.my.cnf delete mode 100755 scripts/run.atc.ontime.bash delete mode 100755 scripts/run.iibench.bash delete mode 100755 scripts/run.mysql.tests.bash delete mode 100755 scripts/run.sql.bench.bash delete mode 100755 scripts/run.tpch.bash delete mode 100755 scripts/setup.mysql.bash delete mode 100755 scripts/test.mysql.bash delete mode 100644 scripts/testbuildfromsrc.bash delete mode 100644 scripts/testsandbox.bash delete 
mode 100644 scripts/tpch.readme diff --git a/scripts/atc.ontime/atc_ontime_create.sql b/scripts/atc.ontime/atc_ontime_create.sql deleted file mode 100644 index 72148f81d6f..00000000000 --- a/scripts/atc.ontime/atc_ontime_create.sql +++ /dev/null @@ -1,95 +0,0 @@ -CREATE TABLE `ontime` ( - `Year` year(4) DEFAULT NULL, - `Quarter` tinyint(4) DEFAULT NULL, - `Month` tinyint(4) DEFAULT NULL, - `DayofMonth` tinyint(4) DEFAULT NULL, - `DayOfWeek` tinyint(4) DEFAULT NULL, - `FlightDate` date DEFAULT NULL, - `UniqueCarrier` char(7) DEFAULT NULL, - `AirlineID` int(11) DEFAULT NULL, - `Carrier` char(2) DEFAULT NULL, - `TailNum` varchar(50) DEFAULT NULL, - `FlightNum` varchar(10) DEFAULT NULL, - `Origin` char(5) DEFAULT NULL, - `OriginCityName` varchar(100) DEFAULT NULL, - `OriginState` char(2) DEFAULT NULL, - `OriginStateFips` varchar(10) DEFAULT NULL, - `OriginStateName` varchar(100) DEFAULT NULL, - `OriginWac` int(11) DEFAULT NULL, - `Dest` char(5) DEFAULT NULL, - `DestCityName` varchar(100) DEFAULT NULL, - `DestState` char(2) DEFAULT NULL, - `DestStateFips` varchar(10) DEFAULT NULL, - `DestStateName` varchar(100) DEFAULT NULL, - `DestWac` int(11) DEFAULT NULL, - `CRSDepTime` int(11) DEFAULT NULL, - `DepTime` int(11) DEFAULT NULL, - `DepDelay` int(11) DEFAULT NULL, - `DepDelayMinutes` int(11) DEFAULT NULL, - `DepDel15` int(11) DEFAULT NULL, - `DepartureDelayGroups` int(11) DEFAULT NULL, - `DepTimeBlk` varchar(20) DEFAULT NULL, - `TaxiOut` int(11) DEFAULT NULL, - `WheelsOff` int(11) DEFAULT NULL, - `WheelsOn` int(11) DEFAULT NULL, - `TaxiIn` int(11) DEFAULT NULL, - `CRSArrTime` int(11) DEFAULT NULL, - `ArrTime` int(11) DEFAULT NULL, - `ArrDelay` int(11) DEFAULT NULL, - `ArrDelayMinutes` int(11) DEFAULT NULL, - `ArrDel15` int(11) DEFAULT NULL, - `ArrivalDelayGroups` int(11) DEFAULT NULL, - `ArrTimeBlk` varchar(20) DEFAULT NULL, - `Cancelled` tinyint(4) DEFAULT NULL, - `CancellationCode` char(1) DEFAULT NULL, - `Diverted` tinyint(4) DEFAULT NULL, - `CRSElapsedTime` INT(11) 
DEFAULT NULL, - `ActualElapsedTime` INT(11) DEFAULT NULL, - `AirTime` INT(11) DEFAULT NULL, - `Flights` INT(11) DEFAULT NULL, - `Distance` INT(11) DEFAULT NULL, - `DistanceGroup` TINYINT(4) DEFAULT NULL, - `CarrierDelay` INT(11) DEFAULT NULL, - `WeatherDelay` INT(11) DEFAULT NULL, - `NASDelay` INT(11) DEFAULT NULL, - `SecurityDelay` INT(11) DEFAULT NULL, - `LateAircraftDelay` INT(11) DEFAULT NULL, - `FirstDepTime` varchar(10) DEFAULT NULL, - `TotalAddGTime` varchar(10) DEFAULT NULL, - `LongestAddGTime` varchar(10) DEFAULT NULL, - `DivAirportLandings` varchar(10) DEFAULT NULL, - `DivReachedDest` varchar(10) DEFAULT NULL, - `DivActualElapsedTime` varchar(10) DEFAULT NULL, - `DivArrDelay` varchar(10) DEFAULT NULL, - `DivDistance` varchar(10) DEFAULT NULL, - `Div1Airport` varchar(10) DEFAULT NULL, - `Div1WheelsOn` varchar(10) DEFAULT NULL, - `Div1TotalGTime` varchar(10) DEFAULT NULL, - `Div1LongestGTime` varchar(10) DEFAULT NULL, - `Div1WheelsOff` varchar(10) DEFAULT NULL, - `Div1TailNum` varchar(10) DEFAULT NULL, - `Div2Airport` varchar(10) DEFAULT NULL, - `Div2WheelsOn` varchar(10) DEFAULT NULL, - `Div2TotalGTime` varchar(10) DEFAULT NULL, - `Div2LongestGTime` varchar(10) DEFAULT NULL, - `Div2WheelsOff` varchar(10) DEFAULT NULL, - `Div2TailNum` varchar(10) DEFAULT NULL, - `Div3Airport` varchar(10) DEFAULT NULL, - `Div3WheelsOn` varchar(10) DEFAULT NULL, - `Div3TotalGTime` varchar(10) DEFAULT NULL, - `Div3LongestGTime` varchar(10) DEFAULT NULL, - `Div3WheelsOff` varchar(10) DEFAULT NULL, - `Div3TailNum` varchar(10) DEFAULT NULL, - `Div4Airport` varchar(10) DEFAULT NULL, - `Div4WheelsOn` varchar(10) DEFAULT NULL, - `Div4TotalGTime` varchar(10) DEFAULT NULL, - `Div4LongestGTime` varchar(10) DEFAULT NULL, - `Div4WheelsOff` varchar(10) DEFAULT NULL, - `Div4TailNum` varchar(10) DEFAULT NULL, - `Div5Airport` varchar(10) DEFAULT NULL, - `Div5WheelsOn` varchar(10) DEFAULT NULL, - `Div5TotalGTime` varchar(10) DEFAULT NULL, - `Div5LongestGTime` varchar(10) DEFAULT NULL, - 
`Div5WheelsOff` varchar(10) DEFAULT NULL, - `Div5TailNum` varchar(10) DEFAULT NULL -) ENGINE=TOKUDB; \ No newline at end of file diff --git a/scripts/atc.ontime/atc_ontime_create_covered.sql b/scripts/atc.ontime/atc_ontime_create_covered.sql deleted file mode 100644 index 4ea091409c5..00000000000 --- a/scripts/atc.ontime/atc_ontime_create_covered.sql +++ /dev/null @@ -1,103 +0,0 @@ -CREATE TABLE `ontime` ( - `Year` year(4) DEFAULT NULL, - `Quarter` tinyint(4) DEFAULT NULL, - `Month` tinyint(4) DEFAULT NULL, - `DayofMonth` tinyint(4) DEFAULT NULL, - `DayOfWeek` tinyint(4) DEFAULT NULL, - `FlightDate` date DEFAULT NULL, - `UniqueCarrier` char(7) DEFAULT NULL, - `AirlineID` int(11) DEFAULT NULL, - `Carrier` char(2) DEFAULT NULL, - `TailNum` varchar(50) DEFAULT NULL, - `FlightNum` varchar(10) DEFAULT NULL, - `Origin` char(5) DEFAULT NULL, - `OriginCityName` varchar(100) DEFAULT NULL, - `OriginState` char(2) DEFAULT NULL, - `OriginStateFips` varchar(10) DEFAULT NULL, - `OriginStateName` varchar(100) DEFAULT NULL, - `OriginWac` int(11) DEFAULT NULL, - `Dest` char(5) DEFAULT NULL, - `DestCityName` varchar(100) DEFAULT NULL, - `DestState` char(2) DEFAULT NULL, - `DestStateFips` varchar(10) DEFAULT NULL, - `DestStateName` varchar(100) DEFAULT NULL, - `DestWac` int(11) DEFAULT NULL, - `CRSDepTime` int(11) DEFAULT NULL, - `DepTime` int(11) DEFAULT NULL, - `DepDelay` int(11) DEFAULT NULL, - `DepDelayMinutes` int(11) DEFAULT NULL, - `DepDel15` int(11) DEFAULT NULL, - `DepartureDelayGroups` int(11) DEFAULT NULL, - `DepTimeBlk` varchar(20) DEFAULT NULL, - `TaxiOut` int(11) DEFAULT NULL, - `WheelsOff` int(11) DEFAULT NULL, - `WheelsOn` int(11) DEFAULT NULL, - `TaxiIn` int(11) DEFAULT NULL, - `CRSArrTime` int(11) DEFAULT NULL, - `ArrTime` int(11) DEFAULT NULL, - `ArrDelay` int(11) DEFAULT NULL, - `ArrDelayMinutes` int(11) DEFAULT NULL, - `ArrDel15` int(11) DEFAULT NULL, - `ArrivalDelayGroups` int(11) DEFAULT NULL, - `ArrTimeBlk` varchar(20) DEFAULT NULL, - `Cancelled` tinyint(4) 
DEFAULT NULL, - `CancellationCode` char(1) DEFAULT NULL, - `Diverted` tinyint(4) DEFAULT NULL, - `CRSElapsedTime` INT(11) DEFAULT NULL, - `ActualElapsedTime` INT(11) DEFAULT NULL, - `AirTime` INT(11) DEFAULT NULL, - `Flights` INT(11) DEFAULT NULL, - `Distance` INT(11) DEFAULT NULL, - `DistanceGroup` TINYINT(4) DEFAULT NULL, - `CarrierDelay` INT(11) DEFAULT NULL, - `WeatherDelay` INT(11) DEFAULT NULL, - `NASDelay` INT(11) DEFAULT NULL, - `SecurityDelay` INT(11) DEFAULT NULL, - `LateAircraftDelay` INT(11) DEFAULT NULL, - `FirstDepTime` varchar(10) DEFAULT NULL, - `TotalAddGTime` varchar(10) DEFAULT NULL, - `LongestAddGTime` varchar(10) DEFAULT NULL, - `DivAirportLandings` varchar(10) DEFAULT NULL, - `DivReachedDest` varchar(10) DEFAULT NULL, - `DivActualElapsedTime` varchar(10) DEFAULT NULL, - `DivArrDelay` varchar(10) DEFAULT NULL, - `DivDistance` varchar(10) DEFAULT NULL, - `Div1Airport` varchar(10) DEFAULT NULL, - `Div1WheelsOn` varchar(10) DEFAULT NULL, - `Div1TotalGTime` varchar(10) DEFAULT NULL, - `Div1LongestGTime` varchar(10) DEFAULT NULL, - `Div1WheelsOff` varchar(10) DEFAULT NULL, - `Div1TailNum` varchar(10) DEFAULT NULL, - `Div2Airport` varchar(10) DEFAULT NULL, - `Div2WheelsOn` varchar(10) DEFAULT NULL, - `Div2TotalGTime` varchar(10) DEFAULT NULL, - `Div2LongestGTime` varchar(10) DEFAULT NULL, - `Div2WheelsOff` varchar(10) DEFAULT NULL, - `Div2TailNum` varchar(10) DEFAULT NULL, - `Div3Airport` varchar(10) DEFAULT NULL, - `Div3WheelsOn` varchar(10) DEFAULT NULL, - `Div3TotalGTime` varchar(10) DEFAULT NULL, - `Div3LongestGTime` varchar(10) DEFAULT NULL, - `Div3WheelsOff` varchar(10) DEFAULT NULL, - `Div3TailNum` varchar(10) DEFAULT NULL, - `Div4Airport` varchar(10) DEFAULT NULL, - `Div4WheelsOn` varchar(10) DEFAULT NULL, - `Div4TotalGTime` varchar(10) DEFAULT NULL, - `Div4LongestGTime` varchar(10) DEFAULT NULL, - `Div4WheelsOff` varchar(10) DEFAULT NULL, - `Div4TailNum` varchar(10) DEFAULT NULL, - `Div5Airport` varchar(10) DEFAULT NULL, - `Div5WheelsOn` 
varchar(10) DEFAULT NULL, - `Div5TotalGTime` varchar(10) DEFAULT NULL, - `Div5LongestGTime` varchar(10) DEFAULT NULL, - `Div5WheelsOff` varchar(10) DEFAULT NULL, - `Div5TailNum` varchar(10) DEFAULT NULL, - KEY `Year` (`Year`,`Month`), - KEY `Year_2` (`Year`,`DayOfWeek`), - KEY `Year_3` (`Year`,`DepDelay`,`DayOfWeek`), - KEY `DayOfWeek` (`DayOfWeek`,`Year`,`DepDelay`), - KEY `Year_4` (`Year`,`DepDelay`,`Origin`,`Carrier`), - KEY `DepDelay` (`DepDelay`,`Year`), - KEY `Year_5` (`Year`,`DestCityName`,`OriginCityName`), - KEY `DestCityName` (`DestCityName`,`OriginCityName`,`Year`) -) ENGINE=TOKUDB; \ No newline at end of file diff --git a/scripts/atc.ontime/nodistinct.q8.sql b/scripts/atc.ontime/nodistinct.q8.sql deleted file mode 100644 index 547d6fa08e0..00000000000 --- a/scripts/atc.ontime/nodistinct.q8.sql +++ /dev/null @@ -1,2 +0,0 @@ -# Q8: As final I tested most popular destination in sense count of direct connected cities for different diapason of years. -SELECT DestCityName, COUNT( OriginCityName) FROM ontime WHERE Year BETWEEN 2006 and 2007 GROUP BY DestCityName ORDER BY 2 DESC LIMIT 10; diff --git a/scripts/atc.ontime/q0.result b/scripts/atc.ontime/q0.result deleted file mode 100644 index 457e3b525de..00000000000 --- a/scripts/atc.ontime/q0.result +++ /dev/null @@ -1,2 +0,0 @@ -avg(c1) -485021.3730 diff --git a/scripts/atc.ontime/q0.sql b/scripts/atc.ontime/q0.sql deleted file mode 100644 index 258c88927b2..00000000000 --- a/scripts/atc.ontime/q0.sql +++ /dev/null @@ -1 +0,0 @@ -select avg(c1) from (select year,month,count(*) as c1 from ontime group by YEAR,month) t; diff --git a/scripts/atc.ontime/q1.result b/scripts/atc.ontime/q1.result deleted file mode 100644 index e1f9df4d004..00000000000 --- a/scripts/atc.ontime/q1.result +++ /dev/null @@ -1,8 +0,0 @@ -DayOfWeek c -5 8732424 -1 8730614 -4 8710843 -3 8685626 -2 8639632 -7 8274367 -6 7514194 diff --git a/scripts/atc.ontime/q1.sql b/scripts/atc.ontime/q1.sql deleted file mode 100644 index 
e155da8fc3d..00000000000 --- a/scripts/atc.ontime/q1.sql +++ /dev/null @@ -1 +0,0 @@ -SELECT DayOfWeek, count(*) AS c FROM ontime WHERE Year BETWEEN 2000 AND 2008 GROUP BY DayOfWeek ORDER BY c DESC; diff --git a/scripts/atc.ontime/q2.result b/scripts/atc.ontime/q2.result deleted file mode 100644 index 0f88b842f45..00000000000 --- a/scripts/atc.ontime/q2.result +++ /dev/null @@ -1,8 +0,0 @@ -DayOfWeek c -5 2088300 -4 1918325 -1 1795120 -7 1782292 -3 1640798 -2 1538291 -6 1391984 diff --git a/scripts/atc.ontime/q2.sql b/scripts/atc.ontime/q2.sql deleted file mode 100644 index 9d31ecd0ec5..00000000000 --- a/scripts/atc.ontime/q2.sql +++ /dev/null @@ -1 +0,0 @@ -SELECT DayOfWeek, count(*) AS c FROM ontime WHERE DepDelay>10 AND Year BETWEEN 2000 AND 2008 GROUP BY DayOfWeek ORDER BY c DESC; diff --git a/scripts/atc.ontime/q3.result b/scripts/atc.ontime/q3.result deleted file mode 100644 index ecacefe261e..00000000000 --- a/scripts/atc.ontime/q3.result +++ /dev/null @@ -1,11 +0,0 @@ -Origin c -ORD 846692 -ATL 822955 -DFW 601318 -LAX 391247 -PHX 391191 -LAS 351713 -DEN 345108 -EWR 292916 -DTW 289233 -IAH 283861 diff --git a/scripts/atc.ontime/q3.sql b/scripts/atc.ontime/q3.sql deleted file mode 100644 index 9daa49cda68..00000000000 --- a/scripts/atc.ontime/q3.sql +++ /dev/null @@ -1 +0,0 @@ -SELECT Origin, count(*) AS c FROM ontime WHERE DepDelay>10 AND Year BETWEEN 2000 AND 2008 GROUP BY Origin ORDER BY c DESC LIMIT 10; diff --git a/scripts/atc.ontime/q4.result b/scripts/atc.ontime/q4.result deleted file mode 100644 index 1de2cf2c5d2..00000000000 --- a/scripts/atc.ontime/q4.result +++ /dev/null @@ -1,21 +0,0 @@ -carrier count(*) -WN 296293 -AA 176203 -MQ 145630 -US 135987 -UA 128174 -OO 127426 -EV 101796 -XE 99915 -DL 93675 -NW 90429 -CO 76662 -YV 67905 -FL 59460 -OH 59034 -B6 50740 -9E 46948 -AS 42830 -F9 23035 -AQ 4299 -HA 2746 diff --git a/scripts/atc.ontime/q4.sql b/scripts/atc.ontime/q4.sql deleted file mode 100644 index 20447cd17ce..00000000000 --- 
a/scripts/atc.ontime/q4.sql +++ /dev/null @@ -1 +0,0 @@ -SELECT carrier, count(*) FROM ontime WHERE DepDelay>10 AND Year=2007 GROUP BY carrier ORDER BY 2 DESC; diff --git a/scripts/atc.ontime/q5.result b/scripts/atc.ontime/q5.result deleted file mode 100644 index a790800d53f..00000000000 --- a/scripts/atc.ontime/q5.result +++ /dev/null @@ -1,21 +0,0 @@ -carrier c c2 c3 -EV 101796 286234 355.6391 -US 135987 485447 280.1274 -AA 176203 633857 277.9854 -MQ 145630 540494 269.4387 -AS 42830 160185 267.3783 -B6 50740 191450 265.0300 -UA 128174 490002 261.5785 -WN 296293 1168871 253.4865 -OH 59034 236032 250.1102 -CO 76662 323151 237.2327 -F9 23035 97760 235.6281 -YV 67905 294362 230.6853 -XE 99915 434773 229.8096 -FL 59460 263159 225.9471 -NW 90429 414526 218.1504 -OO 127426 597880 213.1297 -DL 93675 475889 196.8421 -9E 46948 258851 181.3707 -AQ 4299 46360 92.7308 -HA 2746 56175 48.8830 diff --git a/scripts/atc.ontime/q5.sql b/scripts/atc.ontime/q5.sql deleted file mode 100644 index 59e5c8b95db..00000000000 --- a/scripts/atc.ontime/q5.sql +++ /dev/null @@ -1 +0,0 @@ -SELECT t.carrier, c, c2, c*1000/c2 as c3 FROM (SELECT carrier, count(*) AS c FROM ontime WHERE DepDelay>10 AND Year=2007 GROUP BY carrier) t JOIN (SELECT carrier, count(*) AS c2 FROM ontime WHERE Year=2007 GROUP BY carrier) t2 ON (t.Carrier=t2.Carrier) ORDER BY c3 DESC; diff --git a/scripts/atc.ontime/q6.result b/scripts/atc.ontime/q6.result deleted file mode 100644 index 85a1db42079..00000000000 --- a/scripts/atc.ontime/q6.result +++ /dev/null @@ -1,21 +0,0 @@ -carrier c c2 c3 -UA 1096646 490002 2238.0439 -AS 354145 160185 2210.8500 -DL 1050448 475889 2207.3383 -AA 1276555 633857 2013.9479 -US 909154 485447 1872.8182 -WN 2165483 1168871 1852.6279 -NW 725076 414526 1749.1689 -MQ 876799 540494 1622.2178 -CO 522219 323151 1616.0216 -EV 461050 286234 1610.7451 -OH 301681 236032 1278.1360 -FL 298916 263159 1135.8760 -B6 197249 191450 1030.2899 -OO 556247 597880 930.3656 -F9 72150 97760 738.0319 -YV 198787 294362 
675.3147 -XE 233488 434773 537.0343 -AQ 17239 46360 371.8507 -9E 89391 258851 345.3377 -HA 15968 56175 284.2546 diff --git a/scripts/atc.ontime/q6.sql b/scripts/atc.ontime/q6.sql deleted file mode 100644 index 9ac157c9ac0..00000000000 --- a/scripts/atc.ontime/q6.sql +++ /dev/null @@ -1 +0,0 @@ -SELECT t.carrier, c, c2, c*1000/c2 as c3 FROM (SELECT carrier, count(*) AS c FROM ontime WHERE DepDelay>10 AND Year BETWEEN 2000 and 2008 GROUP BY carrier) t JOIN (SELECT carrier, count(*) AS c2 FROM ontime WHERE Year=2007 GROUP BY carrier) t2 ON (t.Carrier=t2.Carrier) ORDER BY c3 DESC; diff --git a/scripts/atc.ontime/q7.result b/scripts/atc.ontime/q7.result deleted file mode 100644 index ffc0236300d..00000000000 --- a/scripts/atc.ontime/q7.result +++ /dev/null @@ -1,22 +0,0 @@ -Year c1/c2 -1988 166.1709 -1989 199.5009 -1990 166.4513 -1991 147.2163 -1992 146.7543 -1993 154.2498 -1994 165.6803 -1995 193.9344 -1996 221.8281 -1997 191.6513 -1998 193.5638 -1999 200.8742 -2000 231.7167 -2001 189.0581 -2002 162.3769 -2003 150.2455 -2004 192.4838 -2005 207.5929 -2006 231.5599 -2007 245.3487 -2008 219.9228 diff --git a/scripts/atc.ontime/q7.sql b/scripts/atc.ontime/q7.sql deleted file mode 100644 index d0313c06dee..00000000000 --- a/scripts/atc.ontime/q7.sql +++ /dev/null @@ -1 +0,0 @@ -SELECT t.Year, c1/c2 FROM (select Year,count(*)*1000 as c1 from ontime WHERE DepDelay>10 GROUP BY Year) t JOIN (select Year,count(*) as c2 from ontime GROUP BY Year) t2 ON (t.Year=t2.Year); diff --git a/scripts/atc.ontime/q8.10y.destcityname.result b/scripts/atc.ontime/q8.10y.destcityname.result deleted file mode 100644 index e98b44c790b..00000000000 --- a/scripts/atc.ontime/q8.10y.destcityname.result +++ /dev/null @@ -1,11 +0,0 @@ -DestCityName COUNT( DISTINCT OriginCityName) -Atlanta, GA 190 -Chicago, IL 159 -Dallas/Ft.Worth, TX 151 -Cincinnati, OH 139 -Minneapolis, MN 131 -Houston, TX 127 -Detroit, MI 121 -Denver, CO 120 -Salt Lake City, UT 116 -New York, NY 111 diff --git 
a/scripts/atc.ontime/q8.10y.destcityname.sql b/scripts/atc.ontime/q8.10y.destcityname.sql deleted file mode 100644 index 6ae5b91a54c..00000000000 --- a/scripts/atc.ontime/q8.10y.destcityname.sql +++ /dev/null @@ -1 +0,0 @@ -SELECT DestCityName, COUNT( DISTINCT OriginCityName) FROM ontime FORCE INDEX(DestCityName) WHERE Year BETWEEN 1999 and 2009 GROUP BY DestCityName ORDER BY 2 DESC LIMIT 10; diff --git a/scripts/atc.ontime/q8.10y.result b/scripts/atc.ontime/q8.10y.result deleted file mode 100644 index e98b44c790b..00000000000 --- a/scripts/atc.ontime/q8.10y.result +++ /dev/null @@ -1,11 +0,0 @@ -DestCityName COUNT( DISTINCT OriginCityName) -Atlanta, GA 190 -Chicago, IL 159 -Dallas/Ft.Worth, TX 151 -Cincinnati, OH 139 -Minneapolis, MN 131 -Houston, TX 127 -Detroit, MI 121 -Denver, CO 120 -Salt Lake City, UT 116 -New York, NY 111 diff --git a/scripts/atc.ontime/q8.10y.sql b/scripts/atc.ontime/q8.10y.sql deleted file mode 100644 index f70b4f6f220..00000000000 --- a/scripts/atc.ontime/q8.10y.sql +++ /dev/null @@ -1 +0,0 @@ -SELECT DestCityName, COUNT( DISTINCT OriginCityName) FROM ontime WHERE Year BETWEEN 1999 and 2009 GROUP BY DestCityName ORDER BY 2 DESC LIMIT 10; diff --git a/scripts/atc.ontime/q8.1y.sql b/scripts/atc.ontime/q8.1y.sql deleted file mode 100644 index 40b87b644b1..00000000000 --- a/scripts/atc.ontime/q8.1y.sql +++ /dev/null @@ -1 +0,0 @@ -SELECT DestCityName, COUNT( DISTINCT OriginCityName) FROM ontime WHERE Year BETWEEN 1999 and 1999 GROUP BY DestCityName ORDER BY 2 DESC LIMIT 10; diff --git a/scripts/atc.ontime/q8.1y.year5.sql b/scripts/atc.ontime/q8.1y.year5.sql deleted file mode 100644 index ef56cc3786f..00000000000 --- a/scripts/atc.ontime/q8.1y.year5.sql +++ /dev/null @@ -1 +0,0 @@ -SELECT DestCityName, COUNT( DISTINCT OriginCityName) FROM ontime USE INDEX(year_5) WHERE Year BETWEEN 1999 and 1999 GROUP BY DestCityName ORDER BY 2 DESC LIMIT 10; diff --git a/scripts/atc.ontime/q8.2y.sql b/scripts/atc.ontime/q8.2y.sql deleted file mode 100644 
index 7ab2d10080f..00000000000 --- a/scripts/atc.ontime/q8.2y.sql +++ /dev/null @@ -1 +0,0 @@ -SELECT DestCityName, COUNT( DISTINCT OriginCityName) FROM ontime WHERE Year BETWEEN 1999 and 2000 GROUP BY DestCityName ORDER BY 2 DESC LIMIT 10; diff --git a/scripts/atc.ontime/q8.3y.sql b/scripts/atc.ontime/q8.3y.sql deleted file mode 100644 index 340dc1aea4e..00000000000 --- a/scripts/atc.ontime/q8.3y.sql +++ /dev/null @@ -1 +0,0 @@ -SELECT DestCityName, COUNT( DISTINCT OriginCityName) FROM ontime WHERE Year BETWEEN 1999 and 2001 GROUP BY DestCityName ORDER BY 2 DESC LIMIT 10; diff --git a/scripts/atc.ontime/q8.4y.sql b/scripts/atc.ontime/q8.4y.sql deleted file mode 100644 index c271654ad8e..00000000000 --- a/scripts/atc.ontime/q8.4y.sql +++ /dev/null @@ -1 +0,0 @@ -SELECT DestCityName, COUNT( DISTINCT OriginCityName) FROM ontime WHERE Year BETWEEN 1999 and 2002 GROUP BY DestCityName ORDER BY 2 DESC LIMIT 10; diff --git a/scripts/atc.ontime/q8.result b/scripts/atc.ontime/q8.result deleted file mode 100644 index 35ed3e3f2c2..00000000000 --- a/scripts/atc.ontime/q8.result +++ /dev/null @@ -1,11 +0,0 @@ -DestCityName COUNT( DISTINCT OriginCityName) -Atlanta, GA 183 -Chicago, IL 147 -Dallas/Ft.Worth, TX 133 -Cincinnati, OH 129 -Minneapolis, MN 128 -Houston, TX 114 -Detroit, MI 112 -Denver, CO 111 -Salt Lake City, UT 108 -New York, NY 101 diff --git a/scripts/atc.ontime/q8.sql b/scripts/atc.ontime/q8.sql deleted file mode 100644 index bd5312a4b76..00000000000 --- a/scripts/atc.ontime/q8.sql +++ /dev/null @@ -1 +0,0 @@ -SELECT DestCityName, COUNT( DISTINCT OriginCityName) FROM ontime WHERE Year BETWEEN 2006 and 2007 GROUP BY DestCityName ORDER BY 2 DESC LIMIT 10; diff --git a/scripts/atc.ontime/q8.year5.sql b/scripts/atc.ontime/q8.year5.sql deleted file mode 100644 index f6944cfccb9..00000000000 --- a/scripts/atc.ontime/q8.year5.sql +++ /dev/null @@ -1 +0,0 @@ -SELECT DestCityName, COUNT( DISTINCT OriginCityName) FROM ontime use index(year_5) WHERE Year BETWEEN 2006 and 2007 
GROUP BY DestCityName ORDER BY 2 DESC LIMIT 10; diff --git a/scripts/atc.ontime/q9.result b/scripts/atc.ontime/q9.result deleted file mode 100644 index 4790afb414b..00000000000 --- a/scripts/atc.ontime/q9.result +++ /dev/null @@ -1,22 +0,0 @@ -year c1 -1988 5202096 -1989 5041200 -1990 5270893 -1991 5076925 -1992 5092157 -1993 5070501 -1994 5180048 -1995 5327435 -1996 5351983 -1997 5411843 -1998 5384721 -1999 5527884 -2000 5683047 -2001 5967780 -2002 5271359 -2003 6488540 -2004 7129270 -2005 7140596 -2006 7141922 -2007 7455458 -2008 7009728 diff --git a/scripts/atc.ontime/q9.sql b/scripts/atc.ontime/q9.sql deleted file mode 100644 index 13ac1150449..00000000000 --- a/scripts/atc.ontime/q9.sql +++ /dev/null @@ -1 +0,0 @@ -select year,count(*) as c1 from ontime group by YEAR; diff --git a/scripts/atc.ontime/qcount.main.sql b/scripts/atc.ontime/qcount.main.sql deleted file mode 100644 index 0d27b611f44..00000000000 --- a/scripts/atc.ontime/qcount.main.sql +++ /dev/null @@ -1,2 +0,0 @@ -select count(*) from ontime use index(); - diff --git a/scripts/atc.ontime/qcount.result b/scripts/atc.ontime/qcount.result deleted file mode 100644 index 59130c40662..00000000000 --- a/scripts/atc.ontime/qcount.result +++ /dev/null @@ -1,2 +0,0 @@ -count(*) -122225386 diff --git a/scripts/atc.ontime/qcount.sql b/scripts/atc.ontime/qcount.sql deleted file mode 100644 index b3428c5781b..00000000000 --- a/scripts/atc.ontime/qcount.sql +++ /dev/null @@ -1 +0,0 @@ -select count(*) from ontime; diff --git a/scripts/atc.readme b/scripts/atc.readme deleted file mode 100644 index 856dc532b13..00000000000 --- a/scripts/atc.readme +++ /dev/null @@ -1,19 +0,0 @@ -The script to run the load the air traffic ontime database and run queries against it -is called run.atc.ontime.bas. - -The queries are in the tokudb-engine/scripts/atc.ontime directory. - -The data for the ontime database is in the amazon s3 bucket called tokutek-mysql-data. 
- -$ s3ls -l tokutek-mysql-data --prefix=atc_On_Time_Performance -2010-06-15T13:07:09.000Z 1073741824 atc_On_Time_Performance.mysql.csv.gz.aa -2010-06-15T13:08:19.000Z 1073741824 atc_On_Time_Performance.mysql.csv.gz.ab -2010-06-15T13:09:38.000Z 1073741824 atc_On_Time_Performance.mysql.csv.gz.ac -2010-06-15T13:10:54.000Z 446709742 atc_On_Time_Performance.mysql.csv.gz.ad -2010-06-15T13:11:26.000Z 503 atc_On_Time_Performance.mysql.csv.gz.xml - -The raw data is also stored in the amazon s3 bucket called tokutek-mysql-data. - -$ s3ls -l tokutek-mysql-data --prefix=atc - - diff --git a/scripts/nightly.mysql.build.and.test.bash b/scripts/nightly.mysql.build.and.test.bash deleted file mode 100755 index b9d1b6aca7f..00000000000 --- a/scripts/nightly.mysql.build.and.test.bash +++ /dev/null @@ -1,58 +0,0 @@ -#!/usr/bin/env bash - -function usage() { - echo "run nightly mysql and fractal tree regressions" - echo "uses gearman to schedule jobs onto test machines" -} - -# generate a script that makes a mysql release and run tests on it -function make_and_test_mysql() { - echo $(date) $* >>$nightlytrace 2>&1 - echo "bash -x \$HOME/github/tokudb-engine/scripts/tokutek.make.mysql.bash $* >>$mysqltrace 2>&1; \ - buildexitcode=\$?; \ - echo \$(date) \$HOME/github/tokudb-engine/scripts/tokutek.make.mysql.bash -$* \$buildexitcode >>$mysqltrace; \ - if [ \$buildexitcode -eq 0 ] ; then \$HOME/bin/test.mysql.bash $* >>/tmp/mysql.test.trace 2>&1; fi" \ - | $gearmandir/bin/gearman -b -f mysql-build-$system-$arch -h $gearmandhost -p 4730 >>$nightlytrace 2>&1 -} - -# make a mysql release -function make_mysql() { - echo $(date) $* >>$nightlytrace 2>&1 - echo "\$HOME/github/tokudb-engine/scripts/tokutek.make.mysql.bash $* >>$mysqltrace 2>&1" | $gearmandir/bin/gearman -b -f mysql-build-$system-$arch -h $gearmandhost -p 4730 >>$nightlytrace 2>&1 -} - -# setup the PATH since cron gives us a minimal PATH -PATH=$HOME/bin:$HOME/usr/local/bin:/usr/local/bin:$PATH -source /etc/profile - -github_token= 
-gearmandhost=localhost -gearmandir=/usr/local/gearmand-1.1.6 -system=$(uname -s | tr '[:upper:]' '[:lower:]') -arch=$(uname -m | tr '[:upper:]' '[:lower:]') -now_ts=$(date +%s) -cc=gcc -cxx=g++ - -while [ $# -gt 0 ] ; do - arg=$1; shift - if [[ $arg =~ --(.*)=(.*) ]] ; then - eval ${BASH_REMATCH[1]}=${BASH_REMATCH[2]} - else - usage; exit 1; - fi -done - -nightlytrace=/tmp/$(whoami).nightly.trace -mysqltrace=/tmp/$(whoami).mysql.build.trace.$now_ts - -make_and_test_mysql --mysqlbuild=mysql-5.6.16-tokudb-${now_ts}-debug-e-${system}-${arch} --cc=$cc --cxx=$cxx --github_token=$github_token -make_and_test_mysql --mysqlbuild=mysql-5.6.16-tokudb-${now_ts}-e-${system}-${arch} --cc=$cc --cxx=$cxx --github_token=$github_token --tests=run.mysql.tests.bash:run.sql.bench.bash - -make_and_test_mysql --mysqlbuild=mysql-5.5.36-tokudb-${now_ts}-debug-e-${system}-${arch} --cc=$cc --cxx=$cxx --github_token=$github_token -make_and_test_mysql --mysqlbuild=mysql-5.5.36-tokudb-${now_ts}-e-${system}-${arch} --cc=$cc --cxx=$cxx --github_token=$github_token --tests=run.mysql.tests.bash:run.sql.bench.bash - -make_and_test_mysql --mysqlbuild=mariadb-5.5.35-tokudb-${now_ts}-debug-e-${system}-${arch} --cc=$cc --cxx=$cxx --github_token=$github_token -make_and_test_mysql --mysqlbuild=mariadb-5.5.35-tokudb-${now_ts}-e-${system}-${arch} --cc=$cc --cxx=$cxx --github_token=$github_token --tests=run.mysql.tests.bash:run.sql.bench.bash - -exit 0 diff --git a/scripts/nightly.mysql.build.and.test.my.cnf b/scripts/nightly.mysql.build.and.test.my.cnf deleted file mode 100644 index a837340924e..00000000000 --- a/scripts/nightly.mysql.build.and.test.my.cnf +++ /dev/null @@ -1,7 +0,0 @@ -[mysqld] -tmpdir=/data/mysql/tmp -max_connections=1024 -table_open_cache=1024 -loose_tokudb_cache_size=8G -loose_tokudb_directio=1 - diff --git a/scripts/run.atc.ontime.bash b/scripts/run.atc.ontime.bash deleted file mode 100755 index dddab8bb1fe..00000000000 --- a/scripts/run.atc.ontime.bash +++ /dev/null @@ -1,267 +0,0 @@ 
-#!/usr/bin/env bash - -function usage() { - echo "run the atc ontime load and run" - echo "--mysqlbuild=$mysqlbuild" - echo "[--commit=$commit]" - echo "[--dbname=$dbname]" - echo "[--load=$load] [--check=$check] [--run=$run]" - echo "[--engine=$engine]" - echo "[--tokudb_load_save_space=$tokudb_load_save_space] [--tokudb_row_format=$tokudb_row_format] [--tokudb_loader_memory_size=$tokudb_loader_memory_size]" -} - -function retry() { - local cmd - local retries - local exitcode - cmd=$* - let retries=0 - while [ $retries -le 10 ] ; do - echo `date` $cmd - bash -c "$cmd" - exitcode=$? - echo `date` $cmd $exitcode $retries - let retries=retries+1 - if [ $exitcode -eq 0 ] ; then break; fi - sleep 10 - done - test $exitcode = 0 -} - -mysqlbuild= -commit=0 -mysqlserver=`hostname` -mysqluser=`whoami` -mysqlsocket=/tmp/mysql.sock -svnserver=https://svn.tokutek.com/tokudb -basedir=$HOME/svn.build -builddir=$basedir/mysql.build -dbname=atc -tblname=ontime -load=1 -check=1 -run=1 -engine=tokudb -tokudb_load_save_space=0 -tokudb_row_format= -tokudb_loader_memory_size= -verbose=0 -svn_server=https://svn.tokutek.com/tokudb -svn_branch=. -svn_revision=HEAD - -# parse the command line -while [ $# -gt 0 ] ; do - arg=$1; shift - if [[ $arg =~ --(.*)=(.*) ]] ; then - eval ${BASH_REMATCH[1]}=${BASH_REMATCH[2]} - else - usage; exit 1 - fi -done - -if [[ $mysqlbuild =~ (.*)-(tokudb\-.*)-(linux)-(x86_64) ]] ; then - mysql=${BASH_REMATCH[1]} - tokudb=${BASH_REMATCH[2]} - system=${BASH_REMATCH[3]} - arch=${BASH_REMATCH[4]} -else - exit 1 -fi - -if [ -d /usr/local/mysql/bin ] ; then - export PATH=/usr/local/mysql/bin:$PATH -fi - -if [ -d /usr/local/mysql/lib/mysql ] ; then - export LD_LIBRARY_PATH=/usr/local/mysql/lib/mysql:$PATH -fi - -# goto the base directory -if [ ! -d $basedir ] ; then mkdir $basedir; fi - -pushd $basedir - -# update the build directory -if [ ! 
-d $builddir ] ; then mkdir $builddir; fi - -date=`date +%Y%m%d` -testresultsdir=$builddir/$date -pushd $builddir - while [ ! -d $date ] ; do - svn mkdir $svn_server/mysql.build/$date -m "" - svn checkout $svn_server/mysql.build/$date - if [ $? -ne 0 ] ; then rm -rf $date; fi - done -popd - -if [ $dbname = "atc" -a $engine != "tokudb" ] ; then dbname="atc_$engine"; fi - -runfile=$testresultsdir/$dbname-$tblname-$mysqlbuild-$mysqlserver -if [ $tokudb_load_save_space != 0 ] ; then runfile=$runfile-compress; fi -if [ "$tokudb_row_format" != "" ] ; then runfile=$runfile-$tokudb_row_format; fi -if [ "$tokudb_loader_memory_size" != "" ] ; then runfile=$runfile-$tokudb_loader_memory_size; fi -rm -rf $runfile - -testresult="PASS" - -# maybe get the atc data from s3 -if [ $testresult = "PASS" ] ; then - f=atc_On_Time_Performance.mysql.csv - if [ ! -f $f ] ; then - f=$f.gz - if [ ! -f $f ] ; then - echo `date` s3get --bundle tokutek-mysql-data $f >>$runfile 2>&1 - s3get --verbose --bundle tokutek-mysql-data $f >>$runfile 2>&1 - exitcode=$? - echo `date` s3get --bundle tokutek-mysql-data $f $exitcode >>$runfile 2>&1 - if [ $exitcode -ne 0 ] ; then testresult="FAIL"; fi - if [ $testresult = "PASS" ] ; then - echo `date` gunzip $f >>$runfile 2>&1 - gunzip $f - exitcode=$? - echo `date` gunzip $f $exitcode >>$runfile 2>&1 - if [ $exitcode -ne 0 ] ; then testresult="FAIL"; fi - fi - fi - fi -fi - -# checkout the atc test from svn -atc=atc-$mysqlbuild -if [ $testresult = "PASS" ] ; then - if [ -d atc-$mysqlbuild ] ; then rm -rf atc-$mysqlbuild; fi - - retry svn export -r $svn_revision $svn_server/$svn_branch/mysql/tests/atc atc-$mysqlbuild - exitcode=$? - echo `date` svn export -r $svn_revision $svn_server/$svn_branch/mysql/tests/atc $exitcode >>$runfile 2>&1 - if [ $exitcode != 0 ] ; then - retry svn export -r $svn_revision $svn_server/mysql/tests/atc atc-$mysqlbuild - exitcode=$? 
- echo `date` svn export -r $svn_revision $svn_server/mysql/tests/atc $exitcode >>$runfile 2>&1 - fi - if [ $exitcode != 0 ] ; then testresult="FAIL"; fi -fi - -# create the database -if [ $load -ne 0 -a $testresult = "PASS" ] ; then - echo `date` drop database if exists $dbname >>$runfile - mysql -S $mysqlsocket -u $mysqluser -e "drop database if exists $dbname" >>$runfile 2>&1 - exitcode=$? - echo `date` drop database if exists $dbname $exitcode>>$runfile - if [ $exitcode -ne 0 ] ; then testresult="FAIL"; fi - echo `date` create database $dbname >>$runfile - mysql -S $mysqlsocket -u $mysqluser -e "create database $dbname" >>$runfile 2>&1 - exitcode=$? - echo `date` create database $dbname $exitcode >>$runfile - if [ $exitcode -ne 0 ] ; then testresult="FAIL"; fi -fi - -# create the table -if [ $load -ne 0 -a $testresult = "PASS" ] ; then - echo `date` create table $dbname.$tblname >>$runfile - mysql -S $mysqlsocket -u $mysqluser -D $dbname -e "source $atc/atc_ontime_create_covered.sql" >>$runfile 2>&1 - exitcode=$? - echo `date` create table $exitcode >>$runfile - if [ $exitcode -ne 0 ] ; then testresult="FAIL"; fi -fi - -if [ $load -ne 0 -a $testresult = "PASS" -a "$tokudb_row_format" != "" ] ; then - echo `date` create table $dbname.$tblname >>$runfile - mysql -S $mysqlsocket -u $mysqluser -D $dbname -e "alter table $tblname row_format=$tokudb_row_format" >>$runfile 2>&1 - exitcode=$? - echo `date` create table $exitcode >>$runfile - if [ $exitcode -ne 0 ] ; then testresult="FAIL"; fi -fi - -if [ $load -ne 0 -a $testresult = "PASS" -a $engine != "tokudb" ] ; then - echo `date` alter table $engine >>$runfile - mysql -S $mysqlsocket -u $mysqluser -D $dbname -e "alter table $tblname engine=$engine" >>$runfile 2>&1 - exitcode=$? 
- echo `date` alter table $engine $exitcode >>$runfile - if [ $exitcode -ne 0 ] ; then testresult="FAIL"; fi -fi - -if [ $testresult = "PASS" ] ; then - mysql -S $mysqlsocket -u $mysqluser -D $dbname -e "show create table $tblname" >>$runfile 2>&1 -fi - -if [ $testresult = "PASS" ] ; then - let default_loader_memory_size="$(mysql -S $mysqlsocket -u $mysqluser -e'select @@tokudb_loader_memory_size' --silent --skip-column-names)" - exitcode=$? - echo `date` get tokudb_loader_memory_size $exitcode >>$runfile - if [ $exitcode -ne 0 ] ; then testresult="FAIL"; fi - if [ "$tokudb_loader_memory_size" = "" ] ; then tokudb_loader_memory_size=$default_loader_memory_size; fi -fi - -# load the data -if [ $load -ne 0 -a $testresult = "PASS" ] ; then - echo `date` load data >>$runfile - start=$(date +%s) - mysql -S $mysqlsocket -u $mysqluser -D $dbname -e "set tokudb_loader_memory_size=$tokudb_loader_memory_size;\ - set tokudb_load_save_space=$tokudb_load_save_space; load data infile '$basedir/atc_On_Time_Performance.mysql.csv' into table $tblname" >>$runfile 2>&1 - exitcode=$? - let loadtime=$(date +%s)-$start - echo `date` load data loadtime=$loadtime $exitcode >>$runfile - if [ $exitcode -ne 0 ] ; then testresult="FAIL"; fi -fi - -# check the tables -if [ $check -ne 0 -a $testresult = "PASS" ] ; then - echo `date` check table $tblname >> $runfile - mysql -S $mysqlsocket -u $mysqluser -D $dbname -e "check table $tblname" >>$runfile 2>&1 - exitcode=$? - echo `date` check table $tblname $exitcode >> $runfile - if [ $exitcode -ne 0 ] ; then testresult="FAIL"; fi -fi - -# run the queries -if [ $run -ne 0 -a $testresult = "PASS" ] ; then - pushd $atc - for qfile in q*.sql ; do - if [[ $qfile =~ q(.*)\.sql ]] ; then - qname=${BASH_REMATCH[1]} - q=`cat $qfile` - qrun=q${qname}.run - - echo `date` explain $qfile >>$runfile - if [ $verbose -ne 0 ] ; then echo explain $q >>$runfile; fi - mysql -S $mysqlsocket -u $mysqluser -D $dbname -e "explain $q" >$qrun - exitcode=$? 
- echo `date` explain $qfile $exitcode >>$runfile - if [ $verbose -ne 0 ] ; then cat $qrun >>$runfile; fi - - echo `date` $qfile >>$runfile - start=$(date +%s) - if [ $verbose -ne 0 ] ; then echo $q >>$runfile; fi - mysql -S $mysqlsocket -u $mysqluser -D $dbname -e "$q" >$qrun - exitcode=$? - let qtime=$(date +%s)-$start - echo `date` $qfile qtime=$qtime $exitcode >>$runfile - if [ $verbose -ne 0 ] ; then cat $qrun >>$runfile; fi - if [ $exitcode -ne 0 ] ; then - testresult="FAIL" - else - if [ -f q${qname}.result ] ; then - diff $qrun q${qname}.result >>$runfile - exitcode=$? - if [ $exitcode -ne 0 ] ; then - testresult="FAIL" - fi - fi - fi - fi - done - popd -fi - -# commit results -if [ $commit != 0 ] ; then - svn add $runfile - retry svn commit -m \"$testresult $dbname $tblname $mysqlbuild $mysqlserver\" $runfile -fi - -popd - -if [ $testresult = "PASS" ] ; then exitcode=0; else exitcode=1; fi -exit $exitcode diff --git a/scripts/run.iibench.bash b/scripts/run.iibench.bash deleted file mode 100755 index 31cb96ad434..00000000000 --- a/scripts/run.iibench.bash +++ /dev/null @@ -1,163 +0,0 @@ -#!/usr/bin/env bash - -function usage() { - echo "run iibench" - echo "--mysqlbuild=$mysqlbuild" - echo "[--max_row=$max_rows] [--rows_per_report=$rows_per_report] [--insert_only=$insert_only] [ --check=$check]" - echo "[--commit=$commit]" -} - -function retry() { - local cmd=$* - local retries - local exitcode - let retries=0 - while [ $retries -le 10 ] ; do - echo `date` $cmd - bash -c "$cmd" - exitcode=$? - echo `date` $cmd $exitcode $retries - let retries=retries+1 - if [ $exitcode -eq 0 ] ; then break; fi - sleep 10 - done - test $exitcode = 0 -} - -mysqlbuild= -commit=0 -check=1 -mysqlserver=`hostname` -mysqluser=`whoami` -mysqlsocket=/tmp/mysql.sock -svn_server=https://svn.tokutek.com/tokudb -svn_branch=. 
-svn_revision=HEAD -basedir=$HOME/svn.build -builddir=$basedir/mysql.build -system=`uname -s | tr [:upper:] [:lower:]` -instancetype= -testinstance= -arch=`uname -m | tr [:upper:] [:lower:]` -tracefile=/tmp/run.iibench.trace -cmd=iibench -dbname=$cmd -engine=tokudb -tblname=testit -max_rows=50000000 -rows_per_report=1000000 -insert_only=1 - -# parse the command line -while [ $# -gt 0 ] ; do - arg=$1; shift - if [ $arg = "--replace_into" ] ; then - cmd=replace_into - elif [ $arg = "--insert_ignore" ] ; then - cmd=insert_ignore - elif [[ $arg =~ --(.*)=(.*) ]] ; then - eval ${BASH_REMATCH[1]}=${BASH_REMATCH[2]} - else - usage; exit 1 - fi -done - -# setup the dbname -if [ $dbname = "iibench" ] ; then dbname=${cmd}_${engine}; fi -if [ "$testinstance" != "" ] ; then dbname=${dbname}_${testinstance}; fi - -if [ -d /usr/local/mysql ] ; then - export PATH=/usr/local/mysql/bin:$PATH -fi - -if [ -d /usr/local/mysql/lib/mysql ] ; then - export LD_LIBRARY_PATH=/usr/local/mysql/lib/mysql:$PATH -fi - -# goto the base directory -if [ ! -d $basedir ] ; then mkdir $basedir; fi -pushd $basedir - -# update the build directory -if [ $commit != 0 ] ; then - if [ ! -d $builddir ] ; then mkdir $builddir; fi - - date=`date +%Y%m%d` - testresultsdir=$builddir/$date - pushd $builddir - while [ ! -d $date ] ; do - svn mkdir $svn_server/mysql.build/$date -m "" - svn checkout -q $svn_server/mysql.build/$date - if [ $? -ne 0 ] ; then rm -rf $date; fi - done - popd -else - testresultsdir=$PWD -fi - -# checkout the code -testdir=iibench-$mysqlbuild-$mysqlserver -if [ "$testinstance" != "" ] ; then testdir=$testdir-$testinstance; fi -rm -rf $testdir -retry svn export -q -r $svn_revision $svn_server/$svn_branch/iibench $testdir -exitcode=$? -if [ $exitcode != 0 ] ; then - retry svn export -q -r $svn_revision $svn_server/iibench $testdir - exitcode=$? 
-fi -if [ $exitcode != 0 ] ; then exit 1; fi - -# create the iibench database -mysql -S $mysqlsocket -u root -e "grant all on *.* to '$mysqluser'@'$mysqlserver'" -exitcode=$? -if [ $exitcode != 0 ] ; then exit 1; fi - -mysql -S $mysqlsocket -u $mysqluser -e "drop database if exists $dbname" -exitcode=$? -if [ $exitcode != 0 ] ; then exit 1; fi - -mysql -S $mysqlsocket -u $mysqluser -e "create database $dbname" -exitcode=$? -if [ $exitcode != 0 ] ; then exit 1; fi - -# run -if [ $cmd = "iibench" -a $insert_only != 0 ] ; then - runfile=$testresultsdir/$dbname-insert_only-$max_rows-$mysqlbuild-$mysqlserver -else - runfile=$testresultsdir/$dbname-$max_rows-$mysqlbuild-$mysqlserver -fi -if [ "$instancetype" != "" ] ; then runfile=$runfile-$instancetype; fi -testresult="PASS" - -pushd $testdir/py - echo `date` $cmd start $mysqlbuild $svn_branch $svn_revision $max_rows $rows_per_report >>$runfile - runcmd=$cmd.py - args="--db_user=$mysqluser --db_name=$dbname --db_socket=$mysqlsocket --engine=$engine --setup --max_rows=$max_rows --rows_per_report=$rows_per_report --table_name=$tblname" - if [ $cmd = "iibench" -a $insert_only != 0 ] ; then runcmd="$runcmd --insert_only"; fi - if [ $cmd = "replace_into" ] ; then runcmd="replace_into.py --use_replace_into"; fi - if [ $cmd = "insert_ignore" ] ; then runcmd="replace_into.py"; fi - ./$runcmd $args >>$runfile 2>&1 - exitcode=$? - echo `date` $cmd complete $exitcode >>$runfile - if [ $exitcode != 0 ] ; then testresult="FAIL"; fi -popd - -if [ $check != 0 -a $testresult = "PASS" ] ; then - echo `date` check table $tblname >>$runfile - mysql -S $mysqlsocket -u $mysqluser -D $dbname -e "check table $tblname" >>$runfile 2>&1 - exitcode=$? 
- echo `date` check table $tblname $exitcode >>$runfile - if [ $exitcode != 0 ] ; then testresult="FAIL"; fi -fi - -# commit results -if [ $commit != 0 ] ; then - if [ $cmd = "iibench" -a $insert_only != 0 ] ; then cmd="$cmd insert_only"; fi - svn add $runfile - retry svn commit -m \"$testresult $cmd $max_rows $dbname $mysqlbuild $mysqlserver `hostname`\" $runfile -fi - -popd - -if [ $testresult = "PASS" ] ; then exitcode=0; else exitcode=1; fi -exit $exitcode diff --git a/scripts/run.mysql.tests.bash b/scripts/run.mysql.tests.bash deleted file mode 100755 index ce0fe88d99d..00000000000 --- a/scripts/run.mysql.tests.bash +++ /dev/null @@ -1,196 +0,0 @@ -#!/usr/bin/env bash -# ident 4, no tabs - -function usage() { - echo "run the tokudb mysql tests" - echo "--mysqlbuild=$mysqlbuild" - echo "--commit=$commit" - echo "--tests=$tests --engine=$engine" -} - -function retry() { - local cmd - local retries - local exitcode - cmd=$* - let retries=0 - while [ $retries -le 10 ] ; do - echo `date` $cmd - bash -c "$cmd" - exitcode=$? - echo `date` $cmd $exitcode $retries - let retries=retries+1 - if [ $exitcode -eq 0 ] ; then break; fi - sleep 10 - done - test $exitcode = 0 -} - -svnserver=https://svn.tokutek.com/tokudb -basedir=$HOME/svn.build -builddir=$basedir/mysql.build -mysqlbuild= -mysql_basedir=/usr/local/mysql -mysqlserver=`hostname` -commit=0 -tests="*" -engine="" -parallel=auto - -while [ $# -gt 0 ] ; do - arg=$1; shift - if [[ $arg =~ --(.*)=(.*) ]] ; then - eval ${BASH_REMATCH[1]}=${BASH_REMATCH[2]} - else - usage; exit 1 - fi -done - -if [[ $mysqlbuild =~ (.*)-(tokudb\-.*)-(linux)-(x86_64) ]] ; then - mysql=${BASH_REMATCH[1]} - tokudb=${BASH_REMATCH[2]} - system=${BASH_REMATCH[3]} - arch=${BASH_REMATCH[4]} -else - echo $mysqlbuild is not a tokudb build -fi - -if [ -d $mysql_basedir/lib/mysql ] ; then - export LD_LIBRARY_PATH=$mysql_basedir/lib/mysql -fi - -# update the build directory -if [ ! -d $basedir ] ; then mkdir $basedir ; fi - -pushd $basedir -if [ $? 
!= 0 ] ; then exit 1; fi - -if [ ! -d $builddir ] ; then mkdir $builddir; fi - -# make the subversion directory that will hold the test results -date=`date +%Y%m%d` -testresultsdir=$builddir/$date -pushd $builddir -if [ $? = 0 ] ; then - while [ ! -d $date ] ; do - svn mkdir $svnserver/mysql.build/$date -m "" - svn checkout -q $svnserver/mysql.build/$date - if [ $? -ne 0 ] ; then rm -rf $date; fi - done - popd -fi - -# generate a trace file name -if [ -z $engine ] ; then - tracefile=mysql-test-$mysqlbuild-$mysqlserver -else - tracefile=mysql-engine-$engine-$mysqlbuild-$mysqlserver -fi -echo >$testresultsdir/$tracefile - -if [ -z $engine ] ; then - - # run all test suites including main - teststorun_original="main" - teststorun_tokudb="" - pushd $mysql_basedir/mysql-test/suite - if [ $? = 0 ] ; then - for t in $tests ; do - if [[ $t =~ .*\.xfail$ ]] ; then continue; fi - if [ $t = "perfschema_stress" ] ; then continue; fi - if [ $t = "large_tests" ] ; then continue; fi - if [ $t = "pbxt" ] ; then continue; fi - if [ -d $t/t ] ; then - if [[ $t =~ tokudb* ]] ; then - if [ -z $teststorun_tokudb ] ; then teststorun_tokudb="$t" ; else teststorun_tokudb="$teststorun_tokudb,$t"; fi - else - teststorun_original="$teststorun_original,$t"; - fi - fi - done - popd - fi - - # run the tests - pushd $mysql_basedir/mysql-test - if [ $? 
= 0 ] ; then - if [[ $mysqlbuild =~ tokudb ]] ; then - # run standard tests - if [[ $mysqlbuild =~ 5\\.5 ]] ; then - ./mysql-test-run.pl --suite=$teststorun_original --big-test --max-test-fail=0 --force --retry=1 --testcase-timeout=60 \ - --mysqld=--default-storage-engine=myisam --mysqld=--sql-mode="" \ - --mysqld=--loose-tokudb_debug=3072 \ - --parallel=$parallel >>$testresultsdir/$tracefile 2>&1 - else - ./mysql-test-run.pl --suite=$teststorun_original --big-test --max-test-fail=0 --force --retry=1 --testcase-timeout=60 \ - --mysqld=--loose-tokudb_debug=3072 \ - --parallel=$parallel >>$testresultsdir/$tracefile 2>&1 - fi - - # run tokudb tests - ./mysql-test-run.pl --suite=$teststorun_tokudb --big-test --max-test-fail=0 --force --retry=1 --testcase-timeout=60 \ - --mysqld=--loose-tokudb_debug=3072 \ - --parallel=$parallel >>$testresultsdir/$tracefile 2>&1 - # setup for engines tests - engine="tokudb" - else - ./mysql-test-run.pl --suite=$teststorun_original --big-test --max-test-fail=0 --force --retry=1 --testcase-timeout=60 \ - --parallel=$parallel >>$testresultsdir/$tracefile 2>&1 - fi - popd - fi -fi - -if [ ! -z $engine ] ; then - teststorun="engines/funcs,engines/iuds" - pushd $mysql_basedir/mysql-test - if [ $? 
= 0 ] ; then - if [[ $mysqlbuild =~ 5\\.6 ]] ; then - ./mysql-test-run.pl --suite=$teststorun --force --retry-failure=0 --max-test-fail=0 --nowarnings --testcase-timeout=60 \ - --mysqld=--default-storage-engine=$engine --mysqld=--default-tmp-storage-engine=$engine \ - --parallel=$parallel >>$testresultsdir/$tracefile 2>&1 - else - ./mysql-test-run.pl --suite=$teststorun --force --retry-failure=0 --max-test-fail=0 --nowarnings --testcase-timeout=60 \ - --mysqld=--default-storage-engine=$engine \ - --parallel=$parallel >>$testresultsdir/$tracefile 2>&1 - fi - popd - fi -fi - -# summarize the results -let tests_failed=0 -let tests_passed=0 -while read line ; do - if [[ "$line" =~ (Completed|Timeout):\ Failed\ ([0-9]+)\/([0-9]+) ]] ; then - # failed[2]/total[3] - let tests_failed=tests_failed+${BASH_REMATCH[2]} - let tests_passed=tests_passed+${BASH_REMATCH[3]}-${BASH_REMATCH[2]} - elif [[ "$line" =~ Completed:\ All\ ([0-9]+)\ tests ]] ; then - # passed[1] - let tests_passed=tests_passed+${BASH_REMATCH[1]} - fi -done <$testresultsdir/$tracefile - -# commit the results -if [ $tests_failed = 0 ] ; then - testresult="PASS=$tests_passed" -else - testresult="FAIL=$tests_failed PASS=$tests_passed" -fi -pushd $testresultsdir -if [ $? 
= 0 ] ; then - if [ $commit != 0 ] ; then - svn add $tracefile - if [[ $tracefile =~ "mysql-test" ]] ; then test=mysql-test; else test=mysql-engine-$engine; fi - retry svn commit -m \"$testresult $test $mysqlbuild $mysqlserver\" $tracefile - fi - popd -fi - -popd # $basedir - -if [[ $testresult =~ "PASS" ]] ; then exitcode=0; else exitcode=1; fi -exit $exitcode - - diff --git a/scripts/run.sql.bench.bash b/scripts/run.sql.bench.bash deleted file mode 100755 index 2e24c9c5c89..00000000000 --- a/scripts/run.sql.bench.bash +++ /dev/null @@ -1,153 +0,0 @@ -#!/usr/bin/env bash - -function usage() { - echo "run the sql bench tests" - echo "--mysqlbuild=$mysqlbuild" - echo "--commit=$commit" -} - -function retry() { - local cmd - local retries - local exitcode - cmd=$* - let retries=0 - while [ $retries -le 10 ] ; do - echo `date` $cmd - bash -c "$cmd" - exitcode=$? - echo `date` $cmd $exitcode $retries - let retries=retries+1 - if [ $exitcode -eq 0 ] ; then break; fi - sleep 10 - done - test $exitcode = 0 -} - -svnserver=https://svn.tokutek.com/tokudb -basedir=$HOME/svn.build -builddir=$basedir/mysql.build -mysqlbuild= -mysqlserver=`hostname` -commit=0 -engine=tokudb -socket=/tmp/mysql.sock -system=`uname -s | tr [:upper:] [:lower:]` -arch=`uname -m | tr [:upper:] [:lower:]` - -# parse the command line -while [ $# -gt 0 ] ; do - arg=$1; shift - if [[ $arg =~ --(.*)=(.*) ]] ; then - eval ${BASH_REMATCH[1]}=${BASH_REMATCH[2]} - else - usage; exit 1 - fi -done - -if [[ $mysqlbuild =~ (.*)-(tokudb-.*)-(linux)-(x86_64) ]] ; then - mysql=${BASH_REMATCH[1]} - tokudb=${BASH_REMATCH[2]} - system=${BASH_REMATCH[3]} - arch=${BASH_REMATCH[4]} -else - echo $mysqlbuild is not a tokudb build -fi - -# goto the base directory -if [ ! -d $basedir ] ; then mkdir $basedir; fi -pushd $basedir - -# update the build directory -if [ ! -d $builddir ] ; then mkdir $builddir; fi - -date=`date +%Y%m%d` -testresultsdir=$builddir/$date -pushd $builddir -while [ ! 
-d $date ] ; do - svn mkdir $svnserver/mysql.build/$date -m "" - svn checkout -q $svnserver/mysql.build/$date - if [ $? -ne 0 ] ; then rm -rf $date; fi -done -popd - -# run the tests -pushd /usr/local/mysql/sql-bench - -tracefile=sql-bench-$engine-$mysqlbuild-$mysqlserver.trace -summaryfile=sql-bench-$engine-$mysqlbuild-$mysqlserver.summary - -function mydate() { - date +"%Y%m%d %H:%M:%S" -} - -function runtests() { - testargs=$* - for testname in test-* ; do - chmod +x ./$testname - echo `mydate` $testname $testargs - ./$testname $testargs - exitcode=$? - echo `mydate` - if [ $exitcode != 0 ] ; then - # assume that the test failure due to a crash. allow mysqld to restart. - sleep 60 - fi - done -} - ->$testresultsdir/$tracefile - -runtests --create-options=engine=$engine --socket=$socket --verbose --small-test >>$testresultsdir/$tracefile 2>&1 -runtests --create-options=engine=$engine --socket=$socket --verbose --small-test --fast >>$testresultsdir/$tracefile 2>&1 -runtests --create-options=engine=$engine --socket=$socket --verbose >>$testresultsdir/$tracefile 2>&1 -runtests --create-options=engine=$engine --socket=$socket --verbose --fast >>$testresultsdir/$tracefile 2>&1 -runtests --create-options=engine=$engine --socket=$socket --verbose --fast --fast-insert >>$testresultsdir/$tracefile 2>&1 -runtests --create-options=engine=$engine --socket=$socket --verbose --fast --lock-tables >>$testresultsdir/$tracefile 2>&1 - -popd - -# summarize the results -while read l ; do - if [[ $l =~ ^([0-9]{8}\ [0-9]{2}:[0-9]{2}:[0-9]{2})(.*)$ ]] ; then - t=${BASH_REMATCH[1]} - cmd=${BASH_REMATCH[2]} - if [ -z "$cmd" ] ; then - let duration=$(date -d "$t" +%s)-$(date -d "$tlast" +%s) - printf "%4s %s %8d %s\n" "$status" "$tlast" "$duration" "$cmdlast" - else - cmdlast=$cmd - tlast=$t - status=PASS - fi - else - if [[ $l =~ Got\ error|Died ]] ; then - status=FAIL - fi - fi -done <$testresultsdir/$tracefile >$testresultsdir/$summaryfile - -testresult="" -pf=`mktemp` -egrep "^PASS" 
$testresultsdir/$summaryfile >$pf 2>&1 -if [ $? -eq 0 ] ; then testresult="PASS=`cat $pf | wc -l` $testresult"; fi -egrep "^FAIL" $testresultsdir/$summaryfile >$pf 2>&1 -if [ $? -eq 0 ] ; then testresult="FAIL=`cat $pf | wc -l` $testresult"; fi -rm $pf -if [ "$testresult" = "" ] ; then testresult="?"; fi - -# commit the results -pushd $testresultsdir -if [ $commit != 0 ] ; then - svn add $tracefile $summaryfile - retry svn commit -m \"$testresult sql-bench $mysqlbuild $mysqlserver\" $tracefile $summaryfile -fi -popd - -popd - -if [[ $testresult =~ "PASS" ]] ; then exitcode=0; else exitcode=1; fi -exit $exitcode - - - diff --git a/scripts/run.tpch.bash b/scripts/run.tpch.bash deleted file mode 100755 index efc37d25d2e..00000000000 --- a/scripts/run.tpch.bash +++ /dev/null @@ -1,342 +0,0 @@ -#!/usr/bin/env bash - -function usage() { - echo "run the TPCH load and compare test" - echo "[--SCALE=$SCALE] [--ENGINE=$ENGINE]" - echo "[--dbgen=$dbgen] [--load=$load] [--check=$check] [--compare=$compare] [--query=$query]" - echo "[--mysqlbuild=$mysqlbuild] [--commit=$commit]" - echo "[--testinstance=$testinstance]" - echo "[--tokudb_load_save_space=$tokudb_load_save_space] [--tokudb_row_format=$tokudb_row_format] [--tokudb_loader_memory_size=$tokudb_loader_memory_size]" -} - -function retry() { - local cmd - local retries - local exitcode - cmd=$* - let retries=0 - while [ $retries -le 10 ] ; do - echo `date` $cmd - bash -c "$cmd" - exitcode=$? 
- echo `date` $cmd $exitcode $retries - let retries=retries+1 - if [ $exitcode -eq 0 ] ; then break; fi - sleep 1 - done - test $exitcode = 0 -} - -SCALE=1 -ENGINE=tokudb -TABLES="part partsupp customer lineitem nation orders region supplier" -dbgen=1 -load=1 -compare=1 -query=0 -check=1 -datadir=/usr/local/mysql/data -mysqlbuild= -commit=0 -mysqlserver=`hostname` -mysqluser=`whoami` -mysqlsocket=/tmp/mysql.sock -basedir=$HOME/svn.build -builddir=$basedir/mysql.build -system=`uname -s | tr [:upper:] [:lower:]` -arch=`uname -m | tr [:upper:] [:lower:]` -testinstance= -tokudb_load_save_space=0 -tokudb_row_format= -tokudb_loader_memory_size= -svn_server=https://svn.tokutek.com/tokudb -svn_branch=. -svn_revision=HEAD - -# parse the command line -while [ $# -gt 0 ] ; do - arg=$1; shift - if [[ $arg =~ --(.*)=(.*) ]] ; then - eval ${BASH_REMATCH[1]}=${BASH_REMATCH[2]} - else - usage; exit 1 - fi -done - -if [[ $mysqlbuild =~ (.*)-(tokudb\-.*)-(linux)-(x86_64) ]] ; then - mysql=${BASH_REMATCH[1]} - tokudb=${BASH_REMATCH[2]} - system=${BASH_REMATCH[3]} - arch=${BASH_REMATCH[4]} -else - exit 1 -fi - -dbname=tpch${SCALE}G_${ENGINE} -if [ "$testinstance" != "" ] ; then dbname=${dbname}_${testinstance}; fi -tpchdir=$basedir/tpch${SCALE}G - -if [ -d /usr/local/mysql ] ; then - export PATH=/usr/local/mysql/bin:$PATH -fi - -if [ -d /usr/local/mysql/lib/mysql ] ; then - export LD_LIBRARY_PATH=/usr/local/mysql/lib/mysql:$PATH -fi - -# goto the base directory -if [ ! -d $basedir ] ; then mkdir $basedir; fi - -pushd $basedir - -# update the build directory -if [ $commit != 0 ] ; then - if [ ! -d $builddir ] ; then mkdir $builddir; fi - - date=`date +%Y%m%d` - testresultsdir=$builddir/$date - pushd $builddir - while [ ! -d $date ] ; do - svn mkdir $svn_server/mysql.build/$date -m "" - svn checkout -q $svn_server/mysql.build/$date - if [ $? 
-ne 0 ] ; then rm -rf $date; fi - done - popd -else - testresultsdir=$PWD -fi - -runfile=$testresultsdir/$dbname -if [ $tokudb_load_save_space != 0 ] ; then runfile=$runfile-compress; fi -if [ "$tokudb_row_format" != "" ] ; then runfile=$runfile-$tokudb_row_format; fi -if [ "$tokudb_loader_memory_size" != "" ] ; then runfile=$runfile-$tokudb_loader_memory_size; fi -runfile=$runfile-$mysqlbuild-$mysqlserver -rm -rf $runfile - -testresult="PASS" - -# maybe get the tpch data from AWS S3 -if [ $compare != 0 ] && [ ! -d $tpchdir ] ; then - tpchtarball=tpch${SCALE}G_data_dump.tar - if [ ! -f $tpchtarball ] ; then - echo `date` s3get --bundle tokutek-mysql-data $tpchtarball >>$runfile 2>&1 - s3get --verbose --bundle tokutek-mysql-data $tpchtarball >>$runfile 2>&1 - exitcode=$? - echo `date` s3get --bundle tokutek-mysql-data $tpchtarball $exitcode >>$runfile 2>&1 - if [ $exitcode -ne 0 ] ; then testresult="FAIL"; fi - fi - if [ $testresult = "PASS" ] ; then - tar xf $tpchtarball - exitcode=$? - echo `date` tar xf $tpchtarball $exitcode >>$runfile 2>&1 - if [ $exitcode -ne 0 ] ; then - testresult="FAIL" - else - # gunzip the data files - pushd tpch${SCALE}G/data/tpch${SCALE}G - for f in *.gz ; do - echo `date` gunzip $f >>$runfile 2>&1 - gunzip $f - done - ls -l >>$runfile 2>&1 - popd - fi - fi -fi - -# checkout the tpch scripts -tpchtestdir=tpch-$mysqlbuild -if [ "$testinstance" != "" ] ; then tpchtestdir=${tpchtestdir}_${testinstance}; fi -if [ $testresult = "PASS" ] ; then - rm -rf $tpchtestdir - retry svn export -q -r $svn_revision $svn_server/$svn_branch/tpch $tpchtestdir - exitcode=$? - echo `date` export $svn_server/$svn_branch/tpch $exitcode >>$runfile 2>&1 - if [ $exitcode != 0 ] ; then - retry svn export -q -r $svn_revision $svn_server/tpch $tpchtestdir - exitcode=$? 
- echo `date` export $svn_server/tpch $exitcode >>$runfile 2>&1 - fi - if [ $exitcode != 0 ] ; then testresult="FAIL"; fi -fi - -# generate the tpch data -if [ $dbgen != 0 -a $testresult = "PASS" ] ; then - pushd $tpchtestdir/dbgen - make - exitcode=$? - echo `date` make dbgen $exitcode >>$runfile 2>&1 - if [ $exitcode != 0 ] ; then testresult="FAIL"; fi - popd - if [ $testresult = "PASS" ] ; then - dbgen=0 - mkdir -p tpch${SCALE}G/data/tpch${SCALE}G - pushd tpch${SCALE}G/data/tpch${SCALE}G - if [ ! -f lineitem.tbl ] ; then dbgen=1; fi - popd - if [ $dbgen != 0 ] ; then - pushd $tpchtestdir/dbgen - ./dbgen -fF -s $SCALE - exitcode=$? - echo `date` dbgen -fF -s $SCALE $exitcode >>$runfile 2>&1 - if [ $exitcode != 0 ] ; then - testresult="FAIL" - else - ls -l *.tbl >>$runfile - chmod 0644 *.tbl - ls -l *.tbl >>$runfile - mv *.tbl $basedir/tpch${SCALE}G/data/tpch${SCALE}G - fi - popd - fi - fi -fi - -# create the tpch database -if [ $load != 0 -a $testresult = "PASS" ] ; then - echo `date` drop database if exists $dbname >>$runfile - mysql -S $mysqlsocket -u $mysqluser -e "drop database if exists $dbname" >>$runfile 2>&1 - exitcode=$? - echo `date` drop database if exists $dbname $exitcode>>$runfile - if [ $exitcode -ne 0 ] ; then testresult="FAIL"; fi - echo `date` create database $dbname >>$runfile - mysql -S $mysqlsocket -u $mysqluser -e "create database $dbname" >>$runfile 2>&1 - exitcode=$? - echo `date` create database $dbname $exitcode >>$runfile - if [ $exitcode -ne 0 ] ; then testresult="FAIL"; fi -fi - -# create the tpch tables -if [ $load != 0 -a $testresult = "PASS" ] ; then - echo `date` create table >>$runfile - mysql -S $mysqlsocket -u $mysqluser -D $dbname -e "source $basedir/tpch-$mysqlbuild/scripts/${ENGINE}_tpch_create_table.sql" >>$runfile 2>&1 - exitcode=$? 
- echo `date` create table $exitcode >>$runfile - if [ $exitcode -ne 0 ] ; then testresult="FAIL"; fi -fi - -# get the current loader memory size -if [ $load != 0 -a $testresult = "PASS" ] ; then - let default_loader_memory_size="$(mysql -S $mysqlsocket -u $mysqluser -e'select @@tokudb_loader_memory_size' --silent --skip-column-names)" - exitcode=$? - echo `date` get tokudb_loader_memory_size $exitcode >>$runfile - if [ $exitcode -ne 0 ] ; then testresult="FAIL"; fi - if [ "$tokudb_loader_memory_size" = "" ] ; then tokudb_loader_memory_size=$default_loader_memory_size; fi -fi - -# load the data -if [ $load != 0 -a $testresult = "PASS" ] ; then - for tblname in $TABLES ; do - echo `date` load table $tblname >>$runfile - ls -l $tpchdir/data/tpch${SCALE}G/$tblname.tbl >>$runfile - start=$(date +%s) - mysql -S $mysqlsocket -u $mysqluser -D $dbname -e "set tokudb_loader_memory_size=$tokudb_loader_memory_size;\ - set tokudb_load_save_space=$tokudb_load_save_space;\ - load data infile '$tpchdir/data/tpch${SCALE}G/$tblname.tbl' into table $tblname fields terminated by '|';" >>$runfile 2>&1 - exitcode=$? - let loadtime=$(date +%s)-$start - echo `date` load table $tblname $exitcode loadtime=$loadtime>>$runfile - if [ $exitcode -ne 0 ] ; then testresult="FAIL"; fi - done -fi - -if [ $check != 0 -a $testresult = "PASS" ] ; then - for tblname in lineitem ; do - echo `date` add clustering index $tblname >>$runfile - start=$(date +%s) - mysql -S $mysqlsocket -u $mysqluser -D $dbname -e "set tokudb_loader_memory_size=$tokudb_loader_memory_size;\ - set tokudb_load_save_space=$tokudb_load_save_space;\ - set tokudb_create_index_online=0;\ - create clustering index i_shipdate on lineitem (l_shipdate);" >>$runfile 2>&1 - exitcode=$? 
- let loadtime=$(date +%s)-$start - echo `date` add clustering index $tblname $exitcode loadtime=$loadtime >>$runfile - if [ $exitcode -ne 0 ] ; then testresult="FAIL"; fi - done -fi - -# check the tables -if [ $check != 0 -a $testresult = "PASS" ] ; then - for tblname in $TABLES ; do - echo `date` check table $tblname >>$runfile - start=$(date +%s) - mysql -S $mysqlsocket -u $mysqluser -D $dbname -e "check table $tblname" >>$runfile 2>&1 - exitcode=$? - let checktime=$(date +%s)-$start - echo `date` check table $tblname $exitcode checktime=$checktime >>$runfile - if [ $exitcode -ne 0 ] ; then testresult="FAIL"; fi - done -fi - -if [ $check != 0 -a $testresult = "PASS" ] ; then - for tblname in lineitem ; do - echo `date` drop index $tblname >>$runfile - mysql -S $mysqlsocket -u $mysqluser -D $dbname -e "drop index i_shipdate on lineitem" >>$runfile 2>&1 - exitcode=$? - echo `date` drop index $tblname $exitcode >>$runfile - if [ $exitcode -ne 0 ] ; then testresult="FAIL"; fi - done -fi - -# compare the data -if [ $compare != 0 -a $testresult = "PASS" ] ; then - if [ -d $tpchdir/dump/tpch${SCALE}G ] ; then - mysql -S $mysqlsocket -u $mysqluser -D $dbname -e "source $basedir/tpch-$mysqlbuild/scripts/dumptpch.sql" >>$runfile 2>&1 - exitcode=$? - echo `date` dump data $exitcode >>$runfile - if [ $exitcode -ne 0 ] ; then - testresult="FAIL" - else - # force the permissions on the dumpdir open - pushd $datadir/$dbname - exitcode=$? - if [ $exitcode != 0 ] ; then - sudo chmod g+rwx $datadir - sudo chmod g+rwx $datadir/$dbname - pushd $datadir/$dbname - exitcode=$? - fi - if [ $exitcode = 0 ] ; then - popd - fi - - # compare the dump files - dumpdir=$datadir/$dbname - comparedir=$tpchdir/dump/tpch${SCALE}G - for f in $dumpdir/dump* ; do - d=`basename $f` - if [ ! -f $comparedir/$d ] && [ -f $comparedir/$d.gz ] ; then - pushd $comparedir; gunzip $d.gz; popd - fi - if [ -f $comparedir/$d ] ; then - diff -q $dumpdir/$d $comparedir/$d - if [ $? 
= 0 ] ; then - result="PASS" - else - result="FAIL" - testresult="FAIL" - fi - else - result="MISSING" - testresult="FAIL" - fi - echo `date` $d $result >>$runfile - done - if [ $testresult = "PASS" ] ; then - # remove the dump files - rm -f $datadir/$dbname/dump* - fi - fi - fi -fi - -# commit results -if [ $commit != 0 ] ; then - svn add $runfile - retry svn commit -m \"$testresult $dbname $mysqlbuild $mysqlserver\" $runfile -fi - -popd - -if [ $testresult = "PASS" ] ; then exitcode=0; else exitcode=1; fi -exit $exitcode diff --git a/scripts/setup.mysql.bash b/scripts/setup.mysql.bash deleted file mode 100755 index 85132350289..00000000000 --- a/scripts/setup.mysql.bash +++ /dev/null @@ -1,231 +0,0 @@ -#!/usr/bin/env bash - -function usage() { - echo "setup.mysql.bash" - echo "--mysqlbuild=$mysqlbuild --shutdown=$shutdown --install=$install --startup=$startup" -} - -function download_file() { - local file=$1 - s3get $s3bucket $file $file -} - -function download_tarball() { - local tarball=$1 - if [ ! -f $tarball ] ; then - download_file $tarball - if [ $? -ne 0 ] ; then test 0 = 1; return; fi - fi - if [ ! -f $tarball.md5 ] ; then - download_file $tarball.md5 - if [ $? -ne 0 ] ; then test 0 = 1; return; fi - fi -} - -function install_tarball() { - local basedir=$1; local tarball=$2 - tar -x -z -f $basedir/$tarball - if [ $? -ne 0 ] ; then test 0 = 1; return; fi -} - -function check_md5() { - local tarball=$1 - md5sum --check $tarball.md5 - if [ $? -ne 0 ] ; then - # support jacksum md5 output which is almost the same as md5sum - diff -b <(cat $tarball.md5) <(md5sum $tarball) - if [ $? 
-ne 0 ] ; then test 0 = 1; return; fi - fi -} - -mysqlbuild= -shutdown=1 -install=1 -startup=1 -s3bucket=tokutek-mysql-build -sleeptime=60 -builtins="mysqlbuild shutdown install startup s3bucket sleeptime" -mysqld_args="--user=mysql --core-file --core-file-size=unlimited" -sudo=/usr/bin/sudo -defaultsfile="" -if [ -f /etc/$(whoami).my.cnf ] ; then - defaultsfile=/etc/$(whoami).my.cnf -fi - -function is_builtin() { - local v=$1; shift - local x - for x in $* ; do - if [ $v = $x ] ; then echo 1; return; fi - done - echo 0 -} - -while [ $# -gt 0 ] ; do - arg=$1; shift - if [ $arg = "--help" -o $arg = "-h" -o $arg = "-?" ] ; then - usage; exit 1 - elif [[ $arg =~ --(.*)=(.*) ]] ; then - r=$(is_builtin ${BASH_REMATCH[1]} $builtins) - if [ $r = 1 ] ; then - eval ${BASH_REMATCH[1]}=${BASH_REMATCH[2]} - else - mysqld_args="$mysqld_args $arg" - fi - else - mysqld_args="$mysqld_args $arg" - fi -done - -if [ -d /data/mysql/tmp ] ; then mysqld_args="$mysqld_args --tmpdir=/data/mysql/tmp"; fi - -if [[ $mysqlbuild =~ (.*)-(tokudb\-.*)-(linux)-(x86_64) ]] ; then - mysql=${BASH_REMATCH[1]} - tokudb=${BASH_REMATCH[2]} - system=${BASH_REMATCH[3]} - arch=${BASH_REMATCH[4]} -else - echo $mysqlbuild is not a tokudb build -fi - -if [ ! -d downloads ] ; then mkdir downloads; fi - -pushd downloads -if [ $? != 0 ] ; then exit 1; fi - -basedir=$PWD - -mysqltarball=$mysqlbuild.tar.gz - -# get the tarball -download_tarball $mysqltarball -if [ $? -ne 0 ] ; then exit 1; fi - -# check the md5 sum -check_md5 $mysqltarball -if [ $? -ne 0 ] ; then exit 1; fi - -tokudbtarball="" -if [[ $mysqltarball =~ ^(Percona-Server.*)\.(Linux\.x86_64.*)$ ]] ; then - tar tzf $mysqltarball | egrep ha_tokudb.so >/dev/null 2>&1 - if [ $? -ne 0 ] ; then - tokudbtarball=${BASH_REMATCH[1]}.TokuDB.${BASH_REMATCH[2]} - download_tarball $tokudbtarball - if [ $? -ne 0 ] ; then exit 1; fi - check_md5 $tokudbtarball - if [ $? 
-ne 0 ] ; then exit 1; fi - fi -fi - -# set ldpath -ldpath="" -if [ -d /usr/local/gcc-4.7/lib64 ] ; then - echo skip ldpath="export LD_LIBRARY_PATH=/usr/local/gcc-4.7/lib64:\$LD_LIBRARY_PATH;" -fi - -# shutdown mysql -if [ $shutdown -ne 0 ] ; then - if [ -x /etc/init.d/mysql ] ; then - $sudo setsid /etc/init.d/mysql stop - else - /usr/local/mysql/bin/mysqladmin shutdown - fi - sleep $sleeptime -fi - -pushd /usr/local -if [ $? = 0 ] ; then - rm mysql - popd -fi - -# install the release -pushd /usr/local/mysqls 2>/dev/null -if [ $? = 0 ] ; then - mysqldir=mysqls/$mysqlbuild -else - pushd /usr/local - if [ $? -ne 0 ] ; then exit 1; fi - mysqldir=$mysqlbuild -fi - -if [ ! -d $mysqlbuild ] || [ $install -ne 0 ] ; then - rm mysql - if [ -d $mysqlbuild ] ; then $sudo rm -rf $mysqlbuild; fi - - install_tarball $basedir $mysqltarball - if [ $? -ne 0 ] ; then exit 1; fi - - if [ $tokudbtarball ] ; then - install_tarball $basedir $tokudbtarball - if [ $? -ne 0 ] ; then exit 1; fi - fi - - ln -s $mysqldir /usr/local/mysql - if [ $? -ne 0 ] ; then exit 1; fi - ln -s $mysqldir /usr/local/$mysqlbuild - if [ $? -ne 0 ] ; then exit 1; fi - - installdb=$mysqlbuild/bin/mysql_install_db - if [ ! -f $installdb ] ; then - installdb=$mysqlbuild/scripts/mysql_install_db - fi - - $sudo chown -R mysql $mysqlbuild/data - $sudo chgrp -R mysql $mysqlbuild/data - - # 5.6 debug build needs this - if [ ! -f $mysqlbuild/bin/mysqld ] && [ -f $mysqlbuild/bin/mysqld-debug ] ; then - ln $mysqlbuild/bin/mysqld-debug $mysqlbuild/bin/mysqld - fi - - if [ -z "$defaultsfile" ] ; then - default_arg="" - else - default_arg="--defaults-file=$defaultsfile" - fi - $sudo bash -c "$ldpath $installdb $default_arg --user=mysql --basedir=$PWD/$mysqlbuild --datadir=$PWD/$mysqlbuild/data" - if [ $? -ne 0 ] ; then exit 1; fi -else - # create link - rm /usr/local/mysql - ln -s $mysqldir /usr/local/mysql - if [ $? -ne 0 ] ; then exit 1; fi - rm /usr/local/$mysqlbuild - ln -s $mysqldir /usr/local/$mysqlbuild - if [ $? 
-ne 0 ] ; then exit 1; fi -fi -popd - -# start mysql -if [ $startup -ne 0 ] ; then - ulimit -a - # increase the open file limit - ulimit -n 10240 - exitcode=$? - echo ulimit -n 10240 exitcode $exitcode - - if [ -x /etc/init.d/mysql ] ; then - $sudo setsid /etc/init.d/mysql start - else - if [ -z "$defaultsfile" ] ; then - default_arg="" - else - default_arg="--defaults-file=$defaultsfile" - fi - j=/usr/local/mysql/lib/mysql/libjemalloc.so - if [ -f $j ] ; then - default_arg="$default_arg --malloc-lib=$j" - fi - $sudo -b bash -c "$ldpath /usr/local/mysql/bin/mysqld_safe $default_arg $mysqld_args" >/dev/null 2>&1 & - fi - sleep $sleeptime - - # add mysql grants - /usr/local/mysql/bin/mysql -u root -e "grant all on *.* to tokubuild@localhost" - /usr/local/mysql/bin/mysql -u root -e "grant all on *.* to 'ec2-user'@localhost" -fi - -popd - -exit 0 diff --git a/scripts/test.mysql.bash b/scripts/test.mysql.bash deleted file mode 100755 index 5c389e05f3f..00000000000 --- a/scripts/test.mysql.bash +++ /dev/null @@ -1,51 +0,0 @@ -#!/usr/bin/env bash - -function usage() { - echo "run the mysql tests" - echo "--mysqlbuild=$mysqlbuild --tests=$tests" -} - -function expand() { - echo $* | tr ,: " " -} - -mysqlbuild= -mysqlsocket=/tmp/mysql.sock -gearmandir=/usr/local/gearmand-1.1.6 -gearmandhost=localhost -system=$(uname -s | tr [:upper:] [:lower:]) -arch=$(uname -m | tr [:upper:] [:lower:]) -tests=run.mysql.tests.bash - -while [ $# -gt 0 ] ; do - arg=$1; shift - if [[ $arg =~ --(.*)=(.*) ]] ; then - eval ${BASH_REMATCH[1]}=${BASH_REMATCH[2]} - else - usage; exit 1; - fi -done - -if [ -z $mysqlbuild ] ; then exit 1; fi - -for testname in $(expand $tests) ; do - if [ $testname = "run.mysql.tests.bash" ] ; then - run_mysqld=0 - else - run_mysqld=1 - fi - if [ $run_mysqld = 0 ] ; then - setupextra="--shutdown=1 --install=1 --startup=0" - else - setupextra="--shutdown=1 --install=1 --startup=1" - fi - echo "echo \$(date) $mysqlbuild >>/tmp/$(whoami).$testname.trace 2>&1; \ - 
\$HOME/bin/setup.mysql.bash --mysqlbuild=$mysqlbuild $setupextra >>/tmp/$(whoami).$testname.trace 2>&1; \ - testexitcode=\$?; \ - echo \$(date) $mysqlbuild \$testexitcode >>/tmp/$(whoami).$testname.trace 2>&1; \ - if [ \$testexitcode -ne 0 ] ; then exit 1; fi; \ - \$HOME/bin/$testname --mysqlbuild=$mysqlbuild --commit=1 >>/tmp/$(whoami).$testname.trace 2>&1; \ - if [ $run_mysqld != 0 ] ; then mysqladmin -S$mysqlsocket shutdown; fi" | $gearmandir/bin/gearman -b -f mysql-test-$system-$arch -h $gearmandhost -p 4730 -done - -exit 0 diff --git a/scripts/testbuildfromsrc.bash b/scripts/testbuildfromsrc.bash deleted file mode 100644 index 136841ea284..00000000000 --- a/scripts/testbuildfromsrc.bash +++ /dev/null @@ -1,32 +0,0 @@ -#!/usr/bin/env bash - -# for all source tarballs and their coresponding md5 files, build a binary release tarball - -system=$(uname -s|tr [:upper:] [:lower:]) -arch=$(uname -m) - -function expand() { - echo $* | tr ,: " " -} - -for f in *.md5; do - if [[ $f =~ (.*).tar.gz.md5 ]] ; then - mysqlsrc=${BASH_REMATCH[1]} - else - exit 1 - fi - if [ -d $mysqlsrc ] ; then continue; fi - md5sum --check $mysqlsrc.tar.gz.md5 - if [ $? != 0 ] ; then exit 1; fi - tar xzf $mysqlsrc.tar.gz - if [ $? != 0 ] ; then exit 1; fi - mkdir $mysqlsrc/build.RelWithDebInfo - pushd $mysqlsrc/build.RelWithDebInfo - if [ $? != 0 ] ; then exit 1; fi - cmake -D BUILD_CONFIG=mysql_release -D CMAKE_BUILD_TYPE=RelWithDebInfo -D BUILD_TESTING=OFF .. - if [ $? != 0 ] ; then exit 1; fi - make -j4 package - if [ $? != 0 ] ; then exit 1; fi - if [ ! -f $mysqlsrc-$system-$arch.tar.gz ] ; then exit 1; fi - popd -done diff --git a/scripts/testsandbox.bash b/scripts/testsandbox.bash deleted file mode 100644 index c348db6fd4a..00000000000 --- a/scripts/testsandbox.bash +++ /dev/null @@ -1,32 +0,0 @@ -#!/usr/bin/env bash - -# for all tokudb binary tarballs, verify that we can create and run the tarball using the MySQL sandbox. 
- -function expand() { - echo $* | tr ,: " " -} - -let n=0 -for f in *.md5; do - if [[ $f =~ (.*).tar.gz.md5 ]] ; then - mysqlbuild=${BASH_REMATCH[1]} - else - exit 1 - fi - - md5sum --check $f - if [ $? != 0 ] ; then exit 1; fi - make_sandbox --add_prefix=test$n- $mysqlbuild.tar.gz -- --sandbox_directory=test$n - if [ $? != 0 ] ; then exit 1; fi - pushd $HOME/sandboxes - if [ $? = 0 ] ; then - ./use_all 'show engines' - ./use_all 'create table test.t (a int primary key, b bigint, c varchar(256), d blob(500000), clustering key(b))' - ./use_all 'show create table test.t' - ./stop_all - popd - fi - let n=n+1 -done - - diff --git a/scripts/tpch.readme b/scripts/tpch.readme deleted file mode 100644 index b5128045ade..00000000000 --- a/scripts/tpch.readme +++ /dev/null @@ -1,34 +0,0 @@ -TPCH is an industry standard data warehouse benchmark. We use TPCH databases to test the TokuDB loader. - -The run.tpch.bash script loads a TPCH database at a given scale factor into TokuDB. It then uses the SQL -CHECK TABLE statement to verify the fractal tree invariants. Finally, it dumps the database and compares -with a precomputed dump of the database from InnoDB. - -Here are some TPCH databases dumped from InnoDB. These dumps are used to compare with TPCH data loaded -into TokuDB. 
- -$ s3ls tokutek-mysql-data -l --prefix=tpch -2010-08-16T21:21:10.000Z 1073741824 tpch10G_data_dump.tar.0 -2010-08-16T21:38:45.000Z 1073741824 tpch10G_data_dump.tar.1 -2010-08-16T21:56:43.000Z 1073741824 tpch10G_data_dump.tar.2 -2010-08-16T22:14:49.000Z 1073741824 tpch10G_data_dump.tar.3 -2010-08-16T22:32:38.000Z 1073741824 tpch10G_data_dump.tar.4 -2010-08-16T22:51:04.000Z 1073741824 tpch10G_data_dump.tar.5 -2010-08-16T23:08:51.000Z 91262976 tpch10G_data_dump.tar.6 -2010-08-16T23:10:21.000Z 654 tpch10G_data_dump.tar.xml -2010-08-12T17:45:09.000Z 633579520 tpch1G_data_dump.tar -2010-08-12T17:56:30.000Z 160 tpch1G_data_dump.tar.xml -2010-08-06T13:57:51.000Z 633610240 tpch1G_data_dump_innodb.tar -2010-08-06T14:07:09.000Z 174 tpch1G_data_dump_innodb.tar.xml -2010-11-28T12:20:58.000Z 886 tpch30G_data_dump.tar.xml -2010-09-14T19:16:30.000Z 1073741824 tpch30G_dump_data.tar.0 -2010-09-14T19:40:02.000Z 1073741824 tpch30G_dump_data.tar.1 -2010-09-14T20:12:22.000Z 1073741824 tpch30G_dump_data.tar.2 -2010-09-14T20:45:23.000Z 1073741824 tpch30G_dump_data.tar.3 -2010-09-14T21:14:07.000Z 1073741824 tpch30G_dump_data.tar.4 -2010-09-14T21:37:54.000Z 1073741824 tpch30G_dump_data.tar.5 -2010-09-14T21:57:02.000Z 1073741824 tpch30G_dump_data.tar.6 -2010-09-14T22:16:59.000Z 1073741824 tpch30G_dump_data.tar.7 -2010-09-14T22:36:22.000Z 1073741824 tpch30G_dump_data.tar.8 -2010-09-14T22:55:25.000Z 382511104 tpch30G_dump_data.tar.9 -2010-09-14T23:02:04.000Z 886 tpch30G_dump_data.tar.xml From 5a18a1b046f4d086226a0ebb63aa9eec268061cd Mon Sep 17 00:00:00 2001 From: John Esmet Date: Thu, 21 Aug 2014 15:34:14 -0400 Subject: [PATCH 145/190] FT-440 Use a scoped malloc during garbage collection and for checkpoint cachefiles, also clean up some formatting. 
--- ft/cachetable/cachetable.cc | 7 ++-- ft/ule.cc | 76 +++++++++++++++---------------------- 2 files changed, 34 insertions(+), 49 deletions(-) diff --git a/ft/cachetable/cachetable.cc b/ft/cachetable/cachetable.cc index b24f867bbaf..7073ff3435e 100644 --- a/ft/cachetable/cachetable.cc +++ b/ft/cachetable/cachetable.cc @@ -106,6 +106,7 @@ PATENT RIGHTS GRANT: #include "ft/cachetable/checkpoint.h" #include "ft/logger/log-internal.h" #include "util/rwlock.h" +#include "util/scoped_malloc.h" #include "util/status.h" #include "util/context.h" @@ -4589,7 +4590,8 @@ void checkpointer::remove_background_job() { } void checkpointer::end_checkpoint(void (*testcallback_f)(void*), void* testextra) { - CACHEFILE *XMALLOC_N(m_checkpoint_num_files, checkpoint_cfs); + toku::scoped_malloc checkpoint_cfs_buf(m_checkpoint_num_files * sizeof(CACHEFILE)); + CACHEFILE *checkpoint_cfs = reinterpret_cast(checkpoint_cfs_buf.get()); this->fill_checkpoint_cfs(checkpoint_cfs); this->checkpoint_pending_pairs(); @@ -4601,9 +4603,8 @@ void checkpointer::end_checkpoint(void (*testcallback_f)(void*), void* testextr this->log_end_checkpoint(); this->end_checkpoint_userdata(checkpoint_cfs); - //Delete list of cachefiles in the checkpoint, + // Delete list of cachefiles in the checkpoint, this->remove_cachefiles(checkpoint_cfs); - toku_free(checkpoint_cfs); } struct iterate_checkpoint_cfs { diff --git a/ft/ule.cc b/ft/ule.cc index 569c2d1ff50..1a9ab2a0867 100644 --- a/ft/ule.cc +++ b/ft/ule.cc @@ -328,11 +328,11 @@ xid_reads_committed_xid(TXNID tl1, TXNID xc, const xid_omt_t &snapshot_txnids, c // static void ule_simple_garbage_collection(ULE ule, txn_gc_info *gc_info) { - uint32_t curr_index = 0; - uint32_t num_entries; if (ule->num_cuxrs == 1) { - goto done; + return; } + + uint32_t curr_index = 0; if (gc_info->mvcc_needed) { // starting at the top of the committed stack, find the first // uxr with a txnid that is less than oldest_referenced_xid @@ -342,24 +342,21 @@ 
ule_simple_garbage_collection(ULE ule, txn_gc_info *gc_info) { break; } } - } - else { + } else { // if mvcc is not needed, we can need the top committed // value and nothing else curr_index = ule->num_cuxrs - 1; } - // curr_index is now set to the youngest uxr older than oldest_referenced_xid - if (curr_index == 0) { - goto done; - } - // now get rid of the entries below curr_index - num_entries = ule->num_cuxrs + ule->num_puxrs - curr_index; - memmove(&ule->uxrs[0], &ule->uxrs[curr_index], num_entries * sizeof(ule->uxrs[0])); - ule->uxrs[0].xid = TXNID_NONE; //New 'bottom of stack' loses its TXNID - ule->num_cuxrs -= curr_index; - -done:; + // curr_index is now set to the youngest uxr older than oldest_referenced_xid + // so if it's not the bottom of the stack.. + if (curr_index != 0) { + // ..then we need to get rid of the entries below curr_index + uint32_t num_entries = ule->num_cuxrs + ule->num_puxrs - curr_index; + memmove(&ule->uxrs[0], &ule->uxrs[curr_index], num_entries * sizeof(ule->uxrs[0])); + ule->uxrs[0].xid = TXNID_NONE; // New 'bottom of stack' loses its TXNID + ule->num_cuxrs -= curr_index; + } } // TODO: Clean this up @@ -367,15 +364,12 @@ extern bool garbage_collection_debug; static void ule_garbage_collect(ULE ule, const xid_omt_t &snapshot_xids, const rx_omt_t &referenced_xids, const xid_omt_t &live_root_txns) { - if (ule->num_cuxrs == 1) goto done; - // will fail if too many num_cuxrs - bool necessary_static[MAX_TRANSACTION_RECORDS]; - bool *necessary; - necessary = necessary_static; - if (ule->num_cuxrs >= MAX_TRANSACTION_RECORDS) { - XMALLOC_N(ule->num_cuxrs, necessary); + if (ule->num_cuxrs == 1) { + return; } - memset(necessary, 0, sizeof(necessary[0])*ule->num_cuxrs); + + toku::scoped_calloc necessary_buf(ule->num_cuxrs * sizeof(bool)); + bool *necessary = reinterpret_cast(necessary_buf.get()); uint32_t curr_committed_entry; curr_committed_entry = ule->num_cuxrs - 1; @@ -405,24 +399,21 @@ ule_garbage_collect(ULE ule, const xid_omt_t 
&snapshot_xids, const rx_omt_t &ref } tl1 = toku_get_youngest_live_list_txnid_for(xc, snapshot_xids, referenced_xids); - if (tl1 == xc) { - // if tl1 == xc, that means xc should be live and show up in - // live_root_txns, which we check above. So, if we get - // here, something is wrong. - assert(false); - } + + // if tl1 == xc, that means xc should be live and show up in live_root_txns, which we check above. + invariant(tl1 != xc); + if (tl1 == TXNID_NONE) { // set tl1 to youngest live transaction older than ule->uxrs[curr_committed_entry]->xid tl1 = get_next_older_txnid(xc, snapshot_xids); if (tl1 == TXNID_NONE) { - //Remainder is garbage, we're done + // remainder is garbage, we're done break; } } - if (garbage_collection_debug) - { + if (garbage_collection_debug) { int r = snapshot_xids.find_zero(tl1, nullptr, nullptr); - invariant(r==0); //make sure that the txn you are claiming is live is actually live + invariant_zero(r); // make sure that the txn you are claiming is live is actually live } // // tl1 should now be set @@ -436,30 +427,23 @@ ule_garbage_collect(ULE ule, const xid_omt_t &snapshot_xids, const rx_omt_t &ref curr_committed_entry--; } } - uint32_t first_free; - first_free = 0; - uint32_t i; - for (i = 0; i < ule->num_cuxrs; i++) { - //Shift values to 'delete' garbage values. + uint32_t first_free = 0; + for (uint32_t i = 0; i < ule->num_cuxrs; i++) { + // Shift values to 'delete' garbage values. 
if (necessary[i]) { ule->uxrs[first_free] = ule->uxrs[i]; first_free++; } } - uint32_t saved; - saved = first_free; + uint32_t saved = first_free; invariant(saved <= ule->num_cuxrs); invariant(saved >= 1); ule->uxrs[0].xid = TXNID_NONE; //New 'bottom of stack' loses its TXNID if (first_free != ule->num_cuxrs) { - //Shift provisional values + // Shift provisional values memmove(&ule->uxrs[first_free], &ule->uxrs[ule->num_cuxrs], ule->num_puxrs * sizeof(ule->uxrs[0])); } ule->num_cuxrs = saved; - if (necessary != necessary_static) { - toku_free(necessary); - } -done:; } static size_t ule_packed_memsize(ULE ule) { From effa06ec47cba75dbeb34c6d298e3ade553042b7 Mon Sep 17 00:00:00 2001 From: John Esmet Date: Thu, 21 Aug 2014 15:34:18 -0400 Subject: [PATCH 146/190] Clean up toku_ft_handle_close --- ft/ft-ops.cc | 31 ++++++++++++++----------------- 1 file changed, 14 insertions(+), 17 deletions(-) diff --git a/ft/ft-ops.cc b/ft/ft-ops.cc index 8a311d312ee..c4d77a43793 100644 --- a/ft/ft-ops.cc +++ b/ft/ft-ops.cc @@ -3250,34 +3250,31 @@ ft_remove_handle_ref_callback(FT UU(ft), void *extra) { toku_list_remove(&handle->live_ft_handle_link); } -// close an ft handle during normal operation. the underlying ft may or may not close, -// depending if there are still references. an lsn for this close will come from the logger. -void -toku_ft_handle_close(FT_HANDLE ft_handle) { - // There are error paths in the ft_handle_open that end with ft_handle->ft==NULL. +static void ft_handle_close(FT_HANDLE ft_handle, bool oplsn_valid, LSN oplsn) { FT ft = ft_handle->ft; - if (ft) { - const bool oplsn_valid = false; - toku_ft_remove_reference(ft, oplsn_valid, ZERO_LSN, ft_remove_handle_ref_callback, ft_handle); + // There are error paths in the ft_handle_open that end with ft_handle->ft == nullptr. + if (ft != nullptr) { + toku_ft_remove_reference(ft, oplsn_valid, oplsn, ft_remove_handle_ref_callback, ft_handle); } toku_free(ft_handle); } +// close an ft handle during normal operation. 
the underlying ft may or may not close, +// depending if there are still references. an lsn for this close will come from the logger. +void toku_ft_handle_close(FT_HANDLE ft_handle) { + ft_handle_close(ft_handle, false, ZERO_LSN); +} + // close an ft handle during recovery. the underlying ft must close, and will use the given lsn. -void -toku_ft_handle_close_recovery(FT_HANDLE ft_handle, LSN oplsn) { - FT ft = ft_handle->ft; +void toku_ft_handle_close_recovery(FT_HANDLE ft_handle, LSN oplsn) { // the ft must exist if closing during recovery. error paths during // open for recovery should close handles using toku_ft_handle_close() - assert(ft); - const bool oplsn_valid = true; - toku_ft_remove_reference(ft, oplsn_valid, oplsn, ft_remove_handle_ref_callback, ft_handle); - toku_free(ft_handle); + invariant_notnull(ft_handle->ft); + ft_handle_close(ft_handle, true, oplsn); } // TODO: remove this, callers should instead just use toku_ft_handle_close() -int -toku_close_ft_handle_nolsn (FT_HANDLE ft_handle, char** UU(error_string)) { +int toku_close_ft_handle_nolsn(FT_HANDLE ft_handle, char **UU(error_string)) { toku_ft_handle_close(ft_handle); return 0; } From 9b0047636b4751440e48a6728953052d0321025d Mon Sep 17 00:00:00 2001 From: John Esmet Date: Thu, 21 Aug 2014 15:34:22 -0400 Subject: [PATCH 147/190] FT-585 Move serialize and compression size calculations around so we can malloc one large buffer to serialize a node instead of many smaller ones, which should hopefully put less pressure on jemalloc during checkpoints etc. 
--- ft/serialize/compress.cc | 9 ++- ft/serialize/ft_node-serialize.cc | 130 ++++++++++++++++-------------- 2 files changed, 76 insertions(+), 63 deletions(-) diff --git a/ft/serialize/compress.cc b/ft/serialize/compress.cc index 2b0187e0b4f..100be1302ee 100644 --- a/ft/serialize/compress.cc +++ b/ft/serialize/compress.cc @@ -165,11 +165,12 @@ void toku_compress (enum toku_compression_method a, assert(1 <= *destLen); *destLen = 1; } else { - qlz_state_compress *XCALLOC(qsc); + toku::scoped_calloc qsc_buf(sizeof(qlz_state_compress)); + qlz_state_compress *qsc = reinterpret_cast(qsc_buf.get()); size_t actual_destlen = qlz_compress(source, (char*)(dest+1), sourceLen, qsc); - assert(actual_destlen +1 <= *destLen); - *destLen = actual_destlen+1; // add one for the rfc1950-style header byte. - toku_free(qsc); + assert(actual_destlen + 1 <= *destLen); + // add one for the rfc1950-style header byte. + *destLen = actual_destlen + 1; } // Fill in that first byte dest[0] = TOKU_QUICKLZ_METHOD + (QLZ_COMPRESSION_LEVEL << 4); diff --git a/ft/serialize/ft_node-serialize.cc b/ft/serialize/ft_node-serialize.cc index ff18fb12ba8..ec72ddad09e 100644 --- a/ft/serialize/ft_node-serialize.cc +++ b/ft/serialize/ft_node-serialize.cc @@ -376,13 +376,11 @@ static void serialize_child_buffer(NONLEAF_CHILDINFO bnc, struct wbuf *wb) { // static void serialize_ftnode_partition(FTNODE node, int i, struct sub_block *sb) { - if (sb->uncompressed_ptr == NULL) { - assert(sb->uncompressed_size == 0); - sb->uncompressed_size = serialize_ftnode_partition_size(node,i); - sb->uncompressed_ptr = toku_xmalloc(sb->uncompressed_size); - } else { - assert(sb->uncompressed_size > 0); - } + // Caller should have allocated memory. 
+ invariant_notnull(sb->uncompressed_ptr); + invariant(sb->uncompressed_size > 0); + paranoid_invariant(sb->uncompressed_size == serialize_ftnode_partition_size(node, i)); + // // Now put the data into sb->uncompressed_ptr // @@ -413,10 +411,10 @@ serialize_ftnode_partition(FTNODE node, int i, struct sub_block *sb) { // static void compress_ftnode_sub_block(struct sub_block *sb, enum toku_compression_method method) { - assert(sb->compressed_ptr == NULL); - set_compressed_size_bound(sb, method); - // add 8 extra bytes, 4 for compressed size, 4 for decompressed size - sb->compressed_ptr = toku_xmalloc(sb->compressed_size_bound + 8); + invariant(sb->compressed_ptr != nullptr); + invariant(sb->compressed_size_bound > 0); + paranoid_invariant(sb->compressed_size_bound == toku_compress_bound(method, sb->uncompressed_size)); + // // This probably seems a bit complicated. Here is what is going on. // In TokuDB 5.0, sub_blocks were compressed and the compressed data @@ -482,13 +480,12 @@ serialize_ftnode_info_size(FTNODE node) return retval; } -static void serialize_ftnode_info(FTNODE node, - SUB_BLOCK sb // output - ) { - assert(sb->uncompressed_size == 0); - assert(sb->uncompressed_ptr == NULL); - sb->uncompressed_size = serialize_ftnode_info_size(node); - sb->uncompressed_ptr = toku_xmalloc(sb->uncompressed_size); +static void serialize_ftnode_info(FTNODE node, SUB_BLOCK sb) { + // Memory must have been allocated by our caller. 
+ invariant(sb->uncompressed_size > 0); + invariant_notnull(sb->uncompressed_ptr); + paranoid_invariant(sb->uncompressed_size == serialize_ftnode_info_size(node)); + struct wbuf wb; wbuf_init(&wb, sb->uncompressed_ptr, sb->uncompressed_size); @@ -703,24 +700,40 @@ int toku_serialize_ftnode_to_memory(FTNODE node, // Each partition represents a compressed sub block // For internal nodes, a sub block is a message buffer // For leaf nodes, a sub block is a basement node - toku::scoped_malloc sb_buf(sizeof(struct sub_block) * npartitions); + toku::scoped_calloc sb_buf(sizeof(struct sub_block) * npartitions); struct sub_block *sb = reinterpret_cast(sb_buf.get()); XREALLOC_N(npartitions, *ndd); - struct sub_block sb_node_info; - for (int i = 0; i < npartitions; i++) { - sub_block_init(&sb[i]);; - } - sub_block_init(&sb_node_info); // // First, let's serialize and compress the individual sub blocks // - struct serialize_times st; - memset(&st, 0, sizeof(st)); + + // determine how large our serialization and compression buffers need to be. 
+ size_t serialize_buf_size = 0, compression_buf_size = 0; + for (int i = 0; i < node->n_children; i++) { + sb[i].uncompressed_size = serialize_ftnode_partition_size(node, i); + sb[i].compressed_size_bound = toku_compress_bound(compression_method, sb[i].uncompressed_size); + serialize_buf_size += sb[i].uncompressed_size; + compression_buf_size += sb[i].compressed_size_bound + 8; // add 8 extra bytes, 4 for compressed size, 4 for decompressed size + } + + // give each sub block a base pointer to enough buffer space for serialization and compression + toku::scoped_malloc serialize_buf(serialize_buf_size); + toku::scoped_malloc compression_buf(compression_buf_size); + for (size_t i = 0, uncompressed_offset = 0, compressed_offset = 0; i < (size_t) node->n_children; i++) { + sb[i].uncompressed_ptr = reinterpret_cast(serialize_buf.get()) + uncompressed_offset; + sb[i].compressed_ptr = reinterpret_cast(compression_buf.get()) + compressed_offset; + uncompressed_offset += sb[i].uncompressed_size; + compressed_offset += sb[i].compressed_size_bound + 8; // add 8 extra bytes, 4 for compressed size, 4 for decompressed size + invariant(uncompressed_offset <= serialize_buf_size); + invariant(compressed_offset <= compression_buf_size); + } + + // do the actual serialization now that we have buffer space + struct serialize_times st = { 0, 0 }; if (in_parallel) { serialize_and_compress_in_parallel(node, npartitions, compression_method, sb, &st); - } - else { + } else { serialize_and_compress_serially(node, npartitions, compression_method, sb, &st); } @@ -728,16 +741,31 @@ int toku_serialize_ftnode_to_memory(FTNODE node, // Now lets create a sub-block that has the common node information, // This does NOT include the header // + + // determine how large our serialization and copmression buffers need to be + struct sub_block sb_node_info; + sub_block_init(&sb_node_info); + size_t sb_node_info_uncompressed_size = serialize_ftnode_info_size(node); + size_t 
sb_node_info_compressed_size_bound = toku_compress_bound(compression_method, sb_node_info_uncompressed_size); + toku::scoped_malloc sb_node_info_uncompressed_buf(sb_node_info_uncompressed_size); + toku::scoped_malloc sb_node_info_compressed_buf(sb_node_info_compressed_size_bound + 8); // add 8 extra bytes, 4 for compressed size, 4 for decompressed size + sb_node_info.uncompressed_size = sb_node_info_uncompressed_size; + sb_node_info.uncompressed_ptr = sb_node_info_uncompressed_buf.get(); + sb_node_info.compressed_size_bound = sb_node_info_compressed_size_bound; + sb_node_info.compressed_ptr = sb_node_info_compressed_buf.get(); + + // do the actual serialization now that we have buffer space serialize_and_compress_sb_node_info(node, &sb_node_info, compression_method, &st); + // + // At this point, we have compressed each of our pieces into individual sub_blocks, + // we can put the header and all the subblocks into a single buffer and return it. + // + // update the serialize times, ignore the header for simplicity. we captured all // of the partitions' serialize times so that's probably good enough. toku_ft_status_update_serialize_times(node, st.serialize_time, st.compress_time); - // now we have compressed each of our pieces into individual sub_blocks, - // we can put the header and all the subblocks into a single buffer - // and return it. - // The total size of the node is: // size of header + disk size of the n+1 sub_block's created above uint32_t total_node_size = (serialize_node_header_size(node) // uncompressed header @@ -755,11 +783,10 @@ int toku_serialize_ftnode_to_memory(FTNODE node, total_uncompressed_size += sb[i].uncompressed_size + 4; } + // now create the final serialized node uint32_t total_buffer_size = roundup_to_multiple(512, total_node_size); // make the buffer be 512 bytes. 
- char *XMALLOC_N_ALIGNED(512, total_buffer_size, data); char *curr_ptr = data; - // now create the final serialized node // write the header struct wbuf wb; @@ -783,28 +810,15 @@ int toku_serialize_ftnode_to_memory(FTNODE node, curr_ptr += sizeof(sb[i].xsum); } // Zero the rest of the buffer - for (uint32_t i=total_node_size; i(*bytes_to_write) % 512 == 0); return 0; } @@ -1578,8 +1592,9 @@ deserialize_ftnode_header_from_rbuf_if_small_enough (FTNODE *ftnode, } // Now decompress the subblock - sb_node_info.uncompressed_ptr = toku_xmalloc(sb_node_info.uncompressed_size); { + toku::scoped_malloc sb_node_info_buf(sb_node_info.uncompressed_size); + sb_node_info.uncompressed_ptr = sb_node_info_buf.get(); tokutime_t decompress_t0 = toku_time_now(); toku_decompress( (Bytef *) sb_node_info.uncompressed_ptr, @@ -1589,17 +1604,14 @@ deserialize_ftnode_header_from_rbuf_if_small_enough (FTNODE *ftnode, ); tokutime_t decompress_t1 = toku_time_now(); decompress_time = decompress_t1 - decompress_t0; - } - // at this point sb->uncompressed_ptr stores the serialized node info. - r = deserialize_ftnode_info(&sb_node_info, node); - if (r != 0) { - goto cleanup; + // at this point sb->uncompressed_ptr stores the serialized node info. + r = deserialize_ftnode_info(&sb_node_info, node); + if (r != 0) { + goto cleanup; + } } - toku_free(sb_node_info.uncompressed_ptr); - sb_node_info.uncompressed_ptr = NULL; - // Now we have the ftnode_info. We have a bunch more stuff in the // rbuf, so we might be able to store the compressed data for some // objects. 
From 674454b0f7cca73ee87d557ef52eb9da2cb6ec22 Mon Sep 17 00:00:00 2001 From: John Esmet Date: Thu, 21 Aug 2014 15:35:06 -0400 Subject: [PATCH 148/190] FT-583 Remove sub_block_map.h, dead code from old format verisons --- ft/loader/loader.cc | 1 - ft/serialize/ft_node-serialize.cc | 29 ++++--- ft/serialize/sub_block_map.h | 125 ------------------------------ 3 files changed, 18 insertions(+), 137 deletions(-) delete mode 100644 ft/serialize/sub_block_map.h diff --git a/ft/loader/loader.cc b/ft/loader/loader.cc index 8c508265357..de904940d86 100644 --- a/ft/loader/loader.cc +++ b/ft/loader/loader.cc @@ -112,7 +112,6 @@ PATENT RIGHTS GRANT: #include "ft/serialize/ft-serialize.h" #include "ft/serialize/ft_node-serialize.h" #include "ft/serialize/sub_block.h" -#include "ft/serialize/sub_block_map.h" #include "util/x1764.h" diff --git a/ft/serialize/ft_node-serialize.cc b/ft/serialize/ft_node-serialize.cc index ec72ddad09e..9de8c744564 100644 --- a/ft/serialize/ft_node-serialize.cc +++ b/ft/serialize/ft_node-serialize.cc @@ -101,6 +101,7 @@ PATENT RIGHTS GRANT: #include "ft/serialize/block_table.h" #include "ft/serialize/compress.h" #include "ft/serialize/ft_node-serialize.h" +#include "ft/serialize/sub_block.h" #include "util/sort.h" #include "util/threadpool.h" #include "util/status.h" @@ -248,9 +249,6 @@ enum { 4), // build_id }; -#include "sub_block.h" -#include "sub_block_map.h" - // uncompressed header offsets enum { uncompressed_magic_offset = 0, @@ -1713,12 +1711,15 @@ deserialize_and_upgrade_internal_node(FTNODE node, } // Read in the child buffer maps. - struct sub_block_map child_buffer_map[node->n_children]; for (int i = 0; i < node->n_children; ++i) { - // The following fields are read in the - // sub_block_map_deserialize() call: - // 19. index 20. offset 21. 
size - sub_block_map_deserialize(&child_buffer_map[i], rb); + // The following fields were previously used by the `sub_block_map' + // They include: + // - 4 byte index + (void) rbuf_int(rb); + // - 4 byte offset + (void) rbuf_int(rb); + // - 4 byte size + (void) rbuf_int(rb); } // We need to setup this node's partitions, but we can't call the @@ -1838,9 +1839,15 @@ deserialize_and_upgrade_leaf_node(FTNODE node, // 11. Deserialize the partition maps, though they are not used in the // newer versions of ftnodes. - struct sub_block_map part_map[npartitions]; - for (int i = 0; i < npartitions; ++i) { - sub_block_map_deserialize(&part_map[i], rb); + for (int i = 0; i < node->n_children; ++i) { + // The following fields were previously used by the `sub_block_map' + // They include: + // - 4 byte index + (void) rbuf_int(rb); + // - 4 byte offset + (void) rbuf_int(rb); + // - 4 byte size + (void) rbuf_int(rb); } // Copy all of the leaf entries into the single basement node. diff --git a/ft/serialize/sub_block_map.h b/ft/serialize/sub_block_map.h deleted file mode 100644 index f2246279982..00000000000 --- a/ft/serialize/sub_block_map.h +++ /dev/null @@ -1,125 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: - -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). 
- - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. 
- - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#pragma once - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
- -// Map objects to a sequence of sub block -struct sub_block_map { - uint32_t idx; - uint32_t offset; - uint32_t size; -}; - -enum { - stored_sub_block_map_size = sizeof (struct sub_block_map), // size of a sub-block map on disk -}; - -static inline void -sub_block_map_init(struct sub_block_map *sbmap, uint32_t idx, uint32_t offset, uint32_t size) { - sbmap->idx = idx; - sbmap->offset = offset; - sbmap->size = size; -} - -static inline void -sub_block_map_serialize(struct sub_block_map *sbmap, struct wbuf *wbuf) { - wbuf_nocrc_int(wbuf, sbmap->idx); - wbuf_nocrc_int(wbuf, sbmap->offset); - wbuf_nocrc_int(wbuf, sbmap->size); -} - -static inline void -sub_block_map_deserialize(struct sub_block_map *sbmap, struct rbuf *rbuf) { - sbmap->idx = rbuf_int(rbuf); - sbmap->offset = rbuf_int(rbuf); - sbmap->size = rbuf_int(rbuf); -} From addd9c3c59711a41a9c4e245ca23d2a40e0e997d Mon Sep 17 00:00:00 2001 From: John Esmet Date: Thu, 21 Aug 2014 15:35:06 -0400 Subject: [PATCH 149/190] FT-580 Remove unused scripts --- scripts/run.db-benchmark-test.bash | 201 ----------- scripts/run.fractal.tree.tests.bash | 460 ------------------------ scripts/run.fractal.tree.tests.icc.bash | 2 - scripts/run.fractal.tree.tests.now.bash | 7 - scripts/run.loader.stress.bash | 164 --------- scripts/run.stress-tests.bash | 332 ----------------- src/tests/run_test1426.sh | 19 - 7 files changed, 1185 deletions(-) delete mode 100755 scripts/run.db-benchmark-test.bash delete mode 100755 scripts/run.fractal.tree.tests.bash delete mode 100755 scripts/run.fractal.tree.tests.icc.bash delete mode 100755 scripts/run.fractal.tree.tests.now.bash delete mode 100755 scripts/run.loader.stress.bash delete mode 100755 scripts/run.stress-tests.bash delete mode 100755 src/tests/run_test1426.sh diff --git a/scripts/run.db-benchmark-test.bash b/scripts/run.db-benchmark-test.bash deleted file mode 100755 index ebd2a188f10..00000000000 --- a/scripts/run.db-benchmark-test.bash +++ /dev/null @@ -1,201 +0,0 @@ 
-#!/usr/bin/env bash - -function usage() { - echo "run db-benchmark-test" - echo "[--tokudb=$tokudb" - echo "[--revision=$revision]" - echo "[--branch=$branch]" - echo "[--suffix=$suffix]" - echo "[--commit=$commit]" - echo "[--cc=$cc]" - echo "[--n=$n]" -} - -function retry() { - local cmd - local retries - local exitcode - cmd=$* - let retries=0 - while [ $retries -le 10 ] ; do - echo `date` $cmd - bash -c "$cmd" - exitcode=$? - echo `date` $cmd $exitcode $retries - let retries=retries+1 - if [ $exitcode -eq 0 ] ; then break; fi - sleep 10 - done - test $exitcode = 0 -} - -n=100 -cc=gcc44 -ft_loader=cilk -branch=toku -revision=0 -tokudb=tokudb -suffix=. -commit=0 -svnserver=https://svn.tokutek.com/tokudb -basedir=$HOME/svn.build -builddir=$basedir/tokudb.build -system=`uname -s | tr [:upper:] [:lower:]` -arch=`uname -m | tr [:upper:] [:lower:]` -hostname=`hostname` -instancetype="" - -# parse the command line -while [ $# -gt 0 ] ; do - arg=$1; shift - if [[ $arg =~ --(.*)=(.*) ]] ; then - eval ${BASH_REMATCH[1]}=${BASH_REMATCH[2]} - else - usage; exit 1 - fi -done - -if [ $cc = icc ] ; then - d=/opt/intel/bin - if [ -d $d ] ; then - export PATH=$d:$PATH - . $d/compilervars.sh intel64 - fi - d=/opt/intel/cilkutil/bin - if [ -d $d ] ; then - export PATH=$d:$PATH - fi -fi - -# require a revision -if [ $revision -eq 0 ] ; then exit 1; fi -if [ $branch = "." ] ; then branch="toku"; fi - -function append() { - local s=""; local x - for x in $*; do - if [ "$s" != "" ] ; then s=$s-$x; else s=$x; fi - done - echo $s -} - -# setup the branchrevision string -branchrevision="" -if [ $branch != "toku" ] ; then branchrevision=$(append $branchrevision $(basename $branch)); fi -if [ $tokudb != "tokudb" ] ; then branchrevision=$(append $branchrevision $tokudb); fi -branchrevision=$(append $branchrevision $revision) -if [ $suffix != "." ] ; then branchrevision=$(append $branchrevision $suffix); fi - -# goto the base directory -if [ ! 
-d $basedir ] ; then mkdir $basedir; fi - -pushd $basedir - -# update the build directory -if [ ! -d $builddir ] ; then mkdir $builddir; fi - -date=`date +%Y%m%d` -pushd $builddir - while [ ! -d $date ] ; do - svn mkdir $svnserver/mysql.build/$date -m "" - svn co -q $svnserver/mysql.build/$date - if [ $? -ne 0 ] ; then rm -rf $date; fi - done -popd -testresultsdir=$builddir/$date - -gccversion=`$cc --version|head -1|cut -f3 -d" "` - -runfile=$testresultsdir/db-benchmark-test-$branchrevision-$cc-$gccversion-$system-$arch-$hostname -if [ "$instancetype" != "" ] ; then runfile=$runfile-$instancetype; fi -rm -rf $runfile - -testresult="PASS" -testdir=db-benchmark-test-$branchrevision -rm -rf $testdir - -# checkout the tokudb branch -if [ $testresult = "PASS" ] ; then - retry svn export -q https://svn.tokutek.com/tokudb/$branch/$tokudb $testdir - exitcode=$? - if [ $exitcode != 0 ] ; then testresult="FAIL"; fi -fi - -# build it -if [ $testresult = "PASS" ] ; then - pushd $testdir - make release -s CC=$cc GCCVERSION=$gccversion FTLOADER=$ft_loader >>$runfile 2>&1 - exitcode=$? - if [ $exitcode != 0 ] ; then testresult="FAIL"; fi - popd - pushd $testdir/db-benchmark-test - make build.tdb CC=$cc GCCVERSION=$gccversion -s >>$runfile 2>&1 - exitcode=$? - if [ $exitcode != 0 ] ; then testresult="FAIL"; fi - popd -fi - -# run tests -if [ $testresult = "PASS" ] ; then - let i=$n - pushd $testdir/db-benchmark-test - echo ./db-benchmark-test-tokudb -x $i >>$runfile 2>&1 - ./db-benchmark-test-tokudb -x $i >>$runfile 2>&1 - exitcode=$? - if [ $exitcode != 0 ] ; then testresult="FAIL"; fi - echo ./scanscan-tokudb --prelock --prelockflag >>$runfile 2>&1 - ./scanscan-tokudb --prelock --prelockflag >>$runfile 2>&1 - exitcode=$? - if [ $exitcode != 0 ] ; then testresult="FAIL"; fi - echo ./scanscan-tokudb --lwc --prelock --prelockflag >>$runfile 2>&1 - ./scanscan-tokudb --lwc --prelock --prelockflag >>$runfile 2>&1 - exitcode=$? 
- if [ $exitcode != 0 ] ; then testresult="FAIL"; fi - popd -fi - -if [ $testresult = "PASS" ] ; then - let i=2*$n - pushd $testdir/db-benchmark-test - echo ./db-benchmark-test-tokudb -x --norandom $i >>$runfile 2>&1 - ./db-benchmark-test-tokudb -x --norandom $i >>$runfile 2>&1 - exitcode=$? - if [ $exitcode != 0 ] ; then testresult="FAIL"; fi - echo ./scanscan-tokudb --prelock --prelockflag >>$runfile 2>&1 - ./scanscan-tokudb --prelock --prelockflag >>$runfile 2>&1 - exitcode=$? - if [ $exitcode != 0 ] ; then testresult="FAIL"; fi - echo ./scanscan-tokudb --lwc --prelock --prelockflag >>$runfile 2>&1 - ./scanscan-tokudb --lwc --prelock --prelockflag >>$runfile 2>&1 - exitcode=$? - if [ $exitcode != 0 ] ; then testresult="FAIL"; fi - popd -fi - -if [ $testresult = "PASS" ] ; then - let i=2*$n - pushd $testdir/db-benchmark-test - echo ./db-benchmark-test-tokudb -x --noserial $i >>$runfile 2>&1 - ./db-benchmark-test-tokudb -x --noserial $i >>$runfile 2>&1 - exitcode=$? - if [ $exitcode != 0 ] ; then testresult="FAIL"; fi - echo ./scanscan-tokudb --prelock --prelockflag >>$runfile 2>&1 - ./scanscan-tokudb --prelock --prelockflag >>$runfile 2>&1 - exitcode=$? - if [ $exitcode != 0 ] ; then testresult="FAIL"; fi - echo ./scanscan-tokudb --lwc --prelock --prelockflag >>$runfile 2>&1 - ./scanscan-tokudb --lwc --prelock --prelockflag >>$runfile 2>&1 - exitcode=$? 
- if [ $exitcode != 0 ] ; then testresult="FAIL"; fi - popd -fi - -# commit results -if [ $commit != 0 ] ; then - svn add $runfile - retry svn commit -m \"$testresult db-benchmark-test $branchrevision $system $arch\" $runfile -fi - -popd - -exit 0 diff --git a/scripts/run.fractal.tree.tests.bash b/scripts/run.fractal.tree.tests.bash deleted file mode 100755 index 23900424af2..00000000000 --- a/scripts/run.fractal.tree.tests.bash +++ /dev/null @@ -1,460 +0,0 @@ -#!/bin/bash - -function usage() { - echo "run.fractal.tree.tests.bash - run the nightly fractal tree test suite" - echo "[--ftcc=$ftcc] [--ftcxx=$ftcxx] [--BDBVERSION=$BDBVERSION] [--ctest_model=$ctest_model]" - echo "[--commit=$commit] [--generator=$generator] [--toku_svnroot=$toku_svnroot]" - return 1 -} - -[ -f /etc/profile.d/gcc47.sh ] && . /etc/profile.d/gcc47.sh -[ -f /etc/profile.d/binutils222.sh ] && . /etc/profile.d/binutils222.sh - -set -e - -pushd $(dirname $0) &>/dev/null -SCRIPTDIR=$PWD -popd &>/dev/null -FULLTOKUDBDIR=$(dirname $SCRIPTDIR) -TOKUDBDIR=$(basename $FULLTOKUDBDIR) -BRANCHDIR=$(basename $(dirname $FULLTOKUDBDIR)) - -function make_tokudb_name() { - local tokudb_dir=$1 - local tokudb=$2 - if [ $tokudb_dir = "toku" ] ; then - echo $tokudb - else - echo $(echo $tokudb_dir-$tokudb | tr / -) - fi -} -tokudb_name=$(make_tokudb_name $BRANCHDIR $TOKUDBDIR) -export TOKUDB_NAME=$tokudb_name - -productname=$tokudb_name - -ftcc=gcc47 -ftcxx=g++47 -BDBVERSION=5.3 -ctest_model=Nightly -generator="Unix Makefiles" -toku_svnroot=$FULLTOKUDBDIR/../.. -commit=1 -while [ $# -gt 0 ] ; do - arg=$1; shift - if [[ $arg =~ --(.*)=(.*) ]] ; then - eval ${BASH_REMATCH[1]}=${BASH_REMATCH[2]} - else - usage; exit 1; - fi -done - -if [[ ! 
( ( $ctest_model = Nightly ) || ( $ctest_model = Experimental ) || ( $ctest_model = Continuous ) ) ]]; then - echo "--ctest_model must be Nightly, Experimental, or Continuous" - usage -fi - -BDBDIR=/usr/local/BerkeleyDB.$BDBVERSION -if [ -d $BDBDIR ] ; then - CMAKE_PREFIX_PATH=$BDBDIR:$CMAKE_PREFIX_PATH - export CMAKE_PREFIX_PATH -fi - -# delete some characters that cygwin and osx have trouble with -function sanitize() { - tr -d '[/:\\\\()]' -} - -# gather some info -svnserver=https://svn.tokutek.com/tokudb -nodename=$(uname -n) -system=$(uname -s | tr '[:upper:]' '[:lower:]' | sanitize) -release=$(uname -r | sanitize) -arch=$(uname -m | sanitize) -date=$(date +%Y%m%d) -ncpus=$([ -f /proc/cpuinfo ] && (grep bogomips /proc/cpuinfo | wc -l) || sysctl -n hw.ncpu) -njobs=$(if [ $ncpus -gt 8 ] ; then echo "$ncpus / 3" | bc ; else echo "$ncpus" ; fi) - -GCCVERSION=$($ftcc --version|head -1|cut -f3 -d" ") -export GCCVERSION -CC=$ftcc -export CC -CXX=$ftcxx -export CXX - -function retry() { - local cmd - local retries - local exitcode - cmd=$* - let retries=0 - while [ $retries -le 10 ] ; do - echo `date` $cmd - bash -c "$cmd" - exitcode=$? - echo `date` $cmd $exitcode $retries - let retries=retries+1 - if [ $exitcode -eq 0 ] ; then break; fi - sleep 10 - done - test $exitcode = 0 -} - -if [[ $commit -eq 1 ]]; then - svnbase=~/svn.build - if [ ! -d $svnbase ] ; then mkdir $svnbase ; fi - - # checkout the build dir - buildbase=$svnbase/tokudb.build - if [ ! -d $buildbase ] ; then - mkdir $buildbase - fi - - # make the build directory, possibly on multiple machines simultaneously, there can be only one - builddir=$buildbase/$date - pushd $buildbase - set +e - svn mkdir $svnserver/tokudb.build/$date -m "" || true - retry svn co -q $svnserver/tokudb.build/$date - if [ ! 
-d $date ] ; then - exit 1 - fi - set -e - popd - - tracefilepfx=$builddir/$productname+$ftcc-$GCCVERSION+bdb-$BDBVERSION+$nodename+$system+$release+$arch -else - tracefilepfx=$FULLTOKUDBDIR/test-trace -fi - -function getsysinfo() { - tracefile=$1; shift - set +e - uname -a >$tracefile 2>&1 - ulimit -a >>$tracefile 2>&1 - cmake --version >>$tracefile 2>&1 - $ftcc -v >>$tracefile 2>&1 - $ftcxx -v >>$tracefile 2>&1 - valgrind --version >>$tracefile 2>&1 - cat /etc/issue >>$tracefile 2>&1 - cat /proc/version >>$tracefile 2>&1 - cat /proc/cpuinfo >>$tracefile 2>&1 - env >>$tracefile 2>&1 - set -e -} - -function get_latest_svn_revision() { - svn info $1 | awk -v ORS="" '/Last Changed Rev:/ { print $4 }' -} - -function my_mktemp() { - mktemp /tmp/$(whoami).$1.XXXXXXXXXX -} - -yesterday="$(date -u -d yesterday +%F) 03:59:00 +0000" - -if [[ $commit -eq 1 ]]; then - # hack to make long tests run nightly but not when run in experimental mode - longtests=ON -else - longtests=OFF -fi -################################################################################ -## run normal and valgrind on optimized build -resultsdir=$tracefilepfx-Release -mkdir $resultsdir -tracefile=$tracefilepfx-Release/trace - -getsysinfo $tracefile - -mkdir -p $FULLTOKUDBDIR/opt >/dev/null 2>&1 -cd $FULLTOKUDBDIR/opt -cmake \ - -D CMAKE_BUILD_TYPE=Release \ - -D USE_VALGRIND=ON \ - -D USE_BDB=ON \ - -D RUN_LONG_TESTS=$longtests \ - -D USE_CTAGS=OFF \ - -D USE_GTAGS=OFF \ - -D USE_ETAGS=OFF \ - -D USE_CSCOPE=OFF \ - -D TOKU_SVNROOT="$toku_svnroot" \ - -G "$generator" \ - .. 2>&1 | tee -a $tracefile -cmake --system-information $resultsdir/sysinfo -make clean -# update to yesterday exactly just before ctest does nightly update -svn up -q -r "{$yesterday}" .. 
-set +e -ctest -j$njobs \ - -D ${ctest_model}Start \ - -D ${ctest_model}Update \ - -D ${ctest_model}Configure \ - -D ${ctest_model}Build \ - -D ${ctest_model}Test \ - -E '/drd|/helgrind' \ - 2>&1 | tee -a $tracefile -ctest -j$njobs \ - -D ${ctest_model}MemCheck \ - -E '^ydb/.*\.bdb$|test1426.tdb|/drd|/helgrind' \ - 2>&1 | tee -a $tracefile -set -e - -cp $tracefile notes.txt -set +e -ctest -D ${ctest_model}Submit -A notes.txt \ - 2>&1 | tee -a $tracefile -set -e -rm notes.txt - -tag=$(head -n1 Testing/TAG) -cp -r Testing/$tag $resultsdir -if [[ $commit -eq 1 ]]; then - cf=$(my_mktemp ftresult) - cat "$resultsdir/trace" | awk ' -BEGIN { - errs=0; - look=0; - ORS=" "; -} -/[0-9]+% tests passed, [0-9]+ tests failed out of [0-9]+/ { - fail=$4; - total=$9; - pass=total-fail; -} -/^Memory checking results:/ { - look=1; - FS=" - "; -} -/Errors while running CTest/ { - look=0; - FS=" "; -} -{ - if (look) { - errs+=$2; - } -} -END { - print "ERRORS=" errs; - if (fail>0) { - print "FAIL=" fail - } - print "PASS=" pass -}' >"$cf" - get_latest_svn_revision $FULLTOKUDBDIR >>"$cf" - echo -n " " >>"$cf" - cat "$resultsdir/trace" | awk ' -BEGIN { - FS=": "; -} -/Build name/ { - print $2; - exit -}' >>"$cf" - (echo; echo) >>"$cf" - cat "$resultsdir/trace" | awk ' -BEGIN { - printit=0 -} -/[0-9]*\% tests passed, [0-9]* tests failed out of [0-9]*/ { printit=1 } -/Memory check project/ { printit=0 } -/^ Site:/ { printit=0 } -{ - if (printit) { - print $0 - } -}' >>"$cf" - svn add $resultsdir - svn commit -F "$cf" $resultsdir - rm $cf -fi - -################################################################################ -## run drd tests on debug build -resultsdir=$tracefilepfx-Debug -mkdir $resultsdir -tracefile=$tracefilepfx-Debug/trace - -getsysinfo $tracefile - -mkdir -p $FULLTOKUDBDIR/dbg >/dev/null 2>&1 -cd $FULLTOKUDBDIR/dbg -cmake \ - -D CMAKE_BUILD_TYPE=Debug \ - -D USE_VALGRIND=ON \ - -D USE_BDB=OFF \ - -D RUN_LONG_TESTS=$longtests \ - -D USE_CTAGS=OFF \ - -D USE_GTAGS=OFF \ 
- -D USE_ETAGS=OFF \ - -D USE_CSCOPE=OFF \ - -D CMAKE_C_FLAGS_DEBUG="-O1" \ - -D CMAKE_CXX_FLAGS_DEBUG="-O1" \ - -D TOKU_SVNROOT="$toku_svnroot" \ - -G "$generator" \ - .. 2>&1 | tee -a $tracefile -cmake --system-information $resultsdir/sysinfo -make clean -# update to yesterday exactly just before ctest does nightly update -svn up -q -r "{$yesterday}" .. -set +e -ctest -j$njobs \ - -D ${ctest_model}Start \ - -D ${ctest_model}Update \ - -D ${ctest_model}Configure \ - -D ${ctest_model}Build \ - -D ${ctest_model}Test \ - -R '/drd|/helgrind' \ - 2>&1 | tee -a $tracefile -set -e - -cp $tracefile notes.txt -set +e -ctest -D ${ctest_model}Submit -A notes.txt \ - 2>&1 | tee -a $tracefile -set -e -rm notes.txt - -tag=$(head -n1 Testing/TAG) -cp -r Testing/$tag $resultsdir -if [[ $commit -eq 1 ]]; then - cf=$(my_mktemp ftresult) - cat "$resultsdir/trace" | awk ' -BEGIN { - ORS=" "; -} -/[0-9]+% tests passed, [0-9]+ tests failed out of [0-9]+/ { - fail=$4; - total=$9; - pass=total-fail; -} -END { - if (fail>0) { - print "FAIL=" fail - } - print "PASS=" pass -}' >"$cf" - get_latest_svn_revision $FULLTOKUDBDIR >>"$cf" - echo -n " " >>"$cf" - cat "$resultsdir/trace" | awk ' -BEGIN { - FS=": "; -} -/Build name/ { - print $2; - exit -}' >>"$cf" - (echo; echo) >>"$cf" - cat "$resultsdir/trace" | awk ' -BEGIN { - printit=0 -} -/[0-9]*\% tests passed, [0-9]* tests failed out of [0-9]*/ { printit=1 } -/^ Site:/ { printit=0 } -{ - if (printit) { - print $0 - } -}' >>"$cf" - svn add $resultsdir - svn commit -F "$cf" $resultsdir - rm $cf -fi - -################################################################################ -## run gcov on debug build -resultsdir=$tracefilepfx-Coverage -mkdir $resultsdir -tracefile=$tracefilepfx-Coverage/trace - -getsysinfo $tracefile - -mkdir -p $FULLTOKUDBDIR/cov >/dev/null 2>&1 -cd $FULLTOKUDBDIR/cov -cmake \ - -D CMAKE_BUILD_TYPE=Debug \ - -D BUILD_TESTING=ON \ - -D USE_GCOV=ON \ - -D USE_BDB=OFF \ - -D RUN_LONG_TESTS=$longtests \ - -D USE_CTAGS=OFF 
\ - -D USE_GTAGS=OFF \ - -D USE_ETAGS=OFF \ - -D USE_CSCOPE=OFF \ - -D TOKU_SVNROOT="$toku_svnroot" \ - -G "$generator" \ - .. 2>&1 | tee -a $tracefile -cmake --system-information $resultsdir/sysinfo -make clean -# update to yesterday exactly just before ctest does nightly update -svn up -q -r "{$yesterday}" .. -set +e -ctest -j$njobs \ - -D ${ctest_model}Start \ - -D ${ctest_model}Update \ - -D ${ctest_model}Configure \ - -D ${ctest_model}Build \ - -D ${ctest_model}Test \ - -D ${ctest_model}Coverage \ - 2>&1 | tee -a $tracefile -set -e - -cp $tracefile notes.txt -set +e -ctest -D ${ctest_model}Submit -A notes.txt \ - 2>&1 | tee -a $tracefile -set -e -rm notes.txt - -tag=$(head -n1 Testing/TAG) -cp -r Testing/$tag $resultsdir -if [[ $commit -eq 1 ]]; then - cf=$(my_mktemp ftresult) - cat "$resultsdir/trace" | awk ' -BEGIN { - ORS=" "; -} -/Percentage Coverage:/ { - covpct=$3; -} -/[0-9]+% tests passed, [0-9]+ tests failed out of [0-9]+/ { - fail=$4; - total=$9; - pass=total-fail; -} -END { - print "COVERAGE=" covpct - if (fail>0) { - print "FAIL=" fail - } - print "PASS=" pass -}' >"$cf" - get_latest_svn_revision $FULLTOKUDBDIR >>"$cf" - echo -n " " >>"$cf" - cat "$resultsdir/trace" | awk ' -BEGIN { - FS=": "; -} -/Build name/ { - print $2; - exit -}' >>"$cf" - (echo; echo) >>"$cf" - cat "$resultsdir/trace" | awk ' -BEGIN { - printit=0 -} -/[0-9]*\% tests passed, [0-9]* tests failed out of [0-9]*/ { printit=1 } -/^ Site:/ { printit=0 } -{ - if (printit) { - print $0 - } -}' >>"$cf" - svn add $resultsdir - svn commit -F "$cf" $resultsdir - rm $cf -fi - -exit 0 diff --git a/scripts/run.fractal.tree.tests.icc.bash b/scripts/run.fractal.tree.tests.icc.bash deleted file mode 100755 index 2c62504619e..00000000000 --- a/scripts/run.fractal.tree.tests.icc.bash +++ /dev/null @@ -1,2 +0,0 @@ -#!/usr/bin/env bash -run.fractal.tree.tests.bash --ftcc=icc $* diff --git a/scripts/run.fractal.tree.tests.now.bash b/scripts/run.fractal.tree.tests.now.bash deleted file mode 100755 
index 661548f5ada..00000000000 --- a/scripts/run.fractal.tree.tests.now.bash +++ /dev/null @@ -1,7 +0,0 @@ -#!/bin/bash - -pushd $(dirname $0) &>/dev/null -SCRIPTDIR=$PWD -popd &>/dev/null - -exec $SCRIPTDIR/run.fractal.tree.tests.bash --ctest_model=Experimental --commit=0 "$@" diff --git a/scripts/run.loader.stress.bash b/scripts/run.loader.stress.bash deleted file mode 100755 index 1d4232c1bb3..00000000000 --- a/scripts/run.loader.stress.bash +++ /dev/null @@ -1,164 +0,0 @@ -#!/usr/bin/env bash - -function usage() { - echo "run the loader verify test" - echo "[--rows=$rows]" - echo "[--dictionaries=$dictionaries]" - echo "[--ft_loader=$ft_loader]" - echo "[--tokudb=$tokudb]" - echo "[--branch=$branch]" - echo "[--revision=$revision]" - echo "[--suffix=$suffix]" - echo "[--commit=$commit]" -} - -function retry() { - local cmd - local retries - local exitcode - cmd=$* - let retries=0 - while [ $retries -le 10 ] ; do - echo `date` $cmd - bash -c "$cmd" - exitcode=$? - echo `date` $cmd $exitcode $retries - let retries=retries+1 - if [ $exitcode -eq 0 ] ; then break; fi - sleep 10 - done - test $exitcode = 0 -} - -rows=100000000 -dictionaries=3 -ft_loader=cilk -tokudb=tokudb -branch=. -revision=0 -suffix=. -commit=0 -svnserver=https://svn.tokutek.com/tokudb -basedir=~/svn.build -builddir=$basedir/mysql.build -system=`uname -s | tr [:upper:] [:lower:]` -arch=`uname -m | tr [:upper:] [:lower:]` -myhost=`hostname` -instancetype="" -ftcc=gcc -have_cilk=0 - -# parse the command line -while [ $# -gt 0 ] ; do - arg=$1; shift - if [[ $arg =~ --(.*)=(.*) ]] ; then - eval ${BASH_REMATCH[1]}=${BASH_REMATCH[2]} - else - usage; exit 1 - fi -done - -# require a revision -if [ $revision -eq 0 ] ; then - exit 1 -fi - -# build -if [ $ftcc = icc ] ; then - d=/opt/intel/bin - if [ -d $d ] ; then - export PATH=$d:$PATH - . 
$d/compilervars.sh intel64 - fi - d=/opt/intel/cilkutil/bin - if [ -d $d ] ; then - export PATH=$d:$PATH - fi -fi - -# setup the branchrevision string -if [ $branch = "." ] ; then - branchrevision=$revision -else - branchrevision=`basename $branch`-$revision -fi -if [ $suffix != "." ] ; then - branchrevision=$branchrevision-$suffix -fi - -ftccversion=$($ftcc --version|head -1|cut -f3 -d" ") - -# goto the base directory -if [ ! -d $basedir ] ; then mkdir $basedir; fi - -pushd $basedir - -# update the build directory -if [ ! -d $builddir ] ; then mkdir $builddir; fi - -date=`date +%Y%m%d` -testresultsdir=$builddir/$date -pushd $builddir - while [ ! -d $date ] ; do - svn mkdir $svnserver/mysql.build/$date -m "" - svn checkout $svnserver/mysql.build/$date - if [ $? -ne 0 ] ; then rm -rf $date; fi - done -popd - -testresult="PASS" -runfile=$testresultsdir/loader-stress-$rows-$dictionaries-$tokudb-$branchrevision-$ftcc-$ftccversion-$system-$arch-$myhost -if [ "$instancetype" != "" ] ; then runfilefile=$runfile-$instancetype; fi -rm -f $runfile - -# checkout the code -if [ -d loader-stress-$branchrevision ] ; then rm -rf loader-stress-$branchrevision; fi -mkdir loader-stress-$branchrevision - -if [ $branch = "." ] ; then branch=toku; fi - -retry svn export -r $revision -q $svnserver/$branch/$tokudb loader-stress-$branchrevision/$tokudb -exitcode=$? -if [ $exitcode != 0 ] ; then - testresult="FAIL" -fi - -if [ $testresult = "PASS" ] ; then - pushd loader-stress-$branchrevision/$tokudb - echo `date` make release -s CC=$ftcc HAVE_CILK=$have_cilk FTLOADER=$ft_loader >>$runfile - make -s release CC=$ftcc HAVE_CILK=$have_cilk FTLOADER=$ft_loader >>$runfile 2>&1 - exitcode=$? 
- echo `date` complete $exitcode >>$runfile - if [ $exitcode != 0 ] ; then testresult="FAIL"; fi - popd -fi -if [ $testresult = "PASS" ] ; then - pushd loader-stress-$branchrevision/$tokudb/src/tests - echo `date` make loader-stress-test.tdb CC=$ftcc HAVE_CILK=$have_cilk >>$runfile - make loader-stress-test.tdb -s CC=$ftcc HAVE_CILK=$have_cilk >>$runfile 2>&1 - exitcode=$? - echo `date` complete $exitcode >>$runfile - if [ $exitcode != 0 ] ; then testresult="FAIL"; fi - popd -fi - -# run -if [ $testresult = "PASS" ] ; then - pushd loader-stress-$branchrevision/$tokudb/src/tests - echo `date` ./loader-stress-test.tdb -v -r $rows -d $dictionaries -c >>$runfile - ./loader-stress-test.tdb -v -r $rows -d $dictionaries -c >>$runfile 2>&1 - exitcode=$? - echo `date` complete $exitcode >>$runfile - if [ $exitcode != 0 ] ; then testresult="FAIL"; fi - popd -fi - -if [ $commit != 0 ] ; then - svn add $runfile - retry svn commit -m \"$testresult loader stress $rows $dictionaries $tokudb $branchrevision $ftcc $ftccversion $system $arch $myhost\" $runfile -fi - -popd - -if [ $testresult = "PASS" ] ; then exitcode=0; else exitcode=1; fi -exit $exitcode diff --git a/scripts/run.stress-tests.bash b/scripts/run.stress-tests.bash deleted file mode 100755 index b3d0e197d7b..00000000000 --- a/scripts/run.stress-tests.bash +++ /dev/null @@ -1,332 +0,0 @@ -#!/bin/bash -# $Id$ - -DOC=<&2 - echo " [--toku_toplevel=]" 1>&2 - echo " [--log=]" 1>&2 - echo " [--savedir=]" 1>&2 -} - -# parse the command line -while [ $# -gt 0 ] ; do - arg=$1; shift - if [[ $arg =~ --(.*)=(.*) ]] ; then - ok=no - for opt in toku_toplevel log savedir - do - if [[ ${BASH_REMATCH[1]} = $opt ]] - then - ok=yes - fi - done - if [[ $ok = no ]] - then - usage; exit 1 - fi - eval ${BASH_REMATCH[1]}=${BASH_REMATCH[2]} - else - usage; exit 1 - fi -done - -src_tests="${toku_toplevel}/src/tests" -testnames=(test_stress1.tdb \ - test_stress5.tdb \ - test_stress6.tdb) -recover_testnames=(recover-test_stress1.tdb \ - 
recover-test_stress2.tdb \ - recover-test_stress3.tdb) - -save_failure() { - dir="$1"; shift - out="$1"; shift - envdir="$1"; shift - rev=$1; shift - exec="$1"; shift - table_size=$1; shift - cachetable_size=$1; shift - num_ptquery=$1; shift - num_update=$1; shift - phase=$1; shift - dest="${dir}/${exec}-${table_size}-${cachetable_size}-${num_ptquery}-${num_update}-${phase}-${rev}-$$" - mkdir -p "$dest" - mv $out "${dest}/output.txt" - mv core* "${dest}/" - mv $envdir "${dest}/" -} - -running=no - -run_test() { - rev=$1; shift - exec="$1"; shift - table_size="$1"; shift - cachetable_size="$1"; shift - num_ptquery="$1"; shift - num_update="$1"; shift - mylog="$1"; shift - mysavedir="$1"; shift - - rundir=$(mktemp -d ./rundir.XXXXXXXX) - tmplog=$(mktemp) - - ulimit -c unlimited - t0="$(date)" - t1="" - t2="" - envdir="../${exec}-${table_size}-${cachetable_size}-${num_ptquery}-${num_update}-$$.dir" - cd $rundir - if LD_LIBRARY_PATH=../../../lib:$LD_LIBRARY_PATH \ - ../$exec -v --only_create --num_seconds 600 --envdir "$envdir" \ - --num_elements $table_size \ - --cachetable_size $cachetable_size &> $tmplog - then - rm -f $tmplog - t1="$(date)" - if LD_LIBRARY_PATH=../../../lib:$LD_LIBRARY_PATH \ - ../$exec -v --only_stress --num_seconds 600 --no-crash_on_update_failure --envdir "$envdir" \ - --num_elements $table_size \ - --cachetable_size $cachetable_size \ - --num_ptquery_threads $num_ptquery \ - --num_update_threads $num_update &> $tmplog - then - rm -f $tmplog - t2="$(date)" - echo "\"$exec\",$rev,$table_size,$cachetable_size,$num_ptquery,$num_update,$t0,$t1,$t2,PASS" | tee -a "$mylog" - else - save_failure "$mysavedir" $tmplog $envdir $rev $exec $table_size $cachetable_size $num_ptquery $num_update stress - echo "\"$exec\",$rev,$table_size,$cachetable_size,$num_ptquery,$num_update,$t0,$t1,$t2,FAIL" | tee -a "$mylog" - fi - else - save_failure "$mysavedir" $tmplog $envdir $rev $exec $table_size $cachetable_size $num_ptquery $num_update create - echo 
"\"$exec\",$rev,$table_size,$cachetable_size,$num_ptquery,$num_update,$t0,$t1,$t2,FAIL" | tee -a "$mylog" - fi - cd .. - rm -rf $rundir "$envdir" -} - -loop_test() { - rev=$1; shift - exec="$1"; shift - table_size="$1"; shift - cachetable_size="$1"; shift - mylog="$1"; shift - mysavedir="$1"; shift - - ptquery_rand=0 - update_rand=0 - while [[ $running = "yes" ]] - do - num_ptquery=1 - num_update=1 - if [[ $ptquery_rand -gt 1 ]] - then - (( num_ptquery = $RANDOM % 16 )) - fi - if [[ $update_rand -gt 0 ]] - then - (( num_update = $RANDOM % 16 )) - fi - (( ptquery_rand = (ptquery_rand + 1) % 4 )) - (( update_rand = (update_rand + 1) % 2 )) - run_test $rev $exec $table_size $cachetable_size $num_ptquery $num_update $mylog $mysavedir - done -} - -run_recover_test() { - rev=$1; shift - exec="$1"; shift - table_size="$1"; shift - cachetable_size="$1"; shift - num_ptquery="$1"; shift - num_update="$1"; shift - mylog="$1"; shift - mysavedir="$1"; shift - - rundir=$(mktemp -d ./rundir.XXXXXXXX) - tmplog=$(mktemp) - - ulimit -c unlimited - t0="$(date)" - t1="" - t2="" - envdir="../${exec}-${table_size}-${cachetable_size}-${num_ptquery}-${num_update}-$$.dir" - cd $rundir - if ! 
LD_LIBRARY_PATH=../../../lib:$LD_LIBRARY_PATH \ - ../$exec -v --test --num_seconds 600 --no-crash_on_update_failure --envdir "$envdir" \ - --num_elements $table_size \ - --cachetable_size $cachetable_size \ - --num_ptquery_threads $num_ptquery \ - --num_update_threads $num_update &> $tmplog - then - rm -f $tmplog - t1="$(date)" - if LD_LIBRARY_PATH=../../../lib:$LD_LIBRARY_PATH \ - ../$exec -v --recover --envdir "$envdir" \ - --num_elements $table_size \ - --cachetable_size $cachetable_size &> $tmplog - then - rm -f $tmplog - t2="$(date)" - echo "\"$exec\",$rev,$table_size,$cachetable_size,$num_ptquery,$num_update,$t0,$t1,$t2,PASS" | tee -a "$mylog" - else - save_failure "$mysavedir" $tmplog $envdir $rev $exec $table_size $cachetable_size $num_ptquery $num_update recover - echo "\"$exec\",$rev,$table_size,$cachetable_size,$num_ptquery,$num_update,$t0,$t1,$t2,FAIL" | tee -a "$mylog" - fi - else - save_failure "$mysavedir" $tmplog $envdir $rev $exec $table_size $cachetable_size $num_ptquery $num_update test - echo "\"$exec\",$rev,$table_size,$cachetable_size,$num_ptquery,$num_update,$t0,$t1,$t2,FAIL" | tee -a "$mylog" - fi - cd .. 
- rm -rf $rundir "$envdir" -} - -loop_recover_test() { - rev=$1; shift - exec="$1"; shift - table_size="$1"; shift - cachetable_size="$1"; shift - mylog="$1"; shift - mysavedir="$1"; shift - - ptquery_rand=0 - update_rand=0 - while [[ $running = "yes" ]] - do - num_ptquery=1 - num_update=1 - if [[ $ptquery_rand -gt 1 ]] - then - (( num_ptquery = $RANDOM % 16 )) - fi - if [[ $update_rand -gt 0 ]] - then - (( num_update = $RANDOM % 16 )) - fi - (( ptquery_rand = (ptquery_rand + 1) % 4 )) - (( update_rand = (update_rand + 1) % 2 )) - run_recover_test $rev $exec $table_size $cachetable_size $num_ptquery $num_update $mylog $mysavedir - done -} - -declare -a pids=(0) -i=0 - -savepid() { - pids[$i]=$1 - (( i = i + 1 )) -} - -killchildren() { - kill ${pids[@]} || true - for exec in ${testnames[@]} ${recover_testnames[@]} - do - pkill -f $exec || true - done -} - -trap killchildren INT TERM EXIT - -mkdir -p $log -mkdir -p $savedir - -while true -do - (cd $toku_toplevel; \ - svn update; \ - make CC=icc DEBUG=0 HAVE_CILK=0 clean fastbuild; \ - make CC=icc DEBUG=0 HAVE_CILK=0 -C src/tests ${testnames[@]} ${recover_testnames[@]}) - - cd $src_tests - - rev=$(svn info ../.. | awk '/Revision/ { print $2 }') - - running=yes - - for exec in ${testnames[@]} - do - for table_size in 2000 200000 50000000 - do - (( small_cachetable = table_size * 50 )) - suffix="${exec}-${table_size}-${small_cachetable}-$$" - touch "${log}/${suffix}" - loop_test $rev $exec $table_size $small_cachetable "${log}/${suffix}" "${savedir}/${suffix}" & savepid $! - - suffix="${exec}-${table_size}-1000000000-$$" - touch "${log}/${suffix}" - loop_test $rev $exec $table_size 1000000000 "${log}/${suffix}" "${savedir}/${suffix}" & savepid $! 
- done - done - - for exec in ${recover_testnames[@]} - do - for table_size in 2000 200000 50000000 - do - (( small_cachetable = table_size * 50 )) - suffix="${exec}-${table_size}-${small_cachetable}-$$" - touch "${log}/${suffix}" - loop_recover_test $rev $exec $table_size $small_cachetable "${log}/${suffix}" "${savedir}/${suffix}" & savepid $! - - suffix="${exec}-${table_size}-1000000000-$$" - touch "${log}/${suffix}" - loop_recover_test $rev $exec $table_size 1000000000 "${log}/${suffix}" "${savedir}/${suffix}" & savepid $! - done - done - - sleep 1d - - running=no - - killchildren - - wait ${pids[@]} || true - - idx=0 - for pid in ${pids[@]} - do - pids[$idx]=0 - (( idx = idx + 1 )) - done -done diff --git a/src/tests/run_test1426.sh b/src/tests/run_test1426.sh deleted file mode 100755 index 832dd9935c2..00000000000 --- a/src/tests/run_test1426.sh +++ /dev/null @@ -1,19 +0,0 @@ -#!/usr/bin/env bash - -set -e - -test $# -ge 4 - -tdbbin=$1; shift -bdbbin=$1; shift -tdbenv=$1; shift -bdbenv=$1; shift -tdbdump=$1; shift -bdbdump=$1; shift - -TOKU_TEST_FILENAME=$bdbenv $bdbbin -$bdbdump -p -h $bdbenv main > dump.bdb.1426 - -TOKU_TEST_FILENAME=$tdbenv $tdbbin -$tdbdump -x -p -h $tdbenv main > dump.tdb.1426 -diff -I db_pagesize=4096 dump.bdb.1426 dump.tdb.1426 From 1a34a1315b161f3df089b3e1d9b705079e0b8b2d Mon Sep 17 00:00:00 2001 From: John Esmet Date: Thu, 21 Aug 2014 15:35:06 -0400 Subject: [PATCH 150/190] FT-581 Remove dead rdtsc code --- portability/rdtsc.h | 129 -------------------------------------------- 1 file changed, 129 deletions(-) delete mode 100644 portability/rdtsc.h diff --git a/portability/rdtsc.h b/portability/rdtsc.h deleted file mode 100644 index 0a5e5374947..00000000000 --- a/portability/rdtsc.h +++ /dev/null @@ -1,129 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it 
and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). - - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. 
- - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. - - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#pragma once - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 
11/760379 and to the patents and/or patent applications resulting from it." -// read the processor time stamp register - -#if defined __ICC - -#define USE_RDTSC 1 -#define rdtsc _rdtsc - -#elif defined __i386__ - -#define USE_RDTSC 1 - -static inline unsigned long long rdtsc(void) { - unsigned long hi, lo; - __asm__ __volatile__ ("rdtsc\n" - "movl %%edx,%0\n" - "movl %%eax,%1" : "=r"(hi), "=r"(lo) : : "edx", "eax"); - return ((unsigned long long) hi << 32ULL) + (unsigned long long) lo; -} - -#elif defined __x86_64__ - -#define USE_RDTSC 1 - -static inline unsigned long long rdtsc(void) { - unsigned long long r; - __asm__ __volatile__ ("rdtsc\n" - "shl $32,%%rdx\n" - "or %%rdx,%%rax\n" - "movq %%rax,%0" : "=r"(r) : : "edx", "eax", "rdx", "rax"); - return r; -} - -#else - -#define USE_RDTSC 0 - -#endif From feb5b70c26b975bd1e455ad5f9d05d15272a894e Mon Sep 17 00:00:00 2001 From: John Esmet Date: Thu, 21 Aug 2014 15:35:06 -0400 Subject: [PATCH 151/190] FT-582 Remove remaining artifacts from the ICC days --- ft/tests/ftloader-test.cc | 2 +- portability/toku_portability.h | 21 --------------------- scripts/run.fractal.tree.tests.cmake | 3 --- src/tests/CMakeLists.txt | 5 +---- util/tests/test_partitioned_counter.cc | 3 --- 5 files changed, 2 insertions(+), 32 deletions(-) diff --git a/ft/tests/ftloader-test.cc b/ft/tests/ftloader-test.cc index 9a2eeed1292..faa2e39aa08 100644 --- a/ft/tests/ftloader-test.cc +++ b/ft/tests/ftloader-test.cc @@ -186,7 +186,7 @@ static void test_merge_internal (int a[], int na, int b[], int nb, bool dups) { static void test_merge (void) { { int avals[]={1,2,3,4,5}; - int *bvals = NULL; //icc won't let us use a zero-sized array explicitly or by [] = {} construction. 
+ int *bvals = NULL; test_merge_internal(avals, 5, bvals, 0, false); test_merge_internal(bvals, 0, avals, 5, false); } diff --git a/portability/toku_portability.h b/portability/toku_portability.h index 915fb462897..fac5e56a5d7 100644 --- a/portability/toku_portability.h +++ b/portability/toku_portability.h @@ -188,26 +188,6 @@ extern "C" { // Deprecated functions. #if !defined(TOKU_ALLOW_DEPRECATED) -# if defined(__ICL) || defined(__ICC) // Intel Compiler -# pragma deprecated (creat, fstat, stat, getpid, syscall, sysconf, mkdir, strdup) -//# pragma poison off_t -//# pragma poison pthread_attr_t pthread_t -//# pragma poison pthread_mutexattr_t pthread_mutex_t -//# pragma poison pthread_condattr_t pthread_cond_t -//# pragma poison pthread_rwlockattr_t pthread_rwlock_t -//# pragma poison timespec -# ifndef DONT_DEPRECATE_WRITES -# pragma poison write pwrite -# endif -# ifndef DONT_DEPRECATE_MALLOC -# pragma deprecated (malloc, free, realloc) -# endif -# ifndef DONT_DEPRECATE_ERRNO -# pragma deprecated (errno) -# endif -# pragma poison dup2 -# pragma poison _dup2 -# else int creat(const char *pathname, mode_t mode) __attribute__((__deprecated__)); int fstat(int fd, struct stat *buf) __attribute__((__deprecated__)); int stat(const char *path, struct stat *buf) __attribute__((__deprecated__)); @@ -281,7 +261,6 @@ extern void *realloc(void*, size_t) __THROW __attribute__((__deprecat #pragma GCC poison __sync_synchronize #pragma GCC poison __sync_lock_test_and_set #pragma GCC poison __sync_release -# endif #endif #if defined(__cplusplus) diff --git a/scripts/run.fractal.tree.tests.cmake b/scripts/run.fractal.tree.tests.cmake index f695699255d..265855e0278 100644 --- a/scripts/run.fractal.tree.tests.cmake +++ b/scripts/run.fractal.tree.tests.cmake @@ -85,19 +85,16 @@ set(all_opts set(rel_opts ${all_opts} -DCMAKE_BUILD_TYPE=Release - -DINTEL_CC=ON -DUSE_BDB=ON ) set(dbg_opts ${all_opts} -DCMAKE_BUILD_TYPE=Debug - -DINTEL_CC=ON -DUSE_BDB=ON ) set(cov_opts ${all_opts} 
-DCMAKE_BUILD_TYPE=Debug - -DINTEL_CC=OFF -DUSE_GCOV=ON ) diff --git a/src/tests/CMakeLists.txt b/src/tests/CMakeLists.txt index df1451dbf34..287913fa370 100644 --- a/src/tests/CMakeLists.txt +++ b/src/tests/CMakeLists.txt @@ -160,10 +160,7 @@ if(BUILD_TESTING OR BUILD_SRC_TESTS) endforeach(av) endforeach(ov) - if (NOT (CMAKE_SYSTEM_NAME MATCHES Darwin OR - (CMAKE_CXX_COMPILER_ID STREQUAL Intel AND - CMAKE_BUILD_TYPE STREQUAL Release) - OR USE_GCOV)) + if (NOT (CMAKE_SYSTEM_NAME MATCHES Darwin OR USE_GCOV)) declare_custom_tests(helgrind1.tdb) add_test(NAME ydb/helgrind_helgrind1.tdb COMMAND valgrind --quiet --tool=helgrind --error-exitcode=1 --log-file=helgrind1.tdb.deleteme $) diff --git a/util/tests/test_partitioned_counter.cc b/util/tests/test_partitioned_counter.cc index 5af214f75ac..02a9846517a 100644 --- a/util/tests/test_partitioned_counter.cc +++ b/util/tests/test_partitioned_counter.cc @@ -201,9 +201,6 @@ static inline void increment (void) { head->prev = cp; } head = cp; -#ifdef __INTEL_COMPILER - __memory_barrier(); // for some reason I don't understand, ICC needs a memory barrier here. 
-Bradley -#endif cp->counter = 0; cp->inited = true; cp->myid = idcounter++; From a57387dfa70ccbfb39e834a71fb9469feb76af31 Mon Sep 17 00:00:00 2001 From: John Esmet Date: Thu, 21 Aug 2014 15:35:06 -0400 Subject: [PATCH 152/190] FT-512 Remove the majority of the remaining BDB artifacts --- CTestCustom.cmake | 2 -- README.md | 8 +------ cmake_modules/FindBDB.cmake | 27 ------------------------ cmake_modules/TokuFeatureDetection.cmake | 5 ----- scripts/run-nightly-coverage-tests.bash | 1 - scripts/run-nightly-drd-tests.bash | 1 - scripts/run-nightly-release-tests.bash | 3 +-- scripts/run.fractal.tree.tests.cmake | 3 --- scripts/run.stress-tests.py | 1 - 9 files changed, 2 insertions(+), 49 deletions(-) delete mode 100644 cmake_modules/FindBDB.cmake diff --git a/CTestCustom.cmake b/CTestCustom.cmake index 1785acabe88..62b592a5149 100644 --- a/CTestCustom.cmake +++ b/CTestCustom.cmake @@ -33,7 +33,6 @@ list(APPEND CTEST_CUSTOM_MEMCHECK_IGNORE ydb/preload-db-nested.tdb ydb/stress-gc.tdb ydb/stress-gc2.tdb - ydb/stress-test.bdb ydb/stress-test.tdb ydb/test-5138.tdb ydb/test-prepare.tdb @@ -45,7 +44,6 @@ list(APPEND CTEST_CUSTOM_MEMCHECK_IGNORE ydb/test-xa-prepare.tdb ydb/test4573-logtrim.tdb ydb/test_3645.tdb - ydb/test_groupcommit_perf.bdb ydb/test_groupcommit_perf.tdb ydb/test_large_update_broadcast_small_cachetable.tdb ydb/test_update_broadcast_stress.tdb diff --git a/README.md b/README.md index eaded7c4287..2914ff9be2c 100644 --- a/README.md +++ b/README.md @@ -35,7 +35,6 @@ mkdir build cd build CC=gcc47 CXX=g++47 cmake \ -D CMAKE_BUILD_TYPE=Debug \ - -D USE_BDB=OFF \ -D BUILD_TESTING=OFF \ -D USE_VALGRIND=OFF \ -D CMAKE_INSTALL_PREFIX=../prefix/ \ @@ -84,15 +83,10 @@ There are some large data files not stored in the git repository, that will be made available soon. For now, the tests that use these files will not run. -Many of the tests are linked with both TokuFT and Berkeley DB, as a sanity -check on the tests themselves. 
To build these tests, you will need -Berkeley DB and its header files installed. If you do not have Berkeley -DB installed, just don't pass `USE_BDB=ON`. - In the build directory from above: ```sh -cmake -D BUILD_TESTING=ON [-D USE_BDB=ON] .. +cmake -D BUILD_TESTING=ON .. ctest -D ExperimentalStart \ -D ExperimentalConfigure \ -D ExperimentalBuild \ diff --git a/cmake_modules/FindBDB.cmake b/cmake_modules/FindBDB.cmake deleted file mode 100644 index 495f2e87b3e..00000000000 --- a/cmake_modules/FindBDB.cmake +++ /dev/null @@ -1,27 +0,0 @@ -# - Try to find BDB -# Once done this will define -# BDB_FOUND - System has BDB -# BDB_INCLUDE_DIRS - The BDB include directories -# BDB_LIBRARIES - The libraries needed to use BDB -# BDB_DEFINITIONS - Compiler switches required for using BDB - -find_path(BDB_INCLUDE_DIR db.h) - -find_library(BDB_LIBRARY NAMES db libdb) - -include(CheckSymbolExists) -## check if the found bdb has DB_TXN_SNAPSHOT -set(CMAKE_REQUIRED_INCLUDES ${BDB_INCLUDE_DIR}) -check_symbol_exists(DB_TXN_SNAPSHOT "db.h" HAVE_DB_TXN_SNAPSHOT) -if(HAVE_DB_TXN_SNAPSHOT) - set(BDB_INCLUDE_DIRS ${BDB_INCLUDE_DIR}) - set(BDB_LIBRARIES ${BDB_LIBRARY}) - - include(FindPackageHandleStandardArgs) - # handle the QUIETLY and REQUIRED arguments and set BDB_FOUND to TRUE - # if all listed variables are TRUE - find_package_handle_standard_args(BDB DEFAULT_MSG - BDB_LIBRARY BDB_INCLUDE_DIR) - - mark_as_advanced(BDB_INCLUDE_DIR BDB_LIBRARY) -endif() diff --git a/cmake_modules/TokuFeatureDetection.cmake b/cmake_modules/TokuFeatureDetection.cmake index 59dff0aadd4..e7fd27525d5 100644 --- a/cmake_modules/TokuFeatureDetection.cmake +++ b/cmake_modules/TokuFeatureDetection.cmake @@ -2,11 +2,6 @@ find_package(Threads) find_package(ZLIB REQUIRED) -option(USE_BDB "Build some tools and tests with bdb (requires a proper BerkeleyDB include directory and library)." ON) -if(USE_BDB) - find_package(BDB REQUIRED) -endif() - option(USE_VALGRIND "Build to run safely under valgrind (often slower)." 
ON) if(USE_VALGRIND) find_package(Valgrind REQUIRED) diff --git a/scripts/run-nightly-coverage-tests.bash b/scripts/run-nightly-coverage-tests.bash index c91bdb7b9a0..c96a02352ca 100755 --- a/scripts/run-nightly-coverage-tests.bash +++ b/scripts/run-nightly-coverage-tests.bash @@ -20,7 +20,6 @@ if [ ! -d build ] ; then -D USE_GTAGS=OFF \ -D USE_CSCOPE=OFF \ -D USE_ETAGS=OFF \ - -D USE_BDB=OFF \ -D USE_GCOV=ON \ -D CMAKE_LINK_DEPENDS_NO_SHARED=ON \ -G Ninja \ diff --git a/scripts/run-nightly-drd-tests.bash b/scripts/run-nightly-drd-tests.bash index 4a99b40262e..39d97de2185 100755 --- a/scripts/run-nightly-drd-tests.bash +++ b/scripts/run-nightly-drd-tests.bash @@ -20,7 +20,6 @@ if [ ! -d build ] ; then -D USE_GTAGS=OFF \ -D USE_CSCOPE=OFF \ -D USE_ETAGS=OFF \ - -D USE_BDB=OFF \ -D CMAKE_LINK_DEPENDS_NO_SHARED=ON \ -G Ninja \ -D RUN_LONG_TESTS=ON \ diff --git a/scripts/run-nightly-release-tests.bash b/scripts/run-nightly-release-tests.bash index e5767d10893..af08894beb8 100755 --- a/scripts/run-nightly-release-tests.bash +++ b/scripts/run-nightly-release-tests.bash @@ -20,7 +20,6 @@ if [ ! 
-d build ] ; then -D USE_GTAGS=OFF \ -D USE_CSCOPE=OFF \ -D USE_ETAGS=OFF \ - -D USE_BDB=ON \ -D CMAKE_LINK_DEPENDS_NO_SHARED=ON \ -G Ninja \ -D RUN_LONG_TESTS=ON \ @@ -41,6 +40,6 @@ ctest -j16 \ -E '/drd|/helgrind' ctest -j16 \ -D NightlyMemCheck \ - -E '^ydb/.*\.bdb|test1426\.tdb|/drd|/helgrind' + -E 'test1426\.tdb|/drd|/helgrind' set -e ctest -D NightlySubmit diff --git a/scripts/run.fractal.tree.tests.cmake b/scripts/run.fractal.tree.tests.cmake index 265855e0278..64d52a56735 100644 --- a/scripts/run.fractal.tree.tests.cmake +++ b/scripts/run.fractal.tree.tests.cmake @@ -78,19 +78,16 @@ list(APPEND CTEST_NOTES_FILES ) set(all_opts - -DBDBDIR=/usr/local/BerkeleyDB.5.3 -DBUILD_TESTING=ON -DUSE_CILK=OFF ) set(rel_opts ${all_opts} -DCMAKE_BUILD_TYPE=Release - -DUSE_BDB=ON ) set(dbg_opts ${all_opts} -DCMAKE_BUILD_TYPE=Debug - -DUSE_BDB=ON ) set(cov_opts ${all_opts} diff --git a/scripts/run.stress-tests.py b/scripts/run.stress-tests.py index d4245a7c4b4..62edbab8f3c 100755 --- a/scripts/run.stress-tests.py +++ b/scripts/run.stress-tests.py @@ -552,7 +552,6 @@ def rebuild(tokudb, builddir, tokudb_data, cc, cxx, tests): newenv['CXX'] = cxx r = call(['cmake', '-DCMAKE_BUILD_TYPE=Debug', - '-DUSE_BDB=OFF', '-DUSE_GTAGS=OFF', '-DUSE_CTAGS=OFF', '-DUSE_ETAGS=OFF', From d569f7a4c0f4889306a16e507213fb66ae06ebcc Mon Sep 17 00:00:00 2001 From: John Esmet Date: Thu, 21 Aug 2014 15:35:06 -0400 Subject: [PATCH 153/190] FT-513 Remove some windows cruft --- portability/tests/test-fsync.cc | 1 - portability/toku_portability.h | 5 ----- 2 files changed, 6 deletions(-) diff --git a/portability/tests/test-fsync.cc b/portability/tests/test-fsync.cc index efdfd0dd4f2..33b02550716 100644 --- a/portability/tests/test-fsync.cc +++ b/portability/tests/test-fsync.cc @@ -221,7 +221,6 @@ time_fsyncs_many_files(int N, int bytes, int fds[/*N*/]) { } } -//sync() does not appear to have an analogue on windows. 
static void time_sync_fsyncs_many_files(int N, int bytes, int fds[/*N*/]) { if (verbose>1) { diff --git a/portability/toku_portability.h b/portability/toku_portability.h index fac5e56a5d7..04afe282594 100644 --- a/portability/toku_portability.h +++ b/portability/toku_portability.h @@ -107,11 +107,6 @@ PATENT RIGHTS GRANT: #define DEV_NULL_FILE "/dev/null" -// HACK Poison these mcaros so no one uses them -#define TOKU_WINDOWS , -#define TOKU_WINDOWS_32 , -#define TOKU_WINDOWS_64 , - // include here, before they get deprecated #include From 7316bacdeb8f385d7761d755bd8ec7f373e96794 Mon Sep 17 00:00:00 2001 From: John Esmet Date: Thu, 21 Aug 2014 15:35:06 -0400 Subject: [PATCH 154/190] FT-396 Rename TokuDB to TokuFT in each license header --- README-TOKUDB | 2 +- buildheader/make_tdb.cc | 2 +- ft/bndata.cc | 2 +- ft/bndata.h | 2 +- ft/cachetable/background_job_manager.cc | 2 +- ft/cachetable/background_job_manager.h | 2 +- ft/cachetable/cachetable-internal.h | 2 +- ft/cachetable/cachetable.cc | 2 +- ft/cachetable/cachetable.h | 2 +- ft/cachetable/checkpoint.cc | 2 +- ft/cachetable/checkpoint.h | 2 +- ft/comparator.h | 2 +- ft/cursor.cc | 2 +- ft/cursor.h | 2 +- ft/ft-cachetable-wrappers.cc | 2 +- ft/ft-cachetable-wrappers.h | 2 +- ft/ft-flusher-internal.h | 2 +- ft/ft-flusher.cc | 2 +- ft/ft-flusher.h | 2 +- ft/ft-hot-flusher.cc | 2 +- ft/ft-internal.h | 2 +- ft/ft-ops.cc | 2 +- ft/ft-ops.h | 2 +- ft/ft-test-helpers.cc | 2 +- ft/ft-verify.cc | 2 +- ft/ft.cc | 2 +- ft/ft.h | 2 +- ft/le-cursor.cc | 2 +- ft/le-cursor.h | 2 +- ft/leafentry.cc | 2 +- ft/leafentry.h | 2 +- ft/loader/callbacks.cc | 2 +- ft/loader/dbufio.cc | 2 +- ft/loader/dbufio.h | 2 +- ft/loader/loader-internal.h | 2 +- ft/loader/loader.cc | 2 +- ft/loader/loader.h | 2 +- ft/loader/pqueue.cc | 2 +- ft/loader/pqueue.h | 2 +- ft/logger/log-internal.h | 2 +- ft/logger/log.h | 2 +- ft/logger/log_upgrade.cc | 2 +- ft/logger/logcursor.cc | 2 +- ft/logger/logcursor.h | 2 +- ft/logger/logfilemgr.cc | 2 +- 
ft/logger/logfilemgr.h | 2 +- ft/logger/logformat.cc | 2 +- ft/logger/logger.cc | 2 +- ft/logger/logger.h | 2 +- ft/logger/recover.cc | 2 +- ft/logger/recover.h | 2 +- ft/msg.cc | 2 +- ft/msg.h | 2 +- ft/msg_buffer.cc | 2 +- ft/msg_buffer.h | 2 +- ft/node.cc | 2 +- ft/node.h | 2 +- ft/pivotkeys.cc | 2 +- ft/serialize/block_allocator.cc | 2 +- ft/serialize/block_allocator.h | 2 +- ft/serialize/block_allocator_strategy.cc | 2 +- ft/serialize/block_allocator_strategy.h | 2 +- ft/serialize/block_table.cc | 2 +- ft/serialize/block_table.h | 2 +- ft/serialize/compress.cc | 2 +- ft/serialize/compress.h | 2 +- ft/serialize/ft-node-deserialize.cc | 2 +- ft/serialize/ft-serialize.cc | 2 +- ft/serialize/ft-serialize.h | 2 +- ft/serialize/ft_layout_version.h | 2 +- ft/serialize/ft_node-serialize.cc | 2 +- ft/serialize/ft_node-serialize.h | 2 +- ft/serialize/quicklz.cc | 2 +- ft/serialize/quicklz.h | 2 +- ft/serialize/rbuf.h | 2 +- ft/serialize/sub_block.cc | 2 +- ft/serialize/sub_block.h | 2 +- ft/serialize/wbuf.h | 2 +- ft/serialize/workset.h | 2 +- ft/tests/benchmark-test.cc | 2 +- ft/tests/block_allocator_strategy_test.cc | 2 +- ft/tests/block_allocator_test.cc | 2 +- ft/tests/bnc-insert-benchmark.cc | 2 +- ft/tests/cachetable-4357.cc | 2 +- ft/tests/cachetable-4365.cc | 2 +- ft/tests/cachetable-5097.cc | 2 +- ft/tests/cachetable-5978-2.cc | 2 +- ft/tests/cachetable-5978.cc | 2 +- ft/tests/cachetable-all-write.cc | 2 +- ft/tests/cachetable-checkpoint-pending.cc | 2 +- ft/tests/cachetable-checkpoint-pinned-nodes.cc | 2 +- ft/tests/cachetable-checkpoint-prefetched-nodes.cc | 2 +- ft/tests/cachetable-checkpoint-test.cc | 2 +- ft/tests/cachetable-checkpointer-class.cc | 2 +- ft/tests/cachetable-cleaner-checkpoint.cc | 2 +- ft/tests/cachetable-cleaner-checkpoint2.cc | 2 +- ft/tests/cachetable-cleaner-thread-attrs-accumulate.cc | 2 +- ft/tests/cachetable-cleaner-thread-empty-cachetable.cc | 2 +- ft/tests/cachetable-cleaner-thread-everything-pinned.cc | 2 +- 
ft/tests/cachetable-cleaner-thread-nothing-needs-flushing.cc | 2 +- ft/tests/cachetable-cleaner-thread-same-fullhash.cc | 2 +- ft/tests/cachetable-cleaner-thread-simple.cc | 2 +- ft/tests/cachetable-clock-all-pinned.cc | 2 +- ft/tests/cachetable-clock-eviction.cc | 2 +- ft/tests/cachetable-clock-eviction2.cc | 2 +- ft/tests/cachetable-clock-eviction3.cc | 2 +- ft/tests/cachetable-clock-eviction4.cc | 2 +- ft/tests/cachetable-clone-checkpoint.cc | 2 +- ft/tests/cachetable-clone-partial-fetch-pinned-node.cc | 2 +- ft/tests/cachetable-clone-partial-fetch.cc | 2 +- ft/tests/cachetable-clone-pin-nonblocking.cc | 2 +- ft/tests/cachetable-clone-unpin-remove.cc | 2 +- ft/tests/cachetable-count-pinned-test.cc | 2 +- ft/tests/cachetable-debug-test.cc | 2 +- ft/tests/cachetable-eviction-close-test.cc | 2 +- ft/tests/cachetable-eviction-close-test2.cc | 2 +- ft/tests/cachetable-eviction-getandpin-test.cc | 2 +- ft/tests/cachetable-eviction-getandpin-test2.cc | 2 +- ft/tests/cachetable-evictor-class.cc | 2 +- ft/tests/cachetable-fd-test.cc | 2 +- ft/tests/cachetable-fetch-inducing-evictor.cc | 2 +- ft/tests/cachetable-flush-during-cleaner.cc | 2 +- ft/tests/cachetable-flush-test.cc | 2 +- ft/tests/cachetable-getandpin-test.cc | 2 +- ft/tests/cachetable-kibbutz_and_flush_cachefile.cc | 2 +- ft/tests/cachetable-partial-fetch.cc | 2 +- ft/tests/cachetable-pin-checkpoint.cc | 2 +- ft/tests/cachetable-pin-nonblocking-checkpoint-clean.cc | 2 +- ft/tests/cachetable-prefetch-checkpoint-test.cc | 2 +- ft/tests/cachetable-prefetch-close-leak-test.cc | 2 +- ft/tests/cachetable-prefetch-close-test.cc | 2 +- ft/tests/cachetable-prefetch-flowcontrol-test.cc | 2 +- ft/tests/cachetable-prefetch-getandpin-test.cc | 2 +- ft/tests/cachetable-prefetch-maybegetandpin-test.cc | 2 +- ft/tests/cachetable-prefetch2-test.cc | 2 +- ft/tests/cachetable-put-checkpoint.cc | 2 +- ft/tests/cachetable-put-test.cc | 2 +- ft/tests/cachetable-rwlock-test.cc | 2 +- ft/tests/cachetable-simple-clone.cc | 2 +- 
ft/tests/cachetable-simple-clone2.cc | 2 +- ft/tests/cachetable-simple-close.cc | 2 +- ft/tests/cachetable-simple-maybe-get-pin.cc | 2 +- ft/tests/cachetable-simple-pin-cheap.cc | 2 +- ft/tests/cachetable-simple-pin-dep-nodes.cc | 2 +- ft/tests/cachetable-simple-pin-nonblocking-cheap.cc | 2 +- ft/tests/cachetable-simple-pin-nonblocking.cc | 2 +- ft/tests/cachetable-simple-pin.cc | 2 +- ft/tests/cachetable-simple-put-dep-nodes.cc | 2 +- ft/tests/cachetable-simple-read-pin-nonblocking.cc | 2 +- ft/tests/cachetable-simple-read-pin.cc | 2 +- ft/tests/cachetable-simple-unpin-remove-checkpoint.cc | 2 +- ft/tests/cachetable-simple-verify.cc | 2 +- ft/tests/cachetable-test.cc | 2 +- ft/tests/cachetable-test.h | 2 +- ft/tests/cachetable-unpin-and-remove-test.cc | 2 +- ft/tests/cachetable-unpin-remove-and-checkpoint.cc | 2 +- ft/tests/cachetable-unpin-test.cc | 2 +- ft/tests/cachetable-writer-thread-limit.cc | 2 +- ft/tests/comparator-test.cc | 2 +- ft/tests/compress-test.cc | 2 +- ft/tests/dbufio-test-destroy.cc | 2 +- ft/tests/dbufio-test.cc | 2 +- ft/tests/dmt-test.cc | 2 +- ft/tests/dmt-test2.cc | 2 +- ft/tests/fifo-test.cc | 2 +- ft/tests/ft-bfe-query.cc | 2 +- ft/tests/ft-clock-test.cc | 2 +- ft/tests/ft-serialize-benchmark.cc | 2 +- ft/tests/ft-serialize-sub-block-test.cc | 2 +- ft/tests/ft-serialize-test.cc | 2 +- ft/tests/ft-test-cursor-2.cc | 2 +- ft/tests/ft-test-cursor.cc | 2 +- ft/tests/ft-test-header.cc | 2 +- ft/tests/ft-test.cc | 2 +- ft/tests/ft-test0.cc | 2 +- ft/tests/ft-test1.cc | 2 +- ft/tests/ft-test2.cc | 2 +- ft/tests/ft-test3.cc | 2 +- ft/tests/ft-test4.cc | 2 +- ft/tests/ft-test5.cc | 2 +- ft/tests/ftloader-error-injector.h | 2 +- ft/tests/ftloader-test-bad-generate.cc | 2 +- ft/tests/ftloader-test-extractor-errors.cc | 2 +- ft/tests/ftloader-test-extractor.cc | 2 +- ft/tests/ftloader-test-merge-files-dbufio.cc | 2 +- ft/tests/ftloader-test-open.cc | 2 +- ft/tests/ftloader-test-vm.cc | 2 +- ft/tests/ftloader-test-writer-errors.cc | 2 +- 
ft/tests/ftloader-test-writer.cc | 2 +- ft/tests/ftloader-test.cc | 2 +- ft/tests/is_empty.cc | 2 +- ft/tests/keyrange.cc | 2 +- ft/tests/keytest.cc | 2 +- ft/tests/le-cursor-provdel.cc | 2 +- ft/tests/le-cursor-right.cc | 2 +- ft/tests/le-cursor-walk.cc | 2 +- ft/tests/list-test.cc | 2 +- ft/tests/log-test-maybe-trim.cc | 2 +- ft/tests/log-test.cc | 2 +- ft/tests/log-test2.cc | 2 +- ft/tests/log-test3.cc | 2 +- ft/tests/log-test4.cc | 2 +- ft/tests/log-test5.cc | 2 +- ft/tests/log-test6.cc | 2 +- ft/tests/log-test7.cc | 2 +- ft/tests/logcursor-bad-checksum.cc | 2 +- ft/tests/logcursor-bw.cc | 2 +- ft/tests/logcursor-empty-logdir.cc | 2 +- ft/tests/logcursor-empty-logfile-2.cc | 2 +- ft/tests/logcursor-empty-logfile-3.cc | 2 +- ft/tests/logcursor-empty-logfile.cc | 2 +- ft/tests/logcursor-fw.cc | 2 +- ft/tests/logcursor-print.cc | 2 +- ft/tests/logcursor-timestamp.cc | 2 +- ft/tests/logfilemgr-create-destroy.cc | 2 +- ft/tests/logfilemgr-print.cc | 2 +- ft/tests/make-tree.cc | 2 +- ft/tests/mempool-115.cc | 2 +- ft/tests/msnfilter.cc | 2 +- ft/tests/orthopush-flush.cc | 2 +- ft/tests/pqueue-test.cc | 2 +- ft/tests/quicklz-test.cc | 2 +- ft/tests/recovery-bad-last-entry.cc | 2 +- ft/tests/recovery-cbegin-cend-hello.cc | 2 +- ft/tests/recovery-cbegin-cend.cc | 2 +- ft/tests/recovery-cbegin.cc | 2 +- ft/tests/recovery-cend-cbegin.cc | 2 +- ft/tests/recovery-datadir-is-file.cc | 2 +- ft/tests/recovery-empty.cc | 2 +- ft/tests/recovery-fopen-missing-file.cc | 2 +- ft/tests/recovery-hello.cc | 2 +- ft/tests/recovery-lsn-error-during-forward-scan.cc | 2 +- ft/tests/recovery-no-datadir.cc | 2 +- ft/tests/recovery-no-log.cc | 2 +- ft/tests/recovery-no-logdir.cc | 2 +- ft/tests/recovery-test5123.cc | 2 +- ft/tests/shortcut.cc | 2 +- ft/tests/subblock-test-checksum.cc | 2 +- ft/tests/subblock-test-compression.cc | 2 +- ft/tests/subblock-test-index.cc | 2 +- ft/tests/subblock-test-size.cc | 2 +- ft/tests/test-assert.cc | 2 +- ft/tests/test-bjm.cc | 2 +- 
ft/tests/test-checkpoint-during-flush.cc | 2 +- ft/tests/test-checkpoint-during-merge.cc | 2 +- ft/tests/test-checkpoint-during-rebalance.cc | 2 +- ft/tests/test-checkpoint-during-split.cc | 2 +- ft/tests/test-del-inorder.cc | 2 +- ft/tests/test-dirty-flushes-on-cleaner.cc | 2 +- ft/tests/test-dump-ft.cc | 2 +- ft/tests/test-flushes-on-cleaner.cc | 2 +- ft/tests/test-ft-overflow.cc | 2 +- ft/tests/test-ft-txns.h | 2 +- ft/tests/test-hot-with-bounds.cc | 2 +- ft/tests/test-inc-split.cc | 2 +- ft/tests/test-leafentry-child-txn.cc | 2 +- ft/tests/test-leafentry-nested.cc | 2 +- ft/tests/test-merges-on-cleaner.cc | 2 +- ft/tests/test-oldest-referenced-xid-flush.cc | 2 +- ft/tests/test-pick-child-to-flush.cc | 2 +- ft/tests/test-txn-child-manager.cc | 2 +- ft/tests/test.h | 2 +- ft/tests/test1308a.cc | 2 +- ft/tests/test3681.cc | 2 +- ft/tests/test3856.cc | 2 +- ft/tests/test3884.cc | 2 +- ft/tests/test4115.cc | 2 +- ft/tests/test4244.cc | 2 +- ft/tests/test_logcursor.cc | 2 +- ft/tests/test_oexcl.cc | 2 +- ft/tests/test_rightmost_leaf_seqinsert_heuristic.cc | 2 +- ft/tests/test_rightmost_leaf_split_merge.cc | 2 +- ft/tests/test_toku_malloc_plain_free.cc | 2 +- ft/tests/upgrade_test_simple.cc | 2 +- ft/tests/verify-bad-msn.cc | 2 +- ft/tests/verify-bad-pivots.cc | 2 +- ft/tests/verify-dup-in-leaf.cc | 2 +- ft/tests/verify-dup-pivots.cc | 2 +- ft/tests/verify-misrouted-msgs.cc | 2 +- ft/tests/verify-unsorted-leaf.cc | 2 +- ft/tests/verify-unsorted-pivots.cc | 2 +- ft/tests/xid_lsn_independent.cc | 2 +- ft/tests/ybt-test.cc | 2 +- ft/txn/roll.cc | 2 +- ft/txn/rollback-apply.cc | 2 +- ft/txn/rollback-apply.h | 2 +- ft/txn/rollback-ct-callbacks.cc | 2 +- ft/txn/rollback-ct-callbacks.h | 2 +- ft/txn/rollback.cc | 2 +- ft/txn/rollback.h | 2 +- ft/txn/rollback_log_node_cache.cc | 2 +- ft/txn/rollback_log_node_cache.h | 2 +- ft/txn/txn.cc | 2 +- ft/txn/txn.h | 2 +- ft/txn/txn_child_manager.cc | 2 +- ft/txn/txn_child_manager.h | 2 +- ft/txn/txn_manager.cc | 2 +- 
ft/txn/txn_manager.h | 2 +- ft/txn/txn_state.h | 2 +- ft/txn/xids.cc | 2 +- ft/txn/xids.h | 2 +- ft/ule-internal.h | 2 +- ft/ule.cc | 2 +- ft/ule.h | 2 +- locktree/concurrent_tree.cc | 2 +- locktree/concurrent_tree.h | 2 +- locktree/keyrange.cc | 2 +- locktree/keyrange.h | 2 +- locktree/lock_request.cc | 2 +- locktree/lock_request.h | 2 +- locktree/locktree.cc | 2 +- locktree/locktree.h | 2 +- locktree/manager.cc | 2 +- locktree/range_buffer.cc | 2 +- locktree/range_buffer.h | 2 +- locktree/tests/concurrent_tree_create_destroy.cc | 2 +- locktree/tests/concurrent_tree_lkr_acquire_release.cc | 2 +- locktree/tests/concurrent_tree_lkr_insert_remove.cc | 2 +- locktree/tests/concurrent_tree_lkr_insert_serial_large.cc | 2 +- locktree/tests/concurrent_tree_lkr_remove_all.cc | 2 +- locktree/tests/concurrent_tree_unit_test.h | 2 +- locktree/tests/lock_request_create_set.cc | 2 +- locktree/tests/lock_request_get_set_keys.cc | 2 +- locktree/tests/lock_request_killed.cc | 2 +- locktree/tests/lock_request_not_killed.cc | 2 +- locktree/tests/lock_request_start_deadlock.cc | 2 +- locktree/tests/lock_request_start_pending.cc | 2 +- locktree/tests/lock_request_unit_test.h | 2 +- locktree/tests/lock_request_wait_time_callback.cc | 2 +- locktree/tests/locktree_conflicts.cc | 2 +- locktree/tests/locktree_create_destroy.cc | 2 +- locktree/tests/locktree_escalation_1big7lt_1small.cc | 2 +- locktree/tests/locktree_escalation_2big_1lt.cc | 2 +- locktree/tests/locktree_escalation_2big_2lt.cc | 2 +- locktree/tests/locktree_escalation_impossible.cc | 2 +- locktree/tests/locktree_escalation_stalls.cc | 2 +- locktree/tests/locktree_infinity.cc | 2 +- locktree/tests/locktree_misc.cc | 2 +- locktree/tests/locktree_overlapping_relock.cc | 2 +- locktree/tests/locktree_simple_lock.cc | 2 +- locktree/tests/locktree_single_txnid_optimization.cc | 2 +- locktree/tests/locktree_unit_test.h | 2 +- locktree/tests/manager_create_destroy.cc | 2 +- locktree/tests/manager_locktree_map.cc | 2 +- 
locktree/tests/manager_params.cc | 2 +- locktree/tests/manager_reference_release_lt.cc | 2 +- locktree/tests/manager_status.cc | 2 +- locktree/tests/manager_unit_test.h | 2 +- locktree/tests/range_buffer_test.cc | 2 +- locktree/tests/test.h | 2 +- locktree/tests/txnid_set_test.cc | 2 +- locktree/tests/wfg_test.cc | 2 +- locktree/treenode.cc | 2 +- locktree/treenode.h | 2 +- locktree/txnid_set.cc | 2 +- locktree/txnid_set.h | 2 +- locktree/wfg.cc | 2 +- locktree/wfg.h | 2 +- portability/file.cc | 2 +- portability/huge_page_detection.cc | 2 +- portability/memory.cc | 2 +- portability/memory.h | 2 +- portability/os_malloc.cc | 2 +- portability/portability.cc | 2 +- portability/tests/rwlock_condvar.h | 2 +- portability/tests/test-active-cpus.cc | 2 +- portability/tests/test-cache-line-boundary-fails.cc | 2 +- portability/tests/test-cpu-freq-openlimit17.cc | 2 +- portability/tests/test-cpu-freq.cc | 2 +- portability/tests/test-filesystem-sizes.cc | 2 +- portability/tests/test-flock.cc | 2 +- portability/tests/test-fsync-directory.cc | 2 +- portability/tests/test-fsync.cc | 2 +- portability/tests/test-gettime.cc | 2 +- portability/tests/test-gettimeofday.cc | 2 +- portability/tests/test-hugepage.cc | 2 +- portability/tests/test-max-data.cc | 2 +- portability/tests/test-memory-status.cc | 2 +- portability/tests/test-pagesize.cc | 2 +- portability/tests/test-pthread-rwlock-rdlock.cc | 2 +- portability/tests/test-pthread-rwlock-rwr.cc | 2 +- portability/tests/test-pwrite4g.cc | 2 +- portability/tests/test-snprintf.cc | 2 +- portability/tests/test-stat.cc | 2 +- portability/tests/test-toku-malloc.cc | 2 +- portability/tests/test-xid.cc | 2 +- portability/tests/test.h | 2 +- portability/tests/try-assert-zero.cc | 2 +- portability/tests/try-assert0.cc | 2 +- portability/tests/try-leak-lost.cc | 2 +- portability/tests/try-leak-reachable.cc | 2 +- portability/tests/try-uninit.cc | 2 +- portability/toku_assert.cc | 2 +- portability/toku_assert.h | 2 +- portability/toku_atomic.h | 
2 +- portability/toku_byteswap.h | 2 +- portability/toku_crash.cc | 2 +- portability/toku_crash.h | 2 +- portability/toku_htod.h | 2 +- portability/toku_htonl.h | 2 +- portability/toku_list.h | 2 +- portability/toku_os.h | 2 +- portability/toku_os_types.h | 2 +- portability/toku_path.cc | 2 +- portability/toku_path.h | 2 +- portability/toku_portability.h | 2 +- portability/toku_pthread.cc | 2 +- portability/toku_pthread.h | 2 +- portability/toku_race_tools.h | 2 +- portability/toku_random.h | 2 +- portability/toku_stdint.h | 2 +- portability/toku_stdlib.h | 2 +- portability/toku_time.cc | 2 +- portability/toku_time.h | 2 +- src/errors.cc | 2 +- src/indexer-internal.h | 2 +- src/indexer-undo-do.cc | 2 +- src/indexer.cc | 2 +- src/indexer.h | 2 +- src/loader.cc | 2 +- src/loader.h | 2 +- src/tests/big-nested-abort-abort.cc | 2 +- src/tests/big-nested-abort-commit.cc | 2 +- src/tests/big-nested-commit-abort.cc | 2 +- src/tests/big-nested-commit-commit.cc | 2 +- src/tests/bigtxn27.cc | 2 +- src/tests/blackhole.cc | 2 +- src/tests/blocking-first-empty.cc | 2 +- src/tests/blocking-first.cc | 2 +- src/tests/blocking-last.cc | 2 +- src/tests/blocking-next-prev-deadlock.cc | 2 +- src/tests/blocking-next-prev.cc | 2 +- src/tests/blocking-prelock-range.cc | 2 +- src/tests/blocking-put-timeout.cc | 2 +- src/tests/blocking-put-wakeup.cc | 2 +- src/tests/blocking-put.cc | 2 +- src/tests/blocking-set-range-0.cc | 2 +- src/tests/blocking-set-range-n.cc | 2 +- src/tests/blocking-set-range-reverse-0.cc | 2 +- src/tests/blocking-set.cc | 2 +- src/tests/blocking-table-lock.cc | 2 +- src/tests/bug1381.cc | 2 +- src/tests/cachetable-race.cc | 2 +- src/tests/checkpoint1.cc | 2 +- src/tests/checkpoint_fairness.cc | 2 +- src/tests/checkpoint_stress.cc | 2 +- src/tests/checkpoint_test.h | 2 +- src/tests/create-datadir.cc | 2 +- src/tests/cursor-isolation.cc | 2 +- src/tests/cursor-more-than-a-leaf-provdel.cc | 2 +- src/tests/cursor-set-del-rmw.cc | 2 +- src/tests/cursor-set-range-rmw.cc | 2 
+- src/tests/cursor-step-over-delete.cc | 2 +- src/tests/db-put-simple-deadlock-threads.cc | 2 +- src/tests/db-put-simple-deadlock.cc | 2 +- src/tests/db-put-simple-lockwait.cc | 2 +- src/tests/db-put-update-deadlock.cc | 2 +- src/tests/dbremove-nofile-limit.cc | 2 +- src/tests/del-multiple-huge-primary-row.cc | 2 +- src/tests/del-multiple-srcdb.cc | 2 +- src/tests/del-multiple.cc | 2 +- src/tests/del-simple.cc | 2 +- src/tests/directory_lock.cc | 2 +- src/tests/diskfull.cc | 2 +- src/tests/dump-env.cc | 2 +- src/tests/env-put-multiple.cc | 2 +- src/tests/env_loader_memory.cc | 2 +- src/tests/env_nproc.cc | 2 +- src/tests/env_startup.cc | 2 +- src/tests/filesize.cc | 2 +- src/tests/get_key_after_bytes_unit.cc | 2 +- src/tests/get_last_key.cc | 2 +- src/tests/helgrind1.cc | 2 +- src/tests/helgrind2.cc | 2 +- src/tests/helgrind3.cc | 2 +- src/tests/hot-optimize-table-tests.cc | 2 +- src/tests/hotindexer-bw.cc | 2 +- src/tests/hotindexer-error-callback.cc | 2 +- src/tests/hotindexer-insert-committed-optimized.cc | 2 +- src/tests/hotindexer-insert-committed.cc | 2 +- src/tests/hotindexer-insert-provisional.cc | 2 +- src/tests/hotindexer-lock-test.cc | 2 +- src/tests/hotindexer-multiclient.cc | 2 +- src/tests/hotindexer-nested-insert-committed.cc | 2 +- src/tests/hotindexer-put-abort.cc | 2 +- src/tests/hotindexer-put-commit.cc | 2 +- src/tests/hotindexer-put-multiple.cc | 2 +- src/tests/hotindexer-simple-abort-put.cc | 2 +- src/tests/hotindexer-simple-abort.cc | 2 +- src/tests/hotindexer-undo-do-test.cc | 2 +- src/tests/hotindexer-with-queries.cc | 2 +- src/tests/inflate.cc | 2 +- src/tests/inflate2.cc | 2 +- src/tests/insert-dup-prelock.cc | 2 +- src/tests/isolation-read-committed.cc | 2 +- src/tests/isolation.cc | 2 +- src/tests/key-val.h | 2 +- src/tests/keyrange-merge.cc | 2 +- src/tests/keyrange.cc | 2 +- src/tests/last-verify-time.cc | 2 +- src/tests/loader-cleanup-test.cc | 2 +- src/tests/loader-close-nproc-limit.cc | 2 +- src/tests/loader-create-abort.cc | 2 +- 
src/tests/loader-create-close.cc | 2 +- src/tests/loader-create-commit-nproc-limit.cc | 2 +- src/tests/loader-create-nproc-limit.cc | 2 +- src/tests/loader-dup-test.cc | 2 +- src/tests/loader-no-puts.cc | 2 +- src/tests/loader-reference-test.cc | 2 +- src/tests/loader-stress-del.cc | 2 +- src/tests/loader-stress-test.cc | 2 +- src/tests/loader-tpch-load.cc | 2 +- src/tests/locktree_escalation_stalls.cc | 2 +- src/tests/manyfiles.cc | 2 +- src/tests/maxsize-for-loader.cc | 2 +- src/tests/medium-nested-commit-commit.cc | 2 +- src/tests/multiprocess.cc | 2 +- src/tests/mvcc-create-table.cc | 2 +- src/tests/mvcc-many-committed.cc | 2 +- src/tests/mvcc-read-committed.cc | 2 +- src/tests/openlimit17-locktree.cc | 2 +- src/tests/openlimit17-metafiles.cc | 2 +- src/tests/openlimit17.cc | 2 +- src/tests/perf_checkpoint_var.cc | 2 +- src/tests/perf_child_txn.cc | 2 +- src/tests/perf_cursor_nop.cc | 2 +- src/tests/perf_iibench.cc | 2 +- src/tests/perf_insert.cc | 2 +- src/tests/perf_malloc_free.cc | 2 +- src/tests/perf_nop.cc | 2 +- src/tests/perf_ptquery.cc | 2 +- src/tests/perf_ptquery2.cc | 2 +- src/tests/perf_rangequery.cc | 2 +- src/tests/perf_read_txn.cc | 2 +- src/tests/perf_read_txn_single_thread.cc | 2 +- src/tests/perf_read_write.cc | 2 +- src/tests/perf_txn_single_thread.cc | 2 +- src/tests/powerfail.cc | 2 +- src/tests/preload-db-nested.cc | 2 +- src/tests/preload-db.cc | 2 +- src/tests/prelock-read-read.cc | 2 +- src/tests/prelock-read-write.cc | 2 +- src/tests/prelock-write-read.cc | 2 +- src/tests/prelock-write-write.cc | 2 +- src/tests/print_engine_status.cc | 2 +- src/tests/progress.cc | 2 +- src/tests/put-del-multiple-array-indexing.cc | 2 +- src/tests/queries_with_deletes.cc | 2 +- src/tests/recover-2483.cc | 2 +- src/tests/recover-3113.cc | 2 +- src/tests/recover-5146.cc | 2 +- src/tests/recover-checkpoint-fcreate-fdelete-fcreate.cc | 2 +- src/tests/recover-checkpoint-fopen-abort.cc | 2 +- src/tests/recover-checkpoint-fopen-commit.cc | 2 +- 
src/tests/recover-child-rollback.cc | 2 +- src/tests/recover-compare-db-descriptor.cc | 2 +- src/tests/recover-compare-db.cc | 2 +- src/tests/recover-del-multiple-abort.cc | 2 +- src/tests/recover-del-multiple-srcdb-fdelete-all.cc | 2 +- src/tests/recover-del-multiple.cc | 2 +- src/tests/recover-delboth-after-checkpoint.cc | 2 +- src/tests/recover-delboth-checkpoint.cc | 2 +- src/tests/recover-descriptor.cc | 2 +- src/tests/recover-descriptor10.cc | 2 +- src/tests/recover-descriptor11.cc | 2 +- src/tests/recover-descriptor12.cc | 2 +- src/tests/recover-descriptor2.cc | 2 +- src/tests/recover-descriptor3.cc | 2 +- src/tests/recover-descriptor4.cc | 2 +- src/tests/recover-descriptor5.cc | 2 +- src/tests/recover-descriptor6.cc | 2 +- src/tests/recover-descriptor7.cc | 2 +- src/tests/recover-descriptor8.cc | 2 +- src/tests/recover-descriptor9.cc | 2 +- src/tests/recover-fassociate.cc | 2 +- src/tests/recover-fclose-in-checkpoint.cc | 2 +- src/tests/recover-fcreate-basementnodesize.cc | 2 +- src/tests/recover-fcreate-fclose.cc | 2 +- src/tests/recover-fcreate-fdelete.cc | 2 +- src/tests/recover-fcreate-nodesize.cc | 2 +- src/tests/recover-fcreate-xabort.cc | 2 +- src/tests/recover-flt1.cc | 2 +- src/tests/recover-flt10.cc | 2 +- src/tests/recover-flt2.cc | 2 +- src/tests/recover-flt3.cc | 2 +- src/tests/recover-flt4.cc | 2 +- src/tests/recover-flt5.cc | 2 +- src/tests/recover-flt6.cc | 2 +- src/tests/recover-flt7.cc | 2 +- src/tests/recover-flt8.cc | 2 +- src/tests/recover-flt9.cc | 2 +- src/tests/recover-fopen-checkpoint-fclose.cc | 2 +- src/tests/recover-fopen-fclose-checkpoint.cc | 2 +- src/tests/recover-fopen-fdelete-checkpoint-fcreate.cc | 2 +- src/tests/recover-hotindexer-simple-abort-put.cc | 2 +- src/tests/recover-loader-test.cc | 2 +- src/tests/recover-lsn-filter-multiple.cc | 2 +- src/tests/recover-lsn-filter.cc | 2 +- src/tests/recover-missing-dbfile-2.cc | 2 +- src/tests/recover-missing-dbfile.cc | 2 +- src/tests/recover-missing-logfile.cc | 2 +- 
src/tests/recover-put-multiple-abort.cc | 2 +- src/tests/recover-put-multiple-fdelete-all.cc | 2 +- src/tests/recover-put-multiple-fdelete-some.cc | 2 +- src/tests/recover-put-multiple-srcdb-fdelete-all.cc | 2 +- src/tests/recover-put-multiple.cc | 2 +- src/tests/recover-split-checkpoint.cc | 2 +- src/tests/recover-straddle-txn-nested.cc | 2 +- src/tests/recover-straddle-txn.cc | 2 +- src/tests/recover-tablelock.cc | 2 +- src/tests/recover-test-logsuppress-put.cc | 2 +- src/tests/recover-test-logsuppress.cc | 2 +- src/tests/recover-test1.cc | 2 +- src/tests/recover-test2.cc | 2 +- src/tests/recover-test3.cc | 2 +- src/tests/recover-test_crash_in_flusher_thread.h | 2 +- src/tests/recover-test_stress1.cc | 2 +- src/tests/recover-test_stress2.cc | 2 +- src/tests/recover-test_stress3.cc | 2 +- src/tests/recover-test_stress_openclose.cc | 2 +- src/tests/recover-update-multiple-abort.cc | 2 +- src/tests/recover-update-multiple.cc | 2 +- src/tests/recover-update_aborts.cc | 2 +- src/tests/recover-update_aborts_before_checkpoint.cc | 2 +- src/tests/recover-update_aborts_before_close.cc | 2 +- src/tests/recover-update_broadcast_aborts.cc | 2 +- src/tests/recover-update_broadcast_aborts2.cc | 2 +- src/tests/recover-update_broadcast_aborts3.cc | 2 +- .../recover-update_broadcast_aborts_before_checkpoint.cc | 2 +- src/tests/recover-update_broadcast_aborts_before_close.cc | 2 +- src/tests/recover-update_broadcast_changes_values.cc | 2 +- src/tests/recover-update_broadcast_changes_values2.cc | 2 +- src/tests/recover-update_broadcast_changes_values3.cc | 2 +- ...cover-update_broadcast_changes_values_before_checkpoint.cc | 2 +- .../recover-update_broadcast_changes_values_before_close.cc | 2 +- src/tests/recover-update_changes_values.cc | 2 +- src/tests/recover-update_changes_values_before_checkpoint.cc | 2 +- src/tests/recover-update_changes_values_before_close.cc | 2 +- src/tests/recover-upgrade-db-descriptor-multihandle.cc | 2 +- src/tests/recover-upgrade-db-descriptor.cc | 2 +- 
src/tests/recover-x1-abort.cc | 2 +- src/tests/recover-x1-commit.cc | 2 +- src/tests/recover-x1-nested-abort.cc | 2 +- src/tests/recover-x1-nested-commit.cc | 2 +- src/tests/recover-x2-abort.cc | 2 +- src/tests/recover-x2-commit.cc | 2 +- src/tests/recovery_fileops_stress.cc | 2 +- src/tests/recovery_fileops_unit.cc | 2 +- src/tests/recovery_stress.cc | 2 +- src/tests/redirect.cc | 2 +- src/tests/replace-into-write-lock.cc | 2 +- src/tests/root_fifo_1.cc | 2 +- src/tests/root_fifo_2.cc | 2 +- src/tests/root_fifo_31.cc | 2 +- src/tests/root_fifo_32.cc | 2 +- src/tests/root_fifo_41.cc | 2 +- src/tests/rowsize.cc | 2 +- src/tests/seqinsert.cc | 2 +- src/tests/shutdown-3344.cc | 2 +- src/tests/simple.cc | 2 +- src/tests/stat64-create-modify-times.cc | 2 +- src/tests/stat64-null-txn.cc | 2 +- src/tests/stat64-root-changes.cc | 2 +- src/tests/stat64.cc | 2 +- src/tests/stress-gc.cc | 2 +- src/tests/stress-gc2.cc | 2 +- src/tests/stress-test.cc | 2 +- src/tests/stress_openclose.h | 2 +- src/tests/test-5138.cc | 2 +- src/tests/test-nested-xopen-eclose.cc | 2 +- src/tests/test-prepare.cc | 2 +- src/tests/test-prepare2.cc | 2 +- src/tests/test-prepare3.cc | 2 +- src/tests/test-rollinclude.cc | 2 +- src/tests/test-xa-prepare.cc | 2 +- src/tests/test-xopen-eclose.cc | 2 +- src/tests/test.h | 2 +- src/tests/test1572.cc | 2 +- src/tests/test1753.cc | 2 +- src/tests/test1842.cc | 2 +- src/tests/test3039.cc | 2 +- src/tests/test3219.cc | 2 +- src/tests/test3522.cc | 2 +- src/tests/test3522b.cc | 2 +- src/tests/test3529.cc | 2 +- src/tests/test4573-logtrim.cc | 2 +- src/tests/test5092.cc | 2 +- src/tests/test938.cc | 2 +- src/tests/test938b.cc | 2 +- src/tests/test938c.cc | 2 +- src/tests/test_3529_insert_2.cc | 2 +- src/tests/test_3529_table_lock.cc | 2 +- src/tests/test_3645.cc | 2 +- src/tests/test_3755.cc | 2 +- src/tests/test_4015.cc | 2 +- src/tests/test_4368.cc | 2 +- src/tests/test_4657.cc | 2 +- src/tests/test_5015.cc | 2 +- src/tests/test_5469.cc | 2 +- 
src/tests/test_789.cc | 2 +- src/tests/test_935.cc | 2 +- src/tests/test_abort1.cc | 2 +- src/tests/test_abort2.cc | 2 +- src/tests/test_abort3.cc | 2 +- src/tests/test_abort4.cc | 2 +- src/tests/test_abort5.cc | 2 +- src/tests/test_abort_delete_first.cc | 2 +- src/tests/test_archive0.cc | 2 +- src/tests/test_archive1.cc | 2 +- src/tests/test_archive2.cc | 2 +- src/tests/test_bad_implicit_promotion.cc | 2 +- src/tests/test_blobs_leaf_split.cc | 2 +- src/tests/test_bulk_fetch.cc | 2 +- src/tests/test_cachesize.cc | 2 +- src/tests/test_cmp_descriptor.cc | 2 +- src/tests/test_compression_methods.cc | 2 +- src/tests/test_cursor_2.cc | 2 +- src/tests/test_cursor_3.cc | 2 +- src/tests/test_cursor_DB_NEXT_no_dup.cc | 2 +- src/tests/test_cursor_db_current.cc | 2 +- src/tests/test_cursor_delete2.cc | 2 +- src/tests/test_cursor_flags.cc | 2 +- src/tests/test_cursor_interrupt.cc | 2 +- src/tests/test_cursor_nonleaf_expand.cc | 2 +- src/tests/test_cursor_null.cc | 2 +- src/tests/test_cursor_stickyness.cc | 2 +- src/tests/test_cursor_with_read_txn.cc | 2 +- src/tests/test_db_already_exists.cc | 2 +- src/tests/test_db_change_pagesize.cc | 2 +- src/tests/test_db_change_xxx.cc | 2 +- src/tests/test_db_close_no_open.cc | 2 +- src/tests/test_db_current_clobbers_db.cc | 2 +- src/tests/test_db_dbt_mem_behavior.cc | 2 +- src/tests/test_db_delete.cc | 2 +- src/tests/test_db_descriptor.cc | 2 +- src/tests/test_db_env_open_close.cc | 2 +- src/tests/test_db_env_open_nocreate.cc | 2 +- src/tests/test_db_env_open_open_close.cc | 2 +- src/tests/test_db_env_set_errpfx.cc | 2 +- src/tests/test_db_env_set_lg_dir.cc | 2 +- src/tests/test_db_env_set_tmp_dir.cc | 2 +- src/tests/test_db_env_strdup_null.cc | 2 +- src/tests/test_db_get_put_flags.cc | 2 +- src/tests/test_db_named_delete_last.cc | 2 +- src/tests/test_db_no_env.cc | 2 +- src/tests/test_db_open_notexist_reopen.cc | 2 +- src/tests/test_db_remove.cc | 2 +- src/tests/test_db_remove_subdb.cc | 2 +- src/tests/test_db_set_flags.cc | 2 +- 
src/tests/test_db_subdb.cc | 2 +- src/tests/test_db_subdb_different_flags.cc | 2 +- src/tests/test_db_txn_locks_nonheaviside.cc | 2 +- src/tests/test_db_txn_locks_read_uncommitted.cc | 2 +- src/tests/test_db_version.cc | 2 +- src/tests/test_env_close_flags.cc | 2 +- src/tests/test_env_create_db_create.cc | 2 +- src/tests/test_env_open_flags.cc | 2 +- src/tests/test_equal_keys_with_different_bytes.cc | 2 +- src/tests/test_error.cc | 2 +- src/tests/test_forkjoin.cc | 2 +- src/tests/test_get_max_row_size.cc | 2 +- src/tests/test_get_zeroed_dbt.cc | 2 +- src/tests/test_groupcommit_count.cc | 2 +- src/tests/test_groupcommit_perf.cc | 2 +- src/tests/test_hsoc.cc | 2 +- src/tests/test_insert_cursor_delete_insert.cc | 2 +- src/tests/test_insert_many_gc.cc | 2 +- src/tests/test_insert_memleak.cc | 2 +- src/tests/test_insert_unique.cc | 2 +- src/tests/test_iterate_live_transactions.cc | 2 +- src/tests/test_iterate_pending_lock_requests.cc | 2 +- src/tests/test_keylen_diff.cc | 2 +- src/tests/test_kv_gen.h | 2 +- src/tests/test_kv_limits.cc | 2 +- src/tests/test_large_update_broadcast_small_cachetable.cc | 2 +- src/tests/test_lock_timeout_callback.cc | 2 +- src/tests/test_locking_with_read_txn.cc | 2 +- src/tests/test_locktree_close.cc | 2 +- src/tests/test_log0.cc | 2 +- src/tests/test_log1.cc | 2 +- src/tests/test_log10.cc | 2 +- src/tests/test_log1_abort.cc | 2 +- src/tests/test_log2.cc | 2 +- src/tests/test_log2_abort.cc | 2 +- src/tests/test_log3.cc | 2 +- src/tests/test_log3_abort.cc | 2 +- src/tests/test_log4.cc | 2 +- src/tests/test_log4_abort.cc | 2 +- src/tests/test_log5.cc | 2 +- src/tests/test_log5_abort.cc | 2 +- src/tests/test_log6.cc | 2 +- src/tests/test_log6_abort.cc | 2 +- src/tests/test_log6a_abort.cc | 2 +- src/tests/test_log7.cc | 2 +- src/tests/test_log8.cc | 2 +- src/tests/test_log9.cc | 2 +- src/tests/test_logflush.cc | 2 +- src/tests/test_logmax.cc | 2 +- src/tests/test_mostly_seq.cc | 2 +- src/tests/test_multiple_checkpoints_block_commit.cc | 2 +- 
src/tests/test_nested.cc | 2 +- src/tests/test_nodup_set.cc | 2 +- src/tests/test_query.cc | 2 +- src/tests/test_rand_insert.cc | 2 +- src/tests/test_read_txn_invalid_ops.cc | 2 +- src/tests/test_redirect_func.cc | 2 +- src/tests/test_restrict.cc | 2 +- src/tests/test_reverse_compare_fun.cc | 2 +- src/tests/test_set_func_malloc.cc | 2 +- src/tests/test_simple_read_txn.cc | 2 +- src/tests/test_stress0.cc | 2 +- src/tests/test_stress1.cc | 2 +- src/tests/test_stress2.cc | 2 +- src/tests/test_stress3.cc | 2 +- src/tests/test_stress4.cc | 2 +- src/tests/test_stress5.cc | 2 +- src/tests/test_stress6.cc | 2 +- src/tests/test_stress7.cc | 2 +- src/tests/test_stress_hot_indexing.cc | 2 +- src/tests/test_stress_openclose.cc | 2 +- src/tests/test_stress_with_verify.cc | 2 +- src/tests/test_thread_flags.cc | 2 +- src/tests/test_thread_insert.cc | 2 +- src/tests/test_trans_desc_during_chkpt.cc | 2 +- src/tests/test_trans_desc_during_chkpt2.cc | 2 +- src/tests/test_trans_desc_during_chkpt3.cc | 2 +- src/tests/test_trans_desc_during_chkpt4.cc | 2 +- src/tests/test_transactional_descriptor.cc | 2 +- src/tests/test_txn_abort5.cc | 2 +- src/tests/test_txn_abort5a.cc | 2 +- src/tests/test_txn_abort6.cc | 2 +- src/tests/test_txn_abort7.cc | 2 +- src/tests/test_txn_begin_commit.cc | 2 +- src/tests/test_txn_close_before_commit.cc | 2 +- src/tests/test_txn_close_before_prepare_commit.cc | 2 +- src/tests/test_txn_cursor_last.cc | 2 +- src/tests/test_txn_nested1.cc | 2 +- src/tests/test_txn_nested2.cc | 2 +- src/tests/test_txn_nested3.cc | 2 +- src/tests/test_txn_nested4.cc | 2 +- src/tests/test_txn_nested5.cc | 2 +- src/tests/test_txn_nested_abort.cc | 2 +- src/tests/test_txn_nested_abort2.cc | 2 +- src/tests/test_txn_nested_abort3.cc | 2 +- src/tests/test_txn_nested_abort4.cc | 2 +- src/tests/test_txn_recover3.cc | 2 +- src/tests/test_unused_memory_crash.cc | 2 +- src/tests/test_update_abort_works.cc | 2 +- src/tests/test_update_broadcast_abort_works.cc | 2 +- 
src/tests/test_update_broadcast_calls_back.cc | 2 +- src/tests/test_update_broadcast_can_delete_elements.cc | 2 +- src/tests/test_update_broadcast_changes_values.cc | 2 +- src/tests/test_update_broadcast_indexer.cc | 2 +- src/tests/test_update_broadcast_loader.cc | 2 +- src/tests/test_update_broadcast_nested_updates.cc | 2 +- src/tests/test_update_broadcast_previously_deleted.cc | 2 +- src/tests/test_update_broadcast_stress.cc | 2 +- src/tests/test_update_broadcast_update_fun_has_choices.cc | 2 +- src/tests/test_update_broadcast_with_empty_table.cc | 2 +- src/tests/test_update_calls_back.cc | 2 +- src/tests/test_update_can_delete_elements.cc | 2 +- src/tests/test_update_changes_values.cc | 2 +- src/tests/test_update_nested_updates.cc | 2 +- src/tests/test_update_nonexistent_keys.cc | 2 +- src/tests/test_update_previously_deleted.cc | 2 +- src/tests/test_update_stress.cc | 2 +- src/tests/test_update_txn_snapshot_works_concurrently.cc | 2 +- .../test_update_txn_snapshot_works_correctly_with_deletes.cc | 2 +- src/tests/test_update_with_empty_table.cc | 2 +- src/tests/test_updates_single_key.cc | 2 +- src/tests/test_weakxaction.cc | 2 +- src/tests/test_zero_length_keys.cc | 2 +- src/tests/threaded_stress_test_helpers.h | 2 +- src/tests/time_create_db.cc | 2 +- src/tests/transactional_fileops.cc | 2 +- src/tests/update-multiple-data-diagonal.cc | 2 +- src/tests/update-multiple-key0.cc | 2 +- src/tests/update-multiple-nochange.cc | 2 +- src/tests/update-multiple-with-indexer-array.cc | 2 +- src/tests/update-multiple-with-indexer.cc | 2 +- src/tests/update.cc | 2 +- src/tests/upgrade-test-1.cc | 2 +- src/tests/upgrade-test-2.cc | 2 +- src/tests/upgrade-test-3.cc | 2 +- src/tests/upgrade-test-4.cc | 2 +- src/tests/upgrade-test-5.cc | 2 +- src/tests/upgrade-test-6.cc | 2 +- src/tests/upgrade-test-7.cc | 2 +- src/tests/upgrade_simple.cc | 2 +- src/tests/xa-dirty-commit.cc | 2 +- src/tests/xa-dirty-rollback.cc | 2 +- src/tests/xa-txn-discard-abort.cc | 2 +- 
src/tests/xa-txn-discard-commit.cc | 2 +- src/tests/zombie_db.cc | 2 +- src/toku_patent.cc | 4 ++-- src/ydb-internal.h | 2 +- src/ydb.cc | 2 +- src/ydb.h | 2 +- src/ydb_cursor.cc | 2 +- src/ydb_cursor.h | 2 +- src/ydb_db.cc | 2 +- src/ydb_db.h | 2 +- src/ydb_env_func.cc | 2 +- src/ydb_env_func.h | 2 +- src/ydb_lib.cc | 2 +- src/ydb_load.h | 2 +- src/ydb_row_lock.cc | 2 +- src/ydb_row_lock.h | 2 +- src/ydb_txn.cc | 2 +- src/ydb_txn.h | 2 +- src/ydb_write.cc | 2 +- src/ydb_write.h | 2 +- tools/ba_replay.cc | 2 +- tools/ftverify.cc | 2 +- tools/tdb-recover.cc | 2 +- tools/tdb_logprint.cc | 2 +- tools/tokudb_dump.cc | 2 +- tools/tokuftdump.cc | 2 +- util/bytestring.h | 2 +- util/circular_buffer.cc | 2 +- util/circular_buffer.h | 2 +- util/constexpr.h | 2 +- util/context.cc | 2 +- util/context.h | 2 +- util/dbt.cc | 2 +- util/dbt.h | 2 +- util/dmt.cc | 2 +- util/dmt.h | 2 +- util/doubly_linked_list.h | 2 +- util/frwlock.cc | 2 +- util/frwlock.h | 2 +- util/growable_array.h | 2 +- util/kibbutz.cc | 2 +- util/kibbutz.h | 2 +- util/memarena.cc | 2 +- util/memarena.h | 2 +- util/mempool.cc | 2 +- util/mempool.h | 2 +- util/minicron.cc | 2 +- util/minicron.h | 2 +- util/nb_mutex.h | 2 +- util/omt.cc | 2 +- util/omt.h | 2 +- util/partitioned_counter.cc | 2 +- util/partitioned_counter.h | 2 +- util/queue.cc | 2 +- util/queue.h | 2 +- util/rwlock.h | 2 +- util/scoped_malloc.cc | 2 +- util/scoped_malloc.h | 2 +- util/sort.h | 2 +- util/status.h | 2 +- util/tests/marked-omt-test.cc | 2 +- util/tests/memarena-test.cc | 2 +- util/tests/minicron-test.cc | 2 +- util/tests/omt-test.cc | 2 +- util/tests/omt-tmpl-test.cc | 2 +- util/tests/queue-test.cc | 2 +- util/tests/rwlock_condvar.h | 2 +- util/tests/sort-tmpl-test.cc | 2 +- util/tests/test-kibbutz.cc | 2 +- util/tests/test-kibbutz2.cc | 2 +- util/tests/test-rwlock-cheapness.cc | 2 +- util/tests/test-rwlock.cc | 2 +- util/tests/test.h | 2 +- util/tests/test_circular_buffer.cc | 2 +- util/tests/test_doubly_linked_list.cc | 2 +- 
util/tests/test_partitioned_counter.cc | 2 +- util/tests/test_partitioned_counter_5833.cc | 2 +- util/tests/threadpool-nproc-limit.cc | 2 +- util/tests/threadpool-test.cc | 2 +- util/tests/threadpool-testrunf.cc | 2 +- util/tests/x1764-test.cc | 2 +- util/threadpool.cc | 2 +- util/threadpool.h | 2 +- util/x1764.cc | 2 +- util/x1764.h | 2 +- 977 files changed, 978 insertions(+), 978 deletions(-) diff --git a/README-TOKUDB b/README-TOKUDB index 68fb40b3671..7d70059a912 100644 --- a/README-TOKUDB +++ b/README-TOKUDB @@ -25,7 +25,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/buildheader/make_tdb.cc b/buildheader/make_tdb.cc index 7ddcd161989..6b25226fa5b 100644 --- a/buildheader/make_tdb.cc +++ b/buildheader/make_tdb.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/bndata.cc b/ft/bndata.cc index 9841d7ef2ed..a277e52aa0b 100644 --- a/ft/bndata.cc +++ b/ft/bndata.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/bndata.h b/ft/bndata.h index f228a3b04d7..0cded5de5fc 100644 --- a/ft/bndata.h +++ b/ft/bndata.h @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/ft/cachetable/background_job_manager.cc b/ft/cachetable/background_job_manager.cc index 12588cd067d..8db05018d3c 100644 --- a/ft/cachetable/background_job_manager.cc +++ b/ft/cachetable/background_job_manager.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/cachetable/background_job_manager.h b/ft/cachetable/background_job_manager.h index c16902987b2..d977abae418 100644 --- a/ft/cachetable/background_job_manager.h +++ b/ft/cachetable/background_job_manager.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/cachetable/cachetable-internal.h b/ft/cachetable/cachetable-internal.h index 78c30bf7861..d5dc3ffa5fb 100644 --- a/ft/cachetable/cachetable-internal.h +++ b/ft/cachetable/cachetable-internal.h @@ -30,7 +30,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/cachetable/cachetable.cc b/ft/cachetable/cachetable.cc index 7073ff3435e..bcd9fc9b562 100644 --- a/ft/cachetable/cachetable.cc +++ b/ft/cachetable/cachetable.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/cachetable/cachetable.h b/ft/cachetable/cachetable.h index b4aacd8547b..3afc600437c 100644 --- a/ft/cachetable/cachetable.h +++ b/ft/cachetable/cachetable.h @@ -30,7 +30,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. 
+ TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/cachetable/checkpoint.cc b/ft/cachetable/checkpoint.cc index 3da668a815d..7d9c45f3cb7 100644 --- a/ft/cachetable/checkpoint.cc +++ b/ft/cachetable/checkpoint.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/cachetable/checkpoint.h b/ft/cachetable/checkpoint.h index 2e4873084e6..57a41210e83 100644 --- a/ft/cachetable/checkpoint.h +++ b/ft/cachetable/checkpoint.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/comparator.h b/ft/comparator.h index e468e1fe82b..81a794e4afd 100644 --- a/ft/comparator.h +++ b/ft/comparator.h @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/cursor.cc b/ft/cursor.cc index 098721b2885..9814a49416b 100644 --- a/ft/cursor.cc +++ b/ft/cursor.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2014 Tokutek, Inc. DISCLAIMER: diff --git a/ft/cursor.h b/ft/cursor.h index 947b69ccf47..da2b3d5c8a1 100644 --- a/ft/cursor.h +++ b/ft/cursor.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2014 Tokutek, Inc. 
DISCLAIMER: diff --git a/ft/ft-cachetable-wrappers.cc b/ft/ft-cachetable-wrappers.cc index d80dea884cd..b8bee800f36 100644 --- a/ft/ft-cachetable-wrappers.cc +++ b/ft/ft-cachetable-wrappers.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/ft-cachetable-wrappers.h b/ft/ft-cachetable-wrappers.h index d65ab083efd..72056e4d58c 100644 --- a/ft/ft-cachetable-wrappers.h +++ b/ft/ft-cachetable-wrappers.h @@ -30,7 +30,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/ft-flusher-internal.h b/ft/ft-flusher-internal.h index 539ded24def..f26b2d56ef5 100644 --- a/ft/ft-flusher-internal.h +++ b/ft/ft-flusher-internal.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/ft-flusher.cc b/ft/ft-flusher.cc index f9b342da3cd..1c11f276f4f 100644 --- a/ft/ft-flusher.cc +++ b/ft/ft-flusher.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/ft-flusher.h b/ft/ft-flusher.h index 43fc5c297da..47bf4e7cf77 100644 --- a/ft/ft-flusher.h +++ b/ft/ft-flusher.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/ft/ft-hot-flusher.cc b/ft/ft-hot-flusher.cc index f1e6f1a93de..0a8bd131bac 100644 --- a/ft/ft-hot-flusher.cc +++ b/ft/ft-hot-flusher.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/ft-internal.h b/ft/ft-internal.h index 571e696484c..3cd39705571 100644 --- a/ft/ft-internal.h +++ b/ft/ft-internal.h @@ -30,7 +30,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/ft-ops.cc b/ft/ft-ops.cc index c4d77a43793..53168f87d64 100644 --- a/ft/ft-ops.cc +++ b/ft/ft-ops.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/ft-ops.h b/ft/ft-ops.h index bdd9d33988c..9b28d75b111 100644 --- a/ft/ft-ops.h +++ b/ft/ft-ops.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/ft-test-helpers.cc b/ft/ft-test-helpers.cc index 2eabaafb62a..dc0b77099fa 100644 --- a/ft/ft-test-helpers.cc +++ b/ft/ft-test-helpers.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/ft-verify.cc b/ft/ft-verify.cc index d21f4d1d805..cbb5159e276 100644 --- a/ft/ft-verify.cc +++ b/ft/ft-verify.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. 
+ TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/ft.cc b/ft/ft.cc index f41853f76c1..f0fd148c83f 100644 --- a/ft/ft.cc +++ b/ft/ft.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/ft.h b/ft/ft.h index 73f52dea990..224a917c7b4 100644 --- a/ft/ft.h +++ b/ft/ft.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/le-cursor.cc b/ft/le-cursor.cc index 1b908064127..f840c021fd2 100644 --- a/ft/le-cursor.cc +++ b/ft/le-cursor.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/le-cursor.h b/ft/le-cursor.h index ca99767ad05..2fc5e09bc2c 100644 --- a/ft/le-cursor.h +++ b/ft/le-cursor.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/leafentry.cc b/ft/leafentry.cc index 57d4241de9a..075f29fa191 100644 --- a/ft/leafentry.cc +++ b/ft/leafentry.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/leafentry.h b/ft/leafentry.h index 07ae06d64d5..eddd49481fb 100644 --- a/ft/leafentry.h +++ b/ft/leafentry.h @@ -30,7 +30,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. 
+ TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/loader/callbacks.cc b/ft/loader/callbacks.cc index 323196bc218..40069c144f6 100644 --- a/ft/loader/callbacks.cc +++ b/ft/loader/callbacks.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/loader/dbufio.cc b/ft/loader/dbufio.cc index 0be68d250c4..c3f72e14ab1 100644 --- a/ft/loader/dbufio.cc +++ b/ft/loader/dbufio.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/loader/dbufio.h b/ft/loader/dbufio.h index 8bdcbc122ee..da31f22277d 100644 --- a/ft/loader/dbufio.h +++ b/ft/loader/dbufio.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/loader/loader-internal.h b/ft/loader/loader-internal.h index e7091df2650..ea1b9c5afa3 100644 --- a/ft/loader/loader-internal.h +++ b/ft/loader/loader-internal.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/loader/loader.cc b/ft/loader/loader.cc index de904940d86..a6f41cd6b54 100644 --- a/ft/loader/loader.cc +++ b/ft/loader/loader.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/ft/loader/loader.h b/ft/loader/loader.h index 9ab0d482cd3..4ef45dea0ac 100644 --- a/ft/loader/loader.h +++ b/ft/loader/loader.h @@ -30,7 +30,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/loader/pqueue.cc b/ft/loader/pqueue.cc index 25620e37cc5..c50664f5e45 100644 --- a/ft/loader/pqueue.cc +++ b/ft/loader/pqueue.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/loader/pqueue.h b/ft/loader/pqueue.h index 9a8045111bc..43df70e97ff 100644 --- a/ft/loader/pqueue.h +++ b/ft/loader/pqueue.h @@ -30,7 +30,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/logger/log-internal.h b/ft/logger/log-internal.h index d1d19bdad9e..d588f0028d5 100644 --- a/ft/logger/log-internal.h +++ b/ft/logger/log-internal.h @@ -30,7 +30,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/logger/log.h b/ft/logger/log.h index 5dfb35315ee..180f118765b 100644 --- a/ft/logger/log.h +++ b/ft/logger/log.h @@ -30,7 +30,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/ft/logger/log_upgrade.cc b/ft/logger/log_upgrade.cc index 8fd65ae2691..5efc5b9b86c 100644 --- a/ft/logger/log_upgrade.cc +++ b/ft/logger/log_upgrade.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/logger/logcursor.cc b/ft/logger/logcursor.cc index 071ebf9b3b6..0026d113657 100644 --- a/ft/logger/logcursor.cc +++ b/ft/logger/logcursor.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/logger/logcursor.h b/ft/logger/logcursor.h index e3ae366bbce..15774fb11d3 100644 --- a/ft/logger/logcursor.h +++ b/ft/logger/logcursor.h @@ -30,7 +30,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/logger/logfilemgr.cc b/ft/logger/logfilemgr.cc index 17c4bc922ad..4840d8908ce 100644 --- a/ft/logger/logfilemgr.cc +++ b/ft/logger/logfilemgr.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/logger/logfilemgr.h b/ft/logger/logfilemgr.h index 73e0a335496..70e59575127 100644 --- a/ft/logger/logfilemgr.h +++ b/ft/logger/logfilemgr.h @@ -30,7 +30,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/ft/logger/logformat.cc b/ft/logger/logformat.cc index 39ad7d0b798..698b612c078 100644 --- a/ft/logger/logformat.cc +++ b/ft/logger/logformat.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/logger/logger.cc b/ft/logger/logger.cc index 3ce3a7cc2a6..332d94b47e5 100644 --- a/ft/logger/logger.cc +++ b/ft/logger/logger.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/logger/logger.h b/ft/logger/logger.h index 9ef62f9b596..8a81c67b477 100644 --- a/ft/logger/logger.h +++ b/ft/logger/logger.h @@ -30,7 +30,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/logger/recover.cc b/ft/logger/recover.cc index ad05ecb18e3..d194f3ebdd2 100644 --- a/ft/logger/recover.cc +++ b/ft/logger/recover.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/logger/recover.h b/ft/logger/recover.h index 134bbcdc480..47815332ffa 100644 --- a/ft/logger/recover.h +++ b/ft/logger/recover.h @@ -30,7 +30,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/ft/msg.cc b/ft/msg.cc index 09a9d17e306..1fedbe745af 100644 --- a/ft/msg.cc +++ b/ft/msg.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/msg.h b/ft/msg.h index 1c974a0c84a..3a26f068399 100644 --- a/ft/msg.h +++ b/ft/msg.h @@ -35,7 +35,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/msg_buffer.cc b/ft/msg_buffer.cc index 9da03033ebc..3a72fdb7090 100644 --- a/ft/msg_buffer.cc +++ b/ft/msg_buffer.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2014 Tokutek, Inc. DISCLAIMER: diff --git a/ft/msg_buffer.h b/ft/msg_buffer.h index 703f3c5fced..b63b4a354b2 100644 --- a/ft/msg_buffer.h +++ b/ft/msg_buffer.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2014 Tokutek, Inc. DISCLAIMER: diff --git a/ft/node.cc b/ft/node.cc index b03c64c5f32..f6a8c0bb2b3 100644 --- a/ft/node.cc +++ b/ft/node.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/node.h b/ft/node.h index 460d7876db8..7b1b4023d84 100644 --- a/ft/node.h +++ b/ft/node.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/ft/pivotkeys.cc b/ft/pivotkeys.cc index 27e1ea14cc1..cf37777d892 100644 --- a/ft/pivotkeys.cc +++ b/ft/pivotkeys.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/serialize/block_allocator.cc b/ft/serialize/block_allocator.cc index c81799d6746..6af0ae82b05 100644 --- a/ft/serialize/block_allocator.cc +++ b/ft/serialize/block_allocator.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/serialize/block_allocator.h b/ft/serialize/block_allocator.h index d849b656dfc..b50dadc9e56 100644 --- a/ft/serialize/block_allocator.h +++ b/ft/serialize/block_allocator.h @@ -30,7 +30,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/serialize/block_allocator_strategy.cc b/ft/serialize/block_allocator_strategy.cc index b6354966e3b..dbfbb1e5585 100644 --- a/ft/serialize/block_allocator_strategy.cc +++ b/ft/serialize/block_allocator_strategy.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2014 Tokutek, Inc. DISCLAIMER: diff --git a/ft/serialize/block_allocator_strategy.h b/ft/serialize/block_allocator_strategy.h index 924142a89b4..3b7c0bafe4e 100644 --- a/ft/serialize/block_allocator_strategy.h +++ b/ft/serialize/block_allocator_strategy.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. 
Copyright (C) 2007-2014 Tokutek, Inc. DISCLAIMER: diff --git a/ft/serialize/block_table.cc b/ft/serialize/block_table.cc index 4bbe709934d..561f03a8871 100644 --- a/ft/serialize/block_table.cc +++ b/ft/serialize/block_table.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/serialize/block_table.h b/ft/serialize/block_table.h index 280c8be23fa..534befaf426 100644 --- a/ft/serialize/block_table.h +++ b/ft/serialize/block_table.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2014 Tokutek, Inc. DISCLAIMER: diff --git a/ft/serialize/compress.cc b/ft/serialize/compress.cc index 100be1302ee..e905220026b 100644 --- a/ft/serialize/compress.cc +++ b/ft/serialize/compress.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/serialize/compress.h b/ft/serialize/compress.h index 8e39fc96220..8b3bb2185b6 100644 --- a/ft/serialize/compress.h +++ b/ft/serialize/compress.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/serialize/ft-node-deserialize.cc b/ft/serialize/ft-node-deserialize.cc index eced7a5e1b9..4e55c222eb7 100644 --- a/ft/serialize/ft-node-deserialize.cc +++ b/ft/serialize/ft-node-deserialize.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/ft/serialize/ft-serialize.cc b/ft/serialize/ft-serialize.cc index e6bd84733a2..855fa19c6e2 100644 --- a/ft/serialize/ft-serialize.cc +++ b/ft/serialize/ft-serialize.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/serialize/ft-serialize.h b/ft/serialize/ft-serialize.h index 05957d70f69..dc8bb68ae40 100644 --- a/ft/serialize/ft-serialize.h +++ b/ft/serialize/ft-serialize.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/serialize/ft_layout_version.h b/ft/serialize/ft_layout_version.h index 9d9796e925d..cf16d472355 100644 --- a/ft/serialize/ft_layout_version.h +++ b/ft/serialize/ft_layout_version.h @@ -30,7 +30,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/serialize/ft_node-serialize.cc b/ft/serialize/ft_node-serialize.cc index 9de8c744564..e9bcd416131 100644 --- a/ft/serialize/ft_node-serialize.cc +++ b/ft/serialize/ft_node-serialize.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/serialize/ft_node-serialize.h b/ft/serialize/ft_node-serialize.h index b9b87c58a3a..28d3f26590e 100644 --- a/ft/serialize/ft_node-serialize.h +++ b/ft/serialize/ft_node-serialize.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. 
Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/serialize/quicklz.cc b/ft/serialize/quicklz.cc index c7cd82c80d5..81f768ababf 100644 --- a/ft/serialize/quicklz.cc +++ b/ft/serialize/quicklz.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/serialize/quicklz.h b/ft/serialize/quicklz.h index 23b6e10f8a6..362a246994f 100644 --- a/ft/serialize/quicklz.h +++ b/ft/serialize/quicklz.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/serialize/rbuf.h b/ft/serialize/rbuf.h index 41f74ed2cfc..c72ea6b79db 100644 --- a/ft/serialize/rbuf.h +++ b/ft/serialize/rbuf.h @@ -30,7 +30,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/serialize/sub_block.cc b/ft/serialize/sub_block.cc index 8ea4fb03be4..1346c76b103 100644 --- a/ft/serialize/sub_block.cc +++ b/ft/serialize/sub_block.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/serialize/sub_block.h b/ft/serialize/sub_block.h index b165d5cd545..1a371c2dcd3 100644 --- a/ft/serialize/sub_block.h +++ b/ft/serialize/sub_block.h @@ -30,7 +30,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/ft/serialize/wbuf.h b/ft/serialize/wbuf.h index efa941f51c3..8c71fb16b20 100644 --- a/ft/serialize/wbuf.h +++ b/ft/serialize/wbuf.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/serialize/workset.h b/ft/serialize/workset.h index b2451d1e169..4efa042b9c3 100644 --- a/ft/serialize/workset.h +++ b/ft/serialize/workset.h @@ -30,7 +30,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/benchmark-test.cc b/ft/tests/benchmark-test.cc index b607d772cca..0acb97daa87 100644 --- a/ft/tests/benchmark-test.cc +++ b/ft/tests/benchmark-test.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/block_allocator_strategy_test.cc b/ft/tests/block_allocator_strategy_test.cc index 1b26f82ff9e..6879002a025 100644 --- a/ft/tests/block_allocator_strategy_test.cc +++ b/ft/tests/block_allocator_strategy_test.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/block_allocator_test.cc b/ft/tests/block_allocator_test.cc index 81dcc739bcd..bbd170ebaab 100644 --- a/ft/tests/block_allocator_test.cc +++ b/ft/tests/block_allocator_test.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/ft/tests/bnc-insert-benchmark.cc b/ft/tests/bnc-insert-benchmark.cc index 43d30be1e3e..bd9f28c858f 100644 --- a/ft/tests/bnc-insert-benchmark.cc +++ b/ft/tests/bnc-insert-benchmark.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/cachetable-4357.cc b/ft/tests/cachetable-4357.cc index 01c8875db5d..0704914cc5b 100644 --- a/ft/tests/cachetable-4357.cc +++ b/ft/tests/cachetable-4357.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/cachetable-4365.cc b/ft/tests/cachetable-4365.cc index 3396becbfdd..1c5a55bf120 100644 --- a/ft/tests/cachetable-4365.cc +++ b/ft/tests/cachetable-4365.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/cachetable-5097.cc b/ft/tests/cachetable-5097.cc index 2da3439fd1f..5cef1f3c6d5 100644 --- a/ft/tests/cachetable-5097.cc +++ b/ft/tests/cachetable-5097.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/cachetable-5978-2.cc b/ft/tests/cachetable-5978-2.cc index 790be1719ab..427bc2c4a42 100644 --- a/ft/tests/cachetable-5978-2.cc +++ b/ft/tests/cachetable-5978-2.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/ft/tests/cachetable-5978.cc b/ft/tests/cachetable-5978.cc index ff140ade37c..11613e5a204 100644 --- a/ft/tests/cachetable-5978.cc +++ b/ft/tests/cachetable-5978.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/cachetable-all-write.cc b/ft/tests/cachetable-all-write.cc index 74e3a357529..b0ebd9ed5e9 100644 --- a/ft/tests/cachetable-all-write.cc +++ b/ft/tests/cachetable-all-write.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/cachetable-checkpoint-pending.cc b/ft/tests/cachetable-checkpoint-pending.cc index e0bde782cd3..53570ec1f0a 100644 --- a/ft/tests/cachetable-checkpoint-pending.cc +++ b/ft/tests/cachetable-checkpoint-pending.cc @@ -30,7 +30,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/cachetable-checkpoint-pinned-nodes.cc b/ft/tests/cachetable-checkpoint-pinned-nodes.cc index 9bc6da74086..bacf48d01b1 100644 --- a/ft/tests/cachetable-checkpoint-pinned-nodes.cc +++ b/ft/tests/cachetable-checkpoint-pinned-nodes.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/ft/tests/cachetable-checkpoint-prefetched-nodes.cc b/ft/tests/cachetable-checkpoint-prefetched-nodes.cc index 59ae451234f..510b2fb458c 100644 --- a/ft/tests/cachetable-checkpoint-prefetched-nodes.cc +++ b/ft/tests/cachetable-checkpoint-prefetched-nodes.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/cachetable-checkpoint-test.cc b/ft/tests/cachetable-checkpoint-test.cc index fa629794217..f1ea464d952 100644 --- a/ft/tests/cachetable-checkpoint-test.cc +++ b/ft/tests/cachetable-checkpoint-test.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/cachetable-checkpointer-class.cc b/ft/tests/cachetable-checkpointer-class.cc index 57e51820a73..6b138cd0bca 100644 --- a/ft/tests/cachetable-checkpointer-class.cc +++ b/ft/tests/cachetable-checkpointer-class.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/cachetable-cleaner-checkpoint.cc b/ft/tests/cachetable-cleaner-checkpoint.cc index 283b781e524..0b726f67306 100644 --- a/ft/tests/cachetable-cleaner-checkpoint.cc +++ b/ft/tests/cachetable-cleaner-checkpoint.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/ft/tests/cachetable-cleaner-checkpoint2.cc b/ft/tests/cachetable-cleaner-checkpoint2.cc index 009f17f2be2..b360d21c177 100644 --- a/ft/tests/cachetable-cleaner-checkpoint2.cc +++ b/ft/tests/cachetable-cleaner-checkpoint2.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/cachetable-cleaner-thread-attrs-accumulate.cc b/ft/tests/cachetable-cleaner-thread-attrs-accumulate.cc index 745facfba99..1b7f4825e17 100644 --- a/ft/tests/cachetable-cleaner-thread-attrs-accumulate.cc +++ b/ft/tests/cachetable-cleaner-thread-attrs-accumulate.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/cachetable-cleaner-thread-empty-cachetable.cc b/ft/tests/cachetable-cleaner-thread-empty-cachetable.cc index 3f31729be06..8c7de0ae914 100644 --- a/ft/tests/cachetable-cleaner-thread-empty-cachetable.cc +++ b/ft/tests/cachetable-cleaner-thread-empty-cachetable.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/cachetable-cleaner-thread-everything-pinned.cc b/ft/tests/cachetable-cleaner-thread-everything-pinned.cc index 92056b7fa13..8e5a3ea40fa 100644 --- a/ft/tests/cachetable-cleaner-thread-everything-pinned.cc +++ b/ft/tests/cachetable-cleaner-thread-everything-pinned.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/ft/tests/cachetable-cleaner-thread-nothing-needs-flushing.cc b/ft/tests/cachetable-cleaner-thread-nothing-needs-flushing.cc index 57aac61ac96..06107b7cefd 100644 --- a/ft/tests/cachetable-cleaner-thread-nothing-needs-flushing.cc +++ b/ft/tests/cachetable-cleaner-thread-nothing-needs-flushing.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/cachetable-cleaner-thread-same-fullhash.cc b/ft/tests/cachetable-cleaner-thread-same-fullhash.cc index 24524134864..de1cb8b612a 100644 --- a/ft/tests/cachetable-cleaner-thread-same-fullhash.cc +++ b/ft/tests/cachetable-cleaner-thread-same-fullhash.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/cachetable-cleaner-thread-simple.cc b/ft/tests/cachetable-cleaner-thread-simple.cc index 89a170c296c..a50495774f3 100644 --- a/ft/tests/cachetable-cleaner-thread-simple.cc +++ b/ft/tests/cachetable-cleaner-thread-simple.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/cachetable-clock-all-pinned.cc b/ft/tests/cachetable-clock-all-pinned.cc index 18482a2dd42..9eac1304fe2 100644 --- a/ft/tests/cachetable-clock-all-pinned.cc +++ b/ft/tests/cachetable-clock-all-pinned.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/ft/tests/cachetable-clock-eviction.cc b/ft/tests/cachetable-clock-eviction.cc index 00c56c06522..ac18ce8ac32 100644 --- a/ft/tests/cachetable-clock-eviction.cc +++ b/ft/tests/cachetable-clock-eviction.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/cachetable-clock-eviction2.cc b/ft/tests/cachetable-clock-eviction2.cc index 3a8266f7b8a..13b941ab054 100644 --- a/ft/tests/cachetable-clock-eviction2.cc +++ b/ft/tests/cachetable-clock-eviction2.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/cachetable-clock-eviction3.cc b/ft/tests/cachetable-clock-eviction3.cc index 0c5210e238c..9f148af1d43 100644 --- a/ft/tests/cachetable-clock-eviction3.cc +++ b/ft/tests/cachetable-clock-eviction3.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/cachetable-clock-eviction4.cc b/ft/tests/cachetable-clock-eviction4.cc index 169c4f31e81..e89319c90a9 100644 --- a/ft/tests/cachetable-clock-eviction4.cc +++ b/ft/tests/cachetable-clock-eviction4.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/ft/tests/cachetable-clone-checkpoint.cc b/ft/tests/cachetable-clone-checkpoint.cc index 4c2d4bad3a8..1fc36e06927 100644 --- a/ft/tests/cachetable-clone-checkpoint.cc +++ b/ft/tests/cachetable-clone-checkpoint.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/cachetable-clone-partial-fetch-pinned-node.cc b/ft/tests/cachetable-clone-partial-fetch-pinned-node.cc index 5f75c91b4e0..d22478b8e38 100644 --- a/ft/tests/cachetable-clone-partial-fetch-pinned-node.cc +++ b/ft/tests/cachetable-clone-partial-fetch-pinned-node.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/cachetable-clone-partial-fetch.cc b/ft/tests/cachetable-clone-partial-fetch.cc index d48efc69be0..92859cfac68 100644 --- a/ft/tests/cachetable-clone-partial-fetch.cc +++ b/ft/tests/cachetable-clone-partial-fetch.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/cachetable-clone-pin-nonblocking.cc b/ft/tests/cachetable-clone-pin-nonblocking.cc index 29f3518c815..d7cdbcc0854 100644 --- a/ft/tests/cachetable-clone-pin-nonblocking.cc +++ b/ft/tests/cachetable-clone-pin-nonblocking.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/ft/tests/cachetable-clone-unpin-remove.cc b/ft/tests/cachetable-clone-unpin-remove.cc index b9b10739beb..f6cf0ec34c7 100644 --- a/ft/tests/cachetable-clone-unpin-remove.cc +++ b/ft/tests/cachetable-clone-unpin-remove.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/cachetable-count-pinned-test.cc b/ft/tests/cachetable-count-pinned-test.cc index a1ffa8e4021..4dba635d22f 100644 --- a/ft/tests/cachetable-count-pinned-test.cc +++ b/ft/tests/cachetable-count-pinned-test.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/cachetable-debug-test.cc b/ft/tests/cachetable-debug-test.cc index 1e207fbd505..fda1d0ae563 100644 --- a/ft/tests/cachetable-debug-test.cc +++ b/ft/tests/cachetable-debug-test.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/cachetable-eviction-close-test.cc b/ft/tests/cachetable-eviction-close-test.cc index fbd7c3b04a8..da2ff48f2eb 100644 --- a/ft/tests/cachetable-eviction-close-test.cc +++ b/ft/tests/cachetable-eviction-close-test.cc @@ -31,7 +31,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/ft/tests/cachetable-eviction-close-test2.cc b/ft/tests/cachetable-eviction-close-test2.cc index f92729b3481..d6ba0f3b136 100644 --- a/ft/tests/cachetable-eviction-close-test2.cc +++ b/ft/tests/cachetable-eviction-close-test2.cc @@ -31,7 +31,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/cachetable-eviction-getandpin-test.cc b/ft/tests/cachetable-eviction-getandpin-test.cc index e1bba60c755..51540db5739 100644 --- a/ft/tests/cachetable-eviction-getandpin-test.cc +++ b/ft/tests/cachetable-eviction-getandpin-test.cc @@ -31,7 +31,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/cachetable-eviction-getandpin-test2.cc b/ft/tests/cachetable-eviction-getandpin-test2.cc index 338871a1895..45c10bcc552 100644 --- a/ft/tests/cachetable-eviction-getandpin-test2.cc +++ b/ft/tests/cachetable-eviction-getandpin-test2.cc @@ -31,7 +31,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/cachetable-evictor-class.cc b/ft/tests/cachetable-evictor-class.cc index bd4eff4f8cc..12e463d61d8 100644 --- a/ft/tests/cachetable-evictor-class.cc +++ b/ft/tests/cachetable-evictor-class.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/ft/tests/cachetable-fd-test.cc b/ft/tests/cachetable-fd-test.cc index 65e0d78268f..8ff6ee94fbe 100644 --- a/ft/tests/cachetable-fd-test.cc +++ b/ft/tests/cachetable-fd-test.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/cachetable-fetch-inducing-evictor.cc b/ft/tests/cachetable-fetch-inducing-evictor.cc index 2e946aa7ae0..089c34498b9 100644 --- a/ft/tests/cachetable-fetch-inducing-evictor.cc +++ b/ft/tests/cachetable-fetch-inducing-evictor.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/cachetable-flush-during-cleaner.cc b/ft/tests/cachetable-flush-during-cleaner.cc index 24a5e16caac..237671fe28f 100644 --- a/ft/tests/cachetable-flush-during-cleaner.cc +++ b/ft/tests/cachetable-flush-during-cleaner.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/cachetable-flush-test.cc b/ft/tests/cachetable-flush-test.cc index 55a13159c9e..2297364891b 100644 --- a/ft/tests/cachetable-flush-test.cc +++ b/ft/tests/cachetable-flush-test.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/ft/tests/cachetable-getandpin-test.cc b/ft/tests/cachetable-getandpin-test.cc index 56d665160d0..b3e4dfa1d9a 100644 --- a/ft/tests/cachetable-getandpin-test.cc +++ b/ft/tests/cachetable-getandpin-test.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/cachetable-kibbutz_and_flush_cachefile.cc b/ft/tests/cachetable-kibbutz_and_flush_cachefile.cc index 749b3cdb8af..b6f2a189e26 100644 --- a/ft/tests/cachetable-kibbutz_and_flush_cachefile.cc +++ b/ft/tests/cachetable-kibbutz_and_flush_cachefile.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/cachetable-partial-fetch.cc b/ft/tests/cachetable-partial-fetch.cc index 3a8fc33316e..043b35ab503 100644 --- a/ft/tests/cachetable-partial-fetch.cc +++ b/ft/tests/cachetable-partial-fetch.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/cachetable-pin-checkpoint.cc b/ft/tests/cachetable-pin-checkpoint.cc index bf7ec984562..6916e974c3b 100644 --- a/ft/tests/cachetable-pin-checkpoint.cc +++ b/ft/tests/cachetable-pin-checkpoint.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/ft/tests/cachetable-pin-nonblocking-checkpoint-clean.cc b/ft/tests/cachetable-pin-nonblocking-checkpoint-clean.cc index 32fe84ea57a..ca9db5e6521 100644 --- a/ft/tests/cachetable-pin-nonblocking-checkpoint-clean.cc +++ b/ft/tests/cachetable-pin-nonblocking-checkpoint-clean.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/cachetable-prefetch-checkpoint-test.cc b/ft/tests/cachetable-prefetch-checkpoint-test.cc index c409a62ba2e..65465339f0e 100644 --- a/ft/tests/cachetable-prefetch-checkpoint-test.cc +++ b/ft/tests/cachetable-prefetch-checkpoint-test.cc @@ -32,7 +32,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/cachetable-prefetch-close-leak-test.cc b/ft/tests/cachetable-prefetch-close-leak-test.cc index 719b6b40d21..e817c8aa65e 100644 --- a/ft/tests/cachetable-prefetch-close-leak-test.cc +++ b/ft/tests/cachetable-prefetch-close-leak-test.cc @@ -31,7 +31,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/cachetable-prefetch-close-test.cc b/ft/tests/cachetable-prefetch-close-test.cc index 8495ad4c5ab..e8d08c86aa1 100644 --- a/ft/tests/cachetable-prefetch-close-test.cc +++ b/ft/tests/cachetable-prefetch-close-test.cc @@ -31,7 +31,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/ft/tests/cachetable-prefetch-flowcontrol-test.cc b/ft/tests/cachetable-prefetch-flowcontrol-test.cc index e557930bfa5..8736b6a4065 100644 --- a/ft/tests/cachetable-prefetch-flowcontrol-test.cc +++ b/ft/tests/cachetable-prefetch-flowcontrol-test.cc @@ -32,7 +32,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/cachetable-prefetch-getandpin-test.cc b/ft/tests/cachetable-prefetch-getandpin-test.cc index 9474a46089e..4ba6dff51a9 100644 --- a/ft/tests/cachetable-prefetch-getandpin-test.cc +++ b/ft/tests/cachetable-prefetch-getandpin-test.cc @@ -31,7 +31,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/cachetable-prefetch-maybegetandpin-test.cc b/ft/tests/cachetable-prefetch-maybegetandpin-test.cc index 72a1a5ada9f..0540ab5429b 100644 --- a/ft/tests/cachetable-prefetch-maybegetandpin-test.cc +++ b/ft/tests/cachetable-prefetch-maybegetandpin-test.cc @@ -31,7 +31,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/cachetable-prefetch2-test.cc b/ft/tests/cachetable-prefetch2-test.cc index 4285b586039..f7d348eaa3b 100644 --- a/ft/tests/cachetable-prefetch2-test.cc +++ b/ft/tests/cachetable-prefetch2-test.cc @@ -32,7 +32,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/ft/tests/cachetable-put-checkpoint.cc b/ft/tests/cachetable-put-checkpoint.cc index fb0c510442e..0b316aa528f 100644 --- a/ft/tests/cachetable-put-checkpoint.cc +++ b/ft/tests/cachetable-put-checkpoint.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/cachetable-put-test.cc b/ft/tests/cachetable-put-test.cc index fcfce830cd0..07765bd666e 100644 --- a/ft/tests/cachetable-put-test.cc +++ b/ft/tests/cachetable-put-test.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/cachetable-rwlock-test.cc b/ft/tests/cachetable-rwlock-test.cc index 87014dc406e..7e5fb4a00bb 100644 --- a/ft/tests/cachetable-rwlock-test.cc +++ b/ft/tests/cachetable-rwlock-test.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/cachetable-simple-clone.cc b/ft/tests/cachetable-simple-clone.cc index 1794a1982f5..3944182084c 100644 --- a/ft/tests/cachetable-simple-clone.cc +++ b/ft/tests/cachetable-simple-clone.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/ft/tests/cachetable-simple-clone2.cc b/ft/tests/cachetable-simple-clone2.cc index 0543d80981c..177905e6721 100644 --- a/ft/tests/cachetable-simple-clone2.cc +++ b/ft/tests/cachetable-simple-clone2.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/cachetable-simple-close.cc b/ft/tests/cachetable-simple-close.cc index 652d701f193..7a7518b78f5 100644 --- a/ft/tests/cachetable-simple-close.cc +++ b/ft/tests/cachetable-simple-close.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/cachetable-simple-maybe-get-pin.cc b/ft/tests/cachetable-simple-maybe-get-pin.cc index be3f737fce3..891b70fd7d1 100644 --- a/ft/tests/cachetable-simple-maybe-get-pin.cc +++ b/ft/tests/cachetable-simple-maybe-get-pin.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/cachetable-simple-pin-cheap.cc b/ft/tests/cachetable-simple-pin-cheap.cc index af5f134646a..70e7a936a26 100644 --- a/ft/tests/cachetable-simple-pin-cheap.cc +++ b/ft/tests/cachetable-simple-pin-cheap.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/ft/tests/cachetable-simple-pin-dep-nodes.cc b/ft/tests/cachetable-simple-pin-dep-nodes.cc index 1a04dbbd1a3..8a87f006f6b 100644 --- a/ft/tests/cachetable-simple-pin-dep-nodes.cc +++ b/ft/tests/cachetable-simple-pin-dep-nodes.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/cachetable-simple-pin-nonblocking-cheap.cc b/ft/tests/cachetable-simple-pin-nonblocking-cheap.cc index 0f10c98a443..e1050b2da7f 100644 --- a/ft/tests/cachetable-simple-pin-nonblocking-cheap.cc +++ b/ft/tests/cachetable-simple-pin-nonblocking-cheap.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/cachetable-simple-pin-nonblocking.cc b/ft/tests/cachetable-simple-pin-nonblocking.cc index d6e74270866..33319b7a368 100644 --- a/ft/tests/cachetable-simple-pin-nonblocking.cc +++ b/ft/tests/cachetable-simple-pin-nonblocking.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/cachetable-simple-pin.cc b/ft/tests/cachetable-simple-pin.cc index f4385821674..b90b01bfd6c 100644 --- a/ft/tests/cachetable-simple-pin.cc +++ b/ft/tests/cachetable-simple-pin.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/ft/tests/cachetable-simple-put-dep-nodes.cc b/ft/tests/cachetable-simple-put-dep-nodes.cc index d1ca984e8a8..eaeee0bb4db 100644 --- a/ft/tests/cachetable-simple-put-dep-nodes.cc +++ b/ft/tests/cachetable-simple-put-dep-nodes.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/cachetable-simple-read-pin-nonblocking.cc b/ft/tests/cachetable-simple-read-pin-nonblocking.cc index cb86aa5bad7..aeb6437f670 100644 --- a/ft/tests/cachetable-simple-read-pin-nonblocking.cc +++ b/ft/tests/cachetable-simple-read-pin-nonblocking.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/cachetable-simple-read-pin.cc b/ft/tests/cachetable-simple-read-pin.cc index 2683ea04bed..5f0b6eff445 100644 --- a/ft/tests/cachetable-simple-read-pin.cc +++ b/ft/tests/cachetable-simple-read-pin.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/cachetable-simple-unpin-remove-checkpoint.cc b/ft/tests/cachetable-simple-unpin-remove-checkpoint.cc index 15e3fbe10bb..45d66073930 100644 --- a/ft/tests/cachetable-simple-unpin-remove-checkpoint.cc +++ b/ft/tests/cachetable-simple-unpin-remove-checkpoint.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/ft/tests/cachetable-simple-verify.cc b/ft/tests/cachetable-simple-verify.cc index 89453355bd2..f38eb2214b6 100644 --- a/ft/tests/cachetable-simple-verify.cc +++ b/ft/tests/cachetable-simple-verify.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/cachetable-test.cc b/ft/tests/cachetable-test.cc index e3085e37572..a040943007a 100644 --- a/ft/tests/cachetable-test.cc +++ b/ft/tests/cachetable-test.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/cachetable-test.h b/ft/tests/cachetable-test.h index facdba3985f..6d27a9b71bb 100644 --- a/ft/tests/cachetable-test.h +++ b/ft/tests/cachetable-test.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/cachetable-unpin-and-remove-test.cc b/ft/tests/cachetable-unpin-and-remove-test.cc index 9cd8dda74bd..9063cdc2bcc 100644 --- a/ft/tests/cachetable-unpin-and-remove-test.cc +++ b/ft/tests/cachetable-unpin-and-remove-test.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/ft/tests/cachetable-unpin-remove-and-checkpoint.cc b/ft/tests/cachetable-unpin-remove-and-checkpoint.cc index 23302bf3f45..406df310de5 100644 --- a/ft/tests/cachetable-unpin-remove-and-checkpoint.cc +++ b/ft/tests/cachetable-unpin-remove-and-checkpoint.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/cachetable-unpin-test.cc b/ft/tests/cachetable-unpin-test.cc index cb2c92d575b..1d8c2b03abc 100644 --- a/ft/tests/cachetable-unpin-test.cc +++ b/ft/tests/cachetable-unpin-test.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/cachetable-writer-thread-limit.cc b/ft/tests/cachetable-writer-thread-limit.cc index c3f61ebe7f1..92f5a1906f6 100644 --- a/ft/tests/cachetable-writer-thread-limit.cc +++ b/ft/tests/cachetable-writer-thread-limit.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/comparator-test.cc b/ft/tests/comparator-test.cc index 55804db0af8..0ac3bd569cc 100644 --- a/ft/tests/comparator-test.cc +++ b/ft/tests/comparator-test.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/ft/tests/compress-test.cc b/ft/tests/compress-test.cc index 98c3a774d43..7f7a97274c8 100644 --- a/ft/tests/compress-test.cc +++ b/ft/tests/compress-test.cc @@ -30,7 +30,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/dbufio-test-destroy.cc b/ft/tests/dbufio-test-destroy.cc index c09cbb683e6..c9984879a86 100644 --- a/ft/tests/dbufio-test-destroy.cc +++ b/ft/tests/dbufio-test-destroy.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/dbufio-test.cc b/ft/tests/dbufio-test.cc index 2b32684de53..6f562d8ac85 100644 --- a/ft/tests/dbufio-test.cc +++ b/ft/tests/dbufio-test.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/dmt-test.cc b/ft/tests/dmt-test.cc index e4f1e53751e..adc759a3c4b 100644 --- a/ft/tests/dmt-test.cc +++ b/ft/tests/dmt-test.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/dmt-test2.cc b/ft/tests/dmt-test2.cc index 8943aae7324..707ad9a5a7e 100644 --- a/ft/tests/dmt-test2.cc +++ b/ft/tests/dmt-test2.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/ft/tests/fifo-test.cc b/ft/tests/fifo-test.cc index 856c9f57d02..30815160684 100644 --- a/ft/tests/fifo-test.cc +++ b/ft/tests/fifo-test.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/ft-bfe-query.cc b/ft/tests/ft-bfe-query.cc index 357d9fa4b87..d91ae001884 100644 --- a/ft/tests/ft-bfe-query.cc +++ b/ft/tests/ft-bfe-query.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/ft-clock-test.cc b/ft/tests/ft-clock-test.cc index 18ac7cdc011..50bb6d67ca5 100644 --- a/ft/tests/ft-clock-test.cc +++ b/ft/tests/ft-clock-test.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/ft-serialize-benchmark.cc b/ft/tests/ft-serialize-benchmark.cc index 089da09038b..82b96742ceb 100644 --- a/ft/tests/ft-serialize-benchmark.cc +++ b/ft/tests/ft-serialize-benchmark.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/ft-serialize-sub-block-test.cc b/ft/tests/ft-serialize-sub-block-test.cc index 0910595961d..f0be59a811c 100644 --- a/ft/tests/ft-serialize-sub-block-test.cc +++ b/ft/tests/ft-serialize-sub-block-test.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/ft/tests/ft-serialize-test.cc b/ft/tests/ft-serialize-test.cc index cc66054459a..266cf50f8ce 100644 --- a/ft/tests/ft-serialize-test.cc +++ b/ft/tests/ft-serialize-test.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/ft-test-cursor-2.cc b/ft/tests/ft-test-cursor-2.cc index 6907a379289..6e38884d571 100644 --- a/ft/tests/ft-test-cursor-2.cc +++ b/ft/tests/ft-test-cursor-2.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/ft-test-cursor.cc b/ft/tests/ft-test-cursor.cc index 0ae777499ad..3e7aa6875e0 100644 --- a/ft/tests/ft-test-cursor.cc +++ b/ft/tests/ft-test-cursor.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/ft-test-header.cc b/ft/tests/ft-test-header.cc index 5b19ecd6245..18da9502b48 100644 --- a/ft/tests/ft-test-header.cc +++ b/ft/tests/ft-test-header.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/ft-test.cc b/ft/tests/ft-test.cc index f826e20967f..e9981c92572 100644 --- a/ft/tests/ft-test.cc +++ b/ft/tests/ft-test.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/ft/tests/ft-test0.cc b/ft/tests/ft-test0.cc index 3e03b808d9f..719bfd35588 100644 --- a/ft/tests/ft-test0.cc +++ b/ft/tests/ft-test0.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/ft-test1.cc b/ft/tests/ft-test1.cc index cbf082e9fd2..5ab4b779403 100644 --- a/ft/tests/ft-test1.cc +++ b/ft/tests/ft-test1.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/ft-test2.cc b/ft/tests/ft-test2.cc index eb80122fd7c..ccd94a74ade 100644 --- a/ft/tests/ft-test2.cc +++ b/ft/tests/ft-test2.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/ft-test3.cc b/ft/tests/ft-test3.cc index 761c1cae643..4b57eb8e323 100644 --- a/ft/tests/ft-test3.cc +++ b/ft/tests/ft-test3.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/ft-test4.cc b/ft/tests/ft-test4.cc index d41fa5a697c..945d05f0a17 100644 --- a/ft/tests/ft-test4.cc +++ b/ft/tests/ft-test4.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/ft/tests/ft-test5.cc b/ft/tests/ft-test5.cc index 8c1a53914b7..8144754b12d 100644 --- a/ft/tests/ft-test5.cc +++ b/ft/tests/ft-test5.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/ftloader-error-injector.h b/ft/tests/ftloader-error-injector.h index e0ba18aa235..d0d4cc87c88 100644 --- a/ft/tests/ftloader-error-injector.h +++ b/ft/tests/ftloader-error-injector.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/ftloader-test-bad-generate.cc b/ft/tests/ftloader-test-bad-generate.cc index 8fd7c27401a..3cc574b759a 100644 --- a/ft/tests/ftloader-test-bad-generate.cc +++ b/ft/tests/ftloader-test-bad-generate.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/ftloader-test-extractor-errors.cc b/ft/tests/ftloader-test-extractor-errors.cc index 6d96dee9145..4418e475bac 100644 --- a/ft/tests/ftloader-test-extractor-errors.cc +++ b/ft/tests/ftloader-test-extractor-errors.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/ftloader-test-extractor.cc b/ft/tests/ftloader-test-extractor.cc index 868ca41fa18..b20dd2fb2c3 100644 --- a/ft/tests/ftloader-test-extractor.cc +++ b/ft/tests/ftloader-test-extractor.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. 
+ TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/ftloader-test-merge-files-dbufio.cc b/ft/tests/ftloader-test-merge-files-dbufio.cc index 5eaabf124ec..d450bd00923 100644 --- a/ft/tests/ftloader-test-merge-files-dbufio.cc +++ b/ft/tests/ftloader-test-merge-files-dbufio.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/ftloader-test-open.cc b/ft/tests/ftloader-test-open.cc index c7149ce8113..5c4b689a824 100644 --- a/ft/tests/ftloader-test-open.cc +++ b/ft/tests/ftloader-test-open.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/ftloader-test-vm.cc b/ft/tests/ftloader-test-vm.cc index d207fa574a2..9dd7ffec027 100644 --- a/ft/tests/ftloader-test-vm.cc +++ b/ft/tests/ftloader-test-vm.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/ftloader-test-writer-errors.cc b/ft/tests/ftloader-test-writer-errors.cc index 6464f1f9240..d2669aee72a 100644 --- a/ft/tests/ftloader-test-writer-errors.cc +++ b/ft/tests/ftloader-test-writer-errors.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/ft/tests/ftloader-test-writer.cc b/ft/tests/ftloader-test-writer.cc index 6be385ec5db..e57b800adad 100644 --- a/ft/tests/ftloader-test-writer.cc +++ b/ft/tests/ftloader-test-writer.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/ftloader-test.cc b/ft/tests/ftloader-test.cc index faa2e39aa08..46271eeb451 100644 --- a/ft/tests/ftloader-test.cc +++ b/ft/tests/ftloader-test.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/is_empty.cc b/ft/tests/is_empty.cc index f38d0967beb..b415343fba1 100644 --- a/ft/tests/is_empty.cc +++ b/ft/tests/is_empty.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/keyrange.cc b/ft/tests/keyrange.cc index 8274da108ce..6c191adf890 100644 --- a/ft/tests/keyrange.cc +++ b/ft/tests/keyrange.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/keytest.cc b/ft/tests/keytest.cc index 70beae0beca..bde2a4ca9ca 100644 --- a/ft/tests/keytest.cc +++ b/ft/tests/keytest.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/ft/tests/le-cursor-provdel.cc b/ft/tests/le-cursor-provdel.cc index 73894c6addd..33729527ca1 100644 --- a/ft/tests/le-cursor-provdel.cc +++ b/ft/tests/le-cursor-provdel.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/le-cursor-right.cc b/ft/tests/le-cursor-right.cc index 3f9593c1ce0..1c6ac9eab5c 100644 --- a/ft/tests/le-cursor-right.cc +++ b/ft/tests/le-cursor-right.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/le-cursor-walk.cc b/ft/tests/le-cursor-walk.cc index 7dc925100b0..af26228ddfe 100644 --- a/ft/tests/le-cursor-walk.cc +++ b/ft/tests/le-cursor-walk.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/list-test.cc b/ft/tests/list-test.cc index 0922b831e1f..c7286048e24 100644 --- a/ft/tests/list-test.cc +++ b/ft/tests/list-test.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/log-test-maybe-trim.cc b/ft/tests/log-test-maybe-trim.cc index d724b075408..2e12fa81579 100644 --- a/ft/tests/log-test-maybe-trim.cc +++ b/ft/tests/log-test-maybe-trim.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/ft/tests/log-test.cc b/ft/tests/log-test.cc index 7b7005ccb2c..c21e43641d5 100644 --- a/ft/tests/log-test.cc +++ b/ft/tests/log-test.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/log-test2.cc b/ft/tests/log-test2.cc index d5fb7bfe102..eafdd26d08b 100644 --- a/ft/tests/log-test2.cc +++ b/ft/tests/log-test2.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/log-test3.cc b/ft/tests/log-test3.cc index 69443377751..e9bb72fdfb0 100644 --- a/ft/tests/log-test3.cc +++ b/ft/tests/log-test3.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/log-test4.cc b/ft/tests/log-test4.cc index 8a0d230983e..c4e92fe2fcc 100644 --- a/ft/tests/log-test4.cc +++ b/ft/tests/log-test4.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/log-test5.cc b/ft/tests/log-test5.cc index c6ad061aa11..bbe24640d7e 100644 --- a/ft/tests/log-test5.cc +++ b/ft/tests/log-test5.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/ft/tests/log-test6.cc b/ft/tests/log-test6.cc index 8e07365a967..5e2ff52ea75 100644 --- a/ft/tests/log-test6.cc +++ b/ft/tests/log-test6.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/log-test7.cc b/ft/tests/log-test7.cc index a07e6775818..d6ac8bd688f 100644 --- a/ft/tests/log-test7.cc +++ b/ft/tests/log-test7.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/logcursor-bad-checksum.cc b/ft/tests/logcursor-bad-checksum.cc index 22ec4a91a0a..d9dc5ce04df 100644 --- a/ft/tests/logcursor-bad-checksum.cc +++ b/ft/tests/logcursor-bad-checksum.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/logcursor-bw.cc b/ft/tests/logcursor-bw.cc index 00fc757e836..3d5a0d32f62 100644 --- a/ft/tests/logcursor-bw.cc +++ b/ft/tests/logcursor-bw.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/logcursor-empty-logdir.cc b/ft/tests/logcursor-empty-logdir.cc index 6982b310acb..cdce3881605 100644 --- a/ft/tests/logcursor-empty-logdir.cc +++ b/ft/tests/logcursor-empty-logdir.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/ft/tests/logcursor-empty-logfile-2.cc b/ft/tests/logcursor-empty-logfile-2.cc index 1fa630a6422..665623788af 100644 --- a/ft/tests/logcursor-empty-logfile-2.cc +++ b/ft/tests/logcursor-empty-logfile-2.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/logcursor-empty-logfile-3.cc b/ft/tests/logcursor-empty-logfile-3.cc index 6e687f9bd30..12bf4ba4859 100644 --- a/ft/tests/logcursor-empty-logfile-3.cc +++ b/ft/tests/logcursor-empty-logfile-3.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/logcursor-empty-logfile.cc b/ft/tests/logcursor-empty-logfile.cc index 7fc0be3d734..0cc4bd34285 100644 --- a/ft/tests/logcursor-empty-logfile.cc +++ b/ft/tests/logcursor-empty-logfile.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/logcursor-fw.cc b/ft/tests/logcursor-fw.cc index 51fa24b1078..9f0510c6118 100644 --- a/ft/tests/logcursor-fw.cc +++ b/ft/tests/logcursor-fw.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/logcursor-print.cc b/ft/tests/logcursor-print.cc index cf508fa0a2b..902dc494a66 100644 --- a/ft/tests/logcursor-print.cc +++ b/ft/tests/logcursor-print.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. 
+ TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/logcursor-timestamp.cc b/ft/tests/logcursor-timestamp.cc index 94768f897a1..a329cb49d3d 100644 --- a/ft/tests/logcursor-timestamp.cc +++ b/ft/tests/logcursor-timestamp.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/logfilemgr-create-destroy.cc b/ft/tests/logfilemgr-create-destroy.cc index 4f447cd7360..c7a06d90d41 100644 --- a/ft/tests/logfilemgr-create-destroy.cc +++ b/ft/tests/logfilemgr-create-destroy.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/logfilemgr-print.cc b/ft/tests/logfilemgr-print.cc index a361a270768..6a50cd3f091 100644 --- a/ft/tests/logfilemgr-print.cc +++ b/ft/tests/logfilemgr-print.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/make-tree.cc b/ft/tests/make-tree.cc index 14700ddbfb3..51a4b66618a 100644 --- a/ft/tests/make-tree.cc +++ b/ft/tests/make-tree.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/mempool-115.cc b/ft/tests/mempool-115.cc index 4ecb624763f..11960aa4ef2 100644 --- a/ft/tests/mempool-115.cc +++ b/ft/tests/mempool-115.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. 
+ TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/msnfilter.cc b/ft/tests/msnfilter.cc index ea8d3a97649..41615028168 100644 --- a/ft/tests/msnfilter.cc +++ b/ft/tests/msnfilter.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/orthopush-flush.cc b/ft/tests/orthopush-flush.cc index 21dcc1281ad..749729838e3 100644 --- a/ft/tests/orthopush-flush.cc +++ b/ft/tests/orthopush-flush.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/pqueue-test.cc b/ft/tests/pqueue-test.cc index 90a9fbb4e1e..a10fcd77483 100644 --- a/ft/tests/pqueue-test.cc +++ b/ft/tests/pqueue-test.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/quicklz-test.cc b/ft/tests/quicklz-test.cc index a7970abb057..2c8b88440f9 100644 --- a/ft/tests/quicklz-test.cc +++ b/ft/tests/quicklz-test.cc @@ -30,7 +30,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/recovery-bad-last-entry.cc b/ft/tests/recovery-bad-last-entry.cc index 214218f5a1a..78c3be54b60 100644 --- a/ft/tests/recovery-bad-last-entry.cc +++ b/ft/tests/recovery-bad-last-entry.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. 
Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/recovery-cbegin-cend-hello.cc b/ft/tests/recovery-cbegin-cend-hello.cc index b0da2695e47..3fb2b84d300 100644 --- a/ft/tests/recovery-cbegin-cend-hello.cc +++ b/ft/tests/recovery-cbegin-cend-hello.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/recovery-cbegin-cend.cc b/ft/tests/recovery-cbegin-cend.cc index b192ad64af4..902a4e783b8 100644 --- a/ft/tests/recovery-cbegin-cend.cc +++ b/ft/tests/recovery-cbegin-cend.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/recovery-cbegin.cc b/ft/tests/recovery-cbegin.cc index 411684770d0..b3edff58c89 100644 --- a/ft/tests/recovery-cbegin.cc +++ b/ft/tests/recovery-cbegin.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/recovery-cend-cbegin.cc b/ft/tests/recovery-cend-cbegin.cc index 8c155c35b0e..89d8d48aa24 100644 --- a/ft/tests/recovery-cend-cbegin.cc +++ b/ft/tests/recovery-cend-cbegin.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/ft/tests/recovery-datadir-is-file.cc b/ft/tests/recovery-datadir-is-file.cc index 7a8108a347e..aa5f52ee681 100644 --- a/ft/tests/recovery-datadir-is-file.cc +++ b/ft/tests/recovery-datadir-is-file.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/recovery-empty.cc b/ft/tests/recovery-empty.cc index 33c7333bc54..839161918f7 100644 --- a/ft/tests/recovery-empty.cc +++ b/ft/tests/recovery-empty.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/recovery-fopen-missing-file.cc b/ft/tests/recovery-fopen-missing-file.cc index 63a5f5a5fee..4fd3851fef7 100644 --- a/ft/tests/recovery-fopen-missing-file.cc +++ b/ft/tests/recovery-fopen-missing-file.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/recovery-hello.cc b/ft/tests/recovery-hello.cc index d31698f795b..22fd7df1084 100644 --- a/ft/tests/recovery-hello.cc +++ b/ft/tests/recovery-hello.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/ft/tests/recovery-lsn-error-during-forward-scan.cc b/ft/tests/recovery-lsn-error-during-forward-scan.cc index 253c674ae70..4edbaa82afc 100644 --- a/ft/tests/recovery-lsn-error-during-forward-scan.cc +++ b/ft/tests/recovery-lsn-error-during-forward-scan.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/recovery-no-datadir.cc b/ft/tests/recovery-no-datadir.cc index 222de5bdbcb..689efcd104c 100644 --- a/ft/tests/recovery-no-datadir.cc +++ b/ft/tests/recovery-no-datadir.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/recovery-no-log.cc b/ft/tests/recovery-no-log.cc index ac71769e580..c11be54d7c8 100644 --- a/ft/tests/recovery-no-log.cc +++ b/ft/tests/recovery-no-log.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/recovery-no-logdir.cc b/ft/tests/recovery-no-logdir.cc index ad72decd134..327cd544861 100644 --- a/ft/tests/recovery-no-logdir.cc +++ b/ft/tests/recovery-no-logdir.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/recovery-test5123.cc b/ft/tests/recovery-test5123.cc index 7e912b6f9f9..955a842e6e9 100644 --- a/ft/tests/recovery-test5123.cc +++ b/ft/tests/recovery-test5123.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. 
+ TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/shortcut.cc b/ft/tests/shortcut.cc index 5abb41cb943..fc08868ccf0 100644 --- a/ft/tests/shortcut.cc +++ b/ft/tests/shortcut.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/subblock-test-checksum.cc b/ft/tests/subblock-test-checksum.cc index 8d6156f04e7..0ba9e88ee83 100644 --- a/ft/tests/subblock-test-checksum.cc +++ b/ft/tests/subblock-test-checksum.cc @@ -30,7 +30,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/subblock-test-compression.cc b/ft/tests/subblock-test-compression.cc index 2874e50a146..ba3ab4113f4 100644 --- a/ft/tests/subblock-test-compression.cc +++ b/ft/tests/subblock-test-compression.cc @@ -30,7 +30,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/subblock-test-index.cc b/ft/tests/subblock-test-index.cc index e805bf1ead3..d6e035af6d3 100644 --- a/ft/tests/subblock-test-index.cc +++ b/ft/tests/subblock-test-index.cc @@ -30,7 +30,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/ft/tests/subblock-test-size.cc b/ft/tests/subblock-test-size.cc index 8b1119b30c8..e21b15f33d2 100644 --- a/ft/tests/subblock-test-size.cc +++ b/ft/tests/subblock-test-size.cc @@ -30,7 +30,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/test-assert.cc b/ft/tests/test-assert.cc index a06b389584d..f6221c2d152 100644 --- a/ft/tests/test-assert.cc +++ b/ft/tests/test-assert.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/test-bjm.cc b/ft/tests/test-bjm.cc index 7218b597afd..4969f8c4a8b 100644 --- a/ft/tests/test-bjm.cc +++ b/ft/tests/test-bjm.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/test-checkpoint-during-flush.cc b/ft/tests/test-checkpoint-during-flush.cc index 49541ae8a6d..22fbf37d500 100644 --- a/ft/tests/test-checkpoint-during-flush.cc +++ b/ft/tests/test-checkpoint-during-flush.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/test-checkpoint-during-merge.cc b/ft/tests/test-checkpoint-during-merge.cc index cf8b8e57fae..0ad417f2712 100644 --- a/ft/tests/test-checkpoint-during-merge.cc +++ b/ft/tests/test-checkpoint-during-merge.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. 
Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/test-checkpoint-during-rebalance.cc b/ft/tests/test-checkpoint-during-rebalance.cc index 98c4ab5a6c1..7870cd2fa58 100644 --- a/ft/tests/test-checkpoint-during-rebalance.cc +++ b/ft/tests/test-checkpoint-during-rebalance.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/test-checkpoint-during-split.cc b/ft/tests/test-checkpoint-during-split.cc index d25e81b0dcc..8e24ae2bb43 100644 --- a/ft/tests/test-checkpoint-during-split.cc +++ b/ft/tests/test-checkpoint-during-split.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/test-del-inorder.cc b/ft/tests/test-del-inorder.cc index c95801ef430..75a1c255bd9 100644 --- a/ft/tests/test-del-inorder.cc +++ b/ft/tests/test-del-inorder.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/test-dirty-flushes-on-cleaner.cc b/ft/tests/test-dirty-flushes-on-cleaner.cc index 55aa6c969bc..a88c07c0ca1 100644 --- a/ft/tests/test-dirty-flushes-on-cleaner.cc +++ b/ft/tests/test-dirty-flushes-on-cleaner.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/ft/tests/test-dump-ft.cc b/ft/tests/test-dump-ft.cc index 28007290a52..f1c76d0bd13 100644 --- a/ft/tests/test-dump-ft.cc +++ b/ft/tests/test-dump-ft.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/test-flushes-on-cleaner.cc b/ft/tests/test-flushes-on-cleaner.cc index bef3286291f..fa00100d3ed 100644 --- a/ft/tests/test-flushes-on-cleaner.cc +++ b/ft/tests/test-flushes-on-cleaner.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/test-ft-overflow.cc b/ft/tests/test-ft-overflow.cc index c1c3f9b2f9d..d8e51b5ab7c 100644 --- a/ft/tests/test-ft-overflow.cc +++ b/ft/tests/test-ft-overflow.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/test-ft-txns.h b/ft/tests/test-ft-txns.h index 645281ae867..3f7d38e307e 100644 --- a/ft/tests/test-ft-txns.h +++ b/ft/tests/test-ft-txns.h @@ -30,7 +30,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/test-hot-with-bounds.cc b/ft/tests/test-hot-with-bounds.cc index efd48b7172e..419cbd2cb51 100644 --- a/ft/tests/test-hot-with-bounds.cc +++ b/ft/tests/test-hot-with-bounds.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/ft/tests/test-inc-split.cc b/ft/tests/test-inc-split.cc index 5430e456835..13510855cb0 100644 --- a/ft/tests/test-inc-split.cc +++ b/ft/tests/test-inc-split.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/test-leafentry-child-txn.cc b/ft/tests/test-leafentry-child-txn.cc index 5c1c326ddb7..30dd15d3e39 100644 --- a/ft/tests/test-leafentry-child-txn.cc +++ b/ft/tests/test-leafentry-child-txn.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/test-leafentry-nested.cc b/ft/tests/test-leafentry-nested.cc index 1976b70c7cb..9253ff814c9 100644 --- a/ft/tests/test-leafentry-nested.cc +++ b/ft/tests/test-leafentry-nested.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/test-merges-on-cleaner.cc b/ft/tests/test-merges-on-cleaner.cc index 532625b4342..f67cfa78734 100644 --- a/ft/tests/test-merges-on-cleaner.cc +++ b/ft/tests/test-merges-on-cleaner.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/ft/tests/test-oldest-referenced-xid-flush.cc b/ft/tests/test-oldest-referenced-xid-flush.cc index ef6143dba21..fc642eab8df 100644 --- a/ft/tests/test-oldest-referenced-xid-flush.cc +++ b/ft/tests/test-oldest-referenced-xid-flush.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/test-pick-child-to-flush.cc b/ft/tests/test-pick-child-to-flush.cc index feadabd2b81..d2fe0ef9469 100644 --- a/ft/tests/test-pick-child-to-flush.cc +++ b/ft/tests/test-pick-child-to-flush.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/test-txn-child-manager.cc b/ft/tests/test-txn-child-manager.cc index 25886031821..8a67df8aa97 100644 --- a/ft/tests/test-txn-child-manager.cc +++ b/ft/tests/test-txn-child-manager.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/test.h b/ft/tests/test.h index 19f44d784e1..3170146a120 100644 --- a/ft/tests/test.h +++ b/ft/tests/test.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/test1308a.cc b/ft/tests/test1308a.cc index a39953ad354..ddbc43de7dc 100644 --- a/ft/tests/test1308a.cc +++ b/ft/tests/test1308a.cc @@ -30,7 +30,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/ft/tests/test3681.cc b/ft/tests/test3681.cc index 5e8f5b5e1a9..db5e8232cd4 100644 --- a/ft/tests/test3681.cc +++ b/ft/tests/test3681.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/test3856.cc b/ft/tests/test3856.cc index e892c334242..c0b693e3421 100644 --- a/ft/tests/test3856.cc +++ b/ft/tests/test3856.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/test3884.cc b/ft/tests/test3884.cc index e79c28325f9..a4a9e8568cf 100644 --- a/ft/tests/test3884.cc +++ b/ft/tests/test3884.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/test4115.cc b/ft/tests/test4115.cc index 457adcdac15..e24696af057 100644 --- a/ft/tests/test4115.cc +++ b/ft/tests/test4115.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/test4244.cc b/ft/tests/test4244.cc index 8f484129dd0..3c2728e941e 100644 --- a/ft/tests/test4244.cc +++ b/ft/tests/test4244.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/ft/tests/test_logcursor.cc b/ft/tests/test_logcursor.cc index c7b68f2400c..41644a02d07 100644 --- a/ft/tests/test_logcursor.cc +++ b/ft/tests/test_logcursor.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/test_oexcl.cc b/ft/tests/test_oexcl.cc index b6eacd92362..72fd01c2c89 100644 --- a/ft/tests/test_oexcl.cc +++ b/ft/tests/test_oexcl.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/test_rightmost_leaf_seqinsert_heuristic.cc b/ft/tests/test_rightmost_leaf_seqinsert_heuristic.cc index efa08d0c8c0..69962153979 100644 --- a/ft/tests/test_rightmost_leaf_seqinsert_heuristic.cc +++ b/ft/tests/test_rightmost_leaf_seqinsert_heuristic.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2014 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/test_rightmost_leaf_split_merge.cc b/ft/tests/test_rightmost_leaf_split_merge.cc index 854bc9d4609..29515d9925f 100644 --- a/ft/tests/test_rightmost_leaf_split_merge.cc +++ b/ft/tests/test_rightmost_leaf_split_merge.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2014 Tokutek, Inc. 
DISCLAIMER: diff --git a/ft/tests/test_toku_malloc_plain_free.cc b/ft/tests/test_toku_malloc_plain_free.cc index e7188bb0402..7f166fda836 100644 --- a/ft/tests/test_toku_malloc_plain_free.cc +++ b/ft/tests/test_toku_malloc_plain_free.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/upgrade_test_simple.cc b/ft/tests/upgrade_test_simple.cc index fe3fd60499f..3496df47e7c 100644 --- a/ft/tests/upgrade_test_simple.cc +++ b/ft/tests/upgrade_test_simple.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/verify-bad-msn.cc b/ft/tests/verify-bad-msn.cc index 2ac6dde456f..a7e3beea1e5 100644 --- a/ft/tests/verify-bad-msn.cc +++ b/ft/tests/verify-bad-msn.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/verify-bad-pivots.cc b/ft/tests/verify-bad-pivots.cc index 55a67b507a1..f36ae77a973 100644 --- a/ft/tests/verify-bad-pivots.cc +++ b/ft/tests/verify-bad-pivots.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/verify-dup-in-leaf.cc b/ft/tests/verify-dup-in-leaf.cc index 81089f7955f..a2c6567fdb4 100644 --- a/ft/tests/verify-dup-in-leaf.cc +++ b/ft/tests/verify-dup-in-leaf.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. 
Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/verify-dup-pivots.cc b/ft/tests/verify-dup-pivots.cc index 03a7f6eba3f..4dc42a06c82 100644 --- a/ft/tests/verify-dup-pivots.cc +++ b/ft/tests/verify-dup-pivots.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/verify-misrouted-msgs.cc b/ft/tests/verify-misrouted-msgs.cc index f9490cfd6c4..d671dd7a7f3 100644 --- a/ft/tests/verify-misrouted-msgs.cc +++ b/ft/tests/verify-misrouted-msgs.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/verify-unsorted-leaf.cc b/ft/tests/verify-unsorted-leaf.cc index 365b7bd4a5f..6a1fe6d0e23 100644 --- a/ft/tests/verify-unsorted-leaf.cc +++ b/ft/tests/verify-unsorted-leaf.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/verify-unsorted-pivots.cc b/ft/tests/verify-unsorted-pivots.cc index a12307f8555..bb20733f3e1 100644 --- a/ft/tests/verify-unsorted-pivots.cc +++ b/ft/tests/verify-unsorted-pivots.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/xid_lsn_independent.cc b/ft/tests/xid_lsn_independent.cc index 43f8102067c..545fcf4d927 100644 --- a/ft/tests/xid_lsn_independent.cc +++ b/ft/tests/xid_lsn_independent.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. 
+ TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/tests/ybt-test.cc b/ft/tests/ybt-test.cc index 7c92418d3fd..d53c03718e9 100644 --- a/ft/tests/ybt-test.cc +++ b/ft/tests/ybt-test.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/txn/roll.cc b/ft/txn/roll.cc index 0d8855bb8ce..affa9fa802c 100644 --- a/ft/txn/roll.cc +++ b/ft/txn/roll.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/txn/rollback-apply.cc b/ft/txn/rollback-apply.cc index 2f0239bb175..258994223cc 100644 --- a/ft/txn/rollback-apply.cc +++ b/ft/txn/rollback-apply.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/txn/rollback-apply.h b/ft/txn/rollback-apply.h index 2ddd24563fe..3d91c154a32 100644 --- a/ft/txn/rollback-apply.h +++ b/ft/txn/rollback-apply.h @@ -30,7 +30,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/txn/rollback-ct-callbacks.cc b/ft/txn/rollback-ct-callbacks.cc index 27ccfa4bc11..bb60e787735 100644 --- a/ft/txn/rollback-ct-callbacks.cc +++ b/ft/txn/rollback-ct-callbacks.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/ft/txn/rollback-ct-callbacks.h b/ft/txn/rollback-ct-callbacks.h index 4bcb004de3d..aee13f2e94d 100644 --- a/ft/txn/rollback-ct-callbacks.h +++ b/ft/txn/rollback-ct-callbacks.h @@ -30,7 +30,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/txn/rollback.cc b/ft/txn/rollback.cc index ffd2fc93a7b..54a7d9b58ae 100644 --- a/ft/txn/rollback.cc +++ b/ft/txn/rollback.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/txn/rollback.h b/ft/txn/rollback.h index cddbe16c91a..c9f779e677b 100644 --- a/ft/txn/rollback.h +++ b/ft/txn/rollback.h @@ -30,7 +30,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/txn/rollback_log_node_cache.cc b/ft/txn/rollback_log_node_cache.cc index fca18702da3..95a54d6fd76 100644 --- a/ft/txn/rollback_log_node_cache.cc +++ b/ft/txn/rollback_log_node_cache.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/txn/rollback_log_node_cache.h b/ft/txn/rollback_log_node_cache.h index baafcf979d5..4aa9daee207 100644 --- a/ft/txn/rollback_log_node_cache.h +++ b/ft/txn/rollback_log_node_cache.h @@ -30,7 +30,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/ft/txn/txn.cc b/ft/txn/txn.cc index 2654aafab26..a5de5b6adfd 100644 --- a/ft/txn/txn.cc +++ b/ft/txn/txn.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/txn/txn.h b/ft/txn/txn.h index f8d78eb956c..c458df3b5b2 100644 --- a/ft/txn/txn.h +++ b/ft/txn/txn.h @@ -30,7 +30,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/txn/txn_child_manager.cc b/ft/txn/txn_child_manager.cc index 1282aea2e25..3a006285e20 100644 --- a/ft/txn/txn_child_manager.cc +++ b/ft/txn/txn_child_manager.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/txn/txn_child_manager.h b/ft/txn/txn_child_manager.h index 81136b02c8c..99d98e2fe59 100644 --- a/ft/txn/txn_child_manager.h +++ b/ft/txn/txn_child_manager.h @@ -30,7 +30,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/txn/txn_manager.cc b/ft/txn/txn_manager.cc index b9bbddf5ed1..570174f9b9f 100644 --- a/ft/txn/txn_manager.cc +++ b/ft/txn/txn_manager.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/ft/txn/txn_manager.h b/ft/txn/txn_manager.h index c9fa158aaee..5df1e23115c 100644 --- a/ft/txn/txn_manager.h +++ b/ft/txn/txn_manager.h @@ -30,7 +30,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/txn/txn_state.h b/ft/txn/txn_state.h index 0375cdcc542..75c3f51ce79 100644 --- a/ft/txn/txn_state.h +++ b/ft/txn/txn_state.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/txn/xids.cc b/ft/txn/xids.cc index 2d80192df99..6308f3c0368 100644 --- a/ft/txn/xids.cc +++ b/ft/txn/xids.cc @@ -30,7 +30,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/txn/xids.h b/ft/txn/xids.h index 894c369a1b1..5b0e95c2eee 100644 --- a/ft/txn/xids.h +++ b/ft/txn/xids.h @@ -41,7 +41,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/ule-internal.h b/ft/ule-internal.h index 6c3c6bd013c..9a42ead3d90 100644 --- a/ft/ule-internal.h +++ b/ft/ule-internal.h @@ -34,7 +34,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/ule.cc b/ft/ule.cc index 1a9ab2a0867..bc2944b90d9 100644 --- a/ft/ule.cc +++ b/ft/ule.cc @@ -31,7 +31,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. 
+ TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/ft/ule.h b/ft/ule.h index 2468d2c328e..337abf25a5f 100644 --- a/ft/ule.h +++ b/ft/ule.h @@ -35,7 +35,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/locktree/concurrent_tree.cc b/locktree/concurrent_tree.cc index b7366d51dbc..b472be23111 100644 --- a/locktree/concurrent_tree.cc +++ b/locktree/concurrent_tree.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/locktree/concurrent_tree.h b/locktree/concurrent_tree.h index 58683a10ee8..82977bbf5f1 100644 --- a/locktree/concurrent_tree.h +++ b/locktree/concurrent_tree.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/locktree/keyrange.cc b/locktree/keyrange.cc index c44260c0358..c7cb19a597f 100644 --- a/locktree/keyrange.cc +++ b/locktree/keyrange.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/locktree/keyrange.h b/locktree/keyrange.h index c086fc1cd21..8b8e1a743e2 100644 --- a/locktree/keyrange.h +++ b/locktree/keyrange.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/locktree/lock_request.cc b/locktree/lock_request.cc index 3272f012be4..97fa780bb04 100644 --- a/locktree/lock_request.cc +++ b/locktree/lock_request.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/locktree/lock_request.h b/locktree/lock_request.h index 2dfde89dd7c..d1a4c2822e0 100644 --- a/locktree/lock_request.h +++ b/locktree/lock_request.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/locktree/locktree.cc b/locktree/locktree.cc index 78332fcfa28..eb9be825f48 100644 --- a/locktree/locktree.cc +++ b/locktree/locktree.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/locktree/locktree.h b/locktree/locktree.h index 74fab0af0ad..3e613aba7a4 100644 --- a/locktree/locktree.h +++ b/locktree/locktree.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/locktree/manager.cc b/locktree/manager.cc index 4aa9a135214..f3d45e2defd 100644 --- a/locktree/manager.cc +++ b/locktree/manager.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/locktree/range_buffer.cc b/locktree/range_buffer.cc index acf3422ced4..cc7bbd90afc 100644 --- a/locktree/range_buffer.cc +++ b/locktree/range_buffer.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/locktree/range_buffer.h b/locktree/range_buffer.h index edd31211058..7b1beb90329 100644 --- a/locktree/range_buffer.h +++ b/locktree/range_buffer.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/locktree/tests/concurrent_tree_create_destroy.cc b/locktree/tests/concurrent_tree_create_destroy.cc index f6bb3987d1f..a1187d6e0cc 100644 --- a/locktree/tests/concurrent_tree_create_destroy.cc +++ b/locktree/tests/concurrent_tree_create_destroy.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/locktree/tests/concurrent_tree_lkr_acquire_release.cc b/locktree/tests/concurrent_tree_lkr_acquire_release.cc index 6ae972d2321..002df28ff9e 100644 --- a/locktree/tests/concurrent_tree_lkr_acquire_release.cc +++ b/locktree/tests/concurrent_tree_lkr_acquire_release.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/locktree/tests/concurrent_tree_lkr_insert_remove.cc b/locktree/tests/concurrent_tree_lkr_insert_remove.cc index 275abbb3baa..a4c3f01f419 100644 --- a/locktree/tests/concurrent_tree_lkr_insert_remove.cc +++ b/locktree/tests/concurrent_tree_lkr_insert_remove.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/locktree/tests/concurrent_tree_lkr_insert_serial_large.cc b/locktree/tests/concurrent_tree_lkr_insert_serial_large.cc index 1165bff9151..1b3da34c904 100644 --- a/locktree/tests/concurrent_tree_lkr_insert_serial_large.cc +++ b/locktree/tests/concurrent_tree_lkr_insert_serial_large.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/locktree/tests/concurrent_tree_lkr_remove_all.cc b/locktree/tests/concurrent_tree_lkr_remove_all.cc index 0f7b045ded9..9fc67dbf5ef 100644 --- a/locktree/tests/concurrent_tree_lkr_remove_all.cc +++ b/locktree/tests/concurrent_tree_lkr_remove_all.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/locktree/tests/concurrent_tree_unit_test.h b/locktree/tests/concurrent_tree_unit_test.h index f57c45f9d5c..132dbf24cce 100644 --- a/locktree/tests/concurrent_tree_unit_test.h +++ b/locktree/tests/concurrent_tree_unit_test.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/locktree/tests/lock_request_create_set.cc b/locktree/tests/lock_request_create_set.cc index b309d9b6fd8..d88976add4f 100644 --- a/locktree/tests/lock_request_create_set.cc +++ b/locktree/tests/lock_request_create_set.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/locktree/tests/lock_request_get_set_keys.cc b/locktree/tests/lock_request_get_set_keys.cc index 60300a138df..55bb483114b 100644 --- a/locktree/tests/lock_request_get_set_keys.cc +++ b/locktree/tests/lock_request_get_set_keys.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/locktree/tests/lock_request_killed.cc b/locktree/tests/lock_request_killed.cc index 593d2cc17fc..3c2a6a35562 100644 --- a/locktree/tests/lock_request_killed.cc +++ b/locktree/tests/lock_request_killed.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/locktree/tests/lock_request_not_killed.cc b/locktree/tests/lock_request_not_killed.cc index c2ad4817455..96bd2869fcf 100644 --- a/locktree/tests/lock_request_not_killed.cc +++ b/locktree/tests/lock_request_not_killed.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/locktree/tests/lock_request_start_deadlock.cc b/locktree/tests/lock_request_start_deadlock.cc index 38bea266c61..af28b06b682 100644 --- a/locktree/tests/lock_request_start_deadlock.cc +++ b/locktree/tests/lock_request_start_deadlock.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/locktree/tests/lock_request_start_pending.cc b/locktree/tests/lock_request_start_pending.cc index dc7bf363300..a719da64114 100644 --- a/locktree/tests/lock_request_start_pending.cc +++ b/locktree/tests/lock_request_start_pending.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/locktree/tests/lock_request_unit_test.h b/locktree/tests/lock_request_unit_test.h index a20f2f1326f..8fc4a3f8df8 100644 --- a/locktree/tests/lock_request_unit_test.h +++ b/locktree/tests/lock_request_unit_test.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/locktree/tests/lock_request_wait_time_callback.cc b/locktree/tests/lock_request_wait_time_callback.cc index 60298f536da..b583e32e117 100644 --- a/locktree/tests/lock_request_wait_time_callback.cc +++ b/locktree/tests/lock_request_wait_time_callback.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/locktree/tests/locktree_conflicts.cc b/locktree/tests/locktree_conflicts.cc index 3a78906e203..716000d4753 100644 --- a/locktree/tests/locktree_conflicts.cc +++ b/locktree/tests/locktree_conflicts.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/locktree/tests/locktree_create_destroy.cc b/locktree/tests/locktree_create_destroy.cc index e4abfb36d1f..93bdea239cc 100644 --- a/locktree/tests/locktree_create_destroy.cc +++ b/locktree/tests/locktree_create_destroy.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/locktree/tests/locktree_escalation_1big7lt_1small.cc b/locktree/tests/locktree_escalation_1big7lt_1small.cc index ec1911308ea..02784f52bfa 100644 --- a/locktree/tests/locktree_escalation_1big7lt_1small.cc +++ b/locktree/tests/locktree_escalation_1big7lt_1small.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/locktree/tests/locktree_escalation_2big_1lt.cc b/locktree/tests/locktree_escalation_2big_1lt.cc index c7135707a1d..9509224a15f 100644 --- a/locktree/tests/locktree_escalation_2big_1lt.cc +++ b/locktree/tests/locktree_escalation_2big_1lt.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/locktree/tests/locktree_escalation_2big_2lt.cc b/locktree/tests/locktree_escalation_2big_2lt.cc index dcd55f72509..5e315edda78 100644 --- a/locktree/tests/locktree_escalation_2big_2lt.cc +++ b/locktree/tests/locktree_escalation_2big_2lt.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/locktree/tests/locktree_escalation_impossible.cc b/locktree/tests/locktree_escalation_impossible.cc index 08a51b6a981..a7d84aaf650 100644 --- a/locktree/tests/locktree_escalation_impossible.cc +++ b/locktree/tests/locktree_escalation_impossible.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/locktree/tests/locktree_escalation_stalls.cc b/locktree/tests/locktree_escalation_stalls.cc index 5e8f84175bc..9228e627e9a 100644 --- a/locktree/tests/locktree_escalation_stalls.cc +++ b/locktree/tests/locktree_escalation_stalls.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/locktree/tests/locktree_infinity.cc b/locktree/tests/locktree_infinity.cc index cbabc131668..ef490b59cc2 100644 --- a/locktree/tests/locktree_infinity.cc +++ b/locktree/tests/locktree_infinity.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/locktree/tests/locktree_misc.cc b/locktree/tests/locktree_misc.cc index 51dd17fa43c..67d616867bc 100644 --- a/locktree/tests/locktree_misc.cc +++ b/locktree/tests/locktree_misc.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/locktree/tests/locktree_overlapping_relock.cc b/locktree/tests/locktree_overlapping_relock.cc index 15f20f1a91d..4cf950e7037 100644 --- a/locktree/tests/locktree_overlapping_relock.cc +++ b/locktree/tests/locktree_overlapping_relock.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/locktree/tests/locktree_simple_lock.cc b/locktree/tests/locktree_simple_lock.cc index 6042fe71c13..c4ebb45537d 100644 --- a/locktree/tests/locktree_simple_lock.cc +++ b/locktree/tests/locktree_simple_lock.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/locktree/tests/locktree_single_txnid_optimization.cc b/locktree/tests/locktree_single_txnid_optimization.cc index ddbc88da400..17ebc3c86f9 100644 --- a/locktree/tests/locktree_single_txnid_optimization.cc +++ b/locktree/tests/locktree_single_txnid_optimization.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/locktree/tests/locktree_unit_test.h b/locktree/tests/locktree_unit_test.h index ba7a934340b..34dbc3a7e59 100644 --- a/locktree/tests/locktree_unit_test.h +++ b/locktree/tests/locktree_unit_test.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/locktree/tests/manager_create_destroy.cc b/locktree/tests/manager_create_destroy.cc index 5b4eef82b30..07c00c5d7b7 100644 --- a/locktree/tests/manager_create_destroy.cc +++ b/locktree/tests/manager_create_destroy.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/locktree/tests/manager_locktree_map.cc b/locktree/tests/manager_locktree_map.cc index bd35ba93fc9..82cf1dc9f5a 100644 --- a/locktree/tests/manager_locktree_map.cc +++ b/locktree/tests/manager_locktree_map.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/locktree/tests/manager_params.cc b/locktree/tests/manager_params.cc index 1fbaf2cf330..7376d91a064 100644 --- a/locktree/tests/manager_params.cc +++ b/locktree/tests/manager_params.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/locktree/tests/manager_reference_release_lt.cc b/locktree/tests/manager_reference_release_lt.cc index cdc876c829d..c2fdee49ffe 100644 --- a/locktree/tests/manager_reference_release_lt.cc +++ b/locktree/tests/manager_reference_release_lt.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/locktree/tests/manager_status.cc b/locktree/tests/manager_status.cc index d6b9b1fbade..b2f1560736a 100644 --- a/locktree/tests/manager_status.cc +++ b/locktree/tests/manager_status.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/locktree/tests/manager_unit_test.h b/locktree/tests/manager_unit_test.h index bd6e6db5f52..cec640e0c2c 100644 --- a/locktree/tests/manager_unit_test.h +++ b/locktree/tests/manager_unit_test.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/locktree/tests/range_buffer_test.cc b/locktree/tests/range_buffer_test.cc index 5df3cc522ee..61d14888229 100644 --- a/locktree/tests/range_buffer_test.cc +++ b/locktree/tests/range_buffer_test.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/locktree/tests/test.h b/locktree/tests/test.h index dc1eb2f5afe..904d0d03415 100644 --- a/locktree/tests/test.h +++ b/locktree/tests/test.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. 
+ TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/locktree/tests/txnid_set_test.cc b/locktree/tests/txnid_set_test.cc index fe442a50683..3502b9bf049 100644 --- a/locktree/tests/txnid_set_test.cc +++ b/locktree/tests/txnid_set_test.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/locktree/tests/wfg_test.cc b/locktree/tests/wfg_test.cc index a7669135bf0..be3f8fa1f20 100644 --- a/locktree/tests/wfg_test.cc +++ b/locktree/tests/wfg_test.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/locktree/treenode.cc b/locktree/treenode.cc index 836e16ce0f5..9853874776f 100644 --- a/locktree/treenode.cc +++ b/locktree/treenode.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/locktree/treenode.h b/locktree/treenode.h index a83699d5d56..7a6880a657c 100644 --- a/locktree/treenode.h +++ b/locktree/treenode.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/locktree/txnid_set.cc b/locktree/txnid_set.cc index 598a717f933..f6b95c9b32f 100644 --- a/locktree/txnid_set.cc +++ b/locktree/txnid_set.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/locktree/txnid_set.h b/locktree/txnid_set.h index 0dfed575178..c2c84b39c07 100644 --- a/locktree/txnid_set.h +++ b/locktree/txnid_set.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/locktree/wfg.cc b/locktree/wfg.cc index dea97d5cd43..e18c7f4aa26 100644 --- a/locktree/wfg.cc +++ b/locktree/wfg.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/locktree/wfg.h b/locktree/wfg.h index 3b6c2922ba3..99172902d2e 100644 --- a/locktree/wfg.h +++ b/locktree/wfg.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/portability/file.cc b/portability/file.cc index b351141fe29..06ccfcfc97d 100644 --- a/portability/file.cc +++ b/portability/file.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/portability/huge_page_detection.cc b/portability/huge_page_detection.cc index 9344d77786f..c90333857c3 100644 --- a/portability/huge_page_detection.cc +++ b/portability/huge_page_detection.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/portability/memory.cc b/portability/memory.cc index 6102aaf7c1a..568be399bb5 100644 --- a/portability/memory.cc +++ b/portability/memory.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/portability/memory.h b/portability/memory.h index 4e90d0afc14..837b0a70265 100644 --- a/portability/memory.h +++ b/portability/memory.h @@ -30,7 +30,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/portability/os_malloc.cc b/portability/os_malloc.cc index 6db71e958e9..c59167bd8c4 100644 --- a/portability/os_malloc.cc +++ b/portability/os_malloc.cc @@ -30,7 +30,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/portability/portability.cc b/portability/portability.cc index 8fdfb916d94..09c1ccd50be 100644 --- a/portability/portability.cc +++ b/portability/portability.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/portability/tests/rwlock_condvar.h b/portability/tests/rwlock_condvar.h index fb592175dc8..135481f8997 100644 --- a/portability/tests/rwlock_condvar.h +++ b/portability/tests/rwlock_condvar.h @@ -33,7 +33,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/portability/tests/test-active-cpus.cc b/portability/tests/test-active-cpus.cc index c8e1188cd03..ed141edd0bf 100644 --- a/portability/tests/test-active-cpus.cc +++ b/portability/tests/test-active-cpus.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/portability/tests/test-cache-line-boundary-fails.cc b/portability/tests/test-cache-line-boundary-fails.cc index 6e900b15be8..eb4862c2254 100644 --- a/portability/tests/test-cache-line-boundary-fails.cc +++ b/portability/tests/test-cache-line-boundary-fails.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/portability/tests/test-cpu-freq-openlimit17.cc b/portability/tests/test-cpu-freq-openlimit17.cc index ae4ec26fb97..04e58d49bf6 100644 --- a/portability/tests/test-cpu-freq-openlimit17.cc +++ b/portability/tests/test-cpu-freq-openlimit17.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/portability/tests/test-cpu-freq.cc b/portability/tests/test-cpu-freq.cc index f0e991c9735..889eb29c5c1 100644 --- a/portability/tests/test-cpu-freq.cc +++ b/portability/tests/test-cpu-freq.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/portability/tests/test-filesystem-sizes.cc b/portability/tests/test-filesystem-sizes.cc index e4466b83952..993eaf4fea2 100644 --- a/portability/tests/test-filesystem-sizes.cc +++ b/portability/tests/test-filesystem-sizes.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/portability/tests/test-flock.cc b/portability/tests/test-flock.cc index 942dc6b0686..5ef45b1bd97 100644 --- a/portability/tests/test-flock.cc +++ b/portability/tests/test-flock.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/portability/tests/test-fsync-directory.cc b/portability/tests/test-fsync-directory.cc index 8d1546fcff7..a0de1a0d882 100644 --- a/portability/tests/test-fsync-directory.cc +++ b/portability/tests/test-fsync-directory.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/portability/tests/test-fsync.cc b/portability/tests/test-fsync.cc index 33b02550716..4d3be11120f 100644 --- a/portability/tests/test-fsync.cc +++ b/portability/tests/test-fsync.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/portability/tests/test-gettime.cc b/portability/tests/test-gettime.cc index 70b24cd2aaf..ce0e5cb9921 100644 --- a/portability/tests/test-gettime.cc +++ b/portability/tests/test-gettime.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/portability/tests/test-gettimeofday.cc b/portability/tests/test-gettimeofday.cc index a4660d8ba4f..0ff77118d1c 100644 --- a/portability/tests/test-gettimeofday.cc +++ b/portability/tests/test-gettimeofday.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/portability/tests/test-hugepage.cc b/portability/tests/test-hugepage.cc index 61adcac0a69..4aa11ee2e79 100644 --- a/portability/tests/test-hugepage.cc +++ b/portability/tests/test-hugepage.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/portability/tests/test-max-data.cc b/portability/tests/test-max-data.cc index 459349460d6..f04b39d8421 100644 --- a/portability/tests/test-max-data.cc +++ b/portability/tests/test-max-data.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/portability/tests/test-memory-status.cc b/portability/tests/test-memory-status.cc index 20eea248bae..87011c4e0d0 100644 --- a/portability/tests/test-memory-status.cc +++ b/portability/tests/test-memory-status.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/portability/tests/test-pagesize.cc b/portability/tests/test-pagesize.cc index 5f921fe4920..f9a78742e35 100644 --- a/portability/tests/test-pagesize.cc +++ b/portability/tests/test-pagesize.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/portability/tests/test-pthread-rwlock-rdlock.cc b/portability/tests/test-pthread-rwlock-rdlock.cc index a4c5dcd0128..9008262fa09 100644 --- a/portability/tests/test-pthread-rwlock-rdlock.cc +++ b/portability/tests/test-pthread-rwlock-rdlock.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/portability/tests/test-pthread-rwlock-rwr.cc b/portability/tests/test-pthread-rwlock-rwr.cc index b7a21b1fc06..32b38421aaf 100644 --- a/portability/tests/test-pthread-rwlock-rwr.cc +++ b/portability/tests/test-pthread-rwlock-rwr.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/portability/tests/test-pwrite4g.cc b/portability/tests/test-pwrite4g.cc index 3fa21f99fa0..abd5e4ec1ac 100644 --- a/portability/tests/test-pwrite4g.cc +++ b/portability/tests/test-pwrite4g.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/portability/tests/test-snprintf.cc b/portability/tests/test-snprintf.cc index 852fab29ba5..5f168c8d612 100644 --- a/portability/tests/test-snprintf.cc +++ b/portability/tests/test-snprintf.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/portability/tests/test-stat.cc b/portability/tests/test-stat.cc index 8e3d18eac9a..bedf7e7e54f 100644 --- a/portability/tests/test-stat.cc +++ b/portability/tests/test-stat.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/portability/tests/test-toku-malloc.cc b/portability/tests/test-toku-malloc.cc index 8c588230331..48f616dd817 100644 --- a/portability/tests/test-toku-malloc.cc +++ b/portability/tests/test-toku-malloc.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/portability/tests/test-xid.cc b/portability/tests/test-xid.cc index 140f335d0e5..9277f984b43 100644 --- a/portability/tests/test-xid.cc +++ b/portability/tests/test-xid.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. 
+ TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/portability/tests/test.h b/portability/tests/test.h index ff71395e7d7..a3e7994957e 100644 --- a/portability/tests/test.h +++ b/portability/tests/test.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/portability/tests/try-assert-zero.cc b/portability/tests/try-assert-zero.cc index 6249d0b1aa5..6517f61b3c3 100644 --- a/portability/tests/try-assert-zero.cc +++ b/portability/tests/try-assert-zero.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/portability/tests/try-assert0.cc b/portability/tests/try-assert0.cc index f2e1a99469b..89fe6941138 100644 --- a/portability/tests/try-assert0.cc +++ b/portability/tests/try-assert0.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/portability/tests/try-leak-lost.cc b/portability/tests/try-leak-lost.cc index fa6217f39f0..57bbe3589bb 100644 --- a/portability/tests/try-leak-lost.cc +++ b/portability/tests/try-leak-lost.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/portability/tests/try-leak-reachable.cc b/portability/tests/try-leak-reachable.cc index b17418ae67b..63c1dd4f756 100644 --- a/portability/tests/try-leak-reachable.cc +++ b/portability/tests/try-leak-reachable.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/portability/tests/try-uninit.cc b/portability/tests/try-uninit.cc index 415de3203f7..c763348ed2f 100644 --- a/portability/tests/try-uninit.cc +++ b/portability/tests/try-uninit.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/portability/toku_assert.cc b/portability/toku_assert.cc index 860d11d83f4..68e16699e60 100644 --- a/portability/toku_assert.cc +++ b/portability/toku_assert.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/portability/toku_assert.h b/portability/toku_assert.h index 8767f048355..ab5f8c1ffb4 100644 --- a/portability/toku_assert.h +++ b/portability/toku_assert.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/portability/toku_atomic.h b/portability/toku_atomic.h index 2243a4ca4b2..075211a790c 100644 --- a/portability/toku_atomic.h +++ b/portability/toku_atomic.h @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/portability/toku_byteswap.h b/portability/toku_byteswap.h index 5895738abe6..12c76b00825 100644 --- a/portability/toku_byteswap.h +++ b/portability/toku_byteswap.h @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/portability/toku_crash.cc b/portability/toku_crash.cc index 2eed142229d..123746d8f7f 100644 --- a/portability/toku_crash.cc +++ b/portability/toku_crash.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/portability/toku_crash.h b/portability/toku_crash.h index acb060323e7..a5dd959a15d 100644 --- a/portability/toku_crash.h +++ b/portability/toku_crash.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/portability/toku_htod.h b/portability/toku_htod.h index 0053a93f2fc..d12d45a13cf 100644 --- a/portability/toku_htod.h +++ b/portability/toku_htod.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/portability/toku_htonl.h b/portability/toku_htonl.h index f3dcb9aaba9..f2ba320bf1f 100644 --- a/portability/toku_htonl.h +++ b/portability/toku_htonl.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/portability/toku_list.h b/portability/toku_list.h index 534f1179444..3fc96a671dd 100644 --- a/portability/toku_list.h +++ b/portability/toku_list.h @@ -30,7 +30,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/portability/toku_os.h b/portability/toku_os.h index ba21a56de91..71576d7c1dd 100644 --- a/portability/toku_os.h +++ b/portability/toku_os.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/portability/toku_os_types.h b/portability/toku_os_types.h index 47c7e53dfda..a7053374fde 100644 --- a/portability/toku_os_types.h +++ b/portability/toku_os_types.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/portability/toku_path.cc b/portability/toku_path.cc index 22264b7e799..89b106309eb 100644 --- a/portability/toku_path.cc +++ b/portability/toku_path.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/portability/toku_path.h b/portability/toku_path.h index bf0af6bbb64..3ee6736360f 100644 --- a/portability/toku_path.h +++ b/portability/toku_path.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/portability/toku_portability.h b/portability/toku_portability.h index 04afe282594..9459c2d7ad3 100644 --- a/portability/toku_portability.h +++ b/portability/toku_portability.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/portability/toku_pthread.cc b/portability/toku_pthread.cc index d023e9427ec..42ae9c0b1f5 100644 --- a/portability/toku_pthread.cc +++ b/portability/toku_pthread.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/portability/toku_pthread.h b/portability/toku_pthread.h index 90b12689109..e6a7ae4d92a 100644 --- a/portability/toku_pthread.h +++ b/portability/toku_pthread.h @@ -30,7 +30,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/portability/toku_race_tools.h b/portability/toku_race_tools.h index eb97e55c6d3..b4c83b6119d 100644 --- a/portability/toku_race_tools.h +++ b/portability/toku_race_tools.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/portability/toku_random.h b/portability/toku_random.h index 0a9df169be1..a350b171a3b 100644 --- a/portability/toku_random.h +++ b/portability/toku_random.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/portability/toku_stdint.h b/portability/toku_stdint.h index d75e48a11d0..806e40e612b 100644 --- a/portability/toku_stdint.h +++ b/portability/toku_stdint.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/portability/toku_stdlib.h b/portability/toku_stdlib.h index f5764868b83..229e3945cf9 100644 --- a/portability/toku_stdlib.h +++ b/portability/toku_stdlib.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/portability/toku_time.cc b/portability/toku_time.cc index a20c45dfb03..97834493194 100644 --- a/portability/toku_time.cc +++ b/portability/toku_time.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/portability/toku_time.h b/portability/toku_time.h index f5eb778eeec..069e67c0d28 100644 --- a/portability/toku_time.h +++ b/portability/toku_time.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/errors.cc b/src/errors.cc index 4101b372a38..fa1227b25cc 100644 --- a/src/errors.cc +++ b/src/errors.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/src/indexer-internal.h b/src/indexer-internal.h index c3a00cef295..fd648a88c8f 100644 --- a/src/indexer-internal.h +++ b/src/indexer-internal.h @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/indexer-undo-do.cc b/src/indexer-undo-do.cc index a97064119bc..52489fb7825 100644 --- a/src/indexer-undo-do.cc +++ b/src/indexer-undo-do.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/indexer.cc b/src/indexer.cc index 926bf5c579d..4b48b747858 100644 --- a/src/indexer.cc +++ b/src/indexer.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/indexer.h b/src/indexer.h index 5eaecaf5f3b..12625fdc6ea 100644 --- a/src/indexer.h +++ b/src/indexer.h @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/loader.cc b/src/loader.cc index 1550af3460c..e59242f1dee 100644 --- a/src/loader.cc +++ b/src/loader.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/loader.h b/src/loader.h index e6316a5cea8..c709eed3e35 100644 --- a/src/loader.h +++ b/src/loader.h @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. 
+ TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/big-nested-abort-abort.cc b/src/tests/big-nested-abort-abort.cc index 882c3441a8f..7c6e444986a 100644 --- a/src/tests/big-nested-abort-abort.cc +++ b/src/tests/big-nested-abort-abort.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/big-nested-abort-commit.cc b/src/tests/big-nested-abort-commit.cc index 98dde05cfde..9965a6f5725 100644 --- a/src/tests/big-nested-abort-commit.cc +++ b/src/tests/big-nested-abort-commit.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/big-nested-commit-abort.cc b/src/tests/big-nested-commit-abort.cc index ec8707530fa..6e02e6e7799 100644 --- a/src/tests/big-nested-commit-abort.cc +++ b/src/tests/big-nested-commit-abort.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/big-nested-commit-commit.cc b/src/tests/big-nested-commit-commit.cc index 189bc97769f..efd951a90d4 100644 --- a/src/tests/big-nested-commit-commit.cc +++ b/src/tests/big-nested-commit-commit.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/src/tests/bigtxn27.cc b/src/tests/bigtxn27.cc index baa1e4f7e6e..1eedb79543d 100644 --- a/src/tests/bigtxn27.cc +++ b/src/tests/bigtxn27.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/blackhole.cc b/src/tests/blackhole.cc index bf15283d0f9..34df107b153 100644 --- a/src/tests/blackhole.cc +++ b/src/tests/blackhole.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/blocking-first-empty.cc b/src/tests/blocking-first-empty.cc index 6ccf879005d..3fb5cae46ff 100644 --- a/src/tests/blocking-first-empty.cc +++ b/src/tests/blocking-first-empty.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/blocking-first.cc b/src/tests/blocking-first.cc index 6d255023274..b501f70d5bb 100644 --- a/src/tests/blocking-first.cc +++ b/src/tests/blocking-first.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/blocking-last.cc b/src/tests/blocking-last.cc index 403f31bca61..e087d9623fc 100644 --- a/src/tests/blocking-last.cc +++ b/src/tests/blocking-last.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/src/tests/blocking-next-prev-deadlock.cc b/src/tests/blocking-next-prev-deadlock.cc index f71c89aa3e7..dac4aa1ad44 100644 --- a/src/tests/blocking-next-prev-deadlock.cc +++ b/src/tests/blocking-next-prev-deadlock.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/blocking-next-prev.cc b/src/tests/blocking-next-prev.cc index ec1ae8ff340..5fa2f781fb7 100644 --- a/src/tests/blocking-next-prev.cc +++ b/src/tests/blocking-next-prev.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/blocking-prelock-range.cc b/src/tests/blocking-prelock-range.cc index 15065dcf244..78d2975f81b 100644 --- a/src/tests/blocking-prelock-range.cc +++ b/src/tests/blocking-prelock-range.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/blocking-put-timeout.cc b/src/tests/blocking-put-timeout.cc index b91198a4438..13fddb8d05b 100644 --- a/src/tests/blocking-put-timeout.cc +++ b/src/tests/blocking-put-timeout.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/blocking-put-wakeup.cc b/src/tests/blocking-put-wakeup.cc index 06c51b6fede..c5052fbf813 100644 --- a/src/tests/blocking-put-wakeup.cc +++ b/src/tests/blocking-put-wakeup.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. 
+ TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/blocking-put.cc b/src/tests/blocking-put.cc index 8100862881a..8b1cf71e359 100644 --- a/src/tests/blocking-put.cc +++ b/src/tests/blocking-put.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/blocking-set-range-0.cc b/src/tests/blocking-set-range-0.cc index 896d4a82e12..8445493832f 100644 --- a/src/tests/blocking-set-range-0.cc +++ b/src/tests/blocking-set-range-0.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/blocking-set-range-n.cc b/src/tests/blocking-set-range-n.cc index 841809fadbe..a37e5b2a0a7 100644 --- a/src/tests/blocking-set-range-n.cc +++ b/src/tests/blocking-set-range-n.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/blocking-set-range-reverse-0.cc b/src/tests/blocking-set-range-reverse-0.cc index f63bb3c0e5b..dc79522d629 100644 --- a/src/tests/blocking-set-range-reverse-0.cc +++ b/src/tests/blocking-set-range-reverse-0.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/src/tests/blocking-set.cc b/src/tests/blocking-set.cc index dd0196ff276..4bb3c5dd1eb 100644 --- a/src/tests/blocking-set.cc +++ b/src/tests/blocking-set.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/blocking-table-lock.cc b/src/tests/blocking-table-lock.cc index 42e824debbf..8258a698784 100644 --- a/src/tests/blocking-table-lock.cc +++ b/src/tests/blocking-table-lock.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/bug1381.cc b/src/tests/bug1381.cc index c603d5e3ab2..988538ef782 100644 --- a/src/tests/bug1381.cc +++ b/src/tests/bug1381.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/cachetable-race.cc b/src/tests/cachetable-race.cc index 1e0ffaad40c..0ff1fc11b4e 100644 --- a/src/tests/cachetable-race.cc +++ b/src/tests/cachetable-race.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/checkpoint1.cc b/src/tests/checkpoint1.cc index 9fe56cdbc36..68300dee6fb 100644 --- a/src/tests/checkpoint1.cc +++ b/src/tests/checkpoint1.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/src/tests/checkpoint_fairness.cc b/src/tests/checkpoint_fairness.cc index 03d5c47f4e5..3e76020d70b 100644 --- a/src/tests/checkpoint_fairness.cc +++ b/src/tests/checkpoint_fairness.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/checkpoint_stress.cc b/src/tests/checkpoint_stress.cc index 976ba8e8555..1f39061bb1e 100644 --- a/src/tests/checkpoint_stress.cc +++ b/src/tests/checkpoint_stress.cc @@ -27,7 +27,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/checkpoint_test.h b/src/tests/checkpoint_test.h index 0ded9104bf0..4ea74e09ede 100644 --- a/src/tests/checkpoint_test.h +++ b/src/tests/checkpoint_test.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/create-datadir.cc b/src/tests/create-datadir.cc index ecd0b032897..2cde781872b 100644 --- a/src/tests/create-datadir.cc +++ b/src/tests/create-datadir.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/cursor-isolation.cc b/src/tests/cursor-isolation.cc index 1a1450f2f63..ec91f5b73d1 100644 --- a/src/tests/cursor-isolation.cc +++ b/src/tests/cursor-isolation.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/src/tests/cursor-more-than-a-leaf-provdel.cc b/src/tests/cursor-more-than-a-leaf-provdel.cc index 2927bb7c5ff..4587402c286 100644 --- a/src/tests/cursor-more-than-a-leaf-provdel.cc +++ b/src/tests/cursor-more-than-a-leaf-provdel.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/cursor-set-del-rmw.cc b/src/tests/cursor-set-del-rmw.cc index 38ccf112697..79df796a6c1 100644 --- a/src/tests/cursor-set-del-rmw.cc +++ b/src/tests/cursor-set-del-rmw.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/cursor-set-range-rmw.cc b/src/tests/cursor-set-range-rmw.cc index fb5dbca72b4..4f0dce02edd 100644 --- a/src/tests/cursor-set-range-rmw.cc +++ b/src/tests/cursor-set-range-rmw.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/cursor-step-over-delete.cc b/src/tests/cursor-step-over-delete.cc index 748b5135899..7c57475da6a 100644 --- a/src/tests/cursor-step-over-delete.cc +++ b/src/tests/cursor-step-over-delete.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/src/tests/db-put-simple-deadlock-threads.cc b/src/tests/db-put-simple-deadlock-threads.cc index d60725c4966..6227b602df2 100644 --- a/src/tests/db-put-simple-deadlock-threads.cc +++ b/src/tests/db-put-simple-deadlock-threads.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/db-put-simple-deadlock.cc b/src/tests/db-put-simple-deadlock.cc index acc841b8d78..46109e9592f 100644 --- a/src/tests/db-put-simple-deadlock.cc +++ b/src/tests/db-put-simple-deadlock.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/db-put-simple-lockwait.cc b/src/tests/db-put-simple-lockwait.cc index e459652101d..6466bd2e9ea 100644 --- a/src/tests/db-put-simple-lockwait.cc +++ b/src/tests/db-put-simple-lockwait.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/db-put-update-deadlock.cc b/src/tests/db-put-update-deadlock.cc index ddd2893cae0..cfbf95cd599 100644 --- a/src/tests/db-put-update-deadlock.cc +++ b/src/tests/db-put-update-deadlock.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/src/tests/dbremove-nofile-limit.cc b/src/tests/dbremove-nofile-limit.cc index eb5c6b80b63..cd8b50c1c5b 100644 --- a/src/tests/dbremove-nofile-limit.cc +++ b/src/tests/dbremove-nofile-limit.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/del-multiple-huge-primary-row.cc b/src/tests/del-multiple-huge-primary-row.cc index 9d2b2b6871b..f0ee57228ad 100644 --- a/src/tests/del-multiple-huge-primary-row.cc +++ b/src/tests/del-multiple-huge-primary-row.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/del-multiple-srcdb.cc b/src/tests/del-multiple-srcdb.cc index 5230caf3a4e..f14ba646e59 100644 --- a/src/tests/del-multiple-srcdb.cc +++ b/src/tests/del-multiple-srcdb.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/del-multiple.cc b/src/tests/del-multiple.cc index b54ff4fce72..7f3560fb459 100644 --- a/src/tests/del-multiple.cc +++ b/src/tests/del-multiple.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/del-simple.cc b/src/tests/del-simple.cc index 34376637c9a..6ae08607f48 100644 --- a/src/tests/del-simple.cc +++ b/src/tests/del-simple.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. 
Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/directory_lock.cc b/src/tests/directory_lock.cc index ed89e004900..c67dfab20c8 100644 --- a/src/tests/directory_lock.cc +++ b/src/tests/directory_lock.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/diskfull.cc b/src/tests/diskfull.cc index fdce56aa251..d52f621a174 100644 --- a/src/tests/diskfull.cc +++ b/src/tests/diskfull.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/dump-env.cc b/src/tests/dump-env.cc index edbfc7d02cb..7815aa5bd63 100644 --- a/src/tests/dump-env.cc +++ b/src/tests/dump-env.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/env-put-multiple.cc b/src/tests/env-put-multiple.cc index 0988f3d5ca2..75ccb0297b3 100644 --- a/src/tests/env-put-multiple.cc +++ b/src/tests/env-put-multiple.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/env_loader_memory.cc b/src/tests/env_loader_memory.cc index 106bdefd3a9..ed19f05f944 100644 --- a/src/tests/env_loader_memory.cc +++ b/src/tests/env_loader_memory.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/src/tests/env_nproc.cc b/src/tests/env_nproc.cc index 29bc216f9c8..3ed60a18e69 100644 --- a/src/tests/env_nproc.cc +++ b/src/tests/env_nproc.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/env_startup.cc b/src/tests/env_startup.cc index 0fe5a4abac8..5be8b9849a0 100644 --- a/src/tests/env_startup.cc +++ b/src/tests/env_startup.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/filesize.cc b/src/tests/filesize.cc index b47be955efd..6b4c03a358c 100644 --- a/src/tests/filesize.cc +++ b/src/tests/filesize.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/get_key_after_bytes_unit.cc b/src/tests/get_key_after_bytes_unit.cc index 7303ebac8c8..73a6e92b28f 100644 --- a/src/tests/get_key_after_bytes_unit.cc +++ b/src/tests/get_key_after_bytes_unit.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/get_last_key.cc b/src/tests/get_last_key.cc index 36c7ab63259..241652928c5 100644 --- a/src/tests/get_last_key.cc +++ b/src/tests/get_last_key.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/src/tests/helgrind1.cc b/src/tests/helgrind1.cc index 5e451ab2a25..49572197fc1 100644 --- a/src/tests/helgrind1.cc +++ b/src/tests/helgrind1.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/helgrind2.cc b/src/tests/helgrind2.cc index 608d635a54a..d70c4d256df 100644 --- a/src/tests/helgrind2.cc +++ b/src/tests/helgrind2.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/helgrind3.cc b/src/tests/helgrind3.cc index 2defde37ba5..85f909b11f0 100644 --- a/src/tests/helgrind3.cc +++ b/src/tests/helgrind3.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/hot-optimize-table-tests.cc b/src/tests/hot-optimize-table-tests.cc index 886d23366e5..7b580d94953 100644 --- a/src/tests/hot-optimize-table-tests.cc +++ b/src/tests/hot-optimize-table-tests.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/hotindexer-bw.cc b/src/tests/hotindexer-bw.cc index 7d06cce6ca0..eb6b9f1b11b 100644 --- a/src/tests/hotindexer-bw.cc +++ b/src/tests/hotindexer-bw.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/src/tests/hotindexer-error-callback.cc b/src/tests/hotindexer-error-callback.cc index 0f0e889d525..18e5a0116e1 100644 --- a/src/tests/hotindexer-error-callback.cc +++ b/src/tests/hotindexer-error-callback.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/hotindexer-insert-committed-optimized.cc b/src/tests/hotindexer-insert-committed-optimized.cc index 4acd2c57b72..9268a5d2370 100644 --- a/src/tests/hotindexer-insert-committed-optimized.cc +++ b/src/tests/hotindexer-insert-committed-optimized.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/hotindexer-insert-committed.cc b/src/tests/hotindexer-insert-committed.cc index b6c409be315..81aa83ba9f7 100644 --- a/src/tests/hotindexer-insert-committed.cc +++ b/src/tests/hotindexer-insert-committed.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/hotindexer-insert-provisional.cc b/src/tests/hotindexer-insert-provisional.cc index 911587ff3f3..509f74fbec9 100644 --- a/src/tests/hotindexer-insert-provisional.cc +++ b/src/tests/hotindexer-insert-provisional.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/src/tests/hotindexer-lock-test.cc b/src/tests/hotindexer-lock-test.cc index 615486a6496..16600c76574 100644 --- a/src/tests/hotindexer-lock-test.cc +++ b/src/tests/hotindexer-lock-test.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/hotindexer-multiclient.cc b/src/tests/hotindexer-multiclient.cc index 89da7da3b3b..18dc6e5e030 100644 --- a/src/tests/hotindexer-multiclient.cc +++ b/src/tests/hotindexer-multiclient.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/hotindexer-nested-insert-committed.cc b/src/tests/hotindexer-nested-insert-committed.cc index 446fae8e983..938ee151b69 100644 --- a/src/tests/hotindexer-nested-insert-committed.cc +++ b/src/tests/hotindexer-nested-insert-committed.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/hotindexer-put-abort.cc b/src/tests/hotindexer-put-abort.cc index 35f3e317e3e..f81336cbee4 100644 --- a/src/tests/hotindexer-put-abort.cc +++ b/src/tests/hotindexer-put-abort.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/src/tests/hotindexer-put-commit.cc b/src/tests/hotindexer-put-commit.cc index b8177d52e13..2863ef4754c 100644 --- a/src/tests/hotindexer-put-commit.cc +++ b/src/tests/hotindexer-put-commit.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/hotindexer-put-multiple.cc b/src/tests/hotindexer-put-multiple.cc index e5bb39f93ae..05e77137ca0 100644 --- a/src/tests/hotindexer-put-multiple.cc +++ b/src/tests/hotindexer-put-multiple.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/hotindexer-simple-abort-put.cc b/src/tests/hotindexer-simple-abort-put.cc index 41a7cc5b817..0aabcdbdd4a 100644 --- a/src/tests/hotindexer-simple-abort-put.cc +++ b/src/tests/hotindexer-simple-abort-put.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/hotindexer-simple-abort.cc b/src/tests/hotindexer-simple-abort.cc index f210e0079b5..3fddf1d319f 100644 --- a/src/tests/hotindexer-simple-abort.cc +++ b/src/tests/hotindexer-simple-abort.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/src/tests/hotindexer-undo-do-test.cc b/src/tests/hotindexer-undo-do-test.cc index 9aa3ea1fc8e..5ef06f62155 100644 --- a/src/tests/hotindexer-undo-do-test.cc +++ b/src/tests/hotindexer-undo-do-test.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/hotindexer-with-queries.cc b/src/tests/hotindexer-with-queries.cc index 7770b34ae07..c1be755b4d6 100644 --- a/src/tests/hotindexer-with-queries.cc +++ b/src/tests/hotindexer-with-queries.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/inflate.cc b/src/tests/inflate.cc index 8311b591c86..30f8f2199ae 100644 --- a/src/tests/inflate.cc +++ b/src/tests/inflate.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/inflate2.cc b/src/tests/inflate2.cc index e5b8b6f270f..ce594cf0834 100644 --- a/src/tests/inflate2.cc +++ b/src/tests/inflate2.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/insert-dup-prelock.cc b/src/tests/insert-dup-prelock.cc index 0771056b072..2ba99d0bc02 100644 --- a/src/tests/insert-dup-prelock.cc +++ b/src/tests/insert-dup-prelock.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/src/tests/isolation-read-committed.cc b/src/tests/isolation-read-committed.cc index ce226508d79..c949482ca16 100644 --- a/src/tests/isolation-read-committed.cc +++ b/src/tests/isolation-read-committed.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/isolation.cc b/src/tests/isolation.cc index 485986099e0..dbe4ce9cb4a 100644 --- a/src/tests/isolation.cc +++ b/src/tests/isolation.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/key-val.h b/src/tests/key-val.h index 1da48508ad5..9a4512bfdac 100644 --- a/src/tests/key-val.h +++ b/src/tests/key-val.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/keyrange-merge.cc b/src/tests/keyrange-merge.cc index 0b5df76d731..b53016053ce 100644 --- a/src/tests/keyrange-merge.cc +++ b/src/tests/keyrange-merge.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/keyrange.cc b/src/tests/keyrange.cc index ee63cd7cdf5..85ffcd23357 100644 --- a/src/tests/keyrange.cc +++ b/src/tests/keyrange.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/src/tests/last-verify-time.cc b/src/tests/last-verify-time.cc index d3b5cf456fd..057a711ffec 100644 --- a/src/tests/last-verify-time.cc +++ b/src/tests/last-verify-time.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/loader-cleanup-test.cc b/src/tests/loader-cleanup-test.cc index eaed9c4170a..c0f92c448ef 100644 --- a/src/tests/loader-cleanup-test.cc +++ b/src/tests/loader-cleanup-test.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/loader-close-nproc-limit.cc b/src/tests/loader-close-nproc-limit.cc index 3ef2b0541f7..262a63294fd 100644 --- a/src/tests/loader-close-nproc-limit.cc +++ b/src/tests/loader-close-nproc-limit.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/loader-create-abort.cc b/src/tests/loader-create-abort.cc index 58568564699..3d2cf84cefe 100644 --- a/src/tests/loader-create-abort.cc +++ b/src/tests/loader-create-abort.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/loader-create-close.cc b/src/tests/loader-create-close.cc index 4d66a9df004..8a2d043e51f 100644 --- a/src/tests/loader-create-close.cc +++ b/src/tests/loader-create-close.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. 
+ TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/loader-create-commit-nproc-limit.cc b/src/tests/loader-create-commit-nproc-limit.cc index 091809a8551..62ba70fa4f8 100644 --- a/src/tests/loader-create-commit-nproc-limit.cc +++ b/src/tests/loader-create-commit-nproc-limit.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/loader-create-nproc-limit.cc b/src/tests/loader-create-nproc-limit.cc index 7a61fce7799..844ca2043c7 100644 --- a/src/tests/loader-create-nproc-limit.cc +++ b/src/tests/loader-create-nproc-limit.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/loader-dup-test.cc b/src/tests/loader-dup-test.cc index 5fa41809baa..d3bd2aabe57 100644 --- a/src/tests/loader-dup-test.cc +++ b/src/tests/loader-dup-test.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/loader-no-puts.cc b/src/tests/loader-no-puts.cc index 6fc20c5c8a1..c2c11a639a0 100644 --- a/src/tests/loader-no-puts.cc +++ b/src/tests/loader-no-puts.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/src/tests/loader-reference-test.cc b/src/tests/loader-reference-test.cc index 7fadcf150d4..4bb9334a71f 100644 --- a/src/tests/loader-reference-test.cc +++ b/src/tests/loader-reference-test.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/loader-stress-del.cc b/src/tests/loader-stress-del.cc index 9578df66003..c9a262222fb 100644 --- a/src/tests/loader-stress-del.cc +++ b/src/tests/loader-stress-del.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/loader-stress-test.cc b/src/tests/loader-stress-test.cc index f58b839b314..b9e51436632 100644 --- a/src/tests/loader-stress-test.cc +++ b/src/tests/loader-stress-test.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/loader-tpch-load.cc b/src/tests/loader-tpch-load.cc index cbe38275821..c89331a4200 100644 --- a/src/tests/loader-tpch-load.cc +++ b/src/tests/loader-tpch-load.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/locktree_escalation_stalls.cc b/src/tests/locktree_escalation_stalls.cc index 037d2fc46a9..2a5581077d5 100644 --- a/src/tests/locktree_escalation_stalls.cc +++ b/src/tests/locktree_escalation_stalls.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. 
+ TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/manyfiles.cc b/src/tests/manyfiles.cc index 6445f1b7d38..4c68f8d86fe 100644 --- a/src/tests/manyfiles.cc +++ b/src/tests/manyfiles.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/maxsize-for-loader.cc b/src/tests/maxsize-for-loader.cc index 02b21794abb..a95a42d4870 100644 --- a/src/tests/maxsize-for-loader.cc +++ b/src/tests/maxsize-for-loader.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/medium-nested-commit-commit.cc b/src/tests/medium-nested-commit-commit.cc index 48d9102d523..aab33584391 100644 --- a/src/tests/medium-nested-commit-commit.cc +++ b/src/tests/medium-nested-commit-commit.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/multiprocess.cc b/src/tests/multiprocess.cc index fab0d7d3896..93b20d3ad7f 100644 --- a/src/tests/multiprocess.cc +++ b/src/tests/multiprocess.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/mvcc-create-table.cc b/src/tests/mvcc-create-table.cc index 84f8c75db7c..db1d1616732 100644 --- a/src/tests/mvcc-create-table.cc +++ b/src/tests/mvcc-create-table.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. 
+ TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/mvcc-many-committed.cc b/src/tests/mvcc-many-committed.cc index db261e6ae17..bbb7116b42d 100644 --- a/src/tests/mvcc-many-committed.cc +++ b/src/tests/mvcc-many-committed.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/mvcc-read-committed.cc b/src/tests/mvcc-read-committed.cc index 38a598ec5be..6f8d3377c10 100644 --- a/src/tests/mvcc-read-committed.cc +++ b/src/tests/mvcc-read-committed.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/openlimit17-locktree.cc b/src/tests/openlimit17-locktree.cc index c83ec2543f9..e9b62752af4 100644 --- a/src/tests/openlimit17-locktree.cc +++ b/src/tests/openlimit17-locktree.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/openlimit17-metafiles.cc b/src/tests/openlimit17-metafiles.cc index 52c319af778..29dbeebef7c 100644 --- a/src/tests/openlimit17-metafiles.cc +++ b/src/tests/openlimit17-metafiles.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/src/tests/openlimit17.cc b/src/tests/openlimit17.cc index 0709d89a0c2..4f322a86f35 100644 --- a/src/tests/openlimit17.cc +++ b/src/tests/openlimit17.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/perf_checkpoint_var.cc b/src/tests/perf_checkpoint_var.cc index 0c9ad682eb9..d0d60641cb1 100644 --- a/src/tests/perf_checkpoint_var.cc +++ b/src/tests/perf_checkpoint_var.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/perf_child_txn.cc b/src/tests/perf_child_txn.cc index 121d9dc3735..f6d2e8018eb 100644 --- a/src/tests/perf_child_txn.cc +++ b/src/tests/perf_child_txn.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/perf_cursor_nop.cc b/src/tests/perf_cursor_nop.cc index 71c5b8d170e..4f890ab0ca2 100644 --- a/src/tests/perf_cursor_nop.cc +++ b/src/tests/perf_cursor_nop.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/perf_iibench.cc b/src/tests/perf_iibench.cc index b5e094d2c15..b9d142c65c9 100644 --- a/src/tests/perf_iibench.cc +++ b/src/tests/perf_iibench.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/src/tests/perf_insert.cc b/src/tests/perf_insert.cc index 31210a6e343..9d621b93c0c 100644 --- a/src/tests/perf_insert.cc +++ b/src/tests/perf_insert.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/perf_malloc_free.cc b/src/tests/perf_malloc_free.cc index 451bc346897..ee6ca92edb4 100644 --- a/src/tests/perf_malloc_free.cc +++ b/src/tests/perf_malloc_free.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/perf_nop.cc b/src/tests/perf_nop.cc index 9f6b5e2ee95..e6d4d94640f 100644 --- a/src/tests/perf_nop.cc +++ b/src/tests/perf_nop.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/perf_ptquery.cc b/src/tests/perf_ptquery.cc index bc17d498a7b..71922782878 100644 --- a/src/tests/perf_ptquery.cc +++ b/src/tests/perf_ptquery.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/perf_ptquery2.cc b/src/tests/perf_ptquery2.cc index 9ae1bbab844..888081bdb81 100644 --- a/src/tests/perf_ptquery2.cc +++ b/src/tests/perf_ptquery2.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/src/tests/perf_rangequery.cc b/src/tests/perf_rangequery.cc index 88d30049a29..3d78dd16ab7 100644 --- a/src/tests/perf_rangequery.cc +++ b/src/tests/perf_rangequery.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/perf_read_txn.cc b/src/tests/perf_read_txn.cc index 9e62314fa58..2825f6588ce 100644 --- a/src/tests/perf_read_txn.cc +++ b/src/tests/perf_read_txn.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/perf_read_txn_single_thread.cc b/src/tests/perf_read_txn_single_thread.cc index debb1296ae9..f36b748d853 100644 --- a/src/tests/perf_read_txn_single_thread.cc +++ b/src/tests/perf_read_txn_single_thread.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/perf_read_write.cc b/src/tests/perf_read_write.cc index ef95e9d3aa2..f5d75f57103 100644 --- a/src/tests/perf_read_write.cc +++ b/src/tests/perf_read_write.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/perf_txn_single_thread.cc b/src/tests/perf_txn_single_thread.cc index 789024327cb..52e6d9d7cae 100644 --- a/src/tests/perf_txn_single_thread.cc +++ b/src/tests/perf_txn_single_thread.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. 
Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/powerfail.cc b/src/tests/powerfail.cc index 63ad313e145..601df047d09 100644 --- a/src/tests/powerfail.cc +++ b/src/tests/powerfail.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/preload-db-nested.cc b/src/tests/preload-db-nested.cc index 0d1a3749193..9c0c8282456 100644 --- a/src/tests/preload-db-nested.cc +++ b/src/tests/preload-db-nested.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/preload-db.cc b/src/tests/preload-db.cc index d486af941d2..584176bc997 100644 --- a/src/tests/preload-db.cc +++ b/src/tests/preload-db.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/prelock-read-read.cc b/src/tests/prelock-read-read.cc index daa6ab108b7..b23c81dd119 100644 --- a/src/tests/prelock-read-read.cc +++ b/src/tests/prelock-read-read.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/prelock-read-write.cc b/src/tests/prelock-read-write.cc index 140c9e79b1c..0a3a3fddf39 100644 --- a/src/tests/prelock-read-write.cc +++ b/src/tests/prelock-read-write.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/src/tests/prelock-write-read.cc b/src/tests/prelock-write-read.cc index 540d385b116..35c194c362d 100644 --- a/src/tests/prelock-write-read.cc +++ b/src/tests/prelock-write-read.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/prelock-write-write.cc b/src/tests/prelock-write-write.cc index 8753f158648..d9f832cdde7 100644 --- a/src/tests/prelock-write-write.cc +++ b/src/tests/prelock-write-write.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/print_engine_status.cc b/src/tests/print_engine_status.cc index 2f1b6b5b98d..34b62bd8fe6 100644 --- a/src/tests/print_engine_status.cc +++ b/src/tests/print_engine_status.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/progress.cc b/src/tests/progress.cc index e1d57ec61b0..e6af8fb9763 100644 --- a/src/tests/progress.cc +++ b/src/tests/progress.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/put-del-multiple-array-indexing.cc b/src/tests/put-del-multiple-array-indexing.cc index af0407063f8..0a29d87369f 100644 --- a/src/tests/put-del-multiple-array-indexing.cc +++ b/src/tests/put-del-multiple-array-indexing.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. 
+ TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/queries_with_deletes.cc b/src/tests/queries_with_deletes.cc index eebe61e2839..a619e5f0f58 100644 --- a/src/tests/queries_with_deletes.cc +++ b/src/tests/queries_with_deletes.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/recover-2483.cc b/src/tests/recover-2483.cc index e2244534e2e..e31361839f8 100644 --- a/src/tests/recover-2483.cc +++ b/src/tests/recover-2483.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/recover-3113.cc b/src/tests/recover-3113.cc index 67a4e1ff4d4..eeba9baf03c 100644 --- a/src/tests/recover-3113.cc +++ b/src/tests/recover-3113.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/recover-5146.cc b/src/tests/recover-5146.cc index 3ad54539aef..c05f9effa7d 100644 --- a/src/tests/recover-5146.cc +++ b/src/tests/recover-5146.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/src/tests/recover-checkpoint-fcreate-fdelete-fcreate.cc b/src/tests/recover-checkpoint-fcreate-fdelete-fcreate.cc index 20fc67dd956..0d5d4ff20db 100644 --- a/src/tests/recover-checkpoint-fcreate-fdelete-fcreate.cc +++ b/src/tests/recover-checkpoint-fcreate-fdelete-fcreate.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/recover-checkpoint-fopen-abort.cc b/src/tests/recover-checkpoint-fopen-abort.cc index 3023cc1a1a7..bed20966845 100644 --- a/src/tests/recover-checkpoint-fopen-abort.cc +++ b/src/tests/recover-checkpoint-fopen-abort.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/recover-checkpoint-fopen-commit.cc b/src/tests/recover-checkpoint-fopen-commit.cc index 33546958a37..2dcdbf6b939 100644 --- a/src/tests/recover-checkpoint-fopen-commit.cc +++ b/src/tests/recover-checkpoint-fopen-commit.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/recover-child-rollback.cc b/src/tests/recover-child-rollback.cc index 00f036cc174..62fbfbda6ef 100644 --- a/src/tests/recover-child-rollback.cc +++ b/src/tests/recover-child-rollback.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/src/tests/recover-compare-db-descriptor.cc b/src/tests/recover-compare-db-descriptor.cc index 58ae0b007e3..2cbc54efa17 100644 --- a/src/tests/recover-compare-db-descriptor.cc +++ b/src/tests/recover-compare-db-descriptor.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/recover-compare-db.cc b/src/tests/recover-compare-db.cc index 7e1de1ef3fe..6ce16bd479d 100644 --- a/src/tests/recover-compare-db.cc +++ b/src/tests/recover-compare-db.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/recover-del-multiple-abort.cc b/src/tests/recover-del-multiple-abort.cc index 4b8d8b44cf7..5a7e1710de1 100644 --- a/src/tests/recover-del-multiple-abort.cc +++ b/src/tests/recover-del-multiple-abort.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/recover-del-multiple-srcdb-fdelete-all.cc b/src/tests/recover-del-multiple-srcdb-fdelete-all.cc index 3224fa66057..632a4805835 100644 --- a/src/tests/recover-del-multiple-srcdb-fdelete-all.cc +++ b/src/tests/recover-del-multiple-srcdb-fdelete-all.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/src/tests/recover-del-multiple.cc b/src/tests/recover-del-multiple.cc index be09e29a0b8..d4c7303162a 100644 --- a/src/tests/recover-del-multiple.cc +++ b/src/tests/recover-del-multiple.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/recover-delboth-after-checkpoint.cc b/src/tests/recover-delboth-after-checkpoint.cc index 4655b5b5065..323b5b64ef7 100644 --- a/src/tests/recover-delboth-after-checkpoint.cc +++ b/src/tests/recover-delboth-after-checkpoint.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/recover-delboth-checkpoint.cc b/src/tests/recover-delboth-checkpoint.cc index 3e674644ed5..4ee3f5bba66 100644 --- a/src/tests/recover-delboth-checkpoint.cc +++ b/src/tests/recover-delboth-checkpoint.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/recover-descriptor.cc b/src/tests/recover-descriptor.cc index f726d63fc0c..df96b8cbf45 100644 --- a/src/tests/recover-descriptor.cc +++ b/src/tests/recover-descriptor.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/src/tests/recover-descriptor10.cc b/src/tests/recover-descriptor10.cc index 9b747506fd3..db73549eb1f 100644 --- a/src/tests/recover-descriptor10.cc +++ b/src/tests/recover-descriptor10.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/recover-descriptor11.cc b/src/tests/recover-descriptor11.cc index 8a2a1f34644..5d593af25bd 100644 --- a/src/tests/recover-descriptor11.cc +++ b/src/tests/recover-descriptor11.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/recover-descriptor12.cc b/src/tests/recover-descriptor12.cc index b3bb25abaea..698fa5d2b63 100644 --- a/src/tests/recover-descriptor12.cc +++ b/src/tests/recover-descriptor12.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/recover-descriptor2.cc b/src/tests/recover-descriptor2.cc index 7f09f4a7c54..62e685962e4 100644 --- a/src/tests/recover-descriptor2.cc +++ b/src/tests/recover-descriptor2.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/recover-descriptor3.cc b/src/tests/recover-descriptor3.cc index 87d607359f3..58d219af9cf 100644 --- a/src/tests/recover-descriptor3.cc +++ b/src/tests/recover-descriptor3.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. 
+ TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/recover-descriptor4.cc b/src/tests/recover-descriptor4.cc index 192a9474b62..37d7ca80f1f 100644 --- a/src/tests/recover-descriptor4.cc +++ b/src/tests/recover-descriptor4.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/recover-descriptor5.cc b/src/tests/recover-descriptor5.cc index 6ce30af5a70..757116afe19 100644 --- a/src/tests/recover-descriptor5.cc +++ b/src/tests/recover-descriptor5.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/recover-descriptor6.cc b/src/tests/recover-descriptor6.cc index b092581c3c1..68f90b0f276 100644 --- a/src/tests/recover-descriptor6.cc +++ b/src/tests/recover-descriptor6.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/recover-descriptor7.cc b/src/tests/recover-descriptor7.cc index 77d5c74cc97..9c3a44d01e5 100644 --- a/src/tests/recover-descriptor7.cc +++ b/src/tests/recover-descriptor7.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/src/tests/recover-descriptor8.cc b/src/tests/recover-descriptor8.cc index d9c993237d8..ac80a3e8a29 100644 --- a/src/tests/recover-descriptor8.cc +++ b/src/tests/recover-descriptor8.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/recover-descriptor9.cc b/src/tests/recover-descriptor9.cc index be5bf31e3d6..17da0907374 100644 --- a/src/tests/recover-descriptor9.cc +++ b/src/tests/recover-descriptor9.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/recover-fassociate.cc b/src/tests/recover-fassociate.cc index d97cfd7f849..81a19f18cd2 100644 --- a/src/tests/recover-fassociate.cc +++ b/src/tests/recover-fassociate.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/recover-fclose-in-checkpoint.cc b/src/tests/recover-fclose-in-checkpoint.cc index 5058c49dc6c..0519b9ba3c9 100644 --- a/src/tests/recover-fclose-in-checkpoint.cc +++ b/src/tests/recover-fclose-in-checkpoint.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/src/tests/recover-fcreate-basementnodesize.cc b/src/tests/recover-fcreate-basementnodesize.cc index 228528a5327..25350829505 100644 --- a/src/tests/recover-fcreate-basementnodesize.cc +++ b/src/tests/recover-fcreate-basementnodesize.cc @@ -30,7 +30,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/recover-fcreate-fclose.cc b/src/tests/recover-fcreate-fclose.cc index 240969e5eca..1dfccc4c3f4 100644 --- a/src/tests/recover-fcreate-fclose.cc +++ b/src/tests/recover-fcreate-fclose.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/recover-fcreate-fdelete.cc b/src/tests/recover-fcreate-fdelete.cc index b0ef652e906..76605330a23 100644 --- a/src/tests/recover-fcreate-fdelete.cc +++ b/src/tests/recover-fcreate-fdelete.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/recover-fcreate-nodesize.cc b/src/tests/recover-fcreate-nodesize.cc index 619704efcf2..7526c20b474 100644 --- a/src/tests/recover-fcreate-nodesize.cc +++ b/src/tests/recover-fcreate-nodesize.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/src/tests/recover-fcreate-xabort.cc b/src/tests/recover-fcreate-xabort.cc index 9473b52f240..c18db167449 100644 --- a/src/tests/recover-fcreate-xabort.cc +++ b/src/tests/recover-fcreate-xabort.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/recover-flt1.cc b/src/tests/recover-flt1.cc index 2efea643f73..f395ed8171d 100644 --- a/src/tests/recover-flt1.cc +++ b/src/tests/recover-flt1.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/recover-flt10.cc b/src/tests/recover-flt10.cc index 82e774ea96b..ccee07680b6 100644 --- a/src/tests/recover-flt10.cc +++ b/src/tests/recover-flt10.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/recover-flt2.cc b/src/tests/recover-flt2.cc index e7151771bc8..bd6125e2dfe 100644 --- a/src/tests/recover-flt2.cc +++ b/src/tests/recover-flt2.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/recover-flt3.cc b/src/tests/recover-flt3.cc index 323eb2d5019..f4fa3344e68 100644 --- a/src/tests/recover-flt3.cc +++ b/src/tests/recover-flt3.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/src/tests/recover-flt4.cc b/src/tests/recover-flt4.cc index a4b68f2b8df..d2efee438c9 100644 --- a/src/tests/recover-flt4.cc +++ b/src/tests/recover-flt4.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/recover-flt5.cc b/src/tests/recover-flt5.cc index 48a5a10b707..d4a4c0cec00 100644 --- a/src/tests/recover-flt5.cc +++ b/src/tests/recover-flt5.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/recover-flt6.cc b/src/tests/recover-flt6.cc index 5929ba5200a..184e3933f64 100644 --- a/src/tests/recover-flt6.cc +++ b/src/tests/recover-flt6.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/recover-flt7.cc b/src/tests/recover-flt7.cc index 40be856bba8..e8fce283b71 100644 --- a/src/tests/recover-flt7.cc +++ b/src/tests/recover-flt7.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/recover-flt8.cc b/src/tests/recover-flt8.cc index 44a7b0f4f44..2f1958b3025 100644 --- a/src/tests/recover-flt8.cc +++ b/src/tests/recover-flt8.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/src/tests/recover-flt9.cc b/src/tests/recover-flt9.cc index a9c89a53ab1..28325fbd6c5 100644 --- a/src/tests/recover-flt9.cc +++ b/src/tests/recover-flt9.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/recover-fopen-checkpoint-fclose.cc b/src/tests/recover-fopen-checkpoint-fclose.cc index 65c63417065..b8019b1ec79 100644 --- a/src/tests/recover-fopen-checkpoint-fclose.cc +++ b/src/tests/recover-fopen-checkpoint-fclose.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/recover-fopen-fclose-checkpoint.cc b/src/tests/recover-fopen-fclose-checkpoint.cc index 6d17bb79998..bb750cd3c8d 100644 --- a/src/tests/recover-fopen-fclose-checkpoint.cc +++ b/src/tests/recover-fopen-fclose-checkpoint.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/recover-fopen-fdelete-checkpoint-fcreate.cc b/src/tests/recover-fopen-fdelete-checkpoint-fcreate.cc index 608ebadd4a6..e745b666f86 100644 --- a/src/tests/recover-fopen-fdelete-checkpoint-fcreate.cc +++ b/src/tests/recover-fopen-fdelete-checkpoint-fcreate.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/src/tests/recover-hotindexer-simple-abort-put.cc b/src/tests/recover-hotindexer-simple-abort-put.cc index 153b911a018..ae99abb1082 100644 --- a/src/tests/recover-hotindexer-simple-abort-put.cc +++ b/src/tests/recover-hotindexer-simple-abort-put.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/recover-loader-test.cc b/src/tests/recover-loader-test.cc index c9944ba0409..381a0c600ba 100644 --- a/src/tests/recover-loader-test.cc +++ b/src/tests/recover-loader-test.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/recover-lsn-filter-multiple.cc b/src/tests/recover-lsn-filter-multiple.cc index dc26721818d..465f8cffab7 100644 --- a/src/tests/recover-lsn-filter-multiple.cc +++ b/src/tests/recover-lsn-filter-multiple.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/recover-lsn-filter.cc b/src/tests/recover-lsn-filter.cc index 4cd79918a86..9877923c50c 100644 --- a/src/tests/recover-lsn-filter.cc +++ b/src/tests/recover-lsn-filter.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/src/tests/recover-missing-dbfile-2.cc b/src/tests/recover-missing-dbfile-2.cc index 59f963ef503..691ffc36162 100644 --- a/src/tests/recover-missing-dbfile-2.cc +++ b/src/tests/recover-missing-dbfile-2.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/recover-missing-dbfile.cc b/src/tests/recover-missing-dbfile.cc index a71f91d7417..5af1644176b 100644 --- a/src/tests/recover-missing-dbfile.cc +++ b/src/tests/recover-missing-dbfile.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/recover-missing-logfile.cc b/src/tests/recover-missing-logfile.cc index d7b6b75d4cc..51681ad0ea8 100644 --- a/src/tests/recover-missing-logfile.cc +++ b/src/tests/recover-missing-logfile.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/recover-put-multiple-abort.cc b/src/tests/recover-put-multiple-abort.cc index c2036f6f34b..abfa78a9283 100644 --- a/src/tests/recover-put-multiple-abort.cc +++ b/src/tests/recover-put-multiple-abort.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/src/tests/recover-put-multiple-fdelete-all.cc b/src/tests/recover-put-multiple-fdelete-all.cc index a92db3a2a22..e65667a0e4f 100644 --- a/src/tests/recover-put-multiple-fdelete-all.cc +++ b/src/tests/recover-put-multiple-fdelete-all.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/recover-put-multiple-fdelete-some.cc b/src/tests/recover-put-multiple-fdelete-some.cc index 88014d208d2..4f37a9adf67 100644 --- a/src/tests/recover-put-multiple-fdelete-some.cc +++ b/src/tests/recover-put-multiple-fdelete-some.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/recover-put-multiple-srcdb-fdelete-all.cc b/src/tests/recover-put-multiple-srcdb-fdelete-all.cc index df56fa4f00b..e612e4d4c9a 100644 --- a/src/tests/recover-put-multiple-srcdb-fdelete-all.cc +++ b/src/tests/recover-put-multiple-srcdb-fdelete-all.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/recover-put-multiple.cc b/src/tests/recover-put-multiple.cc index 8e4c19141bf..21a68384860 100644 --- a/src/tests/recover-put-multiple.cc +++ b/src/tests/recover-put-multiple.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/src/tests/recover-split-checkpoint.cc b/src/tests/recover-split-checkpoint.cc index 9df540aadde..4e6b3d16d98 100644 --- a/src/tests/recover-split-checkpoint.cc +++ b/src/tests/recover-split-checkpoint.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/recover-straddle-txn-nested.cc b/src/tests/recover-straddle-txn-nested.cc index 32e22f9607d..03887ac19e5 100644 --- a/src/tests/recover-straddle-txn-nested.cc +++ b/src/tests/recover-straddle-txn-nested.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/recover-straddle-txn.cc b/src/tests/recover-straddle-txn.cc index a08e8940015..a728a7de17d 100644 --- a/src/tests/recover-straddle-txn.cc +++ b/src/tests/recover-straddle-txn.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/recover-tablelock.cc b/src/tests/recover-tablelock.cc index c75574e60b2..eb2a4318a20 100644 --- a/src/tests/recover-tablelock.cc +++ b/src/tests/recover-tablelock.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/src/tests/recover-test-logsuppress-put.cc b/src/tests/recover-test-logsuppress-put.cc index 14f659f72ad..c022fdf6243 100644 --- a/src/tests/recover-test-logsuppress-put.cc +++ b/src/tests/recover-test-logsuppress-put.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/recover-test-logsuppress.cc b/src/tests/recover-test-logsuppress.cc index 8272bbbcd51..020cfbd6122 100644 --- a/src/tests/recover-test-logsuppress.cc +++ b/src/tests/recover-test-logsuppress.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/recover-test1.cc b/src/tests/recover-test1.cc index d9b7cb6fa6c..6529d6ac968 100644 --- a/src/tests/recover-test1.cc +++ b/src/tests/recover-test1.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/recover-test2.cc b/src/tests/recover-test2.cc index 9faeedc8d5d..e6bf69b92fe 100644 --- a/src/tests/recover-test2.cc +++ b/src/tests/recover-test2.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/recover-test3.cc b/src/tests/recover-test3.cc index 7dcc191fc25..fa40c494a96 100644 --- a/src/tests/recover-test3.cc +++ b/src/tests/recover-test3.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. 
Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/recover-test_crash_in_flusher_thread.h b/src/tests/recover-test_crash_in_flusher_thread.h index 014a6428777..82d57b48867 100644 --- a/src/tests/recover-test_crash_in_flusher_thread.h +++ b/src/tests/recover-test_crash_in_flusher_thread.h @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/recover-test_stress1.cc b/src/tests/recover-test_stress1.cc index 7e7be8c26f7..a45667cd8a1 100644 --- a/src/tests/recover-test_stress1.cc +++ b/src/tests/recover-test_stress1.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/recover-test_stress2.cc b/src/tests/recover-test_stress2.cc index e07f36dca7c..e78f8a222b1 100644 --- a/src/tests/recover-test_stress2.cc +++ b/src/tests/recover-test_stress2.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/recover-test_stress3.cc b/src/tests/recover-test_stress3.cc index 2a3017c4cae..9794271ec6b 100644 --- a/src/tests/recover-test_stress3.cc +++ b/src/tests/recover-test_stress3.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/src/tests/recover-test_stress_openclose.cc b/src/tests/recover-test_stress_openclose.cc index 52b59f96810..e84c9f2c9f6 100644 --- a/src/tests/recover-test_stress_openclose.cc +++ b/src/tests/recover-test_stress_openclose.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/recover-update-multiple-abort.cc b/src/tests/recover-update-multiple-abort.cc index 1e6f57a0714..4d0e0164aa3 100644 --- a/src/tests/recover-update-multiple-abort.cc +++ b/src/tests/recover-update-multiple-abort.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/recover-update-multiple.cc b/src/tests/recover-update-multiple.cc index 437f9615351..fe436c95a4d 100644 --- a/src/tests/recover-update-multiple.cc +++ b/src/tests/recover-update-multiple.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/recover-update_aborts.cc b/src/tests/recover-update_aborts.cc index 27e4a19cef1..82ffd511c2a 100644 --- a/src/tests/recover-update_aborts.cc +++ b/src/tests/recover-update_aborts.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/src/tests/recover-update_aborts_before_checkpoint.cc b/src/tests/recover-update_aborts_before_checkpoint.cc index de3f0996d63..46723760c88 100644 --- a/src/tests/recover-update_aborts_before_checkpoint.cc +++ b/src/tests/recover-update_aborts_before_checkpoint.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/recover-update_aborts_before_close.cc b/src/tests/recover-update_aborts_before_close.cc index e1c13d92f93..feac9dba77d 100644 --- a/src/tests/recover-update_aborts_before_close.cc +++ b/src/tests/recover-update_aborts_before_close.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/recover-update_broadcast_aborts.cc b/src/tests/recover-update_broadcast_aborts.cc index 9ac5bb5b186..05904b0ae7f 100644 --- a/src/tests/recover-update_broadcast_aborts.cc +++ b/src/tests/recover-update_broadcast_aborts.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/recover-update_broadcast_aborts2.cc b/src/tests/recover-update_broadcast_aborts2.cc index 2f05dc92c53..d88d483bd17 100644 --- a/src/tests/recover-update_broadcast_aborts2.cc +++ b/src/tests/recover-update_broadcast_aborts2.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/src/tests/recover-update_broadcast_aborts3.cc b/src/tests/recover-update_broadcast_aborts3.cc index 3668d7a612e..c1f1baada13 100644 --- a/src/tests/recover-update_broadcast_aborts3.cc +++ b/src/tests/recover-update_broadcast_aborts3.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/recover-update_broadcast_aborts_before_checkpoint.cc b/src/tests/recover-update_broadcast_aborts_before_checkpoint.cc index a9bc84907ed..0768def9255 100644 --- a/src/tests/recover-update_broadcast_aborts_before_checkpoint.cc +++ b/src/tests/recover-update_broadcast_aborts_before_checkpoint.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/recover-update_broadcast_aborts_before_close.cc b/src/tests/recover-update_broadcast_aborts_before_close.cc index 7dd3f647cbe..0d18ad7bacb 100644 --- a/src/tests/recover-update_broadcast_aborts_before_close.cc +++ b/src/tests/recover-update_broadcast_aborts_before_close.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/recover-update_broadcast_changes_values.cc b/src/tests/recover-update_broadcast_changes_values.cc index f1c61f9d7f9..b8cd95c91b6 100644 --- a/src/tests/recover-update_broadcast_changes_values.cc +++ b/src/tests/recover-update_broadcast_changes_values.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/src/tests/recover-update_broadcast_changes_values2.cc b/src/tests/recover-update_broadcast_changes_values2.cc index db0080598eb..f1a776bc7f5 100644 --- a/src/tests/recover-update_broadcast_changes_values2.cc +++ b/src/tests/recover-update_broadcast_changes_values2.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/recover-update_broadcast_changes_values3.cc b/src/tests/recover-update_broadcast_changes_values3.cc index 28d102579c9..9ec99677ad0 100644 --- a/src/tests/recover-update_broadcast_changes_values3.cc +++ b/src/tests/recover-update_broadcast_changes_values3.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/recover-update_broadcast_changes_values_before_checkpoint.cc b/src/tests/recover-update_broadcast_changes_values_before_checkpoint.cc index 93b40a14c27..8197f8ad2fe 100644 --- a/src/tests/recover-update_broadcast_changes_values_before_checkpoint.cc +++ b/src/tests/recover-update_broadcast_changes_values_before_checkpoint.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/recover-update_broadcast_changes_values_before_close.cc b/src/tests/recover-update_broadcast_changes_values_before_close.cc index ce187c09303..8d462e82f57 100644 --- a/src/tests/recover-update_broadcast_changes_values_before_close.cc +++ b/src/tests/recover-update_broadcast_changes_values_before_close.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. 
+ TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/recover-update_changes_values.cc b/src/tests/recover-update_changes_values.cc index 94029e1c99e..65a55a12125 100644 --- a/src/tests/recover-update_changes_values.cc +++ b/src/tests/recover-update_changes_values.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/recover-update_changes_values_before_checkpoint.cc b/src/tests/recover-update_changes_values_before_checkpoint.cc index 0ff19f1801c..ab97b660724 100644 --- a/src/tests/recover-update_changes_values_before_checkpoint.cc +++ b/src/tests/recover-update_changes_values_before_checkpoint.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/recover-update_changes_values_before_close.cc b/src/tests/recover-update_changes_values_before_close.cc index 7e075b00456..f17edbd1317 100644 --- a/src/tests/recover-update_changes_values_before_close.cc +++ b/src/tests/recover-update_changes_values_before_close.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/recover-upgrade-db-descriptor-multihandle.cc b/src/tests/recover-upgrade-db-descriptor-multihandle.cc index 3914badda0b..c2b8543dba8 100644 --- a/src/tests/recover-upgrade-db-descriptor-multihandle.cc +++ b/src/tests/recover-upgrade-db-descriptor-multihandle.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. 
+ TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/recover-upgrade-db-descriptor.cc b/src/tests/recover-upgrade-db-descriptor.cc index 5bf6e1be049..9db973bc13b 100644 --- a/src/tests/recover-upgrade-db-descriptor.cc +++ b/src/tests/recover-upgrade-db-descriptor.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/recover-x1-abort.cc b/src/tests/recover-x1-abort.cc index 473d76874e3..c962f9c1c29 100644 --- a/src/tests/recover-x1-abort.cc +++ b/src/tests/recover-x1-abort.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/recover-x1-commit.cc b/src/tests/recover-x1-commit.cc index 780c4287f58..2c0883294e4 100644 --- a/src/tests/recover-x1-commit.cc +++ b/src/tests/recover-x1-commit.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/recover-x1-nested-abort.cc b/src/tests/recover-x1-nested-abort.cc index 415d184aa3d..b23235b2af8 100644 --- a/src/tests/recover-x1-nested-abort.cc +++ b/src/tests/recover-x1-nested-abort.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/src/tests/recover-x1-nested-commit.cc b/src/tests/recover-x1-nested-commit.cc index a7fb13df76a..0426ac54cd7 100644 --- a/src/tests/recover-x1-nested-commit.cc +++ b/src/tests/recover-x1-nested-commit.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/recover-x2-abort.cc b/src/tests/recover-x2-abort.cc index c14fa98d0b0..9335aa5e7fb 100644 --- a/src/tests/recover-x2-abort.cc +++ b/src/tests/recover-x2-abort.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/recover-x2-commit.cc b/src/tests/recover-x2-commit.cc index 373e9cf546f..4a2dfa8013b 100644 --- a/src/tests/recover-x2-commit.cc +++ b/src/tests/recover-x2-commit.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/recovery_fileops_stress.cc b/src/tests/recovery_fileops_stress.cc index acbb59bd05b..4ac3bccf0a2 100644 --- a/src/tests/recovery_fileops_stress.cc +++ b/src/tests/recovery_fileops_stress.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/recovery_fileops_unit.cc b/src/tests/recovery_fileops_unit.cc index 0d7b33212ad..9c9681ae5e6 100644 --- a/src/tests/recovery_fileops_unit.cc +++ b/src/tests/recovery_fileops_unit.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. 
+ TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/recovery_stress.cc b/src/tests/recovery_stress.cc index 9e6b3117031..8e704bbc3f6 100644 --- a/src/tests/recovery_stress.cc +++ b/src/tests/recovery_stress.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/redirect.cc b/src/tests/redirect.cc index bcbe861adc0..9cf9d979f2f 100644 --- a/src/tests/redirect.cc +++ b/src/tests/redirect.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/replace-into-write-lock.cc b/src/tests/replace-into-write-lock.cc index 7f9ec3768ff..77a03436407 100644 --- a/src/tests/replace-into-write-lock.cc +++ b/src/tests/replace-into-write-lock.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/root_fifo_1.cc b/src/tests/root_fifo_1.cc index fa88b3dfc4a..c83fe05c9fa 100644 --- a/src/tests/root_fifo_1.cc +++ b/src/tests/root_fifo_1.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/root_fifo_2.cc b/src/tests/root_fifo_2.cc index b8d3ac63f27..1902774cd99 100644 --- a/src/tests/root_fifo_2.cc +++ b/src/tests/root_fifo_2.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. 
Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/root_fifo_31.cc b/src/tests/root_fifo_31.cc index 09f57a67ac9..495073c8e82 100644 --- a/src/tests/root_fifo_31.cc +++ b/src/tests/root_fifo_31.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/root_fifo_32.cc b/src/tests/root_fifo_32.cc index 874405ff68f..d75f81dc012 100644 --- a/src/tests/root_fifo_32.cc +++ b/src/tests/root_fifo_32.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/root_fifo_41.cc b/src/tests/root_fifo_41.cc index d4f1e6554f1..91fb63985d8 100644 --- a/src/tests/root_fifo_41.cc +++ b/src/tests/root_fifo_41.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/rowsize.cc b/src/tests/rowsize.cc index 0965231e621..7e84173006b 100644 --- a/src/tests/rowsize.cc +++ b/src/tests/rowsize.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/seqinsert.cc b/src/tests/seqinsert.cc index 8d402f2dcf5..85d20bf8941 100644 --- a/src/tests/seqinsert.cc +++ b/src/tests/seqinsert.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/src/tests/shutdown-3344.cc b/src/tests/shutdown-3344.cc index 6b586287909..94716b558ec 100644 --- a/src/tests/shutdown-3344.cc +++ b/src/tests/shutdown-3344.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/simple.cc b/src/tests/simple.cc index d51cf446f4c..0733a005283 100644 --- a/src/tests/simple.cc +++ b/src/tests/simple.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/stat64-create-modify-times.cc b/src/tests/stat64-create-modify-times.cc index 09c24546757..46c25dc7208 100644 --- a/src/tests/stat64-create-modify-times.cc +++ b/src/tests/stat64-create-modify-times.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/stat64-null-txn.cc b/src/tests/stat64-null-txn.cc index eb799010835..3ca90823b86 100644 --- a/src/tests/stat64-null-txn.cc +++ b/src/tests/stat64-null-txn.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/stat64-root-changes.cc b/src/tests/stat64-root-changes.cc index b0c7a0131b6..80cf022f946 100644 --- a/src/tests/stat64-root-changes.cc +++ b/src/tests/stat64-root-changes.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/src/tests/stat64.cc b/src/tests/stat64.cc index 23e6ee84a65..8e115fc20d6 100644 --- a/src/tests/stat64.cc +++ b/src/tests/stat64.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/stress-gc.cc b/src/tests/stress-gc.cc index 97e2e7309c3..c67f9b8ed40 100644 --- a/src/tests/stress-gc.cc +++ b/src/tests/stress-gc.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/stress-gc2.cc b/src/tests/stress-gc2.cc index adee9ad1b35..d0a63a874ec 100644 --- a/src/tests/stress-gc2.cc +++ b/src/tests/stress-gc2.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/stress-test.cc b/src/tests/stress-test.cc index 0774358fee1..87238a62d18 100644 --- a/src/tests/stress-test.cc +++ b/src/tests/stress-test.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/stress_openclose.h b/src/tests/stress_openclose.h index 7477f4c16d4..ab15960a310 100644 --- a/src/tests/stress_openclose.h +++ b/src/tests/stress_openclose.h @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/src/tests/test-5138.cc b/src/tests/test-5138.cc index 1ec1d4646f9..546fe6d9368 100644 --- a/src/tests/test-5138.cc +++ b/src/tests/test-5138.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test-nested-xopen-eclose.cc b/src/tests/test-nested-xopen-eclose.cc index 2c5f7fae569..e78d2130ea3 100644 --- a/src/tests/test-nested-xopen-eclose.cc +++ b/src/tests/test-nested-xopen-eclose.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test-prepare.cc b/src/tests/test-prepare.cc index ea3949cf6fe..9033c633ea7 100644 --- a/src/tests/test-prepare.cc +++ b/src/tests/test-prepare.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test-prepare2.cc b/src/tests/test-prepare2.cc index eb79a1e8e18..8952f14cf31 100644 --- a/src/tests/test-prepare2.cc +++ b/src/tests/test-prepare2.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test-prepare3.cc b/src/tests/test-prepare3.cc index 352518b8579..3643d73f41a 100644 --- a/src/tests/test-prepare3.cc +++ b/src/tests/test-prepare3.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/src/tests/test-rollinclude.cc b/src/tests/test-rollinclude.cc index 8a4af61bf59..6ece4beb671 100644 --- a/src/tests/test-rollinclude.cc +++ b/src/tests/test-rollinclude.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test-xa-prepare.cc b/src/tests/test-xa-prepare.cc index d409eefb382..e08e7361555 100644 --- a/src/tests/test-xa-prepare.cc +++ b/src/tests/test-xa-prepare.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test-xopen-eclose.cc b/src/tests/test-xopen-eclose.cc index f6359d8cf1a..82f2bc6d159 100644 --- a/src/tests/test-xopen-eclose.cc +++ b/src/tests/test-xopen-eclose.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test.h b/src/tests/test.h index 80f9ee37215..c8e98862038 100644 --- a/src/tests/test.h +++ b/src/tests/test.h @@ -30,7 +30,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test1572.cc b/src/tests/test1572.cc index b6dd7f0dcb6..0ea04a9c401 100644 --- a/src/tests/test1572.cc +++ b/src/tests/test1572.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/src/tests/test1753.cc b/src/tests/test1753.cc index d4d09ecaf79..e50b828c92c 100644 --- a/src/tests/test1753.cc +++ b/src/tests/test1753.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test1842.cc b/src/tests/test1842.cc index 43702da5694..20b014d4a33 100644 --- a/src/tests/test1842.cc +++ b/src/tests/test1842.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test3039.cc b/src/tests/test3039.cc index aaaeebc7c36..35b22b374c0 100644 --- a/src/tests/test3039.cc +++ b/src/tests/test3039.cc @@ -36,7 +36,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test3219.cc b/src/tests/test3219.cc index 95bff0f445d..d5de370fd60 100644 --- a/src/tests/test3219.cc +++ b/src/tests/test3219.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test3522.cc b/src/tests/test3522.cc index fe67793e3af..7166c561f70 100644 --- a/src/tests/test3522.cc +++ b/src/tests/test3522.cc @@ -35,7 +35,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/src/tests/test3522b.cc b/src/tests/test3522b.cc index 17b2df6b13a..09c9807dd2b 100644 --- a/src/tests/test3522b.cc +++ b/src/tests/test3522b.cc @@ -36,7 +36,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test3529.cc b/src/tests/test3529.cc index 287729451a4..2c605c3028a 100644 --- a/src/tests/test3529.cc +++ b/src/tests/test3529.cc @@ -40,7 +40,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test4573-logtrim.cc b/src/tests/test4573-logtrim.cc index a439f886103..9dba89f04f1 100644 --- a/src/tests/test4573-logtrim.cc +++ b/src/tests/test4573-logtrim.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test5092.cc b/src/tests/test5092.cc index 6572c4df246..16652472b55 100644 --- a/src/tests/test5092.cc +++ b/src/tests/test5092.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test938.cc b/src/tests/test938.cc index bb4b9464a2d..d6896894b54 100644 --- a/src/tests/test938.cc +++ b/src/tests/test938.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/src/tests/test938b.cc b/src/tests/test938b.cc index d0d07120f4a..78830d8d4c8 100644 --- a/src/tests/test938b.cc +++ b/src/tests/test938b.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test938c.cc b/src/tests/test938c.cc index f3914ccb302..2ca11d6f44e 100644 --- a/src/tests/test938c.cc +++ b/src/tests/test938c.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_3529_insert_2.cc b/src/tests/test_3529_insert_2.cc index 542785e007f..d024143415d 100644 --- a/src/tests/test_3529_insert_2.cc +++ b/src/tests/test_3529_insert_2.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_3529_table_lock.cc b/src/tests/test_3529_table_lock.cc index 7a07ff4ac11..cb9137a10a0 100644 --- a/src/tests/test_3529_table_lock.cc +++ b/src/tests/test_3529_table_lock.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_3645.cc b/src/tests/test_3645.cc index dfd8544ef4b..1e7c3b5faf6 100644 --- a/src/tests/test_3645.cc +++ b/src/tests/test_3645.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/src/tests/test_3755.cc b/src/tests/test_3755.cc index fa6af3b46d8..a678352ba19 100644 --- a/src/tests/test_3755.cc +++ b/src/tests/test_3755.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_4015.cc b/src/tests/test_4015.cc index a1b8f555155..c0538b7976c 100644 --- a/src/tests/test_4015.cc +++ b/src/tests/test_4015.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_4368.cc b/src/tests/test_4368.cc index f000efa7813..ab55a6ee173 100644 --- a/src/tests/test_4368.cc +++ b/src/tests/test_4368.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_4657.cc b/src/tests/test_4657.cc index c7a3f7473ce..6ab9ce56d73 100644 --- a/src/tests/test_4657.cc +++ b/src/tests/test_4657.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_5015.cc b/src/tests/test_5015.cc index 4eb337eb88b..071b7f3660e 100644 --- a/src/tests/test_5015.cc +++ b/src/tests/test_5015.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/src/tests/test_5469.cc b/src/tests/test_5469.cc index c7e30b42c2f..cbbcb3721cb 100644 --- a/src/tests/test_5469.cc +++ b/src/tests/test_5469.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_789.cc b/src/tests/test_789.cc index c99af5bd1e5..31cdd6ef777 100644 --- a/src/tests/test_789.cc +++ b/src/tests/test_789.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_935.cc b/src/tests/test_935.cc index 971a1c1a85a..a676db32460 100644 --- a/src/tests/test_935.cc +++ b/src/tests/test_935.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_abort1.cc b/src/tests/test_abort1.cc index 27f5d68348c..c88eeaddd8f 100644 --- a/src/tests/test_abort1.cc +++ b/src/tests/test_abort1.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_abort2.cc b/src/tests/test_abort2.cc index e8beb73dcf2..881bc97ad1b 100644 --- a/src/tests/test_abort2.cc +++ b/src/tests/test_abort2.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/src/tests/test_abort3.cc b/src/tests/test_abort3.cc index 705ae5cfce2..4542ad3b1c4 100644 --- a/src/tests/test_abort3.cc +++ b/src/tests/test_abort3.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_abort4.cc b/src/tests/test_abort4.cc index 0d575c78b12..29581dc6285 100644 --- a/src/tests/test_abort4.cc +++ b/src/tests/test_abort4.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_abort5.cc b/src/tests/test_abort5.cc index 803e0c4fa22..34bf564d9ac 100644 --- a/src/tests/test_abort5.cc +++ b/src/tests/test_abort5.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_abort_delete_first.cc b/src/tests/test_abort_delete_first.cc index ff55cb2e3ae..fb983474462 100644 --- a/src/tests/test_abort_delete_first.cc +++ b/src/tests/test_abort_delete_first.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_archive0.cc b/src/tests/test_archive0.cc index 85b444e243c..8ffa87e2a6d 100644 --- a/src/tests/test_archive0.cc +++ b/src/tests/test_archive0.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/src/tests/test_archive1.cc b/src/tests/test_archive1.cc index 8aa045b061e..5208a5eb1b6 100644 --- a/src/tests/test_archive1.cc +++ b/src/tests/test_archive1.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_archive2.cc b/src/tests/test_archive2.cc index ea67a743f92..faa73171f7e 100644 --- a/src/tests/test_archive2.cc +++ b/src/tests/test_archive2.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_bad_implicit_promotion.cc b/src/tests/test_bad_implicit_promotion.cc index 8fcff7c6132..c7555d2d3f6 100644 --- a/src/tests/test_bad_implicit_promotion.cc +++ b/src/tests/test_bad_implicit_promotion.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2014 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_blobs_leaf_split.cc b/src/tests/test_blobs_leaf_split.cc index e567e4d58a8..eae30421f79 100644 --- a/src/tests/test_blobs_leaf_split.cc +++ b/src/tests/test_blobs_leaf_split.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_bulk_fetch.cc b/src/tests/test_bulk_fetch.cc index 76706db6dba..800212a6751 100644 --- a/src/tests/test_bulk_fetch.cc +++ b/src/tests/test_bulk_fetch.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. 
Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_cachesize.cc b/src/tests/test_cachesize.cc index 2af678ff53a..d161dd89033 100644 --- a/src/tests/test_cachesize.cc +++ b/src/tests/test_cachesize.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_cmp_descriptor.cc b/src/tests/test_cmp_descriptor.cc index 87b3da2b4cd..3d318ddd346 100644 --- a/src/tests/test_cmp_descriptor.cc +++ b/src/tests/test_cmp_descriptor.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_compression_methods.cc b/src/tests/test_compression_methods.cc index ef73c593f56..272cf4f145d 100644 --- a/src/tests/test_compression_methods.cc +++ b/src/tests/test_compression_methods.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_cursor_2.cc b/src/tests/test_cursor_2.cc index de332e6bf75..d07eb95122e 100644 --- a/src/tests/test_cursor_2.cc +++ b/src/tests/test_cursor_2.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_cursor_3.cc b/src/tests/test_cursor_3.cc index 45c0b0b4a8d..f9f256fc884 100644 --- a/src/tests/test_cursor_3.cc +++ b/src/tests/test_cursor_3.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. 
Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_cursor_DB_NEXT_no_dup.cc b/src/tests/test_cursor_DB_NEXT_no_dup.cc index d87ff04f25d..125bbee9d52 100644 --- a/src/tests/test_cursor_DB_NEXT_no_dup.cc +++ b/src/tests/test_cursor_DB_NEXT_no_dup.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_cursor_db_current.cc b/src/tests/test_cursor_db_current.cc index 8a2f5bcba93..2f8fbb9149f 100644 --- a/src/tests/test_cursor_db_current.cc +++ b/src/tests/test_cursor_db_current.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_cursor_delete2.cc b/src/tests/test_cursor_delete2.cc index 73a7182e2fd..1fcda002bc5 100644 --- a/src/tests/test_cursor_delete2.cc +++ b/src/tests/test_cursor_delete2.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_cursor_flags.cc b/src/tests/test_cursor_flags.cc index 1bdb3daf81c..60ca37a0ac4 100644 --- a/src/tests/test_cursor_flags.cc +++ b/src/tests/test_cursor_flags.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/src/tests/test_cursor_interrupt.cc b/src/tests/test_cursor_interrupt.cc index d82fc5131da..e992f86455b 100644 --- a/src/tests/test_cursor_interrupt.cc +++ b/src/tests/test_cursor_interrupt.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_cursor_nonleaf_expand.cc b/src/tests/test_cursor_nonleaf_expand.cc index 96b83d778fc..6464a2fda83 100644 --- a/src/tests/test_cursor_nonleaf_expand.cc +++ b/src/tests/test_cursor_nonleaf_expand.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_cursor_null.cc b/src/tests/test_cursor_null.cc index 68a65b97e6e..6c7bf382a8d 100644 --- a/src/tests/test_cursor_null.cc +++ b/src/tests/test_cursor_null.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_cursor_stickyness.cc b/src/tests/test_cursor_stickyness.cc index 6ed74265fff..62178e14137 100644 --- a/src/tests/test_cursor_stickyness.cc +++ b/src/tests/test_cursor_stickyness.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/src/tests/test_cursor_with_read_txn.cc b/src/tests/test_cursor_with_read_txn.cc index 8435b2e1a3e..d4e3148ec62 100644 --- a/src/tests/test_cursor_with_read_txn.cc +++ b/src/tests/test_cursor_with_read_txn.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_db_already_exists.cc b/src/tests/test_db_already_exists.cc index cbb98d1b3e7..ce4008a06d8 100644 --- a/src/tests/test_db_already_exists.cc +++ b/src/tests/test_db_already_exists.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_db_change_pagesize.cc b/src/tests/test_db_change_pagesize.cc index 83e195093fd..d596782c919 100644 --- a/src/tests/test_db_change_pagesize.cc +++ b/src/tests/test_db_change_pagesize.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_db_change_xxx.cc b/src/tests/test_db_change_xxx.cc index 35170e5f9ec..2033cc6cb04 100644 --- a/src/tests/test_db_change_xxx.cc +++ b/src/tests/test_db_change_xxx.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/src/tests/test_db_close_no_open.cc b/src/tests/test_db_close_no_open.cc index a9421b57451..7f433e09393 100644 --- a/src/tests/test_db_close_no_open.cc +++ b/src/tests/test_db_close_no_open.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_db_current_clobbers_db.cc b/src/tests/test_db_current_clobbers_db.cc index d908e0c2e14..962d1ae5256 100644 --- a/src/tests/test_db_current_clobbers_db.cc +++ b/src/tests/test_db_current_clobbers_db.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_db_dbt_mem_behavior.cc b/src/tests/test_db_dbt_mem_behavior.cc index eb5d7f87893..8a0a385f82e 100644 --- a/src/tests/test_db_dbt_mem_behavior.cc +++ b/src/tests/test_db_dbt_mem_behavior.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_db_delete.cc b/src/tests/test_db_delete.cc index 66fc506d44b..4ee9b0fba83 100644 --- a/src/tests/test_db_delete.cc +++ b/src/tests/test_db_delete.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_db_descriptor.cc b/src/tests/test_db_descriptor.cc index de6f6f5f608..a9403174818 100644 --- a/src/tests/test_db_descriptor.cc +++ b/src/tests/test_db_descriptor.cc @@ -30,7 +30,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. 
+ TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_db_env_open_close.cc b/src/tests/test_db_env_open_close.cc index 13b1166fa0b..4bb22a026b2 100644 --- a/src/tests/test_db_env_open_close.cc +++ b/src/tests/test_db_env_open_close.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_db_env_open_nocreate.cc b/src/tests/test_db_env_open_nocreate.cc index a690a4f33a9..e45afb55ded 100644 --- a/src/tests/test_db_env_open_nocreate.cc +++ b/src/tests/test_db_env_open_nocreate.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_db_env_open_open_close.cc b/src/tests/test_db_env_open_open_close.cc index d9336a8b48e..9a2d665edbf 100644 --- a/src/tests/test_db_env_open_open_close.cc +++ b/src/tests/test_db_env_open_open_close.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_db_env_set_errpfx.cc b/src/tests/test_db_env_set_errpfx.cc index 7a717ea807b..ef7bf85b528 100644 --- a/src/tests/test_db_env_set_errpfx.cc +++ b/src/tests/test_db_env_set_errpfx.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/src/tests/test_db_env_set_lg_dir.cc b/src/tests/test_db_env_set_lg_dir.cc index ffdaf5cb833..0baa9185d15 100644 --- a/src/tests/test_db_env_set_lg_dir.cc +++ b/src/tests/test_db_env_set_lg_dir.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_db_env_set_tmp_dir.cc b/src/tests/test_db_env_set_tmp_dir.cc index 150b0b29049..b1adbb30120 100644 --- a/src/tests/test_db_env_set_tmp_dir.cc +++ b/src/tests/test_db_env_set_tmp_dir.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_db_env_strdup_null.cc b/src/tests/test_db_env_strdup_null.cc index 1f65f7a54ed..01ba0792e4a 100644 --- a/src/tests/test_db_env_strdup_null.cc +++ b/src/tests/test_db_env_strdup_null.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_db_get_put_flags.cc b/src/tests/test_db_get_put_flags.cc index bb5403732b5..1c716cc747a 100644 --- a/src/tests/test_db_get_put_flags.cc +++ b/src/tests/test_db_get_put_flags.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/src/tests/test_db_named_delete_last.cc b/src/tests/test_db_named_delete_last.cc index db530ad1e18..ba63986af79 100644 --- a/src/tests/test_db_named_delete_last.cc +++ b/src/tests/test_db_named_delete_last.cc @@ -30,7 +30,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_db_no_env.cc b/src/tests/test_db_no_env.cc index 61952133119..8f7a336af5b 100644 --- a/src/tests/test_db_no_env.cc +++ b/src/tests/test_db_no_env.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_db_open_notexist_reopen.cc b/src/tests/test_db_open_notexist_reopen.cc index ea5002a0131..7881a14fbd7 100644 --- a/src/tests/test_db_open_notexist_reopen.cc +++ b/src/tests/test_db_open_notexist_reopen.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_db_remove.cc b/src/tests/test_db_remove.cc index 7c5009a703d..e736784a7c4 100644 --- a/src/tests/test_db_remove.cc +++ b/src/tests/test_db_remove.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_db_remove_subdb.cc b/src/tests/test_db_remove_subdb.cc index 90ee56278fb..8f3e50a649c 100644 --- a/src/tests/test_db_remove_subdb.cc +++ b/src/tests/test_db_remove_subdb.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. 
+ TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_db_set_flags.cc b/src/tests/test_db_set_flags.cc index bd63991da87..b106c70589f 100644 --- a/src/tests/test_db_set_flags.cc +++ b/src/tests/test_db_set_flags.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_db_subdb.cc b/src/tests/test_db_subdb.cc index 4a65317d6c1..f29dd14a3fa 100644 --- a/src/tests/test_db_subdb.cc +++ b/src/tests/test_db_subdb.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_db_subdb_different_flags.cc b/src/tests/test_db_subdb_different_flags.cc index 0c30b782665..c12e1bdfce9 100644 --- a/src/tests/test_db_subdb_different_flags.cc +++ b/src/tests/test_db_subdb_different_flags.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_db_txn_locks_nonheaviside.cc b/src/tests/test_db_txn_locks_nonheaviside.cc index 9451b107e1a..381bce27596 100644 --- a/src/tests/test_db_txn_locks_nonheaviside.cc +++ b/src/tests/test_db_txn_locks_nonheaviside.cc @@ -30,7 +30,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/src/tests/test_db_txn_locks_read_uncommitted.cc b/src/tests/test_db_txn_locks_read_uncommitted.cc index 277fbaee8be..0dd73590b84 100644 --- a/src/tests/test_db_txn_locks_read_uncommitted.cc +++ b/src/tests/test_db_txn_locks_read_uncommitted.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_db_version.cc b/src/tests/test_db_version.cc index 1041db949d4..6ce4574ae57 100644 --- a/src/tests/test_db_version.cc +++ b/src/tests/test_db_version.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_env_close_flags.cc b/src/tests/test_env_close_flags.cc index 50fcd3fa50a..dd532627502 100644 --- a/src/tests/test_env_close_flags.cc +++ b/src/tests/test_env_close_flags.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_env_create_db_create.cc b/src/tests/test_env_create_db_create.cc index 8e706ebf04e..d45bd7002ab 100644 --- a/src/tests/test_env_create_db_create.cc +++ b/src/tests/test_env_create_db_create.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/src/tests/test_env_open_flags.cc b/src/tests/test_env_open_flags.cc index 8d9b147e46d..1b621b8005d 100644 --- a/src/tests/test_env_open_flags.cc +++ b/src/tests/test_env_open_flags.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_equal_keys_with_different_bytes.cc b/src/tests/test_equal_keys_with_different_bytes.cc index 6567822c929..d91f965ebc8 100644 --- a/src/tests/test_equal_keys_with_different_bytes.cc +++ b/src/tests/test_equal_keys_with_different_bytes.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2014 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_error.cc b/src/tests/test_error.cc index 21084e70061..84c6289990e 100644 --- a/src/tests/test_error.cc +++ b/src/tests/test_error.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_forkjoin.cc b/src/tests/test_forkjoin.cc index 8190a7e7745..1fb01b53712 100644 --- a/src/tests/test_forkjoin.cc +++ b/src/tests/test_forkjoin.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_get_max_row_size.cc b/src/tests/test_get_max_row_size.cc index 12fc8c1e619..5ddddac9bd6 100644 --- a/src/tests/test_get_max_row_size.cc +++ b/src/tests/test_get_max_row_size.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. 
+ TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_get_zeroed_dbt.cc b/src/tests/test_get_zeroed_dbt.cc index bf7848088d2..384f4e91f46 100644 --- a/src/tests/test_get_zeroed_dbt.cc +++ b/src/tests/test_get_zeroed_dbt.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_groupcommit_count.cc b/src/tests/test_groupcommit_count.cc index c24efb5562f..f5bb46c35db 100644 --- a/src/tests/test_groupcommit_count.cc +++ b/src/tests/test_groupcommit_count.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_groupcommit_perf.cc b/src/tests/test_groupcommit_perf.cc index e7aa5071f61..ade56e24e4e 100644 --- a/src/tests/test_groupcommit_perf.cc +++ b/src/tests/test_groupcommit_perf.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_hsoc.cc b/src/tests/test_hsoc.cc index ada02e5e522..28368456501 100644 --- a/src/tests/test_hsoc.cc +++ b/src/tests/test_hsoc.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/src/tests/test_insert_cursor_delete_insert.cc b/src/tests/test_insert_cursor_delete_insert.cc index 865736d14fe..8b09698fcee 100644 --- a/src/tests/test_insert_cursor_delete_insert.cc +++ b/src/tests/test_insert_cursor_delete_insert.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_insert_many_gc.cc b/src/tests/test_insert_many_gc.cc index a1884d22c0c..be66e852021 100644 --- a/src/tests/test_insert_many_gc.cc +++ b/src/tests/test_insert_many_gc.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2014 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_insert_memleak.cc b/src/tests/test_insert_memleak.cc index 667221cdf04..8168ae477a1 100644 --- a/src/tests/test_insert_memleak.cc +++ b/src/tests/test_insert_memleak.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_insert_unique.cc b/src/tests/test_insert_unique.cc index 29439f9d704..84d1ded6db5 100644 --- a/src/tests/test_insert_unique.cc +++ b/src/tests/test_insert_unique.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/src/tests/test_iterate_live_transactions.cc b/src/tests/test_iterate_live_transactions.cc index bfc90e71d07..dd00ddeeb9a 100644 --- a/src/tests/test_iterate_live_transactions.cc +++ b/src/tests/test_iterate_live_transactions.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_iterate_pending_lock_requests.cc b/src/tests/test_iterate_pending_lock_requests.cc index 248c346c305..03dcce49ffd 100644 --- a/src/tests/test_iterate_pending_lock_requests.cc +++ b/src/tests/test_iterate_pending_lock_requests.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_keylen_diff.cc b/src/tests/test_keylen_diff.cc index 55eb620b958..144ac5fce3e 100644 --- a/src/tests/test_keylen_diff.cc +++ b/src/tests/test_keylen_diff.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2014 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_kv_gen.h b/src/tests/test_kv_gen.h index 0d54f8680a7..49bb3acdb42 100644 --- a/src/tests/test_kv_gen.h +++ b/src/tests/test_kv_gen.h @@ -30,7 +30,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_kv_limits.cc b/src/tests/test_kv_limits.cc index 9ce236bf0ad..70390bb2802 100644 --- a/src/tests/test_kv_limits.cc +++ b/src/tests/test_kv_limits.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. 
+ TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_large_update_broadcast_small_cachetable.cc b/src/tests/test_large_update_broadcast_small_cachetable.cc index ea164a8ea43..e5ccb3071b8 100644 --- a/src/tests/test_large_update_broadcast_small_cachetable.cc +++ b/src/tests/test_large_update_broadcast_small_cachetable.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_lock_timeout_callback.cc b/src/tests/test_lock_timeout_callback.cc index d4aae0f95d4..74daae7897f 100644 --- a/src/tests/test_lock_timeout_callback.cc +++ b/src/tests/test_lock_timeout_callback.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_locking_with_read_txn.cc b/src/tests/test_locking_with_read_txn.cc index 8f3349f3c4b..f3cb36d1df5 100644 --- a/src/tests/test_locking_with_read_txn.cc +++ b/src/tests/test_locking_with_read_txn.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_locktree_close.cc b/src/tests/test_locktree_close.cc index 10efefd0b62..b5735fd1495 100644 --- a/src/tests/test_locktree_close.cc +++ b/src/tests/test_locktree_close.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/src/tests/test_log0.cc b/src/tests/test_log0.cc index 05c0820f153..4f7202005d3 100644 --- a/src/tests/test_log0.cc +++ b/src/tests/test_log0.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_log1.cc b/src/tests/test_log1.cc index 3c03249c845..82f376ef067 100644 --- a/src/tests/test_log1.cc +++ b/src/tests/test_log1.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_log10.cc b/src/tests/test_log10.cc index 599eb16d3ae..d47f6220fc5 100644 --- a/src/tests/test_log10.cc +++ b/src/tests/test_log10.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_log1_abort.cc b/src/tests/test_log1_abort.cc index f1f8269239e..6861698740f 100644 --- a/src/tests/test_log1_abort.cc +++ b/src/tests/test_log1_abort.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_log2.cc b/src/tests/test_log2.cc index 85a3354629f..40e7f231398 100644 --- a/src/tests/test_log2.cc +++ b/src/tests/test_log2.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/src/tests/test_log2_abort.cc b/src/tests/test_log2_abort.cc index 7991f046643..a7470181d7f 100644 --- a/src/tests/test_log2_abort.cc +++ b/src/tests/test_log2_abort.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_log3.cc b/src/tests/test_log3.cc index 9e4a531a899..33c6bba45d2 100644 --- a/src/tests/test_log3.cc +++ b/src/tests/test_log3.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_log3_abort.cc b/src/tests/test_log3_abort.cc index dc36d754f09..d08dab0c033 100644 --- a/src/tests/test_log3_abort.cc +++ b/src/tests/test_log3_abort.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_log4.cc b/src/tests/test_log4.cc index b0da26e8454..7d45024ed1a 100644 --- a/src/tests/test_log4.cc +++ b/src/tests/test_log4.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_log4_abort.cc b/src/tests/test_log4_abort.cc index 4d73cda3903..37a00085812 100644 --- a/src/tests/test_log4_abort.cc +++ b/src/tests/test_log4_abort.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/src/tests/test_log5.cc b/src/tests/test_log5.cc index 6a40394668b..f03888f0367 100644 --- a/src/tests/test_log5.cc +++ b/src/tests/test_log5.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_log5_abort.cc b/src/tests/test_log5_abort.cc index be74c14b1d4..dcd512abd61 100644 --- a/src/tests/test_log5_abort.cc +++ b/src/tests/test_log5_abort.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_log6.cc b/src/tests/test_log6.cc index 9e579d5f4e5..8084933f5c0 100644 --- a/src/tests/test_log6.cc +++ b/src/tests/test_log6.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_log6_abort.cc b/src/tests/test_log6_abort.cc index c02e61c82b0..09db439b22b 100644 --- a/src/tests/test_log6_abort.cc +++ b/src/tests/test_log6_abort.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_log6a_abort.cc b/src/tests/test_log6a_abort.cc index ec4490c06fd..b5ddb6b4c3c 100644 --- a/src/tests/test_log6a_abort.cc +++ b/src/tests/test_log6a_abort.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/src/tests/test_log7.cc b/src/tests/test_log7.cc index ca5eb168028..fff24228ae0 100644 --- a/src/tests/test_log7.cc +++ b/src/tests/test_log7.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_log8.cc b/src/tests/test_log8.cc index bf6cad4c66b..a54793da239 100644 --- a/src/tests/test_log8.cc +++ b/src/tests/test_log8.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_log9.cc b/src/tests/test_log9.cc index 302eaefb976..38b8e7818f5 100644 --- a/src/tests/test_log9.cc +++ b/src/tests/test_log9.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_logflush.cc b/src/tests/test_logflush.cc index 0c813d4b131..6ea09b83916 100644 --- a/src/tests/test_logflush.cc +++ b/src/tests/test_logflush.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_logmax.cc b/src/tests/test_logmax.cc index 6c8b5a43ee2..89c9284ea6b 100644 --- a/src/tests/test_logmax.cc +++ b/src/tests/test_logmax.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/src/tests/test_mostly_seq.cc b/src/tests/test_mostly_seq.cc index 1094639e7e7..f4f8d16e312 100644 --- a/src/tests/test_mostly_seq.cc +++ b/src/tests/test_mostly_seq.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_multiple_checkpoints_block_commit.cc b/src/tests/test_multiple_checkpoints_block_commit.cc index 6cb26372140..5accd55dadc 100644 --- a/src/tests/test_multiple_checkpoints_block_commit.cc +++ b/src/tests/test_multiple_checkpoints_block_commit.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_nested.cc b/src/tests/test_nested.cc index 1f96101b940..9ce288435ce 100644 --- a/src/tests/test_nested.cc +++ b/src/tests/test_nested.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_nodup_set.cc b/src/tests/test_nodup_set.cc index f6797c81ef6..81c0d258af3 100644 --- a/src/tests/test_nodup_set.cc +++ b/src/tests/test_nodup_set.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_query.cc b/src/tests/test_query.cc index 1a3ee026b9a..db199ea2b80 100644 --- a/src/tests/test_query.cc +++ b/src/tests/test_query.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. 
Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_rand_insert.cc b/src/tests/test_rand_insert.cc index d87f34af28b..76c12a9d124 100644 --- a/src/tests/test_rand_insert.cc +++ b/src/tests/test_rand_insert.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_read_txn_invalid_ops.cc b/src/tests/test_read_txn_invalid_ops.cc index f86c56637c6..93cab3cd0c0 100644 --- a/src/tests/test_read_txn_invalid_ops.cc +++ b/src/tests/test_read_txn_invalid_ops.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_redirect_func.cc b/src/tests/test_redirect_func.cc index f5e4dacbfcd..2107fda3093 100644 --- a/src/tests/test_redirect_func.cc +++ b/src/tests/test_redirect_func.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_restrict.cc b/src/tests/test_restrict.cc index cc1d573ca4f..fe71111a6bb 100644 --- a/src/tests/test_restrict.cc +++ b/src/tests/test_restrict.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_reverse_compare_fun.cc b/src/tests/test_reverse_compare_fun.cc index 774a78d23b1..f50cc6fc18e 100644 --- a/src/tests/test_reverse_compare_fun.cc +++ b/src/tests/test_reverse_compare_fun.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. 
+ TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_set_func_malloc.cc b/src/tests/test_set_func_malloc.cc index 8efa786b7fd..0acea21e863 100644 --- a/src/tests/test_set_func_malloc.cc +++ b/src/tests/test_set_func_malloc.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_simple_read_txn.cc b/src/tests/test_simple_read_txn.cc index 3538c71e476..4449a6287b8 100644 --- a/src/tests/test_simple_read_txn.cc +++ b/src/tests/test_simple_read_txn.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_stress0.cc b/src/tests/test_stress0.cc index 6e3eb2e2e89..5dbca08db48 100644 --- a/src/tests/test_stress0.cc +++ b/src/tests/test_stress0.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_stress1.cc b/src/tests/test_stress1.cc index 9aa5c29e89b..81095299265 100644 --- a/src/tests/test_stress1.cc +++ b/src/tests/test_stress1.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_stress2.cc b/src/tests/test_stress2.cc index 255dc10317b..cbd798f318b 100644 --- a/src/tests/test_stress2.cc +++ b/src/tests/test_stress2.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. 
+ TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_stress3.cc b/src/tests/test_stress3.cc index 572576261af..b47e4f812ae 100644 --- a/src/tests/test_stress3.cc +++ b/src/tests/test_stress3.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_stress4.cc b/src/tests/test_stress4.cc index 4404f1cecac..3d420561f5e 100644 --- a/src/tests/test_stress4.cc +++ b/src/tests/test_stress4.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_stress5.cc b/src/tests/test_stress5.cc index 053da44d66c..a591b340025 100644 --- a/src/tests/test_stress5.cc +++ b/src/tests/test_stress5.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_stress6.cc b/src/tests/test_stress6.cc index e3d47064d13..d616622353b 100644 --- a/src/tests/test_stress6.cc +++ b/src/tests/test_stress6.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_stress7.cc b/src/tests/test_stress7.cc index e1e477c234a..cdf03ce8036 100644 --- a/src/tests/test_stress7.cc +++ b/src/tests/test_stress7.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/src/tests/test_stress_hot_indexing.cc b/src/tests/test_stress_hot_indexing.cc index 65e7230bba6..fe237f063ab 100644 --- a/src/tests/test_stress_hot_indexing.cc +++ b/src/tests/test_stress_hot_indexing.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_stress_openclose.cc b/src/tests/test_stress_openclose.cc index 55d21770b0c..54c8e784b18 100644 --- a/src/tests/test_stress_openclose.cc +++ b/src/tests/test_stress_openclose.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_stress_with_verify.cc b/src/tests/test_stress_with_verify.cc index 3c13da4f975..d259d09d25a 100644 --- a/src/tests/test_stress_with_verify.cc +++ b/src/tests/test_stress_with_verify.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_thread_flags.cc b/src/tests/test_thread_flags.cc index 08429d1effc..2ff2dabab98 100644 --- a/src/tests/test_thread_flags.cc +++ b/src/tests/test_thread_flags.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_thread_insert.cc b/src/tests/test_thread_insert.cc index c8a84196d4a..a1044948e0e 100644 --- a/src/tests/test_thread_insert.cc +++ b/src/tests/test_thread_insert.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. 
+ TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_trans_desc_during_chkpt.cc b/src/tests/test_trans_desc_during_chkpt.cc index d1844ba3f9b..5d2196b13f8 100644 --- a/src/tests/test_trans_desc_during_chkpt.cc +++ b/src/tests/test_trans_desc_during_chkpt.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_trans_desc_during_chkpt2.cc b/src/tests/test_trans_desc_during_chkpt2.cc index dd545d33900..3a215a8b6f2 100644 --- a/src/tests/test_trans_desc_during_chkpt2.cc +++ b/src/tests/test_trans_desc_during_chkpt2.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_trans_desc_during_chkpt3.cc b/src/tests/test_trans_desc_during_chkpt3.cc index 71a9358c332..6644cdabaa2 100644 --- a/src/tests/test_trans_desc_during_chkpt3.cc +++ b/src/tests/test_trans_desc_during_chkpt3.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_trans_desc_during_chkpt4.cc b/src/tests/test_trans_desc_during_chkpt4.cc index dd545d33900..3a215a8b6f2 100644 --- a/src/tests/test_trans_desc_during_chkpt4.cc +++ b/src/tests/test_trans_desc_during_chkpt4.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/src/tests/test_transactional_descriptor.cc b/src/tests/test_transactional_descriptor.cc index 8c800784e5c..4f2e66a9381 100644 --- a/src/tests/test_transactional_descriptor.cc +++ b/src/tests/test_transactional_descriptor.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_txn_abort5.cc b/src/tests/test_txn_abort5.cc index 27b7f056cf2..fb3a522c995 100644 --- a/src/tests/test_txn_abort5.cc +++ b/src/tests/test_txn_abort5.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_txn_abort5a.cc b/src/tests/test_txn_abort5a.cc index 87840fc8958..6678a959805 100644 --- a/src/tests/test_txn_abort5a.cc +++ b/src/tests/test_txn_abort5a.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_txn_abort6.cc b/src/tests/test_txn_abort6.cc index ce481c342de..f61aea8e0bd 100644 --- a/src/tests/test_txn_abort6.cc +++ b/src/tests/test_txn_abort6.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_txn_abort7.cc b/src/tests/test_txn_abort7.cc index 8832e950310..f7f0840680f 100644 --- a/src/tests/test_txn_abort7.cc +++ b/src/tests/test_txn_abort7.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. 
Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_txn_begin_commit.cc b/src/tests/test_txn_begin_commit.cc index 8b3906decb8..7e686e3e885 100644 --- a/src/tests/test_txn_begin_commit.cc +++ b/src/tests/test_txn_begin_commit.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_txn_close_before_commit.cc b/src/tests/test_txn_close_before_commit.cc index cbc9d856bd2..24ef8a0fb72 100644 --- a/src/tests/test_txn_close_before_commit.cc +++ b/src/tests/test_txn_close_before_commit.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_txn_close_before_prepare_commit.cc b/src/tests/test_txn_close_before_prepare_commit.cc index 6427bf8491e..e3b715c4ce7 100644 --- a/src/tests/test_txn_close_before_prepare_commit.cc +++ b/src/tests/test_txn_close_before_prepare_commit.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_txn_cursor_last.cc b/src/tests/test_txn_cursor_last.cc index 62cb3984b79..82a15e0b515 100644 --- a/src/tests/test_txn_cursor_last.cc +++ b/src/tests/test_txn_cursor_last.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/src/tests/test_txn_nested1.cc b/src/tests/test_txn_nested1.cc index 748c6e44750..7797d88e478 100644 --- a/src/tests/test_txn_nested1.cc +++ b/src/tests/test_txn_nested1.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_txn_nested2.cc b/src/tests/test_txn_nested2.cc index 3cd2164d938..f5c0d2b4e51 100644 --- a/src/tests/test_txn_nested2.cc +++ b/src/tests/test_txn_nested2.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_txn_nested3.cc b/src/tests/test_txn_nested3.cc index 0e74b0856ce..16ede714422 100644 --- a/src/tests/test_txn_nested3.cc +++ b/src/tests/test_txn_nested3.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_txn_nested4.cc b/src/tests/test_txn_nested4.cc index 9b064564556..0bca6309169 100644 --- a/src/tests/test_txn_nested4.cc +++ b/src/tests/test_txn_nested4.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_txn_nested5.cc b/src/tests/test_txn_nested5.cc index 0f90c96bc2e..02692be3d08 100644 --- a/src/tests/test_txn_nested5.cc +++ b/src/tests/test_txn_nested5.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/src/tests/test_txn_nested_abort.cc b/src/tests/test_txn_nested_abort.cc index 2c81c91681d..10be3fea79d 100644 --- a/src/tests/test_txn_nested_abort.cc +++ b/src/tests/test_txn_nested_abort.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_txn_nested_abort2.cc b/src/tests/test_txn_nested_abort2.cc index 2fbf3f6e2b2..6e1928b4891 100644 --- a/src/tests/test_txn_nested_abort2.cc +++ b/src/tests/test_txn_nested_abort2.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_txn_nested_abort3.cc b/src/tests/test_txn_nested_abort3.cc index c53b1cc68ba..2fa58b86b2a 100644 --- a/src/tests/test_txn_nested_abort3.cc +++ b/src/tests/test_txn_nested_abort3.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_txn_nested_abort4.cc b/src/tests/test_txn_nested_abort4.cc index 164f1c26d11..b412aeec884 100644 --- a/src/tests/test_txn_nested_abort4.cc +++ b/src/tests/test_txn_nested_abort4.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_txn_recover3.cc b/src/tests/test_txn_recover3.cc index c701ed51257..55cf772f207 100644 --- a/src/tests/test_txn_recover3.cc +++ b/src/tests/test_txn_recover3.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. 
+ TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_unused_memory_crash.cc b/src/tests/test_unused_memory_crash.cc index 9bb65016ba0..9c13a08368f 100644 --- a/src/tests/test_unused_memory_crash.cc +++ b/src/tests/test_unused_memory_crash.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_update_abort_works.cc b/src/tests/test_update_abort_works.cc index 595b955855f..4a0d815749b 100644 --- a/src/tests/test_update_abort_works.cc +++ b/src/tests/test_update_abort_works.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_update_broadcast_abort_works.cc b/src/tests/test_update_broadcast_abort_works.cc index 5b9e105cc7c..c11fffe643f 100644 --- a/src/tests/test_update_broadcast_abort_works.cc +++ b/src/tests/test_update_broadcast_abort_works.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_update_broadcast_calls_back.cc b/src/tests/test_update_broadcast_calls_back.cc index 22bb1193f3f..db12a74832b 100644 --- a/src/tests/test_update_broadcast_calls_back.cc +++ b/src/tests/test_update_broadcast_calls_back.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/src/tests/test_update_broadcast_can_delete_elements.cc b/src/tests/test_update_broadcast_can_delete_elements.cc index a54aa20da88..804161402ba 100644 --- a/src/tests/test_update_broadcast_can_delete_elements.cc +++ b/src/tests/test_update_broadcast_can_delete_elements.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_update_broadcast_changes_values.cc b/src/tests/test_update_broadcast_changes_values.cc index c532d571375..304c799de07 100644 --- a/src/tests/test_update_broadcast_changes_values.cc +++ b/src/tests/test_update_broadcast_changes_values.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_update_broadcast_indexer.cc b/src/tests/test_update_broadcast_indexer.cc index 839b42a5347..4a7fa176424 100644 --- a/src/tests/test_update_broadcast_indexer.cc +++ b/src/tests/test_update_broadcast_indexer.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_update_broadcast_loader.cc b/src/tests/test_update_broadcast_loader.cc index 704e6e08070..4e3db6380ae 100644 --- a/src/tests/test_update_broadcast_loader.cc +++ b/src/tests/test_update_broadcast_loader.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/src/tests/test_update_broadcast_nested_updates.cc b/src/tests/test_update_broadcast_nested_updates.cc index 42b254b22ef..2f858beffcd 100644 --- a/src/tests/test_update_broadcast_nested_updates.cc +++ b/src/tests/test_update_broadcast_nested_updates.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_update_broadcast_previously_deleted.cc b/src/tests/test_update_broadcast_previously_deleted.cc index 912b68f6a1e..348dd71f941 100644 --- a/src/tests/test_update_broadcast_previously_deleted.cc +++ b/src/tests/test_update_broadcast_previously_deleted.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_update_broadcast_stress.cc b/src/tests/test_update_broadcast_stress.cc index 9da0fd8dfa8..fb294e40446 100644 --- a/src/tests/test_update_broadcast_stress.cc +++ b/src/tests/test_update_broadcast_stress.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_update_broadcast_update_fun_has_choices.cc b/src/tests/test_update_broadcast_update_fun_has_choices.cc index 31c0dabc39c..6f6481d3175 100644 --- a/src/tests/test_update_broadcast_update_fun_has_choices.cc +++ b/src/tests/test_update_broadcast_update_fun_has_choices.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/src/tests/test_update_broadcast_with_empty_table.cc b/src/tests/test_update_broadcast_with_empty_table.cc index 82c69f95af8..5aa27c10b69 100644 --- a/src/tests/test_update_broadcast_with_empty_table.cc +++ b/src/tests/test_update_broadcast_with_empty_table.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_update_calls_back.cc b/src/tests/test_update_calls_back.cc index ba64dea1463..4970cc6ad4c 100644 --- a/src/tests/test_update_calls_back.cc +++ b/src/tests/test_update_calls_back.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_update_can_delete_elements.cc b/src/tests/test_update_can_delete_elements.cc index ca59008014f..328a569bf56 100644 --- a/src/tests/test_update_can_delete_elements.cc +++ b/src/tests/test_update_can_delete_elements.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_update_changes_values.cc b/src/tests/test_update_changes_values.cc index 623ce1a1f38..ee346f54947 100644 --- a/src/tests/test_update_changes_values.cc +++ b/src/tests/test_update_changes_values.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/src/tests/test_update_nested_updates.cc b/src/tests/test_update_nested_updates.cc index 28ab01ae632..cb44a5bff29 100644 --- a/src/tests/test_update_nested_updates.cc +++ b/src/tests/test_update_nested_updates.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_update_nonexistent_keys.cc b/src/tests/test_update_nonexistent_keys.cc index 24a1eaf3787..1d609aabb9a 100644 --- a/src/tests/test_update_nonexistent_keys.cc +++ b/src/tests/test_update_nonexistent_keys.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_update_previously_deleted.cc b/src/tests/test_update_previously_deleted.cc index 27c01649851..7e9e4bcb5ba 100644 --- a/src/tests/test_update_previously_deleted.cc +++ b/src/tests/test_update_previously_deleted.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_update_stress.cc b/src/tests/test_update_stress.cc index 97a6bb93d6c..0c0d2c9926a 100644 --- a/src/tests/test_update_stress.cc +++ b/src/tests/test_update_stress.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/src/tests/test_update_txn_snapshot_works_concurrently.cc b/src/tests/test_update_txn_snapshot_works_concurrently.cc index 337a90e8d88..99acf3f7f8e 100644 --- a/src/tests/test_update_txn_snapshot_works_concurrently.cc +++ b/src/tests/test_update_txn_snapshot_works_concurrently.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_update_txn_snapshot_works_correctly_with_deletes.cc b/src/tests/test_update_txn_snapshot_works_correctly_with_deletes.cc index 641521e96bd..61a346c928d 100644 --- a/src/tests/test_update_txn_snapshot_works_correctly_with_deletes.cc +++ b/src/tests/test_update_txn_snapshot_works_correctly_with_deletes.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_update_with_empty_table.cc b/src/tests/test_update_with_empty_table.cc index 6ed492b86e9..6d54ee93b19 100644 --- a/src/tests/test_update_with_empty_table.cc +++ b/src/tests/test_update_with_empty_table.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_updates_single_key.cc b/src/tests/test_updates_single_key.cc index 455e82122dd..0b4dff69b9f 100644 --- a/src/tests/test_updates_single_key.cc +++ b/src/tests/test_updates_single_key.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/src/tests/test_weakxaction.cc b/src/tests/test_weakxaction.cc index 3e9e1f25234..e99f6510a0c 100644 --- a/src/tests/test_weakxaction.cc +++ b/src/tests/test_weakxaction.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/test_zero_length_keys.cc b/src/tests/test_zero_length_keys.cc index 37180e9e952..c7b4dd1ac75 100644 --- a/src/tests/test_zero_length_keys.cc +++ b/src/tests/test_zero_length_keys.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/threaded_stress_test_helpers.h b/src/tests/threaded_stress_test_helpers.h index 0269867744e..2c2525a3165 100644 --- a/src/tests/threaded_stress_test_helpers.h +++ b/src/tests/threaded_stress_test_helpers.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/time_create_db.cc b/src/tests/time_create_db.cc index 2cc2496f33a..2365df4701d 100644 --- a/src/tests/time_create_db.cc +++ b/src/tests/time_create_db.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/transactional_fileops.cc b/src/tests/transactional_fileops.cc index c58e5d8e8a1..ea1f8af6e16 100644 --- a/src/tests/transactional_fileops.cc +++ b/src/tests/transactional_fileops.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. 
+ TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/update-multiple-data-diagonal.cc b/src/tests/update-multiple-data-diagonal.cc index f60f939dbc8..c39005d0f00 100644 --- a/src/tests/update-multiple-data-diagonal.cc +++ b/src/tests/update-multiple-data-diagonal.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/update-multiple-key0.cc b/src/tests/update-multiple-key0.cc index 52d672ebc6f..51257fd7377 100644 --- a/src/tests/update-multiple-key0.cc +++ b/src/tests/update-multiple-key0.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/update-multiple-nochange.cc b/src/tests/update-multiple-nochange.cc index e814bff7d2b..19a668a67c8 100644 --- a/src/tests/update-multiple-nochange.cc +++ b/src/tests/update-multiple-nochange.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/update-multiple-with-indexer-array.cc b/src/tests/update-multiple-with-indexer-array.cc index 684925f9872..9101771c4d8 100644 --- a/src/tests/update-multiple-with-indexer-array.cc +++ b/src/tests/update-multiple-with-indexer-array.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/src/tests/update-multiple-with-indexer.cc b/src/tests/update-multiple-with-indexer.cc index 62f3c7b7e76..444bcf17106 100644 --- a/src/tests/update-multiple-with-indexer.cc +++ b/src/tests/update-multiple-with-indexer.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/update.cc b/src/tests/update.cc index e89a0227082..aa0c4f0dd18 100644 --- a/src/tests/update.cc +++ b/src/tests/update.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/upgrade-test-1.cc b/src/tests/upgrade-test-1.cc index ef638e3fa8c..5de82f09d51 100644 --- a/src/tests/upgrade-test-1.cc +++ b/src/tests/upgrade-test-1.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/upgrade-test-2.cc b/src/tests/upgrade-test-2.cc index e7735ac1567..b8375a20ede 100644 --- a/src/tests/upgrade-test-2.cc +++ b/src/tests/upgrade-test-2.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/upgrade-test-3.cc b/src/tests/upgrade-test-3.cc index 276251d699a..e18ebdcf3b5 100644 --- a/src/tests/upgrade-test-3.cc +++ b/src/tests/upgrade-test-3.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/src/tests/upgrade-test-4.cc b/src/tests/upgrade-test-4.cc index 67380e900e1..86b62fcabee 100644 --- a/src/tests/upgrade-test-4.cc +++ b/src/tests/upgrade-test-4.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/upgrade-test-5.cc b/src/tests/upgrade-test-5.cc index 564fe607d85..ce5e4faeb58 100644 --- a/src/tests/upgrade-test-5.cc +++ b/src/tests/upgrade-test-5.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/upgrade-test-6.cc b/src/tests/upgrade-test-6.cc index afe99ae68a3..a1e137c980f 100644 --- a/src/tests/upgrade-test-6.cc +++ b/src/tests/upgrade-test-6.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/upgrade-test-7.cc b/src/tests/upgrade-test-7.cc index b1a17d0a079..12acbc215cb 100644 --- a/src/tests/upgrade-test-7.cc +++ b/src/tests/upgrade-test-7.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/upgrade_simple.cc b/src/tests/upgrade_simple.cc index a9048460054..678953c4ff7 100644 --- a/src/tests/upgrade_simple.cc +++ b/src/tests/upgrade_simple.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/src/tests/xa-dirty-commit.cc b/src/tests/xa-dirty-commit.cc index 03850b2b026..126a7c1453e 100644 --- a/src/tests/xa-dirty-commit.cc +++ b/src/tests/xa-dirty-commit.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/xa-dirty-rollback.cc b/src/tests/xa-dirty-rollback.cc index 8d28e8a762f..2d13e559050 100644 --- a/src/tests/xa-dirty-rollback.cc +++ b/src/tests/xa-dirty-rollback.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/xa-txn-discard-abort.cc b/src/tests/xa-txn-discard-abort.cc index 3b71f807d44..3365a1bb139 100644 --- a/src/tests/xa-txn-discard-abort.cc +++ b/src/tests/xa-txn-discard-abort.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/xa-txn-discard-commit.cc b/src/tests/xa-txn-discard-commit.cc index 51b2d0670cd..c4d164017ae 100644 --- a/src/tests/xa-txn-discard-commit.cc +++ b/src/tests/xa-txn-discard-commit.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/tests/zombie_db.cc b/src/tests/zombie_db.cc index 16d6a933451..56ff71f13da 100644 --- a/src/tests/zombie_db.cc +++ b/src/tests/zombie_db.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/src/toku_patent.cc b/src/toku_patent.cc index e7b0ebe2c56..5261b6f3a79 100644 --- a/src/toku_patent.cc +++ b/src/toku_patent.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -115,7 +115,7 @@ const char *toku_patent_string = "COPYING CONDITIONS NOTICE:\n\ \n\ COPYRIGHT NOTICE:\n\ \n\ - TokuDB, Tokutek Fractal Tree Indexing Library.\n\ + TokuFT, Tokutek Fractal Tree Indexing Library.\n\ Copyright (C) 2007-2013 Tokutek, Inc.\n\ \n\ DISCLAIMER:\n\ diff --git a/src/ydb-internal.h b/src/ydb-internal.h index 60be5338eac..26cc8419f4a 100644 --- a/src/ydb-internal.h +++ b/src/ydb-internal.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/ydb.cc b/src/ydb.cc index ed5eb8bfae9..885671f76c9 100644 --- a/src/ydb.cc +++ b/src/ydb.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/ydb.h b/src/ydb.h index 7fcc460b6fe..fad41f382f3 100644 --- a/src/ydb.h +++ b/src/ydb.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/ydb_cursor.cc b/src/ydb_cursor.cc index 8ccc00c285e..81c7a0593ff 100644 --- a/src/ydb_cursor.cc +++ b/src/ydb_cursor.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/src/ydb_cursor.h b/src/ydb_cursor.h index ff7070f668b..a10e32f3002 100644 --- a/src/ydb_cursor.h +++ b/src/ydb_cursor.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/ydb_db.cc b/src/ydb_db.cc index ad49376a39d..3d7d328b5f8 100644 --- a/src/ydb_db.cc +++ b/src/ydb_db.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/ydb_db.h b/src/ydb_db.h index 9fac85f9ffb..edbc72cb0d3 100644 --- a/src/ydb_db.h +++ b/src/ydb_db.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/ydb_env_func.cc b/src/ydb_env_func.cc index 3714b55c746..714fad74ec5 100644 --- a/src/ydb_env_func.cc +++ b/src/ydb_env_func.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/ydb_env_func.h b/src/ydb_env_func.h index 08202a334fe..2fb0c202f90 100644 --- a/src/ydb_env_func.h +++ b/src/ydb_env_func.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/ydb_lib.cc b/src/ydb_lib.cc index 11847788661..8956fec04ab 100644 --- a/src/ydb_lib.cc +++ b/src/ydb_lib.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. 
+ TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/ydb_load.h b/src/ydb_load.h index f5e140d9d5d..c815969a97c 100644 --- a/src/ydb_load.h +++ b/src/ydb_load.h @@ -30,7 +30,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/ydb_row_lock.cc b/src/ydb_row_lock.cc index 89b436380ea..40cafd0e331 100644 --- a/src/ydb_row_lock.cc +++ b/src/ydb_row_lock.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/ydb_row_lock.h b/src/ydb_row_lock.h index 8c50cf87cc9..2c3a10d92d6 100644 --- a/src/ydb_row_lock.h +++ b/src/ydb_row_lock.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/ydb_txn.cc b/src/ydb_txn.cc index 1cccb91f330..b6b8e154c6f 100644 --- a/src/ydb_txn.cc +++ b/src/ydb_txn.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/ydb_txn.h b/src/ydb_txn.h index 57b2201d6cc..a2e5a3b09e2 100644 --- a/src/ydb_txn.h +++ b/src/ydb_txn.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/src/ydb_write.cc b/src/ydb_write.cc index a0d355eb948..9f9937a9301 100644 --- a/src/ydb_write.cc +++ b/src/ydb_write.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/src/ydb_write.h b/src/ydb_write.h index ba26b5106b7..00c4ab4da5e 100644 --- a/src/ydb_write.h +++ b/src/ydb_write.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/tools/ba_replay.cc b/tools/ba_replay.cc index b5e5fe7932f..e274ac0a1e8 100644 --- a/tools/ba_replay.cc +++ b/tools/ba_replay.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/tools/ftverify.cc b/tools/ftverify.cc index 1365f34a320..7f7e6d27d13 100644 --- a/tools/ftverify.cc +++ b/tools/ftverify.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/tools/tdb-recover.cc b/tools/tdb-recover.cc index 9c0239e63b5..842acc75924 100644 --- a/tools/tdb-recover.cc +++ b/tools/tdb-recover.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/tools/tdb_logprint.cc b/tools/tdb_logprint.cc index 8d0bea0f016..1dd7581b9f5 100644 --- a/tools/tdb_logprint.cc +++ b/tools/tdb_logprint.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/tools/tokudb_dump.cc b/tools/tokudb_dump.cc index 83578ed9f35..2da50bb793a 100644 --- a/tools/tokudb_dump.cc +++ b/tools/tokudb_dump.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/tools/tokuftdump.cc b/tools/tokuftdump.cc index a6df16af197..6f3d14deba7 100644 --- a/tools/tokuftdump.cc +++ b/tools/tokuftdump.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/util/bytestring.h b/util/bytestring.h index 43119983452..1fea03ecfd1 100644 --- a/util/bytestring.h +++ b/util/bytestring.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2014 Tokutek, Inc. DISCLAIMER: diff --git a/util/circular_buffer.cc b/util/circular_buffer.cc index a453c5b71c7..92d9af521f7 100644 --- a/util/circular_buffer.cc +++ b/util/circular_buffer.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/util/circular_buffer.h b/util/circular_buffer.h index 13e5f06ab11..904dfed7c49 100644 --- a/util/circular_buffer.h +++ b/util/circular_buffer.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/util/constexpr.h b/util/constexpr.h index 444f0f718da..ed71daaf3fe 100644 --- a/util/constexpr.h +++ b/util/constexpr.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/util/context.cc b/util/context.cc index 350cac07960..67f146872d5 100644 --- a/util/context.cc +++ b/util/context.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2014 Tokutek, Inc. DISCLAIMER: diff --git a/util/context.h b/util/context.h index 15f7d732635..04aef5c5e3b 100644 --- a/util/context.h +++ b/util/context.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2014 Tokutek, Inc. DISCLAIMER: diff --git a/util/dbt.cc b/util/dbt.cc index 99d5371a6fd..aa26a9b0dd9 100644 --- a/util/dbt.cc +++ b/util/dbt.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/util/dbt.h b/util/dbt.h index 9d8329e78bf..4d78068cb67 100644 --- a/util/dbt.h +++ b/util/dbt.h @@ -30,7 +30,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. 
Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/util/dmt.cc b/util/dmt.cc index 87b06bf5696..3e0b512d7a7 100644 --- a/util/dmt.cc +++ b/util/dmt.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/util/dmt.h b/util/dmt.h index f927b966002..d4b032f5d6f 100644 --- a/util/dmt.h +++ b/util/dmt.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/util/doubly_linked_list.h b/util/doubly_linked_list.h index 444d4f30cd9..738e2736fa1 100644 --- a/util/doubly_linked_list.h +++ b/util/doubly_linked_list.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/util/frwlock.cc b/util/frwlock.cc index 7259c776f83..fac0c07967b 100644 --- a/util/frwlock.cc +++ b/util/frwlock.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/util/frwlock.h b/util/frwlock.h index 8698c96b279..985c92bccb4 100644 --- a/util/frwlock.h +++ b/util/frwlock.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/util/growable_array.h b/util/growable_array.h index cdb7973c9e3..b452f94366c 100644 --- a/util/growable_array.h +++ b/util/growable_array.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/util/kibbutz.cc b/util/kibbutz.cc index 8e69471e9ab..ad0c0b30788 100644 --- a/util/kibbutz.cc +++ b/util/kibbutz.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/util/kibbutz.h b/util/kibbutz.h index 6e9d3cf6643..25515887700 100644 --- a/util/kibbutz.h +++ b/util/kibbutz.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/util/memarena.cc b/util/memarena.cc index 6fb6eb51cd7..d8c0daa0ba0 100644 --- a/util/memarena.cc +++ b/util/memarena.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/util/memarena.h b/util/memarena.h index c9ce8ce93f2..8d1b577222e 100644 --- a/util/memarena.h +++ b/util/memarena.h @@ -30,7 +30,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/util/mempool.cc b/util/mempool.cc index ffd900580b1..23200ee41ad 100644 --- a/util/mempool.cc +++ b/util/mempool.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. 
+ TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/util/mempool.h b/util/mempool.h index 8b52f095b22..e1a47e66591 100644 --- a/util/mempool.h +++ b/util/mempool.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/util/minicron.cc b/util/minicron.cc index 32e24f6bde5..601e1fd40d4 100644 --- a/util/minicron.cc +++ b/util/minicron.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/util/minicron.h b/util/minicron.h index 74e75a86181..b97c5687561 100644 --- a/util/minicron.h +++ b/util/minicron.h @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/util/nb_mutex.h b/util/nb_mutex.h index 6124eba2437..cc350813622 100644 --- a/util/nb_mutex.h +++ b/util/nb_mutex.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/util/omt.cc b/util/omt.cc index 709c7eab4c3..bb3fc34c513 100644 --- a/util/omt.cc +++ b/util/omt.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/util/omt.h b/util/omt.h index 86f39e1fe5b..02f3f0d759a 100644 --- a/util/omt.h +++ b/util/omt.h @@ -30,7 +30,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/util/partitioned_counter.cc b/util/partitioned_counter.cc index 4ac60cc8e29..70dff209f3b 100644 --- a/util/partitioned_counter.cc +++ b/util/partitioned_counter.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/util/partitioned_counter.h b/util/partitioned_counter.h index 832309c1935..4da0e084a82 100644 --- a/util/partitioned_counter.h +++ b/util/partitioned_counter.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/util/queue.cc b/util/queue.cc index ecc6747c3e3..7a2fefaefec 100644 --- a/util/queue.cc +++ b/util/queue.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/util/queue.h b/util/queue.h index 51a9662886c..88c7d99c200 100644 --- a/util/queue.h +++ b/util/queue.h @@ -30,7 +30,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/util/rwlock.h b/util/rwlock.h index 6ee4c6cec1a..a98e10b47d2 100644 --- a/util/rwlock.h +++ b/util/rwlock.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. 
+ TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/util/scoped_malloc.cc b/util/scoped_malloc.cc index ed8a493233e..551bd944beb 100644 --- a/util/scoped_malloc.cc +++ b/util/scoped_malloc.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/util/scoped_malloc.h b/util/scoped_malloc.h index ae8847731f3..dbd919d155e 100644 --- a/util/scoped_malloc.h +++ b/util/scoped_malloc.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/util/sort.h b/util/sort.h index d597b4d7a8d..2925f791029 100644 --- a/util/sort.h +++ b/util/sort.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/util/status.h b/util/status.h index 0706185a856..54eb4dbd376 100644 --- a/util/status.h +++ b/util/status.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/util/tests/marked-omt-test.cc b/util/tests/marked-omt-test.cc index 883a414c566..97e4cf72d61 100644 --- a/util/tests/marked-omt-test.cc +++ b/util/tests/marked-omt-test.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/util/tests/memarena-test.cc b/util/tests/memarena-test.cc index 46a55d58847..7374539d11a 100644 --- a/util/tests/memarena-test.cc +++ b/util/tests/memarena-test.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/util/tests/minicron-test.cc b/util/tests/minicron-test.cc index 2ec27a05310..7729edbda84 100644 --- a/util/tests/minicron-test.cc +++ b/util/tests/minicron-test.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/util/tests/omt-test.cc b/util/tests/omt-test.cc index 9eeb7970a47..28daed80965 100644 --- a/util/tests/omt-test.cc +++ b/util/tests/omt-test.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/util/tests/omt-tmpl-test.cc b/util/tests/omt-tmpl-test.cc index 8a9e13af89d..455502d50e9 100644 --- a/util/tests/omt-tmpl-test.cc +++ b/util/tests/omt-tmpl-test.cc @@ -30,7 +30,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/util/tests/queue-test.cc b/util/tests/queue-test.cc index f613ed561f2..d15e9ccab96 100644 --- a/util/tests/queue-test.cc +++ b/util/tests/queue-test.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/util/tests/rwlock_condvar.h b/util/tests/rwlock_condvar.h index db4b759ea52..58e7a61ae42 100644 --- a/util/tests/rwlock_condvar.h +++ b/util/tests/rwlock_condvar.h @@ -33,7 +33,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/util/tests/sort-tmpl-test.cc b/util/tests/sort-tmpl-test.cc index a1be929fce0..7597c4fa5a7 100644 --- a/util/tests/sort-tmpl-test.cc +++ b/util/tests/sort-tmpl-test.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/util/tests/test-kibbutz.cc b/util/tests/test-kibbutz.cc index 1f73037892c..dd5a7facf64 100644 --- a/util/tests/test-kibbutz.cc +++ b/util/tests/test-kibbutz.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/util/tests/test-kibbutz2.cc b/util/tests/test-kibbutz2.cc index ce797c068d8..80b97ff69c6 100644 --- a/util/tests/test-kibbutz2.cc +++ b/util/tests/test-kibbutz2.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/util/tests/test-rwlock-cheapness.cc b/util/tests/test-rwlock-cheapness.cc index de54c21efd2..ac04da16b85 100644 --- a/util/tests/test-rwlock-cheapness.cc +++ b/util/tests/test-rwlock-cheapness.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/util/tests/test-rwlock.cc b/util/tests/test-rwlock.cc index 42ceb00ad19..c4988aab85d 100644 --- a/util/tests/test-rwlock.cc +++ b/util/tests/test-rwlock.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/util/tests/test.h b/util/tests/test.h index 0760b9bf1fb..6ca60105d93 100644 --- a/util/tests/test.h +++ b/util/tests/test.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/util/tests/test_circular_buffer.cc b/util/tests/test_circular_buffer.cc index 8bc239ac6fc..8bf0b646e4a 100644 --- a/util/tests/test_circular_buffer.cc +++ b/util/tests/test_circular_buffer.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/util/tests/test_doubly_linked_list.cc b/util/tests/test_doubly_linked_list.cc index 6fad884ed8e..94e6b0a3489 100644 --- a/util/tests/test_doubly_linked_list.cc +++ b/util/tests/test_doubly_linked_list.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/util/tests/test_partitioned_counter.cc b/util/tests/test_partitioned_counter.cc index 02a9846517a..ce09aa04229 100644 --- a/util/tests/test_partitioned_counter.cc +++ b/util/tests/test_partitioned_counter.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. 
Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/util/tests/test_partitioned_counter_5833.cc b/util/tests/test_partitioned_counter_5833.cc index 419f992576b..2e42e4d4b4e 100644 --- a/util/tests/test_partitioned_counter_5833.cc +++ b/util/tests/test_partitioned_counter_5833.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/util/tests/threadpool-nproc-limit.cc b/util/tests/threadpool-nproc-limit.cc index f1ba10dad84..3395a30238b 100644 --- a/util/tests/threadpool-nproc-limit.cc +++ b/util/tests/threadpool-nproc-limit.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/util/tests/threadpool-test.cc b/util/tests/threadpool-test.cc index 6815cce8f8f..b9bebc5db7d 100644 --- a/util/tests/threadpool-test.cc +++ b/util/tests/threadpool-test.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/util/tests/threadpool-testrunf.cc b/util/tests/threadpool-testrunf.cc index f4d875a8941..b7744cbf54c 100644 --- a/util/tests/threadpool-testrunf.cc +++ b/util/tests/threadpool-testrunf.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/util/tests/x1764-test.cc b/util/tests/x1764-test.cc index d8a0b1d0eb3..5f47e007f50 100644 --- a/util/tests/x1764-test.cc +++ b/util/tests/x1764-test.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/util/threadpool.cc b/util/threadpool.cc index 4f1105d83c2..7c8fade7ed6 100644 --- a/util/threadpool.cc +++ b/util/threadpool.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/util/threadpool.h b/util/threadpool.h index 1882a4c2f7a..ed43dea93be 100644 --- a/util/threadpool.h +++ b/util/threadpool.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/util/x1764.cc b/util/x1764.cc index ef7e6576e4f..5fb20daccee 100644 --- a/util/x1764.cc +++ b/util/x1764.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/util/x1764.h b/util/x1764.h index ff6b3ea0d8d..1d83e5a1853 100644 --- a/util/x1764.h +++ b/util/x1764.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: From f176b29b5e9ca3c5cac45adf2f5fa2b1c892e65a Mon Sep 17 00:00:00 2001 From: John Esmet Date: Thu, 21 Aug 2014 15:35:07 -0400 Subject: [PATCH 155/190] FT-396 Rename various tokudb-containing functions and comments to instead refer to tokuft --- buildheader/make_tdb.cc | 12 ++-- ft/cachetable/cachetable.cc | 2 +- ft/cachetable/checkpoint.cc | 2 +- ft/ft-flusher.cc | 2 +- ft/ft-hot-flusher.cc | 2 +- ft/ft-ops.cc | 2 +- ft/ft-ops.h | 2 +- ft/ft.cc | 17 ++--- ft/ft.h | 4 +- ft/logger/log-internal.h | 2 +- ft/logger/log_upgrade.cc | 2 +- ft/logger/logcursor.cc | 14 ++-- ft/logger/logger.cc | 2 +- ft/logger/recover.cc | 70 +++++++++---------- ft/logger/recover.h | 12 ++-- ft/serialize/ft-serialize.cc | 2 +- ft/serialize/ft_node-serialize.cc | 6 +- ft/tests/recovery-bad-last-entry.cc | 2 +- ft/tests/recovery-cbegin-cend-hello.cc | 2 +- ft/tests/recovery-cbegin-cend.cc | 2 +- ft/tests/recovery-cbegin.cc | 2 +- ft/tests/recovery-cend-cbegin.cc | 2 +- ft/tests/recovery-datadir-is-file.cc | 2 +- ft/tests/recovery-empty.cc | 2 +- ft/tests/recovery-fopen-missing-file.cc | 4 +- ft/tests/recovery-hello.cc | 2 +- .../recovery-lsn-error-during-forward-scan.cc | 2 +- ft/tests/recovery-no-datadir.cc | 2 +- ft/tests/recovery-no-log.cc | 2 +- ft/tests/recovery-no-logdir.cc | 2 +- ft/tests/test-ft-txns.h | 2 +- ft/txn/txn.cc | 2 +- ft/ule.cc | 2 +- locktree/manager.cc | 2 +- portability/file.cc | 4 +- src/indexer.cc | 4 +- src/loader.cc | 2 +- src/tests/hot-optimize-table-tests.cc | 2 +- src/tests/test938c.cc | 2 +- src/tests/test_abort1.cc | 2 +- src/tests/test_db_env_open_nocreate.cc | 2 +- src/tests/test_db_open_notexist_reopen.cc | 2 +- src/tests/test_log0.cc | 2 +- src/tests/test_log1.cc | 2 +- src/tests/test_log10.cc | 2 +- src/tests/test_log2.cc | 2 +- src/tests/test_log2_abort.cc | 2 +- src/tests/test_log3.cc | 2 +- src/tests/test_log4.cc | 2 +- src/tests/test_log5.cc | 2 +- src/tests/test_log6.cc | 2 +- src/tests/test_log7.cc | 2 +- src/tests/test_log8.cc | 2 +- 
src/tests/test_log9.cc | 2 +- src/tests/upgrade-test-1.cc | 2 +- src/tests/upgrade-test-2.cc | 2 +- src/tests/upgrade-test-3.cc | 4 +- src/tests/upgrade-test-4.cc | 2 +- src/tests/upgrade-test-5.cc | 2 +- src/tests/upgrade-test-7.cc | 4 +- src/ydb.cc | 48 ++++++------- src/ydb_cursor.cc | 2 +- src/ydb_db.cc | 2 +- src/ydb_lib.cc | 6 +- src/ydb_write.cc | 2 +- tools/ftverify.cc | 2 +- tools/tdb-recover.cc | 2 +- util/context.cc | 2 +- util/rwlock.h | 2 +- util/status.h | 4 +- 70 files changed, 159 insertions(+), 164 deletions(-) diff --git a/buildheader/make_tdb.cc b/buildheader/make_tdb.cc index 6b25226fa5b..9890b8ed34b 100644 --- a/buildheader/make_tdb.cc +++ b/buildheader/make_tdb.cc @@ -341,8 +341,8 @@ static void print_defines (void) { dodefine_from_track(txn_flags, DB_TXN_READ_ONLY); } - /* TOKUDB specific error codes*/ - printf("/* TOKUDB specific error codes */\n"); + /* TokuFT specific error codes*/ + printf("/* TokuFT specific error codes */\n"); dodefine(TOKUDB_OUT_OF_LOCKS); dodefine(TOKUDB_SUCCEEDED_EARLY); dodefine(TOKUDB_FOUND_BUT_REJECTED); @@ -422,7 +422,7 @@ static void print_db_env_struct (void) { "int (*cleaner_set_iterations) (DB_ENV*, uint32_t) /* Change the number of attempts on each cleaner invokation. 0 means disabled. */", "int (*cleaner_get_iterations) (DB_ENV*, uint32_t*) /* Retrieve the number of attempts on each cleaner invokation. 0 means disabled. */", "int (*checkpointing_postpone) (DB_ENV*) /* Use for 'rename table' or any other operation that must be disjoint from a checkpoint */", - "int (*checkpointing_resume) (DB_ENV*) /* Alert tokudb 'postpone' is no longer necessary */", + "int (*checkpointing_resume) (DB_ENV*) /* Alert tokuft that 'postpone' is no longer necessary */", "int (*checkpointing_begin_atomic_operation) (DB_ENV*) /* Begin a set of operations (that must be atomic as far as checkpoints are concerned). i.e. 
inserting into every index in one table */", "int (*checkpointing_end_atomic_operation) (DB_ENV*) /* End a set of operations (that must be atomic as far as checkpoints are concerned). */", "int (*set_default_bt_compare) (DB_ENV*,int (*bt_compare) (DB *, const DBT *, const DBT *)) /* Set default (key) comparison function for all DBs in this environment. Required for RECOVERY since you cannot open the DBs manually. */", @@ -575,7 +575,7 @@ static void print_db_txn_struct (void) { STRUCT_SETUP(DB_TXN, prepare, "int (*%s) (DB_TXN*, uint8_t gid[DB_GID_SIZE])"); STRUCT_SETUP(DB_TXN, discard, "int (*%s) (DB_TXN*, uint32_t)"); STRUCT_SETUP(DB_TXN, id, "uint32_t (*%s) (DB_TXN *)"); - STRUCT_SETUP(DB_TXN, mgrp, "DB_ENV *%s /*In TokuDB, mgrp is a DB_ENV not a DB_TXNMGR*/"); + STRUCT_SETUP(DB_TXN, mgrp, "DB_ENV *%s /* In TokuFT, mgrp is a DB_ENV, not a DB_TXNMGR */"); STRUCT_SETUP(DB_TXN, parent, "DB_TXN *%s"); const char *extra[] = { "int (*txn_stat)(DB_TXN *, struct txn_stat **)", @@ -639,9 +639,9 @@ int main (int argc, char *const argv[] __attribute__((__unused__))) { printf("#define DB_VERSION_MAJOR %d\n", DB_VERSION_MAJOR); printf("#define DB_VERSION_MINOR %d\n", DB_VERSION_MINOR); - printf("/* As of r40364 (post TokuDB 5.2.7), the patch version number is 100+ the BDB header patch version number.*/\n"); + printf("/* As of r40364 (post TokuFT 5.2.7), the patch version number is 100+ the BDB header patch version number.*/\n"); printf("#define DB_VERSION_PATCH %d\n", 100+DB_VERSION_PATCH); - printf("#define DB_VERSION_STRING \"Tokutek: TokuDB %d.%d.%d\"\n", DB_VERSION_MAJOR, DB_VERSION_MINOR, 100+DB_VERSION_PATCH); + printf("#define DB_VERSION_STRING \"Tokutek: TokuFT %d.%d.%d\"\n", DB_VERSION_MAJOR, DB_VERSION_MINOR, 100+DB_VERSION_PATCH); #ifndef DB_GID_SIZE #define DB_GID_SIZE DB_XIDDATASIZE diff --git a/ft/cachetable/cachetable.cc b/ft/cachetable/cachetable.cc index bcd9fc9b562..794e3abdca9 100644 --- a/ft/cachetable/cachetable.cc +++ b/ft/cachetable/cachetable.cc @@ 
-129,7 +129,7 @@ static CACHETABLE_STATUS_S ct_status; // Note, toku_cachetable_get_status() is below, after declaration of cachetable. -#define STATUS_INIT(k,c,t,l,inc) TOKUDB_STATUS_INIT(ct_status, k, c, t, "cachetable: " l, inc) +#define STATUS_INIT(k,c,t,l,inc) TOKUFT_STATUS_INIT(ct_status, k, c, t, "cachetable: " l, inc) static void status_init(void) { diff --git a/ft/cachetable/checkpoint.cc b/ft/cachetable/checkpoint.cc index 7d9c45f3cb7..492893ddc7b 100644 --- a/ft/cachetable/checkpoint.cc +++ b/ft/cachetable/checkpoint.cc @@ -147,7 +147,7 @@ PATENT RIGHTS GRANT: static CHECKPOINT_STATUS_S cp_status; -#define STATUS_INIT(k,c,t,l,inc) TOKUDB_STATUS_INIT(cp_status, k, c, t, "checkpoint: " l, inc) +#define STATUS_INIT(k,c,t,l,inc) TOKUFT_STATUS_INIT(cp_status, k, c, t, "checkpoint: " l, inc) static void status_init(void) { diff --git a/ft/ft-flusher.cc b/ft/ft-flusher.cc index 1c11f276f4f..bb22d52f763 100644 --- a/ft/ft-flusher.cc +++ b/ft/ft-flusher.cc @@ -107,7 +107,7 @@ PATENT RIGHTS GRANT: */ static FT_FLUSHER_STATUS_S ft_flusher_status; -#define STATUS_INIT(k,c,t,l,inc) TOKUDB_STATUS_INIT(ft_flusher_status, k, c, t, "ft flusher: " l, inc) +#define STATUS_INIT(k,c,t,l,inc) TOKUFT_STATUS_INIT(ft_flusher_status, k, c, t, "ft flusher: " l, inc) #define STATUS_VALUE(x) ft_flusher_status.status[x].value.num void toku_ft_flusher_status_init(void) { diff --git a/ft/ft-hot-flusher.cc b/ft/ft-hot-flusher.cc index 0a8bd131bac..55230e75da0 100644 --- a/ft/ft-hot-flusher.cc +++ b/ft/ft-hot-flusher.cc @@ -120,7 +120,7 @@ struct hot_flusher_extra { static FT_HOT_STATUS_S hot_status; -#define STATUS_INIT(k,c,t,l,inc) TOKUDB_STATUS_INIT(hot_status, k, c, t, "hot: " l, inc) +#define STATUS_INIT(k,c,t,l,inc) TOKUFT_STATUS_INIT(hot_status, k, c, t, "hot: " l, inc) #define STATUS_VALUE(x) hot_status.status[x].value.num diff --git a/ft/ft-ops.cc b/ft/ft-ops.cc index 53168f87d64..a48b785700e 100644 --- a/ft/ft-ops.cc +++ b/ft/ft-ops.cc @@ -237,7 +237,7 @@ basement nodes, bulk 
fetch, and partial fetch: */ static FT_STATUS_S ft_status; -#define STATUS_INIT(k,c,t,l,inc) TOKUDB_STATUS_INIT(ft_status, k, c, t, "ft: " l, inc) +#define STATUS_INIT(k,c,t,l,inc) TOKUFT_STATUS_INIT(ft_status, k, c, t, "ft: " l, inc) static toku_mutex_t ft_open_close_lock; diff --git a/ft/ft-ops.h b/ft/ft-ops.h index 9b28d75b111..dae335a3cc2 100644 --- a/ft/ft-ops.h +++ b/ft/ft-ops.h @@ -321,7 +321,7 @@ bool toku_ft_is_empty_fast (FT_HANDLE ft_h) __attribute__ ((warn_unused_result)) int toku_ft_strerror_r(int error, char *buf, size_t buflen); // Effect: LIke the XSI-compliant strerorr_r, extended to db_strerror(). // If error>=0 then the result is to do strerror_r(error, buf, buflen), that is fill buf with a descriptive error message. -// If error<0 then return a TokuDB-specific error code. For unknown cases, we return -1 and set errno=EINVAL, even for cases that *should* be known. (Not all DB errors are known by this function which is a bug.) +// If error<0 then return a TokuFT-specific error code. For unknown cases, we return -1 and set errno=EINVAL, even for cases that *should* be known. (Not all DB errors are known by this function which is a bug.) extern bool garbage_collection_debug; diff --git a/ft/ft.cc b/ft/ft.cc index f0fd148c83f..fd3960b64f6 100644 --- a/ft/ft.cc +++ b/ft/ft.cc @@ -1090,8 +1090,6 @@ void toku_ft_get_garbage(FT ft, uint64_t *total_space, uint64_t *used_space) { #error #endif - - #define xstr(X) str(X) #define str(X) #X #define static_version_string xstr(DB_VERSION_MAJOR) "." \ @@ -1101,10 +1099,9 @@ void toku_ft_get_garbage(FT ft, uint64_t *total_space, uint64_t *used_space) { struct toku_product_name_strings_struct toku_product_name_strings; char toku_product_name[TOKU_MAX_PRODUCT_NAME_LENGTH]; -void -tokudb_update_product_name_strings(void) { - //DO ALL STRINGS HERE.. maybe have a separate FT layer version as well - { // Version string +void tokuft_update_product_name_strings(void) { + // DO ALL STRINGS HERE.. 
maybe have a separate FT layer version as well + { int n = snprintf(toku_product_name_strings.db_version, sizeof(toku_product_name_strings.db_version), "%s %s", toku_product_name, static_version_string); @@ -1156,7 +1153,7 @@ toku_single_process_lock(const char *lock_dir, const char *which, int *lockfd) { *lockfd = toku_os_lock_file(lockfname); if (*lockfd < 0) { int e = get_error_errno(); - fprintf(stderr, "Couldn't start tokudb because some other tokudb process is using the same directory [%s] for [%s]\n", lock_dir, which); + fprintf(stderr, "Couldn't start tokuft because some other tokuft process is using the same directory [%s] for [%s]\n", lock_dir, which); return e; } return 0; @@ -1174,10 +1171,10 @@ toku_single_process_unlock(int *lockfd) { return 0; } -int tokudb_num_envs = 0; +int tokuft_num_envs = 0; int db_env_set_toku_product_name(const char *name) { - if (tokudb_num_envs > 0) { + if (tokuft_num_envs > 0) { return EINVAL; } if (!name || strlen(name) < 1) { @@ -1188,7 +1185,7 @@ db_env_set_toku_product_name(const char *name) { } if (strncmp(toku_product_name, name, sizeof(toku_product_name))) { strcpy(toku_product_name, name); - tokudb_update_product_name_strings(); + tokuft_update_product_name_strings(); } return 0; } diff --git a/ft/ft.h b/ft/ft.h index 224a917c7b4..336845475cc 100644 --- a/ft/ft.h +++ b/ft/ft.h @@ -219,7 +219,7 @@ struct toku_thread_pool *get_ft_pool(void); int toku_single_process_lock(const char *lock_dir, const char *which, int *lockfd); int toku_single_process_unlock(int *lockfd); -void tokudb_update_product_name_strings(void); +void tokuft_update_product_name_strings(void); #define TOKU_MAX_PRODUCT_NAME_LENGTH (256) extern char toku_product_name[TOKU_MAX_PRODUCT_NAME_LENGTH]; @@ -232,4 +232,4 @@ struct toku_product_name_strings_struct { }; extern struct toku_product_name_strings_struct toku_product_name_strings; -extern int tokudb_num_envs; +extern int tokuft_num_envs; diff --git a/ft/logger/log-internal.h 
b/ft/logger/log-internal.h index d588f0028d5..690a2ccfa08 100644 --- a/ft/logger/log-internal.h +++ b/ft/logger/log-internal.h @@ -157,7 +157,7 @@ struct tokulogger { DIR *dir; // descriptor for directory int fd; CACHETABLE ct; - int lg_max; // The size of the single file in the log. Default is 100MB in TokuDB + int lg_max; // The size of the single file in the log. Default is 100MB. // To access these, you must have the input lock LSN lsn; // the next available lsn diff --git a/ft/logger/log_upgrade.cc b/ft/logger/log_upgrade.cc index 5efc5b9b86c..60c87240168 100644 --- a/ft/logger/log_upgrade.cc +++ b/ft/logger/log_upgrade.cc @@ -228,7 +228,7 @@ verify_clean_shutdown_of_log_version(const char *log_dir, uint32_t version, LSN FOOTPRINT(1); r = verify_clean_shutdown_of_log_version_old(log_dir, last_lsn, last_xid, version); if (r != 0) { - fprintf(stderr, "Cannot upgrade TokuDB version %d database.", version); + fprintf(stderr, "Cannot upgrade TokuFT version %d database.", version); fprintf(stderr, " Previous improper shutdown detected.\n"); } } diff --git a/ft/logger/logcursor.cc b/ft/logger/logcursor.cc index 0026d113657..ee944c28078 100644 --- a/ft/logger/logcursor.cc +++ b/ft/logger/logcursor.cc @@ -191,7 +191,7 @@ static int lc_check_lsn(TOKULOGCURSOR lc, int dir) { // int index = lc->cur_logfiles_index; // fprintf(stderr, "Bad LSN: %d %s direction = %d, lsn.lsn = %" PRIu64 ", cur_lsn.lsn=%" PRIu64 "\n", // index, lc->logfiles[index], dir, lsn.lsn, lc->cur_lsn.lsn); - if (tokudb_recovery_trace) + if (tokuft_recovery_trace) printf("DB_RUNRECOVERY: %s:%d r=%d\n", __FUNCTION__, __LINE__, 0); return LC_LSN_ERROR; } @@ -307,10 +307,10 @@ static int lc_log_read(TOKULOGCURSOR lc) toku_log_free_log_entry_resources(&(lc->entry)); time_t tnow = time(NULL); if (r==DB_BADFORMAT) { - fprintf(stderr, "%.24s Tokudb bad log format in %s\n", ctime(&tnow), lc->logfiles[lc->cur_logfiles_index]); + fprintf(stderr, "%.24s TokuFT bad log format in %s\n", ctime(&tnow), 
lc->logfiles[lc->cur_logfiles_index]); } else { - fprintf(stderr, "%.24s Tokudb unexpected log format error '%s' in %s\n", ctime(&tnow), strerror(r), lc->logfiles[lc->cur_logfiles_index]); + fprintf(stderr, "%.24s TokuFT unexpected log format error '%s' in %s\n", ctime(&tnow), strerror(r), lc->logfiles[lc->cur_logfiles_index]); } } return r; @@ -339,10 +339,10 @@ static int lc_log_read_backward(TOKULOGCURSOR lc) toku_log_free_log_entry_resources(&(lc->entry)); time_t tnow = time(NULL); if (r==DB_BADFORMAT) { - fprintf(stderr, "%.24s Tokudb bad log format in %s\n", ctime(&tnow), lc->logfiles[lc->cur_logfiles_index]); + fprintf(stderr, "%.24s TokuFT bad log format in %s\n", ctime(&tnow), lc->logfiles[lc->cur_logfiles_index]); } else { - fprintf(stderr, "%.24s Tokudb uUnexpected log format error '%s' in %s\n", ctime(&tnow), strerror(r), lc->logfiles[lc->cur_logfiles_index]); + fprintf(stderr, "%.24s TokuFT uUnexpected log format error '%s' in %s\n", ctime(&tnow), strerror(r), lc->logfiles[lc->cur_logfiles_index]); } } return r; @@ -460,10 +460,10 @@ int toku_logcursor_last(TOKULOGCURSOR lc, struct log_entry **le) { // probably a corrupted last log entry due to a crash // try scanning forward from the beginning to find the last good entry time_t tnow = time(NULL); - fprintf(stderr, "%.24s Tokudb recovery repairing log\n", ctime(&tnow)); + fprintf(stderr, "%.24s TokuFT recovery repairing log\n", ctime(&tnow)); r = lc_fix_bad_logfile(lc); if ( r != 0 ) { - fprintf(stderr, "%.24s Tokudb recovery repair unsuccessful\n", ctime(&tnow)); + fprintf(stderr, "%.24s TokuFT recovery repair unsuccessful\n", ctime(&tnow)); return DB_BADFORMAT; } // try reading again diff --git a/ft/logger/logger.cc b/ft/logger/logger.cc index 332d94b47e5..6550d9e5610 100644 --- a/ft/logger/logger.cc +++ b/ft/logger/logger.cc @@ -1391,7 +1391,7 @@ void toku_logger_note_checkpoint(TOKULOGGER logger, LSN lsn) { static LOGGER_STATUS_S logger_status; -#define STATUS_INIT(k,c,t,l,inc) 
TOKUDB_STATUS_INIT(logger_status, k, c, t, "logger: " l, inc) +#define STATUS_INIT(k,c,t,l,inc) TOKUFT_STATUS_INIT(logger_status, k, c, t, "logger: " l, inc) static void status_init(void) { diff --git a/ft/logger/recover.cc b/ft/logger/recover.cc index d194f3ebdd2..ca284568f07 100644 --- a/ft/logger/recover.cc +++ b/ft/logger/recover.cc @@ -99,7 +99,7 @@ PATENT RIGHTS GRANT: #include "ft/txn/txn_manager.h" #include "util/omt.h" -int tokudb_recovery_trace = 0; // turn on recovery tracing, default off. +int tokuft_recovery_trace = 0; // turn on recovery tracing, default off. //#define DO_VERIFY_COUNTS #ifdef DO_VERIFY_COUNTS @@ -317,7 +317,7 @@ static int recover_env_init (RECOVER_ENV renv, renv->cp = toku_cachetable_get_checkpointer(renv->ct); toku_dbt_array_init(&renv->dest_keys, 1); toku_dbt_array_init(&renv->dest_vals, 1); - if (tokudb_recovery_trace) + if (tokuft_recovery_trace) fprintf(stderr, "%s:%d\n", __FUNCTION__, __LINE__); return r; } @@ -344,7 +344,7 @@ static void recover_env_cleanup (RECOVER_ENV renv) { toku_dbt_array_destroy(&renv->dest_keys); toku_dbt_array_destroy(&renv->dest_vals); - if (tokudb_recovery_trace) + if (tokuft_recovery_trace) fprintf(stderr, "%s:%d\n", __FUNCTION__, __LINE__); } @@ -423,7 +423,7 @@ static int toku_recover_begin_checkpoint (struct logtype_begin_checkpoint *l, RE r = 0; // ignore it (log only has a begin checkpoint) break; default: - fprintf(stderr, "Tokudb recovery %s: %d Unknown checkpoint state %d\n", __FILE__, __LINE__, (int)renv->ss.ss); + fprintf(stderr, "TokuFT recovery %s: %d Unknown checkpoint state %d\n", __FILE__, __LINE__, (int)renv->ss.ss); abort(); break; } @@ -433,7 +433,7 @@ static int toku_recover_begin_checkpoint (struct logtype_begin_checkpoint *l, RE static int toku_recover_backward_begin_checkpoint (struct logtype_begin_checkpoint *l, RECOVER_ENV renv) { int r; time_t tnow = time(NULL); - fprintf(stderr, "%.24s Tokudb recovery bw_begin_checkpoint at %" PRIu64 " timestamp %" PRIu64 " (%s)\n", 
ctime(&tnow), l->lsn.lsn, l->timestamp, recover_state(renv)); + fprintf(stderr, "%.24s TokuFT recovery bw_begin_checkpoint at %" PRIu64 " timestamp %" PRIu64 " (%s)\n", ctime(&tnow), l->lsn.lsn, l->timestamp, recover_state(renv)); switch (renv->ss.ss) { case BACKWARD_NEWER_CHECKPOINT_END: // incomplete checkpoint, nothing to do @@ -445,13 +445,13 @@ static int toku_recover_backward_begin_checkpoint (struct logtype_begin_checkpoi renv->ss.checkpoint_begin_timestamp = l->timestamp; renv->goforward = true; tnow = time(NULL); - fprintf(stderr, "%.24s Tokudb recovery turning around at begin checkpoint %" PRIu64 " time %" PRIu64 "\n", + fprintf(stderr, "%.24s TokuFT recovery turning around at begin checkpoint %" PRIu64 " time %" PRIu64 "\n", ctime(&tnow), l->lsn.lsn, renv->ss.checkpoint_end_timestamp - renv->ss.checkpoint_begin_timestamp); r = 0; break; default: - fprintf(stderr, "Tokudb recovery %s: %d Unknown checkpoint state %d\n", __FILE__, __LINE__, (int)renv->ss.ss); + fprintf(stderr, "TokuFT recovery %s: %d Unknown checkpoint state %d\n", __FILE__, __LINE__, (int)renv->ss.ss); abort(); break; } @@ -481,7 +481,7 @@ static int toku_recover_end_checkpoint (struct logtype_end_checkpoint *l, RECOVE static int toku_recover_backward_end_checkpoint (struct logtype_end_checkpoint *l, RECOVER_ENV renv) { time_t tnow = time(NULL); - fprintf(stderr, "%.24s Tokudb recovery bw_end_checkpoint at %" PRIu64 " timestamp %" PRIu64 " xid %" PRIu64 " (%s)\n", ctime(&tnow), l->lsn.lsn, l->timestamp, l->lsn_begin_checkpoint.lsn, recover_state(renv)); + fprintf(stderr, "%.24s TokuFT recovery bw_end_checkpoint at %" PRIu64 " timestamp %" PRIu64 " xid %" PRIu64 " (%s)\n", ctime(&tnow), l->lsn.lsn, l->timestamp, l->lsn_begin_checkpoint.lsn, recover_state(renv)); switch (renv->ss.ss) { case BACKWARD_NEWER_CHECKPOINT_END: renv->ss.ss = BACKWARD_BETWEEN_CHECKPOINT_BEGIN_END; @@ -490,12 +490,12 @@ static int toku_recover_backward_end_checkpoint (struct logtype_end_checkpoint * 
renv->ss.checkpoint_end_timestamp = l->timestamp; return 0; case BACKWARD_BETWEEN_CHECKPOINT_BEGIN_END: - fprintf(stderr, "Tokudb recovery %s:%d Should not see two end_checkpoint log entries without an intervening begin_checkpoint\n", __FILE__, __LINE__); + fprintf(stderr, "TokuFT recovery %s:%d Should not see two end_checkpoint log entries without an intervening begin_checkpoint\n", __FILE__, __LINE__); abort(); default: break; } - fprintf(stderr, "Tokudb recovery %s: %d Unknown checkpoint state %d\n", __FILE__, __LINE__, (int)renv->ss.ss); + fprintf(stderr, "TokuFT recovery %s: %d Unknown checkpoint state %d\n", __FILE__, __LINE__, (int)renv->ss.ss); abort(); } @@ -832,7 +832,7 @@ static int toku_recover_fcreate (struct logtype_fcreate *l, RECOVER_ENV renv) { if (r != 0) { int er = get_error_errno(); if (er != ENOENT) { - fprintf(stderr, "Tokudb recovery %s:%d unlink %s %d\n", __FUNCTION__, __LINE__, iname, er); + fprintf(stderr, "TokuFT recovery %s:%d unlink %s %d\n", __FUNCTION__, __LINE__, iname, er); toku_free(iname); return r; } @@ -1259,7 +1259,7 @@ static int toku_recover_backward_hot_index(struct logtype_hot_index *UU(l), RECO // Effects: If there are no log files, or if there is a clean "shutdown" at // the end of the log, then we don't need recovery to run. // Returns: true if we need recovery, otherwise false. 
-int tokudb_needs_recovery(const char *log_dir, bool ignore_log_empty) { +int tokuft_needs_recovery(const char *log_dir, bool ignore_log_empty) { int needs_recovery; int r; TOKULOGCURSOR logcursor = NULL; @@ -1383,7 +1383,7 @@ static int do_recovery(RECOVER_ENV renv, const char *env_dir, const char *log_di struct log_entry *le = NULL; time_t tnow = time(NULL); - fprintf(stderr, "%.24s Tokudb recovery starting in env %s\n", ctime(&tnow), env_dir); + fprintf(stderr, "%.24s TokuFT recovery starting in env %s\n", ctime(&tnow), env_dir); char org_wd[1000]; { @@ -1404,7 +1404,7 @@ static int do_recovery(RECOVER_ENV renv, const char *env_dir, const char *log_di r = toku_logcursor_last(logcursor, &le); if (r != 0) { - if (tokudb_recovery_trace) + if (tokuft_recovery_trace) fprintf(stderr, "RUNRECOVERY: %s:%d r=%d\n", __FUNCTION__, __LINE__, r); rr = DB_RUNRECOVERY; goto errorexit; } @@ -1419,10 +1419,10 @@ static int do_recovery(RECOVER_ENV renv, const char *env_dir, const char *log_di toku_struct_stat buf; if (toku_stat(env_dir, &buf)!=0) { rr = get_error_errno(); - fprintf(stderr, "%.24s Tokudb recovery error: directory does not exist: %s\n", ctime(&tnow), env_dir); + fprintf(stderr, "%.24s TokuFT recovery error: directory does not exist: %s\n", ctime(&tnow), env_dir); goto errorexit; } else if (!S_ISDIR(buf.st_mode)) { - fprintf(stderr, "%.24s Tokudb recovery error: this file is supposed to be a directory, but is not: %s\n", ctime(&tnow), env_dir); + fprintf(stderr, "%.24s TokuFT recovery error: this file is supposed to be a directory, but is not: %s\n", ctime(&tnow), env_dir); rr = ENOTDIR; goto errorexit; } } @@ -1431,13 +1431,13 @@ static int do_recovery(RECOVER_ENV renv, const char *env_dir, const char *log_di tnow = time(NULL); time_t tlast; tlast = tnow; - fprintf(stderr, "%.24s Tokudb recovery scanning backward from %" PRIu64 "\n", ctime(&tnow), lastlsn.lsn); + fprintf(stderr, "%.24s TokuFT recovery scanning backward from %" PRIu64 "\n", ctime(&tnow), 
lastlsn.lsn); for (unsigned i=0; 1; i++) { // get the previous log entry (first time gets the last one) le = NULL; r = toku_logcursor_prev(logcursor, &le); - if (tokudb_recovery_trace) + if (tokuft_recovery_trace) recover_trace_le(__FUNCTION__, __LINE__, r, le); if (r != 0) { if (r == DB_NOTFOUND) @@ -1451,7 +1451,7 @@ static int do_recovery(RECOVER_ENV renv, const char *env_dir, const char *log_di tnow = time(NULL); if (tnow - tlast >= TOKUDB_RECOVERY_PROGRESS_TIME) { thislsn = toku_log_entry_get_lsn(le); - fprintf(stderr, "%.24s Tokudb recovery scanning backward from %" PRIu64 " at %" PRIu64 " (%s)\n", ctime(&tnow), lastlsn.lsn, thislsn.lsn, recover_state(renv)); + fprintf(stderr, "%.24s TokuFT recovery scanning backward from %" PRIu64 " at %" PRIu64 " (%s)\n", ctime(&tnow), lastlsn.lsn, thislsn.lsn, recover_state(renv)); tlast = tnow; } } @@ -1460,10 +1460,10 @@ static int do_recovery(RECOVER_ENV renv, const char *env_dir, const char *log_di assert(renv->ss.ss == BACKWARD_BETWEEN_CHECKPOINT_BEGIN_END || renv->ss.ss == BACKWARD_NEWER_CHECKPOINT_END); logtype_dispatch_assign(le, toku_recover_backward_, r, renv); - if (tokudb_recovery_trace) + if (tokuft_recovery_trace) recover_trace_le(__FUNCTION__, __LINE__, r, le); if (r != 0) { - if (tokudb_recovery_trace) + if (tokuft_recovery_trace) fprintf(stderr, "DB_RUNRECOVERY: %s:%d r=%d\n", __FUNCTION__, __LINE__, r); rr = DB_RUNRECOVERY; goto errorexit; @@ -1480,7 +1480,7 @@ static int do_recovery(RECOVER_ENV renv, const char *env_dir, const char *log_di assert(le); thislsn = toku_log_entry_get_lsn(le); tnow = time(NULL); - fprintf(stderr, "%.24s Tokudb recovery starts scanning forward to %" PRIu64 " from %" PRIu64 " left %" PRIu64 " (%s)\n", ctime(&tnow), lastlsn.lsn, thislsn.lsn, lastlsn.lsn - thislsn.lsn, recover_state(renv)); + fprintf(stderr, "%.24s TokuFT recovery starts scanning forward to %" PRIu64 " from %" PRIu64 " left %" PRIu64 " (%s)\n", ctime(&tnow), lastlsn.lsn, thislsn.lsn, lastlsn.lsn - thislsn.lsn, 
recover_state(renv)); for (unsigned i=0; 1; i++) { @@ -1489,7 +1489,7 @@ static int do_recovery(RECOVER_ENV renv, const char *env_dir, const char *log_di tnow = time(NULL); if (tnow - tlast >= TOKUDB_RECOVERY_PROGRESS_TIME) { thislsn = toku_log_entry_get_lsn(le); - fprintf(stderr, "%.24s Tokudb recovery scanning forward to %" PRIu64 " at %" PRIu64 " left %" PRIu64 " (%s)\n", ctime(&tnow), lastlsn.lsn, thislsn.lsn, lastlsn.lsn - thislsn.lsn, recover_state(renv)); + fprintf(stderr, "%.24s TokuFT recovery scanning forward to %" PRIu64 " at %" PRIu64 " left %" PRIu64 " (%s)\n", ctime(&tnow), lastlsn.lsn, thislsn.lsn, lastlsn.lsn - thislsn.lsn, recover_state(renv)); tlast = tnow; } } @@ -1498,10 +1498,10 @@ static int do_recovery(RECOVER_ENV renv, const char *env_dir, const char *log_di assert(renv->ss.ss == FORWARD_BETWEEN_CHECKPOINT_BEGIN_END || renv->ss.ss == FORWARD_NEWER_CHECKPOINT_END); logtype_dispatch_assign(le, toku_recover_, r, renv); - if (tokudb_recovery_trace) + if (tokuft_recovery_trace) recover_trace_le(__FUNCTION__, __LINE__, r, le); if (r != 0) { - if (tokudb_recovery_trace) + if (tokuft_recovery_trace) fprintf(stderr, "DB_RUNRECOVERY: %s:%d r=%d\n", __FUNCTION__, __LINE__, r); rr = DB_RUNRECOVERY; goto errorexit; @@ -1510,7 +1510,7 @@ static int do_recovery(RECOVER_ENV renv, const char *env_dir, const char *log_di // get the next log entry le = NULL; r = toku_logcursor_next(logcursor, &le); - if (tokudb_recovery_trace) + if (tokuft_recovery_trace) recover_trace_le(__FUNCTION__, __LINE__, r, le); if (r != 0) { if (r == DB_NOTFOUND) @@ -1538,7 +1538,7 @@ static int do_recovery(RECOVER_ENV renv, const char *env_dir, const char *log_di uint32_t n = recover_get_num_live_txns(renv); if (n > 0) { tnow = time(NULL); - fprintf(stderr, "%.24s Tokudb recovery has %" PRIu32 " live transaction%s\n", ctime(&tnow), n, n > 1 ? "s" : ""); + fprintf(stderr, "%.24s TokuFT recovery has %" PRIu32 " live transaction%s\n", ctime(&tnow), n, n > 1 ? 
"s" : ""); } } recover_abort_all_live_txns(renv); @@ -1546,7 +1546,7 @@ static int do_recovery(RECOVER_ENV renv, const char *env_dir, const char *log_di uint32_t n = recover_get_num_live_txns(renv); if (n > 0) { tnow = time(NULL); - fprintf(stderr, "%.24s Tokudb recovery has %" PRIu32 " prepared transaction%s\n", ctime(&tnow), n, n > 1 ? "s" : ""); + fprintf(stderr, "%.24s TokuFT recovery has %" PRIu32 " prepared transaction%s\n", ctime(&tnow), n, n > 1 ? "s" : ""); } } @@ -1555,7 +1555,7 @@ static int do_recovery(RECOVER_ENV renv, const char *env_dir, const char *log_di n = file_map_get_num_dictionaries(&renv->fmap); if (n > 0) { tnow = time(NULL); - fprintf(stderr, "%.24s Tokudb recovery closing %" PRIu32 " dictionar%s\n", ctime(&tnow), n, n > 1 ? "ies" : "y"); + fprintf(stderr, "%.24s TokuFT recovery closing %" PRIu32 " dictionar%s\n", ctime(&tnow), n, n > 1 ? "ies" : "y"); } file_map_close_dictionaries(&renv->fmap, lastlsn); @@ -1567,17 +1567,17 @@ static int do_recovery(RECOVER_ENV renv, const char *env_dir, const char *log_di // checkpoint tnow = time(NULL); - fprintf(stderr, "%.24s Tokudb recovery making a checkpoint\n", ctime(&tnow)); + fprintf(stderr, "%.24s TokuFT recovery making a checkpoint\n", ctime(&tnow)); r = toku_checkpoint(renv->cp, renv->logger, NULL, NULL, NULL, NULL, RECOVERY_CHECKPOINT); assert(r == 0); tnow = time(NULL); - fprintf(stderr, "%.24s Tokudb recovery done\n", ctime(&tnow)); + fprintf(stderr, "%.24s TokuFT recovery done\n", ctime(&tnow)); return 0; errorexit: tnow = time(NULL); - fprintf(stderr, "%.24s Tokudb recovery failed %d\n", ctime(&tnow), rr); + fprintf(stderr, "%.24s TokuFT recovery failed %d\n", ctime(&tnow), rr); if (logcursor) { r = toku_logcursor_destroy(&logcursor); @@ -1602,7 +1602,7 @@ toku_recover_unlock(int lockfd) { return toku_single_process_unlock(&lockfd_copy); } -int tokudb_recover(DB_ENV *env, +int tokuft_recover(DB_ENV *env, prepared_txn_callback_t prepared_txn_callback, keep_cachetable_callback_t 
keep_cachetable_callback, TOKULOGGER logger, @@ -1620,7 +1620,7 @@ int tokudb_recover(DB_ENV *env, return r; int rr = 0; - if (tokudb_needs_recovery(log_dir, false)) { + if (tokuft_needs_recovery(log_dir, false)) { struct recover_env renv; r = recover_env_init(&renv, env_dir, @@ -1649,7 +1649,7 @@ int tokudb_recover(DB_ENV *env, // Return 0 if recovery log exists, ENOENT if log is missing int -tokudb_recover_log_exists(const char * log_dir) { +tokuft_recover_log_exists(const char * log_dir) { int r; TOKULOGCURSOR logcursor; diff --git a/ft/logger/recover.h b/ft/logger/recover.h index 47815332ffa..f08abc44200 100644 --- a/ft/logger/recover.h +++ b/ft/logger/recover.h @@ -105,9 +105,9 @@ PATENT RIGHTS GRANT: typedef void (*prepared_txn_callback_t)(DB_ENV *env, struct tokutxn *txn); typedef void (*keep_cachetable_callback_t)(DB_ENV *env, struct cachetable *ct); -// Run tokudb recovery from the log +// Run tokuft recovery from the log // Returns 0 if success -int tokudb_recover(DB_ENV *env, +int tokuft_recover(DB_ENV *env, prepared_txn_callback_t prepared_txn_callback, keep_cachetable_callback_t keep_cachetable_callback, struct tokulogger *logger, @@ -119,20 +119,20 @@ int tokudb_recover(DB_ENV *env, generate_row_for_del_func generate_row_for_del, size_t cachetable_size); -// Effect: Check the tokudb logs to determine whether or not we need to run recovery. +// Effect: Check the tokuft logs to determine whether or not we need to run recovery. // If the log is empty or if there is a clean shutdown at the end of the log, then we // dont need to run recovery. // Returns: true if we need recovery, otherwise false. 
-int tokudb_needs_recovery(const char *logdir, bool ignore_empty_log); +int tokuft_needs_recovery(const char *logdir, bool ignore_empty_log); // Return 0 if recovery log exists, ENOENT if log is missing -int tokudb_recover_log_exists(const char * log_dir); +int tokuft_recover_log_exists(const char * log_dir); // For test only - set callbacks for recovery testing void toku_recover_set_callback (void (*)(void*), void*); void toku_recover_set_callback2 (void (*)(void*), void*); -extern int tokudb_recovery_trace; +extern int tokuft_recovery_trace; int toku_recover_lock (const char *lock_dir, int *lockfd); diff --git a/ft/serialize/ft-serialize.cc b/ft/serialize/ft-serialize.cc index 855fa19c6e2..4e447592255 100644 --- a/ft/serialize/ft-serialize.cc +++ b/ft/serialize/ft-serialize.cc @@ -149,7 +149,7 @@ toku_serialize_descriptor_contents_to_fd(int fd, DESCRIPTOR desc, DISKOFF offset static void deserialize_descriptor_from_rbuf(struct rbuf *rb, DESCRIPTOR desc, int layout_version) { if (layout_version <= FT_LAYOUT_VERSION_13) { - // in older versions of TokuDB the Descriptor had a 4 byte + // in older versions of tokuft, the descriptor had a 4 byte // version, which we skip over (void) rbuf_int(rb); } diff --git a/ft/serialize/ft_node-serialize.cc b/ft/serialize/ft_node-serialize.cc index e9bcd416131..df067eefa22 100644 --- a/ft/serialize/ft_node-serialize.cc +++ b/ft/serialize/ft_node-serialize.cc @@ -109,7 +109,7 @@ PATENT RIGHTS GRANT: static FT_UPGRADE_STATUS_S ft_upgrade_status; -#define STATUS_INIT(k,c,t,l,inc) TOKUDB_STATUS_INIT(ft_upgrade_status, k, c, t, "ft upgrade: " l, inc) +#define STATUS_INIT(k,c,t,l,inc) TOKUFT_STATUS_INIT(ft_upgrade_status, k, c, t, "ft upgrade: " l, inc) static void status_init(void) @@ -415,7 +415,7 @@ compress_ftnode_sub_block(struct sub_block *sb, enum toku_compression_method met // // This probably seems a bit complicated. Here is what is going on. 
- // In TokuDB 5.0, sub_blocks were compressed and the compressed data + // In TokuFT 5.0, sub_blocks were compressed and the compressed data // was checksummed. The checksum did NOT include the size of the compressed data // and the size of the uncompressed data. The fields of sub_block only reference the // compressed data, and it is the responsibility of the user of the sub_block @@ -448,7 +448,7 @@ compress_ftnode_sub_block(struct sub_block *sb, enum toku_compression_method met // two integers at the beginning, the size and uncompressed size, and then the compressed // data. sb->xsum contains the checksum of this entire thing. // - // In TokuDB 5.0, sb->compressed_ptr only contained the compressed data, sb->xsum + // In TokuFT 5.0, sb->compressed_ptr only contained the compressed data, sb->xsum // checksummed only the compressed data, and the checksumming of the sizes were not // done here. // diff --git a/ft/tests/recovery-bad-last-entry.cc b/ft/tests/recovery-bad-last-entry.cc index 78c3be54b60..80d0f295465 100644 --- a/ft/tests/recovery-bad-last-entry.cc +++ b/ft/tests/recovery-bad-last-entry.cc @@ -146,7 +146,7 @@ run_test(void) { else break; // run recovery - r = tokudb_recover(NULL, + r = tokuft_recover(NULL, NULL_prepared_txn_callback, NULL_keep_cachetable_callback, NULL_logger, diff --git a/ft/tests/recovery-cbegin-cend-hello.cc b/ft/tests/recovery-cbegin-cend-hello.cc index 3fb2b84d300..6686ba61f64 100644 --- a/ft/tests/recovery-cbegin-cend-hello.cc +++ b/ft/tests/recovery-cbegin-cend-hello.cc @@ -128,7 +128,7 @@ run_test(void) { r = close(devnul); assert(r==0); // run recovery - r = tokudb_recover(NULL, + r = tokuft_recover(NULL, NULL_prepared_txn_callback, NULL_keep_cachetable_callback, NULL_logger, TOKU_TEST_FILENAME, TOKU_TEST_FILENAME, 0, 0, 0, NULL, 0); diff --git a/ft/tests/recovery-cbegin-cend.cc b/ft/tests/recovery-cbegin-cend.cc index 902a4e783b8..ef953923813 100644 --- a/ft/tests/recovery-cbegin-cend.cc +++ b/ft/tests/recovery-cbegin-cend.cc 
@@ -113,7 +113,7 @@ run_test(void) { r = toku_logger_close(&logger); assert(r == 0); // run recovery - r = tokudb_recover(NULL, + r = tokuft_recover(NULL, NULL_prepared_txn_callback, NULL_keep_cachetable_callback, NULL_logger, TOKU_TEST_FILENAME, TOKU_TEST_FILENAME, 0, 0, 0, NULL, 0); diff --git a/ft/tests/recovery-cbegin.cc b/ft/tests/recovery-cbegin.cc index b3edff58c89..54d69d2a605 100644 --- a/ft/tests/recovery-cbegin.cc +++ b/ft/tests/recovery-cbegin.cc @@ -119,7 +119,7 @@ run_test(void) { r = close(devnul); assert(r==0); - r = tokudb_recover(NULL, + r = tokuft_recover(NULL, NULL_prepared_txn_callback, NULL_keep_cachetable_callback, NULL_logger, TOKU_TEST_FILENAME, TOKU_TEST_FILENAME, 0, 0, 0, NULL, 0); diff --git a/ft/tests/recovery-cend-cbegin.cc b/ft/tests/recovery-cend-cbegin.cc index 89d8d48aa24..d03b95fd9f7 100644 --- a/ft/tests/recovery-cend-cbegin.cc +++ b/ft/tests/recovery-cend-cbegin.cc @@ -121,7 +121,7 @@ run_test(void) { } // run recovery - r = tokudb_recover(NULL, + r = tokuft_recover(NULL, NULL_prepared_txn_callback, NULL_keep_cachetable_callback, NULL_logger, TOKU_TEST_FILENAME, TOKU_TEST_FILENAME, diff --git a/ft/tests/recovery-datadir-is-file.cc b/ft/tests/recovery-datadir-is-file.cc index aa5f52ee681..5df3b6bdca4 100644 --- a/ft/tests/recovery-datadir-is-file.cc +++ b/ft/tests/recovery-datadir-is-file.cc @@ -130,7 +130,7 @@ run_test(void) { strncat(buf, testfile, TOKU_PATH_MAX); r = system(buf); CKERR(r); } - r = tokudb_recover(NULL, + r = tokuft_recover(NULL, NULL_prepared_txn_callback, NULL_keep_cachetable_callback, NULL_logger, diff --git a/ft/tests/recovery-empty.cc b/ft/tests/recovery-empty.cc index 839161918f7..37acb97e82b 100644 --- a/ft/tests/recovery-empty.cc +++ b/ft/tests/recovery-empty.cc @@ -116,7 +116,7 @@ run_test(void) { } // run recovery - r = tokudb_recover(NULL, + r = tokuft_recover(NULL, NULL_prepared_txn_callback, NULL_keep_cachetable_callback, NULL_logger, TOKU_TEST_FILENAME, TOKU_TEST_FILENAME, 0, 0, 0, NULL, 0); diff 
--git a/ft/tests/recovery-fopen-missing-file.cc b/ft/tests/recovery-fopen-missing-file.cc index 4fd3851fef7..7590ea162bb 100644 --- a/ft/tests/recovery-fopen-missing-file.cc +++ b/ft/tests/recovery-fopen-missing-file.cc @@ -109,7 +109,7 @@ run_test(void) { toku_log_begin_checkpoint(logger, &beginlsn, true, 0, 0); toku_log_end_checkpoint(logger, NULL, true, beginlsn, 0, 0, 0); - BYTESTRING iname = { (uint32_t) strlen("missing_tokudb_file"), (char *) "missing_tokudb_file" }; + BYTESTRING iname = { (uint32_t) strlen("missing_tokuft_file"), (char *) "missing_tokuft_file" }; FILENUM filenum = {42}; uint32_t treeflags = 0; toku_log_fopen(logger, NULL, true, iname, filenum, treeflags); @@ -122,7 +122,7 @@ run_test(void) { r = close(devnul); assert(r==0); // run recovery - r = tokudb_recover(NULL, + r = tokuft_recover(NULL, NULL_prepared_txn_callback, NULL_keep_cachetable_callback, NULL_logger, TOKU_TEST_FILENAME, TOKU_TEST_FILENAME, 0, 0, 0, NULL, 0); diff --git a/ft/tests/recovery-hello.cc b/ft/tests/recovery-hello.cc index 22fd7df1084..36126c576af 100644 --- a/ft/tests/recovery-hello.cc +++ b/ft/tests/recovery-hello.cc @@ -122,7 +122,7 @@ run_test(void) { r = close(devnul); assert(r==0); // run recovery - r = tokudb_recover(NULL, + r = tokuft_recover(NULL, NULL_prepared_txn_callback, NULL_keep_cachetable_callback, NULL_logger, TOKU_TEST_FILENAME, TOKU_TEST_FILENAME, 0, 0, 0, NULL, 0); diff --git a/ft/tests/recovery-lsn-error-during-forward-scan.cc b/ft/tests/recovery-lsn-error-during-forward-scan.cc index 4edbaa82afc..f21c307ccf5 100644 --- a/ft/tests/recovery-lsn-error-during-forward-scan.cc +++ b/ft/tests/recovery-lsn-error-during-forward-scan.cc @@ -152,7 +152,7 @@ run_test(void) { toku_recover_set_callback(recover_callback_at_turnaround, NULL); // run recovery - r = tokudb_recover(NULL, + r = tokuft_recover(NULL, NULL_prepared_txn_callback, NULL_keep_cachetable_callback, NULL_logger, TOKU_TEST_FILENAME, TOKU_TEST_FILENAME, 0, 0, 0, NULL, 0); diff --git 
a/ft/tests/recovery-no-datadir.cc b/ft/tests/recovery-no-datadir.cc index 689efcd104c..b79ea03bca5 100644 --- a/ft/tests/recovery-no-datadir.cc +++ b/ft/tests/recovery-no-datadir.cc @@ -116,7 +116,7 @@ run_test(void) { r = close(devnul); assert(r==0); // run recovery - r = tokudb_recover(NULL, + r = tokuft_recover(NULL, NULL_prepared_txn_callback, NULL_keep_cachetable_callback, NULL_logger, "/junk", TOKU_TEST_FILENAME, 0, 0, 0, NULL, 0); diff --git a/ft/tests/recovery-no-log.cc b/ft/tests/recovery-no-log.cc index c11be54d7c8..a2fd7b2e010 100644 --- a/ft/tests/recovery-no-log.cc +++ b/ft/tests/recovery-no-log.cc @@ -108,7 +108,7 @@ run_test(void) { r = close(devnul); assert(r==0); // run recovery - r = tokudb_recover(NULL, + r = tokuft_recover(NULL, NULL_prepared_txn_callback, NULL_keep_cachetable_callback, NULL_logger, TOKU_TEST_FILENAME, TOKU_TEST_FILENAME, 0, 0, 0, NULL, 0); diff --git a/ft/tests/recovery-no-logdir.cc b/ft/tests/recovery-no-logdir.cc index 327cd544861..3e889b665a6 100644 --- a/ft/tests/recovery-no-logdir.cc +++ b/ft/tests/recovery-no-logdir.cc @@ -102,7 +102,7 @@ run_test(void) { r = toku_os_mkdir(TOKU_TEST_FILENAME, S_IRWXU); assert(r == 0); // run recovery - r = tokudb_recover(NULL, + r = tokuft_recover(NULL, NULL_prepared_txn_callback, NULL_keep_cachetable_callback, NULL_logger, NULL, NULL, 0, 0, 0, NULL, 0); diff --git a/ft/tests/test-ft-txns.h b/ft/tests/test-ft-txns.h index 3f7d38e307e..04b2cfdf8bf 100644 --- a/ft/tests/test-ft-txns.h +++ b/ft/tests/test-ft-txns.h @@ -136,7 +136,7 @@ static inline void test_setup_and_recover(const char *envdir, TOKULOGGER *logger CKERR(r); DB_ENV *CAST_FROM_VOIDP(ctv, (void *) &ct); // Use intermediate to avoid compiler warning. 
- r = tokudb_recover(ctv, + r = tokuft_recover(ctv, NULL_prepared_txn_callback, xid_lsn_keep_cachetable_callback, logger, diff --git a/ft/txn/txn.cc b/ft/txn/txn.cc index a5de5b6adfd..7b475c2c975 100644 --- a/ft/txn/txn.cc +++ b/ft/txn/txn.cc @@ -107,7 +107,7 @@ PATENT RIGHTS GRANT: static TXN_STATUS_S txn_status; -#define STATUS_INIT(k,c,t,l,inc) TOKUDB_STATUS_INIT(txn_status, k, c, t, "txn: " l, inc) +#define STATUS_INIT(k,c,t,l,inc) TOKUFT_STATUS_INIT(txn_status, k, c, t, "txn: " l, inc) void txn_status_init(void) { diff --git a/ft/ule.cc b/ft/ule.cc index bc2944b90d9..03ec452cbd2 100644 --- a/ft/ule.cc +++ b/ft/ule.cc @@ -131,7 +131,7 @@ static uint32_t ule_get_innermost_numbytes(ULE ule, uint32_t keylen); static LE_STATUS_S le_status; -#define STATUS_INIT(k,c,t,l,inc) TOKUDB_STATUS_INIT(le_status, k, c, t, "le: " l, inc) +#define STATUS_INIT(k,c,t,l,inc) TOKUFT_STATUS_INIT(le_status, k, c, t, "le: " l, inc) void toku_ule_status_init(void) { // Note, this function initializes the keyname, type, and legend fields. 
diff --git a/locktree/manager.cc b/locktree/manager.cc index f3d45e2defd..896e7bfdb2d 100644 --- a/locktree/manager.cc +++ b/locktree/manager.cc @@ -483,7 +483,7 @@ void locktree_manager::locktree_escalator::run(locktree_manager *mgr, void (*esc mgr->add_escalator_wait_time(t1 - t0); } -#define STATUS_INIT(k,c,t,l,inc) TOKUDB_STATUS_INIT(status, k, c, t, "locktree: " l, inc) +#define STATUS_INIT(k,c,t,l,inc) TOKUFT_STATUS_INIT(status, k, c, t, "locktree: " l, inc) void locktree_manager::status_init(void) { STATUS_INIT(LTM_SIZE_CURRENT, LOCKTREE_MEMORY_SIZE, UINT64, "memory size", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); diff --git a/portability/file.cc b/portability/file.cc index 06ccfcfc97d..6919b54e81d 100644 --- a/portability/file.cc +++ b/portability/file.cc @@ -166,10 +166,10 @@ try_again_after_handling_write_error(int fd, size_t len, ssize_t r_write) { ssize_t n = readlink(fname, symname, MY_MAX_PATH); if ((int)n == -1) - fprintf(stderr, "%.24s Tokudb No space when writing %" PRIu64 " bytes to fd=%d ", tstr, (uint64_t) len, fd); + fprintf(stderr, "%.24s TokuFT No space when writing %" PRIu64 " bytes to fd=%d ", tstr, (uint64_t) len, fd); else { tstr[n] = 0; // readlink doesn't append a NUL to the end of the buffer. - fprintf(stderr, "%.24s Tokudb No space when writing %" PRIu64 " bytes to %*s ", tstr, (uint64_t) len, (int) n, symname); + fprintf(stderr, "%.24s TokuFT No space when writing %" PRIu64 " bytes to %*s ", tstr, (uint64_t) len, (int) n, symname); } fprintf(stderr, "retry in %d second%s\n", toku_write_enospc_sleep, toku_write_enospc_sleep > 1 ? 
"s" : ""); fflush(stderr); diff --git a/src/indexer.cc b/src/indexer.cc index 4b48b747858..aa821f67fba 100644 --- a/src/indexer.cc +++ b/src/indexer.cc @@ -117,7 +117,7 @@ PATENT RIGHTS GRANT: static INDEXER_STATUS_S indexer_status; -#define STATUS_INIT(k,c,t,l,inc) TOKUDB_STATUS_INIT(indexer_status, k, c, t, "indexer: " l, inc) +#define STATUS_INIT(k,c,t,l,inc) TOKUFT_STATUS_INIT(indexer_status, k, c, t, "indexer: " l, inc) static void status_init(void) { @@ -688,7 +688,7 @@ abort_indexer(DB_INDEXER *indexer) { } -// derived from ha_tokudb::estimate_num_rows +// derived from the handlerton's estimate_num_rows() static int update_estimated_rows(DB_INDEXER *indexer) { int error; diff --git a/src/loader.cc b/src/loader.cc index e59242f1dee..1a6bf718443 100644 --- a/src/loader.cc +++ b/src/loader.cc @@ -119,7 +119,7 @@ enum {MAX_FILE_SIZE=256}; static LOADER_STATUS_S loader_status; -#define STATUS_INIT(k,c,t,l,inc) TOKUDB_STATUS_INIT(loader_status, k, c, t, "loader: " l, inc) +#define STATUS_INIT(k,c,t,l,inc) TOKUFT_STATUS_INIT(loader_status, k, c, t, "loader: " l, inc) static void status_init(void) { diff --git a/src/tests/hot-optimize-table-tests.cc b/src/tests/hot-optimize-table-tests.cc index 7b580d94953..42f0ef86e82 100644 --- a/src/tests/hot-optimize-table-tests.cc +++ b/src/tests/hot-optimize-table-tests.cc @@ -148,7 +148,7 @@ hot_test_setup(void) // Remove any previous environment. toku_os_recursive_delete(TOKU_TEST_FILENAME); - // Set up a new TokuDB. + // Set up a new environment. { int chk_r = toku_os_mkdir(TOKU_TEST_FILENAME, S_IRWXU+S_IRWXG+S_IRWXO); CKERR(chk_r); } { int chk_r = db_env_create(&env, 0); CKERR(chk_r); } env->set_errfile(env, stderr); diff --git a/src/tests/test938c.cc b/src/tests/test938c.cc index 2ca11d6f44e..154bf05862d 100644 --- a/src/tests/test938c.cc +++ b/src/tests/test938c.cc @@ -106,7 +106,7 @@ run (void) { // add (1,101) to the tree // In another concurrent txn // look up (1,102) and do DB_NEXT - // That should be fine in TokuDB. 
+ // That should be fine in TokuFT. // It fails before #938 is fixed. // It also fails for BDB for other reasons (page-level locking vs. row-level locking) { diff --git a/src/tests/test_abort1.cc b/src/tests/test_abort1.cc index c88eeaddd8f..7a8b3384ce8 100644 --- a/src/tests/test_abort1.cc +++ b/src/tests/test_abort1.cc @@ -89,7 +89,7 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #include "test.h" -/* Simple test of logging. Can I start a TokuDB with logging enabled? */ +/* Simple test of logging. Can I start TokuFT with logging enabled? */ #include #include diff --git a/src/tests/test_db_env_open_nocreate.cc b/src/tests/test_db_env_open_nocreate.cc index e45afb55ded..a97ec7de733 100644 --- a/src/tests/test_db_env_open_nocreate.cc +++ b/src/tests/test_db_env_open_nocreate.cc @@ -132,7 +132,7 @@ test_main(int argc, char *const argv[]) { r = db_env_create(&dbenv, 0); CKERR(r); r = dbenv->open(dbenv, TOKU_TEST_FILENAME, private_flags|DB_INIT_MPOOL, 0); - // TokuDB has no trouble opening an environment if the directory exists. + // TokuFT has no trouble opening an environment if the directory exists. CKERR(r); assert(r==0); dbenv->close(dbenv,0); // free memory diff --git a/src/tests/test_db_open_notexist_reopen.cc b/src/tests/test_db_open_notexist_reopen.cc index 7881a14fbd7..70580b8f868 100644 --- a/src/tests/test_db_open_notexist_reopen.cc +++ b/src/tests/test_db_open_notexist_reopen.cc @@ -89,7 +89,7 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #include "test.h" -/* Simple test of logging. Can I start a TokuDB with logging enabled? */ +/* Simple test of logging. Can I start TokuFT with logging enabled? */ #include #include diff --git a/src/tests/test_log0.cc b/src/tests/test_log0.cc index 4f7202005d3..c597affc562 100644 --- a/src/tests/test_log0.cc +++ b/src/tests/test_log0.cc @@ -89,7 +89,7 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. 
All rights reserved." #include "test.h" -/* Simple test of logging. Can I start a TokuDB with logging enabled? */ +/* Simple test of logging. Can I start TokuFT with logging enabled? */ #include #include diff --git a/src/tests/test_log1.cc b/src/tests/test_log1.cc index 82f376ef067..8379d20ba45 100644 --- a/src/tests/test_log1.cc +++ b/src/tests/test_log1.cc @@ -89,7 +89,7 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #include "test.h" -/* Simple test of logging. Can I start a TokuDB with logging enabled? */ +/* Simple test of logging. Can I start TokuFT with logging enabled? */ #include #include diff --git a/src/tests/test_log10.cc b/src/tests/test_log10.cc index d47f6220fc5..053efc0f07d 100644 --- a/src/tests/test_log10.cc +++ b/src/tests/test_log10.cc @@ -91,7 +91,7 @@ PATENT RIGHTS GRANT: /* Test to see if we can do logging and recovery. */ -/* This is very specific to TokuDB. It won't work with Berkeley DB. */ +/* This is very specific to TokuFT. It won't work with Berkeley DB. */ /* This test_log10 inserts to a db, closes, reopens, and inserts more to db. We want to make sure that the recovery of the buffers works. */ /* Lots of stuff gets inserted. */ diff --git a/src/tests/test_log2.cc b/src/tests/test_log2.cc index 40e7f231398..d2a2e6d5006 100644 --- a/src/tests/test_log2.cc +++ b/src/tests/test_log2.cc @@ -91,7 +91,7 @@ PATENT RIGHTS GRANT: /* Test to see if we can do logging and recovery. */ -/* This is very specific to TokuDB. It won't work with Berkeley DB. */ +/* This is very specific to TokuFT. It won't work with Berkeley DB. */ #include diff --git a/src/tests/test_log2_abort.cc b/src/tests/test_log2_abort.cc index a7470181d7f..9ed3f8a1a6f 100644 --- a/src/tests/test_log2_abort.cc +++ b/src/tests/test_log2_abort.cc @@ -91,7 +91,7 @@ PATENT RIGHTS GRANT: /* Like test_log2 except abort. */ -/* This is very specific to TokuDB. It won't work with Berkeley DB. */ +/* This is very specific to TokuFT. 
It won't work with Berkeley DB. */ #include diff --git a/src/tests/test_log3.cc b/src/tests/test_log3.cc index 33c6bba45d2..a8b71fa90db 100644 --- a/src/tests/test_log3.cc +++ b/src/tests/test_log3.cc @@ -91,7 +91,7 @@ PATENT RIGHTS GRANT: /* Test to see if we can do logging and recovery. */ -/* This is very specific to TokuDB. It won't work with Berkeley DB. */ +/* This is very specific to TokuFT. It won't work with Berkeley DB. */ #include diff --git a/src/tests/test_log4.cc b/src/tests/test_log4.cc index 7d45024ed1a..2117907f1b0 100644 --- a/src/tests/test_log4.cc +++ b/src/tests/test_log4.cc @@ -91,7 +91,7 @@ PATENT RIGHTS GRANT: /* Test to see if we can do logging and recovery. */ -/* This is very specific to TokuDB. It won't work with Berkeley DB. */ +/* This is very specific to TokuFT. It won't work with Berkeley DB. */ #include diff --git a/src/tests/test_log5.cc b/src/tests/test_log5.cc index f03888f0367..82a122c045a 100644 --- a/src/tests/test_log5.cc +++ b/src/tests/test_log5.cc @@ -91,7 +91,7 @@ PATENT RIGHTS GRANT: /* Test to see if we can do logging and recovery. */ -/* This is very specific to TokuDB. It won't work with Berkeley DB. */ +/* This is very specific to TokuFT. It won't work with Berkeley DB. */ #include diff --git a/src/tests/test_log6.cc b/src/tests/test_log6.cc index 8084933f5c0..710519d70a7 100644 --- a/src/tests/test_log6.cc +++ b/src/tests/test_log6.cc @@ -91,7 +91,7 @@ PATENT RIGHTS GRANT: /* Test to see if we can do logging and recovery. */ -/* This is very specific to TokuDB. It won't work with Berkeley DB. */ +/* This is very specific to TokuFT. It won't work with Berkeley DB. */ #include diff --git a/src/tests/test_log7.cc b/src/tests/test_log7.cc index fff24228ae0..afa9a5ab82c 100644 --- a/src/tests/test_log7.cc +++ b/src/tests/test_log7.cc @@ -91,7 +91,7 @@ PATENT RIGHTS GRANT: /* Test to see if we can do logging and recovery. */ -/* This is very specific to TokuDB. It won't work with Berkeley DB. 
*/ +/* This is very specific to TokuFT. It won't work with Berkeley DB. */ /* This test_log7 is like test_log5 except maxcount is larger. */ diff --git a/src/tests/test_log8.cc b/src/tests/test_log8.cc index a54793da239..39c607b3623 100644 --- a/src/tests/test_log8.cc +++ b/src/tests/test_log8.cc @@ -91,7 +91,7 @@ PATENT RIGHTS GRANT: /* Test to see if we can do logging and recovery. */ -/* This is very specific to TokuDB. It won't work with Berkeley DB. */ +/* This is very specific to TokuFT. It won't work with Berkeley DB. */ /* This test_log8 inserts to a db, closes, reopens, and inserts more to db. We want to make sure that the recovery of the buffers works. */ diff --git a/src/tests/test_log9.cc b/src/tests/test_log9.cc index 38b8e7818f5..6b7f1fddb44 100644 --- a/src/tests/test_log9.cc +++ b/src/tests/test_log9.cc @@ -91,7 +91,7 @@ PATENT RIGHTS GRANT: /* Test to see if we can do logging and recovery. */ -/* This is very specific to TokuDB. It won't work with Berkeley DB. */ +/* This is very specific to TokuFT. It won't work with Berkeley DB. */ /* This test_log8 inserts to a db, closes, reopens, and inserts more to db. We want to make sure that the recovery of the buffers works. 
*/ diff --git a/src/tests/upgrade-test-1.cc b/src/tests/upgrade-test-1.cc index 5de82f09d51..1f30ab21cf2 100644 --- a/src/tests/upgrade-test-1.cc +++ b/src/tests/upgrade-test-1.cc @@ -213,7 +213,7 @@ static void setup(void) { } } else { - fprintf(stderr, "unsupported TokuDB version %d to upgrade\n", SRC_VERSION); + fprintf(stderr, "unsupported TokuFT version %d to upgrade\n", SRC_VERSION); assert(0); } diff --git a/src/tests/upgrade-test-2.cc b/src/tests/upgrade-test-2.cc index b8375a20ede..33003f6780b 100644 --- a/src/tests/upgrade-test-2.cc +++ b/src/tests/upgrade-test-2.cc @@ -193,7 +193,7 @@ static void setup(void) { src_db_dir = db_v5_dir; } else { - fprintf(stderr, "unsupported TokuDB version %d to upgrade\n", SRC_VERSION); + fprintf(stderr, "unsupported TokuFT version %d to upgrade\n", SRC_VERSION); assert(0); } diff --git a/src/tests/upgrade-test-3.cc b/src/tests/upgrade-test-3.cc index e18ebdcf3b5..61994a2de66 100644 --- a/src/tests/upgrade-test-3.cc +++ b/src/tests/upgrade-test-3.cc @@ -90,7 +90,7 @@ PATENT RIGHTS GRANT: // Purpose of this test is to verify that dictionaries created with 4.2.0 -// can be properly truncated with TokuDB version 5.x or later. +// can be properly truncated with TokuFT version 5.x or later. 
#include "test.h" @@ -216,7 +216,7 @@ static void setup(void) { src_db_dir = db_v5_dir; } else { - fprintf(stderr, "unsupported TokuDB version %d to upgrade\n", SRC_VERSION); + fprintf(stderr, "unsupported TokuFT version %d to upgrade\n", SRC_VERSION); assert(0); } diff --git a/src/tests/upgrade-test-4.cc b/src/tests/upgrade-test-4.cc index 86b62fcabee..0d083d9d87a 100644 --- a/src/tests/upgrade-test-4.cc +++ b/src/tests/upgrade-test-4.cc @@ -306,7 +306,7 @@ static void setup(void) { src_db_dir = db_v5_dir; } else { - fprintf(stderr, "unsupported TokuDB version %d to upgrade\n", SRC_VERSION); + fprintf(stderr, "unsupported TokuFT version %d to upgrade\n", SRC_VERSION); assert(0); } diff --git a/src/tests/upgrade-test-5.cc b/src/tests/upgrade-test-5.cc index ce5e4faeb58..2f5d1863e51 100644 --- a/src/tests/upgrade-test-5.cc +++ b/src/tests/upgrade-test-5.cc @@ -210,7 +210,7 @@ static void setup(void) { src_db_dir = db_v5_dir; } else { - fprintf(stderr, "unsupported TokuDB version %d to upgrade\n", SRC_VERSION); + fprintf(stderr, "unsupported TokuFT version %d to upgrade\n", SRC_VERSION); assert(0); } diff --git a/src/tests/upgrade-test-7.cc b/src/tests/upgrade-test-7.cc index 12acbc215cb..429f4cddf2c 100644 --- a/src/tests/upgrade-test-7.cc +++ b/src/tests/upgrade-test-7.cc @@ -89,8 +89,8 @@ PATENT RIGHTS GRANT: #ident "$Id$" -// Purpose of this test is to verify that an environment created by TokuDB 3.1.0 -// is properly rejected by the upgrade logic of TokuDB 5.x and later. +// Purpose of this test is to verify that an environment created by TokuFT 3.1.0 +// is properly rejected by the upgrade logic of TokuFT 5.x and later. #include "test.h" #include "toku_pthread.h" diff --git a/src/ydb.cc b/src/ydb.cc index 885671f76c9..af8348f0c63 100644 --- a/src/ydb.cc +++ b/src/ydb.cc @@ -125,7 +125,7 @@ const char *toku_copyright_string = "Copyright (c) 2007-2013 Tokutek Inc. 
All r // Include ydb_lib.cc here so that its constructor/destructor gets put into // ydb.o, to make sure they don't get erased at link time (when linking to -// a static libtokudb.a that was compiled with gcc). See #5094. +// a static libtokufractaltree.a that was compiled with gcc). See #5094. #include "ydb_lib.cc" #ifdef TOKUTRACE @@ -186,7 +186,7 @@ typedef struct { static YDB_LAYER_STATUS_S ydb_layer_status; #define STATUS_VALUE(x) ydb_layer_status.status[x].value.num -#define STATUS_INIT(k,c,t,l,inc) TOKUDB_STATUS_INIT(ydb_layer_status, k, c, t, l, inc) +#define STATUS_INIT(k,c,t,l,inc) TOKUFT_STATUS_INIT(ydb_layer_status, k, c, t, l, inc) static void ydb_layer_status_init (void) { @@ -252,14 +252,14 @@ static void env_fs_report_in_yellow(DB_ENV *UU(env)) { char tbuf[26]; time_t tnow = time(NULL); - fprintf(stderr, "%.24s Tokudb file system space is low\n", ctime_r(&tnow, tbuf)); fflush(stderr); + fprintf(stderr, "%.24s TokuFT file system space is low\n", ctime_r(&tnow, tbuf)); fflush(stderr); } static void env_fs_report_in_red(DB_ENV *UU(env)) { char tbuf[26]; time_t tnow = time(NULL); - fprintf(stderr, "%.24s Tokudb file system space is really low and access is restricted\n", ctime_r(&tnow, tbuf)); fflush(stderr); + fprintf(stderr, "%.24s TokuFT file system space is really low and access is restricted\n", ctime_r(&tnow, tbuf)); fflush(stderr); } static inline uint64_t @@ -268,7 +268,7 @@ env_fs_redzone(DB_ENV *env, uint64_t total) { } #define ZONEREPORTLIMIT 12 -// Check the available space in the file systems used by tokudb and erect barriers when available space gets low. +// Check the available space in the file systems used by tokuft and erect barriers when available space gets low. 
static int env_fs_poller(void *arg) { DB_ENV *env = (DB_ENV *) arg; @@ -445,7 +445,7 @@ static void keep_cachetable_callback (DB_ENV *env, CACHETABLE cachetable) static int ydb_do_recovery (DB_ENV *env) { assert(env->i->real_log_dir); - int r = tokudb_recover(env, + int r = tokuft_recover(env, toku_keep_prepared_txn_callback, keep_cachetable_callback, env->i->logger, @@ -459,7 +459,7 @@ ydb_do_recovery (DB_ENV *env) { static int needs_recovery (DB_ENV *env) { assert(env->i->real_log_dir); - int recovery_needed = tokudb_needs_recovery(env->i->real_log_dir, true); + int recovery_needed = tokuft_needs_recovery(env->i->real_log_dir, true); return recovery_needed ? DB_RUNRECOVERY : 0; } @@ -521,7 +521,7 @@ typedef struct { static PERSISTENT_UPGRADE_STATUS_S persistent_upgrade_status; -#define PERSISTENT_UPGRADE_STATUS_INIT(k,c,t,l,inc) TOKUDB_STATUS_INIT(persistent_upgrade_status, k, c, t, "upgrade: " l, inc) +#define PERSISTENT_UPGRADE_STATUS_INIT(k,c,t,l,inc) TOKUFT_STATUS_INIT(persistent_upgrade_status, k, c, t, "upgrade: " l, inc) static void persistent_upgrade_status_init (void) { @@ -671,7 +671,7 @@ capture_persistent_env_contents (DB_ENV * env, DB_TXN * txn) { // return 0 if log exists or ENOENT if log does not exist static int ydb_recover_log_exists(DB_ENV *env) { - int r = tokudb_recover_log_exists(env->i->real_log_dir); + int r = tokuft_recover_log_exists(env->i->real_log_dir); return r; } @@ -834,20 +834,20 @@ env_open(DB_ENV * env, const char *home, uint32_t flags, int mode) { HANDLE_EXTRA_FLAGS(env, flags, DB_CREATE|DB_PRIVATE|DB_INIT_LOG|DB_INIT_TXN|DB_RECOVER|DB_INIT_MPOOL|DB_INIT_LOCK|DB_THREAD); - // DB_CREATE means create if env does not exist, and Tokudb requires it because - // Tokudb requries DB_PRIVATE. + // DB_CREATE means create if env does not exist, and TokuFT requires it because + // TokuFT requries DB_PRIVATE. 
if ((flags & DB_PRIVATE) && !(flags & DB_CREATE)) { r = toku_ydb_do_error(env, ENOENT, "DB_PRIVATE requires DB_CREATE (seems gratuitous to us, but that's BDB's behavior\n"); goto cleanup; } if (!(flags & DB_PRIVATE)) { - r = toku_ydb_do_error(env, ENOENT, "TokuDB requires DB_PRIVATE\n"); + r = toku_ydb_do_error(env, ENOENT, "TokuFT requires DB_PRIVATE\n"); goto cleanup; } if ((flags & DB_INIT_LOG) && !(flags & DB_INIT_TXN)) { - r = toku_ydb_do_error(env, EINVAL, "TokuDB requires transactions for logging\n"); + r = toku_ydb_do_error(env, EINVAL, "TokuFT requires transactions for logging\n"); goto cleanup; } @@ -959,13 +959,13 @@ env_open(DB_ENV * env, const char *home, uint32_t flags, int mode) { // This is probably correct, but it will be pain... // if ((flags & DB_THREAD)==0) { -// r = toku_ydb_do_error(env, EINVAL, "TokuDB requires DB_THREAD"); +// r = toku_ydb_do_error(env, EINVAL, "TokuFT requires DB_THREAD"); // goto cleanup; // } unused_flags &= ~DB_THREAD; if (unused_flags!=0) { - r = toku_ydb_do_error(env, EINVAL, "Extra flags not understood by tokudb: %u\n", unused_flags); + r = toku_ydb_do_error(env, EINVAL, "Extra flags not understood by tokuft: %u\n", unused_flags); goto cleanup; } @@ -1209,7 +1209,7 @@ env_close(DB_ENV * env, uint32_t flags) { unlock_single_process(env); toku_free(env->i); toku_free(env); - toku_sync_fetch_and_add(&tokudb_num_envs, -1); + toku_sync_fetch_and_add(&tokuft_num_envs, -1); if (flags != 0) { r = EINVAL; } @@ -1384,7 +1384,7 @@ env_set_flags(DB_ENV * env, uint32_t flags, int onoff) { flags &= ~DB_AUTO_COMMIT; } if (flags != 0 && onoff) { - return toku_ydb_do_error(env, EINVAL, "TokuDB does not (yet) support any nonzero ENV flags other than DB_AUTO_COMMIT\n"); + return toku_ydb_do_error(env, EINVAL, "TokuFT does not (yet) support any nonzero ENV flags other than DB_AUTO_COMMIT\n"); } if (onoff) env->i->open_flags |= change; else env->i->open_flags &= ~change; @@ -1430,7 +1430,7 @@ env_get_lg_max(DB_ENV * env, uint32_t 
*lg_maxp) { static int env_set_lk_detect(DB_ENV * env, uint32_t UU(detect)) { HANDLE_PANICKED_ENV(env); - return toku_ydb_do_error(env, EINVAL, "TokuDB does not (yet) support set_lk_detect\n"); + return toku_ydb_do_error(env, EINVAL, "TokuFT does not (yet) support set_lk_detect\n"); } static int @@ -1775,7 +1775,7 @@ typedef struct { static FS_STATUS_S fsstat; -#define FS_STATUS_INIT(k,c,t,l,inc) TOKUDB_STATUS_INIT(fsstat, k, c, t, "filesystem: " l, inc) +#define FS_STATUS_INIT(k,c,t,l,inc) TOKUFT_STATUS_INIT(fsstat, k, c, t, "filesystem: " l, inc) static void fs_status_init(void) { @@ -1846,7 +1846,7 @@ typedef struct { static MEMORY_STATUS_S memory_status; -#define STATUS_INIT(k,c,t,l,inc) TOKUDB_STATUS_INIT(memory_status, k, c, t, "memory: " l, inc) +#define STATUS_INIT(k,c,t,l,inc) TOKUFT_STATUS_INIT(memory_status, k, c, t, "memory: " l, inc) static void memory_status_init(void) { @@ -2673,7 +2673,7 @@ toku_env_create(DB_ENV ** envp, uint32_t flags) { *envp = result; r = 0; - toku_sync_fetch_and_add(&tokudb_num_envs, 1); + toku_sync_fetch_and_add(&tokuft_num_envs, 1); cleanup: if (r!=0) { if (result) { @@ -3058,15 +3058,15 @@ db_strerror(int error) { case TOKUDB_OUT_OF_LOCKS: return "Out of locks"; case TOKUDB_DICTIONARY_TOO_OLD: - return "Dictionary too old for this version of TokuDB"; + return "Dictionary too old for this version of TokuFT"; case TOKUDB_DICTIONARY_TOO_NEW: - return "Dictionary too new for this version of TokuDB"; + return "Dictionary too new for this version of TokuFT"; case TOKUDB_CANCELED: return "User cancelled operation"; case TOKUDB_NO_DATA: return "Ran out of data (not EOF)"; case TOKUDB_HUGE_PAGES_ENABLED: - return "Transparent huge pages are enabled but TokuDB's memory allocator will oversubscribe main memory with transparent huge pages. 
This check can be disabled by setting the environment variable TOKU_HUGE_PAGES_OK."; + return "Transparent huge pages are enabled but TokuFT's memory allocator will oversubscribe main memory with transparent huge pages. This check can be disabled by setting the environment variable TOKU_HUGE_PAGES_OK."; } static char unknown_result[100]; // Race condition if two threads call this at the same time. However even in a bad case, it should be some sort of null-terminated string. diff --git a/src/ydb_cursor.cc b/src/ydb_cursor.cc index 81c7a0593ff..c42e2fb673e 100644 --- a/src/ydb_cursor.cc +++ b/src/ydb_cursor.cc @@ -105,7 +105,7 @@ static YDB_C_LAYER_STATUS_S ydb_c_layer_status; #endif #define STATUS_VALUE(x) ydb_c_layer_status.status[x].value.num -#define STATUS_INIT(k,c,t,l,inc) TOKUDB_STATUS_INIT(ydb_c_layer_status, k, c, t, l, inc) +#define STATUS_INIT(k,c,t,l,inc) TOKUFT_STATUS_INIT(ydb_c_layer_status, k, c, t, l, inc) static void ydb_c_layer_status_init (void) { diff --git a/src/ydb_db.cc b/src/ydb_db.cc index 3d7d328b5f8..f64ed4465e8 100644 --- a/src/ydb_db.cc +++ b/src/ydb_db.cc @@ -114,7 +114,7 @@ static YDB_DB_LAYER_STATUS_S ydb_db_layer_status; #endif #define STATUS_VALUE(x) ydb_db_layer_status.status[x].value.num -#define STATUS_INIT(k,c,t,l,inc) TOKUDB_STATUS_INIT(ydb_db_layer_status, k, c, t, l, inc) +#define STATUS_INIT(k,c,t,l,inc) TOKUFT_STATUS_INIT(ydb_db_layer_status, k, c, t, l, inc) static void ydb_db_layer_status_init (void) { diff --git a/src/ydb_lib.cc b/src/ydb_lib.cc index 8956fec04ab..12742cad5c2 100644 --- a/src/ydb_lib.cc +++ b/src/ydb_lib.cc @@ -98,14 +98,12 @@ PATENT RIGHTS GRANT: #if defined(__GNUC__) -static void __attribute__((constructor)) libtokudb_init(void) { - // printf("%s:%s:%d\n", __FILE__, __FUNCTION__, __LINE__); +static void __attribute__((constructor)) libtokuft_init(void) { int r = toku_ydb_init(); assert(r==0); } -static void __attribute__((destructor)) libtokudb_destroy(void) { - // printf("%s:%s:%d\n", __FILE__, 
__FUNCTION__, __LINE__); +static void __attribute__((destructor)) libtokuft_destroy(void) { toku_ydb_destroy(); } diff --git a/src/ydb_write.cc b/src/ydb_write.cc index 9f9937a9301..77daf4e6793 100644 --- a/src/ydb_write.cc +++ b/src/ydb_write.cc @@ -106,7 +106,7 @@ static YDB_WRITE_LAYER_STATUS_S ydb_write_layer_status; #endif #define STATUS_VALUE(x) ydb_write_layer_status.status[x].value.num -#define STATUS_INIT(k,c,t,l,inc) TOKUDB_STATUS_INIT(ydb_write_layer_status, k, c, t, l, inc) +#define STATUS_INIT(k,c,t,l,inc) TOKUFT_STATUS_INIT(ydb_write_layer_status, k, c, t, l, inc) static void ydb_write_layer_status_init (void) { diff --git a/tools/ftverify.cc b/tools/ftverify.cc index 7f7e6d27d13..120658b2cb1 100644 --- a/tools/ftverify.cc +++ b/tools/ftverify.cc @@ -217,7 +217,7 @@ deserialize_headers(int fd, struct ft **h1p, struct ft **h2p) // If either header is too new, the dictionary is unreadable if (r0 == TOKUDB_DICTIONARY_TOO_NEW || r1 == TOKUDB_DICTIONARY_TOO_NEW) { - fprintf(stderr, "This dictionary was created with too new a version of TokuDB. Aborting.\n"); + fprintf(stderr, "This dictionary was created with a version of TokuFT that is too new. 
Aborting.\n"); abort(); } if (h0_acceptable) { diff --git a/tools/tdb-recover.cc b/tools/tdb-recover.cc index 842acc75924..8f185bedb04 100644 --- a/tools/tdb-recover.cc +++ b/tools/tdb-recover.cc @@ -120,7 +120,7 @@ int recovery_main (int argc, const char *const argv[]) { return(1); } - int r = tokudb_recover(nullptr, + int r = tokuft_recover(nullptr, nullptr, nullptr, nullptr, diff --git a/util/context.cc b/util/context.cc index 67f146872d5..6166be41294 100644 --- a/util/context.cc +++ b/util/context.cc @@ -121,7 +121,7 @@ const toku::context *toku_thread_get_context() { // engine status static struct context_status context_status; -#define CONTEXT_STATUS_INIT(key, legend) TOKUDB_STATUS_INIT(context_status, key, nullptr, PARCOUNT, "context: " legend, TOKU_ENGINE_STATUS) +#define CONTEXT_STATUS_INIT(key, legend) TOKUFT_STATUS_INIT(context_status, key, nullptr, PARCOUNT, "context: " legend, TOKU_ENGINE_STATUS) static void context_status_init(void) { diff --git a/util/rwlock.h b/util/rwlock.h index a98e10b47d2..14b447a450f 100644 --- a/util/rwlock.h +++ b/util/rwlock.h @@ -99,7 +99,7 @@ PATENT RIGHTS GRANT: * Overview ***************************************** * - * TokuDB employs readers/writers locks for the ephemeral locks (e.g., + * TokuFT employs readers/writers locks for the ephemeral locks (e.g., * on FT nodes) Why not just use the toku_pthread_rwlock API? 
* * 1) we need multiprocess rwlocks (not just multithreaded) diff --git a/util/status.h b/util/status.h index 54eb4dbd376..1ab6d35e560 100644 --- a/util/status.h +++ b/util/status.h @@ -94,7 +94,7 @@ PATENT RIGHTS GRANT: #include #include -#define TOKUDB_STATUS_INIT(array,k,c,t,l,inc) do { \ +#define TOKUFT_STATUS_INIT(array,k,c,t,l,inc) do { \ array.status[k].keyname = #k; \ array.status[k].columnname = #c; \ array.status[k].type = t; \ @@ -105,7 +105,7 @@ PATENT RIGHTS GRANT: constexpr_static_assert((inc) == TOKU_ENGINE_STATUS \ || strcmp(#c, "nullptr"), "Missing column name."); \ constexpr_static_assert(static_strncasecmp(#c, "TOKU", strlen("TOKU")), \ - "Do not start column names with toku/tokudb. Names get TOKUDB_ prefix automatically."); \ + "Do not start column names with toku."); \ array.status[k].include = static_cast(inc); \ if (t == PARCOUNT) { \ array.status[k].value.parcount = create_partitioned_counter(); \ From 4cb144d9fb1c5bdb305b51ff2310edec2e60e787 Mon Sep 17 00:00:00 2001 From: John Esmet Date: Thu, 21 Aug 2014 16:29:50 -0400 Subject: [PATCH 156/190] Add common cmake build directory names to .gitignore --- .gitignore | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.gitignore b/.gitignore index 4fede53152b..db9f2f7e1ab 100644 --- a/.gitignore +++ b/.gitignore @@ -35,3 +35,7 @@ tags # Common symlinks used in compiling third_party/jemalloc + +# Common cmake build directories +build +dbg From ff5a25cca334d6f9ba98837f6ad382a947e07165 Mon Sep 17 00:00:00 2001 From: Rich Prohaska Date: Thu, 21 Aug 2014 19:21:21 -0400 Subject: [PATCH 157/190] FT-584 use trylock inside of the lock tree manager get_status function so that it is non-blocking --- locktree/manager.cc | 39 +++++++++++++++++++------------------- portability/toku_pthread.h | 15 +++++++++++++++ 2 files changed, 34 insertions(+), 20 deletions(-) diff --git a/locktree/manager.cc b/locktree/manager.cc index 896e7bfdb2d..5f69c46f7da 100644 --- a/locktree/manager.cc +++ b/locktree/manager.cc @@ 
-530,33 +530,32 @@ void locktree_manager::get_status(LTM_STATUS statp) { STATUS_VALUE(LTM_LONG_WAIT_ESCALATION_COUNT) = m_long_wait_escalation_count; STATUS_VALUE(LTM_LONG_WAIT_ESCALATION_TIME) = m_long_wait_escalation_time; - mutex_lock(); - uint64_t lock_requests_pending = 0; uint64_t sto_num_eligible = 0; uint64_t sto_end_early_count = 0; tokutime_t sto_end_early_time = 0; + size_t num_locktrees = 0; + struct lt_counters lt_counters = {}; - struct lt_counters lt_counters = m_lt_counters; - - size_t num_locktrees = m_locktree_map.size(); - for (size_t i = 0; i < num_locktrees; i++) { - locktree *lt; - int r = m_locktree_map.fetch(i, <); - invariant_zero(r); - - toku_mutex_lock(<->m_lock_request_info.mutex); - lock_requests_pending += lt->m_lock_request_info.pending_lock_requests.size(); - lt_counters.add(lt->get_lock_request_info()->counters); - toku_mutex_unlock(<->m_lock_request_info.mutex); - - sto_num_eligible += lt->sto_txnid_is_valid_unsafe() ? 1 : 0; - sto_end_early_count += lt->m_sto_end_early_count; - sto_end_early_time += lt->m_sto_end_early_time; + if (toku_mutex_trylock(&m_mutex) == 0) { + lt_counters = m_lt_counters; + num_locktrees = m_locktree_map.size(); + for (size_t i = 0; i < num_locktrees; i++) { + locktree *lt; + int r = m_locktree_map.fetch(i, <); + invariant_zero(r); + if (toku_mutex_trylock(<->m_lock_request_info.mutex) == 0) { + lock_requests_pending += lt->m_lock_request_info.pending_lock_requests.size(); + lt_counters.add(lt->get_lock_request_info()->counters); + toku_mutex_unlock(<->m_lock_request_info.mutex); + } + sto_num_eligible += lt->sto_txnid_is_valid_unsafe() ? 
1 : 0; + sto_end_early_count += lt->m_sto_end_early_count; + sto_end_early_time += lt->m_sto_end_early_time; + } + mutex_unlock(); } - mutex_unlock(); - STATUS_VALUE(LTM_NUM_LOCKTREES) = num_locktrees; STATUS_VALUE(LTM_LOCK_REQUESTS_PENDING) = lock_requests_pending; STATUS_VALUE(LTM_STO_NUM_ELIGIBLE) = sto_num_eligible; diff --git a/portability/toku_pthread.h b/portability/toku_pthread.h index e6a7ae4d92a..a9dc660b6a7 100644 --- a/portability/toku_pthread.h +++ b/portability/toku_pthread.h @@ -212,6 +212,21 @@ toku_mutex_lock(toku_mutex_t *mutex) { #endif } +static inline int +toku_mutex_trylock(toku_mutex_t *mutex) { + int r = pthread_mutex_trylock(&mutex->pmutex); +#if TOKU_PTHREAD_DEBUG + if (r == 0) { + invariant(mutex->valid); + invariant(!mutex->locked); + invariant(mutex->owner == 0); + mutex->locked = true; + mutex->owner = pthread_self(); + } +#endif + return r; +} + static inline void toku_mutex_unlock(toku_mutex_t *mutex) { #if TOKU_PTHREAD_DEBUG From 88a7ade6886b5355c3504d4f447eadb085dbaea5 Mon Sep 17 00:00:00 2001 From: Rich Prohaska Date: Sun, 24 Aug 2014 08:45:19 -0400 Subject: [PATCH 158/190] FT-312 speedup shutdown by parallelizing compression --- ft/cachetable/cachetable.cc | 5 + ft/cachetable/cachetable.h | 5 + ft/serialize/ft_node-serialize.cc | 8 +- ft/serialize/ft_node-serialize.h | 2 + src/tests/big-shutdown.cc | 190 ++++++++++++++++++++++++++++++ src/ydb.cc | 1 + 6 files changed, 210 insertions(+), 1 deletion(-) create mode 100644 src/tests/big-shutdown.cc diff --git a/ft/cachetable/cachetable.cc b/ft/cachetable/cachetable.cc index 794e3abdca9..feda4abc76a 100644 --- a/ft/cachetable/cachetable.cc +++ b/ft/cachetable/cachetable.cc @@ -2512,6 +2512,11 @@ toku_cachetable_minicron_shutdown(CACHETABLE ct) { ct->cl.destroy(); } +void toku_cachetable_prepare_close(CACHETABLE ct UU()) { + extern bool toku_serialize_in_parallel; + toku_serialize_in_parallel = true; +} + /* Requires that it all be flushed. 
*/ void toku_cachetable_close (CACHETABLE *ctp) { CACHETABLE ct = *ctp; diff --git a/ft/cachetable/cachetable.h b/ft/cachetable/cachetable.h index 3afc600437c..a1ea83406a1 100644 --- a/ft/cachetable/cachetable.h +++ b/ft/cachetable/cachetable.h @@ -191,10 +191,15 @@ void toku_cachetable_begin_checkpoint (CHECKPOINTER cp, struct tokulogger *logge void toku_cachetable_end_checkpoint(CHECKPOINTER cp, struct tokulogger *logger, void (*testcallback_f)(void*), void * testextra); + // Shuts down checkpoint thread // Requires no locks be held that are taken by the checkpoint function void toku_cachetable_minicron_shutdown(CACHETABLE ct); +// Prepare to close the cachetable. This informs the cachetable that it is about to be closed +// so that it can tune its checkpoint resource use. +void toku_cachetable_prepare_close(CACHETABLE ct); + // Close the cachetable. // Effects: All of the memory objects are flushed to disk, and the cachetable is destroyed. void toku_cachetable_close(CACHETABLE *ct); diff --git a/ft/serialize/ft_node-serialize.cc b/ft/serialize/ft_node-serialize.cc index df067eefa22..3e0e192a6d2 100644 --- a/ft/serialize/ft_node-serialize.cc +++ b/ft/serialize/ft_node-serialize.cc @@ -134,6 +134,7 @@ toku_ft_upgrade_get_status(FT_UPGRADE_STATUS s) { static int num_cores = 0; // cache the number of cores for the parallelization static struct toku_thread_pool *ft_pool = NULL; +bool toku_serialize_in_parallel; int get_num_cores(void) { return num_cores; @@ -143,11 +144,16 @@ struct toku_thread_pool *get_ft_pool(void) { return ft_pool; } +void toku_serialize_set_parallel(bool in_parallel) { + toku_serialize_in_parallel = in_parallel; +} + void toku_ft_serialize_layer_init(void) { num_cores = toku_os_get_number_active_processors(); int r = toku_thread_pool_create(&ft_pool, num_cores); lazy_assert_zero(r); block_allocator::maybe_initialize_trace(); + toku_serialize_in_parallel = false; } void toku_ft_serialize_layer_destroy(void) { @@ -846,7 +852,7 @@ 
toku_serialize_ftnode_to (int fd, BLOCKNUM blocknum, FTNODE node, FTNODE_DISK_DA ft->h->basementnodesize, ft->h->compression_method, do_rebalancing, - false, // in_parallel + toku_serialize_in_parallel, // in_parallel &n_to_write, &n_uncompressed_bytes, &compressed_buf diff --git a/ft/serialize/ft_node-serialize.h b/ft/serialize/ft_node-serialize.h index 28d3f26590e..319e270dd58 100644 --- a/ft/serialize/ft_node-serialize.h +++ b/ft/serialize/ft_node-serialize.h @@ -112,6 +112,8 @@ int toku_deserialize_bp_from_disk(FTNODE node, FTNODE_DISK_DATA ndd, int childnu int toku_deserialize_bp_from_compressed(FTNODE node, int childnum, ftnode_fetch_extra *bfe); int toku_deserialize_ftnode_from(int fd, BLOCKNUM off, uint32_t fullhash, FTNODE *node, FTNODE_DISK_DATA *ndd, ftnode_fetch_extra *bfe); +void toku_serialize_set_parallel(bool); + // used by nonleaf node partial eviction void toku_create_compressed_partition_from_available(FTNODE node, int childnum, enum toku_compression_method compression_method, SUB_BLOCK sb); diff --git a/src/tests/big-shutdown.cc b/src/tests/big-shutdown.cc new file mode 100644 index 00000000000..04324148a67 --- /dev/null +++ b/src/tests/big-shutdown.cc @@ -0,0 +1,190 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: +#ident "$Id$" +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). 
+ + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuDB, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. 
+ + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." +#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." + +// Create a lot of dirty nodes, kick off a checkpoint, and close the environment. +// Measure the time it takes to close the environment since we are speeding up that +// function. 
+ +#include "test.h" +#include +#include + +// Insert max_rows key/val pairs into the db +static void do_inserts(DB_ENV *env, DB *db, uint64_t max_rows, size_t val_size) { + char val_data[val_size]; memset(val_data, 0, val_size); + int r; + DB_TXN *txn = nullptr; + r = env->txn_begin(env, nullptr, &txn, 0); + CKERR(r); + + for (uint64_t i = 1; i <= max_rows; i++) { + // pick a sequential key but it does not matter for this test. + uint64_t k[2] = { + htobe64(i), random64(), + }; + DBT key = { .data = k, .size = sizeof k }; + DBT val = { .data = val_data, .size = (uint32_t) val_size }; + r = db->put(db, txn, &key, &val, 0); + CKERR(r); + + if ((i % 1000) == 0) { + if (verbose) + fprintf(stderr, "put %" PRIu64 "\n", i); + r = txn->commit(txn, 0); + CKERR(r); + r = env->txn_begin(env, nullptr, &txn, 0); + CKERR(r); + } + } + + r = txn->commit(txn, 0); + CKERR(r); +} + +// Create a cache with a lot of dirty nodes, kick off a checkpoint, and measure the time to +// close the environment. +static void big_shutdown(void) { + int r; + + DB_ENV *env = nullptr; + r = db_env_create(&env, 0); + CKERR(r); + r = env->set_cachesize(env, 8, 0, 1); + CKERR(r); + r = env->open(env, TOKU_TEST_FILENAME, + DB_INIT_MPOOL|DB_CREATE|DB_THREAD |DB_INIT_LOCK|DB_INIT_LOG|DB_INIT_TXN|DB_PRIVATE, + S_IRWXU+S_IRWXG+S_IRWXO); + CKERR(r); + + DB *db = nullptr; + r = db_create(&db, env, 0); + CKERR(r); + r = db->open(db, nullptr, "foo.db", 0, DB_BTREE, DB_CREATE, S_IRWXU+S_IRWXG+S_IRWXO); + CKERR(r); + + do_inserts(env, db, 1000000, 1024); + + // kick the checkpoint thread + if (verbose) + fprintf(stderr, "env->checkpointing_set_period\n"); + r = env->checkpointing_set_period(env, 2); + CKERR(r); + sleep(3); + + if (verbose) + fprintf(stderr, "db->close\n"); + r = db->close(db, 0); + CKERR(r); + + // measure the shutdown time + uint64_t tstart = toku_current_time_microsec(); + if (verbose) + fprintf(stderr, "env->close\n"); + r = env->close(env, 0); + CKERR(r); + uint64_t tend = 
toku_current_time_microsec(); + if (verbose) + fprintf(stderr, "env->close complete %" PRIu64 " sec\n", (tend - tstart)/1000000); +} + +int test_main (int argc, char *const argv[]) { + default_parse_args(argc, argv); + + // init the env directory + toku_os_recursive_delete(TOKU_TEST_FILENAME); + int r = toku_os_mkdir(TOKU_TEST_FILENAME, S_IRWXU+S_IRWXG+S_IRWXO); + CKERR(r); + + // run the test + big_shutdown(); + + return 0; +} diff --git a/src/ydb.cc b/src/ydb.cc index af8348f0c63..e61bf940175 100644 --- a/src/ydb.cc +++ b/src/ydb.cc @@ -1136,6 +1136,7 @@ env_close(DB_ENV * env, uint32_t flags) { } env_fsync_log_cron_destroy(env); if (env->i->cachetable) { + toku_cachetable_prepare_close(env->i->cachetable); toku_cachetable_minicron_shutdown(env->i->cachetable); if (env->i->logger) { CHECKPOINTER cp = nullptr; From a0150feed6ddbd1b31053d3881431d1370bf4557 Mon Sep 17 00:00:00 2001 From: Rich Prohaska Date: Sun, 24 Aug 2014 12:56:35 -0400 Subject: [PATCH 159/190] FT-312 fix centos compile --- src/tests/big-shutdown.cc | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/tests/big-shutdown.cc b/src/tests/big-shutdown.cc index 04324148a67..386adc15e57 100644 --- a/src/tests/big-shutdown.cc +++ b/src/tests/big-shutdown.cc @@ -97,6 +97,10 @@ PATENT RIGHTS GRANT: #include #include +#ifndef htobe64 +#define htobe64(x) __bswap_64(x) +#endif + // Insert max_rows key/val pairs into the db static void do_inserts(DB_ENV *env, DB *db, uint64_t max_rows, size_t val_size) { char val_data[val_size]; memset(val_data, 0, val_size); From 8db687e6c720ed12e66078e901ce86d9527e21db Mon Sep 17 00:00:00 2001 From: John Esmet Date: Tue, 26 Aug 2014 14:28:16 -0400 Subject: [PATCH 160/190] FT-590 Calculate a node's weight using a 64 bit integer to prevent overflow --- ft/ft-flusher.cc | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/ft/ft-flusher.cc b/ft/ft-flusher.cc index bb22d52f763..4db92fa9d2b 100644 --- a/ft/ft-flusher.cc +++ 
b/ft/ft-flusher.cc @@ -182,25 +182,21 @@ static int find_heaviest_child(FTNODE node) { int max_child = 0; - int max_weight = toku_bnc_nbytesinbuf(BNC(node, 0)) + BP_WORKDONE(node, 0); - int i; + uint64_t max_weight = toku_bnc_nbytesinbuf(BNC(node, 0)) + BP_WORKDONE(node, 0); - if (0) printf("%s:%d weights: %d", __FILE__, __LINE__, max_weight); - paranoid_invariant(node->n_children>0); - for (i=1; in_children; i++) { -#ifdef TOKU_DEBUG_PARANOID - if (BP_WORKDONE(node,i)) { - assert(toku_bnc_nbytesinbuf(BNC(node,i)) > 0); + invariant(node->n_children > 0); + for (int i = 1; i < node->n_children; i++) { + uint64_t bytes_in_buf = toku_bnc_nbytesinbuf(BNC(node, i)); + uint64_t workdone = BP_WORKDONE(node, i); + if (workdone > 0) { + invariant(bytes_in_buf > 0); } -#endif - int this_weight = toku_bnc_nbytesinbuf(BNC(node,i)) + BP_WORKDONE(node,i);; - if (0) printf(" %d", this_weight); + uint64_t this_weight = bytes_in_buf + workdone; if (max_weight < this_weight) { max_child = i; max_weight = this_weight; } } - if (0) printf("\n"); return max_child; } From 66504277bff8e8eaa2b4b17c4f84c5e08c2b9c18 Mon Sep 17 00:00:00 2001 From: Rich Prohaska Date: Wed, 27 Aug 2014 10:31:45 -0400 Subject: [PATCH 161/190] DB-702 print upgrade failed clean shutdown required error message --- storage/tokudb/hatoku_hton.cc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/storage/tokudb/hatoku_hton.cc b/storage/tokudb/hatoku_hton.cc index 0f88939fce1..79aff483cec 100644 --- a/storage/tokudb/hatoku_hton.cc +++ b/storage/tokudb/hatoku_hton.cc @@ -321,6 +321,9 @@ static void handle_ydb_error(int error) { sql_print_error(" "); sql_print_error("************************************************************"); break; + case TOKUDB_UPGRADE_FAILURE: + sql_print_error("%s upgrade failed. 
A clean shutdown of the previous version is required.", tokudb_hton_name); + break; default: sql_print_error("%s unknown error %d", tokudb_hton_name, error); break; From aed2c264b70c240a1eb2f25f4b38b55d0406079e Mon Sep 17 00:00:00 2001 From: Rich Prohaska Date: Wed, 27 Aug 2014 14:07:16 -0400 Subject: [PATCH 162/190] DB-703 DB-704 split dname into schema, table, and dictionary --- storage/tokudb/hatoku_hton.cc | 115 +++++++++++++++++----------------- 1 file changed, 56 insertions(+), 59 deletions(-) diff --git a/storage/tokudb/hatoku_hton.cc b/storage/tokudb/hatoku_hton.cc index 79aff483cec..ed7caf37003 100644 --- a/storage/tokudb/hatoku_hton.cc +++ b/storage/tokudb/hatoku_hton.cc @@ -1436,6 +1436,25 @@ static struct st_mysql_sys_var *tokudb_system_variables[] = { NULL }; +// Split ./database/table-dictionary into database, table and dictionary strings +static void tokudb_split_dname(const char *dname, String &database_name, String &table_name, String &dictionary_name) { + const char *splitter = strchr(dname, '/'); + if (splitter) { + const char *database_ptr = splitter+1; + const char *table_ptr = strchr(database_ptr, '/'); + if (table_ptr) { + database_name.append(database_ptr, table_ptr - database_ptr); + table_ptr += 1; + const char *dictionary_ptr = strchr(table_ptr, '-'); + if (dictionary_ptr) { + table_name.append(table_ptr, dictionary_ptr - table_ptr); + dictionary_ptr += 1; + dictionary_name.append(dictionary_ptr); + } + } + } +} + struct st_mysql_storage_engine tokudb_storage_engine = { MYSQL_HANDLERTON_INTERFACE_VERSION }; static struct st_mysql_information_schema tokudb_file_map_information_schema = { MYSQL_INFORMATION_SCHEMA_INTERFACE_VERSION }; @@ -1481,31 +1500,12 @@ static int tokudb_file_map(TABLE *table, THD *thd) { assert(iname_len == curr_val.size - 1); table->field[1]->store(iname, iname_len, system_charset_info); - // denormalize the dname - const char *database_name = NULL; - size_t database_len = 0; - const char *table_name = NULL; - size_t 
table_len = 0; - const char *dictionary_name = NULL; - size_t dictionary_len = 0; - database_name = strchr(dname, '/'); - if (database_name) { - database_name += 1; - table_name = strchr(database_name, '/'); - if (table_name) { - database_len = table_name - database_name; - table_name += 1; - dictionary_name = strchr(table_name, '-'); - if (dictionary_name) { - table_len = dictionary_name - table_name; - dictionary_name += 1; - dictionary_len = strlen(dictionary_name); - } - } - } - table->field[2]->store(database_name, database_len, system_charset_info); - table->field[3]->store(table_name, table_len, system_charset_info); - table->field[4]->store(dictionary_name, dictionary_len, system_charset_info); + // split the dname + String database_name, table_name, dictionary_name; + tokudb_split_dname(dname, database_name, table_name, dictionary_name); + table->field[2]->store(database_name.c_ptr(), database_name.length(), system_charset_info); + table->field[3]->store(table_name.c_ptr(), table_name.length(), system_charset_info); + table->field[4]->store(dictionary_name.c_ptr(), dictionary_name.length(), system_charset_info); error = schema_table_store_record(thd, table); } @@ -1566,6 +1566,9 @@ static ST_FIELD_INFO tokudb_fractal_tree_info_field_info[] = { {"bt_num_blocks_in_use", 0, MYSQL_TYPE_LONGLONG, 0, 0, NULL, SKIP_OPEN_TABLE }, {"bt_size_allocated", 0, MYSQL_TYPE_LONGLONG, 0, 0, NULL, SKIP_OPEN_TABLE }, {"bt_size_in_use", 0, MYSQL_TYPE_LONGLONG, 0, 0, NULL, SKIP_OPEN_TABLE }, + {"table_schema", 256, MYSQL_TYPE_STRING, 0, 0, NULL, SKIP_OPEN_TABLE }, + {"table_name", 256, MYSQL_TYPE_STRING, 0, 0, NULL, SKIP_OPEN_TABLE }, + {"table_dictionary_name", 256, MYSQL_TYPE_STRING, 0, 0, NULL, SKIP_OPEN_TABLE }, {NULL, 0, MYSQL_TYPE_NULL, 0, 0, NULL, SKIP_OPEN_TABLE} }; @@ -1603,25 +1606,25 @@ static int tokudb_report_fractal_tree_info_for_db(const DBT *dname, const DBT *i // Recalculate and check just to be safe. 
{ size_t dname_len = strlen((const char *)dname->data); - size_t iname_len = strlen((const char *)iname->data); assert(dname_len == dname->size - 1); + table->field[0]->store((char *)dname->data, dname_len, system_charset_info); + size_t iname_len = strlen((const char *)iname->data); assert(iname_len == iname->size - 1); - table->field[0]->store( - (char *)dname->data, - dname_len, - system_charset_info - ); - table->field[1]->store( - (char *)iname->data, - iname_len, - system_charset_info - ); + table->field[1]->store((char *)iname->data, iname_len, system_charset_info); } table->field[2]->store(bt_num_blocks_allocated, false); table->field[3]->store(bt_num_blocks_in_use, false); table->field[4]->store(bt_size_allocated, false); table->field[5]->store(bt_size_in_use, false); + // split the dname + { + String database_name, table_name, dictionary_name; + tokudb_split_dname((const char *)dname->data, database_name, table_name, dictionary_name); + table->field[6]->store(database_name.c_ptr(), database_name.length(), system_charset_info); + table->field[7]->store(table_name.c_ptr(), table_name.length(), system_charset_info); + table->field[8]->store(dictionary_name.c_ptr(), dictionary_name.length(), system_charset_info); + } error = schema_table_store_record(thd, table); exit: @@ -1645,12 +1648,7 @@ static int tokudb_fractal_tree_info(TABLE *table, THD *thd) { goto cleanup; } while (error == 0) { - error = tmp_cursor->c_get( - tmp_cursor, - &curr_key, - &curr_val, - DB_NEXT - ); + error = tmp_cursor->c_get(tmp_cursor, &curr_key, &curr_val, DB_NEXT); if (!error) { error = tokudb_report_fractal_tree_info_for_db(&curr_key, &curr_val, table, thd); } @@ -1714,6 +1712,9 @@ static ST_FIELD_INFO tokudb_fractal_tree_block_map_field_info[] = { {"blocknum", 0, MYSQL_TYPE_LONGLONG, 0, 0, NULL, SKIP_OPEN_TABLE }, {"offset", 0, MYSQL_TYPE_LONGLONG, 0, MY_I_S_MAYBE_NULL, NULL, SKIP_OPEN_TABLE }, {"size", 0, MYSQL_TYPE_LONGLONG, 0, MY_I_S_MAYBE_NULL, NULL, SKIP_OPEN_TABLE }, + 
{"table_schema", 256, MYSQL_TYPE_STRING, 0, 0, NULL, SKIP_OPEN_TABLE }, + {"table_name", 256, MYSQL_TYPE_STRING, 0, 0, NULL, SKIP_OPEN_TABLE }, + {"table_dictionary_name", 256, MYSQL_TYPE_STRING, 0, 0, NULL, SKIP_OPEN_TABLE }, {NULL, 0, MYSQL_TYPE_NULL, 0, 0, NULL, SKIP_OPEN_TABLE} }; @@ -1786,19 +1787,13 @@ static int tokudb_report_fractal_tree_block_map_for_db(const DBT *dname, const D // See #5789 // Recalculate and check just to be safe. size_t dname_len = strlen((const char *)dname->data); - size_t iname_len = strlen((const char *)iname->data); assert(dname_len == dname->size - 1); + table->field[0]->store((char *)dname->data, dname_len, system_charset_info); + + size_t iname_len = strlen((const char *)iname->data); assert(iname_len == iname->size - 1); - table->field[0]->store( - (char *)dname->data, - dname_len, - system_charset_info - ); - table->field[1]->store( - (char *)iname->data, - iname_len, - system_charset_info - ); + table->field[1]->store((char *)iname->data, iname_len, system_charset_info); + table->field[2]->store(e.checkpoint_counts[i], false); table->field[3]->store(e.blocknums[i], false); static const int64_t freelist_null = -1; @@ -1817,6 +1812,13 @@ static int tokudb_report_fractal_tree_block_map_for_db(const DBT *dname, const D table->field[5]->store(e.sizes[i], false); } + // split the dname + String database_name, table_name, dictionary_name; + tokudb_split_dname((const char *)dname->data, database_name, table_name,dictionary_name); + table->field[6]->store(database_name.c_ptr(), database_name.length(), system_charset_info); + table->field[7]->store(table_name.c_ptr(), table_name.length(), system_charset_info); + table->field[8]->store(dictionary_name.c_ptr(), dictionary_name.length(), system_charset_info); + error = schema_table_store_record(thd, table); } @@ -1857,12 +1859,7 @@ static int tokudb_fractal_tree_block_map(TABLE *table, THD *thd) { goto cleanup; } while (error == 0) { - error = tmp_cursor->c_get( - tmp_cursor, - &curr_key, 
- &curr_val, - DB_NEXT - ); + error = tmp_cursor->c_get(tmp_cursor, &curr_key, &curr_val, DB_NEXT); if (!error) { error = tokudb_report_fractal_tree_block_map_for_db(&curr_key, &curr_val, table, thd); } From f704ecb91eb32e372447698c474b262de1724b57 Mon Sep 17 00:00:00 2001 From: Rich Prohaska Date: Wed, 27 Aug 2014 14:49:11 -0400 Subject: [PATCH 163/190] DB-712 split locks and lock_waits dname into schema, table, and dictionary --- storage/tokudb/hatoku_hton.cc | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/storage/tokudb/hatoku_hton.cc b/storage/tokudb/hatoku_hton.cc index ed7caf37003..2f659f8571e 100644 --- a/storage/tokudb/hatoku_hton.cc +++ b/storage/tokudb/hatoku_hton.cc @@ -2074,6 +2074,9 @@ static ST_FIELD_INFO tokudb_lock_waits_field_info[] = { {"lock_waits_key_left", 256, MYSQL_TYPE_STRING, 0, 0, NULL, SKIP_OPEN_TABLE }, {"lock_waits_key_right", 256, MYSQL_TYPE_STRING, 0, 0, NULL, SKIP_OPEN_TABLE }, {"lock_waits_start_time", 0, MYSQL_TYPE_LONGLONG, 0, 0, NULL, SKIP_OPEN_TABLE }, + {"lock_waits_table_schema", 256, MYSQL_TYPE_STRING, 0, 0, NULL, SKIP_OPEN_TABLE }, + {"lock_waits_table_name", 256, MYSQL_TYPE_STRING, 0, 0, NULL, SKIP_OPEN_TABLE }, + {"lock_waits_table_dictionary_name", 256, MYSQL_TYPE_STRING, 0, 0, NULL, SKIP_OPEN_TABLE }, {NULL, 0, MYSQL_TYPE_NULL, 0, 0, NULL, SKIP_OPEN_TABLE} }; @@ -2099,6 +2102,13 @@ static int tokudb_lock_waits_callback(DB *db, uint64_t requesting_txnid, const D tokudb_pretty_right_key(db, right_key, &right_str); table->field[4]->store(right_str.ptr(), right_str.length(), system_charset_info); table->field[5]->store(start_time, false); + + String database_name, table_name, dictionary_name; + tokudb_split_dname(dname, database_name, table_name, dictionary_name); + table->field[6]->store(database_name.c_ptr(), database_name.length(), system_charset_info); + table->field[7]->store(table_name.c_ptr(), table_name.length(), system_charset_info); + table->field[8]->store(dictionary_name.c_ptr(), 
dictionary_name.length(), system_charset_info); + int error = schema_table_store_record(thd, table); return error; } @@ -2144,6 +2154,9 @@ static ST_FIELD_INFO tokudb_locks_field_info[] = { {"locks_dname", 256, MYSQL_TYPE_STRING, 0, 0, NULL, SKIP_OPEN_TABLE }, {"locks_key_left", 256, MYSQL_TYPE_STRING, 0, 0, NULL, SKIP_OPEN_TABLE }, {"locks_key_right", 256, MYSQL_TYPE_STRING, 0, 0, NULL, SKIP_OPEN_TABLE }, + {"locks_table_schema", 256, MYSQL_TYPE_STRING, 0, 0, NULL, SKIP_OPEN_TABLE }, + {"locks_table_name", 256, MYSQL_TYPE_STRING, 0, 0, NULL, SKIP_OPEN_TABLE }, + {"locks_table_dictionary_name", 256, MYSQL_TYPE_STRING, 0, 0, NULL, SKIP_OPEN_TABLE }, {NULL, 0, MYSQL_TYPE_NULL, 0, 0, NULL, SKIP_OPEN_TABLE} }; @@ -2175,6 +2188,12 @@ static int tokudb_locks_callback(uint64_t txn_id, uint64_t client_id, iterate_ro tokudb_pretty_right_key(db, &right_key, &right_str); table->field[4]->store(right_str.ptr(), right_str.length(), system_charset_info); + String database_name, table_name, dictionary_name; + tokudb_split_dname(dname, database_name, table_name, dictionary_name); + table->field[5]->store(database_name.c_ptr(), database_name.length(), system_charset_info); + table->field[6]->store(table_name.c_ptr(), table_name.length(), system_charset_info); + table->field[7]->store(dictionary_name.c_ptr(), dictionary_name.length(), system_charset_info); + error = schema_table_store_record(thd, table); } return error; From 5c4c580ef14a486a48df81593dfb0f04e617141d Mon Sep 17 00:00:00 2001 From: Rich Prohaska Date: Wed, 27 Aug 2014 18:00:19 -0400 Subject: [PATCH 164/190] DB-713 separate some long running tokudb tests so that valgrind runs without --big-test can exclude them --- mysql-test/suite/tokudb.bugs/t/part_index_scan.test | 2 ++ mysql-test/suite/tokudb/t/bf_create_select_hash_part.test | 2 ++ mysql-test/suite/tokudb/t/bf_create_select_nonpart.test | 2 ++ mysql-test/suite/tokudb/t/bf_create_select_range_part.test | 2 ++ 
mysql-test/suite/tokudb/t/bf_create_temp_select_nonpart.test | 2 ++ mysql-test/suite/tokudb/t/bf_delete_nonpart.test | 1 + mysql-test/suite/tokudb/t/bf_insert_select_dup_key_nonpart.test | 2 ++ mysql-test/suite/tokudb/t/bf_insert_select_nonpart.test | 2 ++ mysql-test/suite/tokudb/t/bf_replace_select_nonpart.test | 2 ++ mysql-test/suite/tokudb/t/bf_select_part.test | 2 ++ 10 files changed, 19 insertions(+) diff --git a/mysql-test/suite/tokudb.bugs/t/part_index_scan.test b/mysql-test/suite/tokudb.bugs/t/part_index_scan.test index 69c4380bf50..b38a979752b 100644 --- a/mysql-test/suite/tokudb.bugs/t/part_index_scan.test +++ b/mysql-test/suite/tokudb.bugs/t/part_index_scan.test @@ -1,7 +1,9 @@ # verify that index scans on parititions are not slow # due to tokudb bulk fetch not being used + source include/have_tokudb.inc; source include/have_partition.inc; +source include/big_test.inc; set default_storage_engine='tokudb'; disable_warnings; drop table if exists t,t1,t2,t3; diff --git a/mysql-test/suite/tokudb/t/bf_create_select_hash_part.test b/mysql-test/suite/tokudb/t/bf_create_select_hash_part.test index 1b7f07f2527..b9ec8c52aa8 100644 --- a/mysql-test/suite/tokudb/t/bf_create_select_hash_part.test +++ b/mysql-test/suite/tokudb/t/bf_create_select_hash_part.test @@ -1,8 +1,10 @@ # Verify that index and range scans are not slow # on tables during create select statements # due to tokudb bulk fetch not being used + source include/have_tokudb.inc; source include/have_partition.inc; +source include/big_test.inc; set default_storage_engine='tokudb'; disable_warnings; drop table if exists t,t1,t2,t3; diff --git a/mysql-test/suite/tokudb/t/bf_create_select_nonpart.test b/mysql-test/suite/tokudb/t/bf_create_select_nonpart.test index 7e70eb2da6b..a8b7f9c9f6a 100644 --- a/mysql-test/suite/tokudb/t/bf_create_select_nonpart.test +++ b/mysql-test/suite/tokudb/t/bf_create_select_nonpart.test @@ -1,7 +1,9 @@ # Verify that index and range scans are not slow # on tables during create 
select statements # due to tokudb bulk fetch not being used + source include/have_tokudb.inc; +source include/big_test.inc; set default_storage_engine='tokudb'; disable_warnings; drop table if exists t,t1,t2; diff --git a/mysql-test/suite/tokudb/t/bf_create_select_range_part.test b/mysql-test/suite/tokudb/t/bf_create_select_range_part.test index 5b94f5e97bf..a48f272b84f 100644 --- a/mysql-test/suite/tokudb/t/bf_create_select_range_part.test +++ b/mysql-test/suite/tokudb/t/bf_create_select_range_part.test @@ -1,8 +1,10 @@ # Verify that index and range scans are not slow # on tables during create select statements # due to tokudb bulk fetch not being used + source include/have_tokudb.inc; source include/have_partition.inc; +source include/big_test.inc; set default_storage_engine='tokudb'; disable_warnings; drop table if exists t,t1,t2; diff --git a/mysql-test/suite/tokudb/t/bf_create_temp_select_nonpart.test b/mysql-test/suite/tokudb/t/bf_create_temp_select_nonpart.test index 56e3c91dbdf..fdd665076c9 100644 --- a/mysql-test/suite/tokudb/t/bf_create_temp_select_nonpart.test +++ b/mysql-test/suite/tokudb/t/bf_create_temp_select_nonpart.test @@ -1,7 +1,9 @@ # Verify that index and range scans are not slow # on temporary tables during create select statements # due to tokudb bulk fetch not being used + source include/have_tokudb.inc; +source include/big_test.inc; set default_storage_engine='tokudb'; disable_warnings; drop table if exists t,t1,t2; diff --git a/mysql-test/suite/tokudb/t/bf_delete_nonpart.test b/mysql-test/suite/tokudb/t/bf_delete_nonpart.test index b80b3e6c3c9..65b26df230e 100644 --- a/mysql-test/suite/tokudb/t/bf_delete_nonpart.test +++ b/mysql-test/suite/tokudb/t/bf_delete_nonpart.test @@ -2,6 +2,7 @@ # at least twice as fast source include/have_tokudb.inc; +source include/big_test.inc; set default_storage_engine='tokudb'; disable_warnings; drop table if exists t; diff --git a/mysql-test/suite/tokudb/t/bf_insert_select_dup_key_nonpart.test 
b/mysql-test/suite/tokudb/t/bf_insert_select_dup_key_nonpart.test index 5774a3c17d7..3200beeaba9 100644 --- a/mysql-test/suite/tokudb/t/bf_insert_select_dup_key_nonpart.test +++ b/mysql-test/suite/tokudb/t/bf_insert_select_dup_key_nonpart.test @@ -3,7 +3,9 @@ # due to tokudb bulk fetch not being used. # In this test case, the on duplicate key condition does not need to fire # since the performance of the embedded select statement is all we are measuring. + source include/have_tokudb.inc; +source include/big_test.inc; set default_storage_engine='tokudb'; disable_warnings; drop table if exists t,t1,t2; diff --git a/mysql-test/suite/tokudb/t/bf_insert_select_nonpart.test b/mysql-test/suite/tokudb/t/bf_insert_select_nonpart.test index 31399dedae7..11f14013a30 100644 --- a/mysql-test/suite/tokudb/t/bf_insert_select_nonpart.test +++ b/mysql-test/suite/tokudb/t/bf_insert_select_nonpart.test @@ -1,7 +1,9 @@ # Verify that index and range scans are not slow # on tables during insert select statements # due to tokudb bulk fetch not being used + source include/have_tokudb.inc; +source include/big_test.inc; set default_storage_engine='tokudb'; disable_warnings; drop table if exists t,t1,t2; diff --git a/mysql-test/suite/tokudb/t/bf_replace_select_nonpart.test b/mysql-test/suite/tokudb/t/bf_replace_select_nonpart.test index 6e900bcd9d3..380eb5adda8 100644 --- a/mysql-test/suite/tokudb/t/bf_replace_select_nonpart.test +++ b/mysql-test/suite/tokudb/t/bf_replace_select_nonpart.test @@ -1,7 +1,9 @@ # Verify that index and range scans are not slow # on tables during replace select and insert ignore statements # due to tokudb bulk fetch not being used + source include/have_tokudb.inc; +source include/big_test.inc; set default_storage_engine='tokudb'; disable_warnings; drop table if exists t,t1,t2; diff --git a/mysql-test/suite/tokudb/t/bf_select_part.test b/mysql-test/suite/tokudb/t/bf_select_part.test index 2e7bfd9a0d2..d9faa4b0bb2 100644 --- 
a/mysql-test/suite/tokudb/t/bf_select_part.test +++ b/mysql-test/suite/tokudb/t/bf_select_part.test @@ -2,8 +2,10 @@ # on tables during create select statements # against hash and range partitioned tables # due to tokudb bulk fetch not being used + source include/have_tokudb.inc; source include/have_partition.inc; +source include/big_test.inc; set default_storage_engine='tokudb'; disable_warnings; drop table if exists t,t1,t2,t3; From 8c5cd26053b92f32761300ac1f24a79e8057a8ac Mon Sep 17 00:00:00 2001 From: Rich Prohaska Date: Thu, 28 Aug 2014 06:19:32 -0400 Subject: [PATCH 165/190] DB-703 DB-704 split dname into schema, table, and dictionary --- storage/tokudb/hatoku_hton.cc | 115 +++++++++++++++++----------------- 1 file changed, 56 insertions(+), 59 deletions(-) diff --git a/storage/tokudb/hatoku_hton.cc b/storage/tokudb/hatoku_hton.cc index 79aff483cec..ed7caf37003 100644 --- a/storage/tokudb/hatoku_hton.cc +++ b/storage/tokudb/hatoku_hton.cc @@ -1436,6 +1436,25 @@ static struct st_mysql_sys_var *tokudb_system_variables[] = { NULL }; +// Split ./database/table-dictionary into database, table and dictionary strings +static void tokudb_split_dname(const char *dname, String &database_name, String &table_name, String &dictionary_name) { + const char *splitter = strchr(dname, '/'); + if (splitter) { + const char *database_ptr = splitter+1; + const char *table_ptr = strchr(database_ptr, '/'); + if (table_ptr) { + database_name.append(database_ptr, table_ptr - database_ptr); + table_ptr += 1; + const char *dictionary_ptr = strchr(table_ptr, '-'); + if (dictionary_ptr) { + table_name.append(table_ptr, dictionary_ptr - table_ptr); + dictionary_ptr += 1; + dictionary_name.append(dictionary_ptr); + } + } + } +} + struct st_mysql_storage_engine tokudb_storage_engine = { MYSQL_HANDLERTON_INTERFACE_VERSION }; static struct st_mysql_information_schema tokudb_file_map_information_schema = { MYSQL_INFORMATION_SCHEMA_INTERFACE_VERSION }; @@ -1481,31 +1500,12 @@ static int 
tokudb_file_map(TABLE *table, THD *thd) { assert(iname_len == curr_val.size - 1); table->field[1]->store(iname, iname_len, system_charset_info); - // denormalize the dname - const char *database_name = NULL; - size_t database_len = 0; - const char *table_name = NULL; - size_t table_len = 0; - const char *dictionary_name = NULL; - size_t dictionary_len = 0; - database_name = strchr(dname, '/'); - if (database_name) { - database_name += 1; - table_name = strchr(database_name, '/'); - if (table_name) { - database_len = table_name - database_name; - table_name += 1; - dictionary_name = strchr(table_name, '-'); - if (dictionary_name) { - table_len = dictionary_name - table_name; - dictionary_name += 1; - dictionary_len = strlen(dictionary_name); - } - } - } - table->field[2]->store(database_name, database_len, system_charset_info); - table->field[3]->store(table_name, table_len, system_charset_info); - table->field[4]->store(dictionary_name, dictionary_len, system_charset_info); + // split the dname + String database_name, table_name, dictionary_name; + tokudb_split_dname(dname, database_name, table_name, dictionary_name); + table->field[2]->store(database_name.c_ptr(), database_name.length(), system_charset_info); + table->field[3]->store(table_name.c_ptr(), table_name.length(), system_charset_info); + table->field[4]->store(dictionary_name.c_ptr(), dictionary_name.length(), system_charset_info); error = schema_table_store_record(thd, table); } @@ -1566,6 +1566,9 @@ static ST_FIELD_INFO tokudb_fractal_tree_info_field_info[] = { {"bt_num_blocks_in_use", 0, MYSQL_TYPE_LONGLONG, 0, 0, NULL, SKIP_OPEN_TABLE }, {"bt_size_allocated", 0, MYSQL_TYPE_LONGLONG, 0, 0, NULL, SKIP_OPEN_TABLE }, {"bt_size_in_use", 0, MYSQL_TYPE_LONGLONG, 0, 0, NULL, SKIP_OPEN_TABLE }, + {"table_schema", 256, MYSQL_TYPE_STRING, 0, 0, NULL, SKIP_OPEN_TABLE }, + {"table_name", 256, MYSQL_TYPE_STRING, 0, 0, NULL, SKIP_OPEN_TABLE }, + {"table_dictionary_name", 256, MYSQL_TYPE_STRING, 0, 0, NULL, 
SKIP_OPEN_TABLE }, {NULL, 0, MYSQL_TYPE_NULL, 0, 0, NULL, SKIP_OPEN_TABLE} }; @@ -1603,25 +1606,25 @@ static int tokudb_report_fractal_tree_info_for_db(const DBT *dname, const DBT *i // Recalculate and check just to be safe. { size_t dname_len = strlen((const char *)dname->data); - size_t iname_len = strlen((const char *)iname->data); assert(dname_len == dname->size - 1); + table->field[0]->store((char *)dname->data, dname_len, system_charset_info); + size_t iname_len = strlen((const char *)iname->data); assert(iname_len == iname->size - 1); - table->field[0]->store( - (char *)dname->data, - dname_len, - system_charset_info - ); - table->field[1]->store( - (char *)iname->data, - iname_len, - system_charset_info - ); + table->field[1]->store((char *)iname->data, iname_len, system_charset_info); } table->field[2]->store(bt_num_blocks_allocated, false); table->field[3]->store(bt_num_blocks_in_use, false); table->field[4]->store(bt_size_allocated, false); table->field[5]->store(bt_size_in_use, false); + // split the dname + { + String database_name, table_name, dictionary_name; + tokudb_split_dname((const char *)dname->data, database_name, table_name, dictionary_name); + table->field[6]->store(database_name.c_ptr(), database_name.length(), system_charset_info); + table->field[7]->store(table_name.c_ptr(), table_name.length(), system_charset_info); + table->field[8]->store(dictionary_name.c_ptr(), dictionary_name.length(), system_charset_info); + } error = schema_table_store_record(thd, table); exit: @@ -1645,12 +1648,7 @@ static int tokudb_fractal_tree_info(TABLE *table, THD *thd) { goto cleanup; } while (error == 0) { - error = tmp_cursor->c_get( - tmp_cursor, - &curr_key, - &curr_val, - DB_NEXT - ); + error = tmp_cursor->c_get(tmp_cursor, &curr_key, &curr_val, DB_NEXT); if (!error) { error = tokudb_report_fractal_tree_info_for_db(&curr_key, &curr_val, table, thd); } @@ -1714,6 +1712,9 @@ static ST_FIELD_INFO tokudb_fractal_tree_block_map_field_info[] = { {"blocknum", 
0, MYSQL_TYPE_LONGLONG, 0, 0, NULL, SKIP_OPEN_TABLE }, {"offset", 0, MYSQL_TYPE_LONGLONG, 0, MY_I_S_MAYBE_NULL, NULL, SKIP_OPEN_TABLE }, {"size", 0, MYSQL_TYPE_LONGLONG, 0, MY_I_S_MAYBE_NULL, NULL, SKIP_OPEN_TABLE }, + {"table_schema", 256, MYSQL_TYPE_STRING, 0, 0, NULL, SKIP_OPEN_TABLE }, + {"table_name", 256, MYSQL_TYPE_STRING, 0, 0, NULL, SKIP_OPEN_TABLE }, + {"table_dictionary_name", 256, MYSQL_TYPE_STRING, 0, 0, NULL, SKIP_OPEN_TABLE }, {NULL, 0, MYSQL_TYPE_NULL, 0, 0, NULL, SKIP_OPEN_TABLE} }; @@ -1786,19 +1787,13 @@ static int tokudb_report_fractal_tree_block_map_for_db(const DBT *dname, const D // See #5789 // Recalculate and check just to be safe. size_t dname_len = strlen((const char *)dname->data); - size_t iname_len = strlen((const char *)iname->data); assert(dname_len == dname->size - 1); + table->field[0]->store((char *)dname->data, dname_len, system_charset_info); + + size_t iname_len = strlen((const char *)iname->data); assert(iname_len == iname->size - 1); - table->field[0]->store( - (char *)dname->data, - dname_len, - system_charset_info - ); - table->field[1]->store( - (char *)iname->data, - iname_len, - system_charset_info - ); + table->field[1]->store((char *)iname->data, iname_len, system_charset_info); + table->field[2]->store(e.checkpoint_counts[i], false); table->field[3]->store(e.blocknums[i], false); static const int64_t freelist_null = -1; @@ -1817,6 +1812,13 @@ static int tokudb_report_fractal_tree_block_map_for_db(const DBT *dname, const D table->field[5]->store(e.sizes[i], false); } + // split the dname + String database_name, table_name, dictionary_name; + tokudb_split_dname((const char *)dname->data, database_name, table_name,dictionary_name); + table->field[6]->store(database_name.c_ptr(), database_name.length(), system_charset_info); + table->field[7]->store(table_name.c_ptr(), table_name.length(), system_charset_info); + table->field[8]->store(dictionary_name.c_ptr(), dictionary_name.length(), system_charset_info); + error = 
schema_table_store_record(thd, table); } @@ -1857,12 +1859,7 @@ static int tokudb_fractal_tree_block_map(TABLE *table, THD *thd) { goto cleanup; } while (error == 0) { - error = tmp_cursor->c_get( - tmp_cursor, - &curr_key, - &curr_val, - DB_NEXT - ); + error = tmp_cursor->c_get(tmp_cursor, &curr_key, &curr_val, DB_NEXT); if (!error) { error = tokudb_report_fractal_tree_block_map_for_db(&curr_key, &curr_val, table, thd); } From 08eb88eec4ab42a714ac49c510cfb23fdb2a8343 Mon Sep 17 00:00:00 2001 From: Rich Prohaska Date: Thu, 28 Aug 2014 06:19:32 -0400 Subject: [PATCH 166/190] DB-712 split locks and lock_waits dname into schema, table, and dictionary --- storage/tokudb/hatoku_hton.cc | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/storage/tokudb/hatoku_hton.cc b/storage/tokudb/hatoku_hton.cc index ed7caf37003..2f659f8571e 100644 --- a/storage/tokudb/hatoku_hton.cc +++ b/storage/tokudb/hatoku_hton.cc @@ -2074,6 +2074,9 @@ static ST_FIELD_INFO tokudb_lock_waits_field_info[] = { {"lock_waits_key_left", 256, MYSQL_TYPE_STRING, 0, 0, NULL, SKIP_OPEN_TABLE }, {"lock_waits_key_right", 256, MYSQL_TYPE_STRING, 0, 0, NULL, SKIP_OPEN_TABLE }, {"lock_waits_start_time", 0, MYSQL_TYPE_LONGLONG, 0, 0, NULL, SKIP_OPEN_TABLE }, + {"lock_waits_table_schema", 256, MYSQL_TYPE_STRING, 0, 0, NULL, SKIP_OPEN_TABLE }, + {"lock_waits_table_name", 256, MYSQL_TYPE_STRING, 0, 0, NULL, SKIP_OPEN_TABLE }, + {"lock_waits_table_dictionary_name", 256, MYSQL_TYPE_STRING, 0, 0, NULL, SKIP_OPEN_TABLE }, {NULL, 0, MYSQL_TYPE_NULL, 0, 0, NULL, SKIP_OPEN_TABLE} }; @@ -2099,6 +2102,13 @@ static int tokudb_lock_waits_callback(DB *db, uint64_t requesting_txnid, const D tokudb_pretty_right_key(db, right_key, &right_str); table->field[4]->store(right_str.ptr(), right_str.length(), system_charset_info); table->field[5]->store(start_time, false); + + String database_name, table_name, dictionary_name; + tokudb_split_dname(dname, database_name, table_name, dictionary_name); + 
table->field[6]->store(database_name.c_ptr(), database_name.length(), system_charset_info); + table->field[7]->store(table_name.c_ptr(), table_name.length(), system_charset_info); + table->field[8]->store(dictionary_name.c_ptr(), dictionary_name.length(), system_charset_info); + int error = schema_table_store_record(thd, table); return error; } @@ -2144,6 +2154,9 @@ static ST_FIELD_INFO tokudb_locks_field_info[] = { {"locks_dname", 256, MYSQL_TYPE_STRING, 0, 0, NULL, SKIP_OPEN_TABLE }, {"locks_key_left", 256, MYSQL_TYPE_STRING, 0, 0, NULL, SKIP_OPEN_TABLE }, {"locks_key_right", 256, MYSQL_TYPE_STRING, 0, 0, NULL, SKIP_OPEN_TABLE }, + {"locks_table_schema", 256, MYSQL_TYPE_STRING, 0, 0, NULL, SKIP_OPEN_TABLE }, + {"locks_table_name", 256, MYSQL_TYPE_STRING, 0, 0, NULL, SKIP_OPEN_TABLE }, + {"locks_table_dictionary_name", 256, MYSQL_TYPE_STRING, 0, 0, NULL, SKIP_OPEN_TABLE }, {NULL, 0, MYSQL_TYPE_NULL, 0, 0, NULL, SKIP_OPEN_TABLE} }; @@ -2175,6 +2188,12 @@ static int tokudb_locks_callback(uint64_t txn_id, uint64_t client_id, iterate_ro tokudb_pretty_right_key(db, &right_key, &right_str); table->field[4]->store(right_str.ptr(), right_str.length(), system_charset_info); + String database_name, table_name, dictionary_name; + tokudb_split_dname(dname, database_name, table_name, dictionary_name); + table->field[5]->store(database_name.c_ptr(), database_name.length(), system_charset_info); + table->field[6]->store(table_name.c_ptr(), table_name.length(), system_charset_info); + table->field[7]->store(dictionary_name.c_ptr(), dictionary_name.length(), system_charset_info); + error = schema_table_store_record(thd, table); } return error; From f7ee3d4a26e1169c71329eea7a53c02095bf4b80 Mon Sep 17 00:00:00 2001 From: Rich Prohaska Date: Thu, 28 Aug 2014 15:11:55 -0400 Subject: [PATCH 167/190] DB-712 fix tokudb locks info schema test results due to new schema --- .../r/i_s_tokudb_lock_waits_released.result | 40 +++++++++---------- .../r/i_s_tokudb_lock_waits_timeout.result | 20 
+++++----- .../suite/tokudb/r/i_s_tokudb_locks.result | 18 ++++----- .../tokudb/r/i_s_tokudb_locks_released.result | 12 +++--- 4 files changed, 45 insertions(+), 45 deletions(-) diff --git a/mysql-test/suite/tokudb/r/i_s_tokudb_lock_waits_released.result b/mysql-test/suite/tokudb/r/i_s_tokudb_lock_waits_released.result index 10431bb812a..018900c7b98 100644 --- a/mysql-test/suite/tokudb/r/i_s_tokudb_lock_waits_released.result +++ b/mysql-test/suite/tokudb/r/i_s_tokudb_lock_waits_released.result @@ -5,64 +5,64 @@ create table t (id int primary key); select * from information_schema.tokudb_trx; trx_id trx_mysql_thread_id select * from information_schema.tokudb_locks; -locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right +locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right locks_table_schema locks_table_name locks_table_dictionary_name select * from information_schema.tokudb_lock_waits; -requesting_trx_id blocking_trx_id lock_waits_dname lock_waits_key_left lock_waits_key_right lock_waits_start_time +requesting_trx_id blocking_trx_id lock_waits_dname lock_waits_key_left lock_waits_key_right lock_waits_start_time lock_waits_table_schema lock_waits_table_name lock_waits_table_dictionary_name set autocommit=0; set tokudb_prelock_empty=OFF; insert into t values (1); set autocommit=0; insert into t values (1); select * from information_schema.tokudb_locks; -locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right -TRX_ID MYSQL_ID ./test/t-main 0001000000 0001000000 +locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right locks_table_schema locks_table_name locks_table_dictionary_name +TRX_ID MYSQL_ID ./test/t-main 0001000000 0001000000 test t main select * from information_schema.tokudb_lock_waits; -requesting_trx_id blocking_trx_id lock_waits_dname lock_waits_key_left lock_waits_key_right lock_waits_start_time -REQUEST_TRX_ID BLOCK_TRX_ID ./test/t-main 0001000000 0001000000 
LOCK_WAITS_START_TIME +requesting_trx_id blocking_trx_id lock_waits_dname lock_waits_key_left lock_waits_key_right lock_waits_start_time lock_waits_table_schema lock_waits_table_name lock_waits_table_dictionary_name +REQUEST_TRX_ID BLOCK_TRX_ID ./test/t-main 0001000000 0001000000 LOCK_WAITS_START_TIME test t main select * from information_schema.tokudb_trx; trx_id trx_mysql_thread_id TRX_ID MYSQL_ID TRX_ID MYSQL_ID commit; select * from information_schema.tokudb_locks; -locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right -TRX_ID MYSQL_ID ./test/t-main 0001000000 0001000000 +locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right locks_table_schema locks_table_name locks_table_dictionary_name +TRX_ID MYSQL_ID ./test/t-main 0001000000 0001000000 test t main select * from information_schema.tokudb_lock_waits; -requesting_trx_id blocking_trx_id lock_waits_dname lock_waits_key_left lock_waits_key_right lock_waits_start_time +requesting_trx_id blocking_trx_id lock_waits_dname lock_waits_key_left lock_waits_key_right lock_waits_start_time lock_waits_table_schema lock_waits_table_name lock_waits_table_dictionary_name ERROR 23000: Duplicate entry '1' for key 'PRIMARY' commit; select * from information_schema.tokudb_trx; trx_id trx_mysql_thread_id select * from information_schema.tokudb_locks; -locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right +locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right locks_table_schema locks_table_name locks_table_dictionary_name select * from information_schema.tokudb_lock_waits; -requesting_trx_id blocking_trx_id lock_waits_dname lock_waits_key_left lock_waits_key_right lock_waits_start_time +requesting_trx_id blocking_trx_id lock_waits_dname lock_waits_key_left lock_waits_key_right lock_waits_start_time lock_waits_table_schema lock_waits_table_name lock_waits_table_dictionary_name set autocommit=0; set tokudb_prelock_empty=OFF; replace into t 
values (1); set autocommit=0; replace into t values (1); select * from information_schema.tokudb_locks; -locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right -TRX_ID MYSQL_ID ./test/t-main 0001000000 0001000000 +locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right locks_table_schema locks_table_name locks_table_dictionary_name +TRX_ID MYSQL_ID ./test/t-main 0001000000 0001000000 test t main select * from information_schema.tokudb_lock_waits; -requesting_trx_id blocking_trx_id lock_waits_dname lock_waits_key_left lock_waits_key_right lock_waits_start_time -REQUEST_TRX_ID BLOCK_TRX_ID ./test/t-main 0001000000 0001000000 LOCK_WAITS_START_TIME +requesting_trx_id blocking_trx_id lock_waits_dname lock_waits_key_left lock_waits_key_right lock_waits_start_time lock_waits_table_schema lock_waits_table_name lock_waits_table_dictionary_name +REQUEST_TRX_ID BLOCK_TRX_ID ./test/t-main 0001000000 0001000000 LOCK_WAITS_START_TIME test t main select * from information_schema.tokudb_trx; trx_id trx_mysql_thread_id TRX_ID MYSQL_ID TRX_ID MYSQL_ID commit; select * from information_schema.tokudb_locks; -locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right -TRX_ID MYSQL_ID ./test/t-main 0001000000 0001000000 +locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right locks_table_schema locks_table_name locks_table_dictionary_name +TRX_ID MYSQL_ID ./test/t-main 0001000000 0001000000 test t main select * from information_schema.tokudb_lock_waits; -requesting_trx_id blocking_trx_id lock_waits_dname lock_waits_key_left lock_waits_key_right lock_waits_start_time +requesting_trx_id blocking_trx_id lock_waits_dname lock_waits_key_left lock_waits_key_right lock_waits_start_time lock_waits_table_schema lock_waits_table_name lock_waits_table_dictionary_name commit; select * from information_schema.tokudb_trx; trx_id trx_mysql_thread_id select * from information_schema.tokudb_locks; -locks_trx_id 
locks_mysql_thread_id locks_dname locks_key_left locks_key_right +locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right locks_table_schema locks_table_name locks_table_dictionary_name select * from information_schema.tokudb_lock_waits; -requesting_trx_id blocking_trx_id lock_waits_dname lock_waits_key_left lock_waits_key_right lock_waits_start_time +requesting_trx_id blocking_trx_id lock_waits_dname lock_waits_key_left lock_waits_key_right lock_waits_start_time lock_waits_table_schema lock_waits_table_name lock_waits_table_dictionary_name drop table t; diff --git a/mysql-test/suite/tokudb/r/i_s_tokudb_lock_waits_timeout.result b/mysql-test/suite/tokudb/r/i_s_tokudb_lock_waits_timeout.result index 10e3830506d..b9fca50b507 100644 --- a/mysql-test/suite/tokudb/r/i_s_tokudb_lock_waits_timeout.result +++ b/mysql-test/suite/tokudb/r/i_s_tokudb_lock_waits_timeout.result @@ -5,35 +5,35 @@ create table t (id int primary key); select * from information_schema.tokudb_trx; trx_id trx_mysql_thread_id select * from information_schema.tokudb_locks; -locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right +locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right locks_table_schema locks_table_name locks_table_dictionary_name select * from information_schema.tokudb_lock_waits; -requesting_trx_id blocking_trx_id lock_waits_dname lock_waits_key_left lock_waits_key_right lock_waits_start_time +requesting_trx_id blocking_trx_id lock_waits_dname lock_waits_key_left lock_waits_key_right lock_waits_start_time lock_waits_table_schema lock_waits_table_name lock_waits_table_dictionary_name set autocommit=0; set tokudb_prelock_empty=OFF; insert into t values (1); set autocommit=0; insert into t values (1); select * from information_schema.tokudb_locks; -locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right -TRX_ID MYSQL_ID ./test/t-main 0001000000 0001000000 +locks_trx_id locks_mysql_thread_id 
locks_dname locks_key_left locks_key_right locks_table_schema locks_table_name locks_table_dictionary_name +TRX_ID MYSQL_ID ./test/t-main 0001000000 0001000000 test t main select * from information_schema.tokudb_lock_waits; -requesting_trx_id blocking_trx_id lock_waits_dname lock_waits_key_left lock_waits_key_right lock_waits_start_time -REQUEST_TRX_ID BLOCK_TRX_ID ./test/t-main 0001000000 0001000000 LOCK_WAITS_START_TIME +requesting_trx_id blocking_trx_id lock_waits_dname lock_waits_key_left lock_waits_key_right lock_waits_start_time lock_waits_table_schema lock_waits_table_name lock_waits_table_dictionary_name +REQUEST_TRX_ID BLOCK_TRX_ID ./test/t-main 0001000000 0001000000 LOCK_WAITS_START_TIME test t main select * from information_schema.tokudb_trx; trx_id trx_mysql_thread_id TRX_ID MYSQL_ID TRX_ID MYSQL_ID commit; select * from information_schema.tokudb_locks; -locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right +locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right locks_table_schema locks_table_name locks_table_dictionary_name select * from information_schema.tokudb_lock_waits; -requesting_trx_id blocking_trx_id lock_waits_dname lock_waits_key_left lock_waits_key_right lock_waits_start_time +requesting_trx_id blocking_trx_id lock_waits_dname lock_waits_key_left lock_waits_key_right lock_waits_start_time lock_waits_table_schema lock_waits_table_name lock_waits_table_dictionary_name ERROR HY000: Lock wait timeout exceeded; try restarting transaction commit; select * from information_schema.tokudb_trx; trx_id trx_mysql_thread_id select * from information_schema.tokudb_locks; -locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right +locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right locks_table_schema locks_table_name locks_table_dictionary_name select * from information_schema.tokudb_lock_waits; -requesting_trx_id blocking_trx_id lock_waits_dname 
lock_waits_key_left lock_waits_key_right lock_waits_start_time +requesting_trx_id blocking_trx_id lock_waits_dname lock_waits_key_left lock_waits_key_right lock_waits_start_time lock_waits_table_schema lock_waits_table_name lock_waits_table_dictionary_name drop table t; diff --git a/mysql-test/suite/tokudb/r/i_s_tokudb_locks.result b/mysql-test/suite/tokudb/r/i_s_tokudb_locks.result index 9fce0695983..a07f7ba52fe 100644 --- a/mysql-test/suite/tokudb/r/i_s_tokudb_locks.result +++ b/mysql-test/suite/tokudb/r/i_s_tokudb_locks.result @@ -4,7 +4,7 @@ drop table if exists t; create table t (id int primary key); set autocommit=0; select * from information_schema.tokudb_locks; -locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right +locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right locks_table_schema locks_table_name locks_table_dictionary_name insert into t values (1); insert into t values (3); insert into t values (5); @@ -13,16 +13,16 @@ insert into t values (2); insert into t values (4); insert into t values (6); select * from information_schema.tokudb_locks order by locks_trx_id,locks_key_left; -locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right -TRX_ID MYSQL_ID ./test/t-main 0001000000 0001000000 -TRX_ID MYSQL_ID ./test/t-main 0003000000 0003000000 -TRX_ID MYSQL_ID ./test/t-main 0005000000 0005000000 -TRX_ID MYSQL_ID ./test/t-main 0002000000 0002000000 -TRX_ID MYSQL_ID ./test/t-main 0004000000 0004000000 -TRX_ID MYSQL_ID ./test/t-main 0006000000 0006000000 +locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right locks_table_schema locks_table_name locks_table_dictionary_name +TRX_ID MYSQL_ID ./test/t-main 0001000000 0001000000 test t main +TRX_ID MYSQL_ID ./test/t-main 0003000000 0003000000 test t main +TRX_ID MYSQL_ID ./test/t-main 0005000000 0005000000 test t main +TRX_ID MYSQL_ID ./test/t-main 0002000000 0002000000 test t main +TRX_ID MYSQL_ID ./test/t-main 
0004000000 0004000000 test t main +TRX_ID MYSQL_ID ./test/t-main 0006000000 0006000000 test t main commit; commit; select * from information_schema.tokudb_locks; -locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right +locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right locks_table_schema locks_table_name locks_table_dictionary_name commit; drop table t; diff --git a/mysql-test/suite/tokudb/r/i_s_tokudb_locks_released.result b/mysql-test/suite/tokudb/r/i_s_tokudb_locks_released.result index c135f3858b4..0a5862e9322 100644 --- a/mysql-test/suite/tokudb/r/i_s_tokudb_locks_released.result +++ b/mysql-test/suite/tokudb/r/i_s_tokudb_locks_released.result @@ -4,21 +4,21 @@ drop table if exists t; create table t (id int primary key); set autocommit=0; select * from information_schema.tokudb_locks; -locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right +locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right locks_table_schema locks_table_name locks_table_dictionary_name set autocommit=0; set tokudb_prelock_empty=OFF; insert into t values (1); set autocommit=0; insert into t values (1); select * from information_schema.tokudb_locks; -locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right -TRX_ID MYSQL_ID ./test/t-main 0001000000 0001000000 +locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right locks_table_schema locks_table_name locks_table_dictionary_name +TRX_ID MYSQL_ID ./test/t-main 0001000000 0001000000 test t main commit; select * from information_schema.tokudb_locks; -locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right -TRX_ID MYSQL_ID ./test/t-main 0001000000 0001000000 +locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right locks_table_schema locks_table_name locks_table_dictionary_name +TRX_ID MYSQL_ID ./test/t-main 0001000000 0001000000 test t main ERROR 23000: Duplicate 
entry '1' for key 'PRIMARY' commit; select * from information_schema.tokudb_locks; -locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right +locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right locks_table_schema locks_table_name locks_table_dictionary_name drop table t; From 31a941185b39f294d20f069bbf0e14c1a540b8b9 Mon Sep 17 00:00:00 2001 From: Leif Walsh Date: Thu, 28 Aug 2014 16:25:14 -0400 Subject: [PATCH 168/190] MX-1217 fix TokuMergeLibs to handle empty libs --- cmake/merge_archives_unix.cmake.in | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cmake/merge_archives_unix.cmake.in b/cmake/merge_archives_unix.cmake.in index e7140b8dbbc..66e23a824bd 100644 --- a/cmake/merge_archives_unix.cmake.in +++ b/cmake/merge_archives_unix.cmake.in @@ -43,7 +43,9 @@ FOREACH(LIB ${STATIC_LIBS}) LIST(LENGTH LIB_OBJ_LIST LENGTH_WITH_DUPS) SET(LIB_OBJ_LIST_NO_DUPS ${LIB_OBJ_LIST}) - LIST(REMOVE_DUPLICATES LIB_OBJ_LIST_NO_DUPS) + IF (LENGTH_WITH_DUPS GREATER 0) + LIST(REMOVE_DUPLICATES LIB_OBJ_LIST_NO_DUPS) + ENDIF () LIST(LENGTH LIB_OBJ_LIST_NO_DUPS LENGTH_WITHOUT_DUPS) IF(LENGTH_WITH_DUPS EQUAL LENGTH_WITHOUT_DUPS) From 6827d3bd51c12f3a5d60d3f15ff6b1f0bc59286b Mon Sep 17 00:00:00 2001 From: Rich Prohaska Date: Sat, 30 Aug 2014 06:35:59 -0400 Subject: [PATCH 169/190] FT-312 compile big-shutdown on osx --- src/tests/big-shutdown.cc | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/src/tests/big-shutdown.cc b/src/tests/big-shutdown.cc index 386adc15e57..0dc576e1117 100644 --- a/src/tests/big-shutdown.cc +++ b/src/tests/big-shutdown.cc @@ -94,13 +94,8 @@ PATENT RIGHTS GRANT: // function. 
#include "test.h" -#include #include -#ifndef htobe64 -#define htobe64(x) __bswap_64(x) -#endif - // Insert max_rows key/val pairs into the db static void do_inserts(DB_ENV *env, DB *db, uint64_t max_rows, size_t val_size) { char val_data[val_size]; memset(val_data, 0, val_size); @@ -112,7 +107,7 @@ static void do_inserts(DB_ENV *env, DB *db, uint64_t max_rows, size_t val_size) for (uint64_t i = 1; i <= max_rows; i++) { // pick a sequential key but it does not matter for this test. uint64_t k[2] = { - htobe64(i), random64(), + htonl(i), random64(), }; DBT key = { .data = k, .size = sizeof k }; DBT val = { .data = val_data, .size = (uint32_t) val_size }; From ed9df111a5a396cacef5d17bbb3bdb9cca1ac6c8 Mon Sep 17 00:00:00 2001 From: Rich Prohaska Date: Tue, 2 Sep 2014 09:55:24 -0400 Subject: [PATCH 170/190] FT-548 support upgrade after dirty shutdown of versions 25 through 27 --- ft/logger/log-internal.h | 1 + ft/logger/log_upgrade.cc | 16 +- ft/logger/logcursor.cc | 6 +- ft/logger/logfilemgr.cc | 5 +- ft/logger/logger.cc | 30 ++- ft/logger/logger.h | 4 + ft/tests/CMakeLists.txt | 11 + ft/tests/generate-upgrade-recovery-logs.cc | 151 +++++++++++ ft/tests/test-upgrade-recovery-logs.cc | 192 ++++++++++++++ src/tests/recover-rollinclude.cc | 275 +++++++++++++++++++++ 10 files changed, 662 insertions(+), 29 deletions(-) create mode 100644 ft/tests/generate-upgrade-recovery-logs.cc create mode 100644 ft/tests/test-upgrade-recovery-logs.cc create mode 100644 src/tests/recover-rollinclude.cc diff --git a/ft/logger/log-internal.h b/ft/logger/log-internal.h index 690a2ccfa08..5516cab30b6 100644 --- a/ft/logger/log-internal.h +++ b/ft/logger/log-internal.h @@ -188,6 +188,7 @@ struct tokulogger { int toku_logger_find_next_unused_log_file(const char *directory, long long *result); int toku_logger_find_logfiles (const char *directory, char ***resultp, int *n_logfiles); +void toku_logger_free_logfiles (char **logfiles, int n_logfiles); static inline int 
txn_has_current_rollback_log(TOKUTXN txn) { diff --git a/ft/logger/log_upgrade.cc b/ft/logger/log_upgrade.cc index 60c87240168..6631759fae0 100644 --- a/ft/logger/log_upgrade.cc +++ b/ft/logger/log_upgrade.cc @@ -209,10 +209,7 @@ cleanup: r = toku_logcursor_destroy(&cursor); assert(r == 0); cleanup_no_logcursor: - for(int i=0;ientry_valid = false; } r = lc_close_cur_logfile(*lc); - int lf; - for(lf=0;lf<(*lc)->n_logfiles;lf++) { - if ( (*lc)->logfiles[lf] ) toku_free((*lc)->logfiles[lf]); - } - if ( (*lc)->logfiles ) toku_free((*lc)->logfiles); + toku_logger_free_logfiles((*lc)->logfiles, (*lc)->n_logfiles); if ( (*lc)->logdir ) toku_free((*lc)->logdir); if ( (*lc)->buffer ) toku_free((*lc)->buffer); toku_free(*lc); diff --git a/ft/logger/logfilemgr.cc b/ft/logger/logfilemgr.cc index 4840d8908ce..04d091ae1bc 100644 --- a/ft/logger/logfilemgr.cc +++ b/ft/logger/logfilemgr.cc @@ -186,10 +186,7 @@ int toku_logfilemgr_init(TOKULOGFILEMGR lfm, const char *log_dir, TXNID *last_xi toku_logfilemgr_add_logfile_info(lfm, lf_info); toku_logcursor_destroy(&cursor); } - for(int i=0;ioutput_is_available -// +// { tokutime_t t0 = toku_time_now(); while (!logger->output_is_available) { @@ -490,7 +490,7 @@ release_output (TOKULOGGER logger, LSN fsynced_lsn) toku_cond_broadcast(&logger->output_condition); toku_mutex_unlock(&logger->output_condition_lock); } - + static void swap_inbuf_outbuf (TOKULOGGER logger) // Effect: Swap the inbuf and outbuf @@ -693,7 +693,7 @@ int toku_logger_find_logfiles (const char *directory, char ***resultp, int *n_lo while ((de=readdir(d))) { uint64_t thisl; uint32_t version_ignore; - if ( !(is_a_logfile_any_version(de->d_name, &thisl, &version_ignore)) ) continue; //#2424: Skip over files that don't match the exact logfile template + if ( !(is_a_logfile_any_version(de->d_name, &thisl, &version_ignore)) ) continue; //#2424: Skip over files that don't match the exact logfile template if (n_results+1>=result_limit) { result_limit*=2; 
XREALLOC_N(result_limit, result); @@ -707,7 +707,7 @@ int toku_logger_find_logfiles (const char *directory, char ***resultp, int *n_lo // which are one character longer than old log file names ("xxx.tokulog2"). The comparison function // won't look beyond the terminating NUL, so an extra character in the comparison string doesn't matter. // Allow room for terminating NUL after "xxx.tokulog13" even if result[0] is of form "xxx.tokulog2." - int width = sizeof(result[0]+2); + int width = sizeof(result[0]+2); qsort(result, n_results, width, logfilenamecompare); *resultp = result; *n_logfiles = n_results; @@ -715,6 +715,12 @@ int toku_logger_find_logfiles (const char *directory, char ***resultp, int *n_lo return d ? closedir(d) : 0; } +void toku_logger_free_logfiles(char **logfiles, int n_logfiles) { + for (int i = 0; i < n_logfiles; i++) + toku_free(logfiles[i]); + toku_free(logfiles); +} + static int open_logfile (TOKULOGGER logger) // Entry and Exit: This thread has permission to modify the output. 
{ @@ -723,7 +729,7 @@ static int open_logfile (TOKULOGGER logger) snprintf(fname, fnamelen, "%s/log%012lld.tokulog%d", logger->directory, logger->next_log_file_number, TOKU_LOG_VERSION); long long index = logger->next_log_file_number; if (logger->write_log_files) { - logger->fd = open(fname, O_CREAT+O_WRONLY+O_TRUNC+O_EXCL+O_BINARY, S_IRUSR+S_IWUSR); + logger->fd = open(fname, O_CREAT+O_WRONLY+O_TRUNC+O_EXCL+O_BINARY, S_IRUSR+S_IWUSR); if (logger->fd==-1) { return get_error_errno(); } @@ -741,7 +747,7 @@ static int open_logfile (TOKULOGGER logger) if ( logger->write_log_files ) { TOKULOGFILEINFO XMALLOC(lf_info); lf_info->index = index; - lf_info->maxlsn = logger->written_lsn; + lf_info->maxlsn = logger->written_lsn; lf_info->version = TOKU_LOG_VERSION; toku_logfilemgr_add_logfile_info(logger->logfilemgr, lf_info); } @@ -770,7 +776,7 @@ void toku_logger_maybe_trim_log(TOKULOGGER logger, LSN trim_lsn) int n_logfiles = toku_logfilemgr_num_logfiles(lfm); TOKULOGFILEINFO lf_info = NULL; - + if ( logger->write_log_files && logger->trim_log_files) { while ( n_logfiles > 1 ) { // don't delete current logfile uint32_t log_version; @@ -850,7 +856,7 @@ void toku_logger_maybe_fsync(TOKULOGGER logger, LSN lsn, int do_fsync, bool hold } static void -logger_write_buffer(TOKULOGGER logger, LSN *fsynced_lsn) +logger_write_buffer(TOKULOGGER logger, LSN *fsynced_lsn) // Entry: Holds the input lock and permission to modify output. // Exit: Holds only the permission to modify output. // Effect: Write the buffers to the output. If DO_FSYNC is true, then fsync. 
@@ -878,7 +884,7 @@ int toku_logger_restart(TOKULOGGER logger, LSN lastlsn) // close the log file if ( logger->write_log_files) { // fsyncs don't work to /dev/null - toku_file_fsync_without_accounting(logger->fd); + toku_file_fsync_without_accounting(logger->fd); } r = close(logger->fd); assert(r == 0); logger->fd = -1; @@ -901,7 +907,7 @@ void toku_logger_log_fcreate (TOKUTXN txn, const char *fname, FILENUM filenum, u if (txn) { BYTESTRING bs_fname = { .len = (uint32_t) strlen(fname), .data = (char *) fname }; // fsync log on fcreate - toku_log_fcreate (txn->logger, (LSN*)0, 1, txn, toku_txn_get_txnid(txn), filenum, + toku_log_fcreate (txn->logger, (LSN*)0, 1, txn, toku_txn_get_txnid(txn), filenum, bs_fname, mode, treeflags, nodesize, basementnodesize, compression_method); } } @@ -1339,7 +1345,7 @@ int toku_logger_log_archive (TOKULOGGER logger, char ***logs_p, int flags) { for (i=all_n_logs-2; i>=0; i--) { // start at all_n_logs-2 because we never archive the most recent log r = peek_at_log(logger, all_logs[i], &earliest_lsn_in_logfile); if (r!=0) continue; // In case of error, just keep going - + if (earliest_lsn_in_logfile.lsn <= save_lsn.lsn) { break; } @@ -1428,7 +1434,7 @@ toku_logger_get_status(TOKULOGGER logger, LOGGER_STATUS statp) { ////////////////////////////////////////////////////////////////////////////////////////////////////// -// Used for upgrade: +// Used for upgrade: // if any valid log files exist in log_dir, then // set *found_any_logs to true and set *version_found to version number of latest log int diff --git a/ft/logger/logger.h b/ft/logger/logger.h index 8a81c67b477..83e6c9a7378 100644 --- a/ft/logger/logger.h +++ b/ft/logger/logger.h @@ -103,6 +103,10 @@ enum { TOKU_LOG_VERSION_2 = 2, //After 2 we linked the log version to the FT_LAYOUT VERSION. 
//So it went from 2 to 13 (3-12 do not exist) + TOKU_LOG_VERSION_24 = 24, + TOKU_LOG_VERSION_25 = 25, // change rollinclude rollback log entry + TOKU_LOG_VERSION_26 = 26, // no change from 25 + TOKU_LOG_VERSION_27 = 27, // no change from 26 TOKU_LOG_VERSION = FT_LAYOUT_VERSION, TOKU_LOG_MIN_SUPPORTED_VERSION = FT_LAYOUT_MIN_SUPPORTED_VERSION, }; diff --git a/ft/tests/CMakeLists.txt b/ft/tests/CMakeLists.txt index 17883755c21..6e273230cc8 100644 --- a/ft/tests/CMakeLists.txt +++ b/ft/tests/CMakeLists.txt @@ -101,6 +101,17 @@ if(BUILD_TESTING OR BUILD_FT_TESTS) set_property(TEST ft/upgrade_test_simple APPEND PROPERTY ENVIRONMENT "TOKUDB_DATA=${TOKUDB_DATA}") + # should be a file GLOB and a loop + declare_custom_tests(test-upgrade-recovery-logs) + add_ft_test_aux(test-upgrade-recovery-logs-24-clean test-upgrade-recovery-logs ${TOKUDB_DATA}/upgrade-recovery-logs-24-clean) + add_ft_test_aux(test-upgrade-recovery-logs-24-dirty test-upgrade-recovery-logs ${TOKUDB_DATA}/upgrade-recovery-logs-24-dirty) + add_ft_test_aux(test-upgrade-recovery-logs-25-clean test-upgrade-recovery-logs ${TOKUDB_DATA}/upgrade-recovery-logs-25-clean) + add_ft_test_aux(test-upgrade-recovery-logs-25-dirty test-upgrade-recovery-logs ${TOKUDB_DATA}/upgrade-recovery-logs-25-dirty) + add_ft_test_aux(test-upgrade-recovery-logs-26-clean test-upgrade-recovery-logs ${TOKUDB_DATA}/upgrade-recovery-logs-26-clean) + add_ft_test_aux(test-upgrade-recovery-logs-26-dirty test-upgrade-recovery-logs ${TOKUDB_DATA}/upgrade-recovery-logs-26-dirty) + add_ft_test_aux(test-upgrade-recovery-logs-27-clean test-upgrade-recovery-logs ${TOKUDB_DATA}/upgrade-recovery-logs-27-clean) + add_ft_test_aux(test-upgrade-recovery-logs-27-dirty test-upgrade-recovery-logs ${TOKUDB_DATA}/upgrade-recovery-logs-27-dirty) + ## give some tests, that time out normally, 1 hour to complete set(long_tests ft/ftloader-test-extractor-3a diff --git a/ft/tests/generate-upgrade-recovery-logs.cc b/ft/tests/generate-upgrade-recovery-logs.cc new file 
mode 100644 index 00000000000..b221e6a4fc5 --- /dev/null +++ b/ft/tests/generate-upgrade-recovery-logs.cc @@ -0,0 +1,151 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: +#ident "$Id$" +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 
11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. + + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. 
+*/ + +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." +#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." + +// Generate a recovery log with a checkpoint and an optional shutdown log entry. +// These logs will be used later to test recovery. + +#include "test.h" + +static void generate_recovery_log(const char *testdir, bool do_shutdown) { + int r; + + // setup the test dir + toku_os_recursive_delete(testdir); + r = toku_os_mkdir(testdir, S_IRWXU); + CKERR(r); + + // open the log + TOKULOGGER logger; + r = toku_logger_create(&logger); + CKERR(r); + r = toku_logger_open(testdir, logger); + CKERR(r); + + // log checkpoint + LSN beginlsn; + toku_log_begin_checkpoint(logger, &beginlsn, false, 0, 0); + toku_log_end_checkpoint(logger, nullptr, false, beginlsn, 0, 0, 0); + + // log shutdown + if (do_shutdown) { + toku_log_shutdown(logger, nullptr, true, 0, 0); + } + + r = toku_logger_close(&logger); + CKERR(r); +} + +int test_main(int argc, const char *argv[]) { + bool do_shutdown = true; + for (int i = 1; i < argc; i++) { + if (strcmp(argv[i], "-v") == 0) { + verbose++; + continue; + } + if (strcmp(argv[i], "-q") == 0) { + if (verbose > 0) + verbose--; + continue; + } + if (strcmp(argv[i], "--clean") == 0) { + do_shutdown = true; + continue; + } + if (strcmp(argv[i], "--dirty") == 0) { + do_shutdown = false; + continue; + } + } + char testdir[256]; + sprintf(testdir, "upgrade-recovery-logs-%d-%s", TOKU_LOG_VERSION, do_shutdown ? 
"clean" : "dirty"); + generate_recovery_log(testdir, do_shutdown); + return 0; +} diff --git a/ft/tests/test-upgrade-recovery-logs.cc b/ft/tests/test-upgrade-recovery-logs.cc new file mode 100644 index 00000000000..37cbf01fad6 --- /dev/null +++ b/ft/tests/test-upgrade-recovery-logs.cc @@ -0,0 +1,192 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: +#ident "$Id$" +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. 
+ +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. + + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. 
If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." +#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." + +// Test that recovery works correctly on a recovery log in a log directory. 
+ +#include "test.h" + +static void run_recovery(const char *testdir) { + int r; + + int log_version; + char shutdown[32+1]; + r = sscanf(testdir, "upgrade-recovery-logs-%d-%32s", &log_version, shutdown); + assert(r == 2); + + char **logfiles = nullptr; + int n_logfiles = 0; + r = toku_logger_find_logfiles(testdir, &logfiles, &n_logfiles); + CKERR(r); + assert(n_logfiles > 0); + + FILE *f = fopen(logfiles[n_logfiles-1], "r"); + assert(f); + uint32_t real_log_version; + r = toku_read_logmagic(f, &real_log_version); + CKERR(r); + assert((uint32_t)log_version == (uint32_t)real_log_version); + r = fclose(f); + CKERR(r); + + toku_logger_free_logfiles(logfiles, n_logfiles); + + // test needs recovery + r = tokuft_needs_recovery(testdir, false); + if (strcmp(shutdown, "clean") == 0) { + CKERR(r); // clean does not need recovery + } else if (strcmp(shutdown, "dirty") == 0) { + CKERR2(r, 1); // dirty needs recovery + } else { + CKERR(EINVAL); + } + + // test maybe upgrade log + LSN lsn_of_clean_shutdown; + bool upgrade_in_progress; + r = toku_maybe_upgrade_log(testdir, testdir, &lsn_of_clean_shutdown, &upgrade_in_progress); + if (strcmp(shutdown, "dirty") == 0 && log_version <= 24) { + CKERR2(r, TOKUDB_UPGRADE_FAILURE); // we dont support dirty upgrade from versions <= 24 + return; + } else { + CKERR(r); + } + + if (!verbose) { + // redirect stderr + int devnul = open(DEV_NULL_FILE, O_WRONLY); + assert(devnul >= 0); + int rr = toku_dup2(devnul, fileno(stderr)); + assert(rr == fileno(stderr)); + rr = close(devnul); + assert(rr == 0); + } + + // run recovery + if (r == 0) { + r = tokuft_recover(NULL, + NULL_prepared_txn_callback, + NULL_keep_cachetable_callback, + NULL_logger, testdir, testdir, 0, 0, 0, NULL, 0); + CKERR(r); + } +} + +int test_main(int argc, const char *argv[]) { + int i = 0; + for (i = 1; i < argc; i++) { + if (strcmp(argv[i], "-v") == 0) { + verbose++; + continue; + } + if (strcmp(argv[i], "-q") == 0) { + if (verbose > 0) + verbose--; + continue; + } + 
break; + } + if (i < argc) { + const char *full_test_dir = argv[i]; + const char *test_dir = basename(full_test_dir); + if (strcmp(full_test_dir, test_dir) != 0) { + int r; + char cmd[32 + strlen(full_test_dir) + strlen(test_dir)]; + sprintf(cmd, "rm -rf %s", test_dir); + r = system(cmd); + CKERR(r); + sprintf(cmd, "cp -r %s %s", full_test_dir, test_dir); + r = system(cmd); + CKERR(r); + } + run_recovery(test_dir); + } + return 0; +} diff --git a/src/tests/recover-rollinclude.cc b/src/tests/recover-rollinclude.cc new file mode 100644 index 00000000000..0c6b1827496 --- /dev/null +++ b/src/tests/recover-rollinclude.cc @@ -0,0 +1,275 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: +#ident "$Id$" +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuDB, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. 
+ +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. + + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. 
If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." +#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." + +// Create a rollback log with a rollinclude log entry, crash after the txn commits and before the last checkpoint. +// Recovery crashes 7.1.0, should succeed. + +#include "test.h" +#include + +// Insert max_rows key/val pairs into the db + +// We want to force a rollinclude so we use a child transaction and insert enough rows so that it spills. +// It spills at about 144K and 289K rows. +static void do_inserts(DB_ENV *env, DB *db, uint64_t max_rows, size_t val_size) { + char val_data[val_size]; memset(val_data, 0, val_size); + int r; + DB_TXN *parent = nullptr; + r = env->txn_begin(env, nullptr, &parent, 0); + CKERR(r); + + DB_TXN *child = nullptr; + r = env->txn_begin(env, parent, &child, 0); + CKERR(r); + + for (uint64_t i = 0; i < max_rows; i++) { + // pick a sequential key but it does not matter for this test. 
+ uint64_t k[2] = { + htonl(i), random64(), + }; + + DBT key = { .data = k, .size = sizeof k }; + DBT val = { .data = val_data, .size = (uint32_t) val_size }; + r = db->put(db, child, &key, &val, 0); + CKERR(r); + + if (i == max_rows-1) { + r = child->commit(child, 0); + CKERR(r); + + r = env->txn_checkpoint(env, 0, 0, 0); + CKERR(r); + } + } + + r = parent->commit(parent, 0); + CKERR(r); +} + +static void run_test(uint64_t num_rows, size_t val_size, bool do_crash) { + int r; + + DB_ENV *env = nullptr; + r = db_env_create(&env, 0); + CKERR(r); + r = env->set_cachesize(env, 8, 0, 1); + CKERR(r); + r = env->open(env, TOKU_TEST_FILENAME, + DB_INIT_MPOOL|DB_CREATE|DB_THREAD |DB_INIT_LOCK|DB_INIT_LOG|DB_INIT_TXN|DB_PRIVATE, + S_IRWXU+S_IRWXG+S_IRWXO); + CKERR(r); + + DB *db = nullptr; + r = db_create(&db, env, 0); + CKERR(r); + r = db->open(db, nullptr, "foo.db", 0, DB_BTREE, DB_CREATE, S_IRWXU+S_IRWXG+S_IRWXO); + CKERR(r); + + r = env->txn_checkpoint(env, 0, 0, 0); + CKERR(r); + + do_inserts(env, db, num_rows, val_size); + + if (do_crash) + assert(0); // crash on purpose + + r = db->close(db, 0); + CKERR(r); + + r = env->close(env, 0); + CKERR(r); +} + +static void do_verify(DB_ENV *env, DB *db, uint64_t num_rows, size_t val_size UU()) { + int r; + DB_TXN *txn = nullptr; + r = env->txn_begin(env, nullptr, &txn, 0); + CKERR(r); + + DBC *c = nullptr; + r = db->cursor(db, txn, &c, 0); + CKERR(r); + + uint64_t i = 0; + while (1) { + DBT key = {}; + DBT val = {}; + r = c->c_get(c, &key, &val, DB_NEXT); + if (r == DB_NOTFOUND) + break; + CKERR(r); + assert(key.size == 16); + uint64_t k[2]; + memcpy(k, key.data, key.size); + assert(htonl(k[0]) == i); + assert(val.size == val_size); + i++; + } + assert(i == num_rows); + + r = c->c_close(c); + CKERR(r); + + r = txn->commit(txn, 0); + CKERR(r); +} + +static void run_recover(uint64_t num_rows, size_t val_size) { + int r; + + DB_ENV *env = nullptr; + r = db_env_create(&env, 0); + CKERR(r); + r = env->set_cachesize(env, 8, 0, 1); + 
CKERR(r); + r = env->open(env, TOKU_TEST_FILENAME, + DB_INIT_MPOOL|DB_CREATE|DB_THREAD |DB_INIT_LOCK|DB_INIT_LOG|DB_INIT_TXN|DB_PRIVATE | DB_RECOVER, + S_IRWXU+S_IRWXG+S_IRWXO); + CKERR(r); + + DB *db = nullptr; + r = db_create(&db, env, 0); + CKERR(r); + r = db->open(db, nullptr, "foo.db", 0, DB_BTREE, 0, S_IRWXU+S_IRWXG+S_IRWXO); + CKERR(r); + + do_verify(env, db, num_rows, val_size); + + r = db->close(db, 0); + CKERR(r); + + r = env->close(env, 0); + CKERR(r); +} + +int test_main (int argc, char *const argv[]) { + bool do_test = false; + bool do_recover = false; + bool do_crash = true; + for (int i = 1; i < argc; i++) { + if (strcmp(argv[i], "-v") == 0) { + verbose++; + continue; + } + if (strcmp(argv[i], "-q") == 0) { + if (verbose > 0) verbose--; + continue; + } + if (strcmp(argv[i], "--test") == 0) { + do_test = true; + continue; + } + if (strcmp(argv[i], "--recover") == 0) { + do_recover = true; + continue; + } + if (strcmp(argv[i], "--crash") == 0 && i+1 < argc) { + do_crash = atoi(argv[++i]); + continue; + } + } + + uint64_t num_rows = 300000; + size_t val_size = 1; + + if (do_test) { + // init the env directory + toku_os_recursive_delete(TOKU_TEST_FILENAME); + int r = toku_os_mkdir(TOKU_TEST_FILENAME, S_IRWXU+S_IRWXG+S_IRWXO); + CKERR(r); + run_test(num_rows, val_size, do_crash); + } + if (do_recover) { + run_recover(num_rows, val_size); + } + + return 0; +} From 2f04fc27c8af8406aac5d908a995ba3a250099b7 Mon Sep 17 00:00:00 2001 From: Rich Prohaska Date: Tue, 2 Sep 2014 11:53:49 -0400 Subject: [PATCH 171/190] FT-548 fix dirty upgrade build on osx problem --- ft/tests/test-upgrade-recovery-logs.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ft/tests/test-upgrade-recovery-logs.cc b/ft/tests/test-upgrade-recovery-logs.cc index 37cbf01fad6..467807050b7 100644 --- a/ft/tests/test-upgrade-recovery-logs.cc +++ b/ft/tests/test-upgrade-recovery-logs.cc @@ -92,6 +92,7 @@ PATENT RIGHTS GRANT: // Test that recovery works correctly on a recovery 
log in a log directory. #include "test.h" +#include static void run_recovery(const char *testdir) { int r; @@ -175,7 +176,7 @@ int test_main(int argc, const char *argv[]) { } if (i < argc) { const char *full_test_dir = argv[i]; - const char *test_dir = basename(full_test_dir); + const char *test_dir = basename((char *)full_test_dir); if (strcmp(full_test_dir, test_dir) != 0) { int r; char cmd[32 + strlen(full_test_dir) + strlen(test_dir)]; From 9d28e3a6cff8f0190104fa96c1460d5001dd8dc8 Mon Sep 17 00:00:00 2001 From: Rich Prohaska Date: Tue, 2 Sep 2014 12:29:55 -0400 Subject: [PATCH 172/190] FT-548 fix dirty upgrade build on osx problem --- src/tests/recover-rollinclude.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/src/tests/recover-rollinclude.cc b/src/tests/recover-rollinclude.cc index 0c6b1827496..5a3a89a4052 100644 --- a/src/tests/recover-rollinclude.cc +++ b/src/tests/recover-rollinclude.cc @@ -93,7 +93,6 @@ PATENT RIGHTS GRANT: // Recovery crashes 7.1.0, should succeed. #include "test.h" -#include // Insert max_rows key/val pairs into the db From 6ee0e1c22e204199e8d1b9a40dafb0575c25bea3 Mon Sep 17 00:00:00 2001 From: Rich Prohaska Date: Tue, 2 Sep 2014 13:01:56 -0400 Subject: [PATCH 173/190] DB-716 use jemalloc 3.6.0 in tokudb builds --- scripts/make.mysql.bash | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/make.mysql.bash b/scripts/make.mysql.bash index a0e5db48a47..1bf258c5c3b 100755 --- a/scripts/make.mysql.bash +++ b/scripts/make.mysql.bash @@ -52,7 +52,7 @@ cmake_build_type=RelWithDebInfo mysql_tree= tokudbengine_tree= ftindex_tree= -jemalloc_version=3.3.0 +jemalloc_version=3.6.0 jemalloc_tree= backup_tree= From 561edb14e8695814e10a355a71cf53de198b9b79 Mon Sep 17 00:00:00 2001 From: Rich Prohaska Date: Tue, 2 Sep 2014 15:44:40 -0400 Subject: [PATCH 174/190] DB-717 add iterations limit to tokustat --- scripts/tokustat.py | 32 ++++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git 
a/scripts/tokustat.py b/scripts/tokustat.py index 8e9233e5943..219d3be5368 100755 --- a/scripts/tokustat.py +++ b/scripts/tokustat.py @@ -9,10 +9,10 @@ def usage(): print "diff the tokudb engine status" print "--host=HOSTNAME (default: localhost)" print "--port=PORT" - print "--sleeptime=SLEEPTIME (default: 10 seconds)" + print "--interations=MAX_ITERATIONS (default: forever)" + print "--interval=TIME_BETWEEN_SAMPLES (default: 10 seconds)" print "--q='show engine tokudb status'" print "--q='select * from information_schema.global_status'" - return 1 def convert(v): @@ -23,14 +23,11 @@ def convert(v): v = float(v) return v -def printit(stats, rs, sleeptime): - # print rs +def printit(stats, rs, interval): for t in rs: l = len(t) # grab the last 2 fields in t k = t[l-2] v = t[l-1] - # print k, v # debug - # try to convert v try: v = convert(v) except: @@ -41,11 +38,11 @@ def printit(stats, rs, sleeptime): print k, "|", oldv, "|", v, try: d = v - oldv - if sleeptime != 1: - if d >= sleeptime: - e = d / sleeptime + if interval != 1: + if d >= interval: + e = d / interval else: - e = float(d) / sleeptime + e = float(d) / interval print "|", d, "|", e else: print "|", d @@ -59,7 +56,9 @@ def main(): port = None user = None passwd = None - sleeptime = 10 + interval = 10 + iterations = 0 + q = 'show engine tokudb status' for a in sys.argv[1:]: @@ -71,6 +70,9 @@ def main(): continue return usage() + iterations = int(iterations) + interval = int(interval) + connect_parameters = {} if host is not None: if host[0] == '/': @@ -93,7 +95,9 @@ def main(): print "connected" stats = {} - while 1: + i = 0 + while iterations == 0 or i <= iterations: + i += 1 try: c = db.cursor() n = c.execute(q) @@ -105,8 +109,8 @@ def main(): return 2 try: - printit(stats, rs, int(sleeptime)) - time.sleep(int(sleeptime)) + printit(stats, rs, interval) + time.sleep(interval) except: print "printit", sys.exc_info() return 3 From ddc9e7488a391e8f1b6a81781b92b1596b5c9da5 Mon Sep 17 00:00:00 2001 From: 
Rich Prohaska Date: Tue, 2 Sep 2014 16:23:32 -0400 Subject: [PATCH 175/190] DB-717 add iterations limit to tokustat --- scripts/tokustat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/tokustat.py b/scripts/tokustat.py index 219d3be5368..3ecac68e769 100755 --- a/scripts/tokustat.py +++ b/scripts/tokustat.py @@ -9,7 +9,7 @@ def usage(): print "diff the tokudb engine status" print "--host=HOSTNAME (default: localhost)" print "--port=PORT" - print "--interations=MAX_ITERATIONS (default: forever)" + print "--iterations=MAX_ITERATIONS (default: forever)" print "--interval=TIME_BETWEEN_SAMPLES (default: 10 seconds)" print "--q='show engine tokudb status'" print "--q='select * from information_schema.global_status'" From f7c43e4b9c33c72db66a8fafa31f4d33a857b061 Mon Sep 17 00:00:00 2001 From: Rich Prohaska Date: Wed, 3 Sep 2014 07:49:52 -0400 Subject: [PATCH 176/190] DB-718 handle errors in tokudb info schema plugins --- storage/tokudb/hatoku_hton.cc | 36 +++++++++++++++++++++++------------ 1 file changed, 24 insertions(+), 12 deletions(-) diff --git a/storage/tokudb/hatoku_hton.cc b/storage/tokudb/hatoku_hton.cc index 2f659f8571e..1118ab0fb19 100644 --- a/storage/tokudb/hatoku_hton.cc +++ b/storage/tokudb/hatoku_hton.cc @@ -1536,10 +1536,12 @@ static int tokudb_file_map_fill_table(THD *thd, TABLE_LIST *tables, COND *cond) rw_rdlock(&tokudb_hton_initialized_lock); if (!tokudb_hton_initialized) { - my_error(ER_PLUGIN_IS_NOT_LOADED, MYF(0), tokudb_hton_name); - error = -1; + error = ER_PLUGIN_IS_NOT_LOADED; + my_error(error, MYF(0), tokudb_hton_name); } else { error = tokudb_file_map(table, thd); + if (error) + my_error(error, MYF(0)); } rw_unlock(&tokudb_hton_initialized_lock); @@ -1681,10 +1683,12 @@ static int tokudb_fractal_tree_info_fill_table(THD *thd, TABLE_LIST *tables, CON rw_rdlock(&tokudb_hton_initialized_lock); if (!tokudb_hton_initialized) { - my_error(ER_PLUGIN_IS_NOT_LOADED, MYF(0), tokudb_hton_name); - error = -1; + error = 
ER_PLUGIN_IS_NOT_LOADED; + my_error(error, MYF(0), tokudb_hton_name); } else { error = tokudb_fractal_tree_info(table, thd); + if (error) + my_error(error, MYF(0)); } //3938: unlock the status flag lock @@ -1892,10 +1896,12 @@ static int tokudb_fractal_tree_block_map_fill_table(THD *thd, TABLE_LIST *tables rw_rdlock(&tokudb_hton_initialized_lock); if (!tokudb_hton_initialized) { - my_error(ER_PLUGIN_IS_NOT_LOADED, MYF(0), tokudb_hton_name); - error = -1; + error = ER_PLUGIN_IS_NOT_LOADED; + my_error(error, MYF(0), tokudb_hton_name); } else { error = tokudb_fractal_tree_block_map(table, thd); + if (error) + my_error(error, MYF(0)); } //3938: unlock the status flag lock @@ -2043,11 +2049,13 @@ static int tokudb_trx_fill_table(THD *thd, TABLE_LIST *tables, COND *cond) { rw_rdlock(&tokudb_hton_initialized_lock); if (!tokudb_hton_initialized) { - my_error(ER_PLUGIN_IS_NOT_LOADED, MYF(0), tokudb_hton_name); - error = -1; + error = ER_PLUGIN_IS_NOT_LOADED; + my_error(error, MYF(0), tokudb_hton_name); } else { struct tokudb_trx_extra e = { thd, tables->table }; error = db_env->iterate_live_transactions(db_env, tokudb_trx_callback, &e); + if (error) + my_error(error, MYF(0)); } rw_unlock(&tokudb_hton_initialized_lock); @@ -2124,11 +2132,13 @@ static int tokudb_lock_waits_fill_table(THD *thd, TABLE_LIST *tables, COND *cond rw_rdlock(&tokudb_hton_initialized_lock); if (!tokudb_hton_initialized) { - my_error(ER_PLUGIN_IS_NOT_LOADED, MYF(0), tokudb_hton_name); - error = -1; + error = ER_PLUGIN_IS_NOT_LOADED; + my_error(error, MYF(0), tokudb_hton_name); } else { struct tokudb_lock_waits_extra e = { thd, tables->table }; error = db_env->iterate_pending_lock_requests(db_env, tokudb_lock_waits_callback, &e); + if (error) + my_error(error, MYF(0)); } rw_unlock(&tokudb_hton_initialized_lock); @@ -2210,11 +2220,13 @@ static int tokudb_locks_fill_table(THD *thd, TABLE_LIST *tables, COND *cond) { rw_rdlock(&tokudb_hton_initialized_lock); if (!tokudb_hton_initialized) { - 
my_error(ER_PLUGIN_IS_NOT_LOADED, MYF(0), tokudb_hton_name); - error = -1; + error = ER_PLUGIN_IS_NOT_LOADED; + my_error(error, MYF(0), tokudb_hton_name); } else { struct tokudb_locks_extra e = { thd, tables->table }; error = db_env->iterate_live_transactions(db_env, tokudb_locks_callback, &e); + if (error) + my_error(error, MYF(0)); } rw_unlock(&tokudb_hton_initialized_lock); From 507da246c85381b27274ee48c5562ede396c2784 Mon Sep 17 00:00:00 2001 From: Rich Prohaska Date: Wed, 3 Sep 2014 08:13:49 -0400 Subject: [PATCH 177/190] DB-718 test case for broken error handling in tokudb_fractal_tree_info info schema plugin --- mysql-test/suite/tokudb.bugs/r/tokudb718.result | 9 +++++++++ mysql-test/suite/tokudb.bugs/t/tokudb718.test | 13 +++++++++++++ 2 files changed, 22 insertions(+) create mode 100644 mysql-test/suite/tokudb.bugs/r/tokudb718.result create mode 100644 mysql-test/suite/tokudb.bugs/t/tokudb718.test diff --git a/mysql-test/suite/tokudb.bugs/r/tokudb718.result b/mysql-test/suite/tokudb.bugs/r/tokudb718.result new file mode 100644 index 00000000000..022a4d56d75 --- /dev/null +++ b/mysql-test/suite/tokudb.bugs/r/tokudb718.result @@ -0,0 +1,9 @@ +set default_storage_engine='tokudb'; +drop table if exists t; +create table t (id int primary key); +begin; +insert into t values (1),(2); +select * from information_schema.tokudb_fractal_tree_info; +ERROR HY000: Unknown error -30994 +commit; +drop table t; diff --git a/mysql-test/suite/tokudb.bugs/t/tokudb718.test b/mysql-test/suite/tokudb.bugs/t/tokudb718.test new file mode 100644 index 00000000000..877087776b2 --- /dev/null +++ b/mysql-test/suite/tokudb.bugs/t/tokudb718.test @@ -0,0 +1,13 @@ +# test DB-718, a crash caused by broken error handling in tokudb's fractal_tree_info information schema +source include/have_tokudb.inc; +set default_storage_engine='tokudb'; +disable_warnings; +drop table if exists t; +enable_warnings; +create table t (id int primary key); +begin; +insert into t values (1),(2); +--error 34542 
+select * from information_schema.tokudb_fractal_tree_info; +commit; +drop table t; From 40165a10835cda427d6952c267d5fae46900f5da Mon Sep 17 00:00:00 2001 From: John Esmet Date: Wed, 3 Sep 2014 14:38:04 -0400 Subject: [PATCH 178/190] FT-273 Prevent setting the memcmp magic on db handles for which a FT is already open. Improve comments. Add a test. --- ft/comparator.h | 11 +- ft/ft-ops.cc | 30 +++-- ft/ft-ops.h | 2 +- src/tests/test_memcmp_magic.cc | 209 +++++++++++++++++++++++++++++++++ src/ydb_db.cc | 6 +- 5 files changed, 241 insertions(+), 17 deletions(-) create mode 100644 src/tests/test_memcmp_magic.cc diff --git a/ft/comparator.h b/ft/comparator.h index 81a794e4afd..caf2b8b9d18 100644 --- a/ft/comparator.h +++ b/ft/comparator.h @@ -117,7 +117,10 @@ namespace toku { } public: - void create(ft_compare_func cmp, DESCRIPTOR desc, uint8_t memcmp_magic = 0) { + // This magic value is reserved to mean that the magic has not been set. + static const uint8_t MEMCMP_MAGIC_NONE = 0; + + void create(ft_compare_func cmp, DESCRIPTOR desc, uint8_t memcmp_magic = MEMCMP_MAGIC_NONE) { XCALLOC(_fake_db); init(cmp, desc, memcmp_magic); } @@ -165,8 +168,10 @@ namespace toku { int operator()(const DBT *a, const DBT *b) const { if (__builtin_expect(toku_dbt_is_infinite(a) || toku_dbt_is_infinite(b), 0)) { return toku_dbt_infinite_compare(a, b); - } else if (_memcmp_magic && dbt_has_memcmp_magic(a) - // At this point we expect b to also have the memcmp magic + } else if (_memcmp_magic != MEMCMP_MAGIC_NONE + // If `a' has the memcmp magic.. 
+ && dbt_has_memcmp_magic(a) + // ..then we expect `b' to also have the memcmp magic && __builtin_expect(dbt_has_memcmp_magic(b), 1)) { return toku_builtin_compare_fun(nullptr, a, b); } else { diff --git a/ft/ft-ops.cc b/ft/ft-ops.cc index a48b785700e..d32b92f8a4a 100644 --- a/ft/ft-ops.cc +++ b/ft/ft-ops.cc @@ -2858,20 +2858,27 @@ toku_ft_handle_get_fanout(FT_HANDLE ft_handle, unsigned int *fanout) } } -void toku_ft_handle_set_memcmp_magic(FT_HANDLE ft_handle, uint8_t magic) { - invariant(magic != 0); - if (ft_handle->ft) { - // handle is already open, application bug if memcmp magic changes - invariant(ft_handle->ft->cmp.get_memcmp_magic() == magic); - } else { - ft_handle->options.memcmp_magic = magic; +// The memcmp magic byte may be set on a per fractal tree basis to communicate +// that if two keys begin with this byte, they may be compared with the builtin +// key comparison function. This greatly optimizes certain in-memory workloads. +int toku_ft_handle_set_memcmp_magic(FT_HANDLE ft_handle, uint8_t magic) { + if (magic == comparator::MEMCMP_MAGIC_NONE) { + return EINVAL; } + if (ft_handle->ft != nullptr) { + // if the handle is already open, then we cannot set the memcmp magic + // (because it may or may not have been set by someone else already) + return EINVAL; + } + ft_handle->options.memcmp_magic = magic; + return 0; } static int verify_builtin_comparisons_consistent(FT_HANDLE t, uint32_t flags) { - if ((flags & TOKU_DB_KEYCMP_BUILTIN) && (t->options.compare_fun != toku_builtin_compare_fun)) + if ((flags & TOKU_DB_KEYCMP_BUILTIN) && (t->options.compare_fun != toku_builtin_compare_fun)) { return EINVAL; + } return 0; } @@ -3017,6 +3024,13 @@ ft_handle_open(FT_HANDLE ft_h, const char *fname_in_env, int is_create, int only r = EINVAL; goto exit; } + + // Ensure that the memcmp magic bits are consistent, if set. 
+ if (ft->cmp.get_memcmp_magic() != toku::comparator::MEMCMP_MAGIC_NONE && + ft_h->options.memcmp_magic != ft->cmp.get_memcmp_magic()) { + r = EINVAL; + goto exit; + } toku_ft_handle_inherit_options(ft_h, ft); if (!was_already_open) { diff --git a/ft/ft-ops.h b/ft/ft-ops.h index dae335a3cc2..c45e0c71ef5 100644 --- a/ft/ft-ops.h +++ b/ft/ft-ops.h @@ -126,7 +126,7 @@ void toku_ft_handle_set_compression_method(FT_HANDLE, enum toku_compression_meth void toku_ft_handle_get_compression_method(FT_HANDLE, enum toku_compression_method *); void toku_ft_handle_set_fanout(FT_HANDLE, unsigned int fanout); void toku_ft_handle_get_fanout(FT_HANDLE, unsigned int *fanout); -void toku_ft_handle_set_memcmp_magic(FT_HANDLE, uint8_t magic); +int toku_ft_handle_set_memcmp_magic(FT_HANDLE, uint8_t magic); void toku_ft_set_bt_compare(FT_HANDLE ft_handle, ft_compare_func cmp_func); const toku::comparator &toku_ft_get_comparator(FT_HANDLE ft_handle); diff --git a/src/tests/test_memcmp_magic.cc b/src/tests/test_memcmp_magic.cc new file mode 100644 index 00000000000..473dcaf7453 --- /dev/null +++ b/src/tests/test_memcmp_magic.cc @@ -0,0 +1,209 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: + +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). 
+ + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2014 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. 
+ + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#include "test.h" + +#include "util/dbt.h" + +static void test_memcmp_magic(void) { + int r; + + DB_ENV *env; + r = db_env_create(&env, 0); CKERR(r); + r = env->open(env, TOKU_TEST_FILENAME, DB_CREATE+DB_PRIVATE+DB_INIT_MPOOL+DB_INIT_TXN, 0); CKERR(r); + + DB *db; + r = db_create(&db, env, 0); CKERR(r); + + // Can't set the memcmp magic to 0 (since it's used as a sentinel for `none') + r = db->set_memcmp_magic(db, 0); CKERR2(r, EINVAL); + + // Should be ok to set it more than once, even to different things, before opening. + r = db->set_memcmp_magic(db, 1); CKERR(r); + r = db->set_memcmp_magic(db, 2); CKERR(r); + r = db->open(db, NULL, "db", NULL, DB_BTREE, DB_CREATE, 0666); CKERR(r); + + // Can't set the memcmp magic after opening. 
+ r = db->set_memcmp_magic(db, 0); CKERR2(r, EINVAL); + r = db->set_memcmp_magic(db, 1); CKERR2(r, EINVAL); + + DB *db2; + r = db_create(&db2, env, 0); CKERR(r); + r = db2->set_memcmp_magic(db2, 3); CKERR(r); // ..we can try setting it to something different + // ..but it should fail to open + r = db2->open(db2, NULL, "db", NULL, DB_BTREE, DB_CREATE, 0666); CKERR2(r, EINVAL); + r = db2->set_memcmp_magic(db2, 2); CKERR(r); + r = db2->open(db2, NULL, "db", NULL, DB_BTREE, DB_CREATE, 0666); CKERR(r); + r = db2->close(db2, 0); + + r = db->close(db, 0); CKERR(r); + r = env->close(env, 0); CKERR(r); +} + +static int comparison_function_unused(DB *UU(db), const DBT *UU(a), const DBT *UU(b)) { + // We're testing that the memcmp magic gets used so the real + // comparison function should never get called. + invariant(false); + return 0; +} + +static int getf_key_cb(const DBT *key, const DBT *UU(val), void *extra) { + DBT *dbt = reinterpret_cast(extra); + toku_clone_dbt(dbt, *key); + return 0; +} + +static void test_memcmp_magic_sort_order(void) { + int r; + + // Verify that randomly generated integer keys are sorted in memcmp + // order when packed as little endian, even with an environment-wide + // comparison function that sorts as though keys are big-endian ints. + + DB_ENV *env; + r = db_env_create(&env, 0); CKERR(r); + r = env->set_default_bt_compare(env, comparison_function_unused); CKERR(r); + r = env->open(env, TOKU_TEST_FILENAME, DB_CREATE+DB_PRIVATE+DB_INIT_MPOOL+DB_INIT_TXN, 0); CKERR(r); + + const int magic = 49; + + DB *db; + r = db_create(&db, env, 0); CKERR(r); + r = db->set_memcmp_magic(db, magic); CKERR(r); + r = db->open(db, NULL, "db", NULL, DB_BTREE, DB_CREATE, 0666); CKERR(r); + + for (int i = 0; i < 10000; i++) { + char buf[1 + sizeof(int)]; + // Serialize key to first have the magic byte, then the little-endian key. 
+ int k = toku_htonl(random()); + buf[0] = magic; + memcpy(&buf[1], &k, sizeof(int)); + + DBT key; + dbt_init(&key, buf, sizeof(buf)); + r = db->put(db, NULL, &key, &key, 0); CKERR(r); + } + + DB_TXN *txn; + env->txn_begin(env, NULL, &txn, 0); + DBC *dbc; + db->cursor(db, txn, &dbc, 0); + DBT prev_dbt, curr_dbt; + memset(&curr_dbt, 0, sizeof(DBT)); + memset(&prev_dbt, 0, sizeof(DBT)); + while (dbc->c_getf_next(dbc, 0, getf_key_cb, &curr_dbt)) { + invariant(curr_dbt.size == sizeof(int)); + if (prev_dbt.data != NULL) { + // Each key should be >= to the last using memcmp + int c = memcmp(prev_dbt.data, curr_dbt.data, sizeof(int)); + invariant(c <= 0); + } + toku_destroy_dbt(&prev_dbt); + prev_dbt = curr_dbt; + } + toku_destroy_dbt(&curr_dbt); + toku_destroy_dbt(&prev_dbt); + dbc->c_close(dbc); + txn->commit(txn, 0); + + r = db->close(db, 0); CKERR(r); + r = env->close(env, 0); CKERR(r); +} + +int +test_main(int argc, char *const argv[]) { + parse_args(argc, argv); + + toku_os_recursive_delete(TOKU_TEST_FILENAME); + int r = toku_os_mkdir(TOKU_TEST_FILENAME, S_IRWXU+S_IRWXG+S_IRWXO); CKERR(r); + + test_memcmp_magic(); + test_memcmp_magic_sort_order(); + + return 0; +} diff --git a/src/ydb_db.cc b/src/ydb_db.cc index f64ed4465e8..2c54a3bd4dc 100644 --- a/src/ydb_db.cc +++ b/src/ydb_db.cc @@ -729,11 +729,7 @@ toku_db_set_memcmp_magic(DB *db, uint8_t magic) { if (db_opened(db)) { return EINVAL; } - if (magic == 0) { - return EINVAL; - } - toku_ft_handle_set_memcmp_magic(db->i->ft_handle, magic); - return 0; + return toku_ft_handle_set_memcmp_magic(db->i->ft_handle, magic); } static int From cf3dae3b8d17335166f1a54572ff3c15499fc039 Mon Sep 17 00:00:00 2001 From: John Esmet Date: Wed, 3 Sep 2014 14:52:48 -0400 Subject: [PATCH 179/190] FT-273 Be more specific about why memcmp magic is interesting --- ft/ft-ops.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ft/ft-ops.cc b/ft/ft-ops.cc index d32b92f8a4a..7b3092a4f99 100644 --- a/ft/ft-ops.cc +++ 
b/ft/ft-ops.cc @@ -2860,7 +2860,8 @@ toku_ft_handle_get_fanout(FT_HANDLE ft_handle, unsigned int *fanout) // The memcmp magic byte may be set on a per fractal tree basis to communicate // that if two keys begin with this byte, they may be compared with the builtin -// key comparison function. This greatly optimizes certain in-memory workloads. +// key comparison function. This greatly optimizes certain in-memory workloads, +// such as lookups by OID primary key in TokuMX. int toku_ft_handle_set_memcmp_magic(FT_HANDLE ft_handle, uint8_t magic) { if (magic == comparator::MEMCMP_MAGIC_NONE) { return EINVAL; From ef579d8921953f39fee79f3c92593f4ad625152d Mon Sep 17 00:00:00 2001 From: BohuTANG Date: Fri, 5 Sep 2014 13:17:54 +0800 Subject: [PATCH 180/190] DB-310 add tokudb_support_xa variable this variable scope is Global, Session type is boolean and default value is TRUE --- .../suite/tokudb/r/tokudb_support_xa.result | 126 +++++++++++++++++ .../suite/tokudb/t/tokudb_support_xa.test | 132 ++++++++++++++++++ storage/tokudb/hatoku_hton.cc | 9 ++ storage/tokudb/hatoku_hton.h | 10 ++ 4 files changed, 277 insertions(+) create mode 100644 mysql-test/suite/tokudb/r/tokudb_support_xa.result create mode 100644 mysql-test/suite/tokudb/t/tokudb_support_xa.test diff --git a/mysql-test/suite/tokudb/r/tokudb_support_xa.result b/mysql-test/suite/tokudb/r/tokudb_support_xa.result new file mode 100644 index 00000000000..08ad215d7e2 --- /dev/null +++ b/mysql-test/suite/tokudb/r/tokudb_support_xa.result @@ -0,0 +1,126 @@ +'#--------------------begin------------------------#' +SET @session_start_value = @@session.tokudb_support_xa; +SELECT @session_start_value; +@session_start_value +1 +SET @global_start_value = @@global.tokudb_support_xa; +SELECT @global_start_value; +@global_start_value +1 +SET @@session.tokudb_support_xa = 0; +SET @@session.tokudb_support_xa = DEFAULT; +SELECT @@session.tokudb_support_xa; +@@session.tokudb_support_xa +1 +SET @@global.tokudb_support_xa = 0; +SET 
@@global.tokudb_support_xa = DEFAULT; +SELECT @@global.tokudb_support_xa; +@@global.tokudb_support_xa +1 +'#--------------------case#1 valid set support_xa------------------------#' +SET @@session.tokudb_support_xa = 0; +SELECT @@session.tokudb_support_xa; +@@session.tokudb_support_xa +0 +SET @@session.tokudb_support_xa = 1; +SELECT @@session.tokudb_support_xa; +@@session.tokudb_support_xa +1 +SET @@global.tokudb_support_xa = 0; +SELECT @@global.tokudb_support_xa; +@@global.tokudb_support_xa +0 +SET @@global.tokudb_support_xa = 1; +SELECT @@global.tokudb_support_xa; +@@global.tokudb_support_xa +1 +'#--------------------case#2 invalid set support_xa------------------------#' +SET @@session.tokudb_support_xa = -0.6; +ERROR 42000: Incorrect argument type to variable 'tokudb_support_xa' +SET @@session.tokudb_support_xa = 1.6; +ERROR 42000: Incorrect argument type to variable 'tokudb_support_xa' +SET @@session.tokudb_support_xa = "T"; +ERROR 42000: Variable 'tokudb_support_xa' can't be set to the value of 'T' +SET @@session.tokudb_support_xa = "Y"; +ERROR 42000: Variable 'tokudb_support_xa' can't be set to the value of 'Y' +SET @@session.tokudb_support_xa = OF; +SELECT @@session.tokudb_support_xa; +@@session.tokudb_support_xa +0 +SET @@global.tokudb_support_xa = 2; +ERROR 42000: Variable 'tokudb_support_xa' can't be set to the value of '2' +SET @@global.tokudb_support_xa = "T"; +ERROR 42000: Variable 'tokudb_support_xa' can't be set to the value of 'T' +SET @@global.tokudb_support_xa = "Y"; +ERROR 42000: Variable 'tokudb_support_xa' can't be set to the value of 'Y' +'#--------------------case#3 xa.test port from tokudb_mariadb/xa.test ------------------------#' +'#--------------------xa.test with tokudb_support_xa OFF ------------------------#' +SET @@global.tokudb_support_xa = OFF; +SELECT @@global.tokudb_support_xa; +@@global.tokudb_support_xa +0 +create table t1 (a int) engine=tokudb; +xa start 'test1'; +insert t1 values (10); +xa end 'test1'; +xa prepare 'test1'; 
+xa rollback 'test1'; +select * from t1; +a +xa start 'test2'; +xa start 'test-bad'; +ERROR XAE07: XAER_RMFAIL: The command cannot be executed when global transaction is in the ACTIVE state +insert t1 values (20); +xa prepare 'test2'; +ERROR XAE07: XAER_RMFAIL: The command cannot be executed when global transaction is in the ACTIVE state +xa end 'test2'; +xa prepare 'test2'; +xa commit 'test2'; +select * from t1; +a +20 +xa start 'testa','testb'; +insert t1 values (30); +commit; +ERROR XAE07: XAER_RMFAIL: The command cannot be executed when global transaction is in the ACTIVE state +xa end 'testa','testb'; +begin; +ERROR XAE07: XAER_RMFAIL: The command cannot be executed when global transaction is in the IDLE state +create table t2 (a int); +ERROR XAE07: XAER_RMFAIL: The command cannot be executed when global transaction is in the IDLE state +xa start 'testa','testb'; +ERROR XAE08: XAER_DUPID: The XID already exists +xa start 'testa','testb', 123; +ERROR XAE08: XAER_DUPID: The XID already exists +xa start 0x7465737462, 0x2030405060, 0xb; +insert t1 values (40); +xa end 'testb',' 0@P`',11; +xa prepare 'testb',0x2030405060,11; +start transaction; +ERROR XAE07: XAER_RMFAIL: The command cannot be executed when global transaction is in the PREPARED state +xa recover; +formatID gtrid_length bqual_length data +11 5 5 testb 0@P` +xa prepare 'testa','testb'; +xa recover; +formatID gtrid_length bqual_length data +11 5 5 testb 0@P` +1 5 5 testatestb +xa commit 'testb',0x2030405060,11; +ERROR XAE04: XAER_NOTA: Unknown XID +xa rollback 'testa','testb'; +xa start 'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz'; +ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MariaDB server version for the right syntax to use near '' at line 1 +select * from t1; +a +20 +drop table t1; +'#--------------------end------------------------#' +SET @@session.tokudb_support_xa = @session_start_value; +SELECT @@session.tokudb_support_xa; 
+@@session.tokudb_support_xa +1 +SET @@global.tokudb_support_xa = @global_start_value; +SELECT @@global.tokudb_support_xa; +@@global.tokudb_support_xa +1 diff --git a/mysql-test/suite/tokudb/t/tokudb_support_xa.test b/mysql-test/suite/tokudb/t/tokudb_support_xa.test new file mode 100644 index 00000000000..a76c8d77b6a --- /dev/null +++ b/mysql-test/suite/tokudb/t/tokudb_support_xa.test @@ -0,0 +1,132 @@ +--source include/load_sysvars.inc +let $engine=TokuDB; + +--echo '#--------------------begin------------------------#' +SET @session_start_value = @@session.tokudb_support_xa; +SELECT @session_start_value; + +SET @global_start_value = @@global.tokudb_support_xa; +SELECT @global_start_value; + +SET @@session.tokudb_support_xa = 0; +SET @@session.tokudb_support_xa = DEFAULT; +SELECT @@session.tokudb_support_xa; +SET @@global.tokudb_support_xa = 0; +SET @@global.tokudb_support_xa = DEFAULT; +SELECT @@global.tokudb_support_xa; + +--echo '#--------------------case#1 valid set support_xa------------------------#' +# for session +SET @@session.tokudb_support_xa = 0; +SELECT @@session.tokudb_support_xa; +SET @@session.tokudb_support_xa = 1; +SELECT @@session.tokudb_support_xa; + +# for global +SET @@global.tokudb_support_xa = 0; +SELECT @@global.tokudb_support_xa; +SET @@global.tokudb_support_xa = 1; +SELECT @@global.tokudb_support_xa; + +--echo '#--------------------case#2 invalid set support_xa------------------------#' +# for session +--Error ER_WRONG_TYPE_FOR_VAR +SET @@session.tokudb_support_xa = -0.6; +--Error ER_WRONG_TYPE_FOR_VAR +SET @@session.tokudb_support_xa = 1.6; +--Error ER_WRONG_VALUE_FOR_VAR +SET @@session.tokudb_support_xa = "T"; +--Error ER_WRONG_VALUE_FOR_VAR +SET @@session.tokudb_support_xa = "Y"; +SET @@session.tokudb_support_xa = OF; +SELECT @@session.tokudb_support_xa; + +# for global +--Error ER_WRONG_VALUE_FOR_VAR +SET @@global.tokudb_support_xa = 2; +--Error ER_WRONG_VALUE_FOR_VAR +SET @@global.tokudb_support_xa = "T"; +--Error 
ER_WRONG_VALUE_FOR_VAR +SET @@global.tokudb_support_xa = "Y"; + + +--echo '#--------------------case#3 xa.test port from tokudb_mariadb/xa.test ------------------------#' +--echo '#--------------------xa.test with tokudb_support_xa OFF ------------------------#' +SET @@global.tokudb_support_xa = OFF; +SELECT @@global.tokudb_support_xa; +create table t1 (a int) engine=tokudb; +xa start 'test1'; +insert t1 values (10); +xa end 'test1'; +xa prepare 'test1'; +xa rollback 'test1'; +select * from t1; + +xa start 'test2'; +--error ER_XAER_RMFAIL +xa start 'test-bad'; +insert t1 values (20); +--error ER_XAER_RMFAIL +xa prepare 'test2'; +xa end 'test2'; +xa prepare 'test2'; +xa commit 'test2'; +select * from t1; + +xa start 'testa','testb'; +insert t1 values (30); + +--error ER_XAER_RMFAIL +commit; + +xa end 'testa','testb'; + +--error ER_XAER_RMFAIL +begin; +--error ER_XAER_RMFAIL +create table t2 (a int); + +connect (con1,localhost,root,,); +connection con1; + +--error ER_XAER_DUPID +xa start 'testa','testb'; +--error ER_XAER_DUPID +xa start 'testa','testb', 123; + +# gtrid [ , bqual [ , formatID ] ] +xa start 0x7465737462, 0x2030405060, 0xb; +insert t1 values (40); +xa end 'testb',' 0@P`',11; +xa prepare 'testb',0x2030405060,11; + +--error ER_XAER_RMFAIL +start transaction; + +xa recover; + +connection default; + +xa prepare 'testa','testb'; + +xa recover; + +--error ER_XAER_NOTA +xa commit 'testb',0x2030405060,11; +xa rollback 'testa','testb'; + +--error ER_PARSE_ERROR +xa start 'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz'; + +select * from t1; + +disconnect con1; +connection default; +drop table t1; + +--echo '#--------------------end------------------------#' +SET @@session.tokudb_support_xa = @session_start_value; +SELECT @@session.tokudb_support_xa; + +SET @@global.tokudb_support_xa = @global_start_value; +SELECT @@global.tokudb_support_xa; diff --git a/storage/tokudb/hatoku_hton.cc b/storage/tokudb/hatoku_hton.cc index 
1118ab0fb19..0f02afa10bc 100644 --- a/storage/tokudb/hatoku_hton.cc +++ b/storage/tokudb/hatoku_hton.cc @@ -830,6 +830,12 @@ static int tokudb_rollback(handlerton * hton, THD * thd, bool all) { static int tokudb_xa_prepare(handlerton* hton, THD* thd, bool all) { TOKUDB_DBUG_ENTER(""); int r = 0; + + /* if support_xa is disable, just return */ + if (!THDVAR(thd, support_xa)) { + TOKUDB_DBUG_RETURN(r); + } + DBUG_PRINT("trans", ("preparing transaction %s", all ? "all" : "stmt")); tokudb_trx_data *trx = (tokudb_trx_data *) thd_get_ha_data(thd, hton); DB_TXN* txn = all ? trx->all : trx->stmt; @@ -1433,6 +1439,9 @@ static struct st_mysql_sys_var *tokudb_system_variables[] = { MYSQL_SYSVAR(check_jemalloc), #endif MYSQL_SYSVAR(bulk_fetch), +#if TOKU_INCLUDE_XA + MYSQL_SYSVAR(support_xa), +#endif NULL }; diff --git a/storage/tokudb/hatoku_hton.h b/storage/tokudb/hatoku_hton.h index ac0ddefbe5d..1b89aa5f772 100644 --- a/storage/tokudb/hatoku_hton.h +++ b/storage/tokudb/hatoku_hton.h @@ -515,6 +515,16 @@ static uint tokudb_check_jemalloc; static MYSQL_SYSVAR_UINT(check_jemalloc, tokudb_check_jemalloc, 0, "Check if jemalloc is linked", NULL, NULL, 1, 0, 1, 0); #endif +#if TOKU_INCLUDE_XA +static MYSQL_THDVAR_BOOL(support_xa, + PLUGIN_VAR_OPCMDARG, + "Enable TokuDB support for the XA two-phase commit", + NULL, // check + NULL, // update + true // default +); +#endif + static MYSQL_THDVAR_BOOL(bulk_fetch, PLUGIN_VAR_THDLOCAL, "enable bulk fetch", NULL /*check*/, NULL /*update*/, true /*default*/); extern HASH tokudb_open_tables; From 1a4342501da1b6c49e9e0e1bce570016c2777f17 Mon Sep 17 00:00:00 2001 From: Rich Prohaska Date: Fri, 5 Sep 2014 09:49:27 -0400 Subject: [PATCH 181/190] DB-310 run tokudb_support_xa.test on mysql and mariadb 5.5 --- mysql-test/suite/tokudb/r/tokudb_support_xa.result | 2 +- mysql-test/suite/tokudb/t/tokudb_support_xa.test | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/mysql-test/suite/tokudb/r/tokudb_support_xa.result 
b/mysql-test/suite/tokudb/r/tokudb_support_xa.result index 08ad215d7e2..c265f38cdc2 100644 --- a/mysql-test/suite/tokudb/r/tokudb_support_xa.result +++ b/mysql-test/suite/tokudb/r/tokudb_support_xa.result @@ -110,7 +110,7 @@ xa commit 'testb',0x2030405060,11; ERROR XAE04: XAER_NOTA: Unknown XID xa rollback 'testa','testb'; xa start 'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz'; -ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MariaDB server version for the right syntax to use near '' at line 1 +ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your XYZ server version for the right syntax to use near '' at line 1 select * from t1; a 20 diff --git a/mysql-test/suite/tokudb/t/tokudb_support_xa.test b/mysql-test/suite/tokudb/t/tokudb_support_xa.test index a76c8d77b6a..ba0b1f92a13 100644 --- a/mysql-test/suite/tokudb/t/tokudb_support_xa.test +++ b/mysql-test/suite/tokudb/t/tokudb_support_xa.test @@ -115,6 +115,7 @@ xa recover; xa commit 'testb',0x2030405060,11; xa rollback 'testa','testb'; +--replace_regex /MariaDB/XYZ/ /MySQL/XYZ/ --error ER_PARSE_ERROR xa start 'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz'; From 20e92c1d33c76d2df0731aecb1a8a7ae0bd9f4e6 Mon Sep 17 00:00:00 2001 From: Rich Prohaska Date: Tue, 9 Sep 2014 06:47:11 -0400 Subject: [PATCH 182/190] FT-591 fix valgrind uninitialized value error in block allocator test caused by reading past the end of a the blockpair array --- ft/serialize/block_allocator_strategy.cc | 43 ++++++++++++++++++------ 1 file changed, 33 insertions(+), 10 deletions(-) diff --git a/ft/serialize/block_allocator_strategy.cc b/ft/serialize/block_allocator_strategy.cc index dbfbb1e5585..f896a41aaba 100644 --- a/ft/serialize/block_allocator_strategy.cc +++ b/ft/serialize/block_allocator_strategy.cc @@ -111,20 +111,43 @@ static uint64_t _roundup_to_power_of_two(uint64_t value) { static struct 
block_allocator::blockpair * _first_fit(struct block_allocator::blockpair *blocks_array, uint64_t n_blocks, uint64_t size, uint64_t alignment, - bool forward, uint64_t max_padding) { + uint64_t max_padding) { if (n_blocks == 1) { // won't enter loop, can't underflow the direction < 0 case return nullptr; } - struct block_allocator::blockpair *bp = forward ? &blocks_array[0] : &blocks_array[-1]; + struct block_allocator::blockpair *bp = &blocks_array[0]; for (uint64_t n_spaces_to_check = n_blocks - 1; n_spaces_to_check > 0; - n_spaces_to_check--, forward ? bp++ : bp--) { + n_spaces_to_check--, bp++) { // Consider the space after bp uint64_t padded_alignment = max_padding != 0 ? _align(max_padding, alignment) : alignment; uint64_t possible_offset = _align(bp->offset + bp->size, padded_alignment); - if (possible_offset + size <= bp[1].offset) { - invariant((forward ? bp - blocks_array : blocks_array - bp) < (int64_t) n_blocks); + if (possible_offset + size <= bp[1].offset) { // bp[1] is always valid since bp < &blocks_array[n_blocks-1] + invariant(bp - blocks_array < (int64_t) n_blocks); + return bp; + } + } + return nullptr; +} + +static struct block_allocator::blockpair * +_first_fit_bw(struct block_allocator::blockpair *blocks_array, + uint64_t n_blocks, uint64_t size, uint64_t alignment, + uint64_t max_padding, struct block_allocator::blockpair *blocks_array_limit) { + if (n_blocks == 1) { + // won't enter loop, can't underflow the direction < 0 case + return nullptr; + } + + struct block_allocator::blockpair *bp = &blocks_array[-1]; + for (uint64_t n_spaces_to_check = n_blocks - 1; n_spaces_to_check > 0; + n_spaces_to_check--, bp--) { + // Consider the space after bp + uint64_t padded_alignment = max_padding != 0 ? 
_align(max_padding, alignment) : alignment; + uint64_t possible_offset = _align(bp->offset + bp->size, padded_alignment); + if (&bp[1] < blocks_array_limit && possible_offset + size <= bp[1].offset) { + invariant(blocks_array - bp < (int64_t) n_blocks); return bp; } } @@ -134,7 +157,7 @@ _first_fit(struct block_allocator::blockpair *blocks_array, struct block_allocator::blockpair * block_allocator_strategy::first_fit(struct block_allocator::blockpair *blocks_array, uint64_t n_blocks, uint64_t size, uint64_t alignment) { - return _first_fit(blocks_array, n_blocks, size, alignment, true, 0); + return _first_fit(blocks_array, n_blocks, size, alignment, 0); } // Best fit block allocation @@ -188,7 +211,7 @@ static void determine_padded_fit_alignment_from_env(void) { struct block_allocator::blockpair * block_allocator_strategy::padded_fit(struct block_allocator::blockpair *blocks_array, uint64_t n_blocks, uint64_t size, uint64_t alignment) { - return _first_fit(blocks_array, n_blocks, size, alignment, true, padded_fit_alignment); + return _first_fit(blocks_array, n_blocks, size, alignment, padded_fit_alignment); } static double hot_zone_threshold = 0.85; @@ -231,21 +254,21 @@ block_allocator_strategy::heat_zone(struct block_allocator::blockpair *blocks_ar if (blocks_in_zone > 0) { // Find the first fit in the hot zone, going forward. - bp = _first_fit(boundary_bp, blocks_in_zone, size, alignment, true, 0); + bp = _first_fit(boundary_bp, blocks_in_zone, size, alignment, 0); if (bp != nullptr) { return bp; } } if (blocks_outside_zone > 0) { // Find the first fit in the cold zone, going backwards. - bp = _first_fit(boundary_bp, blocks_outside_zone, size, alignment, false, 0); + bp = _first_fit_bw(boundary_bp, blocks_outside_zone, size, alignment, 0, &blocks_array[n_blocks]); if (bp != nullptr) { return bp; } } } else { // Cold allocations are simply first-fit from the beginning. 
- return _first_fit(blocks_array, n_blocks, size, alignment, true, 0); + return _first_fit(blocks_array, n_blocks, size, alignment, 0); } return nullptr; } From 12bf3294a4714162d3c2f8e64c5192d9d6030ee2 Mon Sep 17 00:00:00 2001 From: John Esmet Date: Wed, 10 Sep 2014 09:48:11 -0400 Subject: [PATCH 183/190] FT-273 Fix a bug where we'd overactively assert that the memcmp magic bits were inconsistent though the handle was opened without those bits set. This caused dbremove to always fail. --- ft/ft-ops.cc | 1 + src/tests/test_memcmp_magic.cc | 20 +++++++++++++++----- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/ft/ft-ops.cc b/ft/ft-ops.cc index 7b3092a4f99..bf845d2c38d 100644 --- a/ft/ft-ops.cc +++ b/ft/ft-ops.cc @@ -3028,6 +3028,7 @@ ft_handle_open(FT_HANDLE ft_h, const char *fname_in_env, int is_create, int only // Ensure that the memcmp magic bits are consistent, if set. if (ft->cmp.get_memcmp_magic() != toku::comparator::MEMCMP_MAGIC_NONE && + ft_h->options.memcmp_magic != toku::comparator::MEMCMP_MAGIC_NONE && ft_h->options.memcmp_magic != ft->cmp.get_memcmp_magic()) { r = EINVAL; goto exit; diff --git a/src/tests/test_memcmp_magic.cc b/src/tests/test_memcmp_magic.cc index 473dcaf7453..5ddb473a6ed 100644 --- a/src/tests/test_memcmp_magic.cc +++ b/src/tests/test_memcmp_magic.cc @@ -106,7 +106,7 @@ static void test_memcmp_magic(void) { // Should be ok to set it more than once, even to different things, before opening. r = db->set_memcmp_magic(db, 1); CKERR(r); r = db->set_memcmp_magic(db, 2); CKERR(r); - r = db->open(db, NULL, "db", NULL, DB_BTREE, DB_CREATE, 0666); CKERR(r); + r = db->open(db, NULL, "db", "db", DB_BTREE, DB_CREATE, 0666); CKERR(r); // Can't set the memcmp magic after opening. 
r = db->set_memcmp_magic(db, 0); CKERR2(r, EINVAL); @@ -116,12 +116,17 @@ static void test_memcmp_magic(void) { r = db_create(&db2, env, 0); CKERR(r); r = db2->set_memcmp_magic(db2, 3); CKERR(r); // ..we can try setting it to something different // ..but it should fail to open - r = db2->open(db2, NULL, "db", NULL, DB_BTREE, DB_CREATE, 0666); CKERR2(r, EINVAL); + r = db2->open(db2, NULL, "db", "db", DB_BTREE, DB_CREATE, 0666); CKERR2(r, EINVAL); r = db2->set_memcmp_magic(db2, 2); CKERR(r); - r = db2->open(db2, NULL, "db", NULL, DB_BTREE, DB_CREATE, 0666); CKERR(r); - r = db2->close(db2, 0); + r = db2->open(db2, NULL, "db", "db", DB_BTREE, DB_CREATE, 0666); CKERR(r); + r = db2->close(db2, 0); r = db->close(db, 0); CKERR(r); + + // dbremove opens its own handle internally. ensure that the open + // operation succeeds (and so does dbremove) despite the fact the + // internal open does not set the memcmp magic + r = env->dbremove(env, NULL, "db", "db", 0); CKERR(r); r = env->close(env, 0); CKERR(r); } @@ -155,7 +160,7 @@ static void test_memcmp_magic_sort_order(void) { DB *db; r = db_create(&db, env, 0); CKERR(r); r = db->set_memcmp_magic(db, magic); CKERR(r); - r = db->open(db, NULL, "db", NULL, DB_BTREE, DB_CREATE, 0666); CKERR(r); + r = db->open(db, NULL, "db", "db", DB_BTREE, DB_CREATE, 0666); CKERR(r); for (int i = 0; i < 10000; i++) { char buf[1 + sizeof(int)]; @@ -192,6 +197,11 @@ static void test_memcmp_magic_sort_order(void) { txn->commit(txn, 0); r = db->close(db, 0); CKERR(r); + + // dbremove opens its own handle internally. 
ensure that the open + // operation succeeds (and so does dbremove) despite the fact the + // internal open does not set the memcmp magic + r = env->dbremove(env, NULL, "db", "db", 0); CKERR(r); r = env->close(env, 0); CKERR(r); } From 3bffb9de16802cfa234ee847102179aa4652ffd4 Mon Sep 17 00:00:00 2001 From: Rich Prohaska Date: Wed, 10 Sep 2014 14:52:18 -0400 Subject: [PATCH 184/190] FT-592 add tokuftdump --node N parameter --- tools/tokuftdump.cc | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/tools/tokuftdump.cc b/tools/tokuftdump.cc index 6f3d14deba7..3aab5401cd3 100644 --- a/tools/tokuftdump.cc +++ b/tools/tokuftdump.cc @@ -113,6 +113,8 @@ static int do_fragmentation = 0; static int do_garbage = 0; static int do_translation_table = 0; static int do_rootnode = 0; +static int do_node = 0; +static BLOCKNUM do_node_num; static int do_tsv = 0; static const char *arg0; @@ -612,12 +614,13 @@ static int usage(void) { fprintf(stderr, "--dumpdata 0|1 "); fprintf(stderr, "--header "); fprintf(stderr, "--rootnode "); + fprintf(stderr, "--node N "); fprintf(stderr, "--fragmentation "); fprintf(stderr, "--garbage "); fprintf(stderr, "--tsv "); fprintf(stderr, "--translation-table "); fprintf(stderr, "--tsv "); - fprintf(stderr, "ftfilename \n"); + fprintf(stderr, "filename \n"); return 1; } @@ -636,6 +639,10 @@ int main (int argc, const char *const argv[]) { do_header = 1; } else if (strcmp(argv[0], "--rootnode") == 0) { do_rootnode = 1; + } else if (strcmp(argv[0], "--node") == 0 && argc > 1) { + argc--; argv++; + do_node = 1; + do_node_num = make_blocknum(getuint64(argv[0])); } else if (strcmp(argv[0], "--fragmentation") == 0) { do_fragmentation = 1; } else if (strcmp(argv[0], "--garbage") == 0) { @@ -684,6 +691,9 @@ int main (int argc, const char *const argv[]) { if (do_rootnode) { dump_node(fd, ft->h->root_blocknum, ft); } + if (do_node) { + dump_node(fd, do_node_num, ft); + } if (do_fragmentation) { dump_fragmentation(fd, ft, do_tsv); } From 
9596d75797f8f7e0fbc101a086f0fe9db234198a Mon Sep 17 00:00:00 2001 From: Rich Prohaska Date: Wed, 10 Sep 2014 18:18:53 -0400 Subject: [PATCH 185/190] DB-548 generate dirty upgrade test cases with old rollback nodes --- ft/serialize/ft_node-serialize.cc | 23 +-- ft/tests/test-upgrade-recovery-logs.cc | 2 +- src/tests/recover-rollback.cc | 262 +++++++++++++++++++++++++ 3 files changed, 273 insertions(+), 14 deletions(-) create mode 100644 src/tests/recover-rollback.cc diff --git a/ft/serialize/ft_node-serialize.cc b/ft/serialize/ft_node-serialize.cc index 3e0e192a6d2..8e6e27b34b3 100644 --- a/ft/serialize/ft_node-serialize.cc +++ b/ft/serialize/ft_node-serialize.cc @@ -2593,13 +2593,13 @@ deserialize_rollback_log_from_rbuf (BLOCKNUM blocknum, ROLLBACK_LOG_NODE *log_p, return r; } - //printf("Deserializing %lld datasize=%d\n", off, datasize); const void *magic; rbuf_literal_bytes(rb, &magic, 8); lazy_assert(!memcmp(magic, "tokuroll", 8)); result->layout_version = rbuf_int(rb); - lazy_assert(result->layout_version == FT_LAYOUT_VERSION); + lazy_assert((FT_LAYOUT_VERSION_25 <= result->layout_version && result->layout_version <= FT_LAYOUT_VERSION_27) || + (result->layout_version == FT_LAYOUT_VERSION)); result->layout_version_original = rbuf_int(rb); result->layout_version_read_from_disk = result->layout_version; result->build_id = rbuf_int(rb); @@ -2659,7 +2659,7 @@ deserialize_rollback_log_from_rbuf_versioned (uint32_t version, BLOCKNUM blocknu struct rbuf *rb) { int r = 0; ROLLBACK_LOG_NODE rollback_log_node = NULL; - invariant(version==FT_LAYOUT_VERSION); //Rollback log nodes do not survive version changes. 
+ invariant((FT_LAYOUT_VERSION_25 <= version && version <= FT_LAYOUT_VERSION_27) || version == FT_LAYOUT_VERSION); r = deserialize_rollback_log_from_rbuf(blocknum, &rollback_log_node, rb); if (r==0) { *log = rollback_log_node; @@ -2756,18 +2756,15 @@ exit: return r; } -static int -decompress_from_raw_block_into_rbuf_versioned(uint32_t version, uint8_t *raw_block, size_t raw_block_size, struct rbuf *rb, BLOCKNUM blocknum) { +static int decompress_from_raw_block_into_rbuf_versioned(uint32_t version, uint8_t *raw_block, size_t raw_block_size, struct rbuf *rb, BLOCKNUM blocknum) { // This function exists solely to accomodate future changes in compression. int r = 0; - switch (version) { - case FT_LAYOUT_VERSION_13: - case FT_LAYOUT_VERSION_14: - case FT_LAYOUT_VERSION: - r = decompress_from_raw_block_into_rbuf(raw_block, raw_block_size, rb, blocknum); - break; - default: - abort(); + if ((version == FT_LAYOUT_VERSION_13 || version == FT_LAYOUT_VERSION_14) || + (FT_LAYOUT_VERSION_25 <= version && version <= FT_LAYOUT_VERSION_27) || + version == FT_LAYOUT_VERSION) { + r = decompress_from_raw_block_into_rbuf(raw_block, raw_block_size, rb, blocknum); + } else { + abort(); } return r; } diff --git a/ft/tests/test-upgrade-recovery-logs.cc b/ft/tests/test-upgrade-recovery-logs.cc index 467807050b7..528e7889599 100644 --- a/ft/tests/test-upgrade-recovery-logs.cc +++ b/ft/tests/test-upgrade-recovery-logs.cc @@ -123,7 +123,7 @@ static void run_recovery(const char *testdir) { r = tokuft_needs_recovery(testdir, false); if (strcmp(shutdown, "clean") == 0) { CKERR(r); // clean does not need recovery - } else if (strcmp(shutdown, "dirty") == 0) { + } else if (strncmp(shutdown, "dirty", 5) == 0) { CKERR2(r, 1); // dirty needs recovery } else { CKERR(EINVAL); diff --git a/src/tests/recover-rollback.cc b/src/tests/recover-rollback.cc new file mode 100644 index 00000000000..2d976c05b5a --- /dev/null +++ b/src/tests/recover-rollback.cc @@ -0,0 +1,262 @@ +/* -*- mode: C++; c-basic-offset: 
4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: +#ident "$Id$" +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuDB, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. 
+ This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. + + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." 
+#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." + +// Test dirty upgrade. +// Generate a rollback log that requires recovery. + +#include "test.h" + +// Insert max_rows key/val pairs into the db +static void do_inserts(DB_TXN *txn, DB *db, uint64_t max_rows, size_t val_size) { + char val_data[val_size]; memset(val_data, 0, val_size); + int r; + + for (uint64_t i = 0; i < max_rows; i++) { + // pick a sequential key but it does not matter for this test. + uint64_t k[2] = { + htonl(i), random64(), + }; + + DBT key = { .data = k, .size = sizeof k }; + DBT val = { .data = val_data, .size = (uint32_t) val_size }; + r = db->put(db, txn, &key, &val, 0); + CKERR(r); + } +} + +static void run_test(uint64_t num_rows, size_t val_size, bool do_crash) { + int r; + + DB_ENV *env = nullptr; + r = db_env_create(&env, 0); + CKERR(r); + r = env->set_cachesize(env, 8, 0, 1); + CKERR(r); + r = env->open(env, TOKU_TEST_FILENAME, + DB_INIT_MPOOL|DB_CREATE|DB_THREAD |DB_INIT_LOCK|DB_INIT_LOG|DB_INIT_TXN|DB_PRIVATE, + S_IRWXU+S_IRWXG+S_IRWXO); + CKERR(r); + + DB *db = nullptr; + r = db_create(&db, env, 0); + CKERR(r); + r = db->open(db, nullptr, "foo.db", 0, DB_BTREE, DB_CREATE, S_IRWXU+S_IRWXG+S_IRWXO); + CKERR(r); + + r = env->txn_checkpoint(env, 0, 0, 0); + CKERR(r); + + DB_TXN *txn = nullptr; + r = env->txn_begin(env, nullptr, &txn, 0); + CKERR(r); + + do_inserts(txn, db, num_rows, val_size); + + r = env->txn_checkpoint(env, 0, 0, 0); + CKERR(r); + + r = txn->commit(txn, 0); + CKERR(r); + + if (do_crash) + assert(0); // crash on purpose + + r = db->close(db, 0); + CKERR(r); + + r = env->close(env, 0); + CKERR(r); +} + +static void do_verify(DB_ENV *env, DB *db, uint64_t num_rows, size_t val_size UU()) { + int r; + DB_TXN 
*txn = nullptr; + r = env->txn_begin(env, nullptr, &txn, 0); + CKERR(r); + + DBC *c = nullptr; + r = db->cursor(db, txn, &c, 0); + CKERR(r); + + uint64_t i = 0; + while (1) { + DBT key = {}; + DBT val = {}; + r = c->c_get(c, &key, &val, DB_NEXT); + if (r == DB_NOTFOUND) + break; + CKERR(r); + assert(key.size == 16); + uint64_t k[2]; + memcpy(k, key.data, key.size); + assert(htonl(k[0]) == i); + assert(val.size == val_size); + i++; + } + assert(i == num_rows); + + r = c->c_close(c); + CKERR(r); + + r = txn->commit(txn, 0); + CKERR(r); +} + +static void run_recover(uint64_t num_rows, size_t val_size) { + int r; + + DB_ENV *env = nullptr; + r = db_env_create(&env, 0); + CKERR(r); + r = env->set_cachesize(env, 8, 0, 1); + CKERR(r); + r = env->open(env, TOKU_TEST_FILENAME, + DB_INIT_MPOOL|DB_CREATE|DB_THREAD |DB_INIT_LOCK|DB_INIT_LOG|DB_INIT_TXN|DB_PRIVATE | DB_RECOVER, + S_IRWXU+S_IRWXG+S_IRWXO); + CKERR(r); + + DB *db = nullptr; + r = db_create(&db, env, 0); + CKERR(r); + r = db->open(db, nullptr, "foo.db", 0, DB_BTREE, 0, S_IRWXU+S_IRWXG+S_IRWXO); + CKERR(r); + + do_verify(env, db, num_rows, val_size); + + r = db->close(db, 0); + CKERR(r); + + r = env->close(env, 0); + CKERR(r); +} + +int test_main (int argc, char *const argv[]) { + bool do_test = false; + bool do_recover = false; + bool do_crash = true; + uint64_t num_rows = 1; + size_t val_size = 1; + + for (int i = 1; i < argc; i++) { + if (strcmp(argv[i], "-v") == 0) { + verbose++; + continue; + } + if (strcmp(argv[i], "-q") == 0) { + if (verbose > 0) verbose--; + continue; + } + if (strcmp(argv[i], "--test") == 0) { + do_test = true; + continue; + } + if (strcmp(argv[i], "--recover") == 0) { + do_recover = true; + continue; + } + if (strcmp(argv[i], "--crash") == 0 && i+1 < argc) { + do_crash = atoi(argv[++i]); + continue; + } + } + if (do_test) { + // init the env directory + toku_os_recursive_delete(TOKU_TEST_FILENAME); + int r = toku_os_mkdir(TOKU_TEST_FILENAME, S_IRWXU+S_IRWXG+S_IRWXO); + CKERR(r); + 
run_test(num_rows, val_size, do_crash); + } + if (do_recover) { + run_recover(num_rows, val_size); + } + + return 0; +} From b73bef237aa6456ba0ffe08e6eba1102d42d1864 Mon Sep 17 00:00:00 2001 From: Rich Prohaska Date: Fri, 12 Sep 2014 07:31:01 -0400 Subject: [PATCH 186/190] DB-716 use jemalloc 3.6.0 in debug env --- scripts/make.mysql.debug.env.bash | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/make.mysql.debug.env.bash b/scripts/make.mysql.debug.env.bash index b2bfaef1b71..b7c270cfbd7 100755 --- a/scripts/make.mysql.debug.env.bash +++ b/scripts/make.mysql.debug.env.bash @@ -57,7 +57,7 @@ git_tag= mysql=mysql-5.5 mysql_tree=mysql-5.5.35 jemalloc=jemalloc -jemalloc_tree=3.3.1 +jemalloc_tree=3.6.0 tokudbengine=tokudb-engine tokudbengine_tree=master ftindex=ft-index From d85b993ca813e4a793b20bb7694a28ef404740c8 Mon Sep 17 00:00:00 2001 From: Rich Prohaska Date: Sat, 13 Sep 2014 16:06:55 -0400 Subject: [PATCH 187/190] DB-504 redo bulk fetch select tests in partitioned tables --- .../tokudb.bugs/r/part_index_scan.result | 248 -------- .../suite/tokudb.bugs/t/part_index_scan.test | 157 ----- ...nonpart.result => bf_create_select.result} | 0 ...rt.result => bf_create_temp_select.result} | 0 ...delete_nonpart.result => bf_delete.result} | 0 ...nonpart.result => bf_insert_select.result} | 0 ...result => bf_insert_select_dup_key.result} | 0 ...onpart.result => bf_replace_select.result} | 0 .../suite/tokudb/r/bf_select_hash_part.result | 278 +++++++++ .../suite/tokudb/r/bf_select_part.result | 543 ------------------ .../tokudb/r/bf_select_range_part.result | 286 +++++++++ ...ect_nonpart.test => bf_create_select.test} | 0 ...onpart.test => bf_create_temp_select.test} | 0 ...{bf_delete_nonpart.test => bf_delete.test} | 0 ...ect_nonpart.test => bf_insert_select.test} | 0 ...art.test => bf_insert_select_dup_key.test} | 0 ...ct_nonpart.test => bf_replace_select.test} | 0 .../suite/tokudb/t/bf_select_hash_part.test | 100 ++++ 
mysql-test/suite/tokudb/t/bf_select_part.test | 220 ------- .../suite/tokudb/t/bf_select_range_part.test | 108 ++++ 20 files changed, 772 insertions(+), 1168 deletions(-) delete mode 100644 mysql-test/suite/tokudb.bugs/r/part_index_scan.result delete mode 100644 mysql-test/suite/tokudb.bugs/t/part_index_scan.test rename mysql-test/suite/tokudb/r/{bf_create_select_nonpart.result => bf_create_select.result} (100%) rename mysql-test/suite/tokudb/r/{bf_create_temp_select_nonpart.result => bf_create_temp_select.result} (100%) rename mysql-test/suite/tokudb/r/{bf_delete_nonpart.result => bf_delete.result} (100%) rename mysql-test/suite/tokudb/r/{bf_insert_select_nonpart.result => bf_insert_select.result} (100%) rename mysql-test/suite/tokudb/r/{bf_insert_select_dup_key_nonpart.result => bf_insert_select_dup_key.result} (100%) rename mysql-test/suite/tokudb/r/{bf_replace_select_nonpart.result => bf_replace_select.result} (100%) create mode 100644 mysql-test/suite/tokudb/r/bf_select_hash_part.result delete mode 100644 mysql-test/suite/tokudb/r/bf_select_part.result create mode 100644 mysql-test/suite/tokudb/r/bf_select_range_part.result rename mysql-test/suite/tokudb/t/{bf_create_select_nonpart.test => bf_create_select.test} (100%) rename mysql-test/suite/tokudb/t/{bf_create_temp_select_nonpart.test => bf_create_temp_select.test} (100%) rename mysql-test/suite/tokudb/t/{bf_delete_nonpart.test => bf_delete.test} (100%) rename mysql-test/suite/tokudb/t/{bf_insert_select_nonpart.test => bf_insert_select.test} (100%) rename mysql-test/suite/tokudb/t/{bf_insert_select_dup_key_nonpart.test => bf_insert_select_dup_key.test} (100%) rename mysql-test/suite/tokudb/t/{bf_replace_select_nonpart.test => bf_replace_select.test} (100%) create mode 100644 mysql-test/suite/tokudb/t/bf_select_hash_part.test delete mode 100644 mysql-test/suite/tokudb/t/bf_select_part.test create mode 100644 mysql-test/suite/tokudb/t/bf_select_range_part.test diff --git 
a/mysql-test/suite/tokudb.bugs/r/part_index_scan.result b/mysql-test/suite/tokudb.bugs/r/part_index_scan.result deleted file mode 100644 index 0d416c734b7..00000000000 --- a/mysql-test/suite/tokudb.bugs/r/part_index_scan.result +++ /dev/null @@ -1,248 +0,0 @@ -set default_storage_engine='tokudb'; -drop table if exists t,t1,t2,t3; -CREATE TABLE `t` ( -`num` int(10) unsigned auto_increment NOT NULL, -`val` varchar(32) DEFAULT NULL, -PRIMARY KEY (`num`) -); -INSERT INTO t values (null,null); -INSERT INTO t SELECT null,null FROM t; -INSERT INTO t SELECT null,null FROM t; -INSERT INTO t SELECT null,null FROM t; -INSERT INTO t SELECT null,null FROM t; -INSERT INTO t SELECT null,null FROM t; -INSERT INTO t SELECT null,null FROM t; -INSERT INTO t SELECT null,null FROM t; -INSERT INTO t SELECT null,null FROM t; -INSERT INTO t SELECT null,null FROM t; -INSERT INTO t SELECT null,null FROM t; -INSERT INTO t SELECT null,null FROM t; -INSERT INTO t SELECT null,null FROM t; -INSERT INTO t SELECT null,null FROM t; -INSERT INTO t SELECT null,null FROM t; -INSERT INTO t SELECT null,null FROM t; -INSERT INTO t SELECT null,null FROM t; -INSERT INTO t SELECT null,null FROM t; -INSERT INTO t SELECT null,null FROM t; -INSERT INTO t SELECT null,null FROM t; -INSERT INTO t SELECT null,null FROM t; -INSERT INTO t SELECT null,null FROM t; -INSERT INTO t SELECT null,null FROM t; -INSERT INTO t SELECT null,null FROM t; -SELECT count(*) FROM t; -count(*) -8388608 -CREATE TABLE `t1` ( -`num` int(10) unsigned NOT NULL, -`val` varchar(32) DEFAULT NULL, -PRIMARY KEY (`num`) -); -CREATE TABLE `t2` ( -`num` int(10) unsigned NOT NULL, -`val` varchar(32) DEFAULT NULL, -PRIMARY KEY (`num`) -) -PARTITION BY HASH (num) PARTITIONS 10; -CREATE TABLE `t3` ( -`num` int(10) unsigned NOT NULL, -`val` varchar(32) DEFAULT NULL, -PRIMARY KEY (`num`) -) -PARTITION BY RANGE (num) -(PARTITION p0 VALUES LESS THAN (1000000), -PARTITION p1 VALUES LESS THAN (2000000), -PARTITION p2 VALUES LESS THAN (3000000), -PARTITION 
p3 VALUES LESS THAN (4000000), -PARTITION p4 VALUES LESS THAN (5000000), -PARTITION p5 VALUES LESS THAN (6000000), -PARTITION p6 VALUES LESS THAN (7000000), -PARTITION p7 VALUES LESS THAN (8000000), -PARTITION px VALUES LESS THAN MAXVALUE); -insert into t1 select * from t; -insert into t2 select * from t; -insert into t3 select * from t; -select count(*) from t1; -count(*) -8388608 -select count(*) from t1; -count(*) -8388608 -select count(*) from t1; -count(*) -8388608 -select count(*) from t1; -count(*) -8388608 -select count(*) from t1; -count(*) -8388608 -select count(*) from t1; -count(*) -8388608 -select count(*) from t1; -count(*) -8388608 -select count(*) from t1; -count(*) -8388608 -select count(*) from t1; -count(*) -8388608 -select count(*) from t1; -count(*) -8388608 -select count(*) from t2; -count(*) -8388608 -select count(*) from t2; -count(*) -8388608 -select count(*) from t2; -count(*) -8388608 -select count(*) from t2; -count(*) -8388608 -select count(*) from t2; -count(*) -8388608 -select count(*) from t2; -count(*) -8388608 -select count(*) from t2; -count(*) -8388608 -select count(*) from t2; -count(*) -8388608 -select count(*) from t2; -count(*) -8388608 -select count(*) from t2; -count(*) -8388608 -1 -select count(*) from t3; -count(*) -8388608 -select count(*) from t3; -count(*) -8388608 -select count(*) from t3; -count(*) -8388608 -select count(*) from t3; -count(*) -8388608 -select count(*) from t3; -count(*) -8388608 -select count(*) from t3; -count(*) -8388608 -select count(*) from t3; -count(*) -8388608 -select count(*) from t3; -count(*) -8388608 -select count(*) from t3; -count(*) -8388608 -select count(*) from t3; -count(*) -8388608 -1 -select count(*) from t1 where num>7000000; -count(*) -1847274 -select count(*) from t1 where num>7000000; -count(*) -1847274 -select count(*) from t1 where num>7000000; -count(*) -1847274 -select count(*) from t1 where num>7000000; -count(*) -1847274 -select count(*) from t1 where num>7000000; 
-count(*) -1847274 -select count(*) from t1 where num>7000000; -count(*) -1847274 -select count(*) from t1 where num>7000000; -count(*) -1847274 -select count(*) from t1 where num>7000000; -count(*) -1847274 -select count(*) from t1 where num>7000000; -count(*) -1847274 -select count(*) from t1 where num>7000000; -count(*) -1847274 -select count(*) from t2 where num>7000000; -count(*) -1847274 -select count(*) from t2 where num>7000000; -count(*) -1847274 -select count(*) from t2 where num>7000000; -count(*) -1847274 -select count(*) from t2 where num>7000000; -count(*) -1847274 -select count(*) from t2 where num>7000000; -count(*) -1847274 -select count(*) from t2 where num>7000000; -count(*) -1847274 -select count(*) from t2 where num>7000000; -count(*) -1847274 -select count(*) from t2 where num>7000000; -count(*) -1847274 -select count(*) from t2 where num>7000000; -count(*) -1847274 -select count(*) from t2 where num>7000000; -count(*) -1847274 -1 -select count(*) from t3 where num>7000000; -count(*) -1847274 -select count(*) from t3 where num>7000000; -count(*) -1847274 -select count(*) from t3 where num>7000000; -count(*) -1847274 -select count(*) from t3 where num>7000000; -count(*) -1847274 -select count(*) from t3 where num>7000000; -count(*) -1847274 -select count(*) from t3 where num>7000000; -count(*) -1847274 -select count(*) from t3 where num>7000000; -count(*) -1847274 -select count(*) from t3 where num>7000000; -count(*) -1847274 -select count(*) from t3 where num>7000000; -count(*) -1847274 -select count(*) from t3 where num>7000000; -count(*) -1847274 -1 -drop table if exists t,t1,t2,t3; diff --git a/mysql-test/suite/tokudb.bugs/t/part_index_scan.test b/mysql-test/suite/tokudb.bugs/t/part_index_scan.test deleted file mode 100644 index b38a979752b..00000000000 --- a/mysql-test/suite/tokudb.bugs/t/part_index_scan.test +++ /dev/null @@ -1,157 +0,0 @@ -# verify that index scans on parititions are not slow -# due to tokudb bulk fetch not being used - 
-source include/have_tokudb.inc; -source include/have_partition.inc; -source include/big_test.inc; -set default_storage_engine='tokudb'; -disable_warnings; -drop table if exists t,t1,t2,t3; -enable_warnings; - -let $debug = 0; -let $maxq = 10; - -CREATE TABLE `t` ( - `num` int(10) unsigned auto_increment NOT NULL, - `val` varchar(32) DEFAULT NULL, - PRIMARY KEY (`num`) -); - -# put 8M rows into t -if ($debug) { let $ts = `select now()`; echo "start $ts"; } -INSERT INTO t values (null,null); -INSERT INTO t SELECT null,null FROM t; -INSERT INTO t SELECT null,null FROM t; -INSERT INTO t SELECT null,null FROM t; -INSERT INTO t SELECT null,null FROM t; -INSERT INTO t SELECT null,null FROM t; -INSERT INTO t SELECT null,null FROM t; -INSERT INTO t SELECT null,null FROM t; -INSERT INTO t SELECT null,null FROM t; -INSERT INTO t SELECT null,null FROM t; -INSERT INTO t SELECT null,null FROM t; -INSERT INTO t SELECT null,null FROM t; -INSERT INTO t SELECT null,null FROM t; -INSERT INTO t SELECT null,null FROM t; -INSERT INTO t SELECT null,null FROM t; -INSERT INTO t SELECT null,null FROM t; -INSERT INTO t SELECT null,null FROM t; -INSERT INTO t SELECT null,null FROM t; -INSERT INTO t SELECT null,null FROM t; -INSERT INTO t SELECT null,null FROM t; -INSERT INTO t SELECT null,null FROM t; -INSERT INTO t SELECT null,null FROM t; -INSERT INTO t SELECT null,null FROM t; -INSERT INTO t SELECT null,null FROM t; -if ($debug) { let $ts = `select now()`; echo "select $ts"; } -SELECT count(*) FROM t; -if ($debug) { let $ts = `select now()`; echo "select done $ts"; } - -CREATE TABLE `t1` ( - `num` int(10) unsigned NOT NULL, - `val` varchar(32) DEFAULT NULL, - PRIMARY KEY (`num`) -); - -CREATE TABLE `t2` ( - `num` int(10) unsigned NOT NULL, - `val` varchar(32) DEFAULT NULL, - PRIMARY KEY (`num`) -) -PARTITION BY HASH (num) PARTITIONS 10; - -CREATE TABLE `t3` ( - `num` int(10) unsigned NOT NULL, - `val` varchar(32) DEFAULT NULL, - PRIMARY KEY (`num`) -) -PARTITION BY RANGE (num) -(PARTITION 
p0 VALUES LESS THAN (1000000), - PARTITION p1 VALUES LESS THAN (2000000), - PARTITION p2 VALUES LESS THAN (3000000), - PARTITION p3 VALUES LESS THAN (4000000), - PARTITION p4 VALUES LESS THAN (5000000), - PARTITION p5 VALUES LESS THAN (6000000), - PARTITION p6 VALUES LESS THAN (7000000), - PARTITION p7 VALUES LESS THAN (8000000), - PARTITION px VALUES LESS THAN MAXVALUE); - -if ($debug) { let $ts = `select now()`; echo "insert t1 $ts"; } -insert into t1 select * from t; -if ($debug) { let $ts = `select now()`; echo "insert t2 $ts"; } -insert into t2 select * from t; -if ($debug) { let $ts = `select now()`; echo "insert t3 $ts"; } -insert into t3 select * from t; -if ($debug) { let $ts = `select now()`; echo "select t1 $ts"; } - -# verify that full index scans on partitioned tables t2 and t3 are comparable to a non-partitioned table t1 -let $s = `select to_seconds(now())`; -let $i = 0; -while ($i < $maxq) { - select count(*) from t1; - inc $i; -} -let $t1 = `select to_seconds(now()) - $s`; - -if ($debug) { let $ts = `select now()`; echo "select t2 $ts"; } - -let $s = `select to_seconds(now())`; -let $i = 0; -while ($i < $maxq) { - select count(*) from t2; - inc $i; -} -let $t2 = `select to_seconds(now()) - $s`; -let $d = `select abs($t2 - $t1) <= $t1`; -echo $d; - -if ($debug) { let $ts = `select now()`; echo "select t3 $ts"; } - -let $s = `select to_seconds(now())`; -let $i = 0; -while ($i < $maxq) { - select count(*) from t3; - inc $i; -} -let $t3 = `select to_seconds(now()) - $s`; -let $d = `select abs($t3 - $t1) <= $t1`; -echo $d; - -if ($debug) { let $ts = `select now()`; echo "select t1 $ts"; } - -let $s = `select to_seconds(now())`; -let $i = 0; -while ($i < $maxq) { - select count(*) from t1 where num>7000000; - inc $i; -} -let $t1 = `select to_seconds(now()) - $s`; - -if ($debug) { let $ts = `select now()`; echo "select t2 $ts"; } - -let $s = `select to_seconds(now())`; -let $i = 0; -while ($i < $maxq) { - select count(*) from t2 where num>7000000; - inc 
$i; -} -let $t2 = `select to_seconds(now()) - $s`; -let $d = `select abs($t2 - $t1) <= $t1`; -echo $d; - -if ($debug) { let $ts = `select now()`; echo "select t3 $ts"; } - -let $s = `select to_seconds(now())`; -let $i = 0; -while ($i < $maxq) { - select count(*) from t3 where num>7000000; - inc $i; -} -let $t3 = `select to_seconds(now()) - $s`; -let $d = `select abs($t3 - $t1) <= $t1`; -echo $d; - -if ($debug) { let $ts = `select now()`; echo "done $ts"; } - -drop table if exists t,t1,t2,t3; diff --git a/mysql-test/suite/tokudb/r/bf_create_select_nonpart.result b/mysql-test/suite/tokudb/r/bf_create_select.result similarity index 100% rename from mysql-test/suite/tokudb/r/bf_create_select_nonpart.result rename to mysql-test/suite/tokudb/r/bf_create_select.result diff --git a/mysql-test/suite/tokudb/r/bf_create_temp_select_nonpart.result b/mysql-test/suite/tokudb/r/bf_create_temp_select.result similarity index 100% rename from mysql-test/suite/tokudb/r/bf_create_temp_select_nonpart.result rename to mysql-test/suite/tokudb/r/bf_create_temp_select.result diff --git a/mysql-test/suite/tokudb/r/bf_delete_nonpart.result b/mysql-test/suite/tokudb/r/bf_delete.result similarity index 100% rename from mysql-test/suite/tokudb/r/bf_delete_nonpart.result rename to mysql-test/suite/tokudb/r/bf_delete.result diff --git a/mysql-test/suite/tokudb/r/bf_insert_select_nonpart.result b/mysql-test/suite/tokudb/r/bf_insert_select.result similarity index 100% rename from mysql-test/suite/tokudb/r/bf_insert_select_nonpart.result rename to mysql-test/suite/tokudb/r/bf_insert_select.result diff --git a/mysql-test/suite/tokudb/r/bf_insert_select_dup_key_nonpart.result b/mysql-test/suite/tokudb/r/bf_insert_select_dup_key.result similarity index 100% rename from mysql-test/suite/tokudb/r/bf_insert_select_dup_key_nonpart.result rename to mysql-test/suite/tokudb/r/bf_insert_select_dup_key.result diff --git a/mysql-test/suite/tokudb/r/bf_replace_select_nonpart.result 
b/mysql-test/suite/tokudb/r/bf_replace_select.result similarity index 100% rename from mysql-test/suite/tokudb/r/bf_replace_select_nonpart.result rename to mysql-test/suite/tokudb/r/bf_replace_select.result diff --git a/mysql-test/suite/tokudb/r/bf_select_hash_part.result b/mysql-test/suite/tokudb/r/bf_select_hash_part.result new file mode 100644 index 00000000000..2c72c7129db --- /dev/null +++ b/mysql-test/suite/tokudb/r/bf_select_hash_part.result @@ -0,0 +1,278 @@ +set default_storage_engine='tokudb'; +drop table if exists t; +CREATE TABLE `t` ( +`num` int(10) unsigned NOT NULL auto_increment, +`val` varchar(32) DEFAULT NULL, +PRIMARY KEY (`num`) +) PARTITION BY HASH (num) PARTITIONS 8; +INSERT INTO t values (null,null); +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +SELECT count(*) FROM t; +count(*) +1048576 +set tokudb_bulk_fetch=ON; +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) 
from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +set tokudb_bulk_fetch=OFF; +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +1 +set tokudb_bulk_fetch=ON; +SELECT count(*) from t where num > 500000; +count(*) +548576 +SELECT count(*) from t where num > 500000; +count(*) +548576 +SELECT count(*) from t where num > 500000; +count(*) +548576 +SELECT count(*) from t where num > 500000; +count(*) +548576 +SELECT count(*) from t where num > 500000; +count(*) +548576 +SELECT count(*) from t where num > 500000; +count(*) +548576 +SELECT count(*) from t where num > 500000; +count(*) +548576 +SELECT count(*) from t where num > 500000; +count(*) +548576 +SELECT count(*) from t where num > 500000; +count(*) +548576 +SELECT count(*) from t where num > 
500000; +count(*) +548576 +SELECT count(*) from t where num > 500000; +count(*) +548576 +SELECT count(*) from t where num > 500000; +count(*) +548576 +SELECT count(*) from t where num > 500000; +count(*) +548576 +SELECT count(*) from t where num > 500000; +count(*) +548576 +SELECT count(*) from t where num > 500000; +count(*) +548576 +SELECT count(*) from t where num > 500000; +count(*) +548576 +SELECT count(*) from t where num > 500000; +count(*) +548576 +SELECT count(*) from t where num > 500000; +count(*) +548576 +SELECT count(*) from t where num > 500000; +count(*) +548576 +SELECT count(*) from t where num > 500000; +count(*) +548576 +set tokudb_bulk_fetch=OFF; +SELECT count(*) from t where num > 500000; +count(*) +548576 +SELECT count(*) from t where num > 500000; +count(*) +548576 +SELECT count(*) from t where num > 500000; +count(*) +548576 +SELECT count(*) from t where num > 500000; +count(*) +548576 +SELECT count(*) from t where num > 500000; +count(*) +548576 +SELECT count(*) from t where num > 500000; +count(*) +548576 +SELECT count(*) from t where num > 500000; +count(*) +548576 +SELECT count(*) from t where num > 500000; +count(*) +548576 +SELECT count(*) from t where num > 500000; +count(*) +548576 +SELECT count(*) from t where num > 500000; +count(*) +548576 +SELECT count(*) from t where num > 500000; +count(*) +548576 +SELECT count(*) from t where num > 500000; +count(*) +548576 +SELECT count(*) from t where num > 500000; +count(*) +548576 +SELECT count(*) from t where num > 500000; +count(*) +548576 +SELECT count(*) from t where num > 500000; +count(*) +548576 +SELECT count(*) from t where num > 500000; +count(*) +548576 +SELECT count(*) from t where num > 500000; +count(*) +548576 +SELECT count(*) from t where num > 500000; +count(*) +548576 +SELECT count(*) from t where num > 500000; +count(*) +548576 +SELECT count(*) from t where num > 500000; +count(*) +548576 +1 +drop table t; diff --git a/mysql-test/suite/tokudb/r/bf_select_part.result 
b/mysql-test/suite/tokudb/r/bf_select_part.result deleted file mode 100644 index f36fc0e1ce3..00000000000 --- a/mysql-test/suite/tokudb/r/bf_select_part.result +++ /dev/null @@ -1,543 +0,0 @@ -set default_storage_engine='tokudb'; -drop table if exists t,t1,t2,t3; -CREATE TABLE `t` ( -`num` int(10) unsigned auto_increment NOT NULL, -`val` varchar(32) DEFAULT NULL, -PRIMARY KEY (`num`) -); -INSERT INTO t values (null,null); -INSERT INTO t SELECT null,val FROM t; -INSERT INTO t SELECT null,val FROM t; -INSERT INTO t SELECT null,val FROM t; -INSERT INTO t SELECT null,val FROM t; -INSERT INTO t SELECT null,val FROM t; -INSERT INTO t SELECT null,val FROM t; -INSERT INTO t SELECT null,val FROM t; -INSERT INTO t SELECT null,val FROM t; -INSERT INTO t SELECT null,val FROM t; -INSERT INTO t SELECT null,val FROM t; -INSERT INTO t SELECT null,val FROM t; -INSERT INTO t SELECT null,val FROM t; -INSERT INTO t SELECT null,val FROM t; -INSERT INTO t SELECT null,val FROM t; -INSERT INTO t SELECT null,val FROM t; -INSERT INTO t SELECT null,val FROM t; -INSERT INTO t SELECT null,val FROM t; -INSERT INTO t SELECT null,val FROM t; -INSERT INTO t SELECT null,val FROM t; -INSERT INTO t SELECT null,val FROM t; -INSERT INTO t SELECT null,val FROM t; -INSERT INTO t SELECT null,val FROM t; -INSERT INTO t SELECT null,val FROM t; -SELECT count(*) FROM t; -count(*) -8388608 -CREATE TABLE `t1` ( -`num` int(10) unsigned NOT NULL, -`val` varchar(32) DEFAULT NULL, -PRIMARY KEY (`num`) -) as select * from t; -CREATE TABLE `t2` ( -`num` int(10) unsigned NOT NULL, -`val` varchar(32) DEFAULT NULL, -PRIMARY KEY (`num`) -) PARTITION BY HASH (num) -PARTITIONS 8 as select * from t; -CREATE TABLE `t3` ( -`num` int(10) unsigned NOT NULL, -`val` varchar(32) DEFAULT NULL, -PRIMARY KEY (`num`) -) PARTITION BY RANGE (num) -(PARTITION p0 VALUES LESS THAN (1000000), -PARTITION p1 VALUES LESS THAN (2000000), -PARTITION p2 VALUES LESS THAN (3000000), -PARTITION p3 VALUES LESS THAN (4000000), -PARTITION p4 VALUES 
LESS THAN (5000000), -PARTITION p5 VALUES LESS THAN (6000000), -PARTITION p6 VALUES LESS THAN (7000000), -PARTITION p7 VALUES LESS THAN MAXVALUE) as select * from t; -SELECT count(*) from t1; -count(*) -8388608 -SELECT count(*) from t1; -count(*) -8388608 -SELECT count(*) from t1; -count(*) -8388608 -SELECT count(*) from t1; -count(*) -8388608 -SELECT count(*) from t1; -count(*) -8388608 -SELECT count(*) from t1; -count(*) -8388608 -SELECT count(*) from t1; -count(*) -8388608 -SELECT count(*) from t1; -count(*) -8388608 -SELECT count(*) from t1; -count(*) -8388608 -SELECT count(*) from t1; -count(*) -8388608 -SELECT count(*) from t2; -count(*) -8388608 -SELECT count(*) from t2; -count(*) -8388608 -SELECT count(*) from t2; -count(*) -8388608 -SELECT count(*) from t2; -count(*) -8388608 -SELECT count(*) from t2; -count(*) -8388608 -SELECT count(*) from t2; -count(*) -8388608 -SELECT count(*) from t2; -count(*) -8388608 -SELECT count(*) from t2; -count(*) -8388608 -SELECT count(*) from t2; -count(*) -8388608 -SELECT count(*) from t2; -count(*) -8388608 -1 -SELECT count(*) from t1; -count(*) -8388608 -SELECT count(*) from t1; -count(*) -8388608 -SELECT count(*) from t1; -count(*) -8388608 -SELECT count(*) from t1; -count(*) -8388608 -SELECT count(*) from t1; -count(*) -8388608 -SELECT count(*) from t1; -count(*) -8388608 -SELECT count(*) from t1; -count(*) -8388608 -SELECT count(*) from t1; -count(*) -8388608 -SELECT count(*) from t1; -count(*) -8388608 -SELECT count(*) from t1; -count(*) -8388608 -SELECT count(*) from t3; -count(*) -8388608 -SELECT count(*) from t3; -count(*) -8388608 -SELECT count(*) from t3; -count(*) -8388608 -SELECT count(*) from t3; -count(*) -8388608 -SELECT count(*) from t3; -count(*) -8388608 -SELECT count(*) from t3; -count(*) -8388608 -SELECT count(*) from t3; -count(*) -8388608 -SELECT count(*) from t3; -count(*) -8388608 -SELECT count(*) from t3; -count(*) -8388608 -SELECT count(*) from t3; -count(*) -8388608 -1 -SELECT count(*) from t1 
where num > 7000000; -count(*) -1847274 -SELECT count(*) from t1 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t1 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t1 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t1 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t1 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t1 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t1 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t1 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t1 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t1 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t1 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t1 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t1 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t1 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t1 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t1 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t1 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t1 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t1 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t1 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t1 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t1 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t1 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t1 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t1 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t1 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t1 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t1 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t1 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t2 where num > 7000000; -count(*) -1847274 -SELECT 
count(*) from t2 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t2 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t2 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t2 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t2 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t2 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t2 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t2 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t2 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t2 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t2 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t2 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t2 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t2 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t2 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t2 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t2 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t2 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t2 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t2 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t2 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t2 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t2 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t2 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t2 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t2 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t2 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t2 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t2 where num > 7000000; -count(*) -1847274 -1 -SELECT count(*) from t1 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t1 where num > 7000000; 
-count(*) -1847274 -SELECT count(*) from t1 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t1 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t1 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t1 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t1 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t1 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t1 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t1 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t1 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t1 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t1 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t1 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t1 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t1 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t1 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t1 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t1 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t1 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t1 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t1 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t1 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t1 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t1 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t1 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t1 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t1 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t1 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t1 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t3 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t3 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t3 where 
num > 7000000; -count(*) -1847274 -SELECT count(*) from t3 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t3 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t3 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t3 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t3 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t3 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t3 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t3 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t3 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t3 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t3 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t3 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t3 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t3 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t3 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t3 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t3 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t3 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t3 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t3 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t3 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t3 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t3 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t3 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t3 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t3 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t3 where num > 7000000; -count(*) -1847274 -1 -drop table t,t1,t2,t3; diff --git a/mysql-test/suite/tokudb/r/bf_select_range_part.result b/mysql-test/suite/tokudb/r/bf_select_range_part.result new file mode 100644 index 00000000000..c13324aa34e --- 
/dev/null +++ b/mysql-test/suite/tokudb/r/bf_select_range_part.result @@ -0,0 +1,286 @@ +set default_storage_engine='tokudb'; +drop table if exists t; +CREATE TABLE `t` ( +`num` int(10) unsigned NOT NULL auto_increment, +`val` varchar(32) DEFAULT NULL, +PRIMARY KEY (`num`) +) PARTITION BY RANGE (num) +(PARTITION p0 VALUES LESS THAN (100000), +PARTITION p1 VALUES LESS THAN (200000), +PARTITION p2 VALUES LESS THAN (300000), +PARTITION p3 VALUES LESS THAN (400000), +PARTITION p4 VALUES LESS THAN (500000), +PARTITION p5 VALUES LESS THAN (600000), +PARTITION p6 VALUES LESS THAN (700000), +PARTITION p7 VALUES LESS THAN MAXVALUE); +INSERT INTO t values (null,null); +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +SELECT count(*) FROM t; +count(*) +1048576 +set tokudb_bulk_fetch=ON; +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) 
+1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +set tokudb_bulk_fetch=OFF; +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +1 +set tokudb_bulk_fetch=ON; +SELECT count(*) from t where num > 700000; +count(*) +348576 +SELECT count(*) from t where num > 700000; +count(*) +348576 +SELECT count(*) from t where num > 700000; +count(*) +348576 +SELECT count(*) from t where num > 700000; +count(*) +348576 +SELECT count(*) from t where num > 700000; +count(*) +348576 +SELECT count(*) from t where num > 700000; +count(*) +348576 +SELECT count(*) from t where num > 700000; +count(*) +348576 +SELECT count(*) from t where num > 700000; +count(*) +348576 +SELECT count(*) from t where num > 700000; +count(*) +348576 +SELECT count(*) from t where num > 700000; +count(*) +348576 +SELECT count(*) from t where num > 
700000; +count(*) +348576 +SELECT count(*) from t where num > 700000; +count(*) +348576 +SELECT count(*) from t where num > 700000; +count(*) +348576 +SELECT count(*) from t where num > 700000; +count(*) +348576 +SELECT count(*) from t where num > 700000; +count(*) +348576 +SELECT count(*) from t where num > 700000; +count(*) +348576 +SELECT count(*) from t where num > 700000; +count(*) +348576 +SELECT count(*) from t where num > 700000; +count(*) +348576 +SELECT count(*) from t where num > 700000; +count(*) +348576 +SELECT count(*) from t where num > 700000; +count(*) +348576 +set tokudb_bulk_fetch=OFF; +SELECT count(*) from t where num > 700000; +count(*) +348576 +SELECT count(*) from t where num > 700000; +count(*) +348576 +SELECT count(*) from t where num > 700000; +count(*) +348576 +SELECT count(*) from t where num > 700000; +count(*) +348576 +SELECT count(*) from t where num > 700000; +count(*) +348576 +SELECT count(*) from t where num > 700000; +count(*) +348576 +SELECT count(*) from t where num > 700000; +count(*) +348576 +SELECT count(*) from t where num > 700000; +count(*) +348576 +SELECT count(*) from t where num > 700000; +count(*) +348576 +SELECT count(*) from t where num > 700000; +count(*) +348576 +SELECT count(*) from t where num > 700000; +count(*) +348576 +SELECT count(*) from t where num > 700000; +count(*) +348576 +SELECT count(*) from t where num > 700000; +count(*) +348576 +SELECT count(*) from t where num > 700000; +count(*) +348576 +SELECT count(*) from t where num > 700000; +count(*) +348576 +SELECT count(*) from t where num > 700000; +count(*) +348576 +SELECT count(*) from t where num > 700000; +count(*) +348576 +SELECT count(*) from t where num > 700000; +count(*) +348576 +SELECT count(*) from t where num > 700000; +count(*) +348576 +SELECT count(*) from t where num > 700000; +count(*) +348576 +1 +drop table t; diff --git a/mysql-test/suite/tokudb/t/bf_create_select_nonpart.test b/mysql-test/suite/tokudb/t/bf_create_select.test similarity 
index 100% rename from mysql-test/suite/tokudb/t/bf_create_select_nonpart.test rename to mysql-test/suite/tokudb/t/bf_create_select.test diff --git a/mysql-test/suite/tokudb/t/bf_create_temp_select_nonpart.test b/mysql-test/suite/tokudb/t/bf_create_temp_select.test similarity index 100% rename from mysql-test/suite/tokudb/t/bf_create_temp_select_nonpart.test rename to mysql-test/suite/tokudb/t/bf_create_temp_select.test diff --git a/mysql-test/suite/tokudb/t/bf_delete_nonpart.test b/mysql-test/suite/tokudb/t/bf_delete.test similarity index 100% rename from mysql-test/suite/tokudb/t/bf_delete_nonpart.test rename to mysql-test/suite/tokudb/t/bf_delete.test diff --git a/mysql-test/suite/tokudb/t/bf_insert_select_nonpart.test b/mysql-test/suite/tokudb/t/bf_insert_select.test similarity index 100% rename from mysql-test/suite/tokudb/t/bf_insert_select_nonpart.test rename to mysql-test/suite/tokudb/t/bf_insert_select.test diff --git a/mysql-test/suite/tokudb/t/bf_insert_select_dup_key_nonpart.test b/mysql-test/suite/tokudb/t/bf_insert_select_dup_key.test similarity index 100% rename from mysql-test/suite/tokudb/t/bf_insert_select_dup_key_nonpart.test rename to mysql-test/suite/tokudb/t/bf_insert_select_dup_key.test diff --git a/mysql-test/suite/tokudb/t/bf_replace_select_nonpart.test b/mysql-test/suite/tokudb/t/bf_replace_select.test similarity index 100% rename from mysql-test/suite/tokudb/t/bf_replace_select_nonpart.test rename to mysql-test/suite/tokudb/t/bf_replace_select.test diff --git a/mysql-test/suite/tokudb/t/bf_select_hash_part.test b/mysql-test/suite/tokudb/t/bf_select_hash_part.test new file mode 100644 index 00000000000..67fefcb45bd --- /dev/null +++ b/mysql-test/suite/tokudb/t/bf_select_hash_part.test @@ -0,0 +1,100 @@ +# Verify that index and range scans on a hash partitioned tokudb table are not slow on tables +# due to tokudb bulk fetch not being used. 
+ +source include/have_tokudb.inc; +source include/have_partition.inc; +source include/big_test.inc; +set default_storage_engine='tokudb'; +disable_warnings; +drop table if exists t; +enable_warnings; + +let $maxq = 20; +let $debug = 0; + +# create the hash partition table +CREATE TABLE `t` ( + `num` int(10) unsigned NOT NULL auto_increment, + `val` varchar(32) DEFAULT NULL, + PRIMARY KEY (`num`) +) PARTITION BY HASH (num) PARTITIONS 8; + +# put 1M rows into t +INSERT INTO t values (null,null); +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +SELECT count(*) FROM t; + +set tokudb_bulk_fetch=ON; +let $s = `select unix_timestamp()`; +let $i = 0; +while ($i < $maxq) { + SELECT count(*) from t; + inc $i; +} +let $time_bf_on = `select unix_timestamp() - $s`; + +if ($debug) { echo index scans took $time_bf_on; } + +set tokudb_bulk_fetch=OFF; +let $s = `select unix_timestamp()`; +let $i = 0; +while ($i < $maxq) { + SELECT count(*) from t; + inc $i; +} +let $time_bf_off = `select unix_timestamp() - $s`; + +if ($debug) { echo index scans took $time_bf_off.; } + +# check that the scan time with bulk fetch off is at least 1.5 times as long as with bulk fetch on +let $verdict = `select $time_bf_off > $time_bf_on && $time_bf_off >= 1.5 * $time_bf_on`; +echo 
$verdict; +if (!$verdict) { echo index scan $time_bf_on $time_bf_off; } + +set tokudb_bulk_fetch=ON; +let $s = `select unix_timestamp()`; +let $i = 0; +while ($i < $maxq) { + SELECT count(*) from t where num > 500000; + inc $i; +} +let $time_bf_on = `select unix_timestamp() - $s`; + +if ($debug) { echo range scans took $time_bf_on; } + +set tokudb_bulk_fetch=OFF; +let $s = `select unix_timestamp()`; +let $i = 0; +while ($i < $maxq) { + SELECT count(*) from t where num > 500000; + inc $i; +} +let $time_bf_off = `select unix_timestamp() - $s`; + +if ($debug) { echo range scans took $time_bf_off.; } + +# check that the scan time with bulk fetch off is at least 1.5 times as long as with bulk fetch on +let $verdict = `select $time_bf_off > $time_bf_on && $time_bf_off >= 1.5 * $time_bf_on`; +echo $verdict; +if (!$verdict) { echo range scan $time_bf_on $time_bf_off; } + +drop table t; diff --git a/mysql-test/suite/tokudb/t/bf_select_part.test b/mysql-test/suite/tokudb/t/bf_select_part.test deleted file mode 100644 index d9faa4b0bb2..00000000000 --- a/mysql-test/suite/tokudb/t/bf_select_part.test +++ /dev/null @@ -1,220 +0,0 @@ -# Verify that index and range scans are not slow -# on tables during create select statements -# against hash and range partitioned tables -# due to tokudb bulk fetch not being used - -source include/have_tokudb.inc; -source include/have_partition.inc; -source include/big_test.inc; -set default_storage_engine='tokudb'; -disable_warnings; -drop table if exists t,t1,t2,t3; -enable_warnings; - -let $maxq = 10; - -CREATE TABLE `t` ( - `num` int(10) unsigned auto_increment NOT NULL, - `val` varchar(32) DEFAULT NULL, - PRIMARY KEY (`num`) -); - -# put 8M rows into t -INSERT INTO t values (null,null); -INSERT INTO t SELECT null,val FROM t; -INSERT INTO t SELECT null,val FROM t; -INSERT INTO t SELECT null,val FROM t; -INSERT INTO t SELECT null,val FROM t; -INSERT INTO t SELECT null,val FROM t; -INSERT INTO t SELECT null,val FROM t; -INSERT INTO t SELECT 
null,val FROM t; -INSERT INTO t SELECT null,val FROM t; -INSERT INTO t SELECT null,val FROM t; -INSERT INTO t SELECT null,val FROM t; -INSERT INTO t SELECT null,val FROM t; -INSERT INTO t SELECT null,val FROM t; -INSERT INTO t SELECT null,val FROM t; -INSERT INTO t SELECT null,val FROM t; -INSERT INTO t SELECT null,val FROM t; -INSERT INTO t SELECT null,val FROM t; -INSERT INTO t SELECT null,val FROM t; -INSERT INTO t SELECT null,val FROM t; -INSERT INTO t SELECT null,val FROM t; -INSERT INTO t SELECT null,val FROM t; -INSERT INTO t SELECT null,val FROM t; -INSERT INTO t SELECT null,val FROM t; -INSERT INTO t SELECT null,val FROM t; -SELECT count(*) FROM t; - -# Create first table from source table t -CREATE TABLE `t1` ( - `num` int(10) unsigned NOT NULL, - `val` varchar(32) DEFAULT NULL, - PRIMARY KEY (`num`) -) as select * from t; - -# Create second table from source table t -CREATE TABLE `t2` ( - `num` int(10) unsigned NOT NULL, - `val` varchar(32) DEFAULT NULL, - PRIMARY KEY (`num`) -) PARTITION BY HASH (num) -PARTITIONS 8 as select * from t; - -# Create third table from source table t; -CREATE TABLE `t3` ( - `num` int(10) unsigned NOT NULL, - `val` varchar(32) DEFAULT NULL, - PRIMARY KEY (`num`) -) PARTITION BY RANGE (num) -(PARTITION p0 VALUES LESS THAN (1000000), - PARTITION p1 VALUES LESS THAN (2000000), - PARTITION p2 VALUES LESS THAN (3000000), - PARTITION p3 VALUES LESS THAN (4000000), - PARTITION p4 VALUES LESS THAN (5000000), - PARTITION p5 VALUES LESS THAN (6000000), - PARTITION p6 VALUES LESS THAN (7000000), - PARTITION p7 VALUES LESS THAN MAXVALUE) as select * from t; - - -let $s = `select to_seconds(now())`; -let $i = 0; -while ($i < $maxq) { - SELECT count(*) from t1; - inc $i; -} -let $time_elapsed_select = `select to_seconds(now()) - $s`; - -# The following line can be used to display the time elapsed data -# which could be useful for debugging. 
-# echo Index scans took $time_elapsed_select seconds.; - -let $s = `select to_seconds(now())`; -let $i = 0; -while ($i < $maxq) { - SELECT count(*) from t2; - inc $i; -} - -let $time_elapsed_select_hash = `select to_seconds(now()) - $s`; - -# The following line can be used to display the time elapsed data -# which could be useful for debugging. -# echo Index scans took $time_elapsed_select_hash seconds.; - -# This check evaluates whether the time elapsed during the select statement -# against a hashed partition table is on par with the select statment -# against a non-partitioned table, which will confirm that bulk fetch is in fact being used. -let $verdict = `select abs($time_elapsed_select_hash - $time_elapsed_select) <= $time_elapsed_select`; -echo $verdict; -if (!$verdict) { echo index scan t2 $time_elapsed_select_hash $time_elapsed_select; } - -###################################################################### - -let $s = `select to_seconds(now())`; -let $i = 0; -while ($i < $maxq) { - SELECT count(*) from t1; - inc $i; -} -let $time_elapsed_select = `select to_seconds(now()) - $s`; - -# The following line can be used to display the time elapsed data -# which could be useful for debugging. -#echo Index scans took $time_elapsed_select seconds.; - -let $s = `select to_seconds(now())`; -let $i = 0; -while ($i < $maxq) { - SELECT count(*) from t3; - inc $i; -} - -let $time_elapsed_select_range = `select to_seconds(now()) - $s`; - -# The following line can be used to display the time elapsed data -# which could be useful for debugging. -#echo Index scans took $time_elapsed_select_range seconds.; - -# This check evaluates whether the time elapsed during the select statement -# against a range partition table is on par with the select statment -# against a non-partitioned table, which will confirm that bulk fetch is in fact being used. 
-let $verdict = `select abs($time_elapsed_select_range - $time_elapsed_select) <= $time_elapsed_select`; -echo $verdict; -if (!$verdict) { echo index scan t3 $time_elapsed_select_range $time_elapsed_select; } - -######################################################################### - -let $maxrq = 30; - -let $s = `select to_seconds(now())`; -let $i = 0; -while ($i < $maxrq) { - SELECT count(*) from t1 where num > 7000000; - inc $i; -} -let $time_elapsed_select = `select to_seconds(now()) - $s`; - -# The following line can be used to display the time elapsed data -# which could be useful for debugging. -#echo Index scans took $time_elapsed_select seconds.; - -let $s = `select to_seconds(now())`; -let $i = 0; -while ($i < $maxrq) { - SELECT count(*) from t2 where num > 7000000; - inc $i; -} - -let $time_elapsed_select_hash = `select to_seconds(now()) - $s`; - -# The following line can be used to display the time elapsed data -# which could be useful for debugging. -#echo Index scans took $time_elapsed_select_hash seconds.; - - -# This check evaluates whether the time elapsed during the select statement -# against a hash partition table is on par with the select statment -# against a non-partitioned table, which will confirm that bulk fetch is in fact being used. -let $verdict = `select abs($time_elapsed_select_hash - $time_elapsed_select) <= $time_elapsed_select`; -echo $verdict; -if (!$verdict) { echo range scan t2 $time_elapsed_select_hash $time_elapsed_select; } - -######################################################################### - -let $maxrq = 30; - -let $s = `select to_seconds(now())`; -let $i = 0; -while ($i < $maxrq) { - SELECT count(*) from t1 where num > 7000000; - inc $i; -} -let $time_elapsed_select = `select to_seconds(now()) - $s`; - -# The following line can be used to display the time elapsed data -# which could be useful for debugging. 
-#echo Index scans took $time_elapsed_select seconds.; - -let $s = `select to_seconds(now())`; -let $i = 0; -while ($i < $maxrq) { - SELECT count(*) from t3 where num > 7000000; - inc $i; -} - -let $time_elapsed_select_range = `select to_seconds(now()) - $s`; - -# The following line can be used to display the time elapsed data -# which could be useful for debugging. -#echo Index scans took $time_elapsed_select_range seconds.; - - -# This check evaluates whether the time elapsed during the select statement -# against a range partition table is on par with the select statment -# against a non-partitioned table, which will confirm that bulk fetch is in fact being used. -let $verdict = `select abs($time_elapsed_select_range - $time_elapsed_select) <= $time_elapsed_select`; -echo $verdict; -if (!$verdict) { echo range scan t3 $time_elapsed_select_range $time_elapsed_select; } - -drop table t,t1,t2,t3; diff --git a/mysql-test/suite/tokudb/t/bf_select_range_part.test b/mysql-test/suite/tokudb/t/bf_select_range_part.test new file mode 100644 index 00000000000..0a1d7de3747 --- /dev/null +++ b/mysql-test/suite/tokudb/t/bf_select_range_part.test @@ -0,0 +1,108 @@ +# Verify that index and range scans on a range partitioned tokudb table are not slow on tables +# due to tokudb bulk fetch not being used. 
+ +source include/have_tokudb.inc; +source include/have_partition.inc; +source include/big_test.inc; +set default_storage_engine='tokudb'; +disable_warnings; +drop table if exists t; +enable_warnings; + +let $maxq = 20; +let $debug = 0; + +# create the range partition table +CREATE TABLE `t` ( + `num` int(10) unsigned NOT NULL auto_increment, + `val` varchar(32) DEFAULT NULL, + PRIMARY KEY (`num`) +) PARTITION BY RANGE (num) +(PARTITION p0 VALUES LESS THAN (100000), + PARTITION p1 VALUES LESS THAN (200000), + PARTITION p2 VALUES LESS THAN (300000), + PARTITION p3 VALUES LESS THAN (400000), + PARTITION p4 VALUES LESS THAN (500000), + PARTITION p5 VALUES LESS THAN (600000), + PARTITION p6 VALUES LESS THAN (700000), + PARTITION p7 VALUES LESS THAN MAXVALUE); + +# put 1M rows into t +INSERT INTO t values (null,null); +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +SELECT count(*) FROM t; + +set tokudb_bulk_fetch=ON; +let $s = `select unix_timestamp()`; +let $i = 0; +while ($i < $maxq) { + SELECT count(*) from t; + inc $i; +} +let $time_bf_on = `select unix_timestamp() - $s`; + +if ($debug) { echo index scans took $time_bf_on; } + +set tokudb_bulk_fetch=OFF; +let $s = `select unix_timestamp()`; +let $i = 0; +while ($i < $maxq) { + SELECT count(*) from 
t; + inc $i; +} +let $time_bf_off = `select unix_timestamp() - $s`; + +if ($debug) { echo index scans took $time_bf_off.; } + +# check that the scan time with bulk fetch off is at least 1.5 times as long as with bulk fetch on +let $verdict = `select $time_bf_off > $time_bf_on && $time_bf_off >= 1.5 * $time_bf_on`; +echo $verdict; +if (!$verdict) { echo index scan $time_bf_on $time_bf_off; } + +set tokudb_bulk_fetch=ON; +let $s = `select unix_timestamp()`; +let $i = 0; +while ($i < $maxq) { + SELECT count(*) from t where num > 700000; + inc $i; +} +let $time_bf_on = `select unix_timestamp() - $s`; + +if ($debug) { echo range scans took $time_bf_on; } + +set tokudb_bulk_fetch=OFF; +let $s = `select unix_timestamp()`; +let $i = 0; +while ($i < $maxq) { + SELECT count(*) from t where num > 700000; + inc $i; +} +let $time_bf_off = `select unix_timestamp() - $s`; + +if ($debug) { echo range scans took $time_bf_off.; } + +# check that the scan time with bulk fetch off is at least 1.5 times as long as with bulk fetch on +let $verdict = `select $time_bf_off > $time_bf_on && $time_bf_off >= 1.5 * $time_bf_on`; +echo $verdict; +if (!$verdict) { echo range scan $time_bf_on $time_bf_off; } + +drop table t; From ac45ebc14faf5adc51d30dab317eba89ea90b6a9 Mon Sep 17 00:00:00 2001 From: Rich Prohaska Date: Mon, 15 Sep 2014 07:27:18 -0400 Subject: [PATCH 188/190] DB-720 test case for clustering keys on partitioned tokudb tables --- .../suite/tokudb/r/cluster_key_part.result | 28 +++++++++++++++++++ .../suite/tokudb/t/cluster_key_part.test | 22 +++++++++++++++ 2 files changed, 50 insertions(+) create mode 100644 mysql-test/suite/tokudb/r/cluster_key_part.result create mode 100644 mysql-test/suite/tokudb/t/cluster_key_part.test diff --git a/mysql-test/suite/tokudb/r/cluster_key_part.result b/mysql-test/suite/tokudb/r/cluster_key_part.result new file mode 100644 index 00000000000..cd8fc340314 --- /dev/null +++ b/mysql-test/suite/tokudb/r/cluster_key_part.result @@ -0,0 +1,28 @@ +set 
default_storage_engine='tokudb'; +drop table if exists t; +create table t ( +x int not null, +y int not null, +primary key(x)) +partition by hash(x) partitions 2; +show create table t; +Table Create Table +t CREATE TABLE `t` ( + `x` int(11) NOT NULL, + `y` int(11) NOT NULL, + PRIMARY KEY (`x`) +) ENGINE=TokuDB DEFAULT CHARSET=latin1 +/*!50100 PARTITION BY HASH (x) +PARTITIONS 2 */ +alter table t add clustering key(y); +show create table t; +Table Create Table +t CREATE TABLE `t` ( + `x` int(11) NOT NULL, + `y` int(11) NOT NULL, + PRIMARY KEY (`x`), + CLUSTERING KEY `y` (`y`) +) ENGINE=TokuDB DEFAULT CHARSET=latin1 +/*!50100 PARTITION BY HASH (x) +PARTITIONS 2 */ +drop table t; diff --git a/mysql-test/suite/tokudb/t/cluster_key_part.test b/mysql-test/suite/tokudb/t/cluster_key_part.test new file mode 100644 index 00000000000..dd569b88ad1 --- /dev/null +++ b/mysql-test/suite/tokudb/t/cluster_key_part.test @@ -0,0 +1,22 @@ +# Test that clustering keys can be created on partitioned tokudb tables + +source include/have_tokudb.inc; +set default_storage_engine='tokudb'; + +disable_warnings; +drop table if exists t; +enable_warnings; + +create table t ( + x int not null, + y int not null, + primary key(x)) +partition by hash(x) partitions 2; + +show create table t; + +alter table t add clustering key(y); + +show create table t; + +drop table t; From b183d81d2ee63223b99d3fc3542bf016087af604 Mon Sep 17 00:00:00 2001 From: Rich Prohaska Date: Mon, 15 Sep 2014 07:47:43 -0400 Subject: [PATCH 189/190] DB-714 read free replication --- .../suite/rpl/r/rpl_tokudb_delete_pk.result | 17 +++ .../rpl/r/rpl_tokudb_delete_pk_lookup1.result | 17 +++ .../r/rpl_tokudb_update_pk_uc0_lookup0.result | 27 ++++ .../r/rpl_tokudb_update_pk_uc0_lookup1.result | 27 ++++ .../r/rpl_tokudb_update_pk_uc1_lookup0.result | 27 ++++ .../r/rpl_tokudb_update_pk_uc1_lookup1.result | 27 ++++ ...pl_tokudb_update_unique_uc0_lookup0.result | 27 ++++ ...pl_tokudb_update_unique_uc0_lookup1.result | 27 ++++ 
.../suite/rpl/r/rpl_tokudb_write_pk.result | 14 ++ .../rpl/r/rpl_tokudb_write_pk_uc1.result | 14 ++ .../rpl/r/rpl_tokudb_write_unique.result | 14 ++ .../rpl/r/rpl_tokudb_write_unique_uc1.result | 14 ++ .../rpl/t/rpl_tokudb_delete_pk-slave.opt | 1 + .../suite/rpl/t/rpl_tokudb_delete_pk.test | 63 +++++++++ .../t/rpl_tokudb_delete_pk_lookup1-slave.opt | 1 + .../rpl/t/rpl_tokudb_delete_pk_lookup1.test | 66 +++++++++ ...rpl_tokudb_update_pk_uc0_lookup0-slave.opt | 1 + .../t/rpl_tokudb_update_pk_uc0_lookup0.test | 70 ++++++++++ ...rpl_tokudb_update_pk_uc0_lookup1-slave.opt | 1 + .../t/rpl_tokudb_update_pk_uc0_lookup1.test | 70 ++++++++++ ...rpl_tokudb_update_pk_uc1_lookup0-slave.opt | 1 + .../t/rpl_tokudb_update_pk_uc1_lookup0.test | 70 ++++++++++ ...rpl_tokudb_update_pk_uc1_lookup1-slave.opt | 1 + .../t/rpl_tokudb_update_pk_uc1_lookup1.test | 70 ++++++++++ ...tokudb_update_unique_uc0_lookup0-slave.opt | 1 + .../rpl_tokudb_update_unique_uc0_lookup0.test | 70 ++++++++++ ...tokudb_update_unique_uc0_lookup1-slave.opt | 1 + .../rpl_tokudb_update_unique_uc0_lookup1.test | 69 ++++++++++ .../suite/rpl/t/rpl_tokudb_write_pk-slave.opt | 1 + .../suite/rpl/t/rpl_tokudb_write_pk.test | 53 ++++++++ .../rpl/t/rpl_tokudb_write_pk_uc1-slave.opt | 1 + .../suite/rpl/t/rpl_tokudb_write_pk_uc1.test | 53 ++++++++ .../rpl/t/rpl_tokudb_write_unique-slave.opt | 1 + .../suite/rpl/t/rpl_tokudb_write_unique.test | 52 ++++++++ .../t/rpl_tokudb_write_unique_uc1-slave.opt | 1 + .../rpl/t/rpl_tokudb_write_unique_uc1.test | 52 ++++++++ storage/tokudb/ha_tokudb.cc | 126 ++++++++++++------ storage/tokudb/ha_tokudb.h | 13 ++ storage/tokudb/hatoku_hton.cc | 4 + storage/tokudb/hatoku_hton.h | 21 ++- 40 files changed, 1142 insertions(+), 44 deletions(-) create mode 100644 mysql-test/suite/rpl/r/rpl_tokudb_delete_pk.result create mode 100644 mysql-test/suite/rpl/r/rpl_tokudb_delete_pk_lookup1.result create mode 100644 mysql-test/suite/rpl/r/rpl_tokudb_update_pk_uc0_lookup0.result create mode 100644 
mysql-test/suite/rpl/r/rpl_tokudb_update_pk_uc0_lookup1.result create mode 100644 mysql-test/suite/rpl/r/rpl_tokudb_update_pk_uc1_lookup0.result create mode 100644 mysql-test/suite/rpl/r/rpl_tokudb_update_pk_uc1_lookup1.result create mode 100644 mysql-test/suite/rpl/r/rpl_tokudb_update_unique_uc0_lookup0.result create mode 100644 mysql-test/suite/rpl/r/rpl_tokudb_update_unique_uc0_lookup1.result create mode 100644 mysql-test/suite/rpl/r/rpl_tokudb_write_pk.result create mode 100644 mysql-test/suite/rpl/r/rpl_tokudb_write_pk_uc1.result create mode 100644 mysql-test/suite/rpl/r/rpl_tokudb_write_unique.result create mode 100644 mysql-test/suite/rpl/r/rpl_tokudb_write_unique_uc1.result create mode 100644 mysql-test/suite/rpl/t/rpl_tokudb_delete_pk-slave.opt create mode 100644 mysql-test/suite/rpl/t/rpl_tokudb_delete_pk.test create mode 100644 mysql-test/suite/rpl/t/rpl_tokudb_delete_pk_lookup1-slave.opt create mode 100644 mysql-test/suite/rpl/t/rpl_tokudb_delete_pk_lookup1.test create mode 100644 mysql-test/suite/rpl/t/rpl_tokudb_update_pk_uc0_lookup0-slave.opt create mode 100644 mysql-test/suite/rpl/t/rpl_tokudb_update_pk_uc0_lookup0.test create mode 100644 mysql-test/suite/rpl/t/rpl_tokudb_update_pk_uc0_lookup1-slave.opt create mode 100644 mysql-test/suite/rpl/t/rpl_tokudb_update_pk_uc0_lookup1.test create mode 100644 mysql-test/suite/rpl/t/rpl_tokudb_update_pk_uc1_lookup0-slave.opt create mode 100644 mysql-test/suite/rpl/t/rpl_tokudb_update_pk_uc1_lookup0.test create mode 100644 mysql-test/suite/rpl/t/rpl_tokudb_update_pk_uc1_lookup1-slave.opt create mode 100644 mysql-test/suite/rpl/t/rpl_tokudb_update_pk_uc1_lookup1.test create mode 100644 mysql-test/suite/rpl/t/rpl_tokudb_update_unique_uc0_lookup0-slave.opt create mode 100644 mysql-test/suite/rpl/t/rpl_tokudb_update_unique_uc0_lookup0.test create mode 100644 mysql-test/suite/rpl/t/rpl_tokudb_update_unique_uc0_lookup1-slave.opt create mode 100644 mysql-test/suite/rpl/t/rpl_tokudb_update_unique_uc0_lookup1.test 
create mode 100644 mysql-test/suite/rpl/t/rpl_tokudb_write_pk-slave.opt create mode 100644 mysql-test/suite/rpl/t/rpl_tokudb_write_pk.test create mode 100644 mysql-test/suite/rpl/t/rpl_tokudb_write_pk_uc1-slave.opt create mode 100644 mysql-test/suite/rpl/t/rpl_tokudb_write_pk_uc1.test create mode 100644 mysql-test/suite/rpl/t/rpl_tokudb_write_unique-slave.opt create mode 100644 mysql-test/suite/rpl/t/rpl_tokudb_write_unique.test create mode 100644 mysql-test/suite/rpl/t/rpl_tokudb_write_unique_uc1-slave.opt create mode 100644 mysql-test/suite/rpl/t/rpl_tokudb_write_unique_uc1.test diff --git a/mysql-test/suite/rpl/r/rpl_tokudb_delete_pk.result b/mysql-test/suite/rpl/r/rpl_tokudb_delete_pk.result new file mode 100644 index 00000000000..cd8608f4387 --- /dev/null +++ b/mysql-test/suite/rpl/r/rpl_tokudb_delete_pk.result @@ -0,0 +1,17 @@ +include/master-slave.inc +[connection master] +drop table if exists t; +create table t (a bigint not null, primary key(a)) engine=tokudb; +insert into t values (1); +insert into t values (2),(3); +insert into t values (4); +include/diff_tables.inc [master:test.t, slave:test.t] +delete from t where a=2; +select unix_timestamp() into @tstart; +select unix_timestamp() into @tend; +select @tend-@tstart <= 5; +@tend-@tstart <= 5 +1 +include/diff_tables.inc [master:test.t, slave:test.t] +drop table if exists t; +include/rpl_end.inc diff --git a/mysql-test/suite/rpl/r/rpl_tokudb_delete_pk_lookup1.result b/mysql-test/suite/rpl/r/rpl_tokudb_delete_pk_lookup1.result new file mode 100644 index 00000000000..ae2aea84287 --- /dev/null +++ b/mysql-test/suite/rpl/r/rpl_tokudb_delete_pk_lookup1.result @@ -0,0 +1,17 @@ +include/master-slave.inc +[connection master] +drop table if exists t; +create table t (a bigint not null, primary key(a)) engine=tokudb; +insert into t values (1); +insert into t values (2),(3); +insert into t values (4); +include/diff_tables.inc [master:test.t, slave:test.t] +delete from t where a=2; +select unix_timestamp() into 
@tstart; +select unix_timestamp() into @tend; +select @tend-@tstart > 5; +@tend-@tstart > 5 +1 +include/diff_tables.inc [master:test.t, slave:test.t] +drop table if exists t; +include/rpl_end.inc diff --git a/mysql-test/suite/rpl/r/rpl_tokudb_update_pk_uc0_lookup0.result b/mysql-test/suite/rpl/r/rpl_tokudb_update_pk_uc0_lookup0.result new file mode 100644 index 00000000000..fc961fd0c13 --- /dev/null +++ b/mysql-test/suite/rpl/r/rpl_tokudb_update_pk_uc0_lookup0.result @@ -0,0 +1,27 @@ +include/master-slave.inc +[connection master] +drop table if exists t; +create table t (a bigint not null, b bigint not null, primary key(a)) engine=tokudb; +insert into t values (1,0); +insert into t values (2,0),(3,0); +insert into t values (4,0); +include/diff_tables.inc [master:test.t, slave:test.t] +update t set b=b+1 where a=2; +update t set b=b+2 where a=1; +update t set b=b+3 where a=4; +update t set b=b+4 where a=3; +update t set b=b+1 where 1<=a and a<=3; +select unix_timestamp() into @tstart; +select unix_timestamp() into @tend; +select @tend-@tstart <= 5; +@tend-@tstart <= 5 +1 +select * from t; +a b +1 3 +2 2 +3 5 +4 3 +include/diff_tables.inc [master:test.t, slave:test.t] +drop table if exists t; +include/rpl_end.inc diff --git a/mysql-test/suite/rpl/r/rpl_tokudb_update_pk_uc0_lookup1.result b/mysql-test/suite/rpl/r/rpl_tokudb_update_pk_uc0_lookup1.result new file mode 100644 index 00000000000..5325f6c3c6d --- /dev/null +++ b/mysql-test/suite/rpl/r/rpl_tokudb_update_pk_uc0_lookup1.result @@ -0,0 +1,27 @@ +include/master-slave.inc +[connection master] +drop table if exists t; +create table t (a bigint not null, b bigint not null, primary key(a)) engine=tokudb; +insert into t values (1,0); +insert into t values (2,0),(3,0); +insert into t values (4,0); +include/diff_tables.inc [master:test.t, slave:test.t] +update t set b=b+1 where a=2; +update t set b=b+2 where a=1; +update t set b=b+3 where a=4; +update t set b=b+4 where a=3; +update t set b=b+1 where 1<=a and a<=3; 
+select unix_timestamp() into @tstart; +select unix_timestamp() into @tend; +select @tend-@tstart <= 5; +@tend-@tstart <= 5 +0 +select * from t; +a b +1 3 +2 2 +3 5 +4 3 +include/diff_tables.inc [master:test.t, slave:test.t] +drop table if exists t; +include/rpl_end.inc diff --git a/mysql-test/suite/rpl/r/rpl_tokudb_update_pk_uc1_lookup0.result b/mysql-test/suite/rpl/r/rpl_tokudb_update_pk_uc1_lookup0.result new file mode 100644 index 00000000000..5325f6c3c6d --- /dev/null +++ b/mysql-test/suite/rpl/r/rpl_tokudb_update_pk_uc1_lookup0.result @@ -0,0 +1,27 @@ +include/master-slave.inc +[connection master] +drop table if exists t; +create table t (a bigint not null, b bigint not null, primary key(a)) engine=tokudb; +insert into t values (1,0); +insert into t values (2,0),(3,0); +insert into t values (4,0); +include/diff_tables.inc [master:test.t, slave:test.t] +update t set b=b+1 where a=2; +update t set b=b+2 where a=1; +update t set b=b+3 where a=4; +update t set b=b+4 where a=3; +update t set b=b+1 where 1<=a and a<=3; +select unix_timestamp() into @tstart; +select unix_timestamp() into @tend; +select @tend-@tstart <= 5; +@tend-@tstart <= 5 +0 +select * from t; +a b +1 3 +2 2 +3 5 +4 3 +include/diff_tables.inc [master:test.t, slave:test.t] +drop table if exists t; +include/rpl_end.inc diff --git a/mysql-test/suite/rpl/r/rpl_tokudb_update_pk_uc1_lookup1.result b/mysql-test/suite/rpl/r/rpl_tokudb_update_pk_uc1_lookup1.result new file mode 100644 index 00000000000..5325f6c3c6d --- /dev/null +++ b/mysql-test/suite/rpl/r/rpl_tokudb_update_pk_uc1_lookup1.result @@ -0,0 +1,27 @@ +include/master-slave.inc +[connection master] +drop table if exists t; +create table t (a bigint not null, b bigint not null, primary key(a)) engine=tokudb; +insert into t values (1,0); +insert into t values (2,0),(3,0); +insert into t values (4,0); +include/diff_tables.inc [master:test.t, slave:test.t] +update t set b=b+1 where a=2; +update t set b=b+2 where a=1; +update t set b=b+3 where a=4; 
+update t set b=b+4 where a=3; +update t set b=b+1 where 1<=a and a<=3; +select unix_timestamp() into @tstart; +select unix_timestamp() into @tend; +select @tend-@tstart <= 5; +@tend-@tstart <= 5 +0 +select * from t; +a b +1 3 +2 2 +3 5 +4 3 +include/diff_tables.inc [master:test.t, slave:test.t] +drop table if exists t; +include/rpl_end.inc diff --git a/mysql-test/suite/rpl/r/rpl_tokudb_update_unique_uc0_lookup0.result b/mysql-test/suite/rpl/r/rpl_tokudb_update_unique_uc0_lookup0.result new file mode 100644 index 00000000000..0b958b89d0f --- /dev/null +++ b/mysql-test/suite/rpl/r/rpl_tokudb_update_unique_uc0_lookup0.result @@ -0,0 +1,27 @@ +include/master-slave.inc +[connection master] +drop table if exists t; +create table t (a bigint not null, b bigint not null, c bigint not null, primary key(a), unique key(c)) engine=tokudb; +insert into t values (1,0,-1); +insert into t values (2,0,-2),(3,0,-3); +insert into t values (4,0,-4); +include/diff_tables.inc [master:test.t, slave:test.t] +update t set b=b+1 where a=2; +update t set b=b+2 where a=1; +update t set b=b+3 where a=4; +update t set b=b+4 where a=3; +update t set b=b+1 where 1<=a and a<=3; +select unix_timestamp() into @tstart; +select unix_timestamp() into @tend; +select @tend-@tstart <= 5; +@tend-@tstart <= 5 +1 +select * from t; +a b c +1 3 -1 +2 2 -2 +3 5 -3 +4 3 -4 +include/diff_tables.inc [master:test.t, slave:test.t] +drop table if exists t; +include/rpl_end.inc diff --git a/mysql-test/suite/rpl/r/rpl_tokudb_update_unique_uc0_lookup1.result b/mysql-test/suite/rpl/r/rpl_tokudb_update_unique_uc0_lookup1.result new file mode 100644 index 00000000000..83dcdb394df --- /dev/null +++ b/mysql-test/suite/rpl/r/rpl_tokudb_update_unique_uc0_lookup1.result @@ -0,0 +1,27 @@ +include/master-slave.inc +[connection master] +drop table if exists t; +create table t (a bigint not null, b bigint not null, c bigint not null, primary key(a), unique key(c)) engine=tokudb; +insert into t values (1,0,-1); +insert into t 
values (2,0,-2),(3,0,-3); +insert into t values (4,0,-4); +include/diff_tables.inc [master:test.t, slave:test.t] +update t set b=b+1 where a=2; +update t set b=b+2 where a=1; +update t set b=b+3 where a=4; +update t set b=b+4 where a=3; +update t set b=b+1 where 1<=a and a<=3; +select unix_timestamp() into @tstart; +select unix_timestamp() into @tend; +select @tend-@tstart <= 5; +@tend-@tstart <= 5 +0 +select * from t; +a b c +1 3 -1 +2 2 -2 +3 5 -3 +4 3 -4 +include/diff_tables.inc [master:test.t, slave:test.t] +drop table if exists t; +include/rpl_end.inc diff --git a/mysql-test/suite/rpl/r/rpl_tokudb_write_pk.result b/mysql-test/suite/rpl/r/rpl_tokudb_write_pk.result new file mode 100644 index 00000000000..6db2036d933 --- /dev/null +++ b/mysql-test/suite/rpl/r/rpl_tokudb_write_pk.result @@ -0,0 +1,14 @@ +include/master-slave.inc +[connection master] +drop table if exists t; +create table t (a bigint not null, primary key(a)) engine=tokudb; +select unix_timestamp() into @tstart; +insert into t values (1); +insert into t values (2),(3); +insert into t values (4); +select unix_timestamp()-@tstart <= 10; +unix_timestamp()-@tstart <= 10 +1 +include/diff_tables.inc [master:test.t, slave:test.t] +drop table if exists t; +include/rpl_end.inc diff --git a/mysql-test/suite/rpl/r/rpl_tokudb_write_pk_uc1.result b/mysql-test/suite/rpl/r/rpl_tokudb_write_pk_uc1.result new file mode 100644 index 00000000000..3bcd3e8ccdd --- /dev/null +++ b/mysql-test/suite/rpl/r/rpl_tokudb_write_pk_uc1.result @@ -0,0 +1,14 @@ +include/master-slave.inc +[connection master] +drop table if exists t; +create table t (a bigint not null, primary key(a)) engine=tokudb; +select unix_timestamp() into @tstart; +insert into t values (1); +insert into t values (2),(3); +insert into t values (4); +select unix_timestamp()-@tstart <= 10; +unix_timestamp()-@tstart <= 10 +0 +include/diff_tables.inc [master:test.t, slave:test.t] +drop table if exists t; +include/rpl_end.inc diff --git 
a/mysql-test/suite/rpl/r/rpl_tokudb_write_unique.result b/mysql-test/suite/rpl/r/rpl_tokudb_write_unique.result new file mode 100644 index 00000000000..9eb1f2edf20 --- /dev/null +++ b/mysql-test/suite/rpl/r/rpl_tokudb_write_unique.result @@ -0,0 +1,14 @@ +include/master-slave.inc +[connection master] +drop table if exists t; +create table t (a bigint not null, b bigint not null, primary key(a), unique key(b)) engine=tokudb; +select unix_timestamp() into @tstart; +insert into t values (1,2); +insert into t values (2,3),(3,4); +insert into t values (4,5); +select unix_timestamp()-@tstart <= 10; +unix_timestamp()-@tstart <= 10 +1 +include/diff_tables.inc [master:test.t, slave:test.t] +drop table if exists t; +include/rpl_end.inc diff --git a/mysql-test/suite/rpl/r/rpl_tokudb_write_unique_uc1.result b/mysql-test/suite/rpl/r/rpl_tokudb_write_unique_uc1.result new file mode 100644 index 00000000000..3bed6ea282a --- /dev/null +++ b/mysql-test/suite/rpl/r/rpl_tokudb_write_unique_uc1.result @@ -0,0 +1,14 @@ +include/master-slave.inc +[connection master] +drop table if exists t; +create table t (a bigint not null, b bigint not null, primary key(a), unique key(b)) engine=tokudb; +select unix_timestamp() into @tstart; +insert into t values (1,2); +insert into t values (2,3),(3,4); +insert into t values (4,5); +select unix_timestamp()-@tstart <= 10; +unix_timestamp()-@tstart <= 10 +0 +include/diff_tables.inc [master:test.t, slave:test.t] +drop table if exists t; +include/rpl_end.inc diff --git a/mysql-test/suite/rpl/t/rpl_tokudb_delete_pk-slave.opt b/mysql-test/suite/rpl/t/rpl_tokudb_delete_pk-slave.opt new file mode 100644 index 00000000000..dc139282dc4 --- /dev/null +++ b/mysql-test/suite/rpl/t/rpl_tokudb_delete_pk-slave.opt @@ -0,0 +1 @@ +--read-only=ON --tokudb-rpl-unique-checks-delay=10000 --tokudb-rpl-unique-checks=OFF --tokudb-rpl-lookup-rows-delay=10000 --tokudb-rpl-lookup-rows=OFF diff --git a/mysql-test/suite/rpl/t/rpl_tokudb_delete_pk.test 
b/mysql-test/suite/rpl/t/rpl_tokudb_delete_pk.test new file mode 100644 index 00000000000..fb42f40bb62 --- /dev/null +++ b/mysql-test/suite/rpl/t/rpl_tokudb_delete_pk.test @@ -0,0 +1,63 @@ +# test replicated delete rows log events on a table with a primary key. +# the slave is read only with tokudb rpl row lookups OFF. + +source include/have_tokudb.inc; +let $engine=tokudb; +source include/have_binlog_format_row.inc; +source include/master-slave.inc; + +# initialize +connection master; +disable_warnings; +drop table if exists t; +enable_warnings; + +connection slave; +# show variables like 'read_only'; +# show variables like 'tokudb_rpl_%'; + +# insert some rows +connection master; +# select @@binlog_format; +# select @@autocommit; +eval create table t (a bigint not null, primary key(a)) engine=$engine; +# show create table t; +insert into t values (1); +insert into t values (2),(3); +insert into t values (4); + +# wait for the inserts to finish on the slave +connection master; +sync_slave_with_master; +# source include/sync_slave_sql_with_master.inc; + +# diff tables +connection master; +--let $diff_tables= master:test.t, slave:test.t +source include/diff_tables.inc; + +# delete a row +connection master; +delete from t where a=2; +select unix_timestamp() into @tstart; + +# wait for the delete to finish on the slave +connection master; +sync_slave_with_master; +# source include/sync_slave_sql_with_master.inc; +connection master; +select unix_timestamp() into @tend; +select @tend-@tstart <= 5; # assert no delay in the delete time + +# diff tables +--let $diff_tables= master:test.t, slave:test.t +source include/diff_tables.inc; + +# cleanup +connection master; +drop table if exists t; +sync_slave_with_master; +# source include/sync_slave_sql_with_master.inc; + +source include/rpl_end.inc; + diff --git a/mysql-test/suite/rpl/t/rpl_tokudb_delete_pk_lookup1-slave.opt b/mysql-test/suite/rpl/t/rpl_tokudb_delete_pk_lookup1-slave.opt new file mode 100644 index 
00000000000..4675b07763d --- /dev/null +++ b/mysql-test/suite/rpl/t/rpl_tokudb_delete_pk_lookup1-slave.opt @@ -0,0 +1 @@ +--read-only=ON --tokudb-rpl-unique-checks-delay=0 --tokudb-rpl-unique-checks=ON --tokudb-rpl-lookup-rows-delay=10000 --tokudb-rpl-lookup-rows=ON diff --git a/mysql-test/suite/rpl/t/rpl_tokudb_delete_pk_lookup1.test b/mysql-test/suite/rpl/t/rpl_tokudb_delete_pk_lookup1.test new file mode 100644 index 00000000000..bf5edbd2c1b --- /dev/null +++ b/mysql-test/suite/rpl/t/rpl_tokudb_delete_pk_lookup1.test @@ -0,0 +1,66 @@ +# test replicated delete rows log events on a table with a primary key. +# the slave is read only with tokudb rpl row lookups ON. +# this will cause SLOW deletes. + +source include/have_tokudb.inc; +let $engine=tokudb; +source include/have_binlog_format_row.inc; +source include/master-slave.inc; + +# initialize +connection master; +disable_warnings; +drop table if exists t; +enable_warnings; + +connection slave; +# show variables like 'read_only'; +# show variables like 'tokudb_rpl_%'; + +# insert some rows +connection master; +# select @@binlog_format; +# select @@autocommit; +eval create table t (a bigint not null, primary key(a)) engine=$engine; +# show create table t; +insert into t values (1); +insert into t values (2),(3); +insert into t values (4); + +# wait for the inserts to finish on the slave +connection master; +sync_slave_with_master; +# source include/sync_slave_sql_with_master.inc; + +# diff tables +connection master; +--let $diff_tables= master:test.t, slave:test.t +source include/diff_tables.inc; + +# delete a row +connection master; +delete from t where a=2; +select unix_timestamp() into @tstart; + +# wait for the delete to finish on the slave +connection master; +sync_slave_with_master; +# source include/sync_slave_sql_with_master.inc; + +connection master; +select unix_timestamp() into @tend; +select @tend-@tstart > 5; # assert big delay in the delete time + +# diff tables +--let $diff_tables= master:test.t, 
slave:test.t +source include/diff_tables.inc; + +# cleanup +connection master; +drop table if exists t; + +sync_slave_with_master; +# source include/sync_slave_sql_with_master.inc; + +source include/rpl_end.inc; + diff --git a/mysql-test/suite/rpl/t/rpl_tokudb_update_pk_uc0_lookup0-slave.opt b/mysql-test/suite/rpl/t/rpl_tokudb_update_pk_uc0_lookup0-slave.opt new file mode 100644 index 00000000000..dc139282dc4 --- /dev/null +++ b/mysql-test/suite/rpl/t/rpl_tokudb_update_pk_uc0_lookup0-slave.opt @@ -0,0 +1 @@ +--read-only=ON --tokudb-rpl-unique-checks-delay=10000 --tokudb-rpl-unique-checks=OFF --tokudb-rpl-lookup-rows-delay=10000 --tokudb-rpl-lookup-rows=OFF diff --git a/mysql-test/suite/rpl/t/rpl_tokudb_update_pk_uc0_lookup0.test b/mysql-test/suite/rpl/t/rpl_tokudb_update_pk_uc0_lookup0.test new file mode 100644 index 00000000000..998987349c7 --- /dev/null +++ b/mysql-test/suite/rpl/t/rpl_tokudb_update_pk_uc0_lookup0.test @@ -0,0 +1,70 @@ +# test replicated update rows log events on a table with a primary key. 
+ +source include/have_tokudb.inc; +let $engine=tokudb; +source include/have_binlog_format_row.inc; +source include/master-slave.inc; + +# initialize +connection master; +disable_warnings; +drop table if exists t; +enable_warnings; + +connection slave; +# show variables like 'read_only'; +# show variables like 'tokudb_rpl_%'; + +# insert some rows +connection master; +# select @@binlog_format; +# select @@autocommit; +eval create table t (a bigint not null, b bigint not null, primary key(a)) engine=$engine; +# show create table t; +insert into t values (1,0); +insert into t values (2,0),(3,0); +insert into t values (4,0); + +# wait for the inserts to finish on the slave +connection master; +sync_slave_with_master; +# source include/sync_slave_sql_with_master.inc; + +# diff tables +connection master; +--let $diff_tables= master:test.t, slave:test.t +source include/diff_tables.inc; + +# delete a row +connection master; +update t set b=b+1 where a=2; +update t set b=b+2 where a=1; +update t set b=b+3 where a=4; +update t set b=b+4 where a=3; +update t set b=b+1 where 1<=a and a<=3; +select unix_timestamp() into @tstart; + +# wait for the delete to finish on the slave +connection master; +sync_slave_with_master; +# source include/sync_slave_sql_with_master.inc; +connection master; +select unix_timestamp() into @tend; +select @tend-@tstart <= 5; # assert no delay in the delete time + +connection slave; +select * from t; + +# diff tables +--let $diff_tables= master:test.t, slave:test.t +source include/diff_tables.inc; + +# cleanup +connection master; +drop table if exists t; + +sync_slave_with_master; +# source include/sync_slave_sql_with_master.inc; + +source include/rpl_end.inc; + diff --git a/mysql-test/suite/rpl/t/rpl_tokudb_update_pk_uc0_lookup1-slave.opt b/mysql-test/suite/rpl/t/rpl_tokudb_update_pk_uc0_lookup1-slave.opt new file mode 100644 index 00000000000..d546dd00669 --- /dev/null +++ b/mysql-test/suite/rpl/t/rpl_tokudb_update_pk_uc0_lookup1-slave.opt @@ -0,0 
+1 @@ +--read-only=ON --tokudb-rpl-unique-checks-delay=10000 --tokudb-rpl-unique-checks=OFF --tokudb-rpl-lookup-rows-delay=10000 --tokudb-rpl-lookup-rows=ON diff --git a/mysql-test/suite/rpl/t/rpl_tokudb_update_pk_uc0_lookup1.test b/mysql-test/suite/rpl/t/rpl_tokudb_update_pk_uc0_lookup1.test new file mode 100644 index 00000000000..998987349c7 --- /dev/null +++ b/mysql-test/suite/rpl/t/rpl_tokudb_update_pk_uc0_lookup1.test @@ -0,0 +1,70 @@ +# test replicated update rows log events on a table with a primary key. + +source include/have_tokudb.inc; +let $engine=tokudb; +source include/have_binlog_format_row.inc; +source include/master-slave.inc; + +# initialize +connection master; +disable_warnings; +drop table if exists t; +enable_warnings; + +connection slave; +# show variables like 'read_only'; +# show variables like 'tokudb_rpl_%'; + +# insert some rows +connection master; +# select @@binlog_format; +# select @@autocommit; +eval create table t (a bigint not null, b bigint not null, primary key(a)) engine=$engine; +# show create table t; +insert into t values (1,0); +insert into t values (2,0),(3,0); +insert into t values (4,0); + +# wait for the inserts to finish on the slave +connection master; +sync_slave_with_master; +# source include/sync_slave_sql_with_master.inc; + +# diff tables +connection master; +--let $diff_tables= master:test.t, slave:test.t +source include/diff_tables.inc; + +# delete a row +connection master; +update t set b=b+1 where a=2; +update t set b=b+2 where a=1; +update t set b=b+3 where a=4; +update t set b=b+4 where a=3; +update t set b=b+1 where 1<=a and a<=3; +select unix_timestamp() into @tstart; + +# wait for the delete to finish on the slave +connection master; +sync_slave_with_master; +# source include/sync_slave_sql_with_master.inc; +connection master; +select unix_timestamp() into @tend; +select @tend-@tstart <= 5; # assert no delay in the delete time + +connection slave; +select * from t; + +# diff tables +--let $diff_tables= 
master:test.t, slave:test.t +source include/diff_tables.inc; + +# cleanup +connection master; +drop table if exists t; + +sync_slave_with_master; +# source include/sync_slave_sql_with_master.inc; + +source include/rpl_end.inc; + diff --git a/mysql-test/suite/rpl/t/rpl_tokudb_update_pk_uc1_lookup0-slave.opt b/mysql-test/suite/rpl/t/rpl_tokudb_update_pk_uc1_lookup0-slave.opt new file mode 100644 index 00000000000..5cfe5f83a91 --- /dev/null +++ b/mysql-test/suite/rpl/t/rpl_tokudb_update_pk_uc1_lookup0-slave.opt @@ -0,0 +1 @@ +--read-only=ON --tokudb-rpl-unique-checks-delay=10000 --tokudb-rpl-unique-checks=ON --tokudb-rpl-lookup-rows-delay=10000 --tokudb-rpl-lookup-rows=OFF diff --git a/mysql-test/suite/rpl/t/rpl_tokudb_update_pk_uc1_lookup0.test b/mysql-test/suite/rpl/t/rpl_tokudb_update_pk_uc1_lookup0.test new file mode 100644 index 00000000000..998987349c7 --- /dev/null +++ b/mysql-test/suite/rpl/t/rpl_tokudb_update_pk_uc1_lookup0.test @@ -0,0 +1,70 @@ +# test replicated update rows log events on a table with a primary key. 
+ +source include/have_tokudb.inc; +let $engine=tokudb; +source include/have_binlog_format_row.inc; +source include/master-slave.inc; + +# initialize +connection master; +disable_warnings; +drop table if exists t; +enable_warnings; + +connection slave; +# show variables like 'read_only'; +# show variables like 'tokudb_rpl_%'; + +# insert some rows +connection master; +# select @@binlog_format; +# select @@autocommit; +eval create table t (a bigint not null, b bigint not null, primary key(a)) engine=$engine; +# show create table t; +insert into t values (1,0); +insert into t values (2,0),(3,0); +insert into t values (4,0); + +# wait for the inserts to finish on the slave +connection master; +sync_slave_with_master; +# source include/sync_slave_sql_with_master.inc; + +# diff tables +connection master; +--let $diff_tables= master:test.t, slave:test.t +source include/diff_tables.inc; + +# delete a row +connection master; +update t set b=b+1 where a=2; +update t set b=b+2 where a=1; +update t set b=b+3 where a=4; +update t set b=b+4 where a=3; +update t set b=b+1 where 1<=a and a<=3; +select unix_timestamp() into @tstart; + +# wait for the delete to finish on the slave +connection master; +sync_slave_with_master; +# source include/sync_slave_sql_with_master.inc; +connection master; +select unix_timestamp() into @tend; +select @tend-@tstart <= 5; # assert no delay in the delete time + +connection slave; +select * from t; + +# diff tables +--let $diff_tables= master:test.t, slave:test.t +source include/diff_tables.inc; + +# cleanup +connection master; +drop table if exists t; + +sync_slave_with_master; +# source include/sync_slave_sql_with_master.inc; + +source include/rpl_end.inc; + diff --git a/mysql-test/suite/rpl/t/rpl_tokudb_update_pk_uc1_lookup1-slave.opt b/mysql-test/suite/rpl/t/rpl_tokudb_update_pk_uc1_lookup1-slave.opt new file mode 100644 index 00000000000..7cd575c52bb --- /dev/null +++ b/mysql-test/suite/rpl/t/rpl_tokudb_update_pk_uc1_lookup1-slave.opt @@ -0,0 
+1 @@ +--read-only=ON --tokudb-rpl-unique-checks-delay=10000 --tokudb-rpl-unique-checks=ON --tokudb-rpl-lookup-rows-delay=10000 --tokudb-rpl-lookup-rows=ON diff --git a/mysql-test/suite/rpl/t/rpl_tokudb_update_pk_uc1_lookup1.test b/mysql-test/suite/rpl/t/rpl_tokudb_update_pk_uc1_lookup1.test new file mode 100644 index 00000000000..998987349c7 --- /dev/null +++ b/mysql-test/suite/rpl/t/rpl_tokudb_update_pk_uc1_lookup1.test @@ -0,0 +1,70 @@ +# test replicated update rows log events on a table with a primary key. + +source include/have_tokudb.inc; +let $engine=tokudb; +source include/have_binlog_format_row.inc; +source include/master-slave.inc; + +# initialize +connection master; +disable_warnings; +drop table if exists t; +enable_warnings; + +connection slave; +# show variables like 'read_only'; +# show variables like 'tokudb_rpl_%'; + +# insert some rows +connection master; +# select @@binlog_format; +# select @@autocommit; +eval create table t (a bigint not null, b bigint not null, primary key(a)) engine=$engine; +# show create table t; +insert into t values (1,0); +insert into t values (2,0),(3,0); +insert into t values (4,0); + +# wait for the inserts to finish on the slave +connection master; +sync_slave_with_master; +# source include/sync_slave_sql_with_master.inc; + +# diff tables +connection master; +--let $diff_tables= master:test.t, slave:test.t +source include/diff_tables.inc; + +# delete a row +connection master; +update t set b=b+1 where a=2; +update t set b=b+2 where a=1; +update t set b=b+3 where a=4; +update t set b=b+4 where a=3; +update t set b=b+1 where 1<=a and a<=3; +select unix_timestamp() into @tstart; + +# wait for the delete to finish on the slave +connection master; +sync_slave_with_master; +# source include/sync_slave_sql_with_master.inc; +connection master; +select unix_timestamp() into @tend; +select @tend-@tstart <= 5; # assert no delay in the delete time + +connection slave; +select * from t; + +# diff tables +--let $diff_tables= 
master:test.t, slave:test.t +source include/diff_tables.inc; + +# cleanup +connection master; +drop table if exists t; + +sync_slave_with_master; +# source include/sync_slave_sql_with_master.inc; + +source include/rpl_end.inc; + diff --git a/mysql-test/suite/rpl/t/rpl_tokudb_update_unique_uc0_lookup0-slave.opt b/mysql-test/suite/rpl/t/rpl_tokudb_update_unique_uc0_lookup0-slave.opt new file mode 100644 index 00000000000..dc139282dc4 --- /dev/null +++ b/mysql-test/suite/rpl/t/rpl_tokudb_update_unique_uc0_lookup0-slave.opt @@ -0,0 +1 @@ +--read-only=ON --tokudb-rpl-unique-checks-delay=10000 --tokudb-rpl-unique-checks=OFF --tokudb-rpl-lookup-rows-delay=10000 --tokudb-rpl-lookup-rows=OFF diff --git a/mysql-test/suite/rpl/t/rpl_tokudb_update_unique_uc0_lookup0.test b/mysql-test/suite/rpl/t/rpl_tokudb_update_unique_uc0_lookup0.test new file mode 100644 index 00000000000..11401ac0ce0 --- /dev/null +++ b/mysql-test/suite/rpl/t/rpl_tokudb_update_unique_uc0_lookup0.test @@ -0,0 +1,70 @@ +# test replicated update rows log events on a table with a primary key. 
+ +source include/have_tokudb.inc; +let $engine=tokudb; +source include/have_binlog_format_row.inc; +source include/master-slave.inc; + +# initialize +connection master; +disable_warnings; +drop table if exists t; +enable_warnings; + +connection slave; +# show variables like 'read_only'; +# show variables like 'tokudb_rpl_%'; + +# insert some rows +connection master; +# select @@binlog_format; +# select @@autocommit; +eval create table t (a bigint not null, b bigint not null, c bigint not null, primary key(a), unique key(c)) engine=$engine; +# show create table t; +insert into t values (1,0,-1); +insert into t values (2,0,-2),(3,0,-3); +insert into t values (4,0,-4); + +# wait for the inserts to finish on the slave +connection master; +sync_slave_with_master; +# source include/sync_slave_sql_with_master.inc; + +# diff tables +connection master; +--let $diff_tables= master:test.t, slave:test.t +source include/diff_tables.inc; + +# delete a row +connection master; +update t set b=b+1 where a=2; +update t set b=b+2 where a=1; +update t set b=b+3 where a=4; +update t set b=b+4 where a=3; +update t set b=b+1 where 1<=a and a<=3; +select unix_timestamp() into @tstart; + +# wait for the delete to finish on the slave +connection master; +sync_slave_with_master; +# source include/sync_slave_sql_with_master.inc; +connection master; +select unix_timestamp() into @tend; +select @tend-@tstart <= 5; # assert no delay in the delete time + +connection slave; +select * from t; + +# diff tables +--let $diff_tables= master:test.t, slave:test.t +source include/diff_tables.inc; + +# cleanup +connection master; +drop table if exists t; + +sync_slave_with_master; +# source include/sync_slave_sql_with_master.inc; + +source include/rpl_end.inc; + diff --git a/mysql-test/suite/rpl/t/rpl_tokudb_update_unique_uc0_lookup1-slave.opt b/mysql-test/suite/rpl/t/rpl_tokudb_update_unique_uc0_lookup1-slave.opt new file mode 100644 index 00000000000..d546dd00669 --- /dev/null +++ 
b/mysql-test/suite/rpl/t/rpl_tokudb_update_unique_uc0_lookup1-slave.opt @@ -0,0 +1 @@ +--read-only=ON --tokudb-rpl-unique-checks-delay=10000 --tokudb-rpl-unique-checks=OFF --tokudb-rpl-lookup-rows-delay=10000 --tokudb-rpl-lookup-rows=ON diff --git a/mysql-test/suite/rpl/t/rpl_tokudb_update_unique_uc0_lookup1.test b/mysql-test/suite/rpl/t/rpl_tokudb_update_unique_uc0_lookup1.test new file mode 100644 index 00000000000..ea77447bc75 --- /dev/null +++ b/mysql-test/suite/rpl/t/rpl_tokudb_update_unique_uc0_lookup1.test @@ -0,0 +1,69 @@ +# test replicated update rows log events on a table with a primary key. + +source include/have_tokudb.inc; +let $engine=tokudb; +source include/have_binlog_format_row.inc; +source include/master-slave.inc; + +# initialize +connection master; +disable_warnings; +drop table if exists t; +enable_warnings; + +connection slave; +# show variables like 'read_only'; +# show variables like 'tokudb_rpl_%'; + +# insert some rows +connection master; +# select @@binlog_format; +# select @@autocommit; +eval create table t (a bigint not null, b bigint not null, c bigint not null, primary key(a), unique key(c)) engine=$engine; +# show create table t; +insert into t values (1,0,-1); +insert into t values (2,0,-2),(3,0,-3); +insert into t values (4,0,-4); + +# wait for the inserts to finish on the slave +connection master; +sync_slave_with_master; +# source include/sync_slave_sql_with_master.inc; + +# diff tables +connection master; +--let $diff_tables= master:test.t, slave:test.t +source include/diff_tables.inc; + +# delete a row +connection master; +update t set b=b+1 where a=2; +update t set b=b+2 where a=1; +update t set b=b+3 where a=4; +update t set b=b+4 where a=3; +update t set b=b+1 where 1<=a and a<=3; +select unix_timestamp() into @tstart; + +# wait for the delete to finish on the slave +connection master; +sync_slave_with_master; +# source include/sync_slave_sql_with_master.inc; +connection master; +select unix_timestamp() into @tend; +select 
@tend-@tstart <= 5; # assert no delay in the delete time + +connection slave; +select * from t; + +# diff tables +--let $diff_tables= master:test.t, slave:test.t +source include/diff_tables.inc; + +# cleanup +connection master; +drop table if exists t; +sync_slave_with_master; +# source include/sync_slave_sql_with_master.inc; + +source include/rpl_end.inc; + diff --git a/mysql-test/suite/rpl/t/rpl_tokudb_write_pk-slave.opt b/mysql-test/suite/rpl/t/rpl_tokudb_write_pk-slave.opt new file mode 100644 index 00000000000..9baf0d65ecf --- /dev/null +++ b/mysql-test/suite/rpl/t/rpl_tokudb_write_pk-slave.opt @@ -0,0 +1 @@ +--read-only=ON --tokudb-rpl-unique-checks-delay=5000 --tokudb-rpl-unique-checks=OFF diff --git a/mysql-test/suite/rpl/t/rpl_tokudb_write_pk.test b/mysql-test/suite/rpl/t/rpl_tokudb_write_pk.test new file mode 100644 index 00000000000..c77e4b49605 --- /dev/null +++ b/mysql-test/suite/rpl/t/rpl_tokudb_write_pk.test @@ -0,0 +1,53 @@ +# test replicated write rows log events on a table with a primary key. +# the slave is read only with tokudb unique checks disabled. 
+ +source include/have_tokudb.inc; +let $engine=tokudb; +source include/have_binlog_format_row.inc; +source include/master-slave.inc; + +# initialize +connection master; +disable_warnings; +drop table if exists t; +enable_warnings; + +connection slave; +# show variables like 'read_only'; +# show variables like 'tokudb_rpl_unique_checks%'; + +# insert some rows +connection master; +# select @@binlog_format; +# select @@autocommit; +eval create table t (a bigint not null, primary key(a)) engine=$engine; +# show create table t; +select unix_timestamp() into @tstart; +insert into t values (1); +insert into t values (2),(3); +insert into t values (4); + +sync_slave_with_master; +# source include/sync_slave_sql_with_master.inc; + +connection master; +select unix_timestamp()-@tstart <= 10; + +connection slave; +# insert into t values (5); # test read-only +# show create table t; + +# diff tables +connection master; +--let $diff_tables= master:test.t, slave:test.t +source include/diff_tables.inc; + +# cleanup +connection master; +drop table if exists t; + +sync_slave_with_master; +# source include/sync_slave_sql_with_master.inc; + +source include/rpl_end.inc; + diff --git a/mysql-test/suite/rpl/t/rpl_tokudb_write_pk_uc1-slave.opt b/mysql-test/suite/rpl/t/rpl_tokudb_write_pk_uc1-slave.opt new file mode 100644 index 00000000000..b1df0b6daf0 --- /dev/null +++ b/mysql-test/suite/rpl/t/rpl_tokudb_write_pk_uc1-slave.opt @@ -0,0 +1 @@ +--read-only=ON --tokudb-rpl-unique-checks-delay=10000 --tokudb-rpl-unique-checks=ON diff --git a/mysql-test/suite/rpl/t/rpl_tokudb_write_pk_uc1.test b/mysql-test/suite/rpl/t/rpl_tokudb_write_pk_uc1.test new file mode 100644 index 00000000000..c77e4b49605 --- /dev/null +++ b/mysql-test/suite/rpl/t/rpl_tokudb_write_pk_uc1.test @@ -0,0 +1,53 @@ +# test replicated write rows log events on a table with a primary key. +# the slave is read only with tokudb unique checks disabled. 
+ +source include/have_tokudb.inc; +let $engine=tokudb; +source include/have_binlog_format_row.inc; +source include/master-slave.inc; + +# initialize +connection master; +disable_warnings; +drop table if exists t; +enable_warnings; + +connection slave; +# show variables like 'read_only'; +# show variables like 'tokudb_rpl_unique_checks%'; + +# insert some rows +connection master; +# select @@binlog_format; +# select @@autocommit; +eval create table t (a bigint not null, primary key(a)) engine=$engine; +# show create table t; +select unix_timestamp() into @tstart; +insert into t values (1); +insert into t values (2),(3); +insert into t values (4); + +sync_slave_with_master; +# source include/sync_slave_sql_with_master.inc; + +connection master; +select unix_timestamp()-@tstart <= 10; + +connection slave; +# insert into t values (5); # test read-only +# show create table t; + +# diff tables +connection master; +--let $diff_tables= master:test.t, slave:test.t +source include/diff_tables.inc; + +# cleanup +connection master; +drop table if exists t; + +sync_slave_with_master; +# source include/sync_slave_sql_with_master.inc; + +source include/rpl_end.inc; + diff --git a/mysql-test/suite/rpl/t/rpl_tokudb_write_unique-slave.opt b/mysql-test/suite/rpl/t/rpl_tokudb_write_unique-slave.opt new file mode 100644 index 00000000000..9baf0d65ecf --- /dev/null +++ b/mysql-test/suite/rpl/t/rpl_tokudb_write_unique-slave.opt @@ -0,0 +1 @@ +--read-only=ON --tokudb-rpl-unique-checks-delay=5000 --tokudb-rpl-unique-checks=OFF diff --git a/mysql-test/suite/rpl/t/rpl_tokudb_write_unique.test b/mysql-test/suite/rpl/t/rpl_tokudb_write_unique.test new file mode 100644 index 00000000000..cf6a26b423d --- /dev/null +++ b/mysql-test/suite/rpl/t/rpl_tokudb_write_unique.test @@ -0,0 +1,52 @@ +# test replicated write rows log events on a table with a primary key and a unique secondary key. +# the slave is read only with tokudb unique checks disabled. 
+ +source include/have_tokudb.inc; +let $engine=tokudb; +source include/have_binlog_format_row.inc; +source include/master-slave.inc; + +# initialize +connection master; +disable_warnings; +drop table if exists t; +enable_warnings; + +connection slave; +# show variables like 'read_only'; +# show variables like 'tokudb_rpl_unique_checks%'; + +# insert some rows +connection master; +# select @@binlog_format; +# select @@autocommit; +eval create table t (a bigint not null, b bigint not null, primary key(a), unique key(b)) engine=$engine; +# show create table t; +select unix_timestamp() into @tstart; +insert into t values (1,2); +insert into t values (2,3),(3,4); +insert into t values (4,5); + +sync_slave_with_master; +# source include/sync_slave_sql_with_master.inc; + +connection master; +select unix_timestamp()-@tstart <= 10; + +connection slave; +# show create table t; + +# diff tables +connection master; +--let $diff_tables= master:test.t, slave:test.t +source include/diff_tables.inc; + +# cleanup +connection master; +drop table if exists t; + +sync_slave_with_master; +# source include/sync_slave_sql_with_master.inc; + +source include/rpl_end.inc; + diff --git a/mysql-test/suite/rpl/t/rpl_tokudb_write_unique_uc1-slave.opt b/mysql-test/suite/rpl/t/rpl_tokudb_write_unique_uc1-slave.opt new file mode 100644 index 00000000000..0518efd3da5 --- /dev/null +++ b/mysql-test/suite/rpl/t/rpl_tokudb_write_unique_uc1-slave.opt @@ -0,0 +1 @@ +--read-only=ON --tokudb-rpl-unique-checks-delay=5000 --tokudb-rpl-unique-checks=ON diff --git a/mysql-test/suite/rpl/t/rpl_tokudb_write_unique_uc1.test b/mysql-test/suite/rpl/t/rpl_tokudb_write_unique_uc1.test new file mode 100644 index 00000000000..cf6a26b423d --- /dev/null +++ b/mysql-test/suite/rpl/t/rpl_tokudb_write_unique_uc1.test @@ -0,0 +1,52 @@ +# test replicated write rows log events on a table with a primary key and a unique secondary key. +# the slave is read only with tokudb unique checks disabled. 
+ +source include/have_tokudb.inc; +let $engine=tokudb; +source include/have_binlog_format_row.inc; +source include/master-slave.inc; + +# initialize +connection master; +disable_warnings; +drop table if exists t; +enable_warnings; + +connection slave; +# show variables like 'read_only'; +# show variables like 'tokudb_rpl_unique_checks%'; + +# insert some rows +connection master; +# select @@binlog_format; +# select @@autocommit; +eval create table t (a bigint not null, b bigint not null, primary key(a), unique key(b)) engine=$engine; +# show create table t; +select unix_timestamp() into @tstart; +insert into t values (1,2); +insert into t values (2,3),(3,4); +insert into t values (4,5); + +sync_slave_with_master; +# source include/sync_slave_sql_with_master.inc; + +connection master; +select unix_timestamp()-@tstart <= 10; + +connection slave; +# show create table t; + +# diff tables +connection master; +--let $diff_tables= master:test.t, slave:test.t +source include/diff_tables.inc; + +# cleanup +connection master; +drop table if exists t; + +sync_slave_with_master; +# source include/sync_slave_sql_with_master.inc; + +source include/rpl_end.inc; + diff --git a/storage/tokudb/ha_tokudb.cc b/storage/tokudb/ha_tokudb.cc index 178b6a59d7d..3caf870bf3c 100644 --- a/storage/tokudb/ha_tokudb.cc +++ b/storage/tokudb/ha_tokudb.cc @@ -1249,6 +1249,7 @@ ha_tokudb::ha_tokudb(handlerton * hton, TABLE_SHARE * table_arg):handler(hton, t tokudb_active_index = MAX_KEY; invalidate_icp(); trx_handler_list.data = this; + in_rpl_write_rows = in_rpl_delete_rows = in_rpl_update_rows = false; TOKUDB_HANDLER_DBUG_VOID_RETURN; } @@ -3550,12 +3551,27 @@ cleanup: return error; } +static void maybe_do_unique_checks_delay(THD *thd) { + if (thd->slave_thread) { + uint64_t delay_ms = THDVAR(thd, rpl_unique_checks_delay); + if (delay_ms) + usleep(delay_ms * 1000); + } +} + +static bool do_unique_checks(THD *thd, bool do_rpl_event) { + if (do_rpl_event && thd->slave_thread && opt_readonly && 
!THDVAR(thd, rpl_unique_checks)) + return false; + else + return !thd_test_options(thd, OPTION_RELAXED_UNIQUE_CHECKS); +} + int ha_tokudb::do_uniqueness_checks(uchar* record, DB_TXN* txn, THD* thd) { - int error; + int error = 0; // // first do uniqueness checks // - if (share->has_unique_keys && !thd_test_options(thd, OPTION_RELAXED_UNIQUE_CHECKS)) { + if (share->has_unique_keys && do_unique_checks(thd, in_rpl_write_rows)) { for (uint keynr = 0; keynr < table_share->keys; keynr++) { bool is_unique_key = (table->key_info[keynr].flags & HA_NOSAME) || (keynr == primary_key); bool is_unique = false; @@ -3568,13 +3584,18 @@ int ha_tokudb::do_uniqueness_checks(uchar* record, DB_TXN* txn, THD* thd) { if (!is_unique_key) { continue; } + + maybe_do_unique_checks_delay(thd); + // // if unique key, check uniqueness constraint // but, we do not need to check it if the key has a null // and we do not need to check it if unique_checks is off // error = is_val_unique(&is_unique, record, &table->key_info[keynr], keynr, txn); - if (error) { goto cleanup; } + if (error) { + goto cleanup; + } if (!is_unique) { error = DB_KEYEXIST; last_dup_key = keynr; @@ -3582,7 +3603,6 @@ int ha_tokudb::do_uniqueness_checks(uchar* record, DB_TXN* txn, THD* thd) { } } } - error = 0; cleanup: return error; } @@ -3685,15 +3705,8 @@ void ha_tokudb::test_row_packing(uchar* record, DBT* pk_key, DBT* pk_val) { tokudb_my_free(tmp_pk_val_data); } -// // set the put flags for the main dictionary -// -void ha_tokudb::set_main_dict_put_flags( - THD* thd, - bool opt_eligible, - uint32_t* put_flags - ) -{ +void ha_tokudb::set_main_dict_put_flags(THD* thd, bool opt_eligible, uint32_t* put_flags) { uint32_t old_prelock_flags = 0; uint curr_num_DBs = table->s->keys + tokudb_test(hidden_primary_key); bool in_hot_index = share->num_DBs > curr_num_DBs; @@ -3713,8 +3726,7 @@ void ha_tokudb::set_main_dict_put_flags( { *put_flags = old_prelock_flags; } - else if (thd_test_options(thd, OPTION_RELAXED_UNIQUE_CHECKS) - && 
!is_replace_into(thd) && !is_insert_ignore(thd)) + else if (!do_unique_checks(thd, in_rpl_write_rows | in_rpl_update_rows) && !is_replace_into(thd) && !is_insert_ignore(thd)) { *put_flags = old_prelock_flags; } @@ -3736,22 +3748,18 @@ void ha_tokudb::set_main_dict_put_flags( int ha_tokudb::insert_row_to_main_dictionary(uchar* record, DBT* pk_key, DBT* pk_val, DB_TXN* txn) { int error = 0; - uint32_t put_flags = mult_put_flags[primary_key]; - THD *thd = ha_thd(); uint curr_num_DBs = table->s->keys + tokudb_test(hidden_primary_key); - assert(curr_num_DBs == 1); - + + uint32_t put_flags = mult_put_flags[primary_key]; + THD *thd = ha_thd(); set_main_dict_put_flags(thd, true, &put_flags); - error = share->file->put( - share->file, - txn, - pk_key, - pk_val, - put_flags - ); + // for test, make unique checks have a very long duration + if ((put_flags & DB_OPFLAGS_MASK) == DB_NOOVERWRITE) + maybe_do_unique_checks_delay(thd); + error = share->file->put(share->file, txn, pk_key, pk_val, put_flags); if (error) { last_dup_key = primary_key; goto cleanup; @@ -3765,14 +3773,18 @@ int ha_tokudb::insert_rows_to_dictionaries_mult(DBT* pk_key, DBT* pk_val, DB_TXN int error = 0; uint curr_num_DBs = share->num_DBs; set_main_dict_put_flags(thd, true, &mult_put_flags[primary_key]); - uint32_t i, flags = mult_put_flags[primary_key]; + uint32_t flags = mult_put_flags[primary_key]; + + // for test, make unique checks have a very long duration + if ((flags & DB_OPFLAGS_MASK) == DB_NOOVERWRITE) + maybe_do_unique_checks_delay(thd); // the insert ignore optimization uses DB_NOOVERWRITE_NO_ERROR, // which is not allowed with env->put_multiple. // we have to insert the rows one by one in this case. 
if (flags & DB_NOOVERWRITE_NO_ERROR) { DB * src_db = share->key_file[primary_key]; - for (i = 0; i < curr_num_DBs; i++) { + for (uint32_t i = 0; i < curr_num_DBs; i++) { DB * db = share->key_file[i]; if (i == primary_key) { // if it's the primary key, insert the rows @@ -3833,7 +3845,7 @@ out: // error otherwise // int ha_tokudb::write_row(uchar * record) { - TOKUDB_HANDLER_DBUG_ENTER(""); + TOKUDB_HANDLER_DBUG_ENTER("%p", record); DBT row, prim_key; int error; @@ -3871,10 +3883,7 @@ int ha_tokudb::write_row(uchar * record) { if (share->has_auto_inc && record == table->record[0]) { tokudb_pthread_mutex_lock(&share->mutex); ulonglong curr_auto_inc = retrieve_auto_increment( - table->field[share->ai_field_index]->key_type(), - field_offset(table->field[share->ai_field_index], table), - record - ); + table->field[share->ai_field_index]->key_type(), field_offset(table->field[share->ai_field_index], table), record); if (curr_auto_inc > share->last_auto_increment) { share->last_auto_increment = curr_auto_inc; if (delay_updating_ai_metadata) { @@ -4042,7 +4051,6 @@ int ha_tokudb::update_row(const uchar * old_row, uchar * new_row) { memset((void *) &prim_row, 0, sizeof(prim_row)); memset((void *) &old_prim_row, 0, sizeof(old_prim_row)); - ha_statistic_increment(&SSV::ha_update_count); #if MYSQL_VERSION_ID < 50600 if (table->timestamp_field_type & TIMESTAMP_AUTO_SET_ON_UPDATE) { @@ -4089,7 +4097,6 @@ int ha_tokudb::update_row(const uchar * old_row, uchar * new_row) { } txn = using_ignore ? 
sub_trans : transaction; - if (hidden_primary_key) { memset((void *) &prim_key, 0, sizeof(prim_key)); prim_key.data = (void *) current_ident; @@ -4101,10 +4108,8 @@ int ha_tokudb::update_row(const uchar * old_row, uchar * new_row) { create_dbt_key_from_table(&old_prim_key, primary_key, primary_key_buff, old_row, &has_null); } - // // do uniqueness checks - // - if (share->has_unique_keys && !thd_test_options(thd, OPTION_RELAXED_UNIQUE_CHECKS)) { + if (share->has_unique_keys && do_unique_checks(thd, in_rpl_update_rows)) { for (uint keynr = 0; keynr < table_share->keys; keynr++) { bool is_unique_key = (table->key_info[keynr].flags & HA_NOSAME) || (keynr == primary_key); if (keynr == primary_key && !share->pk_has_string) { @@ -4145,6 +4150,10 @@ int ha_tokudb::update_row(const uchar * old_row, uchar * new_row) { set_main_dict_put_flags(thd, false, &mult_put_flags[primary_key]); + // for test, make unique checks have a very long duration + if ((mult_put_flags[primary_key] & DB_OPFLAGS_MASK) == DB_NOOVERWRITE) + maybe_do_unique_checks_delay(thd); + error = db_env->update_multiple( db_env, share->key_file[primary_key], @@ -5616,13 +5625,11 @@ DBT *ha_tokudb::get_pos(DBT * to, uchar * pos) { DBUG_RETURN(to); } -// // Retrieves a row with based on the primary key saved in pos // Returns: // 0 on success // HA_ERR_KEY_NOT_FOUND if not found // error otherwise -// int ha_tokudb::rnd_pos(uchar * buf, uchar * pos) { TOKUDB_HANDLER_DBUG_ENTER(""); DBT db_pos; @@ -5635,12 +5642,20 @@ int ha_tokudb::rnd_pos(uchar * buf, uchar * pos) { ha_statistic_increment(&SSV::ha_read_rnd_count); tokudb_active_index = MAX_KEY; + // test rpl slave by inducing a delay before the point query + THD *thd = ha_thd(); + if (thd->slave_thread && (in_rpl_delete_rows || in_rpl_update_rows)) { + uint64_t delay_ms = THDVAR(thd, rpl_lookup_rows_delay); + if (delay_ms) + usleep(delay_ms * 1000); + } + info.ha = this; info.buf = buf; info.keynr = primary_key; error = share->file->getf_set(share->file, 
transaction, - get_cursor_isolation_flags(lock.type, ha_thd()), + get_cursor_isolation_flags(lock.type, thd), key, smart_dbt_callback_rowread_ptquery, &info); if (error == DB_NOTFOUND) { @@ -8167,6 +8182,37 @@ void ha_tokudb::remove_from_trx_handler_list() { trx->handlers = list_delete(trx->handlers, &trx_handler_list); } +void ha_tokudb::rpl_before_write_rows() { + in_rpl_write_rows = true; +} + +void ha_tokudb::rpl_after_write_rows() { + in_rpl_write_rows = false; +} + +void ha_tokudb::rpl_before_delete_rows() { + in_rpl_delete_rows = true; +} + +void ha_tokudb::rpl_after_delete_rows() { + in_rpl_delete_rows = false; +} + +void ha_tokudb::rpl_before_update_rows() { + in_rpl_update_rows = true; +} + +void ha_tokudb::rpl_after_update_rows() { + in_rpl_update_rows = false; +} + +bool ha_tokudb::rpl_lookup_rows() { + if (!in_rpl_delete_rows && !in_rpl_update_rows) + return true; + else + return THDVAR(ha_thd(), rpl_lookup_rows); +} + // table admin #include "ha_tokudb_admin.cc" diff --git a/storage/tokudb/ha_tokudb.h b/storage/tokudb/ha_tokudb.h index d9a98a825d9..cdbadce6b67 100644 --- a/storage/tokudb/ha_tokudb.h +++ b/storage/tokudb/ha_tokudb.h @@ -799,6 +799,19 @@ private: private: int do_optimize(THD *thd); int map_to_handler_error(int error); + +public: + void rpl_before_write_rows(); + void rpl_after_write_rows(); + void rpl_before_delete_rows(); + void rpl_after_delete_rows(); + void rpl_before_update_rows(); + void rpl_after_update_rows(); + bool rpl_lookup_rows(); +private: + bool in_rpl_write_rows; + bool in_rpl_delete_rows; + bool in_rpl_update_rows; }; #if TOKU_INCLUDE_OPTION_STRUCTS diff --git a/storage/tokudb/hatoku_hton.cc b/storage/tokudb/hatoku_hton.cc index 0f02afa10bc..db18cd40ad7 100644 --- a/storage/tokudb/hatoku_hton.cc +++ b/storage/tokudb/hatoku_hton.cc @@ -1442,6 +1442,10 @@ static struct st_mysql_sys_var *tokudb_system_variables[] = { #if TOKU_INCLUDE_XA MYSQL_SYSVAR(support_xa), #endif + MYSQL_SYSVAR(rpl_unique_checks), + 
MYSQL_SYSVAR(rpl_unique_checks_delay), + MYSQL_SYSVAR(rpl_lookup_rows), + MYSQL_SYSVAR(rpl_lookup_rows_delay), NULL }; diff --git a/storage/tokudb/hatoku_hton.h b/storage/tokudb/hatoku_hton.h index 1b89aa5f772..87f86e0d0a1 100644 --- a/storage/tokudb/hatoku_hton.h +++ b/storage/tokudb/hatoku_hton.h @@ -504,17 +504,20 @@ static TYPELIB tokudb_empty_scan_typelib = { NULL }; -static MYSQL_THDVAR_ENUM(empty_scan, - PLUGIN_VAR_OPCMDARG, +static MYSQL_THDVAR_ENUM(empty_scan, PLUGIN_VAR_OPCMDARG, "TokuDB algorithm to check if the table is empty when opened. ", NULL, NULL, TOKUDB_EMPTY_SCAN_RL, &tokudb_empty_scan_typelib ); #if TOKUDB_CHECK_JEMALLOC static uint tokudb_check_jemalloc; -static MYSQL_SYSVAR_UINT(check_jemalloc, tokudb_check_jemalloc, 0, "Check if jemalloc is linked", NULL, NULL, 1, 0, 1, 0); +static MYSQL_SYSVAR_UINT(check_jemalloc, tokudb_check_jemalloc, 0, "Check if jemalloc is linked", + NULL, NULL, 1, 0, 1, 0); #endif +static MYSQL_THDVAR_BOOL(bulk_fetch, PLUGIN_VAR_THDLOCAL, "enable bulk fetch", + NULL /*check*/, NULL /*update*/, true /*default*/); + #if TOKU_INCLUDE_XA static MYSQL_THDVAR_BOOL(support_xa, PLUGIN_VAR_OPCMDARG, @@ -525,7 +528,17 @@ static MYSQL_THDVAR_BOOL(support_xa, ); #endif -static MYSQL_THDVAR_BOOL(bulk_fetch, PLUGIN_VAR_THDLOCAL, "enable bulk fetch", NULL /*check*/, NULL /*update*/, true /*default*/); +static MYSQL_THDVAR_BOOL(rpl_unique_checks, PLUGIN_VAR_THDLOCAL, "enable unique checks on replication slave", + NULL /*check*/, NULL /*update*/, true /*default*/); + +static MYSQL_THDVAR_ULONGLONG(rpl_unique_checks_delay, PLUGIN_VAR_THDLOCAL, "time in milliseconds to add to unique checks test on replication slave", + NULL, NULL, 0 /*default*/, 0 /*min*/, ~0ULL /*max*/, 1 /*blocksize*/); + +static MYSQL_THDVAR_BOOL(rpl_lookup_rows, PLUGIN_VAR_THDLOCAL, "lookup a row on rpl slave", + NULL /*check*/, NULL /*update*/, true /*default*/); + +static MYSQL_THDVAR_ULONGLONG(rpl_lookup_rows_delay, PLUGIN_VAR_THDLOCAL, "time in milliseconds to 
add to lookups on replication slave", + NULL, NULL, 0 /*default*/, 0 /*min*/, ~0ULL /*max*/, 1 /*blocksize*/); extern HASH tokudb_open_tables; extern pthread_mutex_t tokudb_mutex; From bb75cf067027e7567383cabbfa784c0c713f1aeb Mon Sep 17 00:00:00 2001 From: Rich Prohaska Date: Mon, 15 Sep 2014 14:49:16 -0400 Subject: [PATCH 190/190] DB-720 run the cluster key partition test on mariadb --- mysql-test/suite/tokudb/t/cluster_key_part.test | 1 + 1 file changed, 1 insertion(+) diff --git a/mysql-test/suite/tokudb/t/cluster_key_part.test b/mysql-test/suite/tokudb/t/cluster_key_part.test index dd569b88ad1..ed84404616e 100644 --- a/mysql-test/suite/tokudb/t/cluster_key_part.test +++ b/mysql-test/suite/tokudb/t/cluster_key_part.test @@ -1,6 +1,7 @@ # Test that clustering keys can be created on partitioned tokudb tables source include/have_tokudb.inc; +source include/have_partition.inc; set default_storage_engine='tokudb'; disable_warnings;