From 69b8e1c779acdef12b7b6a411c18aac2c9fce4f0 Mon Sep 17 00:00:00 2001
From: Sergey Zefirov <72864488+mariadb-SergeyZefirov@users.noreply.github.com>
Date: Fri, 17 Nov 2023 17:14:35 +0300
Subject: [PATCH] feat(extent-elimination)!: re-enable extent-elimination for
 dictionary columns scanning

This is a "productization" of old code that enables extent
elimination for dictionary columns.

This particular patch enables it, fixes the performance degradation (the
main problem with the old code) and also fixes incorrect behavior of cpimport.
---
 build/bootstrap_mcs.sh                        |  7 +-
 dbcon/joblist/batchprimitiveprocessor-jl.cpp  |  2 +-
 dbcon/joblist/dictstep-jl.cpp                 |  5 +-
 ...l4580-dictionary-extent-elimination.result | 64 +++++++++++++
 ...col4580-dictionary-extent-elimination.test | 47 +++++++++
 primitives/linux-port/dictionary.cpp          |  3 +-
 primitives/linux-port/primitiveprocessor.h    |  2 +-
 utils/common/string_prefixes.cpp              | 11 +--
 utils/common/string_prefixes.h                |  8 +-
 writeengine/bulk/we_colextinf.cpp             | 95 +++++++++++--------
 writeengine/wrapper/writeengine.cpp           | 17 ++--
 11 files changed, 197 insertions(+), 64 deletions(-)
 create mode 100644 mysql-test/columnstore/basic/r/mcol4580-dictionary-extent-elimination.result
 create mode 100644 mysql-test/columnstore/basic/t/mcol4580-dictionary-extent-elimination.test

diff --git a/build/bootstrap_mcs.sh b/build/bootstrap_mcs.sh
index 301a5556b..063cce2cd 100755
--- a/build/bootstrap_mcs.sh
+++ b/build/bootstrap_mcs.sh
@@ -33,7 +33,7 @@ optparse.define short=S long=skip-columnstore-submodules desc="Skip columnstore
 optparse.define short=u long=skip-unit-tests desc="Skip UnitTests" variable=SKIP_UNIT_TESTS default=false value=true
 optparse.define short=B long=run-microbench="Compile and run microbenchmarks " variable=RUN_BENCHMARKS default=false value=true
 optparse.define short=b long=branch desc="Choose git branch. For menu use -b \"\"" variable=BRANCH default=$CURRENT_BRANCH
-optparse.define short=D long=without-core-dumps desc="Do not produce core dumps" variable=WITHOUT_COREDUMPS default=false value=true
+optparse.define short=W long=without-core-dumps desc="Do not produce core dumps" variable=WITHOUT_COREDUMPS default=false value=true
 optparse.define short=v long=verbose desc="Verbose makefile commands" variable=MAKEFILE_VERBOSE default=false value=true
 optparse.define short=A long=asan desc="Build with ASAN" variable=ASAN default=false value=true
 optparse.define short=T long=tsan desc="Build with TSAN" variable=TSAN default=false value=true
@@ -46,6 +46,7 @@ optparse.define short=n long=no-clean-install desc="Do not perform a clean insta
 optparse.define short=j long=parallel desc="Number of paralles for build" variable=CPUS default=$(getconf _NPROCESSORS_ONLN)
 optparse.define short=F long=show-build-flags desc="Print CMake flags, while build" variable=PRINT_CMAKE_FLAGS default=false
 optparse.define short=c long=cloud desc="Enable cloud storage" variable=CLOUD_STORAGE_ENABLED default=false value=true
+optparse.define short=f long=do-not-freeze-revision desc="Disable revision freezing, or do not set 'update none' for columnstore submodule in MDB repository" variable=DO_NOT_FREEZE_REVISION default=false value=true
 
 source $( optparse.build )
 
@@ -547,7 +548,9 @@ generate_svgs()
     fi
 }
 
-disable_git_restore_frozen_revision
+if [[ $DO_NOT_FREEZE_REVISION = false ]] ; then
+    disable_git_restore_frozen_revision
+fi
 
 select_branch
 
diff --git a/dbcon/joblist/batchprimitiveprocessor-jl.cpp b/dbcon/joblist/batchprimitiveprocessor-jl.cpp
index d8f2943b6..7d94daa8f 100644
--- a/dbcon/joblist/batchprimitiveprocessor-jl.cpp
+++ b/dbcon/joblist/batchprimitiveprocessor-jl.cpp
@@ -50,7 +50,7 @@ using namespace messageqcpp;
 using namespace rowgroup;
 using namespace joiner;
 
-//#define XXX_BATCHPRIMPROC_TOKENS_RANGES_XXX
+#define XXX_BATCHPRIMPROC_TOKENS_RANGES_XXX
 
 namespace joblist
 {
diff --git a/dbcon/joblist/dictstep-jl.cpp b/dbcon/joblist/dictstep-jl.cpp
index 86f6f4340..8bfb3893a 100644
--- a/dbcon/joblist/dictstep-jl.cpp
+++ b/dbcon/joblist/dictstep-jl.cpp
@@ -126,6 +126,7 @@ messageqcpp::ByteStream DictStepJL::reencodedFilterString() const
 {
   messageqcpp::ByteStream bs;
 
+  datatypes::Charset cset(charsetNumber);
   if (hasEqFilter)
   {
     idbassert(filterCount == eqFilter.size());
@@ -133,7 +134,7 @@ messageqcpp::ByteStream DictStepJL::reencodedFilterString() const
     for (uint32_t i = 0; i < filterCount; i++)
     {
       uint8_t roundFlag = 0;
-      int64_t encodedPrefix = encodeStringPrefix((unsigned char*)eqFilter[i].c_str(), eqFilter[i].size(), charsetNumber);
+      int64_t encodedPrefix = encodeStringPrefix((unsigned char*)eqFilter[i].c_str(), eqFilter[i].size(), cset);
       bs << eqOp;
       bs << roundFlag;
       bs << encodedPrefix;
@@ -173,7 +174,7 @@ messageqcpp::ByteStream DictStepJL::reencodedFilterString() const
       bs << roundFlag;
       filterStringCopy >> size;
       ptr = filterStringCopy.buf();
-      encodedPrefix = encodeStringPrefix(ptr, size, charsetNumber);
+      encodedPrefix = encodeStringPrefix(ptr, size, cset);
       bs << encodedPrefix;
       filterStringCopy.advance(size);
     }
diff --git a/mysql-test/columnstore/basic/r/mcol4580-dictionary-extent-elimination.result b/mysql-test/columnstore/basic/r/mcol4580-dictionary-extent-elimination.result
new file mode 100644
index 000000000..16511c2cb
--- /dev/null
+++ b/mysql-test/columnstore/basic/r/mcol4580-dictionary-extent-elimination.result
@@ -0,0 +1,64 @@
+DROP DATABASE IF EXISTS MCOL4580;
+CREATE DATABASE MCOL4580;
+USE MCOL4580;
+CREATE TABLE t(d TEXT) ENGINE=COLUMNSTORE;
+INSERT INTO t(d) VALUES ('b'),('b'),('b');
+SELECT CALSETTRACE(1);
+CALSETTRACE(1)
+0
+SELECT COUNT(*) FROM t WHERE d = 'a';
+COUNT(*)
+0
+SELECT REGEXP_SUBSTR(CALGETTRACE(1),'(BlocksTouched-[^;]*);') AS touched, REGEXP_SUBSTR(CALGETTRACE(1),'(PartitionBlocksEliminated-[^;]*);') AS eliminated;
+touched	eliminated
+BlocksTouched-0;	PartitionBlocksEliminated-1;
+SELECT COUNT(*) FROM t WHERE d < 'b';
+COUNT(*)
+0
+SELECT REGEXP_SUBSTR(CALGETTRACE(1),'(BlocksTouched-[^;]*);') AS touched, REGEXP_SUBSTR(CALGETTRACE(1),'(PartitionBlocksEliminated-[^;]*);') AS eliminated;
+touched	eliminated
+BlocksTouched-3;	PartitionBlocksEliminated-0;
+SELECT COUNT(*) FROM t WHERE d > 'b';
+COUNT(*)
+0
+SELECT REGEXP_SUBSTR(CALGETTRACE(1),'(BlocksTouched-[^;]*);') AS touched, REGEXP_SUBSTR(CALGETTRACE(1),'(PartitionBlocksEliminated-[^;]*);') AS eliminated;
+touched	eliminated
+BlocksTouched-3;	PartitionBlocksEliminated-0;
+SELECT COUNT(*) FROM t WHERE d <= 'a';
+COUNT(*)
+0
+SELECT REGEXP_SUBSTR(CALGETTRACE(1),'(BlocksTouched-[^;]*);') AS touched, REGEXP_SUBSTR(CALGETTRACE(1),'(PartitionBlocksEliminated-[^;]*);') AS eliminated;
+touched	eliminated
+BlocksTouched-0;	PartitionBlocksEliminated-1;
+SELECT COUNT(*) FROM t WHERE d >= 'c';
+COUNT(*)
+0
+SELECT REGEXP_SUBSTR(CALGETTRACE(1),'(BlocksTouched-[^;]*);') AS touched, REGEXP_SUBSTR(CALGETTRACE(1),'(PartitionBlocksEliminated-[^;]*);') AS eliminated;
+touched	eliminated
+BlocksTouched-0;	PartitionBlocksEliminated-1;
+SELECT COUNT(*) FROM t WHERE d != 'b';
+COUNT(*)
+0
+SELECT REGEXP_SUBSTR(CALGETTRACE(1),'(BlocksTouched-[^;]*);') AS touched, REGEXP_SUBSTR(CALGETTRACE(1),'(PartitionBlocksEliminated-[^;]*);') AS eliminated;
+touched	eliminated
+BlocksTouched-0;	PartitionBlocksEliminated-1;
+INSERT INTO t SELECT * FROM t;
+SELECT COUNT(*) FROM t WHERE d = 'b';
+COUNT(*)
+6
+DROP TABLE t;
+CREATE TABLE t (c TEXT CHARACTER SET utf8 COLLATE utf8_czech_ci) engine=columnstore;
+INSERT INTO t(c) VALUES ('ch'), ('ch');
+SELECT COUNT(*) FROM t WHERE c < 'cz';
+COUNT(*)
+0
+SELECT REGEXP_SUBSTR(CALGETTRACE(1),'(BlocksTouched-[^;]*);') AS touched, REGEXP_SUBSTR(CALGETTRACE(1),'(PartitionBlocksEliminated-[^;]*);') AS eliminated;
+touched	eliminated
+BlocksTouched-0;	PartitionBlocksEliminated-1;
+SELECT COUNT(*) FROM t WHERE c > 'cz';
+COUNT(*)
+2
+SELECT COUNT(*) FROM t WHERE c = 'CH';
+COUNT(*)
+2
+DROP DATABASE MCOL4580;
diff --git a/mysql-test/columnstore/basic/t/mcol4580-dictionary-extent-elimination.test b/mysql-test/columnstore/basic/t/mcol4580-dictionary-extent-elimination.test
new file mode 100644
index 000000000..9a2f794b8
--- /dev/null
+++ b/mysql-test/columnstore/basic/t/mcol4580-dictionary-extent-elimination.test
@@ -0,0 +1,47 @@
+--disable_warnings # we disable warnings through the test: as we use calsettrace(1), it produces many unnecessary warnings.
+DROP DATABASE IF EXISTS MCOL4580;
+CREATE DATABASE MCOL4580;
+USE MCOL4580;
+
+# -----------------------------------------------------------------------------
+# Binary collation test.
+
+CREATE TABLE t(d TEXT) ENGINE=COLUMNSTORE;
+INSERT INTO t(d) VALUES ('b'),('b'),('b');
+SELECT CALSETTRACE(1);
+SELECT COUNT(*) FROM t WHERE d = 'a';
+SELECT REGEXP_SUBSTR(CALGETTRACE(1),'(BlocksTouched-[^;]*);') AS touched, REGEXP_SUBSTR(CALGETTRACE(1),'(PartitionBlocksEliminated-[^;]*);') AS eliminated;
+# As strict comparisons became soft (strict less '<' became less or equal '<='), the extent is not eliminated for the next two queries.
+SELECT COUNT(*) FROM t WHERE d < 'b';
+SELECT REGEXP_SUBSTR(CALGETTRACE(1),'(BlocksTouched-[^;]*);') AS touched, REGEXP_SUBSTR(CALGETTRACE(1),'(PartitionBlocksEliminated-[^;]*);') AS eliminated;
+SELECT COUNT(*) FROM t WHERE d > 'b';
+SELECT REGEXP_SUBSTR(CALGETTRACE(1),'(BlocksTouched-[^;]*);') AS touched, REGEXP_SUBSTR(CALGETTRACE(1),'(PartitionBlocksEliminated-[^;]*);') AS eliminated;
+SELECT COUNT(*) FROM t WHERE d <= 'a';
+SELECT REGEXP_SUBSTR(CALGETTRACE(1),'(BlocksTouched-[^;]*);') AS touched, REGEXP_SUBSTR(CALGETTRACE(1),'(PartitionBlocksEliminated-[^;]*);') AS eliminated;
+SELECT COUNT(*) FROM t WHERE d >= 'c';
+SELECT REGEXP_SUBSTR(CALGETTRACE(1),'(BlocksTouched-[^;]*);') AS touched, REGEXP_SUBSTR(CALGETTRACE(1),'(PartitionBlocksEliminated-[^;]*);') AS eliminated;
+# note that extent elimination will eliminate extents with only single value
+# in the case of not-equal predicate.
+SELECT COUNT(*) FROM t WHERE d != 'b';
+SELECT REGEXP_SUBSTR(CALGETTRACE(1),'(BlocksTouched-[^;]*);') AS touched, REGEXP_SUBSTR(CALGETTRACE(1),'(PartitionBlocksEliminated-[^;]*);') AS eliminated;
+INSERT INTO t SELECT * FROM t;
+SELECT COUNT(*) FROM t WHERE d = 'b';
+DROP TABLE t;
+
+# -----------------------------------------------------------------------------
+# Actual collation test.
+
+# Reference chart: https://collation-charts.org/mysql60/mysql604.utf8_czech_ci.html
+# We will use the fact that "cz" should go before "ch".
+
+CREATE TABLE t (c TEXT CHARACTER SET utf8 COLLATE utf8_czech_ci) engine=columnstore;
+INSERT INTO t(c) VALUES ('ch'), ('ch');
+SELECT COUNT(*) FROM t WHERE c < 'cz';
+SELECT REGEXP_SUBSTR(CALGETTRACE(1),'(BlocksTouched-[^;]*);') AS touched, REGEXP_SUBSTR(CALGETTRACE(1),'(PartitionBlocksEliminated-[^;]*);') AS eliminated;
+
+# and to check that we did not break anything (both counts must be 2):
+SELECT COUNT(*) FROM t WHERE c > 'cz';
+SELECT COUNT(*) FROM t WHERE c = 'CH';
+
+DROP DATABASE MCOL4580;
+--enable_warnings
diff --git a/primitives/linux-port/dictionary.cpp b/primitives/linux-port/dictionary.cpp
index 9cf69f9f8..43da0ad1a 100644
--- a/primitives/linux-port/dictionary.cpp
+++ b/primitives/linux-port/dictionary.cpp
@@ -434,6 +434,7 @@ void PrimitiveProcessor::p_Dictionary(const DictInput* in, vector<uint8_t>* out,
   header.PhysicalIO = 0;
 
   header.NBYTES = sizeof(DictOutput);
+  datatypes::Charset cset(charsetNumber);
 
   for (nextSig(in->NVALS, in->tokens, &sigptr, in->OutputType, (in->InputFlags ? true : false), skipNulls);
        sigptr.len != -1;
@@ -442,7 +443,7 @@ void PrimitiveProcessor::p_Dictionary(const DictInput* in, vector<uint8_t>* out,
 #if defined(XXX_PRIMITIVES_TOKEN_RANGES_XXX)
     if (minMax)
     {
-      uint64_t v = encodeStringPrefix_check_null(sigptr.data, sigptr.len, charsetNumber);
+      uint64_t v = encodeStringPrefix_check_null(sigptr.data, sigptr.len, cset);
       minMax[1] = minMax[1] < v ? v : minMax[1];
       minMax[0] = minMax[0] > v ? v : minMax[0];
     }
diff --git a/primitives/linux-port/primitiveprocessor.h b/primitives/linux-port/primitiveprocessor.h
index ecff92489..86bdca550 100644
--- a/primitives/linux-port/primitiveprocessor.h
+++ b/primitives/linux-port/primitiveprocessor.h
@@ -49,7 +49,7 @@
 class PrimTest;
 
 // XXX: turn off dictionary range setting during scan.
-//#define XXX_PRIMITIVES_TOKEN_RANGES_XXX
+#define XXX_PRIMITIVES_TOKEN_RANGES_XXX
 
 namespace primitives
 {
diff --git a/utils/common/string_prefixes.cpp b/utils/common/string_prefixes.cpp
index 9f9c5c2e7..07748f05f 100644
--- a/utils/common/string_prefixes.cpp
+++ b/utils/common/string_prefixes.cpp
@@ -24,10 +24,9 @@
 #include "string_prefixes.h"
 
 // XXX: string (or, actually, a BLOB) with all NUL chars will be encoded into zero. Which corresponds to
-//      encoding of empty string, or NULL.
-int64_t encodeStringPrefix(const uint8_t* str, size_t len, int charsetNumber)
+//      encoding of empty string.
+int64_t encodeStringPrefix(const uint8_t* str, size_t len, datatypes::Charset& cset)
 {
-  datatypes::Charset cset(charsetNumber);
   uint8_t fixedLenPrefix[8];
   memset(fixedLenPrefix, 0, sizeof(fixedLenPrefix));
   cset.strnxfrm(fixedLenPrefix, sizeof(fixedLenPrefix), 8, str, len, 0);
@@ -41,11 +40,11 @@ int64_t encodeStringPrefix(const uint8_t* str, size_t len, int charsetNumber)
   return acc;
 }
 
-int64_t encodeStringPrefix_check_null(const uint8_t* str, size_t len, int charsetNumber)
+int64_t encodeStringPrefix_check_null(const uint8_t* str, size_t len, datatypes::Charset& cset)
 {
-  if (len < 1)
+  if (len < 1 && str == nullptr)
   {
     return joblist::UBIGINTNULL;
   }
-  return encodeStringPrefix(str, len, charsetNumber);
+  return encodeStringPrefix(str, len, cset);
 }
diff --git a/utils/common/string_prefixes.h b/utils/common/string_prefixes.h
index 750c552b5..9a8c13293 100644
--- a/utils/common/string_prefixes.h
+++ b/utils/common/string_prefixes.h
@@ -23,10 +23,14 @@
 #include <stdlib.h>
 #include <stdint.h>
 
+#include "collation.h"
+#include "joblisttypes.h"
+
+
 // Encode string prefix into an int64_t, packing as many chars from string as possible
 // into the result and respecting the collation provided by charsetNumber.
 //
 // For one example, for CI Czech collation, encodeStringPrefix("cz") < encodeStringPrefix("CH").
-int64_t encodeStringPrefix(const uint8_t* str, size_t len, int charsetNumber);
+int64_t encodeStringPrefix(const uint8_t* str, size_t len, datatypes::Charset& cset);
 
-int64_t encodeStringPrefix_check_null(const uint8_t* str, size_t len, int charsetNumber);
+int64_t encodeStringPrefix_check_null(const uint8_t* str, size_t len, datatypes::Charset& cset);
diff --git a/writeengine/bulk/we_colextinf.cpp b/writeengine/bulk/we_colextinf.cpp
index 74271c2f4..85f191418 100644
--- a/writeengine/bulk/we_colextinf.cpp
+++ b/writeengine/bulk/we_colextinf.cpp
@@ -189,6 +189,7 @@ int ColExtInf::updateEntryLbid(BRM::LBID_t startLbid)
 void ColExtInf::getCPInfoForBRM(JobColumn column, BRMReporter& brmReporter)
 {
   bool bIsChar = ((column.weType == WriteEngine::WR_CHAR) && (column.colType != COL_TYPE_DICT));
+  bool bIsText = (column.weType == WriteEngine::WR_TEXT);
 
   boost::mutex::scoped_lock lock(fMapMutex);
 
@@ -206,69 +207,81 @@ void ColExtInf::getCPInfoForBRM(JobColumn column, BRMReporter& brmReporter)
     int128_t bigMinVal = iter->second.fbigMinVal;
     int128_t bigMaxVal = iter->second.fbigMaxVal;
 
-    if (bIsChar)
+    bool bIsValid = true;
+
+    if (bIsChar || bIsText)
     {
       // If we have added 1 or more rows, then we should have a valid
       // range in our RowExtMap object, in which case...
       // We swap/restore byte order before sending min/max string to BRM;
       // else we leave fMinVal & fMaxVal set to LLONG_MIN and send as-is,
       // to let BRM know we added no rows.
+
       if ((iter->second.fMinVal != iter->second.fMaxVal) || (iter->second.fMinVal != LLONG_MIN))
       {
         minVal = static_cast<int64_t>(uint64ToStr(static_cast<uint64_t>(iter->second.fMinVal)));
         maxVal = static_cast<int64_t>(uint64ToStr(static_cast<uint64_t>(iter->second.fMaxVal)));
       }
+      else
+      {
+        // No valid range here: mark it invalid so it is not reported to BRM.
+        minVal = static_cast<int64_t>(~(0UL));
+        maxVal = static_cast<int64_t>(0);
+        bIsValid = false;
+      }
     }
 
-    // Log for now; may control with debug flag later
-    // if (fLog->isDebug( DEBUG_1 ))
-    // TODO MCOL-641 Add support here.
-    {
-      std::ostringstream oss;
-      oss << "Saving CP  update for OID-" << fColOid << "; lbid-" << iter->second.fLbid << "; type-"
-          << bIsChar << "; isNew-" << iter->second.fNewExtent;
+    if (bIsValid) {
+      // Log for now; may control with debug flag later
+      // if (fLog->isDebug( DEBUG_1 ))
+      // TODO MCOL-641 Add support here.
+      {
+        std::ostringstream oss;
+        oss << "Saving CP  update for OID-" << fColOid << "; lbid-" << iter->second.fLbid << "; type-"
+            << bIsChar << "; isNew-" << iter->second.fNewExtent;
 
-      if (bIsChar)
-      {
-        char minValStr[sizeof(int64_t) + 1];
-        char maxValStr[sizeof(int64_t) + 1];
-        memcpy(minValStr, &minVal, sizeof(int64_t));
-        memcpy(maxValStr, &maxVal, sizeof(int64_t));
-        minValStr[sizeof(int64_t)] = '\0';
-        maxValStr[sizeof(int64_t)] = '\0';
-        oss << "; minVal: " << minVal << "; (" << minValStr << ")"
-            << "; maxVal: " << maxVal << "; (" << maxValStr << ")";
+        if (bIsChar)
+        {
+          char minValStr[sizeof(int64_t) + 1];
+          char maxValStr[sizeof(int64_t) + 1];
+          memcpy(minValStr, &minVal, sizeof(int64_t));
+          memcpy(maxValStr, &maxVal, sizeof(int64_t));
+          minValStr[sizeof(int64_t)] = '\0';
+          maxValStr[sizeof(int64_t)] = '\0';
+          oss << "; minVal: " << minVal << "; (" << minValStr << ")"
+              << "; maxVal: " << maxVal << "; (" << maxValStr << ")";
+        }
+        else if (isUnsigned(column.dataType))
+        {
+          oss << "; min: " << static_cast<uint64_t>(minVal) << "; max: " << static_cast<uint64_t>(maxVal);
+        }
+        else
+        {
+          oss << "; min: " << minVal << "; max: " << maxVal;
+        }
+
+        fLog->logMsg(oss.str(), MSGLVL_INFO2);
       }
-      else if (isUnsigned(column.dataType))
+
+      BRM::CPInfoMerge cpInfoMerge;
+      cpInfoMerge.startLbid = iter->second.fLbid;
+      if (column.width <= 8)
       {
-        oss << "; min: " << static_cast<uint64_t>(minVal) << "; max: " << static_cast<uint64_t>(maxVal);
+        cpInfoMerge.max = maxVal;
+        cpInfoMerge.min = minVal;
       }
       else
       {
-        oss << "; min: " << minVal << "; max: " << maxVal;
+        cpInfoMerge.bigMax = bigMaxVal;
+        cpInfoMerge.bigMin = bigMinVal;
       }
-
-      fLog->logMsg(oss.str(), MSGLVL_INFO2);
+      cpInfoMerge.seqNum = -1;  // Not used by mergeExtentsMaxMin. XXX: this marks extent invalid, BTW.
+      cpInfoMerge.type = column.dataType;
+      cpInfoMerge.newExtent = iter->second.fNewExtent;
+      cpInfoMerge.colWidth = column.width;
+      brmReporter.addToCPInfo(cpInfoMerge);
     }
 
-    BRM::CPInfoMerge cpInfoMerge;
-    cpInfoMerge.startLbid = iter->second.fLbid;
-    if (column.width <= 8)
-    {
-      cpInfoMerge.max = maxVal;
-      cpInfoMerge.min = minVal;
-    }
-    else
-    {
-      cpInfoMerge.bigMax = bigMaxVal;
-      cpInfoMerge.bigMin = bigMinVal;
-    }
-    cpInfoMerge.seqNum = -1;  // Not used by mergeExtentsMaxMin
-    cpInfoMerge.type = column.dataType;
-    cpInfoMerge.newExtent = iter->second.fNewExtent;
-    cpInfoMerge.colWidth = column.width;
-    brmReporter.addToCPInfo(cpInfoMerge);
-
     ++iter;
   }
 
diff --git a/writeengine/wrapper/writeengine.cpp b/writeengine/wrapper/writeengine.cpp
index a2d5f5388..edf6e0370 100644
--- a/writeengine/wrapper/writeengine.cpp
+++ b/writeengine/wrapper/writeengine.cpp
@@ -23,7 +23,7 @@
  */
 
 // XXX: a definition to switch off computations for token columns.
-//#define	XXX_WRITEENGINE_TOKENS_RANGES_XXX
+#define	XXX_WRITEENGINE_TOKENS_RANGES_XXX
 
 #include <cmath>
 #include <cstdlib>
@@ -465,12 +465,12 @@ void WriteEngineWrapper::updateMaxMinRange(const size_t totalNewRow, const size_
       }
       case WR_CHAR:
       {
-        fetchNewOldValues<int64_t, int64_t>(value, oldValue, valArrayVoid, oldValArrayVoid, i, totalNewRow);
+        fetchNewOldValues<uint64_t, uint64_t>(uvalue, oldUValue, valArrayVoid, oldValArrayVoid, i, totalNewRow);
         // for characters (strings, actually), we fetched then in LSB order, on x86, at the very least.
         // this means most significant byte of the string, which is first, is now in LSB of uvalue/oldValue.
         // we must perform a conversion.
-        value = uint64ToStr(uvalue);
-        oldValue = uint64ToStr(oldValue);
+        uvalue = uint64ToStr(uvalue);
+        oldUValue = uint64ToStr(oldUValue);
         break;
       }
       default: idbassert_s(0, "unknown WR type tag"); return;
@@ -1732,6 +1732,9 @@ int WriteEngineWrapper::insertColumnRecs(
         return rc;
       }
 
+#if defined(XXX_WRITEENGINE_TOKENS_RANGES_XXX)
+      datatypes::Charset cset(dctnryStructList[i].fCharsetNumber);
+#endif
       for (uint32_t rows = 0; rows < (totalRow - rowsLeft); rows++)
       {
 #if defined(XXX_WRITEENGINE_TOKENS_RANGES_XXX)
@@ -1754,8 +1757,7 @@ int WriteEngineWrapper::insertColumnRecs(
           dctTuple.sigValue = (unsigned char*)dctStr_iter->str();
           dctTuple.sigSize = dctStr_iter->length();
 #if defined(XXX_WRITEENGINE_TOKENS_RANGES_XXX)
-          strPrefix = encodeStringPrefix(dctTuple.sigValue, dctTuple.sigSize,
-                                                    dctnryStructList[i].fCharsetNumber);
+          strPrefix = encodeStringPrefix(dctTuple.sigValue, dctTuple.sigSize, cset);
 #endif
           dctTuple.isNull = false;
           rc = tokenize(txnid, dctTuple, dctnryStructList[i].fCompressionType);
@@ -1822,8 +1824,7 @@ int WriteEngineWrapper::insertColumnRecs(
             dctTuple.sigValue = (unsigned char*)dctStr_iter->str();
             dctTuple.sigSize = dctStr_iter->length();
 #if defined(XXX_WRITEENGINE_TOKENS_RANGES_XXX)
-            strPrefix = encodeStringPrefix_check_null(dctTuple.sigValue, dctTuple.sigSize,
-                                                      dctnryStructList[i].fCharsetNumber);
+            strPrefix = encodeStringPrefix_check_null(dctTuple.sigValue, dctTuple.sigSize, cset);
 #endif
             dctTuple.isNull = false;
             rc = tokenize(txnid, dctTuple, newDctnryStructList[i].fCompressionType);