diff --git a/CMakeLists.txt b/CMakeLists.txt
index 2fdde8afc..7d7194f86 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -307,6 +307,8 @@ SET (ENGINE_UTILS_QUERYSTATS_INCLUDE  "${CMAKE_CURRENT_SOURCE_DIR}/utils/queryst
 SET (ENGINE_UTILS_LIBMYSQL_CL_INCLUDE "${CMAKE_CURRENT_SOURCE_DIR}/utils/libmysql_client")
 SET (ENGINE_WE_CONFIGCPP_INCLUDE      "${CMAKE_CURRENT_SOURCE_DIR}/writeengine/xml")
 SET (ENGINE_DATATYPES_INCLUDE         "${CMAKE_CURRENT_SOURCE_DIR}/datatypes")
+SET (ENGINE_BLOCKCACHE_INCLUDE        "${CMAKE_CURRENT_SOURCE_DIR}/primitives/blockcache")
+SET (ENGINE_PRIMPROC_INCLUDE          "${CMAKE_CURRENT_SOURCE_DIR}/primitives/primproc")
 SET (ENGINE_SERVER_SQL_INCLUDE        "${SERVER_SOURCE_ROOT_DIR}/sql")
 SET (ENGINE_SERVER_INCLUDE_INCLUDE    "${SERVER_SOURCE_ROOT_DIR}/include")
 IF (PCRE_INCLUDES)
diff --git a/datatypes/mcs_datatype.h b/datatypes/mcs_datatype.h
index b6bd803e5..fbaa47e05 100644
--- a/datatypes/mcs_datatype.h
+++ b/datatypes/mcs_datatype.h
@@ -123,12 +123,6 @@ template<> struct make_unsigned<int32_t>  { typedef uint32_t type;  };
 template<> struct make_unsigned<int64_t>  { typedef uint64_t type;  };
 template<> struct make_unsigned<int128_t> { typedef uint128_t type; };
 
-} // namespace datatypes
-
-
-namespace datatypes
-{
-
 template<int W, typename T = void>
 struct _WidthToSIntegralType
 {
diff --git a/dbcon/joblist/primitivemsg.h b/dbcon/joblist/primitivemsg.h
index 1ede5ecad..da82a8a02 100644
--- a/dbcon/joblist/primitivemsg.h
+++ b/dbcon/joblist/primitivemsg.h
@@ -760,6 +760,32 @@ struct NewColRequestHeader
     // QQ: The below constructor is never used.
     // This struct is used in a cast only, in a hackish way.
     NewColRequestHeader();
+    inline uint16_t* getRIDArrayPtr(const int width) const
+    {
+        return (NVALS > 0) ? reinterpret_cast<uint16_t*>((uint8_t*)this +
+                                                         sizeof(NewColRequestHeader) +
+                                                         (NOPS * getFilterSize(width)))
+                           : nullptr;
+    }
+    // WIP change to unsigned if needed
+    inline int getFilterSize(const int width) const
+    {
+        return sizeof(uint8_t) + sizeof(uint8_t) + width;
+    }
+    inline uint8_t* getFilterStringPtr()
+    {
+        return reinterpret_cast<uint8_t*>(this) + sizeof(NewColRequestHeader);
+    }
+    inline void sortRIDArrayIfNeeded(const int width)
+    {
+        // NVALS means number of RIDs in the context of the f().
+        if (NVALS > 0)
+        {
+            uint16_t* ridArray = getRIDArrayPtr(width);
+            if (sort == 1)
+                std::sort(ridArray, ridArray + NVALS);
+        }
+    }
 };
 
 struct NewColAggRequestHeader
diff --git a/primitives/linux-port/CMakeLists.txt b/primitives/linux-port/CMakeLists.txt
index 569169d12..fdc7fae8c 100644
--- a/primitives/linux-port/CMakeLists.txt
+++ b/primitives/linux-port/CMakeLists.txt
@@ -6,8 +6,6 @@ include_directories( ${ENGINE_COMMON_INCLUDES} ../blockcache ../primproc)
 
 set(processor_STAT_SRCS primitiveprocessor.cpp dictionary.cpp column.cpp)
 
-#libprocessor_a_CXXFLAGS = $(march_flags) $(AM_CXXFLAGS)
-
 add_library(processor STATIC ${processor_STAT_SRCS})
 
 add_dependencies(processor loggingcpp)
@@ -15,10 +13,3 @@ add_dependencies(processor loggingcpp)
 target_link_libraries(processor ${NETSNMP_LIBRARIES})
 
 INSTALL (TARGETS processor DESTINATION ${ENGINE_LIBDIR})
-
-#if (WITH_PP_SCAN_UT)
-#    add_executable(pp_scan_unittest pp-scan-unittest.cpp)
-#    target_link_libraries(pp_scan_unittest ${ENGINE_LDFLAGS} ${MARIADB_CLIENT_LIBS} ${ENGINE_WRITE_LIBS} ${CPPUNIT_LIBRARIES} cppunit)
-#    install(TARGETS pp_scan_unittest DESTINATION ${ENGINE_BINDIR} COMPONENT columnstore-engine)
-#endif()
-
diff --git a/primitives/linux-port/column.cpp b/primitives/linux-port/column.cpp
index a02075646..4204daf22 100644
--- a/primitives/linux-port/column.cpp
+++ b/primitives/linux-port/column.cpp
@@ -1,5 +1,5 @@
 /* Copyright (C) 2014 InfiniDB, Inc.
-   Copyright (C) 2016-2020 MariaDB Corporation
+   Copyright (C) 2016-2021 MariaDB Corporation
 
    This program is free software; you can redistribute it and/or
    modify it under the terms of the GNU General Public License
@@ -16,10 +16,6 @@
    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
    MA 02110-1301, USA. */
 
-/*****************************************************************************
- * $Id: column.cpp 2103 2013-06-04 17:53:38Z dcathey $
- *
- ****************************************************************************/
 #include <iostream>
 #include <sstream>
 //#define NDEBUG
@@ -51,6 +47,14 @@ using namespace execplan;
 
 namespace
 {
+using RID_T = uint16_t;  // Row index type, as used in rid arrays
+
+// Column filtering is dispatched 4-way based on the column type,
+// which defines implementation of comparison operations for the column values
+enum ENUM_KIND {KIND_DEFAULT,   // compared as signed integers
+                KIND_UNSIGNED,  // compared as unsigned integers
+                KIND_FLOAT,     // compared as floating-point numbers
+                KIND_TEXT};     // whitespace-trimmed and then compared as signed integers
 
 inline uint64_t order_swap(uint64_t x)
 {
@@ -87,7 +91,6 @@ void logIt(int mid, int arg1, const string& arg2 = string())
     logger.logErrorMessage(msg);
 }
 
-
 template<class T>
 inline bool colCompare_(const T& val1, const T& val2, uint8_t COP)
 {
@@ -120,7 +123,6 @@ inline bool colCompare_(const T& val1, const T& val2, uint8_t COP)
     }
 }
 
-
 inline bool colCompareStr(const ColRequestHeaderDataType &type,
                           uint8_t COP,
                           const utils::ConstString &val1,
@@ -164,7 +166,7 @@ inline bool colCompare_(const T& val1, const T& val2, uint8_t COP, uint8_t rf)
             return val1 > val2 || (val1 == val2 && (rf & 0x80));
 
         default:
-            logIt(34, COP, "colCompare_l");
+            logIt(34, COP, "colCompare_");
             return false;						// throw an exception here?
     }
 }
@@ -199,300 +201,11 @@ inline bool colStrCompare_(uint64_t val1, uint64_t val2, uint8_t COP, uint8_t rf
         case COMPARE_LIKE:
         case COMPARE_NLIKE:
         default:
-            logIt(34, COP, "colCompare_l");
+            logIt(34, COP, "colStrCompare_");
             return false;						// throw an exception here?
     }
 }
 
-
-template<int>
-inline bool isEmptyVal(uint8_t type, const uint8_t* val8);
-
-template<>
-inline bool isEmptyVal<16>(uint8_t type, const uint8_t* ival) // For BINARY
-{
-    const int128_t* val = reinterpret_cast<const int128_t*>(ival);
-    // Wide-DECIMAL supplies a universal NULL/EMPTY magics for all 16 byte
-    // data types.
-    return *val == datatypes::Decimal128Empty;
-}
-
-template<>
-inline bool isEmptyVal<8>(uint8_t type, const uint8_t* ival)
-{
-    const uint64_t* val = reinterpret_cast<const uint64_t*>(ival);
-
-    switch (type)
-    {
-        case CalpontSystemCatalog::DOUBLE:
-        case CalpontSystemCatalog::UDOUBLE:
-            return (joblist::DOUBLEEMPTYROW == *val);
-
-        case CalpontSystemCatalog::CHAR:
-        case CalpontSystemCatalog::VARCHAR:
-        case CalpontSystemCatalog::DATE:
-        case CalpontSystemCatalog::DATETIME:
-        case CalpontSystemCatalog::TIMESTAMP:
-        case CalpontSystemCatalog::TIME:
-        case CalpontSystemCatalog::VARBINARY:
-        case CalpontSystemCatalog::BLOB:
-        case CalpontSystemCatalog::TEXT:
-            return (*val == joblist::CHAR8EMPTYROW);
-
-        case CalpontSystemCatalog::UBIGINT:
-            return (joblist::UBIGINTEMPTYROW == *val);
-
-        default:
-            break;
-    }
-
-    return (joblist::BIGINTEMPTYROW == *val);
-}
-
-template<>
-inline bool isEmptyVal<4>(uint8_t type, const uint8_t* ival)
-{
-    const uint32_t* val = reinterpret_cast<const uint32_t*>(ival);
-
-    switch (type)
-    {
-        case CalpontSystemCatalog::FLOAT:
-        case CalpontSystemCatalog::UFLOAT:
-            return (joblist::FLOATEMPTYROW == *val);
-
-        case CalpontSystemCatalog::CHAR:
-        case CalpontSystemCatalog::VARCHAR:
-        case CalpontSystemCatalog::BLOB:
-        case CalpontSystemCatalog::TEXT:
-        case CalpontSystemCatalog::DATE:
-        case CalpontSystemCatalog::DATETIME:
-        case CalpontSystemCatalog::TIMESTAMP:
-        case CalpontSystemCatalog::TIME:
-            return (joblist::CHAR4EMPTYROW == *val);
-
-        case CalpontSystemCatalog::UINT:
-        case CalpontSystemCatalog::UMEDINT:
-            return (joblist::UINTEMPTYROW == *val);
-
-        default:
-            break;
-    }
-
-    return (joblist::INTEMPTYROW == *val);
-}
-
-template<>
-inline bool isEmptyVal<2>(uint8_t type, const uint8_t* ival)
-{
-    const uint16_t* val = reinterpret_cast<const uint16_t*>(ival);
-
-    switch (type)
-    {
-        case CalpontSystemCatalog::CHAR:
-        case CalpontSystemCatalog::VARCHAR:
-        case CalpontSystemCatalog::BLOB:
-        case CalpontSystemCatalog::TEXT:
-        case CalpontSystemCatalog::DATE:
-        case CalpontSystemCatalog::DATETIME:
-        case CalpontSystemCatalog::TIMESTAMP:
-        case CalpontSystemCatalog::TIME:
-            return (joblist::CHAR2EMPTYROW == *val);
-
-        case CalpontSystemCatalog::USMALLINT:
-            return (joblist::USMALLINTEMPTYROW == *val);
-
-        default:
-            break;
-    }
-
-    return (joblist::SMALLINTEMPTYROW == *val);
-}
-
-template<>
-inline bool isEmptyVal<1>(uint8_t type, const uint8_t* ival)
-{
-    const uint8_t* val = reinterpret_cast<const uint8_t*>(ival);
-
-    switch (type)
-    {
-        case CalpontSystemCatalog::CHAR:
-        case CalpontSystemCatalog::VARCHAR:
-        case CalpontSystemCatalog::BLOB:
-        case CalpontSystemCatalog::TEXT:
-        case CalpontSystemCatalog::DATE:
-        case CalpontSystemCatalog::DATETIME:
-        case CalpontSystemCatalog::TIMESTAMP:
-        case CalpontSystemCatalog::TIME:
-            return (*val == joblist::CHAR1EMPTYROW);
-
-        case CalpontSystemCatalog::UTINYINT:
-            return (*val == joblist::UTINYINTEMPTYROW);
-
-        default:
-            break;
-    }
-
-    return (*val == joblist::TINYINTEMPTYROW);
-}
-
-template<int>
-inline bool isNullVal(uint8_t type, const uint8_t* val8);
-
-template<>
-inline bool isNullVal<16>(uint8_t type, const uint8_t* ival)
-{
-    const int128_t* val = reinterpret_cast<const int128_t*>(ival);
-    // Wide-DECIMAL supplies a universal NULL/EMPTY magics for all 16 byte
-    // data types.
-    return *val == datatypes::Decimal128Null;
-}
-
-template<>
-inline bool isNullVal<8>(uint8_t type, const uint8_t* ival)
-{
-    const uint64_t* val = reinterpret_cast<const uint64_t*>(ival);
-
-    switch (type)
-    {
-        case CalpontSystemCatalog::DOUBLE:
-        case CalpontSystemCatalog::UDOUBLE:
-            return (joblist::DOUBLENULL == *val);
-
-        case CalpontSystemCatalog::CHAR:
-        case CalpontSystemCatalog::VARCHAR:
-        case CalpontSystemCatalog::DATE:
-        case CalpontSystemCatalog::DATETIME:
-        case CalpontSystemCatalog::TIMESTAMP:
-        case CalpontSystemCatalog::TIME:
-        case CalpontSystemCatalog::VARBINARY:
-        case CalpontSystemCatalog::BLOB:
-        case CalpontSystemCatalog::TEXT:
-            //@bug 339 might be a token here
-            //TODO: what's up with the second const here?
-            return (*val == joblist::CHAR8NULL || 0xFFFFFFFFFFFFFFFELL == *val);
-
-        case CalpontSystemCatalog::UBIGINT:
-            return (joblist::UBIGINTNULL == *val);
-
-        default:
-            break;
-    }
-
-    return (joblist::BIGINTNULL == *val);
-}
-
-template<>
-inline bool isNullVal<4>(uint8_t type, const uint8_t* ival)
-{
-    const uint32_t* val = reinterpret_cast<const uint32_t*>(ival);
-
-    switch (type)
-    {
-        case CalpontSystemCatalog::FLOAT:
-        case CalpontSystemCatalog::UFLOAT:
-            return (joblist::FLOATNULL == *val);
-
-        case CalpontSystemCatalog::CHAR:
-        case CalpontSystemCatalog::VARCHAR:
-        case CalpontSystemCatalog::BLOB:
-        case CalpontSystemCatalog::TEXT:
-            return (joblist::CHAR4NULL == *val);
-
-        case CalpontSystemCatalog::DATE:
-        case CalpontSystemCatalog::DATETIME:
-        case CalpontSystemCatalog::TIMESTAMP:
-        case CalpontSystemCatalog::TIME:
-            return (joblist::DATENULL == *val);
-
-        case CalpontSystemCatalog::UINT:
-        case CalpontSystemCatalog::UMEDINT:
-            return (joblist::UINTNULL == *val);
-
-        default:
-            break;
-    }
-
-    return (joblist::INTNULL == *val);
-}
-
-template<>
-inline bool isNullVal<2>(uint8_t type, const uint8_t* ival)
-{
-    const uint16_t* val = reinterpret_cast<const uint16_t*>(ival);
-
-    switch (type)
-    {
-        case CalpontSystemCatalog::CHAR:
-        case CalpontSystemCatalog::VARCHAR:
-        case CalpontSystemCatalog::BLOB:
-        case CalpontSystemCatalog::TEXT:
-        case CalpontSystemCatalog::DATE:
-        case CalpontSystemCatalog::DATETIME:
-        case CalpontSystemCatalog::TIMESTAMP:
-        case CalpontSystemCatalog::TIME:
-            return (joblist::CHAR2NULL == *val);
-
-        case CalpontSystemCatalog::USMALLINT:
-            return (joblist::USMALLINTNULL == *val);
-
-        default:
-            break;
-    }
-
-    return (joblist::SMALLINTNULL == *val);
-}
-
-template<>
-inline bool isNullVal<1>(uint8_t type, const uint8_t* ival)
-{
-    const uint8_t* val = reinterpret_cast<const uint8_t*>(ival);
-
-    switch (type)
-    {
-        case CalpontSystemCatalog::CHAR:
-        case CalpontSystemCatalog::VARCHAR:
-        case CalpontSystemCatalog::BLOB:
-        case CalpontSystemCatalog::TEXT:
-        case CalpontSystemCatalog::DATE:
-        case CalpontSystemCatalog::DATETIME:
-        case CalpontSystemCatalog::TIMESTAMP:
-        case CalpontSystemCatalog::TIME:
-            return (*val == joblist::CHAR1NULL);
-
-        case CalpontSystemCatalog::UTINYINT:
-            return (joblist::UTINYINTNULL == *val);
-
-        default:
-            break;
-    }
-
-    return (*val == joblist::TINYINTNULL);
-}
-
-/* A generic isNullVal */
-inline bool isNullVal(uint32_t length, uint8_t type, const uint8_t* val8)
-{
-    switch (length)
-    {
-        case 16:
-            return isNullVal<16>(type, val8);
-
-        case 8:
-            return isNullVal<8>(type, val8);
-
-        case 4:
-            return isNullVal<4>(type, val8);
-
-        case 2:
-            return isNullVal<2>(type, val8);
-
-        case 1:
-            return isNullVal<1>(type, val8);
-    };
-
-    return false;
-}
-
 // Set the minimum and maximum in the return header if we will be doing a block scan and
 // we are dealing with a type that is comparable as a 64 bit integer.  Subsequent calls can then
 // skip this block if the value being searched is outside of the Min/Max range.
@@ -540,937 +253,1247 @@ inline bool isMinMaxValid(const NewColRequestHeader* in)
     }
 }
 
-
-inline bool colCompare(int64_t val1, int64_t val2, uint8_t COP, uint8_t rf,
-                       const ColRequestHeaderDataType &typeHolder, uint8_t width,
-                       bool isNull = false)
+template<ENUM_KIND KIND, int COL_WIDTH, bool IS_NULL, typename T1, typename T2,
+         typename std::enable_if<COL_WIDTH == sizeof(int32_t) && KIND == KIND_FLOAT && !IS_NULL, T1>::type* = nullptr>
+inline bool colCompareDispatcherT(
+    T1 columnValue,
+    T2 filterValue,
+    uint8_t cop,
+    uint8_t rf,
+    const ColRequestHeaderDataType& typeHolder,
+    bool isVal2Null)
 {
-    uint8_t type = typeHolder.DataType;
-// 	cout << "comparing " << hex << val1 << " to " << val2 << endl;
+    float dVal1 = *((float*) &columnValue);
+    float dVal2 = *((float*) &filterValue);
+    return colCompare_(dVal1, dVal2, cop);
+}
 
-    if (COMPARE_NIL == COP) return false;
+template<ENUM_KIND KIND, int COL_WIDTH, bool IS_NULL, typename T1, typename T2,
+         typename std::enable_if<COL_WIDTH == sizeof(int64_t) && KIND == KIND_FLOAT && !IS_NULL, T1>::type* = nullptr>
+inline bool colCompareDispatcherT(
+    T1 columnValue,
+    T2 filterValue,
+    uint8_t cop,
+    uint8_t rf,
+    const ColRequestHeaderDataType& typeHolder,
+    bool isVal2Null)
+{
+    double dVal1 = *((double*) &columnValue);
+    double dVal2 = *((double*) &filterValue);
+    return colCompare_(dVal1, dVal2, cop);
+}
 
-    //@bug 425 added isNull condition
-    else if ( !isNull && (type == CalpontSystemCatalog::FLOAT || type == CalpontSystemCatalog::DOUBLE))
+template<ENUM_KIND KIND, int COL_WIDTH, bool IS_NULL, typename T1, typename T2,
+         typename std::enable_if<KIND == KIND_TEXT && !IS_NULL, T1>::type* = nullptr>
+inline bool colCompareDispatcherT(
+    T1 columnValue,
+    T2 filterValue,
+    uint8_t cop,
+    uint8_t rf,
+    const ColRequestHeaderDataType& typeHolder,
+    bool isVal2Null)
+{
+    if (cop & COMPARE_LIKE) // LIKE and NOT LIKE
     {
-        double dVal1, dVal2;
-
-        if (type == CalpontSystemCatalog::FLOAT)
-        {
-            dVal1 = *((float*) &val1);
-            dVal2 = *((float*) &val2);
-        }
-        else
-        {
-            dVal1 = *((double*) &val1);
-            dVal2 = *((double*) &val2);
-        }
-
-        return colCompare_(dVal1, dVal2, COP);
+        utils::ConstString subject{reinterpret_cast<const char*>(&columnValue), COL_WIDTH};
+        utils::ConstString pattern{reinterpret_cast<const char*>(&filterValue), COL_WIDTH};
+        return typeHolder.like(cop & COMPARE_NOT, subject.rtrimZero(),
+                                                  pattern.rtrimZero());
     }
 
-    else if ( (type == CalpontSystemCatalog::CHAR || type == CalpontSystemCatalog::VARCHAR ||
-               type == CalpontSystemCatalog::TEXT) && !isNull )
+    if (!rf)
     {
-        if (COP & COMPARE_LIKE) // LIKE and NOT LIKE
-        {
-            utils::ConstString subject = {reinterpret_cast<const char*>(&val1), width};
-            utils::ConstString pattern = {reinterpret_cast<const char*>(&val2), width};
-            return typeHolder.like(COP & COMPARE_NOT, subject.rtrimZero(),
-                                                      pattern.rtrimZero());
-        }
-
-        if (!rf)
-        {
-            // A temporary hack for xxx_nopad_bin collations
-            // TODO: MCOL-4534 Improve comparison performance in 8bit nopad_bin collations
-            if ((typeHolder.getCharset().state & (MY_CS_BINSORT|MY_CS_NOPAD)) ==
-                (MY_CS_BINSORT|MY_CS_NOPAD))
-              return colCompare_(order_swap(val1), order_swap(val2), COP);
-            utils::ConstString s1 = {reinterpret_cast<const char*>(&val1), width};
-            utils::ConstString s2 = {reinterpret_cast<const char*>(&val2), width};
-            return colCompareStr(typeHolder, COP, s1.rtrimZero(), s2.rtrimZero());
-        }
-        else
-            return colStrCompare_(order_swap(val1), order_swap(val2), COP, rf);
+        // A temporary hack for xxx_nopad_bin collations
+        // TODO: MCOL-4534 Improve comparison performance in 8bit nopad_bin collations
+        if ((typeHolder.getCharset().state & (MY_CS_BINSORT|MY_CS_NOPAD)) ==
+            (MY_CS_BINSORT|MY_CS_NOPAD))
+          return colCompare_(order_swap(columnValue), order_swap(filterValue), cop);
+        utils::ConstString s1{reinterpret_cast<const char*>(&columnValue), COL_WIDTH};
+        utils::ConstString s2{reinterpret_cast<const char*>(&filterValue), COL_WIDTH};
+        return colCompareStr(typeHolder, cop, s1.rtrimZero(), s2.rtrimZero());
     }
-
-    /* isNullVal should work on the normalized value on little endian machines */
     else
-    {
-        bool val2Null = isNullVal(width, type, (uint8_t*) &val2);
+        return colStrCompare_(order_swap(columnValue), order_swap(filterValue), cop, rf);
 
-        if (isNull == val2Null || (val2Null && COP == COMPARE_NE))
-            return colCompare_(val1, val2, COP, rf);
+}
+
+// This template where IS_NULL = true is used only comparing filter predicate
+// values with column NULL so I left branching here.
+template<ENUM_KIND KIND, int COL_WIDTH, bool IS_NULL, typename T1, typename T2,
+         typename std::enable_if<IS_NULL, T1>::type* = nullptr>
+inline bool colCompareDispatcherT(
+    T1 columnValue,
+    T2 filterValue,
+    uint8_t cop,
+    uint8_t rf,
+    const ColRequestHeaderDataType& typeHolder,
+    bool isVal2Null)
+{
+    if (IS_NULL == isVal2Null || (isVal2Null && cop == COMPARE_NE))
+    {
+        if (KIND_UNSIGNED == KIND)
+        {
+            // Ugly hack to convert all to the biggest type b/w T1 and T2.
+            // I presume that sizeof(T2) AKA a filter predicate type is GEQ sizeof(T1) AKA col type.
+            using UT2 = typename datatypes::make_unsigned<T2>::type;
+            UT2 ucolumnValue = columnValue;
+            UT2 ufilterValue = filterValue;
+            return colCompare_(ucolumnValue, ufilterValue, cop, rf);
+        }
         else
+        {
+            // Ugly hack to convert all to the biggest type b/w T1 and T2.
+            // I presume that sizeof(T2) AKA a filter predicate type is GEQ sizeof(T1) AKA col type.
+            T2 tempVal1 = columnValue;
+            return colCompare_(tempVal1, filterValue, cop, rf);
+        }
+    }
+    else
+        return false;
+}
+
+template<ENUM_KIND KIND, int COL_WIDTH, bool IS_NULL, typename T1, typename T2,
+         typename std::enable_if<KIND == KIND_UNSIGNED && !IS_NULL, T1>::type* = nullptr>
+inline bool colCompareDispatcherT(
+    T1 columnValue,
+    T2 filterValue,
+    uint8_t cop,
+    uint8_t rf,
+    const ColRequestHeaderDataType& typeHolder,
+    bool isVal2Null)
+{
+    if (IS_NULL == isVal2Null || (isVal2Null && cop == COMPARE_NE))
+    {
+        // Ugly hack to convert all to the biggest type b/w T1 and T2.
+        // I presume that sizeof(T2)(a filter predicate type) is GEQ T1(col type).
+        using UT2 = typename datatypes::make_unsigned<T2>::type;
+        UT2 ucolumnValue = columnValue;
+        UT2 ufilterValue = filterValue;
+        return colCompare_(ucolumnValue, ufilterValue, cop, rf);
+    }
+    else
+        return false;
+}
+
+template<ENUM_KIND KIND, int COL_WIDTH, bool IS_NULL, typename T1, typename T2,
+         typename std::enable_if<KIND == KIND_DEFAULT && !IS_NULL, T1>::type* = nullptr>
+inline bool colCompareDispatcherT(
+    T1 columnValue,
+    T2 filterValue,
+    uint8_t cop,
+    uint8_t rf,
+    const ColRequestHeaderDataType& typeHolder,
+    bool isVal2Null)
+{
+    if (IS_NULL == isVal2Null || (isVal2Null && cop == COMPARE_NE))
+    {
+        // Ugly hack to convert all to the biggest type b/w T1 and T2.
+        // I presume that sizeof(T2)(a filter predicate type) is GEQ T1(col type).
+        T2 tempVal1 = columnValue;
+        return colCompare_(tempVal1, filterValue, cop, rf);
+    }
+    else
+        return false;
+}
+
+// Compare two column values using given comparison operation,
+// taking into account all rules about NULL values, string trimming and so on
+template<ENUM_KIND KIND, int COL_WIDTH, bool IS_NULL = false, typename T1, typename T2>
+inline bool colCompare(
+    T1 columnValue,
+    T2 filterValue,
+    uint8_t cop,
+    uint8_t rf,
+    const ColRequestHeaderDataType& typeHolder,
+    bool isVal2Null = false)
+{
+// 	cout << "comparing " << hex << columnValue << " to " << filterValue << endl;
+    if (COMPARE_NIL == cop) return false;
+
+    return colCompareDispatcherT<KIND, COL_WIDTH, IS_NULL, T1, T2>(columnValue, filterValue,
+        cop, rf, typeHolder, isVal2Null);
+}
+
+/*****************************************************************************
+ *** NULL/EMPTY VALUES FOR EVERY COLUMN TYPE/WIDTH ***************************
+ *****************************************************************************/
+// Bit pattern representing EMPTY value for given column type/width
+// TBD Use typeHandler
+template<typename T,
+         typename std::enable_if<sizeof(T) == sizeof(int128_t), T>::type* = nullptr>
+T getEmptyValue(uint8_t type)
+{
+    return datatypes::Decimal128Empty;
+}
+
+template<typename T,
+         typename std::enable_if<sizeof(T) == sizeof(int64_t), T>::type* = nullptr>
+T getEmptyValue(uint8_t type)
+{
+    switch (type)
+    {
+        case CalpontSystemCatalog::DOUBLE:
+        case CalpontSystemCatalog::UDOUBLE:
+            return joblist::DOUBLEEMPTYROW;
+
+        case CalpontSystemCatalog::CHAR:
+        case CalpontSystemCatalog::VARCHAR:
+        case CalpontSystemCatalog::DATE:
+        case CalpontSystemCatalog::DATETIME:
+        case CalpontSystemCatalog::TIMESTAMP:
+        case CalpontSystemCatalog::TIME:
+        case CalpontSystemCatalog::VARBINARY:
+        case CalpontSystemCatalog::BLOB:
+        case CalpontSystemCatalog::TEXT:
+            return joblist::CHAR8EMPTYROW;
+
+        case CalpontSystemCatalog::UBIGINT:
+            return joblist::UBIGINTEMPTYROW;
+
+        default:
+            return joblist::BIGINTEMPTYROW;
+    }
+}
+
+template<typename T,
+         typename std::enable_if<sizeof(T) == sizeof(int32_t), T>::type* = nullptr>
+T getEmptyValue(uint8_t type)
+{
+    switch (type)
+    {
+        case CalpontSystemCatalog::FLOAT:
+        case CalpontSystemCatalog::UFLOAT:
+            return joblist::FLOATEMPTYROW;
+
+        case CalpontSystemCatalog::CHAR:
+        case CalpontSystemCatalog::VARCHAR:
+        case CalpontSystemCatalog::BLOB:
+        case CalpontSystemCatalog::TEXT:
+        case CalpontSystemCatalog::DATE:
+        case CalpontSystemCatalog::DATETIME:
+        case CalpontSystemCatalog::TIMESTAMP:
+        case CalpontSystemCatalog::TIME:
+            return joblist::CHAR4EMPTYROW;
+
+        case CalpontSystemCatalog::UINT:
+        case CalpontSystemCatalog::UMEDINT:
+            return joblist::UINTEMPTYROW;
+
+        default:
+            return joblist::INTEMPTYROW;
+    }
+}
+
+template<typename T,
+         typename std::enable_if<sizeof(T) == sizeof(int16_t), T>::type* = nullptr>
+T getEmptyValue(uint8_t type)
+{
+    switch (type)
+    {
+        case CalpontSystemCatalog::CHAR:
+        case CalpontSystemCatalog::VARCHAR:
+        case CalpontSystemCatalog::BLOB:
+        case CalpontSystemCatalog::TEXT:
+        case CalpontSystemCatalog::DATE:
+        case CalpontSystemCatalog::DATETIME:
+        case CalpontSystemCatalog::TIMESTAMP:
+        case CalpontSystemCatalog::TIME:
+            return joblist::CHAR2EMPTYROW;
+
+        case CalpontSystemCatalog::USMALLINT:
+            return joblist::USMALLINTEMPTYROW;
+
+        default:
+            return joblist::SMALLINTEMPTYROW;
+    }
+}
+
+template<typename T,
+         typename std::enable_if<sizeof(T) == sizeof(int8_t), T>::type* = nullptr>
+T getEmptyValue(uint8_t type)
+{
+    switch (type)
+    {
+        case CalpontSystemCatalog::CHAR:
+        case CalpontSystemCatalog::VARCHAR:
+        case CalpontSystemCatalog::BLOB:
+        case CalpontSystemCatalog::TEXT:
+        case CalpontSystemCatalog::DATE:
+        case CalpontSystemCatalog::DATETIME:
+        case CalpontSystemCatalog::TIMESTAMP:
+        case CalpontSystemCatalog::TIME:
+            return joblist::CHAR1EMPTYROW;
+
+        case CalpontSystemCatalog::UTINYINT:
+            return joblist::UTINYINTEMPTYROW;
+
+        default:
+            return joblist::TINYINTEMPTYROW;
+    }
+}
+
+// Bit pattern representing NULL value for given column type/width
+// TBD Use TypeHandler
+template<typename T,
+         typename std::enable_if<sizeof(T) == sizeof(int128_t), T>::type* = nullptr>
+T getNullValue(uint8_t type)
+{
+    return datatypes::Decimal128Null;
+}
+
+template<typename T,
+         typename std::enable_if<sizeof(T) == sizeof(int64_t), T>::type* = nullptr>
+T getNullValue(uint8_t type)
+{
+    switch (type)
+    {
+        case CalpontSystemCatalog::DOUBLE:
+        case CalpontSystemCatalog::UDOUBLE:
+            return joblist::DOUBLENULL;
+
+        case CalpontSystemCatalog::CHAR:
+        case CalpontSystemCatalog::VARCHAR:
+        case CalpontSystemCatalog::DATE:
+        case CalpontSystemCatalog::DATETIME:
+        case CalpontSystemCatalog::TIMESTAMP:
+        case CalpontSystemCatalog::TIME:
+        case CalpontSystemCatalog::VARBINARY:
+        case CalpontSystemCatalog::BLOB:
+        case CalpontSystemCatalog::TEXT:
+            return joblist::CHAR8NULL;
+
+        case CalpontSystemCatalog::UBIGINT:
+            return joblist::UBIGINTNULL;
+
+        default:
+            return joblist::BIGINTNULL;
+    }
+}
+
+template<typename T,
+         typename std::enable_if<sizeof(T) == sizeof(int32_t), T>::type* = nullptr>
+T getNullValue(uint8_t type)
+{
+    switch (type)
+    {
+        case CalpontSystemCatalog::FLOAT:
+        case CalpontSystemCatalog::UFLOAT:
+            return joblist::FLOATNULL;
+
+        case CalpontSystemCatalog::CHAR:
+        case CalpontSystemCatalog::VARCHAR:
+        case CalpontSystemCatalog::BLOB:
+        case CalpontSystemCatalog::TEXT:
+            return joblist::CHAR4NULL;
+
+        case CalpontSystemCatalog::DATE:
+        case CalpontSystemCatalog::DATETIME:
+        case CalpontSystemCatalog::TIMESTAMP:
+        case CalpontSystemCatalog::TIME:
+            return joblist::DATENULL;
+
+        case CalpontSystemCatalog::UINT:
+        case CalpontSystemCatalog::UMEDINT:
+            return joblist::UINTNULL;
+
+        default:
+            return joblist::INTNULL;
+    }
+}
+
+template<typename T,
+         typename std::enable_if<sizeof(T) == sizeof(int16_t), T>::type* = nullptr>
+T getNullValue(uint8_t type)
+{
+    switch (type)
+    {
+        case CalpontSystemCatalog::CHAR:
+        case CalpontSystemCatalog::VARCHAR:
+        case CalpontSystemCatalog::BLOB:
+        case CalpontSystemCatalog::TEXT:
+        case CalpontSystemCatalog::DATE:
+        case CalpontSystemCatalog::DATETIME:
+        case CalpontSystemCatalog::TIMESTAMP:
+        case CalpontSystemCatalog::TIME:
+            return joblist::CHAR2NULL;
+
+        case CalpontSystemCatalog::USMALLINT:
+            return joblist::USMALLINTNULL;
+
+        default:
+            return joblist::SMALLINTNULL;
+    }
+}
+
+template<typename T,
+         typename std::enable_if<sizeof(T) == sizeof(int8_t), T>::type* = nullptr>
+T getNullValue(uint8_t type)
+{
+    switch (type)
+    {
+        case CalpontSystemCatalog::CHAR:
+        case CalpontSystemCatalog::VARCHAR:
+        case CalpontSystemCatalog::BLOB:
+        case CalpontSystemCatalog::TEXT:
+        case CalpontSystemCatalog::DATE:
+        case CalpontSystemCatalog::DATETIME:
+        case CalpontSystemCatalog::TIMESTAMP:
+        case CalpontSystemCatalog::TIME:
+            return joblist::CHAR1NULL;
+
+        case CalpontSystemCatalog::UTINYINT:
+            return joblist::UTINYINTNULL;
+
+        default:
+            return joblist::TINYINTNULL;
+    }
+}
+
+// Check whether val is NULL (or alternative NULL bit pattern for 64-bit string types)
+template<ENUM_KIND KIND, typename T>
+inline bool isNullValue(const T val, const T NULL_VALUE)
+{
+    return val == NULL_VALUE;
+}
+
+template<>
+inline bool isNullValue<KIND_TEXT, int64_t>(const int64_t val, const int64_t NULL_VALUE)
+{
+    //@bug 339 might be a token here
+    //TODO: what's up with the alternative NULL here?
+    constexpr const int64_t ALT_NULL_VALUE = 0xFFFFFFFFFFFFFFFELL;
+
+    return (val == NULL_VALUE ||
+            val == ALT_NULL_VALUE);
+}
+
+//
+// FILTER A COLUMN VALUE
+//
+
+template<bool IS_NULL, typename T, typename FT,
+         typename std::enable_if<IS_NULL == true, T>::type* = nullptr>
+inline bool noneValuesInArray(const T curValue,
+                              const FT* filterValues,
+                              const uint32_t filterCount)
+{
+    // ignore NULLs in the array and in the column data
+    return false;
+}
+
+template<bool IS_NULL, typename T, typename FT,
+         typename std::enable_if<IS_NULL == false, T>::type* = nullptr>
+inline bool noneValuesInArray(const T curValue,
+                              const FT* filterValues,
+                              const uint32_t filterCount)
+{
+    for (uint32_t argIndex = 0; argIndex < filterCount; argIndex++)
+    {
+        if (curValue == static_cast<T>(filterValues[argIndex]))
             return false;
     }
+
+    return true;
 }
 
-inline bool colCompare(int128_t val1, int128_t val2, uint8_t COP, uint8_t rf, int type, uint8_t width, bool isNull = false)
+template<bool IS_NULL, typename T, typename ST,
+         typename std::enable_if<IS_NULL == true, T>::type* = nullptr>
+inline bool noneValuesInSet(const T curValue, const ST* filterSet)
 {
-    if (COMPARE_NIL == COP) return false;
+    // bug 1920: ignore NULLs in the set and in the column data
+    return false;
+}
 
-    /* isNullVal should work on the normalized value on little endian machines */
-    bool val2Null = isNullVal(width, type, (uint8_t*) &val2);
+template<bool IS_NULL, typename T, typename ST,
+         typename std::enable_if<IS_NULL == false, T>::type* = nullptr>
+inline bool noneValuesInSet(const T curValue, const ST* filterSet)
+{
+    bool found = (filterSet->find(curValue) != filterSet->end());
+    return !found;
+}
 
-    if (isNull == val2Null || (val2Null && COP == COMPARE_NE))
-        return colCompare_(val1, val2, COP, rf);
+// The routine is used to test the value from a block against filters
+// according with columnFilterMode(see the corresponding enum for details).
+// Returns true if the curValue matches the filter.
+template<ENUM_KIND KIND, int COL_WIDTH, bool IS_NULL = false,
+        typename T, typename FT, typename ST>
+inline bool matchingColValue(const T curValue,
+                             const ColumnFilterMode columnFilterMode,
+                             const ST* filterSet, // Set of values for simple filters (any of values / none of them)
+                             const uint32_t filterCount, // Number of filter elements, each described by one entry in the following arrays:
+                             const uint8_t* filterCOPs, //   comparison operation
+                             const FT* filterValues, //   value to compare to
+                             const uint8_t* filterRFs, // reverse byte order flags
+                             const ColRequestHeaderDataType& typeHolder,
+                             const T NULL_VALUE)                   // Bit pattern representing NULL value for this column type/width
+{
+    /* In order to make filtering as fast as possible, we replaced the single generic algorithm
+       with several algorithms, better tailored for more specific cases:
+       empty filter, single comparison, and/or/xor comparison results, one/none of small/large set of values
+    */
+    switch (columnFilterMode)
+    {
+        // Empty filter is always true
+        case ALWAYS_TRUE:
+            return true;
+
+
+        // Filter consisting of exactly one comparison operation
+        case SINGLE_COMPARISON:
+        {
+            auto filterValue = filterValues[0];
+            // This can be future optimized checking if a filterValue is NULL or not
+            bool cmp = colCompare<KIND, COL_WIDTH, IS_NULL>(curValue, filterValue, filterCOPs[0],
+                                                            filterRFs[0], typeHolder, isNullValue<KIND,T>(filterValue,
+                                                            NULL_VALUE));
+            return cmp;
+        }
+
+
+        // Filter is true if ANY comparison is true (BOP_OR)
+        case ANY_COMPARISON_TRUE:
+        {
+            for (uint32_t argIndex = 0; argIndex < filterCount; argIndex++)
+            {
+                auto filterValue = filterValues[argIndex];
+                // This can be future optimized checking if a filterValues are NULLs or not before the higher level loop.
+                bool cmp = colCompare<KIND, COL_WIDTH, IS_NULL>(curValue, filterValue, filterCOPs[argIndex],
+                                                                filterRFs[argIndex], typeHolder, isNullValue<KIND,T>(filterValue,
+                                                                NULL_VALUE));
+
+                // Short-circuit the filter evaluation - true || ... == true
+                if (cmp == true)
+                    return true;
+            }
+
+            // We can get here only if all filters returned false
+            return false;
+        }
+
+
+        // Filter is true only if ALL comparisons are true (BOP_AND)
+        case ALL_COMPARISONS_TRUE:
+        {
+            for (uint32_t argIndex = 0; argIndex < filterCount; argIndex++)
+            {
+                auto filterValue = filterValues[argIndex];
+                // This can be future optimized checking if a filterValues are NULLs or not before the higher level loop.
+                bool cmp = colCompare<KIND, COL_WIDTH, IS_NULL>(curValue, filterValue, filterCOPs[argIndex],
+                                                                filterRFs[argIndex], typeHolder,
+                                                                isNullValue<KIND,T>(filterValue, NULL_VALUE));
+
+                // Short-circuit the filter evaluation - false && ... = false
+                if (cmp == false)
+                    return false;
+            }
+
+            // We can get here only if all filters returned true
+            return true;
+        }
+
+
+        // XORing results of comparisons (BOP_XOR)
+        case XOR_COMPARISONS:
+        {
+            bool result = false;
+
+            for (uint32_t argIndex = 0; argIndex < filterCount; argIndex++)
+            {
+                auto filterValue = filterValues[argIndex];
+                // This can be future optimized checking if a filterValues are NULLs or not before the higher level loop.
+                bool cmp = colCompare<KIND, COL_WIDTH, IS_NULL>(curValue, filterValue, filterCOPs[argIndex],
+                                                                filterRFs[argIndex], typeHolder,
+                                                                isNullValue<KIND,T>(filterValue, NULL_VALUE));
+                result ^= cmp;
+            }
+
+            return result;
+        }
+
+
+        // ONE of the values in the small set represented by an array (BOP_OR + all COMPARE_EQ)
+        case ONE_OF_VALUES_IN_ARRAY:
+        {
+            for (uint32_t argIndex = 0; argIndex < filterCount; argIndex++)
+            {
+                if (curValue == static_cast<T>(filterValues[argIndex]))
+                    return true;
+            }
+
+            return false;
+        }
+
+
+        // NONE of the values in the small set represented by an array (BOP_AND + all COMPARE_NE)
+        case NONE_OF_VALUES_IN_ARRAY:
+            return noneValuesInArray<IS_NULL, T, FT>(curValue, filterValues, filterCount);
+
+
+        // ONE of the values in the set is equal to the value checked (BOP_OR + all COMPARE_EQ)
+        case ONE_OF_VALUES_IN_SET:
+        {
+            bool found = (filterSet->find(curValue) != filterSet->end());
+            return found;
+        }
+
+
+        // NONE of the values in the set is equal to the value checked (BOP_AND + all COMPARE_NE)
+        case NONE_OF_VALUES_IN_SET:
+            return noneValuesInSet<IS_NULL, T, ST>(curValue, filterSet);
+
+
+        default:
+            idbassert(0);
+            return true;
+    }
+}
+
+/*****************************************************************************
+ *** READ COLUMN VALUES ******************************************************
+ *****************************************************************************/
+
+// Read one ColValue from the input block.
+// Return true on success, false on End of Block.
+// Values are read from srcArray either in natural order or in the order defined by ridArray.
+// Empty values are skipped, unless ridArray==0 && !(OutputType & OT_RID).
+template<typename T, int COL_WIDTH>
+inline bool nextColValue(
+    T& result,            // Place for the value returned
+    bool* isEmpty,              // ... and flag whether it's EMPTY
+    uint32_t* index,                 // Successive index either in srcArray (going from 0 to srcSize-1) or ridArray (0..ridSize-1)
+    uint16_t* rid,              // Index in srcArray of the value returned
+    const T* srcArray,          // Input array
+    const uint32_t srcSize,     // ... and its size
+    const uint16_t* ridArray,   // Optional array of indexes into srcArray, that defines the read order
+    const uint16_t ridSize,          // ... and its size
+    const uint8_t OutputType,   // Used to decide whether to skip EMPTY values
+    T EMPTY_VALUE)
+{
+    auto i = *index;    // local copy of *index to speed up loops
+    T value;            // value to be written into *result, local for the same reason
+
+    if (ridArray)
+    {
+        // Read next non-empty value in the order defined by ridArray
+        for( ; ; i++)
+        {
+            if (UNLIKELY(i >= ridSize))
+                return false;
+
+            value = srcArray[ridArray[i]];
+
+            if (value != EMPTY_VALUE)
+                break;
+        }
+
+        *rid = ridArray[i];
+        *isEmpty = false;
+    }
+    else if (OutputType & OT_RID)   //TODO: check correctness of this condition for SKIP_EMPTY_VALUES
+    {
+        // Read next non-empty value in the natural order
+        for( ; ; i++)
+        {
+            if (UNLIKELY(i >= srcSize))
+                return false;
+
+            value = srcArray[i];
+
+            if (value != EMPTY_VALUE)
+                break;
+        }
+
+        *rid = i;
+        *isEmpty = false;
+    }
     else
-        return false;
+    {
+        // Read next value in the natural order
+        if (UNLIKELY(i >= srcSize))
+            return false;
+
+        *rid = i;
+        value = srcArray[i];
+        *isEmpty = (value == EMPTY_VALUE);
+    }
+
+    *index = i+1;
+    result = value;
+    return true;
 }
 
-inline bool colCompareUnsigned(uint64_t val1, uint64_t val2, uint8_t COP, uint8_t rf, int type, uint8_t width, bool isNull = false)
-{
-    // 	cout << "comparing unsigned" << hex << val1 << " to " << val2 << endl;
-
-    if (COMPARE_NIL == COP) return false;
-
-    /* isNullVal should work on the normalized value on little endian machines */
-    bool val2Null = isNullVal(width, type, (uint8_t*) &val2);
-
-    if (isNull == val2Null || (val2Null && COP == COMPARE_NE))
-        return colCompare_(val1, val2, COP, rf);
-    else
-        return false;
-}
-
-inline void store(const NewColRequestHeader* in,
-                  NewColResultHeader* out,
-                  unsigned outSize,
-                  unsigned* written,
-                  uint16_t rid, const uint8_t* block8)
+///
+/// WRITE COLUMN VALUES
+///
+
+// Append value to the output buffer with debug-time check for buffer overflow
+template<typename T>
+inline void checkedWriteValue(
+    void* out,
+    unsigned outSize,
+    unsigned* outPos,
+    const T* src,
+    int errSubtype)
 {
+#ifdef PRIM_DEBUG
+    if (sizeof(T) > outSize - *outPos)
+    {
+        logIt(35, errSubtype);
+        throw logic_error("PrimitiveProcessor::checkedWriteValue(): output buffer is too small");
+    }
+#endif
     uint8_t* out8 = reinterpret_cast<uint8_t*>(out);
+    memcpy(out8 + *outPos, src, sizeof(T));
+    *outPos += sizeof(T);
+}
 
-    if (in->OutputType & OT_RID)
+// Write the value index in srcArray and/or the value itself, depending on bits in OutputType,
+// into the output buffer and update the output pointer.
+template<typename T>
+inline void writeColValue(
+    uint8_t OutputType,
+    NewColResultHeader* out,
+    unsigned outSize,
+    unsigned* written,
+    uint16_t rid,
+    const T* srcArray)
+{
+    if (OutputType & OT_RID)
     {
-#ifdef PRIM_DEBUG
-
-        if (*written + 2 > outSize)
-        {
-            logIt(35, 1);
-            throw logic_error("PrimitiveProcessor::store(): output buffer is too small");
-        }
-
-#endif
+        checkedWriteValue(out, outSize, written, &rid, 1);
         out->RidFlags |= (1 << (rid >> 9)); // set the (row/512)'th bit
-        memcpy(&out8[*written], &rid, 2);
-        *written += 2;
     }
 
-    if (in->OutputType & OT_TOKEN || in->OutputType & OT_DATAVALUE)
+    if (OutputType & (OT_TOKEN | OT_DATAVALUE))
     {
-#ifdef PRIM_DEBUG
-
-        if (*written + in->colType.DataSize > outSize)
-        {
-            logIt(35, 2);
-            throw logic_error("PrimitiveProcessor::store(): output buffer is too small");
-        }
-
-#endif
-
-        void* ptr1 = &out8[*written];
-        const uint8_t* ptr2 = &block8[0];
-
-        switch (in->colType.DataSize)
-        {
-            case 16:
-                ptr2 += (rid << 4);
-                memcpy(ptr1, ptr2, 16);
-                break;
-
-            default:
-                // fallthrough
-
-            case 8:
-                ptr2 += (rid << 3);
-                memcpy(ptr1, ptr2, 8);
-                break;
-
-            case 4:
-                ptr2 += (rid << 2);
-                memcpy(ptr1, ptr2, 4);
-                break;
-
-            case 2:
-                ptr2 += (rid << 1);
-                memcpy(ptr1, ptr2, 2);
-                break;
-
-            case 1:
-                ptr2 += (rid << 0);
-                memcpy(ptr1, ptr2, 1);
-                break;
-        }
-
-        *written += in->colType.DataSize;
+        checkedWriteValue(out, outSize, written, &srcArray[rid], 2);
     }
 
-    out->NVALS++;
+    out->NVALS++;   //TODO: Can be computed at the end from *written value
 }
 
-template<int W>
-inline uint64_t nextUnsignedColValue(int type,
-                                     const uint16_t* ridArray,
-                                     int NVALS,
-                                     int* index,
-                                     bool* done,
-                                     bool* isNull,
-                                     bool* isEmpty,
-                                     uint16_t* rid,
-                                     uint8_t OutputType, uint8_t* val8, unsigned itemsPerBlk)
+/* WIP
+template <bool WRITE_RID, bool WRITE_DATA, bool IS_NULL_VALUE_MATCHES, typename FILTER_ARRAY_T, typename RID_T, typename T>
+void writeArray(
+    size_t dataSize,
+    const T* dataArray,
+    const RID_T* dataRid,
+    const FILTER_ARRAY_T *filterArray,
+    uint8_t* outbuf,
+    unsigned* written,
+    uint16_t* NVALS,
+    uint8_t* RidFlagsPtr,
+    T NULL_VALUE)
 {
-    const uint8_t* vp = 0;
+    uint8_t* out = outbuf;
+    uint8_t RidFlags = *RidFlagsPtr;
 
-    if (ridArray == NULL)
+    for (size_t i = 0; i < dataSize; ++i)
     {
-        while (static_cast<unsigned>(*index) < itemsPerBlk &&
-                isEmptyVal<W>(type, &val8[*index * W]) &&
-                (OutputType & OT_RID))
+        //TODO: optimize handling of NULL values and flags by avoiding non-predictable jumps
+        if (dataArray[i]==NULL_VALUE? IS_NULL_VALUE_MATCHES : filterArray[i])
         {
-            (*index)++;
-        }
-
-        if (static_cast<unsigned>(*index) >= itemsPerBlk)
-        {
-            *done = true;
-            return 0;
-        }
-
-        vp = &val8[*index * W];
-        *isNull = isNullVal<W>(type, vp);
-        *isEmpty = isEmptyVal<W>(type, vp);
-        *rid = (*index)++;
-    }
-    else
-    {
-        while (*index < NVALS &&
-                isEmptyVal<W>(type, &val8[ridArray[*index] * W]))
-        {
-            (*index)++;
-        }
-
-        if (*index >= NVALS)
-        {
-            *done = true;
-            return 0;
-        }
-
-        vp = &val8[ridArray[*index] * W];
-        *isNull = isNullVal<W>(type, vp);
-        *isEmpty = isEmptyVal<W>(type, vp);
-        *rid = ridArray[(*index)++];
-    }
-
-    // at this point, nextRid is the index to return, and index is...
-    //   if RIDs are not specified, nextRid + 1,
-    //	 if RIDs are specified, it's the next index in the rid array.
-    //Bug 838, tinyint null problem
-    switch (W)
-    {
-        case 1:
-            return reinterpret_cast<uint8_t*> (val8)[*rid];
-
-        case 2:
-            return reinterpret_cast<uint16_t*>(val8)[*rid];
-
-        case 4:
-            return reinterpret_cast<uint32_t*>(val8)[*rid];
-
-        case 8:
-            return reinterpret_cast<uint64_t*>(val8)[*rid];
-
-        default:
-            logIt(33, W);
-
-#ifdef PRIM_DEBUG
-            throw logic_error("PrimitiveProcessor::nextColValue() bad width");
-#endif
-            return -1;
-    }
-}
-template<int W>
-inline uint8_t* nextBinColValue(int type,
-                                     const uint16_t* ridArray,
-                                     int NVALS,
-                                     int* index,
-                                     bool* done,
-                                     bool* isNull,
-                                     bool* isEmpty,
-                                     uint16_t* rid,
-                                     uint8_t OutputType, uint8_t* val8, unsigned itemsPerBlk)
-{
-    if (ridArray == NULL)
-    {
-        while (static_cast<unsigned>(*index) < itemsPerBlk &&
-                isEmptyVal<W>(type, &val8[*index * W]) &&
-                (OutputType & OT_RID))
-        {
-            (*index)++;
-        }
-
-
-        if (static_cast<unsigned>(*index) >= itemsPerBlk)
-        {
-            *done = true;
-            return NULL;
-        }
-        *rid = (*index)++;
-    }
-    else
-    {
-        while (*index < NVALS &&
-            isEmptyVal<W>(type, &val8[ridArray[*index] * W]))
-        {
-            (*index)++;
-        }
-
-        if (*index >= NVALS)
-        {
-            *done = true;
-            return NULL;
-        }
-        *rid = ridArray[(*index)++];
-    }
-
-    uint32_t curValueOffset = *rid * W;
-
-    *isNull = isNullVal<W>(type, &val8[curValueOffset]);
-    *isEmpty = isEmptyVal<W>(type, &val8[curValueOffset]);
-    //cout << "nextColBinValue " << *index <<  " rowid " << *rid << endl;
-    // at this point, nextRid is the index to return, and index is...
-    //   if RIDs are not specified, nextRid + 1,
-    //	 if RIDs are specified, it's the next index in the rid array.
-    return &val8[curValueOffset];
-
-#ifdef PRIM_DEBUG
-            throw logic_error("PrimitiveProcessor::nextColBinValue() bad width");
-#endif
-            return NULL;
-}
-
-
-template<int W>
-inline int64_t nextColValue(int type,
-                            const uint16_t* ridArray,
-                            int NVALS,
-                            int* index,
-                            bool* done,
-                            bool* isNull,
-                            bool* isEmpty,
-                            uint16_t* rid,
-                            uint8_t OutputType, uint8_t* val8, unsigned itemsPerBlk)
-{
-    const uint8_t* vp = 0;
-
-    if (ridArray == NULL)
-    {
-        while (static_cast<unsigned>(*index) < itemsPerBlk &&
-                isEmptyVal<W>(type, &val8[*index * W]) &&
-                (OutputType & OT_RID))
-        {
-            (*index)++;
-        }
-
-        if (static_cast<unsigned>(*index) >= itemsPerBlk)
-        {
-            *done = true;
-            return 0;
-        }
-
-        vp = &val8[*index * W];
-        *isNull = isNullVal<W>(type, vp);
-        *isEmpty = isEmptyVal<W>(type, vp);
-        *rid = (*index)++;
-    }
-    else
-    {
-        while (*index < NVALS &&
-                isEmptyVal<W>(type, &val8[ridArray[*index] * W]))
-        {
-            (*index)++;
-        }
-
-        if (*index >= NVALS)
-        {
-            *done = true;
-            return 0;
-        }
-
-        vp = &val8[ridArray[*index] * W];
-        *isNull = isNullVal<W>(type, vp);
-        *isEmpty = isEmptyVal<W>(type, vp);
-        *rid = ridArray[(*index)++];
-    }
-
-    // at this point, nextRid is the index to return, and index is...
-    //   if RIDs are not specified, nextRid + 1,
-    //	 if RIDs are specified, it's the next index in the rid array.
-    //Bug 838, tinyint null problem
-    switch (W)
-    {
-        case 1:
-            return reinterpret_cast<int8_t*> (val8)[*rid];
-
-        case 2:
-            return reinterpret_cast<int16_t*>(val8)[*rid];
-
-        case 4:
-#if 0
-            if (type == CalpontSystemCatalog::FLOAT)
+            if (WRITE_RID)
             {
-                // convert the float to a 64-bit type, return that w/o conversion
-                int32_t* val32 = reinterpret_cast<int32_t*>(val8);
-                double dTmp;
-                dTmp = (double) * ((float*) &val32[*rid]);
-                return *((int64_t*) &dTmp);
-            }
-            else
-            {
-                return reinterpret_cast<int32_t*>(val8)[*rid];
+                copyValue(out, &dataRid[i], sizeof(RID_T));
+                out += sizeof(RID_T);
+
+                RidFlags |= (1 << (dataRid[i] >> 10)); // set the (row/1024)'th bit
             }
 
-#else
-            return reinterpret_cast<int32_t*>(val8)[*rid];
-#endif
-
-        case 8:
-            return reinterpret_cast<int64_t*>(val8)[*rid];
-
-        default:
-            logIt(33, W);
-
-#ifdef PRIM_DEBUG
-            throw logic_error("PrimitiveProcessor::nextColValue() bad width");
-#endif
-            return -1;
+            if (WRITE_DATA)
+            {
+                copyValue(out, &dataArray[i], sizeof(T));
+                out += sizeof(T);
+            }
+        }
     }
+
+    // Update number of written values, number of written bytes and out->RidFlags
+    int size1 = (WRITE_RID? sizeof(RID_T) : 0) + (WRITE_DATA? sizeof(T) : 0);
+    *NVALS += (out - outbuf) / size1;
+    *written += out - outbuf;
+    *RidFlagsPtr = RidFlags;
 }
+*/
 
+/*****************************************************************************
+ *** RUN DATA THROUGH A COLUMN FILTER ****************************************
+ *****************************************************************************/
 
-template<int W>
-inline void p_Col_ridArray(NewColRequestHeader* in,
-                           NewColResultHeader* out,
-                           unsigned outSize,
-                           unsigned* written, int* block, Stats* fStatsPtr, unsigned itemsPerBlk,
-                           boost::shared_ptr<ParsedColumnFilter> parsedColumnFilter)
+/* "Vertical" processing of the column filter:
+   1. load all data into temporary vector
+   2. process one filter element over entire vector before going to a next one
+   3. write records, that succesfully passed through the filter, to outbuf
+*/
+/*
+template<typename T, ENUM_KIND KIND, typename VALTYPE>
+void processArray(
+    // Source data
+    const T* srcArray,
+    size_t srcSize,
+    uint16_t* ridArray,
+    size_t ridSize,                 // Number of values in ridArray
+    // Filter description
+    int BOP,
+    prestored_set_t* filterSet,     // Set of values for simple filters (any of values / none of them)
+    uint32_t filterCount,           // Number of filter elements, each described by one entry in the following arrays:
+    uint8_t* filterCOPs,            //   comparison operation
+    int64_t* filterValues,          //   value to compare to
+    // Output buffer/stats
+    uint8_t* outbuf,                // Pointer to the place for output data
+    unsigned* written,              // Number of written bytes, that we need to update
+    uint16_t* NVALS,                // Number of written values, that we need to update
+    uint8_t* RidFlagsPtr,           // Pointer to out->RidFlags
+    // Processing parameters
+    bool WRITE_RID,
+    bool WRITE_DATA,
+    bool SKIP_EMPTY_VALUES,
+    T EMPTY_VALUE,
+    bool IS_NULL_VALUE_MATCHES,
+    T NULL_VALUE,
+    // Min/Max search
+    bool ValidMinMax,
+    VALTYPE* MinPtr,
+    VALTYPE* MaxPtr)
 {
-    uint16_t* ridArray = 0;
-    uint8_t* in8 = reinterpret_cast<uint8_t*>(in);
-    const uint8_t filterSize = sizeof(uint8_t) + sizeof(uint8_t) + W;
+    // Alloc temporary arrays
+    size_t inputSize = (ridArray? ridSize : srcSize);
 
-    if (in->NVALS > 0)
-        ridArray = reinterpret_cast<uint16_t*>(&in8[sizeof(NewColRequestHeader) +
-                                                                           (in->NOPS * filterSize)]);
+    // Temporary array with data to filter
+    std::vector<T> dataVec(inputSize);
+    auto dataArray = dataVec.data();
 
-    if (ridArray && 1 == in->sort )
+    // Temporary array with RIDs of corresponding dataArray elements
+    std::vector<RID_T> dataRidVec(WRITE_RID? inputSize : 0);
+    auto dataRid = dataRidVec.data();
+
+
+    // Copy input data into temporary array, opt. storing RIDs, opt. skipping EMPTYs
+    size_t dataSize;  // number of values copied into dataArray
+    if (ridArray != NULL)
     {
-        qsort(ridArray, in->NVALS, sizeof(uint16_t), compareBlock<uint16_t>);
+        SKIP_EMPTY_VALUES = true;  // let findMinMaxArray() know that empty values will be skipped
 
-        if (fStatsPtr)
-#ifdef _MSC_VER
-            fStatsPtr->markEvent(in->LBID, GetCurrentThreadId(), in->hdr.SessionID, 'O');
-
-#else
-            fStatsPtr->markEvent(in->LBID, pthread_self(), in->hdr.SessionID, 'O');
-#endif
+        dataSize = WRITE_RID? readArray<true, true,true>(srcArray, srcSize, dataArray, dataRid, ridArray, ridSize, EMPTY_VALUE)
+                            : readArray<false,true,true>(srcArray, srcSize, dataArray, dataRid, ridArray, ridSize, EMPTY_VALUE);
     }
-
-    // Set boolean indicating whether to capture the min and max values.
-    out->ValidMinMax = isMinMaxValid(in);
-
-    if (out->ValidMinMax)
+    else if (SKIP_EMPTY_VALUES)
     {
-        if (isUnsigned((CalpontSystemCatalog::ColDataType)in->colType.DataType))
-        {
-            out->Min = static_cast<int64_t>(numeric_limits<uint64_t>::max());
-            out->Max = 0;
-        }
-        else
-        {
-            out->Min = numeric_limits<int64_t>::max();
-            out->Max = numeric_limits<int64_t>::min();
-        }
+        dataSize = WRITE_RID? readArray<true, false,true>(srcArray, srcSize, dataArray, dataRid, ridArray, ridSize, EMPTY_VALUE)
+                            : readArray<false,false,true>(srcArray, srcSize, dataArray, dataRid, ridArray, ridSize, EMPTY_VALUE);
     }
     else
     {
-        out->Min = 0;
-        out->Max = 0;
+        dataSize = WRITE_RID? readArray<true, false,false>(srcArray, srcSize, dataArray, dataRid, ridArray, ridSize, EMPTY_VALUE)
+                            : readArray<false,false,false>(srcArray, srcSize, dataArray, dataRid, ridArray, ridSize, EMPTY_VALUE);
     }
 
-    const ColArgs* args = NULL;
-    int64_t val = 0;
-    uint64_t uval = 0;
-    int nextRidIndex = 0, argIndex = 0;
-    bool done = false, cmp = false, isNull = false, isEmpty = false;
+    // If required, find Min/Max values of the data
+    if (ValidMinMax)
+    {
+        SKIP_EMPTY_VALUES? findMinMaxArray<true> (dataSize, dataArray, MinPtr, MaxPtr, EMPTY_VALUE, NULL_VALUE)
+                         : findMinMaxArray<false>(dataSize, dataArray, MinPtr, MaxPtr, EMPTY_VALUE, NULL_VALUE);
+    }
+
+
+    // Choose initial filterArray[i] value depending on the operation
+    bool initValue = false;
+    if      (filterCount == 0) {initValue = true;}
+    else if (BOP_NONE == BOP)  {initValue = false;  BOP = BOP_OR;}
+    else if (BOP_OR   == BOP)  {initValue = false;}
+    else if (BOP_XOR  == BOP)  {initValue = false;}
+    else if (BOP_AND  == BOP)  {initValue = true;}
+
+    // Temporary array accumulating results of filtering for each record
+    std::vector<uint8_t> filterVec(dataSize, initValue);
+    auto filterArray = filterVec.data();
+
+    // Real type of column data, may be floating-point (used only for comparisons in the filtering)
+    using FLOAT_T = typename std::conditional<sizeof(T) == 8, double, float>::type;
+    using DATA_T  = typename std::conditional<KIND_FLOAT == KIND, FLOAT_T, T>::type;
+    auto realDataArray = reinterpret_cast<DATA_T*>(dataArray);
+
+
+    // Evaluate column filter on elements of dataArray and store results into filterArray
+    if (filterSet != NULL  &&  BOP == BOP_OR)
+    {
+        applySetFilter<BOP_OR>(dataSize, dataArray, filterSet, filterArray);
+    }
+    else if (filterSet != NULL  &&  BOP == BOP_AND)
+    {
+        applySetFilter<BOP_AND>(dataSize, dataArray, filterSet, filterArray);
+    }
+    else
+
+        for (int i = 0; i < filterCount; ++i)
+        {
+            DATA_T cmp_value;   // value for comparison, may be floating-point
+            copyValue(&cmp_value, &filterValues[i], sizeof(cmp_value));
+
+            switch(BOP)
+            {
+                case BOP_AND:  applyFilterElement<BOP_AND>(filterCOPs[i], dataSize, realDataArray, cmp_value, filterArray);  break;
+                case BOP_OR:   applyFilterElement<BOP_OR> (filterCOPs[i], dataSize, realDataArray, cmp_value, filterArray);  break;
+                case BOP_XOR:  applyFilterElement<BOP_XOR>(filterCOPs[i], dataSize, realDataArray, cmp_value, filterArray);  break;
+                default:       idbassert(0);
+            }
+        }
+    }
+
+
+    // Copy filtered data and/or their RIDs into output buffer
+    if (WRITE_RID && WRITE_DATA)
+    {
+        IS_NULL_VALUE_MATCHES? writeArray<true,true,true> (dataSize, dataArray, dataRid, filterArray, outbuf, written, NVALS, RidFlagsPtr, NULL_VALUE)
+                             : writeArray<true,true,false>(dataSize, dataArray, dataRid, filterArray, outbuf, written, NVALS, RidFlagsPtr, NULL_VALUE);
+    }
+    else if (WRITE_RID)
+    {
+        IS_NULL_VALUE_MATCHES? writeArray<true,false,true> (dataSize, dataArray, dataRid, filterArray, outbuf, written, NVALS, RidFlagsPtr, NULL_VALUE)
+                             : writeArray<true,false,false>(dataSize, dataArray, dataRid, filterArray, outbuf, written, NVALS, RidFlagsPtr, NULL_VALUE);
+    }
+    else
+    {
+        IS_NULL_VALUE_MATCHES? writeArray<false,true,true> (dataSize, dataArray, dataRid, filterArray, outbuf, written, NVALS, RidFlagsPtr, NULL_VALUE)
+                             : writeArray<false,true,false>(dataSize, dataArray, dataRid, filterArray, outbuf, written, NVALS, RidFlagsPtr, NULL_VALUE);
+    }
+}
+*/
+
+// These two are templates update min/max values in the loop iterating the values in filterColumnData.
+template<ENUM_KIND KIND, typename T,
+         typename std::enable_if<KIND == KIND_TEXT, T>::type* = nullptr>
+inline void updateMinMax(T& Min, T& Max, T& curValue, NewColRequestHeader* in)
+{
+    constexpr int COL_WIDTH = sizeof(T);
+    if (colCompare<KIND_TEXT, COL_WIDTH>(Min, curValue, COMPARE_GT, false, in->colType))
+        Min = curValue;
+
+    if (colCompare<KIND_TEXT, COL_WIDTH>(Max, curValue, COMPARE_LT, false, in->colType))
+        Max = curValue;
+}
+
+template<ENUM_KIND KIND, typename T,
+         typename std::enable_if<KIND != KIND_TEXT, T>::type* = nullptr>
+inline void updateMinMax(T& Min, T& Max, T& curValue, NewColRequestHeader* in)
+{
+    if (Min > curValue)
+        Min = curValue;
+
+    if (Max < curValue)
+        Max = curValue;
+}
+
+// TBD Check if MCS really needs to copy values from in into out msgs or
+// it is possible to copy from in msg into BPP::values directly.
+// This template contains the main scanning/filtering loop.
+// Copy data matching parsedColumnFilter from input to output.
+// Input is srcArray[srcSize], optionally accessed in the order defined by ridArray[ridSize].
+// Output is BLOB out[outSize], written starting at offset *written, which is updated afterward.
+template<typename T, ENUM_KIND KIND>
+void filterColumnData(
+    NewColRequestHeader* in,
+    NewColResultHeader* out,
+    unsigned outSize,
+    unsigned* written,
+    uint16_t* ridArray,
+    const uint16_t ridSize,                // Number of values in ridArray
+    int* srcArray16,
+    const uint32_t srcSize,
+    boost::shared_ptr<ParsedColumnFilter> parsedColumnFilter)
+{
+    using FT = typename IntegralTypeToFilterType<T>::type;
+    using ST = typename IntegralTypeToFilterSetType<T>::type;
+    constexpr int COL_WIDTH = sizeof(T);
+    const T* srcArray = reinterpret_cast<const T*>(srcArray16);
+
+    // Cache some structure fields in local vars
+    auto dataType = (CalpontSystemCatalog::ColDataType) in->colType.DataType;  // Column datatype
+    uint32_t filterCount = in->NOPS;        // Number of elements in the filter
+    uint8_t  outputType  = in->OutputType;
+
+    // If no pre-parsed column filter is set, parse the filter in the message
+    if (parsedColumnFilter.get() == nullptr  &&  filterCount > 0)
+        parsedColumnFilter = _parseColumnFilter<T>(in->getFilterStringPtr(),
+                                                   dataType, filterCount, in->BOP);
+
+    // Cache parsedColumnFilter fields in local vars
+    auto columnFilterMode = filterCount==0 ? ALWAYS_TRUE : parsedColumnFilter->columnFilterMode;
+    FT* filterValues  = filterCount==0 ? nullptr : parsedColumnFilter->getFilterVals<FT>();
+    auto filterCOPs  = filterCount==0 ? nullptr : parsedColumnFilter->prestored_cops.get();
+    auto filterRFs   = filterCount==0 ? nullptr : parsedColumnFilter->prestored_rfs.get();
+    ST* filterSet    = filterCount==0 ? nullptr : parsedColumnFilter->getFilterSet<ST>();
+
+    // ###########################
+    // Bit patterns in srcArray[i] representing EMPTY and NULL values
+    T EMPTY_VALUE = getEmptyValue<T>(dataType);
+    T NULL_VALUE  = getNullValue<T>(dataType);
+
+    // Precompute filter results for NULL values
+    bool isNullValueMatches = matchingColValue<KIND, COL_WIDTH, true>(NULL_VALUE, columnFilterMode,
+        filterSet, filterCount, filterCOPs, filterValues, filterRFs, in->colType, NULL_VALUE);
+
+    // Boolean indicating whether to capture the min and max values
+    bool ValidMinMax = isMinMaxValid(in);
+    // Local vars to capture the min and max values
+    T Min = datatypes::numeric_limits<T>::max();
+    T Max = (KIND == KIND_UNSIGNED) ? 0 : datatypes::numeric_limits<T>::min();
+
+/* WIP add vertical processing
+    // If possible, use faster "vertical" filtering approach
+    if (KIND != KIND_TEXT)
+    {
+        bool canUseFastFiltering = true;
+        for (int i = 0; i < filterCount; ++i)
+            if (filterRFs[i] != 0)
+            canUseFastFiltering = false;
+
+        if (canUseFastFiltering)
+        {
+            processArray<T, KIND, T>(srcArray, srcSize, ridArray, ridSize,
+                         in->BOP, filterSet, filterCount, filterCOPs, filterValues,
+                         reinterpret_cast<uint8_t*>(out) + *written,
+                         written, & out->NVALS, & out->RidFlags,
+                         (outputType & OT_RID) != 0,
+                         (outputType & (OT_TOKEN | OT_DATAVALUE)) != 0,
+                         (outputType & OT_RID) != 0,  //TODO: check correctness of this condition for SKIP_EMPTY_VALUES
+                         EMPTY_VALUE,
+                         isNullValueMatches, NULL_VALUE,
+                         ValidMinMax, &Min, &Max);
+            return;
+        }
+    }
+*/
+
+    // Loop-local variables
+    T curValue = 0;
     uint16_t rid = 0;
-    prestored_set_t::const_iterator it;
+    bool isEmpty = false;
 
-    int64_t* std_argVals = (int64_t*)alloca(in->NOPS * sizeof(int64_t));
-    uint8_t* std_cops = (uint8_t*)alloca(in->NOPS * sizeof(uint8_t));
-    uint8_t* std_rfs = (uint8_t*)alloca(in->NOPS * sizeof(uint8_t));
-    int64_t* argVals = NULL;
-    uint64_t* uargVals = NULL;
-    uint8_t* cops = NULL;
-    uint8_t* rfs = NULL;
-
-    // no pre-parsed column filter is set, parse the filter in the message
-    if (parsedColumnFilter.get() == NULL)
+    // Loop over the column values, storing those matching the filter, and updating the min..max range
+    for (uint32_t i = 0;
+         nextColValue<T, COL_WIDTH>(curValue, &isEmpty,
+                                    &i, &rid,
+                                    srcArray, srcSize, ridArray, ridSize,
+                                    outputType, EMPTY_VALUE); )
     {
-        if (isUnsigned((CalpontSystemCatalog::ColDataType)in->colType.DataType))
+        if (isEmpty)
+            continue;
+        else if (isNullValue<KIND,T>(curValue, NULL_VALUE))
         {
-            uargVals = reinterpret_cast<uint64_t*>(std_argVals);
-            cops = std_cops;
-            rfs = std_rfs;
-
-            for (argIndex = 0; argIndex < in->NOPS; argIndex++)
-            {
-                args = reinterpret_cast<const ColArgs*>(&in8[sizeof(NewColRequestHeader) +
-                                                                                    (argIndex * filterSize)]);
-                cops[argIndex] = args->COP;
-                rfs[argIndex] = args->rf;
-
-                switch (W)
-                {
-                    case 1:
-                        uargVals[argIndex] = *reinterpret_cast<const uint8_t*>(args->val);
-                        break;
-
-                    case 2:
-                        uargVals[argIndex] = *reinterpret_cast<const uint16_t*>(args->val);
-                        break;
-
-                    case 4:
-                        uargVals[argIndex] = *reinterpret_cast<const uint32_t*>(args->val);
-                        break;
-
-                    case 8:
-                        uargVals[argIndex] = *reinterpret_cast<const uint64_t*>(args->val);
-                        break;
-                }
-            }
+            // If NULL values match the filter, write curValue to the output buffer
+            if (isNullValueMatches)
+                writeColValue<T>(outputType, out, outSize, written, rid, srcArray);
         }
         else
         {
-            argVals = std_argVals;
-            cops = std_cops;
-            rfs = std_rfs;
-
-            for (argIndex = 0; argIndex < in->NOPS; argIndex++)
+            // If curValue matches the filter, write it to the output buffer
+            if (matchingColValue<KIND, COL_WIDTH, false>(curValue, columnFilterMode, filterSet, filterCount,
+                                filterCOPs, filterValues, filterRFs, in->colType, NULL_VALUE))
             {
-                args = reinterpret_cast<const ColArgs*>(&in8[sizeof(NewColRequestHeader) +
-                                                                                    (argIndex * filterSize)]);
-                cops[argIndex] = args->COP;
-                rfs[argIndex] = args->rf;
-
-                switch (W)
-                {
-                    case 1:
-                        argVals[argIndex] = args->val[0];
-                        break;
-
-                    case 2:
-                        argVals[argIndex] = *reinterpret_cast<const int16_t*>(args->val);
-                        break;
-
-                    case 4:
-#if 0
-                        if (in->colType.DataType == CalpontSystemCatalog::FLOAT)
-                        {
-                            double dTmp;
-
-                            dTmp = (double) * ((const float*) args->val);
-                            argVals[argIndex] = *((int64_t*) &dTmp);
-                        }
-                        else
-                            argVals[argIndex] = *reinterpret_cast<const int32_t*>(args->val);
-
-#else
-                        argVals[argIndex] = *reinterpret_cast<const int32_t*>(args->val);
-#endif
-                        break;
-
-                    case 8:
-                        argVals[argIndex] = *reinterpret_cast<const int64_t*>(args->val);
-                        break;
-                }
+                writeColValue<T>(outputType, out, outSize, written, rid, srcArray);
             }
+
+            // Update Min and Max if necessary.  EMPTY/NULL values are processed in other branches.
+            if (ValidMinMax)
+                updateMinMax<KIND>(Min, Max, curValue, in);
         }
     }
-    // we have a pre-parsed filter, and it's in the form of op and value arrays
-    else if (parsedColumnFilter->columnFilterMode == TWO_ARRAYS)
+
+
+    // Write captured Min/Max values to *out
+    out->ValidMinMax = ValidMinMax;
+    if (ValidMinMax)
     {
-        argVals = parsedColumnFilter->prestored_argVals.get();
-        uargVals = reinterpret_cast<uint64_t*>(parsedColumnFilter->prestored_argVals.get());
-        cops = parsedColumnFilter->prestored_cops.get();
-        rfs = parsedColumnFilter->prestored_rfs.get();
+        out->Min = Min;
+        out->Max = Max;
     }
-
-    // else we have a pre-parsed filter, and it's an unordered set for quick == comparisons
-
-    if (isUnsigned((CalpontSystemCatalog::ColDataType)in->colType.DataType))
-    {
-        uval = nextUnsignedColValue<W>(in->colType.DataType, ridArray, in->NVALS, &nextRidIndex, &done, &isNull,
-                                       &isEmpty, &rid, in->OutputType, reinterpret_cast<uint8_t*>(block), itemsPerBlk);
-    }
-    else
-    {
-        val = nextColValue<W>(in->colType.DataType, ridArray, in->NVALS, &nextRidIndex, &done, &isNull,
-                              &isEmpty, &rid, in->OutputType, reinterpret_cast<uint8_t*>(block), itemsPerBlk);
-    }
-
-    while (!done)
-    {
-        if (cops == NULL)    // implies parsedColumnFilter && columnFilterMode == SET
-        {
-            /* bug 1920: ignore NULLs in the set and in the column data */
-            if (!(isNull && in->BOP == BOP_AND))
-            {
-                if (isUnsigned((CalpontSystemCatalog::ColDataType)in->colType.DataType))
-                {
-                    it = parsedColumnFilter->prestored_set->find(*reinterpret_cast<int64_t*>(&uval));
-                }
-                else
-                {
-                    it = parsedColumnFilter->prestored_set->find(val);
-                }
-
-                if (in->BOP == BOP_OR)
-                {
-                    // assume COP == COMPARE_EQ
-                    if (it != parsedColumnFilter->prestored_set->end())
-                    {
-                        store(in, out, outSize, written, rid, reinterpret_cast<const uint8_t*>(block));
-                    }
-                }
-                else if (in->BOP == BOP_AND)
-                {
-                    // assume COP == COMPARE_NE
-                    if (it == parsedColumnFilter->prestored_set->end())
-                    {
-                        store(in, out, outSize, written, rid, reinterpret_cast<const uint8_t*>(block));
-                    }
-                }
-            }
-        }
-        else
-        {
-            for (argIndex = 0; argIndex < in->NOPS; argIndex++)
-            {
-                if (isUnsigned((CalpontSystemCatalog::ColDataType)in->colType.DataType))
-                {
-                    cmp = colCompareUnsigned(uval, uargVals[argIndex], cops[argIndex],
-                                             rfs[argIndex], in->colType.DataType, W, isNull);
-                }
-                else
-                {
-                    cmp = colCompare(val, argVals[argIndex], cops[argIndex],
-                                     rfs[argIndex], in->colType, W, isNull);
-                }
-
-                if (in->NOPS == 1)
-                {
-                    if (cmp == true)
-                    {
-                        store(in, out, outSize, written, rid, reinterpret_cast<const uint8_t*>(block));
-                    }
-
-                    break;
-                }
-                else if (in->BOP == BOP_AND && cmp == false)
-                {
-                    break;
-                }
-                else if (in->BOP == BOP_OR && cmp == true)
-                {
-                    store(in, out, outSize, written, rid, reinterpret_cast<const uint8_t*>(block));
-                    break;
-                }
-            }
-
-            if ((argIndex == in->NOPS && in->BOP == BOP_AND) || in->NOPS == 0)
-            {
-                store(in, out, outSize, written, rid, reinterpret_cast<const uint8_t*>(block));
-            }
-        }
-
-        // Set the min and max if necessary.  Ignore nulls.
-        if (out->ValidMinMax && !isNull && !isEmpty)
-        {
-
-            if (in->colType.DataType == CalpontSystemCatalog::CHAR ||
-                in->colType.DataType == CalpontSystemCatalog::VARCHAR ||
-                in->colType.DataType == CalpontSystemCatalog::BLOB ||
-                in->colType.DataType == CalpontSystemCatalog::TEXT )
-            {
-                if (colCompare(out->Min, val, COMPARE_GT, false, in->colType, W))
-                    out->Min = val;
-
-                if (colCompare(out->Max, val, COMPARE_LT, false, in->colType, W))
-                    out->Max = val;
-            }
-            else if (isUnsigned((CalpontSystemCatalog::ColDataType)in->colType.DataType))
-            {
-                if (static_cast<uint64_t>(out->Min) > uval)
-                    out->Min = static_cast<int64_t>(uval);
-
-                if (static_cast<uint64_t>(out->Max) < uval)
-                    out->Max = static_cast<int64_t>(uval);;
-            }
-            else
-            {
-                if (out->Min > val)
-                    out->Min = val;
-
-                if (out->Max < val)
-                    out->Max = val;
-            }
-        }
-
-        if (isUnsigned((CalpontSystemCatalog::ColDataType)in->colType.DataType))
-        {
-            uval = nextUnsignedColValue<W>(in->colType.DataType, ridArray, in->NVALS, &nextRidIndex, &done,
-                                           &isNull, &isEmpty, &rid, in->OutputType, reinterpret_cast<uint8_t*>(block),
-                                           itemsPerBlk);
-        }
-        else
-        {
-            val = nextColValue<W>(in->colType.DataType, ridArray, in->NVALS, &nextRidIndex, &done,
-                                  &isNull, &isEmpty, &rid, in->OutputType, reinterpret_cast<uint8_t*>(block),
-                                  itemsPerBlk);
-        }
-    }
-
-    if (fStatsPtr)
-#ifdef _MSC_VER
-        fStatsPtr->markEvent(in->LBID, GetCurrentThreadId(), in->hdr.SessionID, 'K');
-
-#else
-        fStatsPtr->markEvent(in->LBID, pthread_self(), in->hdr.SessionID, 'K');
-#endif
-}
-
-// There are number of hardcoded type-dependant objects
-// that effectively makes this template int128-based only.
-// Use type based template method for Min,Max values.
-// prestored_set_128 must be a template with a type arg.
-template<int W, typename T>
-inline void p_Col_bin_ridArray(NewColRequestHeader* in,
-                           NewColResultHeader* out,
-                           unsigned outSize,
-                           unsigned* written, int* block, Stats* fStatsPtr, unsigned itemsPerBlk,
-                           boost::shared_ptr<ParsedColumnFilter> parsedColumnFilter)
-{
-    uint16_t* ridArray = 0;
-    uint8_t* in8 = reinterpret_cast<uint8_t*>(in);
-    const uint8_t filterSize = sizeof(uint8_t) + sizeof(uint8_t) + W;
-
-    if (in->NVALS > 0)
-        ridArray = reinterpret_cast<uint16_t*>(&in8[sizeof(NewColRequestHeader) +
-                                                                           (in->NOPS * filterSize)]);
-
-    if (ridArray && 1 == in->sort )
-    {
-        qsort(ridArray, in->NVALS, sizeof(uint16_t), compareBlock<uint16_t>);
-
-        if (fStatsPtr)
-#ifdef _MSC_VER
-            fStatsPtr->markEvent(in->LBID, GetCurrentThreadId(), in->hdr.SessionID, 'O');
-
-#else
-            fStatsPtr->markEvent(in->LBID, pthread_self(), in->hdr.SessionID, 'O');
-#endif
-    }
-
-    // Set boolean indicating whether to capture the min and max values.
-    out->ValidMinMax = isMinMaxValid(in);
-
-    if (out->ValidMinMax)
-    {
-        // Assume that isUnsigned returns true for 8-bytes DTs only
-        if (isUnsigned((CalpontSystemCatalog::ColDataType)in->colType.DataType))
-        {
-            out->Min = -1;
-            out->Max = 0;
-        }
-        else
-        {
-            out->Min = datatypes::Decimal::maxInt128;
-            out->Max = datatypes::Decimal::minInt128;
-        }
-    }
-    else
-    {
-        out->Min = 0;
-        out->Max = 0;
-    }
-
-    typedef char binWtype [W];
-
-    const ColArgs* args = NULL;
-    binWtype* bval;
-    int nextRidIndex = 0, argIndex = 0;
-    bool done = false, cmp = false, isNull = false, isEmpty = false;
-    uint16_t rid = 0;
-    prestored_set_t_128::const_iterator it;
-
-    binWtype* argVals = (binWtype*)alloca(in->NOPS * W);
-    uint8_t* std_cops = (uint8_t*)alloca(in->NOPS * sizeof(uint8_t));
-    uint8_t* std_rfs = (uint8_t*)alloca(in->NOPS * sizeof(uint8_t));
-    uint8_t* cops = NULL;
-    uint8_t* rfs = NULL;
-
-    // no pre-parsed column filter is set, parse the filter in the message
-    if (parsedColumnFilter.get() == NULL) {
-
-        cops = std_cops;
-        rfs = std_rfs;
-
-        for (argIndex = 0; argIndex < in->NOPS; argIndex++) {
-            args = reinterpret_cast<const ColArgs*> (&in8[sizeof (NewColRequestHeader) +
-                    (argIndex * filterSize)]);
-            cops[argIndex] = args->COP;
-            rfs[argIndex] = args->rf;
-
-            memcpy(argVals[argIndex],args->val, W);
-        }
-    }
-    // we have a pre-parsed filter, and it's in the form of op and value arrays
-    else if (parsedColumnFilter->columnFilterMode == TWO_ARRAYS)
-    {
-        argVals = (binWtype*) parsedColumnFilter->prestored_argVals128.get();
-        cops = parsedColumnFilter->prestored_cops.get();
-        rfs = parsedColumnFilter->prestored_rfs.get();
-    }
-
-    // else we have a pre-parsed filter, and it's an unordered set for quick == comparisons
-
-    bval = (binWtype*)nextBinColValue<W>(in->colType.DataType, ridArray, in->NVALS, &nextRidIndex, &done, &isNull,
-                &isEmpty, &rid, in->OutputType, reinterpret_cast<uint8_t*>(block), itemsPerBlk);
-
-    T val;
-
-    while (!done)
-    {
-        val = *reinterpret_cast<T*>(bval);
-
-        if (cops == NULL)    // implies parsedColumnFilter && columnFilterMode == SET
-        {
-            /* bug 1920: ignore NULLs in the set and in the column data */
-            if (!(isNull && in->BOP == BOP_AND))
-            {
-
-                it = parsedColumnFilter->prestored_set_128->find(val);
-
-
-                if (in->BOP == BOP_OR)
-                {
-                    // assume COP == COMPARE_EQ
-                    if (it != parsedColumnFilter->prestored_set_128->end())
-                    {
-                        store(in, out, outSize, written, rid, reinterpret_cast<const uint8_t*>(block));
-                    }
-                }
-                else if (in->BOP == BOP_AND)
-                {
-                    // assume COP == COMPARE_NE
-                    if (it == parsedColumnFilter->prestored_set_128->end())
-                    {
-                        store(in, out, outSize, written, rid, reinterpret_cast<const uint8_t*>(block));
-                    }
-                }
-            }
-        }
-        else
-        {
-            for (argIndex = 0; argIndex < in->NOPS; argIndex++)
-            {
-                T filterVal = *reinterpret_cast<T*>(argVals[argIndex]);
-
-                cmp = colCompare(val, filterVal, cops[argIndex],
-                                 rfs[argIndex], in->colType.DataType, W, isNull);
-
-                if (in->NOPS == 1)
-                {
-                    if (cmp == true)
-                    {
-                        store(in, out, outSize, written, rid, reinterpret_cast<const uint8_t*>(block));
-                    }
-                    break;
-                }
-                else if (in->BOP == BOP_AND && cmp == false)
-                {
-                    break;
-                }
-                else if (in->BOP == BOP_OR && cmp == true)
-                {
-                    store(in, out, outSize, written, rid, reinterpret_cast<const uint8_t*>(block));
-                    break;
-                }
-            }
-
-            if ((argIndex == in->NOPS && in->BOP == BOP_AND) || in->NOPS == 0)
-            {
-                store(in, out, outSize, written, rid, reinterpret_cast<const uint8_t*>(block));
-            }
-        }
-
-        // Set the min and max if necessary.  Ignore nulls.
-        if (out->ValidMinMax && !isNull && !isEmpty)
-        {
-
-            if (in->colType.DataType == CalpontSystemCatalog::CHAR ||
-                in->colType.DataType == CalpontSystemCatalog::VARCHAR)
-            {
-                // !!! colCompare is overloaded with int128_t only yet.
-                if (colCompare(out->Min, val, COMPARE_GT, false, in->colType, W))
-                {
-                    out->Min = val;
-                }
-
-                if (colCompare(out->Max, val, COMPARE_LT, false, in->colType, W))
-                {
-                    out->Max = val;
-                }
-            }
-            else
-            {
-                if (out->Min > val)
-                {
-                    out->Min = val;
-                }
-
-                if (out->Max < val)
-                {
-                    out->Max = val;
-                }
-            }
-        }
-
-        bval = (binWtype*)nextBinColValue<W>(in->colType.DataType, ridArray, in->NVALS, &nextRidIndex, &done, &isNull,
-            &isEmpty, &rid, in->OutputType, reinterpret_cast<uint8_t*>(block), itemsPerBlk);
-
-    }
-
-    if (fStatsPtr)
-#ifdef _MSC_VER
-        fStatsPtr->markEvent(in->LBID, GetCurrentThreadId(), in->hdr.SessionID, 'K');
-
-#else
-        fStatsPtr->markEvent(in->LBID, pthread_self(), in->hdr.SessionID, 'K');
-#endif
-}
+} // end of filterColumnData
 
 } //namespace anon
 
 namespace primitives
 {
 
-void PrimitiveProcessor::p_Col(NewColRequestHeader* in, NewColResultHeader* out,
-                               unsigned outSize, unsigned* written)
+// The routine used to dispatch CHAR|VARCHAR|TEXT|BLOB scan.
+inline bool isDictTokenScan(NewColRequestHeader* in)
 {
+    switch (in->colType.DataType)
+    {
+        case CalpontSystemCatalog::CHAR:
+            return (in->colType.DataSize > 8);
+
+        case CalpontSystemCatalog::VARCHAR:
+        case CalpontSystemCatalog::BLOB:
+        case CalpontSystemCatalog::TEXT:
+            return (in->colType.DataSize > 7);
+        default:
+            return false;
+    }
+}
+
+// A set of dispatchers for different column widths/integral types.
+template<typename T,
+// Remove this ugly preprocessor macrosses when RHEL7 reaches EOL.
+// This ugly preprocessor if is here b/c of templated class method parameter default value syntax diff b/w gcc versions.
+#ifdef __GNUC__
+ #if ___GNUC__ >= 5
+         typename std::enable_if<sizeof(T) == sizeof(int32_t), T>::type* = nullptr> // gcc >= 5
+ #else
+         typename std::enable_if<sizeof(T) == sizeof(int32_t), T>::type*> // gcc 4.8.5
+ #endif
+#else
+         typename std::enable_if<sizeof(T) == sizeof(int32_t), T>::type* = nullptr>
+#endif
+void PrimitiveProcessor::scanAndFilterTypeDispatcher(NewColRequestHeader* in,
+    NewColResultHeader* out,
+    unsigned outSize,
+    unsigned* written)
+{
+    constexpr int W = sizeof(T);
+    auto dataType = (execplan::CalpontSystemCatalog::ColDataType) in->colType.DataType;
+    if (dataType == execplan::CalpontSystemCatalog::FLOAT)
+    {
+// WIP make this inline function
+        const uint16_t ridSize = in->NVALS;
+        uint16_t* ridArray = in->getRIDArrayPtr(W);
+        const uint32_t itemsPerBlock = logicalBlockMode ? BLOCK_SIZE
+                                                        : BLOCK_SIZE / W;
+        filterColumnData<T, KIND_FLOAT>(in, out, outSize, written, ridArray, ridSize, block, itemsPerBlock, parsedColumnFilter);
+        return;
+    }
+    _scanAndFilterTypeDispatcher<T>(in, out, outSize, written);
+}
+
+template<typename T,
+#ifdef __GNUC__
+ #if ___GNUC__ >= 5
+         typename std::enable_if<sizeof(T) == sizeof(int64_t), T>::type* = nullptr> // gcc >= 5
+ #else
+         typename std::enable_if<sizeof(T) == sizeof(int64_t), T>::type*> // gcc 4.8.5
+ #endif
+#else
+         typename std::enable_if<sizeof(T) == sizeof(int64_t), T>::type* = nullptr>
+#endif
+void PrimitiveProcessor::scanAndFilterTypeDispatcher(NewColRequestHeader* in,
+    NewColResultHeader* out,
+    unsigned outSize,
+    unsigned* written)
+{
+    constexpr int W = sizeof(T);
+    auto dataType = (execplan::CalpontSystemCatalog::ColDataType) in->colType.DataType;
+    if (dataType == execplan::CalpontSystemCatalog::DOUBLE)
+    {
+        const uint16_t ridSize = in->NVALS;
+        uint16_t* ridArray = in->getRIDArrayPtr(W);
+        const uint32_t itemsPerBlock = logicalBlockMode ? BLOCK_SIZE
+                                                        : BLOCK_SIZE / W;
+        filterColumnData<T, KIND_FLOAT>(in, out, outSize, written, ridArray, ridSize, block, itemsPerBlock, parsedColumnFilter);
+        return;
+    }
+    _scanAndFilterTypeDispatcher<T>(in, out, outSize, written);
+}
+
+template<typename T,
+         typename std::enable_if<sizeof(T) == sizeof(int8_t) ||
+                                 sizeof(T) == sizeof(int16_t) ||
+#ifdef __GNUC__
+ #if ___GNUC__ >= 5
+                                 sizeof(T) == sizeof(int128_t), T>::type* = nullptr> // gcc >= 5
+ #else
+                                 sizeof(T) == sizeof(int128_t), T>::type*> // gcc 4.8.5
+ #endif
+#else
+                                 sizeof(T) == sizeof(int128_t), T>::type* = nullptr>
+#endif
+void PrimitiveProcessor::scanAndFilterTypeDispatcher(NewColRequestHeader* in,
+    NewColResultHeader* out,
+    unsigned outSize,
+    unsigned* written)
+{
+    _scanAndFilterTypeDispatcher<T>(in, out, outSize, written);
+}
+
+template<typename T,
+#ifdef __GNUC__
+ #if ___GNUC__ >= 5
+         typename std::enable_if<sizeof(T) == sizeof(int128_t), T>::type* = nullptr> // gcc >= 5
+ #else
+         typename std::enable_if<sizeof(T) == sizeof(int128_t), T>::type*> // gcc 4.8.5
+ #endif
+#else
+         typename std::enable_if<sizeof(T) == sizeof(int128_t), T>::type* = nullptr>
+#endif
+void PrimitiveProcessor::_scanAndFilterTypeDispatcher(NewColRequestHeader* in,
+    NewColResultHeader* out,
+    unsigned outSize,
+    unsigned* written)
+{
+    constexpr int W = sizeof(T);
+    const uint16_t ridSize = in->NVALS;
+    uint16_t* ridArray = in->getRIDArrayPtr(W);
+    const uint32_t itemsPerBlock = logicalBlockMode ? BLOCK_SIZE
+                                                    : BLOCK_SIZE / W;
+
+    filterColumnData<T, KIND_DEFAULT>(in, out, outSize, written, ridArray, ridSize, block, itemsPerBlock, parsedColumnFilter);
+}
+
+template<typename T,
+#ifdef __GNUC__
+ #if ___GNUC__ >= 5
+         typename std::enable_if<sizeof(T) <= sizeof(int64_t), T>::type* = nullptr> // gcc >= 5
+ #else
+         typename std::enable_if<sizeof(T) <= sizeof(int64_t), T>::type*> // gcc 4.8.5
+ #endif
+#else
+         typename std::enable_if<sizeof(T) <= sizeof(int64_t), T>::type* = nullptr>
+#endif
+void PrimitiveProcessor::_scanAndFilterTypeDispatcher(NewColRequestHeader* in,
+    NewColResultHeader* out,
+    unsigned outSize,
+    unsigned* written)
+{
+    constexpr int W = sizeof(T);
+    const uint16_t ridSize = in->NVALS;
+    uint16_t* ridArray = in->getRIDArrayPtr(W);
+    const uint32_t itemsPerBlock = logicalBlockMode ? BLOCK_SIZE
+                                                    : BLOCK_SIZE / W;
+
+    auto dataType = (execplan::CalpontSystemCatalog::ColDataType) in->colType.DataType;
+    if ((dataType == execplan::CalpontSystemCatalog::CHAR ||
+        dataType == execplan::CalpontSystemCatalog::VARCHAR ||
+        dataType == execplan::CalpontSystemCatalog::TEXT) &&
+        !isDictTokenScan(in))
+    {
+        filterColumnData<T, KIND_TEXT>(in, out, outSize, written, ridArray, ridSize, block, itemsPerBlock, parsedColumnFilter);
+        return;
+    }
+
+    if (datatypes::isUnsigned(dataType))
+    {
+        using UT = typename std::conditional<std::is_unsigned<T>::value || datatypes::is_uint128_t<T>::value, T, typename datatypes::make_unsigned<T>::type>::type;
+        filterColumnData<UT, KIND_UNSIGNED>(in, out, outSize, written, ridArray, ridSize, block, itemsPerBlock, parsedColumnFilter);
+        return;
+    }
+    filterColumnData<T, KIND_DEFAULT>(in, out, outSize, written, ridArray, ridSize, block, itemsPerBlock, parsedColumnFilter);
+}
+
+// The entrypoint for block scanning and filtering.
+// The block is in in msg, out msg is used to store values|RIDs matched.
+template<typename T>
+void PrimitiveProcessor::columnScanAndFilter(NewColRequestHeader* in, NewColResultHeader* out,
+                                             unsigned outSize, unsigned* written)
+{
+#ifdef PRIM_DEBUG
+    auto markEvent = [&] (char eventChar)
+    {
+        if (fStatsPtr)
+            fStatsPtr->markEvent(in->LBID, pthread_self(), in->hdr.SessionID, eventChar);
+    };
+#endif
+    constexpr int W = sizeof(T);
+
     void *outp = static_cast<void*>(out);
     memcpy(outp, in, sizeof(ISMPacketHeader) + sizeof(PrimitiveHeader));
     out->NVALS = 0;
@@ -1479,19 +1502,11 @@ void PrimitiveProcessor::p_Col(NewColRequestHeader* in, NewColResultHeader* out,
     out->OutputType = in->OutputType;
     out->RidFlags = 0;
     *written = sizeof(NewColResultHeader);
-    unsigned itemsPerBlk = 0;
-
-    if (logicalBlockMode)
-        itemsPerBlk = BLOCK_SIZE;
-    else
-        itemsPerBlk = BLOCK_SIZE / in->colType.DataSize;
-
     //...Initialize I/O counts;
     out->CacheIO    = 0;
     out->PhysicalIO = 0;
 
 #if 0
-
     // short-circuit the actual block scan for testing
     if (out->LBID >= 802816)
     {
@@ -1500,205 +1515,30 @@ void PrimitiveProcessor::p_Col(NewColRequestHeader* in, NewColResultHeader* out,
         out->Max = 0;
         return;
     }
-
 #endif
 
-    if (fStatsPtr)
-#ifdef _MSC_VER
-        fStatsPtr->markEvent(in->LBID, GetCurrentThreadId(), in->hdr.SessionID, 'B');
-
-#else
-        fStatsPtr->markEvent(in->LBID, pthread_self(), in->hdr.SessionID, 'B');
+#ifdef PRIM_DEBUG
+    markEvent('B');
 #endif
 
-    switch (in->colType.DataSize)
-    {
-        case 8:
-            p_Col_ridArray<8>(in, out, outSize, written, block, fStatsPtr, itemsPerBlk, parsedColumnFilter);
-            break;
-
-        case 4:
-            p_Col_ridArray<4>(in, out, outSize, written, block, fStatsPtr, itemsPerBlk, parsedColumnFilter);
-            break;
-
-        case 2:
-            p_Col_ridArray<2>(in, out, outSize, written, block, fStatsPtr, itemsPerBlk, parsedColumnFilter);
-            break;
-
-        case 1:
-            p_Col_ridArray<1>(in, out, outSize, written, block, fStatsPtr, itemsPerBlk, parsedColumnFilter);
-            break;
-
-        case 16:
-            p_Col_bin_ridArray<16, int128_t>(in, out, outSize, written, block, fStatsPtr, itemsPerBlk, parsedColumnFilter);
-            break;
-
-        default:
-            idbassert(0);
-            break;
-    }
-
-    if (fStatsPtr)
-#ifdef _MSC_VER
-        fStatsPtr->markEvent(in->LBID, GetCurrentThreadId(), in->hdr.SessionID, 'C');
-
-#else
-        fStatsPtr->markEvent(in->LBID, pthread_self(), in->hdr.SessionID, 'C');
+    // Sort ridArray (the row index array) if there are RIDs with this in msg
+    in->sortRIDArrayIfNeeded(W);
+    scanAndFilterTypeDispatcher<T>(in, out, outSize, written);
+#ifdef PRIM_DEBUG
+    markEvent('C');
 #endif
 }
 
-boost::shared_ptr<ParsedColumnFilter> parseColumnFilter
-(const uint8_t* filterString, uint32_t colWidth, uint32_t colType, uint32_t filterCount,
- uint32_t BOP)
-{
-    boost::shared_ptr<ParsedColumnFilter> ret;
-    uint32_t argIndex;
-    const ColArgs* args;
-    bool convertToSet = true;
-
-    if (filterCount == 0)
-        return ret;
-
-    ret.reset(new ParsedColumnFilter());
-
-    ret->columnFilterMode = TWO_ARRAYS;
-    if (datatypes::isWideDecimalType(
-        (CalpontSystemCatalog::ColDataType)colType, colWidth))
-        ret->prestored_argVals128.reset(new int128_t[filterCount]);
-    else
-        ret->prestored_argVals.reset(new int64_t[filterCount]);
-    ret->prestored_cops.reset(new uint8_t[filterCount]);
-    ret->prestored_rfs.reset(new uint8_t[filterCount]);
-
-    /*
-    for (unsigned ii = 0; ii < filterCount; ii++)
-    {
-    	ret->prestored_argVals[ii] = 0;
-    	ret->prestored_cops[ii] = 0;
-    	ret->prestored_rfs[ii] = 0;
-    }
-    */
-
-    const uint8_t filterSize = sizeof(uint8_t) + sizeof(uint8_t) + colWidth;
-
-    /*  Decide which structure to use.  I think the only cases where we can use the set
-    	are when NOPS > 1, BOP is OR, and every COP is ==,
-    	and when NOPS > 1, BOP is AND, and every COP is !=.
-
-    	Parse the filter predicates and insert them into argVals and cops.
-    	If there were no predicates that violate the condition for using a set,
-    	insert argVals into a set.
-    */
-    if (filterCount == 1)
-        convertToSet = false;
-
-    for (argIndex = 0; argIndex < filterCount; argIndex++)
-    {
-        args = reinterpret_cast<const ColArgs*>(filterString + (argIndex * filterSize));
-        ret->prestored_cops[argIndex] = args->COP;
-        ret->prestored_rfs[argIndex] = args->rf;
-
-        if ((BOP == BOP_OR && args->COP != COMPARE_EQ) ||
-                (BOP == BOP_AND && args->COP != COMPARE_NE) ||
-                (args->COP == COMPARE_NIL))
-            convertToSet = false;
-
-        if (isUnsigned((CalpontSystemCatalog::ColDataType)colType))
-        {
-            switch (colWidth)
-            {
-                case 1:
-                    ret->prestored_argVals[argIndex] = *reinterpret_cast<const uint8_t*>(args->val);
-                    break;
-
-                case 2:
-                    ret->prestored_argVals[argIndex] = *reinterpret_cast<const uint16_t*>(args->val);
-                    break;
-
-                case 4:
-                    ret->prestored_argVals[argIndex] = *reinterpret_cast<const uint32_t*>(args->val);
-                    break;
-
-                case 8:
-                    ret->prestored_argVals[argIndex] = *reinterpret_cast<const uint64_t*>(args->val);
-                    break;
-            }
-        }
-        else
-        {
-            switch (colWidth)
-            {
-                case 1:
-                    ret->prestored_argVals[argIndex] = args->val[0];
-                    break;
-
-                case 2:
-                    ret->prestored_argVals[argIndex] = *reinterpret_cast<const int16_t*>(args->val);
-                    break;
-
-                case 4:
-#if 0
-                    if (colType == CalpontSystemCatalog::FLOAT)
-                    {
-                        double dTmp;
-
-                        dTmp = (double) * ((const float*) args->val);
-                        ret->prestored_argVals[argIndex] = *((int64_t*) &dTmp);
-                    }
-                    else
-                        ret->prestored_argVals[argIndex] =
-                            *reinterpret_cast<const int32_t*>(args->val);
-
-#else
-                    ret->prestored_argVals[argIndex] = *reinterpret_cast<const int32_t*>(args->val);
-#endif
-                    break;
-
-                case 8:
-                    ret->prestored_argVals[argIndex] = *reinterpret_cast<const int64_t*>(args->val);
-                    break;
-
-                case 16:
-                {
-                    datatypes::TSInt128::assignPtrPtr(&(ret->prestored_argVals128[argIndex]),
-                                                            args->val);
-                    break;
-                }
-            }
-        }
-
-// 		cout << "inserted* " << hex << ret->prestored_argVals[argIndex] << dec <<
-// 		  " COP = " << (int) ret->prestored_cops[argIndex] << endl;
-
-    }
-
-    if (convertToSet)
-    {
-        ret->columnFilterMode = UNORDERED_SET;
-        if (datatypes::isWideDecimalType(
-            (CalpontSystemCatalog::ColDataType)colType, colWidth))
-        {
-            ret->prestored_set_128.reset(new prestored_set_t_128());
-
-            // @bug 2584, use COMPARE_NIL for "= null" to allow "is null" in OR expression
-            for (argIndex = 0; argIndex < filterCount; argIndex++)
-                if (ret->prestored_rfs[argIndex] == 0)
-                    ret->prestored_set_128->insert(ret->prestored_argVals128[argIndex]);
-        }
-        else
-        {
-            ret->prestored_set.reset(new prestored_set_t());
-
-            // @bug 2584, use COMPARE_NIL for "= null" to allow "is null" in OR expression
-            for (argIndex = 0; argIndex < filterCount; argIndex++)
-                if (ret->prestored_rfs[argIndex] == 0)
-                    ret->prestored_set->insert(ret->prestored_argVals[argIndex]);
-        }
-    }
-
-    return ret;
-}
+template
+void primitives::PrimitiveProcessor::columnScanAndFilter<int8_t>(NewColRequestHeader*, NewColResultHeader*, unsigned, unsigned*);
+template
+void primitives::PrimitiveProcessor::columnScanAndFilter<int16_t>(NewColRequestHeader*, NewColResultHeader*, unsigned int, unsigned int*);
+template
+void primitives::PrimitiveProcessor::columnScanAndFilter<int32_t>(NewColRequestHeader*, NewColResultHeader*, unsigned int, unsigned int*);
+template
+void primitives::PrimitiveProcessor::columnScanAndFilter<int64_t>(NewColRequestHeader*, NewColResultHeader*, unsigned int, unsigned int*);
+template
+void primitives::PrimitiveProcessor::columnScanAndFilter<int128_t>(NewColRequestHeader*, NewColResultHeader*, unsigned int, unsigned int*);
 
 } // namespace primitives
 // vim:ts=4 sw=4:
-
diff --git a/primitives/linux-port/pp-scan-unittest.cpp b/primitives/linux-port/pp-scan-unittest.cpp
index 72483bc9e..69309e8ed 100644
--- a/primitives/linux-port/pp-scan-unittest.cpp
+++ b/primitives/linux-port/pp-scan-unittest.cpp
@@ -3238,7 +3238,7 @@ public:
         args->COP = COMPARE_LT;
         memcpy(args->val, &tmp, in->DataSize);
         args = reinterpret_cast<ColArgs*>(&input[sizeof(NewColRequestHeader) +
-                                                                      sizeof(ColArgs) + in->DataSize]);
+                                                 sizeof(ColArgs) + in->DataSize]);
         args->COP = COMPARE_GT;
         tmp = 1000;
         memcpy(args->val, &tmp, in->DataSize);
diff --git a/primitives/linux-port/primitiveprocessor.cpp b/primitives/linux-port/primitiveprocessor.cpp
index 3a8adaa4f..cc71b1eaa 100644
--- a/primitives/linux-port/primitiveprocessor.cpp
+++ b/primitives/linux-port/primitiveprocessor.cpp
@@ -58,10 +58,17 @@ void PrimitiveProcessor::setParsedColumnFilter(boost::shared_ptr<ParsedColumnFil
     parsedColumnFilter = pcf;
 }
 
-ParsedColumnFilter::ParsedColumnFilter() : columnFilterMode(STANDARD)
+ParsedColumnFilter::ParsedColumnFilter() : columnFilterMode(ALWAYS_TRUE), mFilterCount(0) 
 {
 }
 
+ParsedColumnFilter::ParsedColumnFilter(const uint32_t aFilterCount)
+    : columnFilterMode(ALWAYS_TRUE), mFilterCount(aFilterCount)
+{
+    prestored_rfs.reset(new uint8_t[mFilterCount]);
+    prestored_cops.reset(new uint8_t[mFilterCount]);
+}
+
 ParsedColumnFilter::~ParsedColumnFilter()
 {
 }
diff --git a/primitives/linux-port/primitiveprocessor.h b/primitives/linux-port/primitiveprocessor.h
index 62e2b7dfa..f06b13cee 100644
--- a/primitives/linux-port/primitiveprocessor.h
+++ b/primitives/linux-port/primitiveprocessor.h
@@ -1,4 +1,5 @@
 /* Copyright (C) 2014 InfiniDB, Inc.
+   Copyright (C) 2016-2021 MariaDB Corporation
 
    This program is free software; you can redistribute it and/or
    modify it under the terms of the GNU General Public License
@@ -59,11 +60,18 @@ namespace primitives
 
 enum ColumnFilterMode
 {
-    STANDARD,
-    TWO_ARRAYS,
-    UNORDERED_SET
+    ALWAYS_TRUE,            // empty filter is always true
+    SINGLE_COMPARISON,      // exactly one comparison operation
+    ANY_COMPARISON_TRUE,    // ANY comparison is true (BOP_OR)
+    ALL_COMPARISONS_TRUE,   // ALL comparisons are true (BOP_AND)
+    XOR_COMPARISONS,        // XORing results of comparisons (BOP_XOR)
+    ONE_OF_VALUES_IN_SET,   // ONE of the values in the set is equal to the value checked (BOP_OR + all COMPARE_EQ)
+    NONE_OF_VALUES_IN_SET,  // NONE of the values in the set is equal to the value checked (BOP_AND + all COMPARE_NE)
+    ONE_OF_VALUES_IN_ARRAY, // ONE of the values in the small set represented by an array (BOP_OR + all COMPARE_EQ)
+    NONE_OF_VALUES_IN_ARRAY,// NONE of the values in the small set represented by an array (BOP_AND + all COMPARE_NE)
 };
 
+// TBD Test if avalance makes lookup in the hash maps based on this hashers faster.
 class pcfHasher
 {
 public:
@@ -100,6 +108,7 @@ public:
     }
 };
 
+// TBD Test robinhood as tr1 set replacement.
 typedef std::tr1::unordered_set<int64_t, pcfHasher, pcfEqual> prestored_set_t;
 typedef std::tr1::unordered_set<int128_t, pcfHasher128, pcfEqual128> prestored_set_t_128;
 
@@ -124,9 +133,39 @@ public:
     }
 };
 
-
-struct ParsedColumnFilter
+// Not the safest way b/c it doesn't cover uint128_t but the type
+// isn't used now and the approach saves some space.
+template <typename T, typename D = void>
+struct IntegralTypeToFilterType
 {
+    using type = int64_t;
+};
+
+template<>
+struct IntegralTypeToFilterType<int128_t>
+{
+    using type = int128_t;
+};
+
+template <typename T, typename D = void>
+struct IntegralTypeToFilterSetType
+{
+    using type = prestored_set_t;
+};
+
+template<>
+struct IntegralTypeToFilterSetType<int128_t>
+{
+    using type = prestored_set_t_128;
+};
+
+// DRRTuy shared_arrays and shared_ptr looks redundant here
+// given that the higher level code uses shared_ptr<ParsedColumnFilter>
+// thus runtime calls ParsedColumnFilter dtor in the end.
+class ParsedColumnFilter
+{
+  public:
+    static constexpr uint32_t noSetFilterThreshold = 8; 
     ColumnFilterMode columnFilterMode;
     boost::shared_array<int64_t> prestored_argVals;
     boost::shared_array<int128_t> prestored_argVals128;
@@ -136,7 +175,92 @@ struct ParsedColumnFilter
     boost::shared_ptr<prestored_set_t_128> prestored_set_128;
 
     ParsedColumnFilter();
+    ParsedColumnFilter(const uint32_t aFilterCount);
     ~ParsedColumnFilter();
+
+    template<typename T,
+             typename std::enable_if<std::is_same<T, int64_t>::value, T>::type* = nullptr>
+    T* getFilterVals()
+    {
+        return prestored_argVals.get();
+    }
+
+    template<typename T,
+             typename std::enable_if<std::is_same<T, int128_t>::value, T>::type* = nullptr>
+    T* getFilterVals()
+    {
+        return prestored_argVals128.get();
+    }
+
+    template<typename T,
+             typename std::enable_if<std::is_same<T, prestored_set_t>::value, T>::type* = nullptr>
+    T* getFilterSet()
+    {
+        return prestored_set.get();
+    }
+
+    template<typename T,
+             typename std::enable_if<std::is_same<T, prestored_set_t_128>::value, T>::type* = nullptr>
+    T* getFilterSet()
+    {
+        return prestored_set_128.get();
+    }
+
+    template<typename T,
+             typename std::enable_if<sizeof(T) <= sizeof(int64_t), T>::type* = nullptr>
+    void storeFilterArg(const uint32_t argIndex, const T* argValPtr)
+    {
+        prestored_argVals[argIndex] = *argValPtr;
+    }
+
+    template<typename WT,
+             typename std::enable_if<sizeof(WT) == sizeof(int128_t), WT>::type* = nullptr>
+    void storeFilterArg(const uint32_t argIndex, const WT* argValPtr)
+    {
+        datatypes::TSInt128::assignPtrPtr(&(prestored_argVals128[argIndex]),
+                                          argValPtr);
+    }
+
+    template<typename T,
+             typename std::enable_if<sizeof(T) <= sizeof(int64_t), T>::type* = nullptr>
+    void allocateSpaceForFilterArgs()
+    {
+        prestored_argVals.reset(new int64_t[mFilterCount]);
+    }
+
+    template<typename WT,
+             typename std::enable_if<sizeof(WT) == sizeof(int128_t), WT>::type* = nullptr>
+    void allocateSpaceForFilterArgs()
+    {
+        prestored_argVals128.reset(new int128_t[mFilterCount]);
+    }
+
+    template<typename WT,
+             typename std::enable_if<sizeof(WT) == sizeof(int128_t), WT>::type* = nullptr>
+    void populatePrestoredSet()
+    {
+        prestored_set_128.reset(new prestored_set_t_128());
+
+        // @bug 2584, use COMPARE_NIL for "= null" to allow "is null" in OR expression
+        for (uint32_t argIndex = 0; argIndex < mFilterCount; ++argIndex)
+            if (prestored_rfs[argIndex] == 0)
+                prestored_set_128->insert(prestored_argVals128[argIndex]);
+    }
+
+    template<typename T,
+             typename std::enable_if<sizeof(T) <= sizeof(int64_t), T>::type* = nullptr>
+    void populatePrestoredSet()
+    {
+        prestored_set.reset(new prestored_set_t());
+
+        // @bug 2584, use COMPARE_NIL for "= null" to allow "is null" in OR expression
+        for (uint32_t argIndex = 0; argIndex < mFilterCount; argIndex++)
+            if (prestored_rfs[argIndex] == 0)
+                prestored_set->insert(prestored_argVals[argIndex]);
+    }
+
+  private:
+    uint32_t mFilterCount;
 };
 
 //@bug 1828 These need to be public so that column operations can use it for 'like'
@@ -146,9 +270,6 @@ struct p_DataValue
     const uint8_t* data;
 };
 
-boost::shared_ptr<ParsedColumnFilter> parseColumnFilter(const uint8_t* filterString,
-        uint32_t colWidth, uint32_t colType, uint32_t filterCount, uint32_t BOP);
-
 /** @brief This class encapsulates the primitive processing functionality of the system.
  *
  *  This class encapsulates the primitive processing functionality of the system.
@@ -258,7 +379,7 @@ public:
      * a NewColResultHeader, followed by the output type specified by in->OutputType.
      * \li If OT_RID, it will be an array of RIDs
      * \li If OT_DATAVALUE, it will be an array of matching data values stored in the column
-     * \li If OT_BOTH, it will be an array of <DataValue, RID> pairs
+     * \li If OT_BOTH, it will be an array of <RID, DataValue> pairs
      * @param outSize The size of the output buffer in bytes.
      * @param written (out parameter) A pointer to 1 int, which will contain the
      * number of bytes written to out.
@@ -267,8 +388,41 @@ public:
     void p_Col(NewColRequestHeader* in, NewColResultHeader* out, unsigned outSize,
                unsigned* written);
 
+    template<typename T,
+             typename std::enable_if<sizeof(T) == sizeof(int8_t) ||
+                                     sizeof(T) == sizeof(int16_t) ||
+                                     sizeof(T) == sizeof(int128_t), T>::type* = nullptr>
+    void scanAndFilterTypeDispatcher(NewColRequestHeader* in, NewColResultHeader* out,
+                                    unsigned outSize, unsigned* written);
+
+    template<typename T,
+             typename std::enable_if<sizeof(T) == sizeof(int32_t), T>::type* = nullptr>
+    void scanAndFilterTypeDispatcher(NewColRequestHeader* in, NewColResultHeader* out,
+                                     unsigned outSize, unsigned* written);
+    template<typename T,
+             typename std::enable_if<sizeof(T) == sizeof(int64_t), T>::type* = nullptr>
+    void scanAndFilterTypeDispatcher(NewColRequestHeader* in, NewColResultHeader* out,
+                                     unsigned outSize, unsigned* written);
+    
+    template<typename T,
+             typename std::enable_if<sizeof(T) <= sizeof(int64_t), T>::type* = nullptr>
+    void _scanAndFilterTypeDispatcher(NewColRequestHeader* in, NewColResultHeader* out,
+                                    unsigned outSize, unsigned* written);
+
+    template<typename T,
+             typename std::enable_if<sizeof(T) == sizeof(int128_t), T>::type* = nullptr>
+    void _scanAndFilterTypeDispatcher(NewColRequestHeader* in, NewColResultHeader* out,
+                                    unsigned outSize, unsigned* written);
+ 
+    template<typename T>
+    void columnScanAndFilter(NewColRequestHeader* in, NewColResultHeader* out,
+                             unsigned outSize, unsigned* written);
+
     boost::shared_ptr<ParsedColumnFilter> parseColumnFilter(const uint8_t* filterString,
-            uint32_t colWidth, uint32_t colType, uint32_t filterCount, uint32_t BOP);
+                                                            uint32_t colWidth,
+                                                            uint32_t colType,
+                                                            uint32_t filterCount,
+                                                            uint32_t BOP);
     void setParsedColumnFilter(boost::shared_ptr<ParsedColumnFilter>);
 
     /** @brief The p_ColAggregate primitive processor.
@@ -312,9 +466,6 @@ private:
     void nextSig(int NVALS, const PrimToken* tokens, p_DataValue* ret,
                  uint8_t outputFlags = 0, bool oldGetSigBehavior = false, bool skipNulls = false) throw();
 
-//	void do_sum8(NewColAggResultHeader *out, int64_t val);
-//    void do_unsignedsum8(NewColAggResultHeader *out, int64_t val);
-
     uint64_t masks[11];
     int dict_OffsetIndex, currentOffsetIndex;		// used by p_dictionary
     int fDebugLevel;
@@ -326,6 +477,108 @@ private:
     friend class ::PrimTest;
 };
 
+//
+//COMPILE A COLUMN FILTER
+//
+// Compile column filter from BLOB into structure optimized for fast filtering.
+// Return a shared_ptr for the compiled filter.
+template<typename T>                // C++ integer type providing storage for colType
+boost::shared_ptr<ParsedColumnFilter> _parseColumnFilter(
+    const uint8_t* filterString,    // Filter represented as BLOB
+    uint32_t colType,               // Column datatype as ColDataType
+    uint32_t filterCount,           // Number of filter elements contained in filterString
+    uint32_t BOP)                   // Operation (and/or/xor/none) that combines all filter elements
+{
+    using UT = typename std::conditional<std::is_unsigned<T>::value || datatypes::is_uint128_t<T>::value, T, typename datatypes::make_unsigned<T>::type>::type;
+    const uint32_t WIDTH = sizeof(T);  // Sizeof of the column to be filtered
+
+    boost::shared_ptr<ParsedColumnFilter> ret;  // Place for building the value to return
+    if (filterCount == 0)
+        return ret;
+
+    // Allocate the compiled filter structure with space for filterCount filters.
+    // No need to init arrays since they will be filled on the fly.
+    ret.reset(new ParsedColumnFilter(filterCount));
+    ret->allocateSpaceForFilterArgs<T>();
+
+    // Choose initial filter mode based on operation and number of filter elements
+    if (filterCount == 1)
+        ret->columnFilterMode = SINGLE_COMPARISON;
+    else if (BOP == BOP_OR)
+        ret->columnFilterMode = ANY_COMPARISON_TRUE;
+    else if (BOP == BOP_AND)
+        ret->columnFilterMode = ALL_COMPARISONS_TRUE;
+    else if (BOP == BOP_XOR)
+        ret->columnFilterMode = XOR_COMPARISONS;
+    else
+        idbassert(0);   // BOP_NONE is compatible only with filterCount <= 1
+
+    // Size of single filter element in filterString BLOB
+    const uint32_t filterSize = sizeof(uint8_t) + sizeof(uint8_t) + WIDTH;
+
+    // Parse the filter predicates and insert them into argVals and cops
+    for (uint32_t argIndex = 0; argIndex < filterCount; argIndex++)
+    {
+        // Pointer to ColArgs structure representing argIndex'th element in the BLOB
+        auto args = reinterpret_cast<const ColArgs*>(filterString + (argIndex * filterSize));
+
+        ret->prestored_cops[argIndex] = args->COP;
+        ret->prestored_rfs[argIndex] = args->rf;
+
+        if (datatypes::isUnsigned((execplan::CalpontSystemCatalog::ColDataType)colType))
+            ret->storeFilterArg(argIndex, reinterpret_cast<const UT*>(args->val));
+        else
+            ret->storeFilterArg(argIndex, reinterpret_cast<const T*>(args->val));
+    }
+
+    /*  Decide which structure to use.  I think the only cases where we can use the set
+        are when NOPS > 1, BOP is OR, and every COP is ==,
+        and when NOPS > 1, BOP is AND, and every COP is !=.
+
+        If there were no predicates that violate the condition for using a set,
+        insert argVals into a set.
+    */
+    if (filterCount > 1)
+    {
+        // Check that all COPs are of right kind that depends on BOP
+        for (uint32_t argIndex = 0; argIndex < filterCount; argIndex++)
+        {
+            auto cop = ret->prestored_cops[argIndex];
+            if (! ((BOP == BOP_OR  && cop == COMPARE_EQ) ||
+                   (BOP == BOP_AND && cop == COMPARE_NE)))
+            {
+                goto skipConversion;
+            }
+        }
+
+        // Now we found that conversion is possible. Let's choose between array-based search
+        // and set-based search depending on the set size.
+        //TODO: Tailor the threshold based on the actual search algorithms used and WIDTH/SIMD_WIDTH
+        if (filterCount <= ParsedColumnFilter::noSetFilterThreshold)
+        {
+            // Assign filter mode of array-based filtering
+            if (BOP == BOP_OR)
+                ret->columnFilterMode = ONE_OF_VALUES_IN_ARRAY;
+            else
+                ret->columnFilterMode = NONE_OF_VALUES_IN_ARRAY;
+        }
+        else
+        {
+            // Assign filter mode of set-based filtering
+            if (BOP == BOP_OR)
+                ret->columnFilterMode = ONE_OF_VALUES_IN_SET;
+            else
+                ret->columnFilterMode = NONE_OF_VALUES_IN_SET;
+
+            ret->populatePrestoredSet<T>();
+        }
+
+        skipConversion:;
+    }
+
+    return ret;
+}
+
 } //namespace primitives
 
 #endif
diff --git a/primitives/primproc/batchprimitiveprocessor.h b/primitives/primproc/batchprimitiveprocessor.h
index 144361253..1f9b2541c 100644
--- a/primitives/primproc/batchprimitiveprocessor.h
+++ b/primitives/primproc/batchprimitiveprocessor.h
@@ -169,6 +169,14 @@ public:
     {
         return doJoin;
     }
+    primitives::PrimitiveProcessor& getPrimitiveProcessor()
+    {
+        return pp;
+    }
+    uint32_t getOutMsgSize() const
+    {
+        return outMsgSize;
+    }
 private:
     BatchPrimitiveProcessor();
     BatchPrimitiveProcessor(const BatchPrimitiveProcessor&);
diff --git a/primitives/primproc/columncommand.cpp b/primitives/primproc/columncommand.cpp
index e604e12d1..c75631f3d 100644
--- a/primitives/primproc/columncommand.cpp
+++ b/primitives/primproc/columncommand.cpp
@@ -1,4 +1,5 @@
 /* Copyright (C) 2014 InfiniDB, Inc.
+   Copyright (C) 2016-2021 MariaDB Corporation
 
    This program is free software; you can redistribute it and/or
    modify it under the terms of the GNU General Public License
@@ -249,7 +250,36 @@ void ColumnCommand::loadData()
 
 void ColumnCommand::issuePrimitive()
 {
-    _issuePrimitive();
+    loadData();
+
+    if (!suppressFilter)
+        bpp->getPrimitiveProcessor().setParsedColumnFilter(parsedColumnFilter);
+    else
+        bpp->getPrimitiveProcessor().setParsedColumnFilter(emptyFilter);
+
+    switch(colType.colWidth)
+    {
+        case 1:
+            _issuePrimitive<1>();
+            break;
+        case 2:
+            _issuePrimitive<2>();
+            break;
+        case 4:
+            _issuePrimitive<4>();
+            break;
+        case 8:
+            _issuePrimitive<8>();
+            break;
+        case 16:
+            _issuePrimitive<16>();
+            break;
+        default:
+            throw NotImplementedExcept(std::string("ColumnCommand::_issuePrimitive does not support ")
+                                       + std::to_string(colType.colWidth)
+                                       + std::string(" byte width."));
+    }
+
     if (_isScan)
     {
         if (LIKELY(colType.isNarrow()))
@@ -259,18 +289,29 @@ void ColumnCommand::issuePrimitive()
     }
 }
 
-void ColumnCommand::_issuePrimitive()
+template<int W>
+void ColumnCommand::_issuePrimitiveNarrow()
 {
-    uint32_t resultSize;
-
-    loadData();
+    _loadData<W>();
 
     if (!suppressFilter)
-        bpp->pp.setParsedColumnFilter(parsedColumnFilter);
+        bpp->getPrimitiveProcessor().setParsedColumnFilter(parsedColumnFilter);
     else
-        bpp->pp.setParsedColumnFilter(emptyFilter);
+        bpp->getPrimitiveProcessor().setParsedColumnFilter(emptyFilter);
 
-    bpp->pp.p_Col(primMsg, outMsg, bpp->outMsgSize, (unsigned int*)&resultSize);
+    _issuePrimitive<W>();
+
+    if (_isScan)
+        updateCPDataNarrow();
+}
+
+
+template<int W>
+void ColumnCommand::_issuePrimitive()
+{
+    using IntegralType = typename datatypes::WidthToSIntegralType<W>::type;
+    uint32_t resultSize;
+    bpp->getPrimitiveProcessor().columnScanAndFilter<IntegralType>(primMsg, outMsg, bpp->getOutMsgSize(), (unsigned int*)&resultSize);
 } // _issuePrimitive()
 
 void ColumnCommand::updateCPDataNarrow()
@@ -559,14 +600,33 @@ void ColumnCommand::createCommand(ByteStream& bs)
     deserializeInlineVector(bs, lastLbid);
 
     Command::createCommand(bs);
+    switch (colType.colWidth)
+    {
+        case 1:
+            createColumnFilter<datatypes::WidthToSIntegralType<1>::type>();
+            break;
 
-    parsedColumnFilter = primitives::parseColumnFilter(filterString.buf(), colType.colWidth,
-                         colType.colDataType, filterCount, BOP);
+        case 2:
+            createColumnFilter<datatypes::WidthToSIntegralType<2>::type>();
+            break;
 
-    /* OR hack */
-    emptyFilter = primitives::parseColumnFilter(filterString.buf(), colType.colWidth,
-                  colType.colDataType, 0, BOP);
+        case 4:
+            createColumnFilter<datatypes::WidthToSIntegralType<4>::type>();
+            break;
 
+        case 8:
+            createColumnFilter<datatypes::WidthToSIntegralType<8>::type>();
+            break;
+
+        case 16:
+            createColumnFilter<datatypes::WidthToSIntegralType<16>::type>();
+            break;
+
+        default:
+            throw NotImplementedExcept(std::string("ColumnCommand::createCommand does not support ")
+                                       + std::to_string(colType.colWidth)
+                                       + std::string(" byte width."));
+    }
 }
 
 void ColumnCommand::createCommand(execplan::CalpontSystemCatalog::ColType& aColType, ByteStream& bs)
@@ -1023,6 +1083,18 @@ ColumnCommand* ColumnCommandFabric::createCommand(messageqcpp::ByteStream& bs)
     return nullptr;
 }
 
+template<typename T>
+void ColumnCommand::createColumnFilter()
+{
+    parsedColumnFilter = primitives::_parseColumnFilter<T>(filterString.buf(),
+                                                           colType.colDataType,
+                                                           filterCount, BOP);
+    /* OR hack */
+    emptyFilter = primitives::_parseColumnFilter<T>(filterString.buf(),
+                                                    colType.colDataType,
+                                                    0, BOP);
+}
+
 ColumnCommand* ColumnCommandFabric::duplicate(const ColumnCommandUniquePtr& rhs)
 {
     auto & command = *rhs;
@@ -1066,12 +1138,7 @@ ColumnCommand* ColumnCommandFabric::duplicate(const ColumnCommandUniquePtr& rhs)
 ColumnCommandInt8::ColumnCommandInt8(execplan::CalpontSystemCatalog::ColType& aColType, messageqcpp::ByteStream& bs)
 {
     ColumnCommand::createCommand(aColType, bs);
-    parsedColumnFilter = primitives::parseColumnFilter(filterString.buf(), colType.colWidth,
-                     colType.colDataType, filterCount, BOP);
-
-    /* OR hack */
-    emptyFilter = primitives::parseColumnFilter(filterString.buf(), colType.colWidth,
-                      colType.colDataType, 0, BOP);
+    createColumnFilter<IntegralType>();
 }
 
 void ColumnCommandInt8::prep(int8_t outputType, bool absRids)
@@ -1104,17 +1171,15 @@ void ColumnCommandInt8::projectResultRG(RowGroup& rg, uint32_t pos)
     _projectResultRG<size>(rg, pos);
 }
 
+void ColumnCommandInt8::issuePrimitive()
+{
+    _issuePrimitiveNarrow<size>();
+}
+
 ColumnCommandInt16::ColumnCommandInt16(execplan::CalpontSystemCatalog::ColType& aColType, messageqcpp::ByteStream& bs)
 {
     ColumnCommand::createCommand(aColType, bs);
-    parsedColumnFilter = primitives::parseColumnFilter(filterString.buf(),
-                                                       colType.colWidth,
-                                                       colType.colDataType,
-                                                       filterCount, BOP);
-    /* OR hack */
-    emptyFilter = primitives::parseColumnFilter(filterString.buf(),
-                                                colType.colWidth,
-                                                colType.colDataType, 0, BOP);
+    createColumnFilter<IntegralType>();
 }
 
 void ColumnCommandInt16::prep(int8_t outputType, bool absRids)
@@ -1147,15 +1212,15 @@ void ColumnCommandInt16::projectResultRG(RowGroup& rg, uint32_t pos)
     _projectResultRG<size>(rg, pos);
 }
 
+void ColumnCommandInt16::issuePrimitive()
+{
+    _issuePrimitiveNarrow<size>();
+}
+
 ColumnCommandInt32::ColumnCommandInt32(execplan::CalpontSystemCatalog::ColType& aColType, messageqcpp::ByteStream& bs)
 {
     ColumnCommand::createCommand(aColType, bs);
-    parsedColumnFilter = primitives::parseColumnFilter(filterString.buf(), colType.colWidth,
-                     colType.colDataType, filterCount, BOP);
-
-    /* OR hack */
-    emptyFilter = primitives::parseColumnFilter(filterString.buf(), colType.colWidth,
-                      colType.colDataType, 0, BOP);
+    createColumnFilter<IntegralType>();
 }
 
 void ColumnCommandInt32::prep(int8_t outputType, bool absRids)
@@ -1188,15 +1253,15 @@ void ColumnCommandInt32::projectResultRG(RowGroup& rg, uint32_t pos)
     _projectResultRG<size>(rg, pos);
 }
 
+void ColumnCommandInt32::issuePrimitive()
+{
+    _issuePrimitiveNarrow<size>();
+}
+
 ColumnCommandInt64::ColumnCommandInt64(execplan::CalpontSystemCatalog::ColType& aColType, messageqcpp::ByteStream& bs)
 {
     ColumnCommand::createCommand(aColType, bs);
-    parsedColumnFilter = primitives::parseColumnFilter(filterString.buf(), colType.colWidth,
-                     colType.colDataType, filterCount, BOP);
-
-    /* OR hack */
-    emptyFilter = primitives::parseColumnFilter(filterString.buf(), colType.colWidth,
-                      colType.colDataType, 0, BOP);
+    createColumnFilter<IntegralType>();
 }
 
 void ColumnCommandInt64::prep(int8_t outputType, bool absRids)
@@ -1229,17 +1294,15 @@ void ColumnCommandInt64::projectResultRG(RowGroup& rg, uint32_t pos)
     _projectResultRG<size>(rg, pos);
 }
 
+void ColumnCommandInt64::issuePrimitive()
+{
+    _issuePrimitiveNarrow<size>();
+}
+
 ColumnCommandInt128::ColumnCommandInt128(execplan::CalpontSystemCatalog::ColType& aColType, messageqcpp::ByteStream& bs)
 {
     ColumnCommand::createCommand(aColType, bs);
-    parsedColumnFilter = primitives::parseColumnFilter(filterString.buf(),
-                                                       colType.colWidth,
-                                                       colType.colDataType,
-                                                       filterCount, BOP);
-    /* OR hack */
-    emptyFilter = primitives::parseColumnFilter(filterString.buf(),
-                                                colType.colWidth,
-                                                colType.colDataType, 0, BOP);
+    createColumnFilter<IntegralType>();
 }
 
 void ColumnCommandInt128::prep(int8_t outputType, bool absRids)
@@ -1272,5 +1335,19 @@ void ColumnCommandInt128::projectResultRG(RowGroup& rg, uint32_t pos)
     _projectResultRG<size>(rg, pos);
 }
 
+void ColumnCommandInt128::issuePrimitive()
+{
+    loadData();
+
+    if (!suppressFilter)
+        bpp->getPrimitiveProcessor().setParsedColumnFilter(parsedColumnFilter);
+    else
+        bpp->getPrimitiveProcessor().setParsedColumnFilter(emptyFilter);
+
+    _issuePrimitive<size>();
+    if (_isScan)
+        updateCPDataWide();
+}
+
 }
 // vim:ts=4 sw=4:
diff --git a/primitives/primproc/columncommand.h b/primitives/primproc/columncommand.h
index d2b94b094..10cb02d70 100644
--- a/primitives/primproc/columncommand.h
+++ b/primitives/primproc/columncommand.h
@@ -113,9 +113,15 @@ protected:
     void _loadData();
     void updateCPDataNarrow();
     void updateCPDataWide();
+    template<int W>
+    void _issuePrimitiveNarrow();
+    template<int W>
     void _issuePrimitive();
+    virtual void issuePrimitive();
     void duplicate(ColumnCommand*);
     void fillInPrimitiveMessageHeader(const int8_t outputType, const bool absRids);
+    template<typename T>
+    void createColumnFilter();
 
     // we only care about the width and type fields.
     //On the PM the rest is uninitialized
@@ -125,7 +131,6 @@ protected:
     ColumnCommand& operator=(const ColumnCommand&);
 
     void _execute();
-    void issuePrimitive();
     void processResult();
     template<int W>
     void _process_OT_BOTH();
@@ -202,6 +207,7 @@ class ColumnCommandInt8 : public ColumnCommand
 {
   public:
     static constexpr uint8_t size = 1;
+    using IntegralType = datatypes::WidthToSIntegralType<size>::type;
     ColumnCommandInt8() : ColumnCommand() { };
     ColumnCommandInt8(execplan::CalpontSystemCatalog::ColType& colType, messageqcpp::ByteStream& bs);
     void prep(int8_t outputType, bool absRids) override;
@@ -209,12 +215,14 @@ class ColumnCommandInt8 : public ColumnCommand
     void process_OT_BOTH() override;
     void process_OT_DATAVALUE() override;
     void projectResultRG(rowgroup::RowGroup& rg, uint32_t pos) override;
+    void issuePrimitive() override;
 };
 
 class ColumnCommandInt16 : public ColumnCommand
 {
   public:
     static constexpr uint8_t size = 2;
+    using IntegralType = datatypes::WidthToSIntegralType<size>::type;
     ColumnCommandInt16() : ColumnCommand() { };
     ColumnCommandInt16(execplan::CalpontSystemCatalog::ColType& colType, messageqcpp::ByteStream& bs);
     void prep(int8_t outputType, bool absRids) override;
@@ -222,12 +230,14 @@ class ColumnCommandInt16 : public ColumnCommand
     void process_OT_BOTH() override;
     void process_OT_DATAVALUE() override;
     void projectResultRG(rowgroup::RowGroup& rg, uint32_t pos) override;
+    void issuePrimitive() override;
 };
 
 class ColumnCommandInt32 : public ColumnCommand
 {
   public:
     static constexpr uint8_t size = 4;
+    using IntegralType = datatypes::WidthToSIntegralType<size>::type;
     ColumnCommandInt32() : ColumnCommand() { };
     ColumnCommandInt32(execplan::CalpontSystemCatalog::ColType& colType, messageqcpp::ByteStream& bs);
     void prep(int8_t outputType, bool absRids) override;
@@ -235,12 +245,14 @@ class ColumnCommandInt32 : public ColumnCommand
     void process_OT_BOTH() override;
     void process_OT_DATAVALUE() override;
     void projectResultRG(rowgroup::RowGroup& rg, uint32_t pos) override;
+    void issuePrimitive() override;
 };
 
 class ColumnCommandInt64 : public ColumnCommand
 {
   public:
     static constexpr uint8_t size = 8;
+    using IntegralType = datatypes::WidthToSIntegralType<size>::type;
     ColumnCommandInt64() : ColumnCommand() { };
     ColumnCommandInt64(execplan::CalpontSystemCatalog::ColType& colType, messageqcpp::ByteStream& bs);
     void prep(int8_t outputType, bool absRids) override;
@@ -248,12 +260,14 @@ class ColumnCommandInt64 : public ColumnCommand
     void process_OT_BOTH() override;
     void process_OT_DATAVALUE() override;
     void projectResultRG(rowgroup::RowGroup& rg, uint32_t pos) override;
+    void issuePrimitive() override;
 };
 
 class ColumnCommandInt128 : public ColumnCommand
 {
   public:
     static constexpr uint8_t size = 16;
+    using IntegralType = datatypes::WidthToSIntegralType<size>::type;
     ColumnCommandInt128() : ColumnCommand() { };
     ColumnCommandInt128(execplan::CalpontSystemCatalog::ColType& colType, messageqcpp::ByteStream& bs);
     void prep(int8_t outputType, bool absRids) override;
@@ -261,6 +275,7 @@ class ColumnCommandInt128 : public ColumnCommand
     void process_OT_BOTH() override;
     void process_OT_DATAVALUE() override;
     void projectResultRG(rowgroup::RowGroup& rg, uint32_t pos) override;
+    void issuePrimitive() override;
 };
 
 
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 746d77124..356fa402b 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -48,3 +48,10 @@ if (WITH_COMPRESSION_UT)
     target_link_libraries(compression_tests ${ENGINE_LDFLAGS} ${GTEST_LIBRARIES} ${MARIADB_CLIENT_LIBS} ${ENGINE_WRITE_LIBS})
     install(TARGETS compression_tests DESTINATION ${ENGINE_BINDIR} COMPONENT columnstore-engine)
 endif()
+
+if (WITH_PP_SCAN_UT)
+    add_executable(column-scan-filter-tests primitives_column_scan_and_filter.cpp)
+    target_include_directories(column-scan-filter-tests PUBLIC ${ENGINE_COMMON_INCLUDES} ${ENGINE_BLOCKCACHE_INCLUDE} ${ENGINE_PRIMPROC_INCLUDE} )
+    target_link_libraries(column-scan-filter-tests ${ENGINE_LDFLAGS} ${MARIADB_CLIENT_LIBS} ${ENGINE_WRITE_LIBS} ${GTEST_LIBRARIES} processor dbbc)
+    install(TARGETS column-scan-filter-tests DESTINATION ${ENGINE_BINDIR} COMPONENT columnstore-engine)
+endif()
diff --git a/tests/primitives_column_scan_and_filter.cpp b/tests/primitives_column_scan_and_filter.cpp
new file mode 100644
index 000000000..02a973017
--- /dev/null
+++ b/tests/primitives_column_scan_and_filter.cpp
@@ -0,0 +1,629 @@
+/* Copyright (C) 2021 MariaDB Corporation
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License
+   as published by the Free Software Foundation; version 2 of
+   the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+   MA 02110-1301, USA. */
+
+#include <iostream>
+#include <gtest/gtest.h>
+#include "datatypes/mcs_datatype.h"
+#include "stats.h"
+#include "primitives/linux-port/primitiveprocessor.h"
+
+using namespace primitives;
+using namespace datatypes;
+using namespace std;
+
+class ColumnScanFilterTest : public ::testing::Test
+{
+  protected:
+  PrimitiveProcessor pp;
+  uint8_t input[BLOCK_SIZE];
+  uint8_t output[4 * BLOCK_SIZE];
+  uint8_t block[BLOCK_SIZE];
+  uint16_t* rids;
+  uint32_t i;
+  uint32_t written;
+  NewColRequestHeader* in;
+  NewColResultHeader* out;
+  ColArgs* args;
+  
+  void SetUp() override
+  {
+    memset(input, 0, BLOCK_SIZE);
+    memset(output, 0, 4 * BLOCK_SIZE);
+    in = reinterpret_cast<NewColRequestHeader*>(input);
+    out = reinterpret_cast<NewColResultHeader*>(output);
+    rids = reinterpret_cast<uint16_t*>(&in[1]);
+    args = reinterpret_cast<ColArgs*>(&in[1]);
+  }
+
+  uint8_t* readBlockFromFile(const std::string& fileName, uint8_t* block)
+  {
+    int fd;
+    uint32_t i;
+
+    fd = open(fileName.c_str(), O_RDONLY);
+
+    if (fd < 0)
+    {
+        cerr << "getBlock(): skipping this test; needs the index list file "
+             << fileName << endl;
+        return nullptr;
+    }
+
+    i = read(fd, block, BLOCK_SIZE);
+
+    if (i <= 0)
+    {
+        cerr << "getBlock(): Couldn't read the file " << fileName << endl;
+        close(fd);
+        return nullptr;
+    }
+
+    if (i != BLOCK_SIZE)
+    {
+        cerr << "getBlock(): could not read a whole block" << endl;
+        close(fd);
+        return nullptr;
+    }
+
+    close(fd);
+    return block;
+  }
+};
+
+TEST_F(ColumnScanFilterTest, ColumnScan1Byte)
+{
+  constexpr const uint8_t W = 1;
+  using IntegralType = datatypes::WidthToSIntegralType<W>::type;
+  datatypes::make_unsigned<IntegralType>::type* results;
+  in->colType = ColRequestHeaderDataType();
+  in->colType.DataSize = 1;
+  in->colType.DataType = SystemCatalog::CHAR;
+  in->OutputType = OT_DATAVALUE;
+  in->NOPS = 0;
+  in->NVALS = 0;
+
+  pp.setBlockPtr((int*) readBlockFromFile("col1block.cdf", block));
+  pp.columnScanAndFilter<IntegralType>(in, out, 4 * BLOCK_SIZE, &written);
+
+  results = &output[sizeof(NewColResultHeader)];
+  EXPECT_EQ(out->NVALS, 8160);
+
+  for (i = 0; i < 300; i++)
+      EXPECT_EQ(results[i],i % 255);
+
+}
+
+TEST_F(ColumnScanFilterTest, ColumnScan2Bytes)
+{
+  constexpr const uint8_t W = 2;
+  using IntegralType = datatypes::WidthToSIntegralType<W>::type;
+  datatypes::make_unsigned<IntegralType>::type* results;
+  in->colType.DataSize = W;
+  in->colType.DataType = SystemCatalog::INT;
+  in->OutputType = OT_DATAVALUE;
+  in->NOPS = 0;
+  in->NVALS = 0;
+
+  pp.setBlockPtr((int*) readBlockFromFile("col2block.cdf", block));
+  pp.columnScanAndFilter<IntegralType>(in, out, 4 * BLOCK_SIZE, &written);
+
+  results = reinterpret_cast<uint16_t*>(&output[sizeof(NewColResultHeader)]);
+  EXPECT_EQ(out->NVALS, 4096);
+
+  for (i = 0; i < out->NVALS; i++)
+      EXPECT_EQ(results[i], i);
+}
+
+TEST_F(ColumnScanFilterTest, ColumnScan4Bytes)
+{
+  constexpr const uint8_t W = 4;
+  using IntegralType = datatypes::WidthToSIntegralType<W>::type;
+  datatypes::make_unsigned<IntegralType>::type* results;
+  in->colType.DataSize = W;
+  in->colType.DataType = SystemCatalog::INT;
+  in->OutputType = OT_DATAVALUE;
+  in->NOPS = 0;
+  in->NVALS = 0;
+
+  pp.setBlockPtr((int*) readBlockFromFile("col4block.cdf", block));
+  pp.columnScanAndFilter<IntegralType>(in, out, 4 * BLOCK_SIZE, &written);
+
+  results = reinterpret_cast<uint32_t*>(&output[sizeof(NewColResultHeader)]);
+  EXPECT_EQ(out->NVALS, 2048);
+
+  for (i = 0; i < out->NVALS; i++)
+      EXPECT_EQ(results[i], (uint32_t) i);
+}
+
+TEST_F(ColumnScanFilterTest, ColumnScan8Bytes)
+{
+  constexpr const uint8_t W = 8;
+  using IntegralType = datatypes::WidthToSIntegralType<W>::type;
+  datatypes::make_unsigned<IntegralType>::type* results;
+  in->colType.DataSize = W;
+  in->colType.DataType = SystemCatalog::INT;
+  in->OutputType = OT_DATAVALUE;
+  in->NOPS = 0;
+  in->NVALS = 0;
+
+  pp.setBlockPtr((int*) readBlockFromFile("col8block.cdf", block));
+  pp.columnScanAndFilter<IntegralType>(in, out, 4 * BLOCK_SIZE, &written);
+
+  results = reinterpret_cast<u_int64_t*>(&output[sizeof(NewColResultHeader)]);
+  ASSERT_EQ(out->NVALS, 1024);
+
+  for (i = 0; i < out->NVALS; i++)
+      ASSERT_EQ(results[i], (uint32_t) i);
+}
+
+TEST_F(ColumnScanFilterTest, ColumnScan1ByteUsingRID)
+{
+  constexpr const uint8_t W = 1;
+  using IntegralType = datatypes::WidthToSIntegralType<W>::type;
+  datatypes::make_unsigned<IntegralType>::type* results;
+  in->colType.DataSize = W;
+  in->colType.DataType = SystemCatalog::INT;
+  in->OutputType = OT_DATAVALUE;
+  in->NOPS = 0;
+  in->NVALS = 2;
+  rids[0] = 20;
+  rids[1] = 17;
+
+  pp.setBlockPtr((int*) readBlockFromFile("col1block.cdf", block));
+  pp.columnScanAndFilter<IntegralType>(in, out, 4 * BLOCK_SIZE, &written);
+
+  results = reinterpret_cast<uint8_t*>(&output[sizeof(NewColResultHeader)]);
+  ASSERT_EQ(out->NVALS, 2);
+
+  for (i = 0; i < out->NVALS; i++)
+      ASSERT_EQ(results[i], rids[i]);
+}
+
+TEST_F(ColumnScanFilterTest, ColumnScan4Bytes1Filter)
+{
+  constexpr const uint8_t W = 4;
+  using IntegralType = datatypes::WidthToSIntegralType<W>::type;
+  datatypes::make_unsigned<IntegralType>::type* results;
+  IntegralType tmp;
+  in->colType.DataSize = W;
+  in->colType.DataType = SystemCatalog::INT;
+  in->OutputType = OT_DATAVALUE;
+  in->NOPS = 2;
+  in->BOP = BOP_AND;
+  in->NVALS = 0;
+
+  tmp = 20;
+  args->COP = COMPARE_LT;
+  memcpy(args->val, &tmp, in->colType.DataSize);
+  args = reinterpret_cast<ColArgs*>(&input[sizeof(NewColRequestHeader) +
+                                           sizeof(ColArgs) + in->colType.DataSize]);
+  args->COP = COMPARE_GT;
+  tmp = 10;
+  memcpy(args->val, &tmp, in->colType.DataSize);
+
+  pp.setBlockPtr((int*) readBlockFromFile("col4block.cdf", block));
+  pp.columnScanAndFilter<IntegralType>(in, out, 4 * BLOCK_SIZE, &written);
+
+  results = reinterpret_cast<uint32_t*>(&output[sizeof(NewColResultHeader)]);
+  ASSERT_EQ(out->NVALS, 9);
+
+  for (i = 0; i < out->NVALS; i++)
+      ASSERT_EQ(results[i], 11 + (uint32_t)i);
+}
+
+//void p_Col_7()
+TEST_F(ColumnScanFilterTest, ColumnScan8Bytes2CompFilters)
+{
+  constexpr const uint8_t W = 8;
+  using IntegralType = datatypes::WidthToSIntegralType<W>::type;
+  datatypes::make_unsigned<IntegralType>::type* results;
+  IntegralType tmp;
+
+  in->colType.DataSize = W;
+  in->colType.DataType = SystemCatalog::INT;
+  in->OutputType = OT_DATAVALUE;
+  in->NOPS = 2;
+  in->BOP = BOP_OR;
+  in->NVALS = 0;
+  
+  tmp = 10;
+  args->COP = COMPARE_LT;
+  memcpy(args->val, &tmp, in->colType.DataSize);
+  args = reinterpret_cast<ColArgs*>(&input[sizeof(NewColRequestHeader) +
+                                           sizeof(ColArgs) + in->colType.DataSize]);
+  args->COP = COMPARE_GT;
+  tmp = 1000;
+  memcpy(args->val, &tmp, in->colType.DataSize);
+
+  pp.setBlockPtr((int*) readBlockFromFile("col8block.cdf", block));
+  pp.columnScanAndFilter<IntegralType>(in, out, 4 * BLOCK_SIZE, &written);
+
+  results = reinterpret_cast<u_int64_t*>(&output[sizeof(NewColResultHeader)]);
+  ASSERT_EQ(out->NVALS, 33);
+
+  for (i = 0; i < out->NVALS; i++)
+      ASSERT_EQ(results[i], (uint32_t) (i < 10 ? i : i - 10 + 1001));
+}
+
+TEST_F(ColumnScanFilterTest, ColumnScan8Bytes2EqFilters)
+{
+  constexpr const uint8_t W = 8;
+  using IntegralType = datatypes::WidthToSIntegralType<W>::type;
+  datatypes::make_unsigned<IntegralType>::type* results;
+  IntegralType tmp;
+
+  in->colType.DataSize = W;
+  in->colType.DataType = SystemCatalog::INT;
+  in->OutputType = OT_DATAVALUE;
+  in->NOPS = 2;
+  in->BOP = BOP_OR;
+  in->NVALS = 0;
+
+  tmp = 10;
+  args->COP = COMPARE_EQ;
+  memcpy(args->val, &tmp, in->colType.DataSize);
+  args = reinterpret_cast<ColArgs*>(&input[sizeof(NewColRequestHeader) +
+                                    sizeof(ColArgs) + in->colType.DataSize]);
+  args->COP = COMPARE_EQ;
+  tmp = 1000;
+  memcpy(args->val, &tmp, in->colType.DataSize);
+
+  pp.setBlockPtr((int*) readBlockFromFile("col8block.cdf", block));
+  pp.columnScanAndFilter<IntegralType>(in, out, 4 * BLOCK_SIZE, &written);
+
+  results = reinterpret_cast<u_int64_t*>(&output[sizeof(NewColResultHeader)]);
+
+  ASSERT_EQ(out->NVALS, 2);
+  ASSERT_EQ(results[0], 10);
+  ASSERT_EQ(results[1], 1000);
+}
+
+TEST_F(ColumnScanFilterTest, ColumnScan8Bytes2EqFiltersRID)
+{
+  constexpr const uint8_t W = 8;
+  using IntegralType = datatypes::WidthToSIntegralType<W>::type;
+  datatypes::make_unsigned<IntegralType>::type* results;
+  IntegralType tmp;
+
+  in->colType.DataSize = W;
+  in->colType.DataType = SystemCatalog::INT;
+  in->OutputType = OT_DATAVALUE;
+  in->NOPS = 2;
+  in->BOP = BOP_OR;
+  in->NVALS = 2;
+
+  tmp = 10;
+  args->COP = COMPARE_EQ;
+  memcpy(args->val, &tmp, in->colType.DataSize);
+  args = reinterpret_cast<ColArgs*>(&input[sizeof(NewColRequestHeader) +
+                                           sizeof(ColArgs) + in->colType.DataSize]);
+  args->COP = COMPARE_EQ;
+  tmp = 1000;
+  memcpy(args->val, &tmp, in->colType.DataSize);
+
+  rids = reinterpret_cast<uint16_t*>(&input[sizeof(NewColRequestHeader) +
+                                     2 * (sizeof(ColArgs) + in->colType.DataSize)]);
+
+  rids[0] = 10;
+  rids[1] = 100;
+
+  pp.setBlockPtr((int*) readBlockFromFile("col8block.cdf", block));
+  pp.columnScanAndFilter<IntegralType>(in, out, 4 * BLOCK_SIZE, &written);
+
+  results = reinterpret_cast<u_int64_t*>(&output[sizeof(NewColResultHeader)]);
+  ASSERT_EQ(out->NVALS, 1);
+  ASSERT_EQ(results[0], 10);
+}
+
+TEST_F(ColumnScanFilterTest, ColumnScan8Bytes2EqFiltersRIDOutputRid)
+{
+  constexpr const uint8_t W = 8;
+  using IntegralType = datatypes::WidthToSIntegralType<W>::type;
+  int16_t* results;
+  IntegralType tmp;
+
+  in->colType.DataSize = W;
+  in->colType.DataType = SystemCatalog::INT;
+  in->OutputType = OT_RID;
+  in->NOPS = 2;
+  in->BOP = BOP_OR;
+  in->NVALS = 0;
+  
+  tmp = 10;
+  args->COP = COMPARE_LT;
+  memcpy(args->val, &tmp, in->colType.DataSize);
+  args = reinterpret_cast<ColArgs*>(&input[sizeof(NewColRequestHeader) +
+                                           sizeof(ColArgs) + in->colType.DataSize]);
+  args->COP = COMPARE_GT;
+  tmp = 1000;
+  memcpy(args->val, &tmp, in->colType.DataSize);
+
+  pp.setBlockPtr((int*) readBlockFromFile("col8block.cdf", block));
+  pp.columnScanAndFilter<IntegralType>(in, out, 4 * BLOCK_SIZE, &written);
+
+  results = reinterpret_cast<int16_t*>(&output[sizeof(NewColResultHeader)]);
+  ASSERT_EQ(out->NVALS, 33);
+
+  for (i = 0; i < out->NVALS; i++)
+      ASSERT_EQ(results[i], (i < 10 ? i : i - 10 + 1001));
+}
+
+TEST_F(ColumnScanFilterTest, ColumnScan8Bytes2EqFiltersRIDOutputBoth)
+{
+  constexpr const uint8_t W = 8;
+  using IntegralType = datatypes::WidthToSIntegralType<W>::type;
+  IntegralType tmp;
+  IntegralType* resultVal;
+  int16_t* resultRid;
+
+  in->colType.DataSize = W;
+  in->colType.DataType = SystemCatalog::INT;
+  in->OutputType = OT_BOTH;
+  in->NOPS = 2;
+  in->BOP = BOP_OR;
+  in->NVALS = 0;
+  
+  tmp = 10;
+  args->COP = COMPARE_LT;
+  memcpy(args->val, &tmp, in->colType.DataSize);
+  args = reinterpret_cast<ColArgs*>(&input[sizeof(NewColRequestHeader) +
+                                           sizeof(ColArgs) + in->colType.DataSize]);
+  args->COP = COMPARE_GT;
+  tmp = 1000;
+  memcpy(args->val, &tmp, in->colType.DataSize);
+
+  pp.setBlockPtr((int*) readBlockFromFile("col8block.cdf", block));
+  pp.columnScanAndFilter<IntegralType>(in, out, 4 * BLOCK_SIZE, &written);
+
+  ASSERT_EQ(out->NVALS, 33);
+
+  for (i = 0; i < out->NVALS; i++)
+  {
+      resultRid = reinterpret_cast<int16_t*>(&output[
+      sizeof(NewColResultHeader) + i * (sizeof(int16_t) + in->colType.DataSize)]);
+      resultVal = reinterpret_cast<int64_t*>(&resultRid[1]);
+      ASSERT_EQ(*resultRid, (i < 10 ? i : i - 10 + 1001));
+      ASSERT_EQ(*resultVal, (i < 10 ? i : i - 10 + 1001));
+  }
+}
+
+//void p_Col_12()
+TEST_F(ColumnScanFilterTest, ColumnScan1Byte2CompFilters)
+{
+  constexpr const uint8_t W = 1;
+  using IntegralType = datatypes::WidthToSIntegralType<W>::type;
+  datatypes::make_unsigned<IntegralType>::type* results;
+
+  in->colType = ColRequestHeaderDataType();
+  in->colType.DataSize = W;
+  in->colType.DataType = SystemCatalog::CHAR;
+  in->OutputType = OT_DATAVALUE;
+  in->BOP = BOP_AND;
+  in->NOPS = 2;
+  in->NVALS = 0;
+
+  args->COP = COMPARE_GT;
+  args->val[0] = '2';
+  // Filter is COP(1 byte)/rf(1 byte)/val(1 byte)
+  args = reinterpret_cast<ColArgs*>(&input[sizeof(NewColRequestHeader) + 2 + W]);
+  args->COP = COMPARE_LT;
+  args->val[0] = '4';
+
+  pp.setBlockPtr((int*) readBlockFromFile("col1block.cdf", block));
+  pp.columnScanAndFilter<IntegralType>(in, out, 4 * BLOCK_SIZE, &written);
+
+  results = &output[sizeof(NewColResultHeader)];
+  ASSERT_EQ(out->NVALS, 32);
+
+  for (i = 0; i < out->NVALS; i++)
+      ASSERT_EQ( (int)'3', results[i]);
+}
+/*
+The code doesn't support filters with RID and literal
+//void p_Col_13()
+TEST_F(ColumnScanFilterTest, ColumnScan4Bytes2CompFiltersOutputRID)
+{
+  constexpr const uint8_t W = 4;
+  using IntegralType = datatypes::WidthToSIntegralType<W>::type;
+  IntegralType tmp;
+  int16_t ridTmp;
+  int16_t* results;
+  in->colType.DataSize = 4;
+  in->colType.DataType = SystemCatalog::INT;
+  in->OutputType = OT_RID;
+  in->NOPS = 3;
+  in->BOP = BOP_OR;
+  in->NVALS = 3;
+
+  // first argument "is RID 8 < 10?"  Answer is yes
+  tmp = 10;	// value to check
+  ridTmp = 8;
+  args->COP = COMPARE_LT;
+  memcpy(args->val, &tmp, in->colType.DataSize);
+  memcpy(&args->val[in->colType.DataSize], &ridTmp, 2);
+
+  // second argument "is RID 5 > 10?"  Answer is no
+  args = reinterpret_cast<ColArgs*>(&args->val[in->colType.DataSize + 2]);
+  args->COP = COMPARE_GT;
+  tmp = 10;
+  ridTmp = 5;
+  memcpy(args->val, &tmp, in->colType.DataSize);
+  memcpy(&args->val[in->colType.DataSize], &ridTmp, 2);
+
+  // third argument "is RID 11 < 1000?"  Answer is yes
+  args = reinterpret_cast<ColArgs*>(&args->val[in->colType.DataSize + 2]);
+  args->COP = COMPARE_LT;
+  tmp = 1000;
+  ridTmp = 11;
+  memcpy(args->val, &tmp, in->colType.DataSize);
+  memcpy(&args->val[in->colType.DataSize], &ridTmp, 2);
+
+  pp.setBlockPtr((int*) readBlockFromFile("col4block.cdf", block));
+  pp.columnScanAndFilter<IntegralType>(in, out, 4 * BLOCK_SIZE, &written);
+
+  results = reinterpret_cast<int16_t*>(&output[sizeof(NewColResultHeader)]);
+  ASSERT_EQ(out->NVALS, 2);
+  ASSERT_EQ(results[0], 8);
+  ASSERT_EQ(results[1], 11);
+}
+*/
+//void p_Col_double_1()
+TEST_F(ColumnScanFilterTest, ColumnScan8BytesDouble2CompFilters)
+{
+  constexpr const uint8_t W = 8;
+  using IntegralType = datatypes::WidthToSIntegralType<W>::type;
+  double* results;
+  double tmp;
+
+  in->colType.DataSize = W;
+  in->colType.DataType = SystemCatalog::DOUBLE;
+  in->OutputType = OT_DATAVALUE;
+  in->BOP = BOP_AND;
+  in->NOPS = 2;
+  in->NVALS = 0;
+
+  tmp = 10.5;
+  args->COP = COMPARE_GT;
+  memcpy(args->val, &tmp, sizeof(tmp));
+  tmp = 15;
+  args = reinterpret_cast<ColArgs*>(&input[sizeof(NewColRequestHeader) + 10]);
+  args->COP = COMPARE_LT;
+  memcpy(args->val, &tmp, sizeof(tmp));
+
+  pp.setBlockPtr((int*) readBlockFromFile("col_double_block.cdf", block));
+  pp.columnScanAndFilter<IntegralType>(in, out, 4 * BLOCK_SIZE, &written);
+
+  results = reinterpret_cast<double*>(&output[sizeof(NewColResultHeader)]);
+  ASSERT_EQ(out->NVALS, 8);
+
+  for (i = 0; i < out->NVALS; i++)
+      ASSERT_EQ(results[i], 11 + (i * 0.5));
+}
+
+//void p_Col_float_1()
+TEST_F(ColumnScanFilterTest, ColumnScan4BytesFloat2CompFiltersOutputBoth)
+{
+  constexpr const uint8_t W = 4;
+  using IntegralType = datatypes::WidthToSIntegralType<W>::type;
+  float* resultVal;
+  float tmp;
+  int16_t* resultRid;
+
+  in->colType.DataSize = W;
+  in->colType.DataType = SystemCatalog::FLOAT;
+  in->OutputType = OT_BOTH;
+  in->BOP = BOP_AND;
+  in->NOPS = 2;
+  in->NVALS = 0;
+  
+  tmp = 10.5;
+  args->COP = COMPARE_GT;
+  memcpy(args->val, &tmp, sizeof(tmp));
+  tmp = 15;
+  args = reinterpret_cast<ColArgs*>(&input[sizeof(NewColRequestHeader) + 6]);
+  args->COP = COMPARE_LT;
+  memcpy(args->val, &tmp, sizeof(tmp));
+
+  pp.setBlockPtr((int*) readBlockFromFile("col_float_block.cdf", block));
+  pp.columnScanAndFilter<IntegralType>(in, out, 4 * BLOCK_SIZE, &written);
+
+  ASSERT_EQ(out->NVALS, 8);
+
+  for (i = 0; i < out->NVALS; i++)
+  {
+      resultRid = reinterpret_cast<int16_t*>(&output[
+      sizeof(NewColResultHeader) + i * (sizeof(int16_t) + in->colType.DataSize)]);
+      resultVal = reinterpret_cast<float*>(&resultRid[1]);
+      ASSERT_EQ(*resultVal, 11 + (i * 0.5));
+  }
+}
+
+//void p_Col_neg_float_1()
+TEST_F(ColumnScanFilterTest, ColumnScan4BytesNegFloat2CompFiltersOutputBoth)
+{
+  constexpr const uint8_t W = 4;
+  using IntegralType = datatypes::WidthToSIntegralType<W>::type;
+  float* resultVal;
+  float tmp;
+  int16_t* resultRid;
+
+  in->colType.DataSize = 4;
+  in->colType.DataType = SystemCatalog::FLOAT;
+  in->OutputType = OT_BOTH;
+  in->BOP = BOP_AND;
+  in->NOPS = 2;
+  in->NVALS = 0;
+  
+  tmp = -5.0;
+  args->COP = COMPARE_GT;
+  memcpy(args->val, &tmp, sizeof(tmp));
+  tmp = 5.0;
+  args = reinterpret_cast<ColArgs*>(&input[sizeof(NewColRequestHeader) + 6]);
+  args->COP = COMPARE_LT;
+  memcpy(args->val, &tmp, sizeof(tmp));
+
+  pp.setBlockPtr((int*) readBlockFromFile("col_neg_float.cdf", block));
+  pp.columnScanAndFilter<IntegralType>(in, out, 4 * BLOCK_SIZE, &written);
+  ASSERT_EQ(out->NVALS, 19);
+
+  for (i = 0; i < out->NVALS; i++)
+  {
+      resultRid = reinterpret_cast<int16_t*>(&output[
+      sizeof(NewColResultHeader) + i * (sizeof(int16_t) + in->colType.DataSize)]);
+      resultVal = reinterpret_cast<float*>(&resultRid[1]);
+      ASSERT_EQ(*resultVal, -4.5 + (i * 0.5));
+  }
+}
+
+//void p_Col_neg_double_1()
+TEST_F(ColumnScanFilterTest, ColumnScan4BytesNegDouble2CompFilters)
+{
+  constexpr const uint8_t W = 8;
+  using IntegralType = datatypes::WidthToSIntegralType<W>::type;
+  double* results;
+  double tmp;
+
+  in->colType.DataSize = W;
+  in->colType.DataType = SystemCatalog::DOUBLE;
+  in->OutputType = OT_DATAVALUE;
+  in->BOP = BOP_AND;
+  in->NOPS = 2;
+  in->NVALS = 0;
+
+  tmp = -5.0;
+  args->COP = COMPARE_GT;
+  memcpy(args->val, &tmp, sizeof(tmp));
+  tmp = 5.0;
+  args = reinterpret_cast<ColArgs*>(&input[sizeof(NewColRequestHeader) + 10]);
+  args->COP = COMPARE_LT;
+  memcpy(args->val, &tmp, sizeof(tmp));
+
+  pp.setBlockPtr((int*) readBlockFromFile("col_neg_double.cdf", block));
+  pp.columnScanAndFilter<IntegralType>(in, out, 4 * BLOCK_SIZE, &written);
+
+  results = reinterpret_cast<double*>(&output[sizeof(NewColResultHeader)]);
+  ASSERT_EQ(out->NVALS, 19);
+
+  for (i = 0; i < out->NVALS; i++)
+      ASSERT_EQ(results[i], -4.5 + (i * 0.5));
+}
+
+TEST_F(ColumnScanFilterTest, ColumnScan16Bytes2CompFilters)
+{
+//TBD
+}
+// vim:ts=2 sw=2: