From fd9fe182d5b0f6e7b2e1ee2fbbf668d285d91f2a Mon Sep 17 00:00:00 2001
From: Roman Nozdrin <rnozdrin@mariadb.com>
Date: Tue, 16 Aug 2022 20:04:09 +0000
Subject: [PATCH] MCOL-5199 This patch solves the overall performance
 degradation introduced with a new way of hashing char columns in aggregation
 code. The patch disables padding that forces the hasher to calculate over the
 whole 2k buffer. This patch also moves the hashing code into the common place
 where it belongs.

---
 dbcon/mysql/ha_mcs_sysvars.cpp |   2 +
 utils/common/collation.h       |   4 ++
 utils/common/conststring.h     |   7 +++
 utils/common/hasher.h          |  77 ++++++++++++++++++++++++
 utils/rowgroup/rowstorage.cpp  | 107 +++++++++------------------------
 5 files changed, 118 insertions(+), 79 deletions(-)
diff --git a/dbcon/mysql/ha_mcs_sysvars.cpp b/dbcon/mysql/ha_mcs_sysvars.cpp
index 1a129819e..4596bf1cf 100644
--- a/dbcon/mysql/ha_mcs_sysvars.cpp
+++ b/dbcon/mysql/ha_mcs_sysvars.cpp
@@ -39,6 +39,8 @@ static MYSQL_THDVAR_ENUM(compression_type, PLUGIN_VAR_RQCMDARG,
                          "SNAPPY segment files are Snappy compressed (default);"
 #ifdef HAVE_LZ4
                          "LZ4 segment files are LZ4 compressed;",
+# else
+			 ,
 #endif
                          NULL,                              // check
                          NULL,                              // update
diff --git a/utils/common/collation.h b/utils/common/collation.h
index 1e540d0fe..280bcd366 100644
--- a/utils/common/collation.h
+++ b/utils/common/collation.h
@@ -183,6 +183,10 @@ class Charset
   {
     return flags_;
   }
+  size_t strnxfrm(uchar* dst, size_t dstlen, uint nweights, const uchar* src, size_t srclen, uint flags)
+  {  // Forwards every argument unchanged to mCharset->coll->strnxfrm and returns its result.
+    return mCharset->coll->strnxfrm(mCharset, dst, dstlen, nweights, src, srclen, flags);
+  }  // NOTE(review): only reads mCharset here, so this could likely be const-qualified - confirm.
 };
 
 class CollationAwareHasher : public Charset
diff --git a/utils/common/conststring.h b/utils/common/conststring.h
index 14dc63101..451733538 100644
--- a/utils/common/conststring.h
+++ b/utils/common/conststring.h
@@ -64,6 +64,13 @@ class ConstString
     }
     return *this;
   }
+  ConstString& rtrimSpaces()  // Drops trailing ' ' bytes by shrinking mLength in place; returns *this.
+  {
+    for (; mLength && mStr[mLength - 1] == ' '; --mLength)  // stops at length 0 or at a non-space last byte
+    {
+    }  // empty body: all the work happens in the loop header
+    return *this;  // only the recorded length changes; the bytes behind mStr are not written
+  }
 };
 
 }  // namespace utils
diff --git a/utils/common/hasher.h b/utils/common/hasher.h
index c13ab7633..08ccf02e4 100644
--- a/utils/common/hasher.h
+++ b/utils/common/hasher.h
@@ -27,8 +27,10 @@
 #ifndef UTILS_HASHER_H
 #define UTILS_HASHER_H
 
+#include <cstddef>
 #include <stdint.h>
 #include <string.h>
+#include <string>
 #include "mcs_basic_types.h"
 
 namespace utils
@@ -204,6 +206,81 @@ class Hasher_r
   }
 };
 
+// Incremental 64-bit hasher borrowed from RobinHood: operator() mixes one buffer, finalize() does the last mix.
+class Hasher64_r
+{
+ public:
+  inline uint64_t operator()(const void* ptr, uint32_t len, uint64_t x = 0ULL)  // x: hash carried in from a prior call
+  {
+    auto const* const data64 = static_cast<uint64_t const*>(ptr);
+    uint64_t h = seed ^ (len * m);  // the starting state depends on the buffer length
+
+    std::size_t const n_blocks = len / 8;  // number of whole 8-byte words in the buffer
+    if (x)  // fold the carried-in hash into h; skipped entirely when x == 0
+    {
+      x *= m;
+      x ^= x >> r;
+      x *= m;
+      h ^= x;
+      h *= m;
+    }
+    for (std::size_t i = 0; i < n_blocks; ++i)
+    {
+      uint64_t k;
+      memcpy(&k, data64 + i, sizeof(k));  // copies the word out instead of dereferencing data64 directly
+
+      k *= m;
+      k ^= k >> r;
+      k *= m;
+
+      h ^= k;
+      h *= m;
+    }
+
+    auto const* const data8 = reinterpret_cast<uint8_t const*>(data64 + n_blocks);  // first byte past the whole words
+    switch (len & 7U)  // mix in the 0..7 leftover bytes; each case deliberately falls into the next
+    {
+      case 7:
+        h ^= static_cast<uint64_t>(data8[6]) << 48U;
+        // FALLTHROUGH
+      case 6:
+        h ^= static_cast<uint64_t>(data8[5]) << 40U;
+        // FALLTHROUGH
+      case 5:
+        h ^= static_cast<uint64_t>(data8[4]) << 32U;
+        // FALLTHROUGH
+      case 4:
+        h ^= static_cast<uint64_t>(data8[3]) << 24U;
+        // FALLTHROUGH
+      case 3:
+        h ^= static_cast<uint64_t>(data8[2]) << 16U;
+        // FALLTHROUGH
+      case 2:
+        h ^= static_cast<uint64_t>(data8[1]) << 8U;
+        // FALLTHROUGH
+      case 1:
+        h ^= static_cast<uint64_t>(data8[0]);
+        h *= m;
+        // FALLTHROUGH
+      default: break;  // len is a multiple of 8: no leftover bytes to mix
+    }
+    return h;  // the closing shift/multiply steps live in finalize(), not here
+  }
+
+  inline uint64_t finalize(uint64_t h, uint64_t len) const  // NOTE(review): len is never read in this body
+  {
+    h ^= h >> r;
+    h *= m;
+    h ^= h >> r;
+    return h;
+  }
+
+ private:
+  static constexpr uint64_t m = 0xc6a4a7935bd1e995ULL;  // multiplier used in every mixing step
+  static constexpr uint64_t seed = 0xe17a1465ULL;  // fixed seed XORed into the initial state
+  static constexpr unsigned int r = 47;  // right-shift distance used in the xor-shift steps
+};
+
 class Hasher128
 {
  public:
diff --git a/utils/rowgroup/rowstorage.cpp b/utils/rowgroup/rowstorage.cpp
index 28152122d..bfa60dcea 100644
--- a/utils/rowgroup/rowstorage.cpp
+++ b/utils/rowgroup/rowstorage.cpp
@@ -79,73 +79,6 @@ std::string errorString(int errNo)
   auto* buf = strerror_r(errNo, tmp, sizeof(tmp));
   return {buf};
 }
-
-inline uint64_t hashData(const void* ptr, uint32_t len, uint64_t x = 0ULL)
-{
-  static constexpr uint64_t m = 0xc6a4a7935bd1e995ULL;
-  static constexpr uint64_t seed = 0xe17a1465ULL;
-  static constexpr unsigned int r = 47;
-
-  auto const* const data64 = static_cast<uint64_t const*>(ptr);
-  uint64_t h = seed ^ (len * m);
-
-  std::size_t const n_blocks = len / 8;
-  if (x)
-  {
-    x *= m;
-    x ^= x >> r;
-    x *= m;
-    h ^= x;
-    h *= m;
-  }
-  for (std::size_t i = 0; i < n_blocks; ++i)
-  {
-    uint64_t k;
-    memcpy(&k, data64 + i, sizeof(k));
-
-    k *= m;
-    k ^= k >> r;
-    k *= m;
-
-    h ^= k;
-    h *= m;
-  }
-
-  auto const* const data8 = reinterpret_cast<uint8_t const*>(data64 + n_blocks);
-  switch (len & 7U)
-  {
-    case 7:
-      h ^= static_cast<uint64_t>(data8[6]) << 48U;
-      // FALLTHROUGH
-    case 6:
-      h ^= static_cast<uint64_t>(data8[5]) << 40U;
-      // FALLTHROUGH
-    case 5:
-      h ^= static_cast<uint64_t>(data8[4]) << 32U;
-      // FALLTHROUGH
-    case 4:
-      h ^= static_cast<uint64_t>(data8[3]) << 24U;
-      // FALLTHROUGH
-    case 3:
-      h ^= static_cast<uint64_t>(data8[2]) << 16U;
-      // FALLTHROUGH
-    case 2:
-      h ^= static_cast<uint64_t>(data8[1]) << 8U;
-      // FALLTHROUGH
-    case 1:
-      h ^= static_cast<uint64_t>(data8[0]);
-      h *= m;
-      // FALLTHROUGH
-    default: break;
-  }
-
-  h ^= h >> r;
-  h *= m;
-  h ^= h >> r;
-
-  return h;
-}
-
 }  // anonymous namespace
 
 namespace rowgroup
@@ -157,7 +90,10 @@ uint64_t hashRow(const rowgroup::Row& r, std::size_t lastCol)
     return 0;
 
   datatypes::MariaDBHasher h;
+  utils::Hasher64_r columnHasher;
+
   bool strHashUsed = false;
+
   for (uint32_t i = 0; i <= lastCol; ++i)
   {
     switch (r.getColType(i))
@@ -167,34 +103,47 @@ uint64_t hashRow(const rowgroup::Row& r, std::size_t lastCol)
       case execplan::CalpontSystemCatalog::BLOB:
       case execplan::CalpontSystemCatalog::TEXT:
       {
+        auto cs = r.getCharset(i);
         auto strColValue = r.getConstString(i);
-        if (strColValue.length() > MaxConstStrSize)
+        auto strColValueLen = strColValue.length();
+        if (strColValueLen > MaxConstStrSize)
         {
-          h.add(r.getCharset(i), strColValue);
+          h.add(cs, strColValue);
           strHashUsed = true;
         }
         else
         {
-          auto cs = r.getCharset(i);
-          uchar buf[MaxConstStrBufSize];
-          uint nActualWeights = cs->strnxfrm(buf, MaxConstStrBufSize, MaxConstStrBufSize,
-            reinterpret_cast<const uchar*>(strColValue.str()), strColValue.length(),
-            datatypes::Charset::getDefaultFlags());
-          ret = hashData(buf, nActualWeights, ret);
+          // This is a relatively big stack allocation.
+          // It is aligned for future vectorization of hash calculation.
+          uchar buf[MaxConstStrBufSize] __attribute__((aligned(64)));
+          // Pay attention to the last strnxfrm argument value.
+          // It is called flags and in many cases it has padding
+          // enabled (MY_STRXFRM_PAD_WITH_SPACE bit). With padding enabled
+          // strnxfrm returns MaxConstStrBufSize bytes and not the actual
+          // weights array length. Here padding is disabled.
+          auto charset = datatypes::Charset(cs);
+          auto trimStrColValue = strColValue.rtrimSpaces();
+          // Padding is disabled because trailing spaces were already removed by rtrimSpaces().
+          // strColValueLen (the length before trimming) is passed as nweights.
+          size_t nActualWeights = charset.strnxfrm(buf, MaxConstStrBufSize, strColValueLen,
+                                                   reinterpret_cast<const uchar*>(trimStrColValue.str()),
+                                                   trimStrColValue.length(), 0);
+          ret = columnHasher(reinterpret_cast<const void*>(buf), nActualWeights, ret);
         }
         break;
       }
-      default: ret = hashData(r.getData() + r.getOffset(i), r.getColumnWidth(i), ret); break;
+      default: ret = columnHasher(r.getData() + r.getOffset(i), r.getColumnWidth(i), ret); break;
     }
   }
 
+  // The properties of the hash produced are worse if MDB hasher results are incorporated
+  // so late but these results must be used very infrequently.
   if (strHashUsed)
   {
     uint64_t strhash = h.finalize();
-    ret = hashData(&strhash, sizeof(strhash), ret);
+    ret = columnHasher(&strhash, sizeof(strhash), ret);
   }
-
-  return ret;
+  return columnHasher.finalize(ret, lastCol << 2);
 }
 
 /** @brief NoOP interface to LRU-cache used by RowGroupStorage & HashStorage