MCOL-4044: Add oracle mode functions.

2025-11-02 06:13:16 +03:00 · 2021-04-21 16:07:42 -05:00
parent 1f46baa980
commit 870d672efb
20 changed files with 2624 additions and 0 deletions
--- a/utils/funcexp/func_trim_oracle.cpp
+++ b/utils/funcexp/func_trim_oracle.cpp
@@ -0,0 +1,175 @@
+/* Copyright (C) 2021 MariaDB Corporation
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License
+   as published by the Free Software Foundation; version 2 of
+   the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+   MA 02110-1301, USA. */
+
+
+#include <string>
+using namespace std;
+
+#include "functor_str.h"
+#include "functioncolumn.h"
+#include "utils_utf8.h"
+using namespace execplan;
+
+#include "rowgroup.h"
+using namespace rowgroup;
+
+#include "joblisttypes.h"
+using namespace joblist;
+
+#include "collation.h"
+
+namespace funcexp
+{
+CalpontSystemCatalog::ColType Func_trim_oracle::operationType(FunctionParm& fp, CalpontSystemCatalog::ColType& resultType)
+{
+    const auto& firstArg = fp[0];  // resultType is ignored: this functor has no use for the operation type
+    return firstArg->data()->resultType();  // report the result type of the first argument
+}
+
+
+// Trims every leading and trailing occurrence of the trim string from the
+// source string. fp[0] supplies the source; fp[1], when present, supplies
+// the trim string, otherwise a single space is used. The charset comes
+// from `type`.
+//
+// An empty string is returned with isNull set when either argument comes
+// back NULL; isNull is also set when trimming leaves nothing behind. The
+// source is returned unchanged when it is empty, when the trim string is
+// empty, or when the trim string has more characters than the source.
+std::string Func_trim_oracle::getStrVal(rowgroup::Row& row,
+                                 FunctionParm& fp,
+                                 bool& isNull,
+                                 execplan::CalpontSystemCatalog::ColType& type)
+{
+    CHARSET_INFO* cs = type.getCharset();
+    // The original string
+    const string& src = fp[0]->data()->getStrVal(row, isNull);
+    if (isNull)
+        return "";
+    if (src.empty())
+        return src;
+
+    const char* pos = src.c_str();
+    const char* end = pos + src.length();
+    // strLen = the number of characters (not bytes) in src
+    const size_t strLen = cs->numchars(pos, end);
+
+    // The trim characters
+    const string& trim = (fp.size() > 1 ? fp[1]->data()->getStrVal(row, isNull) : " ");
+    // A NULL trim string gives a NULL result. Return right away instead of
+    // relying on later code paths to leave isNull set.
+    if (isNull)
+        return "";
+
+    // binTLen = the number of bytes in trim, strTLen = the number of characters
+    const char* posT = trim.c_str();
+    const size_t binTLen = trim.length();
+    const size_t strTLen = cs->numchars(posT, posT + binTLen);
+    if (strTLen == 0 || strTLen > strLen)
+        return src;
+
+    if (binTLen == 1)
+    {
+        // If the trim string is 1 byte, don't waste cpu for memcmp
+        const char trimCh = *posT;
+        // Trim leading
+        while (pos < end && *pos == trimCh)
+            ++pos;
+
+        // Trim trailing. Multibyte characters in the string give us
+        // alignment problems: a trailing byte of a multibyte character
+        // could compare equal to trimCh. So skip past all multibyte
+        // characters first; limit ends up just after the last one (or at
+        // pos if there are none) and trailing bytes are only removed
+        // above it.
+        const char* limit = pos;
+        if (cs->use_mb())   // This is a multi-byte charset
+        {
+            const char* p = pos;
+            while (p < end)
+            {
+                // my_ismbchar returns the number of bytes in the character
+                // at p, or zero if it is a single byte
+                const uint32_t l = my_ismbchar(cs, p, end);
+                if (l)
+                {
+                    p += l;
+                    limit = p;
+                }
+                else
+                {
+                    ++p;
+                }
+            }
+        }
+        while (limit < end && end[-1] == trimCh)
+            --end;
+    }
+    else
+    {
+        // Bounds are checked by comparing byte counts (end - pos) against
+        // tLen. Computing pos + binTLen or end - binTLen first could form a
+        // pointer outside the source buffer when fewer than binTLen bytes
+        // remain, which is undefined behaviour.
+        const ptrdiff_t tLen = static_cast<ptrdiff_t>(binTLen);
+
+        // Trim leading is easy
+        while (end - pos >= tLen && memcmp(pos, posT, binTLen) == 0)
+            pos += tLen;
+
+        // Trim trailing
+        if (cs->use_mb())   // This is a multi-byte charset
+        {
+            // The problem is that the byte pattern at the end could
+            // match memcmp, but not be correct since the first byte compared
+            // may actually be a second or later byte from a previous char.
+            //
+            // We start at pos and move forward one character at a time
+            // until at most tLen bytes are left. If exactly tLen are left,
+            // the tail is character-aligned and can safely be compared and
+            // removed. Then back to pos and try again.
+            while (end - pos >= tLen)
+            {
+                const char* p = pos;
+                while (end - p > tLen)
+                {
+                    const uint32_t l = my_ismbchar(cs, p, end);
+                    p += (l ? l : 1);
+                }
+                if (end - p != tLen || memcmp(p, posT, binTLen) != 0)
+                    break;  // We've run out of places to look
+                end -= tLen;
+            }
+        }
+        else
+        {
+            while (end - pos >= tLen && memcmp(end - tLen, posT, binTLen) == 0)
+                end -= tLen;
+        }
+    }
+
+    // Turn back to a string
+    const size_t binLen = static_cast<size_t>(end - pos);
+    if (binLen == 0)
+        isNull = true;
+    return std::string(pos, binLen);
+}
+
+
+} // namespace funcexp
+// vim:ts=4 sw=4:
+