mariadb-columnstore-engine/utils/funcexp/func_trim_oracle.cpp

/* Copyright (C) 2021 MariaDB Corporation

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License
   as published by the Free Software Foundation; version 2 of
   the License.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
   MA 02110-1301, USA. */


#include <string>
using namespace std;

#include "functor_str.h"
#include "functioncolumn.h"
#include "utils_utf8.h"
using namespace execplan;

#include "rowgroup.h"
using namespace rowgroup;

#include "joblisttypes.h"
using namespace joblist;

#include "collation.h"

namespace funcexp
{
CalpontSystemCatalog::ColType Func_trim_oracle::operationType(
    FunctionParm& fp,
    CalpontSystemCatalog::ColType& resultType)
{
    // This functor has no use for an operation type of its own, so it simply
    // hands back the result type of its first argument.
    CalpontSystemCatalog::ColType firstArgType = fp[0]->data()->resultType();
    return firstArgType;
}


/**
 * Oracle-mode TRIM: removes every leading and every trailing occurrence of
 * the trim string from the source string.
 *
 * @param row     row being evaluated; forwarded to the argument expressions
 * @param fp      fp[0] is the source string; fp[1], when present, is the
 *                trim string (a single space is used when it is absent)
 * @param isNull  set to true when the source or the trim argument evaluates
 *                to NULL, or when trimming leaves no bytes at all
 * @param type    supplies the character set used to count characters and to
 *                locate multi-byte character boundaries
 * @return the trimmed string. The source is returned unchanged when it is
 *         empty, when the trim string is empty, or when the trim string has
 *         more characters than the source. An empty string is returned when
 *         either argument is NULL.
 */
std::string Func_trim_oracle::getStrVal(rowgroup::Row& row,
                                 FunctionParm& fp,
                                 bool& isNull,
                                 execplan::CalpontSystemCatalog::ColType& type)
{
    CHARSET_INFO* cs = type.getCharset();
    // The original string
    const string& src = fp[0]->data()->getStrVal(row, isNull);
    if (isNull)
        return "";
    if (src.empty())
        return src;
    // binLen represents the number of bytes in src
    size_t binLen = src.length();
    const char* pos = src.c_str();
    const char* end = pos + binLen;
    // strLen = the number of characters in src
    size_t strLen = cs->numchars(pos, end);

    // The trim characters.
    const string& trim = (fp.size() > 1 ? fp[1]->data()->getStrVal(row, isNull) : " ");
    // A NULL trim string makes the whole expression NULL. Handle it
    // explicitly, the same way as a NULL source string, instead of relying
    // on the trimming code below to leave isNull untouched.
    if (isNull)
        return "";
    // binTLen represents the number of bytes in trim
    size_t binTLen = trim.length();
    const char* posT = trim.c_str();
    // strTLen = the number of characters in trim
    size_t strTLen = cs->numchars(posT, posT+binTLen);
    // Nothing can match if there is no trim string or it is longer than src
    if (strTLen == 0 || strTLen > strLen)
        return src;

    if (binTLen == 1)
    {
        // If the trim string is 1 byte, don't waste cpu for memcmp
        // Trim leading
        while (pos < end && *pos == *posT)
        {
            ++pos;
            --binLen;
        }
        // Trim trailing
        // ptr is the lower bound for trailing removal: bytes before it are
        // never stripped.
        const char* ptr = pos;
        if (cs->use_mb())   // This is a multi-byte charset
        {
            const char* p = pos;
            uint32_t l;
            // Multibyte characters in the string give us alignment problems.
            // What we do here is skip past any multibyte characters. When
            // done with this loop, p is pointing to a single-byte char that
            // is after all multibyte chars in the string, or to end.
            while (ptr < end)
            {
                if ((l = my_ismbchar(cs, ptr, end))) // returns the number of bytes in the leading char or zero if one byte
                {
                    ptr += l;
                    p = ptr;
                }
                else
                {
                    ++ptr;
                }
            }
            // Only the single-byte tail after the last multibyte character
            // may be trimmed, so a trailing byte of a multibyte character is
            // never mistaken for the trim character.
            ptr = p;
        }
        while (ptr < end && end[-1] == *posT)
        {
            --end;
            --binLen;
        }
    }
    else
    {
        // Trim leading is easy
        while (pos+binTLen <= end && memcmp(pos,posT,binTLen) == 0)
        {
            pos += binTLen;
            binLen -= binTLen;
        }

        // Trim trailing
        if (cs->use_mb())   // This is a multi-byte charset
        {
            // The problem is that the byte pattern at the end could
            // match memcmp, but not be correct since the first byte compared
            // may actually be a second or later byte from a previous char.

            // We start at the beginning of the string and move forward
            // one character at a time until we reach the end. Then we can
            // safely compare and remove one occurrence of the trim string.
            // Then back to the beginning and try again.
            while (end - binTLen >= pos)
            {
                const char* p = pos;
                uint32_t l;
                while (p + binTLen < end)
                {
                    if ((l = my_ismbchar(cs, p, end))) // returns the number of bytes in the leading char or zero if one byte
                        p += l;
                    else
                        ++p;
                }
                // p must land exactly binTLen bytes before end; otherwise the
                // candidate position is in the middle of a character.
                if (p + binTLen == end && memcmp(p,posT,binTLen) == 0)
                {
                    end -= binTLen;
                    binLen -= binTLen;
                }
                else
                {
                    break;  // We've run out of places to look
                }
            }
        }
        else
        {
            // Single-byte charset: every byte offset is a character boundary,
            // so the tail can be compared directly.
            while (end-binTLen >= pos && memcmp(end-binTLen,posT,binTLen) == 0)
            {
                end -= binTLen;
                binLen -= binTLen;
            }
        }
    }
    // Turn back to a string
    std::string ret(pos, binLen);
    // An empty result after trimming is reported as NULL
    if (binLen == 0)
    {
        isNull = true;
    }
    return ret;
}


} // namespace funcexp
// vim:ts=4 sw=4: