You've already forked mariadb-columnstore-engine
mirror of
https://github.com/mariadb-corporation/mariadb-columnstore-engine.git
synced 2025-06-13 16:01:32 +03:00
MCOL-4044: Add oracle mode functions.
This commit is contained in:
175
utils/funcexp/func_trim_oracle.cpp
Normal file
175
utils/funcexp/func_trim_oracle.cpp
Normal file
@ -0,0 +1,175 @@
|
||||
/* Copyright (C) 2021 MariaDB Corporation
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License
|
||||
as published by the Free Software Foundation; version 2 of
|
||||
the License.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
MA 02110-1301, USA. */
|
||||
|
||||
|
||||
#include <string>
|
||||
using namespace std;
|
||||
|
||||
#include "functor_str.h"
|
||||
#include "functioncolumn.h"
|
||||
#include "utils_utf8.h"
|
||||
using namespace execplan;
|
||||
|
||||
#include "rowgroup.h"
|
||||
using namespace rowgroup;
|
||||
|
||||
#include "joblisttypes.h"
|
||||
using namespace joblist;
|
||||
|
||||
#include "collation.h"
|
||||
|
||||
namespace funcexp
|
||||
{
|
||||
// Reports the operation type for this functor. The functor itself never
// consults the operation type, so the result type of the first argument is
// simply handed back; resultType is not read.
CalpontSystemCatalog::ColType Func_trim_oracle::operationType(FunctionParm& fp, CalpontSystemCatalog::ColType& resultType)
{
    const auto& firstArg = fp[0];
    return firstArg->data()->resultType();
}
|
||||
|
||||
|
||||
std::string Func_trim_oracle::getStrVal(rowgroup::Row& row,
|
||||
FunctionParm& fp,
|
||||
bool& isNull,
|
||||
execplan::CalpontSystemCatalog::ColType& type)
|
||||
{
|
||||
CHARSET_INFO* cs = type.getCharset();
|
||||
// The original string
|
||||
const string& src = fp[0]->data()->getStrVal(row, isNull);
|
||||
if (isNull)
|
||||
return "";
|
||||
if (src.empty() || src.length() == 0)
|
||||
return src;
|
||||
// binLen represents the number of bytes in src
|
||||
size_t binLen = src.length();
|
||||
const char* pos = src.c_str();
|
||||
const char* end = pos + binLen;
|
||||
// strLen = the number of characters in src
|
||||
size_t strLen = cs->numchars(pos, end);
|
||||
|
||||
// The trim characters.
|
||||
const string& trim = (fp.size() > 1 ? fp[1]->data()->getStrVal(row, isNull) : " ");
|
||||
// binTLen represents the number of bytes in trim
|
||||
size_t binTLen = trim.length();
|
||||
const char* posT = trim.c_str();
|
||||
// strTLen = the number of characters in trim
|
||||
size_t strTLen = cs->numchars(posT, posT+binTLen);
|
||||
if (strTLen == 0 || strTLen > strLen)
|
||||
return src;
|
||||
|
||||
if (binTLen == 1)
|
||||
{
|
||||
// If the trim string is 1 byte, don't waste cpu for memcmp
|
||||
// Trim leading
|
||||
while (pos < end && *pos == *posT)
|
||||
{
|
||||
++pos;
|
||||
--binLen;
|
||||
}
|
||||
// Trim trailing
|
||||
const char* ptr = pos;
|
||||
if (cs->use_mb()) // This is a multi-byte charset
|
||||
{
|
||||
const char* p = pos;
|
||||
uint32 l;
|
||||
// Multibyte characters in the string give us alignment problems
|
||||
// What we do here is skip past any multibyte characters. Whn
|
||||
// don with this loop, ptr is pointing to a singlebyte char that
|
||||
// is after all multibyte chars in the string, or to end.
|
||||
while (ptr < end)
|
||||
{
|
||||
if ((l = my_ismbchar(cs, ptr, end))) // returns the number of bytes in the leading char or zero if one byte
|
||||
{
|
||||
ptr += l;
|
||||
p = ptr;
|
||||
}
|
||||
else
|
||||
{
|
||||
++ptr;
|
||||
}
|
||||
}
|
||||
ptr = p;
|
||||
}
|
||||
while (ptr < end && end[-1] == *posT)
|
||||
{
|
||||
--end;
|
||||
--binLen;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// Trim leading is easy
|
||||
while (pos+binTLen <= end && memcmp(pos,posT,binTLen) == 0)
|
||||
{
|
||||
pos += binTLen;
|
||||
binLen -= binTLen;
|
||||
}
|
||||
|
||||
// Trim trailing
|
||||
if (cs->use_mb()) // This is a multi-byte charset
|
||||
{
|
||||
// The problem is that the byte pattern at the end could
|
||||
// match memcmp, but not be correct since the first byte compared
|
||||
// may actually be a second or later byte from a previous char.
|
||||
|
||||
// We start at the beginning of the string and move forward
|
||||
// one character at a time until we reach the end. Then we can
|
||||
// safely compare and remove one character. Then back to the beginning
|
||||
// and try again.
|
||||
while (end - binTLen >= pos)
|
||||
{
|
||||
const char* p = pos;
|
||||
uint32_t l;
|
||||
while (p + binTLen < end)
|
||||
{
|
||||
if ((l = my_ismbchar(cs, p, end))) // returns the number of bytes in the leading char or zero if one byte
|
||||
p += l;
|
||||
else
|
||||
++p;
|
||||
}
|
||||
if (p + binTLen == end && memcmp(p,posT,binTLen) == 0)
|
||||
{
|
||||
end -= binTLen;
|
||||
binLen -= binTLen;
|
||||
}
|
||||
else
|
||||
{
|
||||
break; // We've run out of places to look
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
while (end-binTLen >= pos && memcmp(end-binTLen,posT,binTLen) == 0)
|
||||
{
|
||||
end -= binTLen;
|
||||
binLen -= binTLen;
|
||||
}
|
||||
}
|
||||
}
|
||||
// Turn back to a string
|
||||
std::string ret(pos, binLen);
|
||||
if (binLen == 0)
|
||||
{
|
||||
isNull = true;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
} // namespace funcexp
|
||||
// vim:ts=4 sw=4:
|
||||
|
Reference in New Issue
Block a user