1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-07-29 08:21:15 +03:00

regexp_replace, regexp_substr and regexp_instr functions

This commit is contained in:
Leonid Fedorov
2024-03-06 15:51:43 +00:00
parent e2e694b323
commit 0c3d4004bf
3 changed files with 177 additions and 8 deletions

View File

@ -28,6 +28,7 @@ using namespace std;
#include "utils/pcre2/jpcre2.hpp"
#include "functor_bool.h"
#include "functor_str.h"
#include "functioncolumn.h"
#include "predicateoperator.h"
#include "constantcolumn.h"
@ -42,10 +43,17 @@ using namespace logging;
namespace
{
inline bool getBool(rowgroup::Row& row, funcexp::FunctionParm& pm, bool& isNull,
CalpontSystemCatalog::ColType& ct, long timeZone)
struct RegExpParams
{
string expr ;
std::string expression;
std::string pattern;
};
inline RegExpParams getEpressionAndPattern(rowgroup::Row& row, funcexp::FunctionParm& pm, bool& isNull,
CalpontSystemCatalog::ColType& ct, long timeZone)
{
string expr;
string pattern;
switch (pm[0]->data()->resultType().colDataType)
@ -207,23 +215,126 @@ inline bool getBool(rowgroup::Row& row, funcexp::FunctionParm& pm, bool& isNull,
}
}
jpcre2::select<char>::Regex re(pattern);
return re.match(expr);
return RegExpParams{expr, pattern};
}
} // namespace
namespace funcexp
{
/** @brief Operation type hook for the regexp_replace functor.
 *
 * @param fp          function arguments; only the first one is inspected
 * @param resultType  not consulted by this implementation
 * @return the result type of the first argument
 */
CalpontSystemCatalog::ColType Func_regexp_replace::operationType(FunctionParm& fp,
                                                                 CalpontSystemCatalog::ColType& resultType)
{
  // operation type is not used by this functor
  const auto& firstArg = fp[0];
  return firstArg->data()->resultType();
}
/** @brief Operation type hook for the regexp_substr functor.
 *
 * @param fp          function arguments; only the first one is inspected
 * @param resultType  not consulted by this implementation
 * @return the result type of the first argument
 */
CalpontSystemCatalog::ColType Func_regexp_substr::operationType(FunctionParm& fp,
                                                                CalpontSystemCatalog::ColType& resultType)
{
  // operation type is not used by this functor
  const auto& firstArg = fp[0];
  return firstArg->data()->resultType();
}
/** @brief Operation type hook for the regexp_instr functor.
 *
 * @param fp          function arguments; only the first one is inspected
 * @param resultType  not consulted by this implementation
 * @return the result type of the first argument
 */
CalpontSystemCatalog::ColType Func_regexp_instr::operationType(FunctionParm& fp,
                                                               CalpontSystemCatalog::ColType& resultType)
{
  // operation type is not used by this functor
  const auto& firstArg = fp[0];
  return firstArg->data()->resultType();
}
/** @brief Operation type hook for the regexp functor.
 *
 * @param fp          function arguments; not inspected here
 * @param resultType  type handed in by the caller
 * @return a copy of resultType
 */
CalpontSystemCatalog::ColType Func_regexp::operationType(FunctionParm& fp,
                                                         CalpontSystemCatalog::ColType& resultType)
{
  // The caller-supplied type is echoed back unchanged.
  CalpontSystemCatalog::ColType opType(resultType);
  return opType;
}
bool Func_regexp::getBoolVal(rowgroup::Row& row, FunctionParm& pm, bool& isNull,
using jp = jpcre2::select<char>;
/*
  Returns the string subject with all occurrences of the regular expression pattern replaced by
  the string replace. If no occurrences are found, then subject is returned as is.
  https://mariadb.com/kb/en/regexp_replace/

  fp[0] is the subject, fp[1] the pattern and fp[2] the replacement.
  An empty string is returned when isNull is already set on entry, when evaluating the
  subject/pattern raises isNull, or when the replacement value is NULL.
*/
std::string Func_regexp_replace::getStrVal(rowgroup::Row& row, FunctionParm& fp, bool& isNull,
                                           execplan::CalpontSystemCatalog::ColType& ct)
{
  if (isNull)
    return std::string{};

  RegExpParams param = getEpressionAndPattern(row, fp, isNull, ct, ct.getTimeZone());

  // Evaluating the subject or the pattern may have raised the NULL flag;
  // do not feed such values to the regex engine.
  if (isNull)
    return std::string{};

  const auto& replace_with = fp[2]->data()->getStrVal(row, isNull);

  if (replace_with.isNull())
    return std::string{};

  jp::Regex re(param.pattern);
  // "g" asks jpcre2 to replace every match, not just the first one.
  return re.replace(param.expression, replace_with.unsafeStringRef(), "g");
}
/*
  Returns the part of the string subject that matches the regular expression pattern, or an empty string if
  pattern was not found. https://mariadb.com/kb/en/regexp_substr/

  fp[0] is the subject and fp[1] the pattern.
  An empty string is also returned when isNull is already set on entry or when evaluating
  the subject/pattern raises isNull.
*/
std::string Func_regexp_substr::getStrVal(rowgroup::Row& row, FunctionParm& fp, bool& isNull,
                                          execplan::CalpontSystemCatalog::ColType& ct)
{
  if (isNull)
    return std::string{};

  RegExpParams param = getEpressionAndPattern(row, fp, isNull, ct, ct.getTimeZone());

  // Evaluating the subject or the pattern may have raised the NULL flag;
  // do not feed such values to the regex engine.
  if (isNull)
    return std::string{};

  jp::Regex re(param.pattern);
  jp::RegexMatch rm(&re);
  jp::VecNum vec_num;

  size_t count = rm.setSubject(param.expression).setNumberedSubstringVector(&vec_num).match();

  if (count == 0)
    return std::string{};

  // First match, group 0: the text matched by the whole pattern.
  return vec_num[0][0];
}
/*
  Returns the position of the first occurrence of the regular expression pattern in the string subject, or 0
  if pattern was not found. https://mariadb.com/kb/en/regexp_instr/

  fp[0] is the subject and fp[1] the pattern. The position is returned as a decimal string.
  An empty string is returned when isNull is already set on entry or when evaluating
  the subject/pattern raises isNull.
*/
std::string Func_regexp_instr::getStrVal(rowgroup::Row& row, FunctionParm& fp, bool& isNull,
                                         execplan::CalpontSystemCatalog::ColType& ct)
{
  if (isNull)
    return std::string{};

  RegExpParams param = getEpressionAndPattern(row, fp, isNull, ct, ct.getTimeZone());

  // Evaluating the subject or the pattern may have raised the NULL flag;
  // do not feed such values to the regex engine.
  if (isNull)
    return std::string{};

  jp::Regex re(param.pattern);
  jp::RegexMatch rm(&re);
  jpcre2::VecOff vec_soff;

  size_t count = rm.setSubject(param.expression).setMatchStartOffsetVector(&vec_soff).match();

  if (count == 0)
    return "0";

  // Convert the zero-based start offset of the first match to a one-based position.
  // NOTE(review): the offset presumably counts bytes, so multi-byte characters before
  // the match may skew the reported position - confirm against jpcre2.
  return std::to_string(vec_soff[0] + 1);
}
/*
  https://mariadb.com/kb/en/regexp/

  Tells whether the subject (fp[0]) matches the regular expression pattern (fp[1]).
  Returns false when isNull is already set on entry or when evaluating the
  subject/pattern raises isNull.
*/
bool Func_regexp::getBoolVal(rowgroup::Row& row, FunctionParm& fp, bool& isNull,
                             CalpontSystemCatalog::ColType& ct)
{
  if (isNull)
    return false;

  RegExpParams param = getEpressionAndPattern(row, fp, isNull, ct, ct.getTimeZone());

  // The previous implementation returned `match && !isNull`; keep that guarantee
  // so a NULL raised while evaluating the arguments never reports a match.
  if (isNull)
    return false;

  jp::Regex re(param.pattern);
  return re.match(param.expression);
}
} // namespace funcexp

View File

@ -215,7 +215,10 @@ FuncExp::FuncExp()
fFuncMap["quarter"] = new Func_quarter();
fFuncMap["radians"] = new Func_radians(); // dlh
fFuncMap["rand"] = new Func_rand();
fFuncMap["regexp"] = new Func_regexp(); // dlh
fFuncMap["regexp"] = new Func_regexp();
fFuncMap["regexp_instr"] = new Func_regexp_instr();
fFuncMap["regexp_replace"] = new Func_regexp_replace();
fFuncMap["regexp_substr"] = new Func_regexp_substr(); // dlh
fFuncMap["repeat"] = new Func_repeat(); // dlh
fFuncMap["replace"] = new Func_replace(); // dlh
fFuncMap["replace_oracle"] = new Func_replace_oracle(); // dlh

View File

@ -470,6 +470,61 @@ class Func_replace : public Func_Str
execplan::CalpontSystemCatalog::ColType& op_ct);
};
/** @brief Func_regexp_replace class
 *
 * String functor that hands the name "regexp_replace" to its Func_Str base.
 * operationType() and getStrVal() are only declared here.
 */
class Func_regexp_replace : public Func_Str
{
 public:
  Func_regexp_replace() : Func_Str("regexp_replace") {}
  virtual ~Func_regexp_replace() {}

  execplan::CalpontSystemCatalog::ColType operationType(
      FunctionParm& fp, execplan::CalpontSystemCatalog::ColType& resultType);

  std::string getStrVal(rowgroup::Row& row, FunctionParm& fp, bool& isNull,
                        execplan::CalpontSystemCatalog::ColType& op_ct);
};
/** @brief Func_regexp_instr class
 *
 * String functor that hands the name "regexp_instr" to its Func_Str base.
 * operationType() and getStrVal() are only declared here.
 */
class Func_regexp_instr : public Func_Str
{
 public:
  Func_regexp_instr() : Func_Str("regexp_instr") {}
  virtual ~Func_regexp_instr() {}

  execplan::CalpontSystemCatalog::ColType operationType(
      FunctionParm& fp, execplan::CalpontSystemCatalog::ColType& resultType);

  std::string getStrVal(rowgroup::Row& row, FunctionParm& fp, bool& isNull,
                        execplan::CalpontSystemCatalog::ColType& op_ct);
};
/** @brief Func_regexp_substr class
 *
 * String functor that hands the name "regexp_substr" to its Func_Str base.
 * operationType() and getStrVal() are only declared here.
 */
class Func_regexp_substr : public Func_Str
{
 public:
  Func_regexp_substr() : Func_Str("regexp_substr") {}
  virtual ~Func_regexp_substr() {}

  execplan::CalpontSystemCatalog::ColType operationType(
      FunctionParm& fp, execplan::CalpontSystemCatalog::ColType& resultType);

  std::string getStrVal(rowgroup::Row& row, FunctionParm& fp, bool& isNull,
                        execplan::CalpontSystemCatalog::ColType& op_ct);
};
class Func_replace_oracle : public Func_Str
{
public: