You've already forked mariadb-columnstore-engine
mirror of
https://github.com/mariadb-corporation/mariadb-columnstore-engine.git
synced 2025-07-29 08:21:15 +03:00
MCOL-5328: use PCRE2 and JPCRE wrapper (#3137)
PCRE2 for regexp functions in columnstore
This commit is contained in:
@ -125,13 +125,13 @@ local testRun(platform) =
|
||||
|
||||
local testPreparation(platform) =
|
||||
local platform_map = {
|
||||
'centos:7': 'yum -y install epel-release && yum install -y git cppunit-devel cmake3 boost-devel snappy-devel',
|
||||
'rockylinux:8': rockylinux8_build_deps + ' && dnf install -y git lz4 cppunit-devel cmake3 boost-devel snappy-devel',
|
||||
'rockylinux:9': rockylinux9_build_deps + ' && dnf install -y git lz4 cppunit-devel cmake3 boost-devel snappy-devel',
|
||||
'debian:11': 'apt update && apt install --yes git libboost-all-dev libcppunit-dev libsnappy-dev cmake',
|
||||
'debian:12': 'apt update && apt install --yes git libboost-all-dev libcppunit-dev libsnappy-dev cmake',
|
||||
'ubuntu:20.04': 'apt update && apt install --yes git libboost-all-dev libcppunit-dev libsnappy-dev cmake',
|
||||
'ubuntu:22.04': 'apt update && apt install --yes git libboost-all-dev libcppunit-dev libsnappy-dev cmake',
|
||||
'centos:7': 'yum -y install epel-release && yum install -y git cppunit-devel cmake3 boost-devel snappy-devel pcre2-devel',
|
||||
'rockylinux:8': rockylinux8_build_deps + ' && dnf install -y git lz4 cppunit-devel cmake3 boost-devel snappy-devel pcre2-devel',
|
||||
'rockylinux:9': rockylinux9_build_deps + ' && dnf install -y git lz4 cppunit-devel cmake3 boost-devel snappy-devel pcre2-devel',
|
||||
'debian:11': 'apt update && apt install --yes git libboost-all-dev libcppunit-dev libsnappy-dev cmake libpcre2-dev',
|
||||
'debian:12': 'apt update && apt install --yes git libboost-all-dev libcppunit-dev libsnappy-dev cmake libpcre2-dev',
|
||||
'ubuntu:20.04': 'apt update && apt install --yes git libboost-all-dev libcppunit-dev libsnappy-dev cmake libpcre2-dev',
|
||||
'ubuntu:22.04': 'apt update && apt install --yes git libboost-all-dev libcppunit-dev libsnappy-dev cmake libpcre2-dev',
|
||||
};
|
||||
platform_map[platform];
|
||||
|
||||
|
@ -64,10 +64,12 @@ INSTALL_PREFIX="/usr/"
|
||||
DATA_DIR="/var/lib/mysql/data"
|
||||
CMAKE_BIN_NAME=cmake
|
||||
CTEST_BIN_NAME=ctest
|
||||
CONFIG_DIR="/etc/my.cnf.d"
|
||||
RPM_CONFIG_DIR="/etc/my.cnf.d"
|
||||
DEB_CONFIG_DIR="/etc/mysql/mariadb.conf.d"
|
||||
CONFIG_DIR=$RPM_CONFIG_DIR
|
||||
|
||||
if [[ $OS = 'Ubuntu' || $OS = 'Debian' ]]; then
|
||||
CONFIG_DIR="/etc/mysql/mariadb.conf.d"
|
||||
CONFIG_DIR=$DEB_CONFIG_DIR
|
||||
fi
|
||||
|
||||
export CLICOLOR_FORCE=1
|
||||
@ -202,6 +204,8 @@ clean_old_installation()
|
||||
rm -rf /var/run/mysqld
|
||||
rm -rf $DATA_DIR
|
||||
rm -rf /etc/mysql
|
||||
rm -rf /etc/my.cnf.d/columnstore.cnf
|
||||
rm -rf /etc/mysql/mariadb.conf.d/columnstore.cnf
|
||||
}
|
||||
|
||||
build()
|
||||
@ -442,25 +446,30 @@ fix_config_files()
|
||||
systemctl daemon-reload
|
||||
}
|
||||
|
||||
make_dir()
|
||||
{
|
||||
mkdir -p $1
|
||||
chown mysql:mysql $1
|
||||
}
|
||||
|
||||
install()
|
||||
{
|
||||
message_split
|
||||
message "Installing MariaDB"
|
||||
disable_plugins_for_bootstrap
|
||||
|
||||
mkdir -p $REPORT_PATH
|
||||
make_dir $REPORT_PATH
|
||||
chmod 777 $REPORT_PATH
|
||||
|
||||
check_user_and_group
|
||||
|
||||
mkdir -p /etc/my.cnf.d
|
||||
make_dir $CONFIG_DIR
|
||||
|
||||
bash -c 'echo "[client-server]
|
||||
socket=/run/mysqld/mysqld.sock" > /etc/my.cnf.d/socket.cnf'
|
||||
echo "[client-server]
|
||||
socket=/run/mysqld/mysqld.sock" > $CONFIG_DIR/socket.cnf
|
||||
|
||||
mv $INSTALL_PREFIX/lib/mysql/plugin/ha_columnstore.so /tmp/ha_columnstore_1.so || mv $INSTALL_PREFIX/lib64/mysql/plugin/ha_columnstore.so /tmp/ha_columnstore_2.so
|
||||
mkdir -p /var/lib/mysql
|
||||
chown mysql:mysql /var/lib/mysql
|
||||
make_dir /var/lib/mysql
|
||||
|
||||
message "Running mysql_install_db"
|
||||
sudo -u mysql mysql_install_db --rpm --user=mysql > /dev/null
|
||||
@ -468,7 +477,7 @@ socket=/run/mysqld/mysqld.sock" > /etc/my.cnf.d/socket.cnf'
|
||||
|
||||
enable_columnstore_back
|
||||
|
||||
mkdir -p /etc/columnstore
|
||||
make_dir /etc/columnstore
|
||||
|
||||
cp $MDB_SOURCE_PATH/storage/columnstore/columnstore/oam/etc/Columnstore.xml /etc/columnstore/Columnstore.xml
|
||||
cp $MDB_SOURCE_PATH/storage/columnstore/columnstore/storage-manager/storagemanager.cnf /etc/columnstore/storagemanager.cnf
|
||||
@ -477,8 +486,8 @@ socket=/run/mysqld/mysqld.sock" > /etc/my.cnf.d/socket.cnf'
|
||||
cp $MDB_SOURCE_PATH/storage/columnstore/columnstore/oam/install_scripts/*.service /lib/systemd/system/
|
||||
|
||||
if [[ "$OS" = 'Ubuntu' || "$OS" = 'Debian' ]]; then
|
||||
mkdir -p /usr/share/mysql
|
||||
mkdir -p /etc/mysql/
|
||||
make_dir /usr/share/mysql
|
||||
make_dir /etc/mysql/
|
||||
cp $MDB_SOURCE_PATH/debian/additions/debian-start.inc.sh /usr/share/mysql/debian-start.inc.sh
|
||||
cp $MDB_SOURCE_PATH/debian/additions/debian-start /etc/mysql/debian-start
|
||||
> /etc/mysql/debian.cnf
|
||||
@ -496,15 +505,11 @@ socket=/run/mysqld/mysqld.sock" > /etc/my.cnf.d/socket.cnf'
|
||||
cp -rp /etc/mysql/conf.d/* /etc/my.cnf.d
|
||||
fi
|
||||
|
||||
mkdir -p /var/lib/columnstore/data1
|
||||
mkdir -p /var/lib/columnstore/data1/systemFiles
|
||||
mkdir -p /var/lib/columnstore/data1/systemFiles/dbrm
|
||||
mkdir -p /run/mysqld/
|
||||
|
||||
mkdir -p $DATA_DIR
|
||||
chown -R mysql:mysql $DATA_DIR
|
||||
chown -R mysql:mysql /var/lib/columnstore/
|
||||
chown -R mysql:mysql /run/mysqld/
|
||||
make_dir /var/lib/columnstore/data1
|
||||
make_dir /var/lib/columnstore/data1/systemFiles
|
||||
make_dir /var/lib/columnstore/data1/systemFiles/dbrm
|
||||
make_dir /run/mysqld/
|
||||
make_dir $DATA_DIR
|
||||
|
||||
chmod +x $INSTALL_PREFIX/bin/mariadb*
|
||||
|
||||
@ -513,7 +518,7 @@ socket=/run/mysqld/mysqld.sock" > /etc/my.cnf.d/socket.cnf'
|
||||
start_storage_manager_if_needed
|
||||
|
||||
message "Running columnstore-post-install"
|
||||
mkdir -p /var/lib/columnstore/local
|
||||
make_dir /var/lib/columnstore/local
|
||||
columnstore-post-install --rpmmode=install
|
||||
message "Running install_mcs_mysql"
|
||||
install_mcs_mysql.sh
|
||||
|
@ -4387,7 +4387,7 @@ ReturnedColumn* buildFunctionColumn(Item_func* ifp, gp_walk_info& gwi, bool& non
|
||||
|
||||
// A few functions use a different collation than that found in
|
||||
// the base ifp class
|
||||
if (funcName == "locate" || funcName == "find_in_set" || funcName == "strcmp")
|
||||
if (funcName == "locate" || funcName == "find_in_set" || funcName == "strcmp" || funcName == "regexp_instr")
|
||||
{
|
||||
DTCollation dt;
|
||||
ifp->Type_std_attributes::agg_arg_charsets_for_comparison(dt, ifp->func_name_cstring(),
|
||||
|
@ -67,7 +67,7 @@ extern boost::condition_variable cond;
|
||||
|
||||
namespace
|
||||
{
|
||||
const std::string myname = "DMLProc";
|
||||
[[maybe_unused]] const std::string myname = "DMLProc";
|
||||
}
|
||||
|
||||
namespace dmlprocessor
|
||||
|
@ -42,6 +42,96 @@ t1_INT t1_INT REGEXP '-26'
|
||||
-7299 0
|
||||
103 0
|
||||
9913 0
|
||||
SELECT t1_INT, REGEXP_SUBSTR(t1_INT, '99$') FROM t1 ORDER BY 1;
|
||||
t1_INT REGEXP_SUBSTR(t1_INT, '99$')
|
||||
-7299 99
|
||||
103
|
||||
9913
|
||||
SELECT t1_INT, REGEXP_SUBSTR(t1_INT, '\.99$') FROM t1 ORDER BY 1;
|
||||
t1_INT REGEXP_SUBSTR(t1_INT, '\.99$')
|
||||
-7299 299
|
||||
103
|
||||
9913
|
||||
SELECT t1_INT, REGEXP_SUBSTR(t1_INT, '03$') FROM t1 ORDER BY 1;
|
||||
t1_INT REGEXP_SUBSTR(t1_INT, '03$')
|
||||
-7299
|
||||
103 03
|
||||
9913
|
||||
SELECT t1_INT, REGEXP_SUBSTR(t1_INT, '\.+') FROM t1 ORDER BY 1;
|
||||
t1_INT REGEXP_SUBSTR(t1_INT, '\.+')
|
||||
-7299 -7299
|
||||
103 103
|
||||
9913 9913
|
||||
SELECT t1_INT, REGEXP_SUBSTR(t1_INT, '0?') FROM t1 ORDER BY 1;
|
||||
t1_INT REGEXP_SUBSTR(t1_INT, '0?')
|
||||
-7299
|
||||
103
|
||||
9913
|
||||
SELECT t1_INT, REGEXP_SUBSTR(t1_INT, '-26') FROM t1 ORDER BY 1;
|
||||
t1_INT REGEXP_SUBSTR(t1_INT, '-26')
|
||||
-7299
|
||||
103
|
||||
9913
|
||||
SELECT t1_INT, REGEXP_INSTR(t1_INT, '99$') FROM t1 ORDER BY 1;
|
||||
t1_INT REGEXP_INSTR(t1_INT, '99$')
|
||||
-7299 4
|
||||
103 0
|
||||
9913 0
|
||||
SELECT t1_INT, REGEXP_INSTR(t1_INT, '\.99$') FROM t1 ORDER BY 1;
|
||||
t1_INT REGEXP_INSTR(t1_INT, '\.99$')
|
||||
-7299 3
|
||||
103 0
|
||||
9913 0
|
||||
SELECT t1_INT, REGEXP_INSTR(t1_INT, '03$') FROM t1 ORDER BY 1;
|
||||
t1_INT REGEXP_INSTR(t1_INT, '03$')
|
||||
-7299 0
|
||||
103 2
|
||||
9913 0
|
||||
SELECT t1_INT, REGEXP_INSTR(t1_INT, '\.+') FROM t1 ORDER BY 1;
|
||||
t1_INT REGEXP_INSTR(t1_INT, '\.+')
|
||||
-7299 1
|
||||
103 1
|
||||
9913 1
|
||||
SELECT t1_INT, REGEXP_INSTR(t1_INT, '0?') FROM t1 ORDER BY 1;
|
||||
t1_INT REGEXP_INSTR(t1_INT, '0?')
|
||||
-7299 1
|
||||
103 1
|
||||
9913 1
|
||||
SELECT t1_INT, REGEXP_INSTR(t1_INT, '-26') FROM t1 ORDER BY 1;
|
||||
t1_INT REGEXP_INSTR(t1_INT, '-26')
|
||||
-7299 0
|
||||
103 0
|
||||
9913 0
|
||||
SELECT t1_INT, REGEXP_REPLACE(t1_INT, '99$', 'TeddyBear') FROM t1 ORDER BY 1;
|
||||
t1_INT REGEXP_REPLACE(t1_INT, '99$', 'TeddyBear')
|
||||
-7299 -72TeddyBear
|
||||
103 103
|
||||
9913 9913
|
||||
SELECT t1_INT, REGEXP_REPLACE(t1_INT, '\.99$', 'TeddyBear') FROM t1 ORDER BY 1;
|
||||
t1_INT REGEXP_REPLACE(t1_INT, '\.99$', 'TeddyBear')
|
||||
-7299 -7TeddyBear
|
||||
103 103
|
||||
9913 9913
|
||||
SELECT t1_INT, REGEXP_REPLACE(t1_INT, '03$', 'TeddyBear') FROM t1 ORDER BY 1;
|
||||
t1_INT REGEXP_REPLACE(t1_INT, '03$', 'TeddyBear')
|
||||
-7299 -7299
|
||||
103 1TeddyBear
|
||||
9913 9913
|
||||
SELECT t1_INT, REGEXP_REPLACE(t1_INT, '\.+', 'TeddyBear') FROM t1 ORDER BY 1;
|
||||
t1_INT REGEXP_REPLACE(t1_INT, '\.+', 'TeddyBear')
|
||||
-7299 TeddyBear
|
||||
103 TeddyBear
|
||||
9913 TeddyBear
|
||||
SELECT t1_INT, REGEXP_REPLACE(t1_INT, '0?', 'TeddyBear') FROM t1 ORDER BY 1;
|
||||
t1_INT REGEXP_REPLACE(t1_INT, '0?', 'TeddyBear')
|
||||
-7299 TeddyBear-TeddyBear7TeddyBear2TeddyBear9TeddyBear9TeddyBear
|
||||
103 TeddyBear1TeddyBearTeddyBear3TeddyBear
|
||||
9913 TeddyBear9TeddyBear9TeddyBear1TeddyBear3TeddyBear
|
||||
SELECT t1_INT, REGEXP_REPLACE(t1_INT, '-26', 'TeddyBear') FROM t1 ORDER BY 1;
|
||||
t1_INT REGEXP_REPLACE(t1_INT, '-26', 'TeddyBear')
|
||||
-7299 -7299
|
||||
103 103
|
||||
9913 9913
|
||||
SELECT t1_DECIMAL, t1_DECIMAL REGEXP '99$' FROM t1 ORDER BY 1;
|
||||
t1_DECIMAL t1_DECIMAL REGEXP '99$'
|
||||
111.99000 0
|
||||
@ -72,6 +162,96 @@ t1_DECIMAL t1_DECIMAL REGEXP '-26'
|
||||
111.99000 0
|
||||
1234.56990 0
|
||||
98765.43210 0
|
||||
SELECT t1_DECIMAL, REGEXP_SUBSTR(t1_DECIMAL, '99$') FROM t1 ORDER BY 1;
|
||||
t1_DECIMAL REGEXP_SUBSTR(t1_DECIMAL, '99$')
|
||||
111.99000
|
||||
1234.56990
|
||||
98765.43210
|
||||
SELECT t1_DECIMAL, REGEXP_SUBSTR(t1_DECIMAL, '\.99$') FROM t1 ORDER BY 1;
|
||||
t1_DECIMAL REGEXP_SUBSTR(t1_DECIMAL, '\.99$')
|
||||
111.99000
|
||||
1234.56990
|
||||
98765.43210
|
||||
SELECT t1_DECIMAL, REGEXP_SUBSTR(t1_DECIMAL, '03$') FROM t1 ORDER BY 1;
|
||||
t1_DECIMAL REGEXP_SUBSTR(t1_DECIMAL, '03$')
|
||||
111.99000
|
||||
1234.56990
|
||||
98765.43210
|
||||
SELECT t1_DECIMAL, REGEXP_SUBSTR(t1_DECIMAL, '\.+') FROM t1 ORDER BY 1;
|
||||
t1_DECIMAL REGEXP_SUBSTR(t1_DECIMAL, '\.+')
|
||||
111.99000 111.99000
|
||||
1234.56990 1234.56990
|
||||
98765.43210 98765.43210
|
||||
SELECT t1_DECIMAL, REGEXP_SUBSTR(t1_DECIMAL, '0?') FROM t1 ORDER BY 1;
|
||||
t1_DECIMAL REGEXP_SUBSTR(t1_DECIMAL, '0?')
|
||||
111.99000
|
||||
1234.56990
|
||||
98765.43210
|
||||
SELECT t1_DECIMAL, REGEXP_SUBSTR(t1_DECIMAL, '-26') FROM t1 ORDER BY 1;
|
||||
t1_DECIMAL REGEXP_SUBSTR(t1_DECIMAL, '-26')
|
||||
111.99000
|
||||
1234.56990
|
||||
98765.43210
|
||||
SELECT t1_DECIMAL, REGEXP_INSTR(t1_DECIMAL, '99$') FROM t1 ORDER BY 1;
|
||||
t1_DECIMAL REGEXP_INSTR(t1_DECIMAL, '99$')
|
||||
111.99000 0
|
||||
1234.56990 0
|
||||
98765.43210 0
|
||||
SELECT t1_DECIMAL, REGEXP_INSTR(t1_DECIMAL, '\.99$') FROM t1 ORDER BY 1;
|
||||
t1_DECIMAL REGEXP_INSTR(t1_DECIMAL, '\.99$')
|
||||
111.99000 0
|
||||
1234.56990 0
|
||||
98765.43210 0
|
||||
SELECT t1_DECIMAL, REGEXP_INSTR(t1_DECIMAL, '03$') FROM t1 ORDER BY 1;
|
||||
t1_DECIMAL REGEXP_INSTR(t1_DECIMAL, '03$')
|
||||
111.99000 0
|
||||
1234.56990 0
|
||||
98765.43210 0
|
||||
SELECT t1_DECIMAL, REGEXP_INSTR(t1_DECIMAL, '\.+') FROM t1 ORDER BY 1;
|
||||
t1_DECIMAL REGEXP_INSTR(t1_DECIMAL, '\.+')
|
||||
111.99000 1
|
||||
1234.56990 1
|
||||
98765.43210 1
|
||||
SELECT t1_DECIMAL, REGEXP_INSTR(t1_DECIMAL, '0?') FROM t1 ORDER BY 1;
|
||||
t1_DECIMAL REGEXP_INSTR(t1_DECIMAL, '0?')
|
||||
111.99000 1
|
||||
1234.56990 1
|
||||
98765.43210 1
|
||||
SELECT t1_DECIMAL, REGEXP_INSTR(t1_DECIMAL, '-26') FROM t1 ORDER BY 1;
|
||||
t1_DECIMAL REGEXP_INSTR(t1_DECIMAL, '-26')
|
||||
111.99000 0
|
||||
1234.56990 0
|
||||
98765.43210 0
|
||||
SELECT t1_DECIMAL, REGEXP_REPLACE(t1_DECIMAL, '99$', 'TeddyBear') FROM t1 ORDER BY 1;
|
||||
t1_DECIMAL REGEXP_REPLACE(t1_DECIMAL, '99$', 'TeddyBear')
|
||||
111.99000 111.99000
|
||||
1234.56990 1234.56990
|
||||
98765.43210 98765.43210
|
||||
SELECT t1_DECIMAL, REGEXP_REPLACE(t1_DECIMAL, '\.99$', 'TeddyBear') FROM t1 ORDER BY 1;
|
||||
t1_DECIMAL REGEXP_REPLACE(t1_DECIMAL, '\.99$', 'TeddyBear')
|
||||
111.99000 111.99000
|
||||
1234.56990 1234.56990
|
||||
98765.43210 98765.43210
|
||||
SELECT t1_DECIMAL, REGEXP_REPLACE(t1_DECIMAL, '03$', 'TeddyBear') FROM t1 ORDER BY 1;
|
||||
t1_DECIMAL REGEXP_REPLACE(t1_DECIMAL, '03$', 'TeddyBear')
|
||||
111.99000 111.99000
|
||||
1234.56990 1234.56990
|
||||
98765.43210 98765.43210
|
||||
SELECT t1_DECIMAL, REGEXP_REPLACE(t1_DECIMAL, '\.+', 'TeddyBear') FROM t1 ORDER BY 1;
|
||||
t1_DECIMAL REGEXP_REPLACE(t1_DECIMAL, '\.+', 'TeddyBear')
|
||||
111.99000 TeddyBear
|
||||
1234.56990 TeddyBear
|
||||
98765.43210 TeddyBear
|
||||
SELECT t1_DECIMAL, REGEXP_REPLACE(t1_DECIMAL, '0?', 'TeddyBear') FROM t1 ORDER BY 1;
|
||||
t1_DECIMAL REGEXP_REPLACE(t1_DECIMAL, '0?', 'TeddyBear')
|
||||
111.99000 TeddyBear1TeddyBear1TeddyBear1TeddyBear.TeddyBear9TeddyBear9TeddyBearTeddyBearTeddyBearTeddyBear
|
||||
1234.56990 TeddyBear1TeddyBear2TeddyBear3TeddyBear4TeddyBear.TeddyBear5TeddyBear6TeddyBear9TeddyBear9TeddyBearTeddyBear
|
||||
98765.43210 TeddyBear9TeddyBear8TeddyBear7TeddyBear6TeddyBear5TeddyBear.TeddyBear4TeddyBear3TeddyBear2TeddyBear1TeddyBearTeddyBear
|
||||
SELECT t1_DECIMAL, REGEXP_REPLACE(t1_DECIMAL, '-26', 'TeddyBear') FROM t1 ORDER BY 1;
|
||||
t1_DECIMAL REGEXP_REPLACE(t1_DECIMAL, '-26', 'TeddyBear')
|
||||
111.99000 111.99000
|
||||
1234.56990 1234.56990
|
||||
98765.43210 98765.43210
|
||||
SELECT t1_TEXT, t1_TEXT REGEXP 'oooo$' FROM t1 ORDER BY 1;
|
||||
t1_TEXT t1_TEXT REGEXP 'oooo$'
|
||||
ooooo 1
|
||||
@ -112,6 +292,131 @@ t1_TEXT t1_TEXT REGEXP 'o?o'
|
||||
ooooo 1
|
||||
ppppp 0
|
||||
qqqqq 0
|
||||
SELECT t1_TEXT, REGEXP_SUBSTR(t1_TEXT, 'oooo$') FROM t1 ORDER BY 1;
|
||||
t1_TEXT REGEXP_SUBSTR(t1_TEXT, 'oooo$')
|
||||
ooooo oooo
|
||||
ppppp
|
||||
qqqqq
|
||||
SELECT t1_TEXT, REGEXP_SUBSTR(t1_TEXT, 'q$') FROM t1 ORDER BY 1;
|
||||
t1_TEXT REGEXP_SUBSTR(t1_TEXT, 'q$')
|
||||
ooooo
|
||||
ppppp
|
||||
qqqqq q
|
||||
SELECT t1_TEXT, REGEXP_SUBSTR(t1_TEXT, 'ppppp$') FROM t1 ORDER BY 1;
|
||||
t1_TEXT REGEXP_SUBSTR(t1_TEXT, 'ppppp$')
|
||||
ooooo
|
||||
ppppp ppppp
|
||||
qqqqq
|
||||
SELECT t1_TEXT, REGEXP_SUBSTR(t1_TEXT, '(ooo)+') FROM t1 ORDER BY 1;
|
||||
t1_TEXT REGEXP_SUBSTR(t1_TEXT, '(ooo)+')
|
||||
ooooo ooo
|
||||
ppppp
|
||||
qqqqq
|
||||
SELECT t1_TEXT, REGEXP_SUBSTR(t1_TEXT, '(qqqqq)+') FROM t1 ORDER BY 1;
|
||||
t1_TEXT REGEXP_SUBSTR(t1_TEXT, '(qqqqq)+')
|
||||
ooooo
|
||||
ppppp
|
||||
qqqqq qqqqq
|
||||
SELECT t1_TEXT, REGEXP_SUBSTR(t1_TEXT, 'p*') FROM t1 ORDER BY 1;
|
||||
t1_TEXT REGEXP_SUBSTR(t1_TEXT, 'p*')
|
||||
ooooo
|
||||
ppppp ppppp
|
||||
qqqqq
|
||||
SELECT t1_TEXT, REGEXP_SUBSTR(t1_TEXT, 'qq+q') FROM t1 ORDER BY 1;
|
||||
t1_TEXT REGEXP_SUBSTR(t1_TEXT, 'qq+q')
|
||||
ooooo
|
||||
ppppp
|
||||
qqqqq qqqqq
|
||||
SELECT t1_TEXT, REGEXP_SUBSTR(t1_TEXT, 'o?o') FROM t1 ORDER BY 1;
|
||||
t1_TEXT REGEXP_SUBSTR(t1_TEXT, 'o?o')
|
||||
ooooo oo
|
||||
ppppp
|
||||
qqqqq
|
||||
SELECT t1_TEXT, REGEXP_INSTR(t1_TEXT, 'oooo$') FROM t1 ORDER BY 1;
|
||||
t1_TEXT REGEXP_INSTR(t1_TEXT, 'oooo$')
|
||||
ooooo 2
|
||||
ppppp 0
|
||||
qqqqq 0
|
||||
SELECT t1_TEXT, REGEXP_INSTR(t1_TEXT, 'q$') FROM t1 ORDER BY 1;
|
||||
t1_TEXT REGEXP_INSTR(t1_TEXT, 'q$')
|
||||
ooooo 0
|
||||
ppppp 0
|
||||
qqqqq 5
|
||||
SELECT t1_TEXT, REGEXP_INSTR(t1_TEXT, 'ppppp$') FROM t1 ORDER BY 1;
|
||||
t1_TEXT REGEXP_INSTR(t1_TEXT, 'ppppp$')
|
||||
ooooo 0
|
||||
ppppp 1
|
||||
qqqqq 0
|
||||
SELECT t1_TEXT, REGEXP_INSTR(t1_TEXT, '(ooo)+') FROM t1 ORDER BY 1;
|
||||
t1_TEXT REGEXP_INSTR(t1_TEXT, '(ooo)+')
|
||||
ooooo 1
|
||||
ppppp 0
|
||||
qqqqq 0
|
||||
SELECT t1_TEXT, REGEXP_INSTR(t1_TEXT, '(qqqqq)+') FROM t1 ORDER BY 1;
|
||||
t1_TEXT REGEXP_INSTR(t1_TEXT, '(qqqqq)+')
|
||||
ooooo 0
|
||||
ppppp 0
|
||||
qqqqq 1
|
||||
SELECT t1_TEXT, REGEXP_INSTR(t1_TEXT, 'p*') FROM t1 ORDER BY 1;
|
||||
t1_TEXT REGEXP_INSTR(t1_TEXT, 'p*')
|
||||
ooooo 1
|
||||
ppppp 1
|
||||
qqqqq 1
|
||||
SELECT t1_TEXT, REGEXP_INSTR(t1_TEXT, 'qq+q') FROM t1 ORDER BY 1;
|
||||
t1_TEXT REGEXP_INSTR(t1_TEXT, 'qq+q')
|
||||
ooooo 0
|
||||
ppppp 0
|
||||
qqqqq 1
|
||||
SELECT t1_TEXT, REGEXP_INSTR(t1_TEXT, 'o?o') FROM t1 ORDER BY 1;
|
||||
t1_TEXT REGEXP_INSTR(t1_TEXT, 'o?o')
|
||||
ooooo 1
|
||||
ppppp 0
|
||||
qqqqq 0
|
||||
SELECT t1_TEXT, REGEXP_REPLACE(t1_TEXT, 'oooo$', 'KittyCat') FROM t1 ORDER BY 1;
|
||||
t1_TEXT REGEXP_REPLACE(t1_TEXT, 'oooo$', 'KittyCat')
|
||||
ooooo oKittyCat
|
||||
ppppp ppppp
|
||||
qqqqq qqqqq
|
||||
SELECT t1_TEXT, REGEXP_REPLACE(t1_TEXT, 'q$', 'KittyCat') FROM t1 ORDER BY 1;
|
||||
t1_TEXT REGEXP_REPLACE(t1_TEXT, 'q$', 'KittyCat')
|
||||
ooooo ooooo
|
||||
ppppp ppppp
|
||||
qqqqq qqqqKittyCat
|
||||
SELECT t1_TEXT, REGEXP_REPLACE(t1_TEXT, 'ppppp$', 'KittyCat') FROM t1 ORDER BY 1;
|
||||
t1_TEXT REGEXP_REPLACE(t1_TEXT, 'ppppp$', 'KittyCat')
|
||||
ooooo ooooo
|
||||
ppppp KittyCat
|
||||
qqqqq qqqqq
|
||||
SELECT t1_TEXT, REGEXP_REPLACE(t1_TEXT, '(ooo)+', 'KittyCat') FROM t1 ORDER BY 1;
|
||||
t1_TEXT REGEXP_REPLACE(t1_TEXT, '(ooo)+', 'KittyCat')
|
||||
ooooo KittyCatoo
|
||||
ppppp ppppp
|
||||
qqqqq qqqqq
|
||||
SELECT t1_TEXT, REGEXP_REPLACE(t1_TEXT, '(qqqqq)+', 'KittyCat') FROM t1 ORDER BY 1;
|
||||
t1_TEXT REGEXP_REPLACE(t1_TEXT, '(qqqqq)+', 'KittyCat')
|
||||
ooooo ooooo
|
||||
ppppp ppppp
|
||||
qqqqq KittyCat
|
||||
SELECT t1_TEXT, REGEXP_REPLACE(t1_TEXT, 'p*', 'KittyCat') FROM t1 ORDER BY 1;
|
||||
t1_TEXT REGEXP_REPLACE(t1_TEXT, 'p*', 'KittyCat')
|
||||
ooooo KittyCatoKittyCatoKittyCatoKittyCatoKittyCatoKittyCat
|
||||
ppppp KittyCatKittyCat
|
||||
qqqqq KittyCatqKittyCatqKittyCatqKittyCatqKittyCatqKittyCat
|
||||
SELECT t1_TEXT, REGEXP_REPLACE(t1_TEXT, 'qq+q', 'KittyCat') FROM t1 ORDER BY 1;
|
||||
t1_TEXT REGEXP_REPLACE(t1_TEXT, 'qq+q', 'KittyCat')
|
||||
ooooo ooooo
|
||||
ppppp ppppp
|
||||
qqqqq KittyCat
|
||||
SELECT t1_TEXT, REGEXP_REPLACE(t1_TEXT, 'o?o', 'KittyCat') FROM t1 ORDER BY 1;
|
||||
t1_TEXT REGEXP_REPLACE(t1_TEXT, 'o?o', 'KittyCat')
|
||||
ooooo KittyCatKittyCatKittyCat
|
||||
ppppp ppppp
|
||||
qqqqq qqqqq
|
||||
SELECT t1_TEXT, REGEXP_REPLACE(t1_TEXT, 'q', 'KittyCat') FROM t1 ORDER BY 1;
|
||||
t1_TEXT REGEXP_REPLACE(t1_TEXT, 'q', 'KittyCat')
|
||||
ooooo ooooo
|
||||
ppppp ppppp
|
||||
qqqqq KittyCatKittyCatKittyCatKittyCatKittyCat
|
||||
SELECT t1_DATE, t1_DATE REGEXP '(1997)+' FROM t1 ORDER BY 1;
|
||||
t1_DATE t1_DATE REGEXP '(1997)+'
|
||||
1997-12-12 1
|
||||
@ -132,6 +437,66 @@ t1_DATE t1_DATE REGEXP '(09-12-)+'
|
||||
1997-12-12 0
|
||||
2001-01-01 0
|
||||
2009-12-11 1
|
||||
SELECT t1_DATE, REGEXP_SUBSTR(t1_DATE, '(1997)+') FROM t1 ORDER BY 1;
|
||||
t1_DATE REGEXP_SUBSTR(t1_DATE, '(1997)+')
|
||||
1997-12-12 1997
|
||||
2001-01-01
|
||||
2009-12-11
|
||||
SELECT t1_DATE, REGEXP_SUBSTR(t1_DATE, '01$') FROM t1 ORDER BY 1;
|
||||
t1_DATE REGEXP_SUBSTR(t1_DATE, '01$')
|
||||
1997-12-12
|
||||
2001-01-01 01
|
||||
2009-12-11
|
||||
SELECT t1_DATE, REGEXP_SUBSTR(t1_DATE, '(59)+') FROM t1 ORDER BY 1;
|
||||
t1_DATE REGEXP_SUBSTR(t1_DATE, '(59)+')
|
||||
1997-12-12
|
||||
2001-01-01
|
||||
2009-12-11
|
||||
SELECT t1_DATE, REGEXP_SUBSTR(t1_DATE, '(09-12-)+') FROM t1 ORDER BY 1;
|
||||
t1_DATE REGEXP_SUBSTR(t1_DATE, '(09-12-)+')
|
||||
1997-12-12
|
||||
2001-01-01
|
||||
2009-12-11 09-12-
|
||||
SELECT t1_DATE, REGEXP_INSTR(t1_DATE, '(1997)+') FROM t1 ORDER BY 1;
|
||||
t1_DATE REGEXP_INSTR(t1_DATE, '(1997)+')
|
||||
1997-12-12 1
|
||||
2001-01-01 0
|
||||
2009-12-11 0
|
||||
SELECT t1_DATE, REGEXP_INSTR(t1_DATE, '01$') FROM t1 ORDER BY 1;
|
||||
t1_DATE REGEXP_INSTR(t1_DATE, '01$')
|
||||
1997-12-12 0
|
||||
2001-01-01 9
|
||||
2009-12-11 0
|
||||
SELECT t1_DATE, REGEXP_INSTR(t1_DATE, '(59)+') FROM t1 ORDER BY 1;
|
||||
t1_DATE REGEXP_INSTR(t1_DATE, '(59)+')
|
||||
1997-12-12 0
|
||||
2001-01-01 0
|
||||
2009-12-11 0
|
||||
SELECT t1_DATE, REGEXP_INSTR(t1_DATE, '(09-12-)+') FROM t1 ORDER BY 1;
|
||||
t1_DATE REGEXP_INSTR(t1_DATE, '(09-12-)+')
|
||||
1997-12-12 0
|
||||
2001-01-01 0
|
||||
2009-12-11 3
|
||||
SELECT t1_DATE, REGEXP_REPLACE(t1_DATE, '(1997)+', 'TeddyBear') FROM t1 ORDER BY 1;
|
||||
t1_DATE REGEXP_REPLACE(t1_DATE, '(1997)+', 'TeddyBear')
|
||||
1997-12-12 TeddyBear-12-12
|
||||
2001-01-01 2001-01-01
|
||||
2009-12-11 2009-12-11
|
||||
SELECT t1_DATE, REGEXP_REPLACE(t1_DATE, '01$', 'TeddyBear') FROM t1 ORDER BY 1;
|
||||
t1_DATE REGEXP_REPLACE(t1_DATE, '01$', 'TeddyBear')
|
||||
1997-12-12 1997-12-12
|
||||
2001-01-01 2001-01-TeddyBear
|
||||
2009-12-11 2009-12-11
|
||||
SELECT t1_DATE, REGEXP_REPLACE(t1_DATE, '(59)+', 'TeddyBear') FROM t1 ORDER BY 1;
|
||||
t1_DATE REGEXP_REPLACE(t1_DATE, '(59)+', 'TeddyBear')
|
||||
1997-12-12 1997-12-12
|
||||
2001-01-01 2001-01-01
|
||||
2009-12-11 2009-12-11
|
||||
SELECT t1_DATE, REGEXP_REPLACE(t1_DATE, '(09-12-)+', 'TeddyBear') FROM t1 ORDER BY 1;
|
||||
t1_DATE REGEXP_REPLACE(t1_DATE, '(09-12-)+', 'TeddyBear')
|
||||
1997-12-12 1997-12-12
|
||||
2001-01-01 2001-01-01
|
||||
2009-12-11 20TeddyBear11
|
||||
SELECT t1_TIME, t1_TIME REGEXP '(59)+' FROM t1 ORDER BY 1;
|
||||
t1_TIME t1_TIME REGEXP '(59)+'
|
||||
01:08:59 1
|
||||
@ -142,4 +507,161 @@ t1_TIME t1_TIME REGEXP '22$'
|
||||
01:08:59 0
|
||||
22:12:02 0
|
||||
23:59:59 0
|
||||
SELECT t1_TIME, REGEXP_SUBSTR(t1_TIME, '(59)+') FROM t1 ORDER BY 1;
|
||||
t1_TIME REGEXP_SUBSTR(t1_TIME, '(59)+')
|
||||
01:08:59 59
|
||||
22:12:02
|
||||
23:59:59 59
|
||||
SELECT t1_TIME, REGEXP_SUBSTR(t1_TIME, '22$') FROM t1 ORDER BY 1;
|
||||
t1_TIME REGEXP_SUBSTR(t1_TIME, '22$')
|
||||
01:08:59
|
||||
22:12:02
|
||||
23:59:59
|
||||
SELECT t1_TIME, REGEXP_INSTR(t1_TIME, '(59)+') FROM t1 ORDER BY 1;
|
||||
t1_TIME REGEXP_INSTR(t1_TIME, '(59)+')
|
||||
01:08:59 7
|
||||
22:12:02 0
|
||||
23:59:59 4
|
||||
SELECT t1_TIME, REGEXP_INSTR(t1_TIME, '22$') FROM t1 ORDER BY 1;
|
||||
t1_TIME REGEXP_INSTR(t1_TIME, '22$')
|
||||
01:08:59 0
|
||||
22:12:02 0
|
||||
23:59:59 0
|
||||
SELECT t1_TIME, REGEXP_REPLACE(t1_TIME, '(59)+', 'KittyCat') FROM t1 ORDER BY 1;
|
||||
t1_TIME REGEXP_REPLACE(t1_TIME, '(59)+', 'KittyCat')
|
||||
01:08:59 01:08:KittyCat
|
||||
22:12:02 22:12:02
|
||||
23:59:59 23:KittyCat:KittyCat
|
||||
SELECT t1_TIME, REGEXP_REPLACE(t1_TIME, '22$', 'KittyCat') FROM t1 ORDER BY 1;
|
||||
t1_TIME REGEXP_REPLACE(t1_TIME, '22$', 'KittyCat')
|
||||
01:08:59 01:08:59
|
||||
22:12:02 22:12:02
|
||||
23:59:59 23:59:59
|
||||
SET character_set_connection = 'utf8';
|
||||
SET NAMES utf8mb3;
|
||||
CREATE TABLE t2 (hello text) DEFAULT CHARSET=utf8 engine columnstore;
|
||||
INSERT INTO t2 values('こんにちは');
|
||||
INSERT INTO t2 values('привет');
|
||||
INSERT INTO t2 values('Γεια');
|
||||
INSERT INTO t2 values('სალამი');
|
||||
SELECT hello, hello regexp 'ん.ち' FROM t2;
|
||||
hello hello regexp 'ん.ち'
|
||||
こんにちは 0
|
||||
привет 0
|
||||
Γεια 0
|
||||
სალამი 0
|
||||
SELECT hello, hello regexp 'и.е' FROM t2;
|
||||
hello hello regexp 'и.е'
|
||||
こんにちは 0
|
||||
привет 0
|
||||
Γεια 0
|
||||
სალამი 0
|
||||
SELECT hello, hello regexp 'ε.α' FROM t2;
|
||||
hello hello regexp 'ε.α'
|
||||
こんにちは 0
|
||||
привет 0
|
||||
Γεια 0
|
||||
სალამი 0
|
||||
SELECT hello, hello regexp 'ა.ა' FROM t2;
|
||||
hello hello regexp 'ა.ა'
|
||||
こんにちは 0
|
||||
привет 0
|
||||
Γεια 0
|
||||
სალამი 0
|
||||
SELECT hello, regexp_substr(hello, 'ん.ち') FROM t2;
|
||||
hello regexp_substr(hello, 'ん.ち')
|
||||
こんにちは んにち
|
||||
привет
|
||||
Γεια
|
||||
სალამი
|
||||
SELECT hello, regexp_substr(hello, 'и.е') FROM t2;
|
||||
hello regexp_substr(hello, 'и.е')
|
||||
こんにちは
|
||||
привет иве
|
||||
Γεια
|
||||
სალამი
|
||||
SELECT hello, regexp_substr(hello, 'ε.α') FROM t2;
|
||||
hello regexp_substr(hello, 'ε.α')
|
||||
こんにちは
|
||||
привет
|
||||
Γεια εια
|
||||
სალამი
|
||||
SELECT hello, regexp_substr(hello, 'ა.ა') FROM t2;
|
||||
hello regexp_substr(hello, 'ა.ა')
|
||||
こんにちは
|
||||
привет
|
||||
Γεια
|
||||
სალამი ალა
|
||||
SELECT hello, regexp_instr(hello, 'ん.ち') FROM t2;
|
||||
hello regexp_instr(hello, 'ん.ち')
|
||||
こんにちは 2
|
||||
привет 0
|
||||
Γεια 0
|
||||
სალამი 0
|
||||
SELECT hello, regexp_instr(hello, 'и.е') FROM t2;
|
||||
hello regexp_instr(hello, 'и.е')
|
||||
こんにちは 0
|
||||
привет 3
|
||||
Γεια 0
|
||||
სალამი 0
|
||||
SELECT hello, regexp_instr(hello, 'ε.α') FROM t2;
|
||||
hello regexp_instr(hello, 'ε.α')
|
||||
こんにちは 0
|
||||
привет 0
|
||||
Γεια 2
|
||||
სალამი 0
|
||||
SELECT hello, regexp_instr(hello, 'ა.ა') FROM t2;
|
||||
hello regexp_instr(hello, 'ა.ა')
|
||||
こんにちは 0
|
||||
привет 0
|
||||
Γεια 0
|
||||
სალამი 2
|
||||
SELECT hello, regexp_replace(hello, 'ん.ち', 'Достоевский') FROM t2;
|
||||
hello regexp_replace(hello, 'ん.ち', 'Достоевский')
|
||||
こんにちは こДостоевскийは
|
||||
привет привет
|
||||
Γεια Γεια
|
||||
სალამი სალამი
|
||||
SELECT hello, regexp_replace(hello, 'и.е', 'Достоевский') FROM t2;
|
||||
hello regexp_replace(hello, 'и.е', 'Достоевский')
|
||||
こんにちは こんにちは
|
||||
привет прДостоевскийт
|
||||
Γεια Γεια
|
||||
სალამი სალამი
|
||||
SELECT hello, regexp_replace(hello, 'ε.α', 'Достоевский') FROM t2;
|
||||
hello regexp_replace(hello, 'ε.α', 'Достоевский')
|
||||
こんにちは こんにちは
|
||||
привет привет
|
||||
Γεια ΓДостоевский
|
||||
სალამი სალამი
|
||||
SELECT hello, regexp_replace(hello, 'ა.ა', 'Достоевский') FROM t2;
|
||||
hello regexp_replace(hello, 'ა.ა', 'Достоевский')
|
||||
こんにちは こんにちは
|
||||
привет привет
|
||||
Γεια Γεια
|
||||
სალამი სДостоевскийმი
|
||||
SHOW VARIABLES LIKE 'character_set%';
|
||||
Variable_name Value
|
||||
character_set_client utf8mb3
|
||||
character_set_connection utf8mb3
|
||||
character_set_database utf8mb3
|
||||
character_set_filesystem binary
|
||||
character_set_results utf8mb3
|
||||
character_set_server utf8mb3
|
||||
character_set_system utf8mb3
|
||||
character_sets_dir /usr/share/mysql/charsets/
|
||||
CREATE TABLE tw(hello text) DEFAULT CHARSET=cp1251 ENGINE COLUMNSTORE;
|
||||
INSERT INTO tw values(convert('привет' USING cp1251));
|
||||
SELECT hello, regexp_instr(hello, convert('и.е' USING cp1251)) FROM tw;
|
||||
hello regexp_instr(hello, convert('и.е' USING cp1251))
|
||||
привет 3
|
||||
SELECT hello, convert(regexp_substr(hello, convert('и.е' USING cp1251)) using utf8) FROM tw;
|
||||
hello convert(regexp_substr(hello, convert('и.е' USING cp1251)) using utf8)
|
||||
привет иве
|
||||
SELECT hello, convert(regexp_replace(hello, convert('и.е' USING cp1251), convert('Достоевкий' USING cp1251)) using utf8) FROM tw;
|
||||
hello convert(regexp_replace(hello, convert('и.е' USING cp1251), convert('Достоевкий' USING cp1251)) using utf8)
|
||||
привет привет
|
||||
SELECT hello, hello regexp convert('и.е' USING cp1251) FROM tw;
|
||||
hello hello regexp convert('и.е' USING cp1251)
|
||||
привет 1
|
||||
DROP DATABASE mcs228_db;
|
||||
|
@ -13,5 +13,8 @@ INSERT INTO t1 VALUES(103, 1234.5699, repeat('o', 5), '1997-12-12', '22:12:02');
|
||||
INSERT INTO t1 VALUES(-7299, 111.99, repeat('p', 5), '2001-1-1', '23:59:59');
|
||||
INSERT INTO t1 VALUES(9913, 98765.4321, repeat('q', 5), '09-12-11', '01:08:59');
|
||||
SELECT t1_TEXT, REGEXP_INSTR(t1_TEXT, 'o') FROM t1 ORDER BY 1;
|
||||
ERROR 42000: The storage engine for the table doesn't support MCS-1001: Function 'regexp_instr' isn't supported.
|
||||
t1_TEXT REGEXP_INSTR(t1_TEXT, 'o')
|
||||
ooooo 1
|
||||
ppppp 0
|
||||
qqqqq 0
|
||||
DROP DATABASE mcs244_db;
|
||||
|
@ -13,5 +13,8 @@ INSERT INTO t1 VALUES(103, 1234.5699, repeat('o', 5), '1997-12-12', '22:12:02');
|
||||
INSERT INTO t1 VALUES(-7299, 111.99, repeat('p', 5), '2001-1-1', '23:59:59');
|
||||
INSERT INTO t1 VALUES(9913, 98765.4321, repeat('q', 5), '09-12-11', '01:08:59');
|
||||
SELECT REGEXP_REPLACE(t1_TEXT, 'o', 'X') FROM t1 ORDER BY 1;
|
||||
ERROR 42000: The storage engine for the table doesn't support MCS-1001: Function 'regexp_replace' isn't supported.
|
||||
REGEXP_REPLACE(t1_TEXT, 'o', 'X')
|
||||
ppppp
|
||||
qqqqq
|
||||
XXXXX
|
||||
DROP DATABASE mcs245_db;
|
||||
|
@ -13,5 +13,8 @@ INSERT INTO t1 VALUES(103, 1234.5699, repeat('o', 5), '1997-12-12', '22:12:02');
|
||||
INSERT INTO t1 VALUES(-7299, 111.99, repeat('p', 5), '2001-1-1', '23:59:59');
|
||||
INSERT INTO t1 VALUES(9913, 98765.4321, repeat('q', 5), '09-12-11', '01:08:59');
|
||||
SELECT REGEXP_SUBSTR(t1_TEXT, '[a-z]+') FROM t1 ORDER BY 1;
|
||||
ERROR 42000: The storage engine for the table doesn't support MCS-1001: Function 'regexp_substr' isn't supported.
|
||||
REGEXP_SUBSTR(t1_TEXT, '[a-z]+')
|
||||
ooooo
|
||||
ppppp
|
||||
qqqqq
|
||||
DROP DATABASE mcs246_db;
|
||||
|
@ -30,6 +30,27 @@ SELECT t1_INT, t1_INT REGEXP '\.+' FROM t1 ORDER BY 1;
|
||||
SELECT t1_INT, t1_INT REGEXP '0?' FROM t1 ORDER BY 1;
|
||||
SELECT t1_INT, t1_INT REGEXP '-26' FROM t1 ORDER BY 1;
|
||||
|
||||
SELECT t1_INT, REGEXP_SUBSTR(t1_INT, '99$') FROM t1 ORDER BY 1;
|
||||
SELECT t1_INT, REGEXP_SUBSTR(t1_INT, '\.99$') FROM t1 ORDER BY 1;
|
||||
SELECT t1_INT, REGEXP_SUBSTR(t1_INT, '03$') FROM t1 ORDER BY 1;
|
||||
SELECT t1_INT, REGEXP_SUBSTR(t1_INT, '\.+') FROM t1 ORDER BY 1;
|
||||
SELECT t1_INT, REGEXP_SUBSTR(t1_INT, '0?') FROM t1 ORDER BY 1;
|
||||
SELECT t1_INT, REGEXP_SUBSTR(t1_INT, '-26') FROM t1 ORDER BY 1;
|
||||
|
||||
SELECT t1_INT, REGEXP_INSTR(t1_INT, '99$') FROM t1 ORDER BY 1;
|
||||
SELECT t1_INT, REGEXP_INSTR(t1_INT, '\.99$') FROM t1 ORDER BY 1;
|
||||
SELECT t1_INT, REGEXP_INSTR(t1_INT, '03$') FROM t1 ORDER BY 1;
|
||||
SELECT t1_INT, REGEXP_INSTR(t1_INT, '\.+') FROM t1 ORDER BY 1;
|
||||
SELECT t1_INT, REGEXP_INSTR(t1_INT, '0?') FROM t1 ORDER BY 1;
|
||||
SELECT t1_INT, REGEXP_INSTR(t1_INT, '-26') FROM t1 ORDER BY 1;
|
||||
|
||||
SELECT t1_INT, REGEXP_REPLACE(t1_INT, '99$', 'TeddyBear') FROM t1 ORDER BY 1;
|
||||
SELECT t1_INT, REGEXP_REPLACE(t1_INT, '\.99$', 'TeddyBear') FROM t1 ORDER BY 1;
|
||||
SELECT t1_INT, REGEXP_REPLACE(t1_INT, '03$', 'TeddyBear') FROM t1 ORDER BY 1;
|
||||
SELECT t1_INT, REGEXP_REPLACE(t1_INT, '\.+', 'TeddyBear') FROM t1 ORDER BY 1;
|
||||
SELECT t1_INT, REGEXP_REPLACE(t1_INT, '0?', 'TeddyBear') FROM t1 ORDER BY 1;
|
||||
SELECT t1_INT, REGEXP_REPLACE(t1_INT, '-26', 'TeddyBear') FROM t1 ORDER BY 1;
|
||||
|
||||
SELECT t1_DECIMAL, t1_DECIMAL REGEXP '99$' FROM t1 ORDER BY 1;
|
||||
SELECT t1_DECIMAL, t1_DECIMAL REGEXP '\.99$' FROM t1 ORDER BY 1;
|
||||
SELECT t1_DECIMAL, t1_DECIMAL REGEXP '03$' FROM t1 ORDER BY 1;
|
||||
@ -37,6 +58,28 @@ SELECT t1_DECIMAL, t1_DECIMAL REGEXP '\.+' FROM t1 ORDER BY 1;
|
||||
SELECT t1_DECIMAL, t1_DECIMAL REGEXP '0?' FROM t1 ORDER BY 1;
|
||||
SELECT t1_DECIMAL, t1_DECIMAL REGEXP '-26' FROM t1 ORDER BY 1;
|
||||
|
||||
SELECT t1_DECIMAL, REGEXP_SUBSTR(t1_DECIMAL, '99$') FROM t1 ORDER BY 1;
|
||||
SELECT t1_DECIMAL, REGEXP_SUBSTR(t1_DECIMAL, '\.99$') FROM t1 ORDER BY 1;
|
||||
SELECT t1_DECIMAL, REGEXP_SUBSTR(t1_DECIMAL, '03$') FROM t1 ORDER BY 1;
|
||||
SELECT t1_DECIMAL, REGEXP_SUBSTR(t1_DECIMAL, '\.+') FROM t1 ORDER BY 1;
|
||||
SELECT t1_DECIMAL, REGEXP_SUBSTR(t1_DECIMAL, '0?') FROM t1 ORDER BY 1;
|
||||
SELECT t1_DECIMAL, REGEXP_SUBSTR(t1_DECIMAL, '-26') FROM t1 ORDER BY 1;
|
||||
|
||||
SELECT t1_DECIMAL, REGEXP_INSTR(t1_DECIMAL, '99$') FROM t1 ORDER BY 1;
|
||||
SELECT t1_DECIMAL, REGEXP_INSTR(t1_DECIMAL, '\.99$') FROM t1 ORDER BY 1;
|
||||
SELECT t1_DECIMAL, REGEXP_INSTR(t1_DECIMAL, '03$') FROM t1 ORDER BY 1;
|
||||
SELECT t1_DECIMAL, REGEXP_INSTR(t1_DECIMAL, '\.+') FROM t1 ORDER BY 1;
|
||||
SELECT t1_DECIMAL, REGEXP_INSTR(t1_DECIMAL, '0?') FROM t1 ORDER BY 1;
|
||||
SELECT t1_DECIMAL, REGEXP_INSTR(t1_DECIMAL, '-26') FROM t1 ORDER BY 1;
|
||||
|
||||
SELECT t1_DECIMAL, REGEXP_REPLACE(t1_DECIMAL, '99$', 'TeddyBear') FROM t1 ORDER BY 1;
|
||||
SELECT t1_DECIMAL, REGEXP_REPLACE(t1_DECIMAL, '\.99$', 'TeddyBear') FROM t1 ORDER BY 1;
|
||||
SELECT t1_DECIMAL, REGEXP_REPLACE(t1_DECIMAL, '03$', 'TeddyBear') FROM t1 ORDER BY 1;
|
||||
SELECT t1_DECIMAL, REGEXP_REPLACE(t1_DECIMAL, '\.+', 'TeddyBear') FROM t1 ORDER BY 1;
|
||||
SELECT t1_DECIMAL, REGEXP_REPLACE(t1_DECIMAL, '0?', 'TeddyBear') FROM t1 ORDER BY 1;
|
||||
SELECT t1_DECIMAL, REGEXP_REPLACE(t1_DECIMAL, '-26', 'TeddyBear') FROM t1 ORDER BY 1;
|
||||
|
||||
|
||||
SELECT t1_TEXT, t1_TEXT REGEXP 'oooo$' FROM t1 ORDER BY 1;
|
||||
SELECT t1_TEXT, t1_TEXT REGEXP 'q$' FROM t1 ORDER BY 1;
|
||||
SELECT t1_TEXT, t1_TEXT REGEXP 'ppppp$' FROM t1 ORDER BY 1;
|
||||
@ -46,13 +89,104 @@ SELECT t1_TEXT, t1_TEXT REGEXP 'p*' FROM t1 ORDER BY 1;
|
||||
SELECT t1_TEXT, t1_TEXT REGEXP 'qq+q' FROM t1 ORDER BY 1;
|
||||
SELECT t1_TEXT, t1_TEXT REGEXP 'o?o' FROM t1 ORDER BY 1;
|
||||
|
||||
SELECT t1_TEXT, REGEXP_SUBSTR(t1_TEXT, 'oooo$') FROM t1 ORDER BY 1;
|
||||
SELECT t1_TEXT, REGEXP_SUBSTR(t1_TEXT, 'q$') FROM t1 ORDER BY 1;
|
||||
SELECT t1_TEXT, REGEXP_SUBSTR(t1_TEXT, 'ppppp$') FROM t1 ORDER BY 1;
|
||||
SELECT t1_TEXT, REGEXP_SUBSTR(t1_TEXT, '(ooo)+') FROM t1 ORDER BY 1;
|
||||
SELECT t1_TEXT, REGEXP_SUBSTR(t1_TEXT, '(qqqqq)+') FROM t1 ORDER BY 1;
|
||||
SELECT t1_TEXT, REGEXP_SUBSTR(t1_TEXT, 'p*') FROM t1 ORDER BY 1;
|
||||
SELECT t1_TEXT, REGEXP_SUBSTR(t1_TEXT, 'qq+q') FROM t1 ORDER BY 1;
|
||||
SELECT t1_TEXT, REGEXP_SUBSTR(t1_TEXT, 'o?o') FROM t1 ORDER BY 1;
|
||||
|
||||
SELECT t1_TEXT, REGEXP_INSTR(t1_TEXT, 'oooo$') FROM t1 ORDER BY 1;
|
||||
SELECT t1_TEXT, REGEXP_INSTR(t1_TEXT, 'q$') FROM t1 ORDER BY 1;
|
||||
SELECT t1_TEXT, REGEXP_INSTR(t1_TEXT, 'ppppp$') FROM t1 ORDER BY 1;
|
||||
SELECT t1_TEXT, REGEXP_INSTR(t1_TEXT, '(ooo)+') FROM t1 ORDER BY 1;
|
||||
SELECT t1_TEXT, REGEXP_INSTR(t1_TEXT, '(qqqqq)+') FROM t1 ORDER BY 1;
|
||||
SELECT t1_TEXT, REGEXP_INSTR(t1_TEXT, 'p*') FROM t1 ORDER BY 1;
|
||||
SELECT t1_TEXT, REGEXP_INSTR(t1_TEXT, 'qq+q') FROM t1 ORDER BY 1;
|
||||
SELECT t1_TEXT, REGEXP_INSTR(t1_TEXT, 'o?o') FROM t1 ORDER BY 1;
|
||||
|
||||
SELECT t1_TEXT, REGEXP_REPLACE(t1_TEXT, 'oooo$', 'KittyCat') FROM t1 ORDER BY 1;
|
||||
SELECT t1_TEXT, REGEXP_REPLACE(t1_TEXT, 'q$', 'KittyCat') FROM t1 ORDER BY 1;
|
||||
SELECT t1_TEXT, REGEXP_REPLACE(t1_TEXT, 'ppppp$', 'KittyCat') FROM t1 ORDER BY 1;
|
||||
SELECT t1_TEXT, REGEXP_REPLACE(t1_TEXT, '(ooo)+', 'KittyCat') FROM t1 ORDER BY 1;
|
||||
SELECT t1_TEXT, REGEXP_REPLACE(t1_TEXT, '(qqqqq)+', 'KittyCat') FROM t1 ORDER BY 1;
|
||||
SELECT t1_TEXT, REGEXP_REPLACE(t1_TEXT, 'p*', 'KittyCat') FROM t1 ORDER BY 1;
|
||||
SELECT t1_TEXT, REGEXP_REPLACE(t1_TEXT, 'qq+q', 'KittyCat') FROM t1 ORDER BY 1;
|
||||
SELECT t1_TEXT, REGEXP_REPLACE(t1_TEXT, 'o?o', 'KittyCat') FROM t1 ORDER BY 1;
|
||||
SELECT t1_TEXT, REGEXP_REPLACE(t1_TEXT, 'q', 'KittyCat') FROM t1 ORDER BY 1;
|
||||
|
||||
SELECT t1_DATE, t1_DATE REGEXP '(1997)+' FROM t1 ORDER BY 1;
|
||||
SELECT t1_DATE, t1_DATE REGEXP '01$' FROM t1 ORDER BY 1;
|
||||
SELECT t1_DATE, t1_DATE REGEXP '(59)+' FROM t1 ORDER BY 1;
|
||||
SELECT t1_DATE, t1_DATE REGEXP '(09-12-)+' FROM t1 ORDER BY 1;
|
||||
|
||||
SELECT t1_DATE, REGEXP_SUBSTR(t1_DATE, '(1997)+') FROM t1 ORDER BY 1;
|
||||
SELECT t1_DATE, REGEXP_SUBSTR(t1_DATE, '01$') FROM t1 ORDER BY 1;
|
||||
SELECT t1_DATE, REGEXP_SUBSTR(t1_DATE, '(59)+') FROM t1 ORDER BY 1;
|
||||
SELECT t1_DATE, REGEXP_SUBSTR(t1_DATE, '(09-12-)+') FROM t1 ORDER BY 1;
|
||||
|
||||
SELECT t1_DATE, REGEXP_INSTR(t1_DATE, '(1997)+') FROM t1 ORDER BY 1;
|
||||
SELECT t1_DATE, REGEXP_INSTR(t1_DATE, '01$') FROM t1 ORDER BY 1;
|
||||
SELECT t1_DATE, REGEXP_INSTR(t1_DATE, '(59)+') FROM t1 ORDER BY 1;
|
||||
SELECT t1_DATE, REGEXP_INSTR(t1_DATE, '(09-12-)+') FROM t1 ORDER BY 1;
|
||||
|
||||
SELECT t1_DATE, REGEXP_REPLACE(t1_DATE, '(1997)+', 'TeddyBear') FROM t1 ORDER BY 1;
|
||||
SELECT t1_DATE, REGEXP_REPLACE(t1_DATE, '01$', 'TeddyBear') FROM t1 ORDER BY 1;
|
||||
SELECT t1_DATE, REGEXP_REPLACE(t1_DATE, '(59)+', 'TeddyBear') FROM t1 ORDER BY 1;
|
||||
SELECT t1_DATE, REGEXP_REPLACE(t1_DATE, '(09-12-)+', 'TeddyBear') FROM t1 ORDER BY 1;
|
||||
|
||||
SELECT t1_TIME, t1_TIME REGEXP '(59)+' FROM t1 ORDER BY 1;
|
||||
SELECT t1_TIME, t1_TIME REGEXP '22$' FROM t1 ORDER BY 1;
|
||||
|
||||
SELECT t1_TIME, REGEXP_SUBSTR(t1_TIME, '(59)+') FROM t1 ORDER BY 1;
|
||||
SELECT t1_TIME, REGEXP_SUBSTR(t1_TIME, '22$') FROM t1 ORDER BY 1;
|
||||
|
||||
SELECT t1_TIME, REGEXP_INSTR(t1_TIME, '(59)+') FROM t1 ORDER BY 1;
|
||||
SELECT t1_TIME, REGEXP_INSTR(t1_TIME, '22$') FROM t1 ORDER BY 1;
|
||||
|
||||
|
||||
SELECT t1_TIME, REGEXP_REPLACE(t1_TIME, '(59)+', 'KittyCat') FROM t1 ORDER BY 1;
|
||||
SELECT t1_TIME, REGEXP_REPLACE(t1_TIME, '22$', 'KittyCat') FROM t1 ORDER BY 1;
|
||||
|
||||
SET character_set_connection = 'utf8';
|
||||
SET NAMES utf8mb3;
|
||||
|
||||
CREATE TABLE t2 (hello text) DEFAULT CHARSET=utf8 engine columnstore;
|
||||
INSERT INTO t2 values('こんにちは');
|
||||
INSERT INTO t2 values('привет');
|
||||
INSERT INTO t2 values('Γεια');
|
||||
INSERT INTO t2 values('სალამი');
|
||||
|
||||
SELECT hello, hello regexp 'ん.ち' FROM t2;
|
||||
SELECT hello, hello regexp 'и.е' FROM t2;
|
||||
SELECT hello, hello regexp 'ε.α' FROM t2;
|
||||
SELECT hello, hello regexp 'ა.ა' FROM t2;
|
||||
|
||||
SELECT hello, regexp_substr(hello, 'ん.ち') FROM t2;
|
||||
SELECT hello, regexp_substr(hello, 'и.е') FROM t2;
|
||||
SELECT hello, regexp_substr(hello, 'ε.α') FROM t2;
|
||||
SELECT hello, regexp_substr(hello, 'ა.ა') FROM t2;
|
||||
|
||||
SELECT hello, regexp_instr(hello, 'ん.ち') FROM t2;
|
||||
SELECT hello, regexp_instr(hello, 'и.е') FROM t2;
|
||||
SELECT hello, regexp_instr(hello, 'ε.α') FROM t2;
|
||||
SELECT hello, regexp_instr(hello, 'ა.ა') FROM t2;
|
||||
|
||||
SELECT hello, regexp_replace(hello, 'ん.ち', 'Достоевский') FROM t2;
|
||||
SELECT hello, regexp_replace(hello, 'и.е', 'Достоевский') FROM t2;
|
||||
SELECT hello, regexp_replace(hello, 'ε.α', 'Достоевский') FROM t2;
|
||||
SELECT hello, regexp_replace(hello, 'ა.ა', 'Достоевский') FROM t2;
|
||||
|
||||
SHOW VARIABLES LIKE 'character_set%';
|
||||
CREATE TABLE tw(hello text) DEFAULT CHARSET=cp1251 ENGINE COLUMNSTORE;
|
||||
INSERT INTO tw values(convert('привет' USING cp1251));
|
||||
SELECT hello, regexp_instr(hello, convert('и.е' USING cp1251)) FROM tw;
|
||||
SELECT hello, convert(regexp_substr(hello, convert('и.е' USING cp1251)) using utf8) FROM tw;
|
||||
SELECT hello, convert(regexp_replace(hello, convert('и.е' USING cp1251), convert('Достоевкий' USING cp1251)) using utf8) FROM tw;
|
||||
|
||||
SELECT hello, hello regexp convert('и.е' USING cp1251) FROM tw;
|
||||
|
||||
# Clean UP
|
||||
DROP DATABASE mcs228_db;
|
||||
|
@ -23,7 +23,6 @@ INSERT INTO t1 VALUES(103, 1234.5699, repeat('o', 5), '1997-12-12', '22:12:02');
|
||||
INSERT INTO t1 VALUES(-7299, 111.99, repeat('p', 5), '2001-1-1', '23:59:59');
|
||||
INSERT INTO t1 VALUES(9913, 98765.4321, repeat('q', 5), '09-12-11', '01:08:59');
|
||||
|
||||
--error ER_CHECK_NOT_IMPLEMENTED
|
||||
SELECT t1_TEXT, REGEXP_INSTR(t1_TEXT, 'o') FROM t1 ORDER BY 1;
|
||||
|
||||
# Clean UP
|
||||
|
@ -23,7 +23,6 @@ INSERT INTO t1 VALUES(103, 1234.5699, repeat('o', 5), '1997-12-12', '22:12:02');
|
||||
INSERT INTO t1 VALUES(-7299, 111.99, repeat('p', 5), '2001-1-1', '23:59:59');
|
||||
INSERT INTO t1 VALUES(9913, 98765.4321, repeat('q', 5), '09-12-11', '01:08:59');
|
||||
|
||||
--error ER_CHECK_NOT_IMPLEMENTED
|
||||
SELECT REGEXP_REPLACE(t1_TEXT, 'o', 'X') FROM t1 ORDER BY 1;
|
||||
|
||||
# Clean UP
|
||||
|
@ -23,7 +23,6 @@ INSERT INTO t1 VALUES(103, 1234.5699, repeat('o', 5), '1997-12-12', '22:12:02');
|
||||
INSERT INTO t1 VALUES(-7299, 111.99, repeat('p', 5), '2001-1-1', '23:59:59');
|
||||
INSERT INTO t1 VALUES(9913, 98765.4321, repeat('q', 5), '09-12-11', '01:08:59');
|
||||
|
||||
--error ER_CHECK_NOT_IMPLEMENTED
|
||||
SELECT REGEXP_SUBSTR(t1_TEXT, '[a-z]+') FROM t1 ORDER BY 1;
|
||||
|
||||
# Clean UP
|
||||
|
@ -146,6 +146,23 @@ class Charset
|
||||
// Wraps the given charset descriptor; a null pointer selects my_charset_bin.
Charset(CHARSET_INFO* cs = nullptr) : mCharset(cs != nullptr ? cs : &my_charset_bin)
{
}
|
||||
|
||||
bool operator==(const Charset& rhs)
|
||||
{
|
||||
return rhs.getCharset().cs_name.str == getCharset().cs_name.str;
|
||||
}
|
||||
|
||||
// Re-encodes `from`, which is encoded in `fromCs`, into this charset and returns the result.
//
// The output buffer is sized as from.size() * mbmaxlen of the target charset before the call to
// my_convert, then trimmed to the number of bytes my_convert reports as written.
// The error counter filled in by my_convert is not inspected, so conversion problems are not
// reported to the caller.
std::string convert(const std::string& from, const datatypes::Charset& fromCs) const
{
  std::string result;
  uint conversionErrors = 0;  // written by my_convert; intentionally unused
  result.resize(from.size() * getCharset().mbmaxlen);

  // Write through the string's own mutable storage instead of casting away the constness of
  // c_str(): modifying the array returned by c_str() is not permitted by the standard.
  size_t resultingSize = my_convert(&result[0], result.size(), &getCharset(), from.c_str(), from.size(),
                                    &fromCs.getCharset(), &conversionErrors);
  result.resize(resultingSize);
  return result;
}
|
||||
|
||||
Charset(uint32_t charsetNumber);
|
||||
void setCharset(uint32_t charsetNumber);
|
||||
CHARSET_INFO& getCharset() const
|
||||
|
@ -150,7 +150,7 @@ add_library(funcexp SHARED ${funcexp_LIB_SRCS})
|
||||
|
||||
add_dependencies(funcexp loggingcpp)
|
||||
|
||||
target_link_libraries(funcexp ${NETSNMP_LIBRARIES} pron)
|
||||
target_link_libraries(funcexp ${NETSNMP_LIBRARIES} pron pcre2-8)
|
||||
|
||||
install(TARGETS funcexp DESTINATION ${ENGINE_LIBDIR} COMPONENT columnstore-engine)
|
||||
|
||||
|
@ -25,14 +25,10 @@
|
||||
#include <string>
|
||||
using namespace std;
|
||||
|
||||
#ifdef __linux__
|
||||
#include <regex.h>
|
||||
#else
|
||||
#include <regex>
|
||||
using namespace boost;
|
||||
#endif
|
||||
#include "utils/pcre2/jpcre2.hpp"
|
||||
|
||||
#include "functor_bool.h"
|
||||
#include "functor_str.h"
|
||||
#include "functioncolumn.h"
|
||||
#include "predicateoperator.h"
|
||||
#include "constantcolumn.h"
|
||||
@ -43,11 +39,57 @@ using namespace execplan;
|
||||
#include "errorcodes.h"
|
||||
#include "idberrorinfo.h"
|
||||
#include "errorids.h"
|
||||
|
||||
using namespace logging;
|
||||
|
||||
namespace
|
||||
{
|
||||
inline bool getBool(rowgroup::Row& row, funcexp::FunctionParm& pm, bool& isNull,
|
||||
|
||||
using jp = jpcre2::select<char>;
|
||||
|
||||
struct PCREOptions
|
||||
{
|
||||
PCREOptions(execplan::CalpontSystemCatalog::ColType& ct);
|
||||
|
||||
datatypes::Charset dataCharset = my_charset_utf8mb3_general_ci;
|
||||
datatypes::Charset libraryCharset = my_charset_utf8mb3_general_ci;
|
||||
jpcre2::Uint flags = 0;
|
||||
bool conversionIsNeeded = false;
|
||||
};
|
||||
|
||||
// Builds the regex options from the charset carried by `ct`:
//  - flags gets PCRE2_UTF | PCRE2_UCP for every charset except my_charset_bin, and PCRE2_CASELESS
//    unless the charset state has MY_CS_BINSORT or MY_CS_CSSORT set;
//  - dataCharset is the column charset, libraryCharset is utf8mb3 (or binary for binary data).
// conversionIsNeeded is not assigned here.
PCREOptions::PCREOptions(execplan::CalpontSystemCatalog::ColType& ct)
{
  datatypes::Charset cs = ct.getCharset();

  // TODO: use the system variable instead of hard-coding: default_regex_flags_pcre(_current_thd());
  // PCRE2_DOTALL | PCRE2_DUPNAMES | PCRE2_EXTENDED | PCRE2_EXTENDED_MORE | PCRE2_MULTILINE | PCRE2_UNGREEDY;
  jpcre2::Uint defaultFlags = 0;

  jpcre2::Uint unicodeFlags = 0;
  if (cs != &my_charset_bin)
    unicodeFlags = PCRE2_UTF | PCRE2_UCP;

  jpcre2::Uint caseFlags = PCRE2_CASELESS;
  if (cs.getCharset().state & (MY_CS_BINSORT | MY_CS_CSSORT))
    caseFlags = 0;

  flags = unicodeFlags | caseFlags | defaultFlags;

  // Text data is converted to utf-8 for the library; binary data stays binary.
  dataCharset = cs;
  if (cs == my_charset_bin)
    libraryCharset = my_charset_bin;
  else
    libraryCharset = my_charset_utf8mb3_general_ci;
}
|
||||
|
||||
struct RegExpParams
|
||||
{
|
||||
std::string expression;
|
||||
std::string pattern;
|
||||
RegExpParams& CharsetFix(const PCREOptions options)
|
||||
{
|
||||
if (options.conversionIsNeeded)
|
||||
return *this;
|
||||
|
||||
expression = options.libraryCharset.convert(expression, options.dataCharset);
|
||||
pattern = options.libraryCharset.convert(pattern, options.dataCharset);
|
||||
return *this;
|
||||
}
|
||||
};
|
||||
|
||||
inline RegExpParams getEpressionAndPattern(rowgroup::Row& row, funcexp::FunctionParm& pm, bool& isNull,
|
||||
CalpontSystemCatalog::ColType& ct, long timeZone)
|
||||
{
|
||||
string expr;
|
||||
@ -212,39 +254,148 @@ inline bool getBool(rowgroup::Row& row, funcexp::FunctionParm& pm, bool& isNull,
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef __linux__
|
||||
regex_t re;
|
||||
|
||||
regcomp(&re, pattern.c_str(), REG_EXTENDED | REG_NOSUB);
|
||||
|
||||
int res = regexec(&re, expr.c_str(), 0, NULL, 0);
|
||||
regfree(&re);
|
||||
|
||||
if (res == 0)
|
||||
return true;
|
||||
else
|
||||
return false;
|
||||
|
||||
#else
|
||||
std::regex pat(pattern.c_str());
|
||||
return std::regex_search(expr.c_str(), pat);
|
||||
#endif
|
||||
return RegExpParams{expr, pattern};
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
namespace funcexp
|
||||
{
|
||||
// Reports the result type of the first argument (the subject) as the operation type;
// `resultType` is ignored.
CalpontSystemCatalog::ColType Func_regexp_replace::operationType(FunctionParm& fp,
                                                                 CalpontSystemCatalog::ColType& resultType)
{
  const auto& subject = fp[0];
  return subject->data()->resultType();
}
|
||||
|
||||
// Reports the result type of the first argument (the subject) as the operation type;
// `resultType` is ignored.
CalpontSystemCatalog::ColType Func_regexp_substr::operationType(FunctionParm& fp,
                                                                CalpontSystemCatalog::ColType& resultType)
{
  const auto& subject = fp[0];
  return subject->data()->resultType();
}
|
||||
|
||||
// Reports the result type of the first argument (the subject) as the operation type;
// `resultType` is ignored.
CalpontSystemCatalog::ColType Func_regexp_instr::operationType(FunctionParm& fp,
                                                               CalpontSystemCatalog::ColType& resultType)
{
  const auto& subject = fp[0];
  return subject->data()->resultType();
}
|
||||
|
||||
// Passes the caller-supplied result type straight through; the arguments in `fp` are not consulted.
CalpontSystemCatalog::ColType Func_regexp::operationType(FunctionParm& fp,
                                                         CalpontSystemCatalog::ColType& resultType)
{
  return resultType;
}
|
||||
|
||||
bool Func_regexp::getBoolVal(rowgroup::Row& row, FunctionParm& pm, bool& isNull,
|
||||
/*
|
||||
returns the string subject with all occurrences of the regular expression pattern replaced by
|
||||
the string replace. If no occurrences are found, then subject is returned as is.
|
||||
https://mariadb.com/kb/en/regexp_replace/
|
||||
*/
|
||||
std::string Func_regexp_replace::getStrVal(rowgroup::Row& row, FunctionParm& fp, bool& isNull,
|
||||
execplan::CalpontSystemCatalog::ColType& ct)
|
||||
|
||||
{
|
||||
RegExpParams param = getEpressionAndPattern(row, fp, isNull, ct, ct.getTimeZone());
|
||||
|
||||
if (isNull)
|
||||
return std::string{};
|
||||
|
||||
const auto& replaceWith = fp[2]->data()->getStrVal(row, isNull);
|
||||
|
||||
if (replaceWith.isNull())
|
||||
return param.expression;
|
||||
|
||||
PCREOptions options(ct);
|
||||
param.CharsetFix(options);
|
||||
jp::Regex re(param.pattern, options.flags);
|
||||
|
||||
const auto& replaceWithStr = replaceWith.unsafeStringRef();
|
||||
if (options.conversionIsNeeded)
|
||||
{
|
||||
const auto& convertedReplaceToken = options.libraryCharset.convert(replaceWithStr, options.dataCharset);
|
||||
return re.replace(param.expression, convertedReplaceToken, "g");
|
||||
}
|
||||
|
||||
return re.replace(param.expression, replaceWithStr, "g");
|
||||
}
|
||||
|
||||
/*
|
||||
Returns the part of the string subject that matches the regular expression pattern, or an empty string if
|
||||
pattern was not found. https://mariadb.com/kb/en/regexp_substr/
|
||||
*/
|
||||
std::string Func_regexp_substr::getStrVal(rowgroup::Row& row, FunctionParm& fp, bool& isNull,
|
||||
execplan::CalpontSystemCatalog::ColType& ct)
|
||||
|
||||
{
|
||||
RegExpParams param = getEpressionAndPattern(row, fp, isNull, ct, ct.getTimeZone());
|
||||
|
||||
if (isNull)
|
||||
return std::string{};
|
||||
|
||||
PCREOptions options(ct);
|
||||
param.CharsetFix(options);
|
||||
|
||||
jp::Regex re(param.pattern, options.flags);
|
||||
jp::RegexMatch rm(&re);
|
||||
jp::VecNum vec_num;
|
||||
|
||||
size_t count = rm.setSubject(param.expression).setNumberedSubstringVector(&vec_num).match();
|
||||
|
||||
if (count == 0)
|
||||
return std::string{};
|
||||
|
||||
return vec_num[0][0];
|
||||
}
|
||||
|
||||
/*
|
||||
Returns the position of the first occurrence of the regular expression pattern in the string subject, or 0
|
||||
if pattern was not found. https://mariadb.com/kb/en/regexp_instr/
|
||||
*/
|
||||
std::string Func_regexp_instr::getStrVal(rowgroup::Row& row, FunctionParm& fp, bool& isNull,
|
||||
execplan::CalpontSystemCatalog::ColType& ct)
|
||||
|
||||
{
|
||||
RegExpParams param = getEpressionAndPattern(row, fp, isNull, ct, ct.getTimeZone());
|
||||
|
||||
if (isNull)
|
||||
return std::string{};
|
||||
|
||||
PCREOptions options(ct);
|
||||
param.CharsetFix(options);
|
||||
|
||||
jp::Regex re(param.pattern, options.flags);
|
||||
jp::RegexMatch rm(&re);
|
||||
jpcre2::VecOff vec_soff;
|
||||
|
||||
size_t count = rm.setSubject(param.expression).setMatchStartOffsetVector(&vec_soff).match();
|
||||
|
||||
if (count == 0)
|
||||
return "0";
|
||||
|
||||
size_t offset = vec_soff[0];
|
||||
size_t charNumber = options.libraryCharset.getCharset().numchars(param.expression.c_str(),
|
||||
param.expression.c_str() + offset);
|
||||
|
||||
return std::to_string(charNumber + 1);
|
||||
}
|
||||
|
||||
/*
|
||||
https://mariadb.com/kb/en/regexp/
|
||||
*/
|
||||
bool Func_regexp::getBoolVal(rowgroup::Row& row, FunctionParm& fp, bool& isNull,
|
||||
CalpontSystemCatalog::ColType& ct)
|
||||
{
|
||||
return getBool(row, pm, isNull, ct, ct.getTimeZone()) && !isNull;
|
||||
RegExpParams param = getEpressionAndPattern(row, fp, isNull, ct, ct.getTimeZone());
|
||||
|
||||
if (isNull)
|
||||
return false;
|
||||
|
||||
PCREOptions options(ct);
|
||||
param.CharsetFix(options);
|
||||
|
||||
jp::Regex re(param.pattern, options.flags);
|
||||
return re.match(param.expression);
|
||||
}
|
||||
|
||||
} // namespace funcexp
|
||||
|
@ -215,7 +215,10 @@ FuncExp::FuncExp()
|
||||
fFuncMap["quarter"] = new Func_quarter();
|
||||
fFuncMap["radians"] = new Func_radians(); // dlh
|
||||
fFuncMap["rand"] = new Func_rand();
|
||||
fFuncMap["regexp"] = new Func_regexp(); // dlh
|
||||
fFuncMap["regexp"] = new Func_regexp();
|
||||
fFuncMap["regexp_instr"] = new Func_regexp_instr();
|
||||
fFuncMap["regexp_replace"] = new Func_regexp_replace();
|
||||
fFuncMap["regexp_substr"] = new Func_regexp_substr(); // dlh
|
||||
fFuncMap["repeat"] = new Func_repeat(); // dlh
|
||||
fFuncMap["replace"] = new Func_replace(); // dlh
|
||||
fFuncMap["replace_oracle"] = new Func_replace_oracle(); // dlh
|
||||
|
@ -470,6 +470,61 @@ class Func_replace : public Func_Str
|
||||
execplan::CalpontSystemCatalog::ColType& op_ct);
|
||||
};
|
||||
|
||||
class Func_regexp_replace : public Func_Str
|
||||
{
|
||||
public:
|
||||
Func_regexp_replace() : Func_Str("regexp_replace")
|
||||
{
|
||||
}
|
||||
virtual ~Func_regexp_replace()
|
||||
{
|
||||
}
|
||||
|
||||
execplan::CalpontSystemCatalog::ColType operationType(FunctionParm& fp,
|
||||
execplan::CalpontSystemCatalog::ColType& resultType);
|
||||
|
||||
std::string getStrVal(rowgroup::Row& row, FunctionParm& fp, bool& isNull,
|
||||
execplan::CalpontSystemCatalog::ColType& op_ct);
|
||||
};
|
||||
|
||||
|
||||
class Func_regexp_instr : public Func_Str
|
||||
{
|
||||
public:
|
||||
Func_regexp_instr() : Func_Str("regexp_instr")
|
||||
{
|
||||
}
|
||||
virtual ~Func_regexp_instr()
|
||||
{
|
||||
}
|
||||
|
||||
execplan::CalpontSystemCatalog::ColType operationType(FunctionParm& fp,
|
||||
execplan::CalpontSystemCatalog::ColType& resultType);
|
||||
|
||||
std::string getStrVal(rowgroup::Row& row, FunctionParm& fp, bool& isNull,
|
||||
execplan::CalpontSystemCatalog::ColType& op_ct);
|
||||
};
|
||||
|
||||
|
||||
class Func_regexp_substr : public Func_Str
|
||||
{
|
||||
public:
|
||||
Func_regexp_substr() : Func_Str("regexp_substr")
|
||||
{
|
||||
}
|
||||
virtual ~Func_regexp_substr()
|
||||
{
|
||||
}
|
||||
|
||||
execplan::CalpontSystemCatalog::ColType operationType(FunctionParm& fp,
|
||||
execplan::CalpontSystemCatalog::ColType& resultType);
|
||||
|
||||
std::string getStrVal(rowgroup::Row& row, FunctionParm& fp, bool& isNull,
|
||||
execplan::CalpontSystemCatalog::ColType& op_ct);
|
||||
};
|
||||
|
||||
|
||||
|
||||
class Func_replace_oracle : public Func_Str
|
||||
{
|
||||
public:
|
||||
|
5153
utils/pcre2/jpcre2.hpp
Normal file
5153
utils/pcre2/jpcre2.hpp
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user