1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-07-29 08:21:15 +03:00

MCOL-5328: PCRE based regexp regexp_substr regexp_instr regexp_replace [stable-23.10] (#3215)

* MCOL-5328: PCRE based regexp regexp_substr regexp_instr regexp_replace

* Add qa test for MCOL-5328

---------

Co-authored-by: Susil Behera <susil.behera@mariadb.com>
This commit is contained in:
Leonid Fedorov
2024-06-27 14:20:08 +04:00
committed by GitHub
parent 2cd8f716c1
commit 6c6fa7d5a4
20 changed files with 6159 additions and 66 deletions

View File

@ -30,6 +30,27 @@ SELECT t1_INT, t1_INT REGEXP '\.+' FROM t1 ORDER BY 1;
SELECT t1_INT, t1_INT REGEXP '0?' FROM t1 ORDER BY 1;
SELECT t1_INT, t1_INT REGEXP '-26' FROM t1 ORDER BY 1;
SELECT t1_INT, REGEXP_SUBSTR(t1_INT, '99$') FROM t1 ORDER BY 1;
SELECT t1_INT, REGEXP_SUBSTR(t1_INT, '\.99$') FROM t1 ORDER BY 1;
SELECT t1_INT, REGEXP_SUBSTR(t1_INT, '03$') FROM t1 ORDER BY 1;
SELECT t1_INT, REGEXP_SUBSTR(t1_INT, '\.+') FROM t1 ORDER BY 1;
SELECT t1_INT, REGEXP_SUBSTR(t1_INT, '0?') FROM t1 ORDER BY 1;
SELECT t1_INT, REGEXP_SUBSTR(t1_INT, '-26') FROM t1 ORDER BY 1;
SELECT t1_INT, REGEXP_INSTR(t1_INT, '99$') FROM t1 ORDER BY 1;
SELECT t1_INT, REGEXP_INSTR(t1_INT, '\.99$') FROM t1 ORDER BY 1;
SELECT t1_INT, REGEXP_INSTR(t1_INT, '03$') FROM t1 ORDER BY 1;
SELECT t1_INT, REGEXP_INSTR(t1_INT, '\.+') FROM t1 ORDER BY 1;
SELECT t1_INT, REGEXP_INSTR(t1_INT, '0?') FROM t1 ORDER BY 1;
SELECT t1_INT, REGEXP_INSTR(t1_INT, '-26') FROM t1 ORDER BY 1;
SELECT t1_INT, REGEXP_REPLACE(t1_INT, '99$', 'TeddyBear') FROM t1 ORDER BY 1;
SELECT t1_INT, REGEXP_REPLACE(t1_INT, '\.99$', 'TeddyBear') FROM t1 ORDER BY 1;
SELECT t1_INT, REGEXP_REPLACE(t1_INT, '03$', 'TeddyBear') FROM t1 ORDER BY 1;
SELECT t1_INT, REGEXP_REPLACE(t1_INT, '\.+', 'TeddyBear') FROM t1 ORDER BY 1;
SELECT t1_INT, REGEXP_REPLACE(t1_INT, '0?', 'TeddyBear') FROM t1 ORDER BY 1;
SELECT t1_INT, REGEXP_REPLACE(t1_INT, '-26', 'TeddyBear') FROM t1 ORDER BY 1;
SELECT t1_DECIMAL, t1_DECIMAL REGEXP '99$' FROM t1 ORDER BY 1;
SELECT t1_DECIMAL, t1_DECIMAL REGEXP '\.99$' FROM t1 ORDER BY 1;
SELECT t1_DECIMAL, t1_DECIMAL REGEXP '03$' FROM t1 ORDER BY 1;
@ -37,6 +58,28 @@ SELECT t1_DECIMAL, t1_DECIMAL REGEXP '\.+' FROM t1 ORDER BY 1;
SELECT t1_DECIMAL, t1_DECIMAL REGEXP '0?' FROM t1 ORDER BY 1;
SELECT t1_DECIMAL, t1_DECIMAL REGEXP '-26' FROM t1 ORDER BY 1;
SELECT t1_DECIMAL, REGEXP_SUBSTR(t1_DECIMAL, '99$') FROM t1 ORDER BY 1;
SELECT t1_DECIMAL, REGEXP_SUBSTR(t1_DECIMAL, '\.99$') FROM t1 ORDER BY 1;
SELECT t1_DECIMAL, REGEXP_SUBSTR(t1_DECIMAL, '03$') FROM t1 ORDER BY 1;
SELECT t1_DECIMAL, REGEXP_SUBSTR(t1_DECIMAL, '\.+') FROM t1 ORDER BY 1;
SELECT t1_DECIMAL, REGEXP_SUBSTR(t1_DECIMAL, '0?') FROM t1 ORDER BY 1;
SELECT t1_DECIMAL, REGEXP_SUBSTR(t1_DECIMAL, '-26') FROM t1 ORDER BY 1;
SELECT t1_DECIMAL, REGEXP_INSTR(t1_DECIMAL, '99$') FROM t1 ORDER BY 1;
SELECT t1_DECIMAL, REGEXP_INSTR(t1_DECIMAL, '\.99$') FROM t1 ORDER BY 1;
SELECT t1_DECIMAL, REGEXP_INSTR(t1_DECIMAL, '03$') FROM t1 ORDER BY 1;
SELECT t1_DECIMAL, REGEXP_INSTR(t1_DECIMAL, '\.+') FROM t1 ORDER BY 1;
SELECT t1_DECIMAL, REGEXP_INSTR(t1_DECIMAL, '0?') FROM t1 ORDER BY 1;
SELECT t1_DECIMAL, REGEXP_INSTR(t1_DECIMAL, '-26') FROM t1 ORDER BY 1;
SELECT t1_DECIMAL, REGEXP_REPLACE(t1_DECIMAL, '99$', 'TeddyBear') FROM t1 ORDER BY 1;
SELECT t1_DECIMAL, REGEXP_REPLACE(t1_DECIMAL, '\.99$', 'TeddyBear') FROM t1 ORDER BY 1;
SELECT t1_DECIMAL, REGEXP_REPLACE(t1_DECIMAL, '03$', 'TeddyBear') FROM t1 ORDER BY 1;
SELECT t1_DECIMAL, REGEXP_REPLACE(t1_DECIMAL, '\.+', 'TeddyBear') FROM t1 ORDER BY 1;
SELECT t1_DECIMAL, REGEXP_REPLACE(t1_DECIMAL, '0?', 'TeddyBear') FROM t1 ORDER BY 1;
SELECT t1_DECIMAL, REGEXP_REPLACE(t1_DECIMAL, '-26', 'TeddyBear') FROM t1 ORDER BY 1;
SELECT t1_TEXT, t1_TEXT REGEXP 'oooo$' FROM t1 ORDER BY 1;
SELECT t1_TEXT, t1_TEXT REGEXP 'q$' FROM t1 ORDER BY 1;
SELECT t1_TEXT, t1_TEXT REGEXP 'ppppp$' FROM t1 ORDER BY 1;
@ -46,13 +89,104 @@ SELECT t1_TEXT, t1_TEXT REGEXP 'p*' FROM t1 ORDER BY 1;
SELECT t1_TEXT, t1_TEXT REGEXP 'qq+q' FROM t1 ORDER BY 1;
SELECT t1_TEXT, t1_TEXT REGEXP 'o?o' FROM t1 ORDER BY 1;
SELECT t1_TEXT, REGEXP_SUBSTR(t1_TEXT, 'oooo$') FROM t1 ORDER BY 1;
SELECT t1_TEXT, REGEXP_SUBSTR(t1_TEXT, 'q$') FROM t1 ORDER BY 1;
SELECT t1_TEXT, REGEXP_SUBSTR(t1_TEXT, 'ppppp$') FROM t1 ORDER BY 1;
SELECT t1_TEXT, REGEXP_SUBSTR(t1_TEXT, '(ooo)+') FROM t1 ORDER BY 1;
SELECT t1_TEXT, REGEXP_SUBSTR(t1_TEXT, '(qqqqq)+') FROM t1 ORDER BY 1;
SELECT t1_TEXT, REGEXP_SUBSTR(t1_TEXT, 'p*') FROM t1 ORDER BY 1;
SELECT t1_TEXT, REGEXP_SUBSTR(t1_TEXT, 'qq+q') FROM t1 ORDER BY 1;
SELECT t1_TEXT, REGEXP_SUBSTR(t1_TEXT, 'o?o') FROM t1 ORDER BY 1;
SELECT t1_TEXT, REGEXP_INSTR(t1_TEXT, 'oooo$') FROM t1 ORDER BY 1;
SELECT t1_TEXT, REGEXP_INSTR(t1_TEXT, 'q$') FROM t1 ORDER BY 1;
SELECT t1_TEXT, REGEXP_INSTR(t1_TEXT, 'ppppp$') FROM t1 ORDER BY 1;
SELECT t1_TEXT, REGEXP_INSTR(t1_TEXT, '(ooo)+') FROM t1 ORDER BY 1;
SELECT t1_TEXT, REGEXP_INSTR(t1_TEXT, '(qqqqq)+') FROM t1 ORDER BY 1;
SELECT t1_TEXT, REGEXP_INSTR(t1_TEXT, 'p*') FROM t1 ORDER BY 1;
SELECT t1_TEXT, REGEXP_INSTR(t1_TEXT, 'qq+q') FROM t1 ORDER BY 1;
SELECT t1_TEXT, REGEXP_INSTR(t1_TEXT, 'o?o') FROM t1 ORDER BY 1;
SELECT t1_TEXT, REGEXP_REPLACE(t1_TEXT, 'oooo$', 'KittyCat') FROM t1 ORDER BY 1;
SELECT t1_TEXT, REGEXP_REPLACE(t1_TEXT, 'q$', 'KittyCat') FROM t1 ORDER BY 1;
SELECT t1_TEXT, REGEXP_REPLACE(t1_TEXT, 'ppppp$', 'KittyCat') FROM t1 ORDER BY 1;
SELECT t1_TEXT, REGEXP_REPLACE(t1_TEXT, '(ooo)+', 'KittyCat') FROM t1 ORDER BY 1;
SELECT t1_TEXT, REGEXP_REPLACE(t1_TEXT, '(qqqqq)+', 'KittyCat') FROM t1 ORDER BY 1;
SELECT t1_TEXT, REGEXP_REPLACE(t1_TEXT, 'p*', 'KittyCat') FROM t1 ORDER BY 1;
SELECT t1_TEXT, REGEXP_REPLACE(t1_TEXT, 'qq+q', 'KittyCat') FROM t1 ORDER BY 1;
SELECT t1_TEXT, REGEXP_REPLACE(t1_TEXT, 'o?o', 'KittyCat') FROM t1 ORDER BY 1;
SELECT t1_TEXT, REGEXP_REPLACE(t1_TEXT, 'q', 'KittyCat') FROM t1 ORDER BY 1;
SELECT t1_DATE, t1_DATE REGEXP '(1997)+' FROM t1 ORDER BY 1;
SELECT t1_DATE, t1_DATE REGEXP '01$' FROM t1 ORDER BY 1;
SELECT t1_DATE, t1_DATE REGEXP '(59)+' FROM t1 ORDER BY 1;
SELECT t1_DATE, t1_DATE REGEXP '(09-12-)+' FROM t1 ORDER BY 1;
SELECT t1_DATE, REGEXP_SUBSTR(t1_DATE, '(1997)+') FROM t1 ORDER BY 1;
SELECT t1_DATE, REGEXP_SUBSTR(t1_DATE, '01$') FROM t1 ORDER BY 1;
SELECT t1_DATE, REGEXP_SUBSTR(t1_DATE, '(59)+') FROM t1 ORDER BY 1;
SELECT t1_DATE, REGEXP_SUBSTR(t1_DATE, '(09-12-)+') FROM t1 ORDER BY 1;
SELECT t1_DATE, REGEXP_INSTR(t1_DATE, '(1997)+') FROM t1 ORDER BY 1;
SELECT t1_DATE, REGEXP_INSTR(t1_DATE, '01$') FROM t1 ORDER BY 1;
SELECT t1_DATE, REGEXP_INSTR(t1_DATE, '(59)+') FROM t1 ORDER BY 1;
SELECT t1_DATE, REGEXP_INSTR(t1_DATE, '(09-12-)+') FROM t1 ORDER BY 1;
SELECT t1_DATE, REGEXP_REPLACE(t1_DATE, '(1997)+', 'TeddyBear') FROM t1 ORDER BY 1;
SELECT t1_DATE, REGEXP_REPLACE(t1_DATE, '01$', 'TeddyBear') FROM t1 ORDER BY 1;
SELECT t1_DATE, REGEXP_REPLACE(t1_DATE, '(59)+', 'TeddyBear') FROM t1 ORDER BY 1;
SELECT t1_DATE, REGEXP_REPLACE(t1_DATE, '(09-12-)+', 'TeddyBear') FROM t1 ORDER BY 1;
SELECT t1_TIME, t1_TIME REGEXP '(59)+' FROM t1 ORDER BY 1;
SELECT t1_TIME, t1_TIME REGEXP '22$' FROM t1 ORDER BY 1;
SELECT t1_TIME, REGEXP_SUBSTR(t1_TIME, '(59)+') FROM t1 ORDER BY 1;
SELECT t1_TIME, REGEXP_SUBSTR(t1_TIME, '22$') FROM t1 ORDER BY 1;
SELECT t1_TIME, REGEXP_INSTR(t1_TIME, '(59)+') FROM t1 ORDER BY 1;
SELECT t1_TIME, REGEXP_INSTR(t1_TIME, '22$') FROM t1 ORDER BY 1;
SELECT t1_TIME, REGEXP_REPLACE(t1_TIME, '(59)+', 'KittyCat') FROM t1 ORDER BY 1;
SELECT t1_TIME, REGEXP_REPLACE(t1_TIME, '22$', 'KittyCat') FROM t1 ORDER BY 1;
SET character_set_connection = 'utf8';
SET NAMES utf8mb3;
CREATE TABLE t2 (hello text) DEFAULT CHARSET=utf8 engine columnstore;
INSERT INTO t2 values('こんにちは');
INSERT INTO t2 values('привет');
INSERT INTO t2 values('Γεια');
INSERT INTO t2 values('სალამი');
SELECT hello, hello regexp 'ん.ち' FROM t2;
SELECT hello, hello regexp 'и.е' FROM t2;
SELECT hello, hello regexp 'ε.α' FROM t2;
SELECT hello, hello regexp 'ა.ა' FROM t2;
SELECT hello, regexp_substr(hello, 'ん.ち') FROM t2;
SELECT hello, regexp_substr(hello, 'и.е') FROM t2;
SELECT hello, regexp_substr(hello, 'ε.α') FROM t2;
SELECT hello, regexp_substr(hello, 'ა.ა') FROM t2;
SELECT hello, regexp_instr(hello, 'ん.ち') FROM t2;
SELECT hello, regexp_instr(hello, 'и.е') FROM t2;
SELECT hello, regexp_instr(hello, 'ε.α') FROM t2;
SELECT hello, regexp_instr(hello, 'ა.ა') FROM t2;
SELECT hello, regexp_replace(hello, 'ん.ち', 'Достоевский') FROM t2;
SELECT hello, regexp_replace(hello, 'и.е', 'Достоевский') FROM t2;
SELECT hello, regexp_replace(hello, 'ε.α', 'Достоевский') FROM t2;
SELECT hello, regexp_replace(hello, 'ა.ა', 'Достоевский') FROM t2;
SHOW VARIABLES LIKE 'character_set%';
CREATE TABLE tw(hello text) DEFAULT CHARSET=cp1251 ENGINE COLUMNSTORE;
INSERT INTO tw values(convert('привет' USING cp1251));
SELECT hello, regexp_instr(hello, convert('и.е' USING cp1251)) FROM tw;
SELECT hello, convert(regexp_substr(hello, convert('и.е' USING cp1251)) using utf8) FROM tw;
SELECT hello, convert(regexp_replace(hello, convert('и.е' USING cp1251), convert('Достоевкий' USING cp1251)) using utf8) FROM tw;
SELECT hello, hello regexp convert('и.е' USING cp1251) FROM tw;
# Clean UP
DROP DATABASE mcs228_db;

View File

@ -23,7 +23,6 @@ INSERT INTO t1 VALUES(103, 1234.5699, repeat('o', 5), '1997-12-12', '22:12:02');
INSERT INTO t1 VALUES(-7299, 111.99, repeat('p', 5), '2001-1-1', '23:59:59');
INSERT INTO t1 VALUES(9913, 98765.4321, repeat('q', 5), '09-12-11', '01:08:59');
--error ER_CHECK_NOT_IMPLEMENTED
SELECT t1_TEXT, REGEXP_INSTR(t1_TEXT, 'o') FROM t1 ORDER BY 1;
# Clean UP

View File

@ -23,7 +23,6 @@ INSERT INTO t1 VALUES(103, 1234.5699, repeat('o', 5), '1997-12-12', '22:12:02');
INSERT INTO t1 VALUES(-7299, 111.99, repeat('p', 5), '2001-1-1', '23:59:59');
INSERT INTO t1 VALUES(9913, 98765.4321, repeat('q', 5), '09-12-11', '01:08:59');
--error ER_CHECK_NOT_IMPLEMENTED
SELECT REGEXP_REPLACE(t1_TEXT, 'o', 'X') FROM t1 ORDER BY 1;
# Clean UP

View File

@ -23,7 +23,6 @@ INSERT INTO t1 VALUES(103, 1234.5699, repeat('o', 5), '1997-12-12', '22:12:02');
INSERT INTO t1 VALUES(-7299, 111.99, repeat('p', 5), '2001-1-1', '23:59:59');
INSERT INTO t1 VALUES(9913, 98765.4321, repeat('q', 5), '09-12-11', '01:08:59');
--error ER_CHECK_NOT_IMPLEMENTED
SELECT REGEXP_SUBSTR(t1_TEXT, '[a-z]+') FROM t1 ORDER BY 1;
# Clean UP