mirror of
https://github.com/postgres/postgres.git
synced 2025-12-19 17:02:53 +03:00
Fix conversion of SIMILAR TO regexes for character classes
The code that translates SIMILAR TO pattern matching expressions to
POSIX-style regular expressions did not consider that square brackets
can be nested. For example, in an expression like [[:alpha:]%_], the
logic replaced the placeholders '_' and '%', but inside a character class they must be left alone.
This commit fixes the conversion logic by tracking the nesting level of
square brackets marking character class areas, while considering that
in expressions like []] or [^]] the first closing square bracket is a
regular character. Multiple tests are added to show how the conversions
should or should not be applied while in a character class area, with
specific cases added for all the characters converted outside character
classes like an opening parenthesis '(', dollar sign '$', etc.
Author: Laurenz Albe <laurenz.albe@cybertec.at>
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Reviewed-by: Michael Paquier <michael@paquier.xyz>
Discussion: https://postgr.es/m/16ab039d1af455652bdf4173402ddda145f2c73b.camel@cybertec.at
Backpatch-through: 13
This commit is contained in:
@@ -614,6 +614,68 @@ SELECT 'abcdefg' SIMILAR TO '_bcd%' ESCAPE NULL AS null;
|
||||
SELECT 'abcdefg' SIMILAR TO '_bcd#%' ESCAPE '##' AS error;
|
||||
ERROR: invalid escape string
|
||||
HINT: Escape string must be empty or one character.
|
||||
-- Characters that should be left alone in character classes when a
|
||||
-- SIMILAR TO regexp pattern is converted to POSIX style.
|
||||
-- Underscore "_"
|
||||
EXPLAIN (COSTS OFF) SELECT * FROM TEXT_TBL WHERE f1 SIMILAR TO '_[_[:alpha:]_]_';
|
||||
QUERY PLAN
|
||||
------------------------------------------------
|
||||
Seq Scan on text_tbl
|
||||
Filter: (f1 ~ '^(?:.[_[:alpha:]_].)$'::text)
|
||||
(2 rows)
|
||||
|
||||
-- Percentage "%"
|
||||
EXPLAIN (COSTS OFF) SELECT * FROM TEXT_TBL WHERE f1 SIMILAR TO '%[%[:alnum:]%]%';
|
||||
QUERY PLAN
|
||||
--------------------------------------------------
|
||||
Seq Scan on text_tbl
|
||||
Filter: (f1 ~ '^(?:.*[%[:alnum:]%].*)$'::text)
|
||||
(2 rows)
|
||||
|
||||
-- Dot "."
|
||||
EXPLAIN (COSTS OFF) SELECT * FROM TEXT_TBL WHERE f1 SIMILAR TO '.[.[:alnum:].].';
|
||||
QUERY PLAN
|
||||
--------------------------------------------------
|
||||
Seq Scan on text_tbl
|
||||
Filter: (f1 ~ '^(?:\.[.[:alnum:].]\.)$'::text)
|
||||
(2 rows)
|
||||
|
||||
-- Dollar "$"
|
||||
EXPLAIN (COSTS OFF) SELECT * FROM TEXT_TBL WHERE f1 SIMILAR TO '$[$[:alnum:]$]$';
|
||||
QUERY PLAN
|
||||
--------------------------------------------------
|
||||
Seq Scan on text_tbl
|
||||
Filter: (f1 ~ '^(?:\$[$[:alnum:]$]\$)$'::text)
|
||||
(2 rows)
|
||||
|
||||
-- Opening parenthesis "("
|
||||
EXPLAIN (COSTS OFF) SELECT * FROM TEXT_TBL WHERE f1 SIMILAR TO '([([:alnum:](](';
|
||||
ERROR: invalid regular expression: parentheses () not balanced
|
||||
-- Caret "^"
|
||||
EXPLAIN (COSTS OFF) SELECT * FROM TEXT_TBL WHERE f1 SIMILAR TO '^[^[:alnum:]^[^^][[^^]][\^][[\^]]\^]^';
|
||||
QUERY PLAN
|
||||
------------------------------------------------------------------------
|
||||
Seq Scan on text_tbl
|
||||
Filter: (f1 ~ '^(?:\^[^[:alnum:]^[^^][[^^]][\^][[\^]]\^]\^)$'::text)
|
||||
(2 rows)
|
||||
|
||||
-- Closing square bracket "]" at the beginning of character class
|
||||
EXPLAIN (COSTS OFF) SELECT * FROM TEXT_TBL WHERE f1 SIMILAR TO '[]%][^]%][^%]%';
|
||||
QUERY PLAN
|
||||
------------------------------------------------
|
||||
Seq Scan on text_tbl
|
||||
Filter: (f1 ~ '^(?:[]%][^]%][^%].*)$'::text)
|
||||
(2 rows)
|
||||
|
||||
-- Closing square bracket effective after two carets at the beginning
|
||||
-- of character class.
|
||||
EXPLAIN (COSTS OFF) SELECT * FROM TEXT_TBL WHERE f1 SIMILAR TO '[^^]^';
|
||||
QUERY PLAN
|
||||
---------------------------------------
|
||||
Seq Scan on text_tbl
|
||||
Filter: (f1 ~ '^(?:[^^]\^)$'::text)
|
||||
(2 rows)
|
||||
|
||||
-- Test backslash escapes in regexp_replace's replacement string
|
||||
SELECT regexp_replace('1112223333', E'(\\d{3})(\\d{3})(\\d{4})', E'(\\1) \\2-\\3');
|
||||
regexp_replace
|
||||
|
||||
@@ -197,6 +197,26 @@ SELECT 'abcd\efg' SIMILAR TO '_bcd\%' ESCAPE '' AS true;
|
||||
SELECT 'abcdefg' SIMILAR TO '_bcd%' ESCAPE NULL AS null;
|
||||
SELECT 'abcdefg' SIMILAR TO '_bcd#%' ESCAPE '##' AS error;
|
||||
|
||||
-- Characters that should be left alone in character classes when a
|
||||
-- SIMILAR TO regexp pattern is converted to POSIX style.
|
||||
-- Underscore "_"
|
||||
EXPLAIN (COSTS OFF) SELECT * FROM TEXT_TBL WHERE f1 SIMILAR TO '_[_[:alpha:]_]_';
|
||||
-- Percentage "%"
|
||||
EXPLAIN (COSTS OFF) SELECT * FROM TEXT_TBL WHERE f1 SIMILAR TO '%[%[:alnum:]%]%';
|
||||
-- Dot "."
|
||||
EXPLAIN (COSTS OFF) SELECT * FROM TEXT_TBL WHERE f1 SIMILAR TO '.[.[:alnum:].].';
|
||||
-- Dollar "$"
|
||||
EXPLAIN (COSTS OFF) SELECT * FROM TEXT_TBL WHERE f1 SIMILAR TO '$[$[:alnum:]$]$';
|
||||
-- Opening parenthesis "("
|
||||
EXPLAIN (COSTS OFF) SELECT * FROM TEXT_TBL WHERE f1 SIMILAR TO '([([:alnum:](](';
|
||||
-- Caret "^"
|
||||
EXPLAIN (COSTS OFF) SELECT * FROM TEXT_TBL WHERE f1 SIMILAR TO '^[^[:alnum:]^[^^][[^^]][\^][[\^]]\^]^';
|
||||
-- Closing square bracket "]" at the beginning of character class
|
||||
EXPLAIN (COSTS OFF) SELECT * FROM TEXT_TBL WHERE f1 SIMILAR TO '[]%][^]%][^%]%';
|
||||
-- Closing square bracket effective after two carets at the beginning
|
||||
-- of character class.
|
||||
EXPLAIN (COSTS OFF) SELECT * FROM TEXT_TBL WHERE f1 SIMILAR TO '[^^]^';
|
||||
|
||||
-- Test backslash escapes in regexp_replace's replacement string
|
||||
SELECT regexp_replace('1112223333', E'(\\d{3})(\\d{3})(\\d{4})', E'(\\1) \\2-\\3');
|
||||
SELECT regexp_replace('foobarrbazz', E'(.)\\1', E'X\\&Y', 'g');
|
||||
|
||||
Reference in New Issue
Block a user