diff --git a/mysql-test/r/func_regexp_pcre.result b/mysql-test/r/func_regexp_pcre.result index 641c4fddbf7..aa090c9faf6 100644 --- a/mysql-test/r/func_regexp_pcre.result +++ b/mysql-test/r/func_regexp_pcre.result @@ -845,3 +845,32 @@ SET default_regex_flags=DEFAULT; SELECT REGEXP_REPLACE('1 foo and bar', '(\\d+) foo and (\\d+ )?bar', '\\1 this and \\2that'); REGEXP_REPLACE('1 foo and bar', '(\\d+) foo and (\\d+ )?bar', '\\1 this and \\2that') 1 this and that +# +# MDEV-8102 REGEXP function fails to match hex values when expression is stored as a variable +# +# Testing a warning +SET NAMES latin1; +SET @regCheck= '\\xE0\\x01'; +SELECT 0xE001 REGEXP @regCheck; +0xE001 REGEXP @regCheck +0 +Warnings: +Warning 1139 Got error 'pcre_exec: Invalid utf8 byte sequence in the subject string' from regexp +# Testing workaround N1: This makes the pattern to be a binary string: +SET NAMES latin1; +SET @regCheck= X'E001'; +SELECT 0xE001 REGEXP @regCheck; +0xE001 REGEXP @regCheck +1 +# Testing workaround N2: This also makes the pattern to be a binary string, using a different syntax: +SET NAMES latin1; +SET @regCheck= _binary '\\xE0\\x01'; +SELECT 0xE001 REGEXP @regCheck; +0xE001 REGEXP @regCheck +1 +# Testing workarond N3: This makes derivation of the subject string stronger (IMLICIT instead of COERCIBLE) +SET NAMES latin1; +SET @regCheck= '\\xE0\\x01'; +SELECT CAST(0xE001 AS BINARY) REGEXP @regCheck; +CAST(0xE001 AS BINARY) REGEXP @regCheck +1 diff --git a/mysql-test/r/func_regexp_pcre_debug.result b/mysql-test/r/func_regexp_pcre_debug.result new file mode 100644 index 00000000000..e44492fca72 --- /dev/null +++ b/mysql-test/r/func_regexp_pcre_debug.result @@ -0,0 +1,10 @@ +SET debug_dbug='+d,pcre_exec_error_123'; +SELECT 'a' RLIKE 'a'; +'a' RLIKE 'a' +0 +Warnings: +Warning 1139 Got error 'pcre_exec: Internal error (-123)' from regexp +SET debug_dbug=''; +SELECT 'a' RLIKE 'a'; +'a' RLIKE 'a' +1 diff --git a/mysql-test/t/func_regexp_pcre.test b/mysql-test/t/func_regexp_pcre.test index 6710f5cf096..c9b4c10007d 100644 --- a/mysql-test/t/func_regexp_pcre.test +++ b/mysql-test/t/func_regexp_pcre.test @@ -402,3 +402,27 @@ SET default_regex_flags=DEFAULT; --echo # MDEV-6965 non-captured group \2 in regexp_replace --echo # SELECT REGEXP_REPLACE('1 foo and bar', '(\\d+) foo and (\\d+ )?bar', '\\1 this and \\2that'); + +--echo # +--echo # MDEV-8102 REGEXP function fails to match hex values when expression is stored as a variable +--echo # + +--echo # Testing a warning +SET NAMES latin1; +SET @regCheck= '\\xE0\\x01'; +SELECT 0xE001 REGEXP @regCheck; + +--echo # Testing workaround N1: This makes the pattern to be a binary string: +SET NAMES latin1; +SET @regCheck= X'E001'; +SELECT 0xE001 REGEXP @regCheck; + +--echo # Testing workaround N2: This also makes the pattern to be a binary string, using a different syntax: +SET NAMES latin1; +SET @regCheck= _binary '\\xE0\\x01'; +SELECT 0xE001 REGEXP @regCheck; + +--echo # Testing workarond N3: This makes derivation of the subject string stronger (IMLICIT instead of COERCIBLE) +SET NAMES latin1; +SET @regCheck= '\\xE0\\x01'; +SELECT CAST(0xE001 AS BINARY) REGEXP @regCheck; diff --git a/mysql-test/t/func_regexp_pcre_debug.test b/mysql-test/t/func_regexp_pcre_debug.test new file mode 100644 index 00000000000..c2581fa4110 --- /dev/null +++ b/mysql-test/t/func_regexp_pcre_debug.test @@ -0,0 +1,6 @@ +--source include/have_debug.inc + +SET debug_dbug='+d,pcre_exec_error_123'; +SELECT 'a' RLIKE 'a'; +SET debug_dbug=''; +SELECT 'a' RLIKE 'a'; diff --git a/sql/item_cmpfunc.cc b/sql/item_cmpfunc.cc index 1f1982ffb80..90eef1ea55c 100644 --- a/sql/item_cmpfunc.cc +++ b/sql/item_cmpfunc.cc @@ -5160,10 +5160,65 @@ bool Regexp_processor_pcre::compile(Item *item, bool send_error) } +/** + Send a warning explaining an error code returned by pcre_exec(). +*/ +void Regexp_processor_pcre::pcre_exec_warn(int rc) const +{ + char buf[64]; + const char *errmsg= NULL; + /* + Make a descriptive message only for those pcre_exec() error codes + that can actually happen in MariaDB. + */ + switch (rc) + { + case PCRE_ERROR_NOMEMORY: + errmsg= "pcre_exec: Out of memory"; + break; + case PCRE_ERROR_BADUTF8: + errmsg= "pcre_exec: Invalid utf8 byte sequence in the subject string"; + break; + case PCRE_ERROR_RECURSELOOP: + errmsg= "pcre_exec: Recursion loop detected"; + break; + default: + /* + As other error codes should normally not happen, + we just report the error code without textual description + of the code. + */ + my_snprintf(buf, sizeof(buf), "pcre_exec: Internal error (%d)", rc); + errmsg= buf; + } + push_warning_printf(current_thd, Sql_condition::WARN_LEVEL_WARN, + ER_REGEXP_ERROR, ER(ER_REGEXP_ERROR), errmsg); +} + + +/** + Call pcre_exec() and send a warning if pcre_exec() returned with an error. +*/ +int Regexp_processor_pcre::pcre_exec_with_warn(const pcre *code, + const pcre_extra *extra, + const char *subject, + int length, int startoffset, + int options, int *ovector, + int ovecsize) +{ + int rc= pcre_exec(code, extra, subject, length, + startoffset, options, ovector, ovecsize); + DBUG_EXECUTE_IF("pcre_exec_error_123", rc= -123;); + if (rc < PCRE_ERROR_NOMATCH) + pcre_exec_warn(rc); + return rc; +} + + bool Regexp_processor_pcre::exec(const char *str, int length, int offset) { - m_pcre_exec_rc= pcre_exec(m_pcre, NULL, str, length, - offset, 0, m_SubStrVec, m_subpatterns_needed * 3); + m_pcre_exec_rc= pcre_exec_with_warn(m_pcre, NULL, str, length, offset, 0, + m_SubStrVec, m_subpatterns_needed * 3); return false; } @@ -5173,8 +5228,10 @@ bool Regexp_processor_pcre::exec(String *str, int offset, { if (!(str= convert_if_needed(str, &subject_converter))) return true; - m_pcre_exec_rc= pcre_exec(m_pcre, NULL, str->c_ptr_safe(), str->length(), - offset, 0, m_SubStrVec, m_subpatterns_needed * 3); + m_pcre_exec_rc= pcre_exec_with_warn(m_pcre, NULL, + str->c_ptr_safe(), str->length(), + offset, 0, + m_SubStrVec, m_subpatterns_needed * 3); if (m_pcre_exec_rc > 0) { uint i; diff --git a/sql/item_cmpfunc.h b/sql/item_cmpfunc.h index 8611182f32d..c4933e6d7ed 100644 --- a/sql/item_cmpfunc.h +++ b/sql/item_cmpfunc.h @@ -1549,6 +1549,10 @@ class Regexp_processor_pcre int m_pcre_exec_rc; int m_SubStrVec[30]; uint m_subpatterns_needed; + void pcre_exec_warn(int rc) const; + int pcre_exec_with_warn(const pcre *code, const pcre_extra *extra, + const char *subject, int length, int startoffset, + int options, int *ovector, int ovecsize); public: String *convert_if_needed(String *src, String *converter); String subject_converter;