1
0
mirror of https://github.com/sqlite/sqlite.git synced 2025-07-29 08:01:23 +03:00

Fix the initial-prefix optimization for the REGEXP extension such that it

works even if the prefix contains characters that require a 3-byte UTF8
encoding.  This should fix the problem reported by
[forum:/forumpost/96692f8ba5|forum post 96692f8ba5].

FossilOrigin-Name: c94595a6e15490b432f099fefbe2429fa19287f7bdc86332cba0fd1e08f65bd6
This commit is contained in:
drh
2022-07-03 14:25:47 +00:00
parent a57ac0a827
commit f28727f61e
4 changed files with 31 additions and 9 deletions

View File

@ -239,4 +239,26 @@ do_execsql_test regexp1-2.22 {
SELECT 'abc$¢€xyz' REGEXP '^abc[^\u0025-X][^ -\u007f][^\u20ab]xyz$'
} {1}
# 2022-07-03
# https://sqlite.org/forum/forumpost/96692f8ba5
# The REGEXP extension mishandles the prefix search optimization when
# the prefix contains 3-byte UTF8 characters.
#
reset_db
load_static_extension db regexp
do_execsql_test regexp1-3.1 {
CREATE TABLE t1(id INTEGER PRIMARY KEY, a TEXT);
INSERT INTO t1(id, a) VALUES(1, '日本語');
SELECT a, hex(a), length(a) FROM t1;
} {日本語 E697A5E69CACE8AA9E 3}
do_execsql_test regexp1-3.2 {
SELECT * FROM t1 WHERE a='日本語';
} {1 日本語}
do_execsql_test regexp1-3.3 {
SELECT * FROM t1 WHERE a LIKE '日本語';
} {1 日本語}
do_execsql_test regexp1-3.4 {
SELECT * FROM t1 wHERE a REGEXP '日本語';
} {1 日本語}
finish_test