mirror of
https://github.com/MariaDB/server.git
synced 2025-07-27 18:02:13 +03:00
MDEV-11233 CREATE FULLTEXT INDEX with a token longer than 127 bytes crashes server

This bug is the result of merging the Oracle MySQL follow-up fix
BUG#22963169 MYSQL CRASHES ON CREATE FULLTEXT INDEX
without merging the base bug fix:
Bug#79475 Insert a token of 84 4-bytes chars into fts index causes server crash.

Unlike the above mentioned fixes in MySQL, our fix will not change the storage format of fulltext indexes in InnoDB or XtraDB when a character encoding with mbmaxlen=2 or mbmaxlen=3 is used and the length of a word is between 128 and 84*mbmaxlen bytes. The Oracle fix would allocate 2 length bytes for these cases.

Compatibility with other MySQL and MariaDB releases is ensured by persisting the used maximum length in the SYS_COLUMNS table in the InnoDB data dictionary.

This fix also removes some unnecessary strcmp() calls when checking for the legacy default collation my_charset_latin1 (my_charset_latin1.name=="latin1_swedish_ci").

fts_create_one_index_table(): Store the actual length in bytes. This metadata will be written to the SYS_COLUMNS table.

fts_zip_initialize(): Initialize only the first byte of the buffer. Actually the code should not even care about this first byte, because the length is set as 0.

FTS_MAX_WORD_LEN: Define as HA_FT_MAXCHARLEN * 4 aka 336 bytes, not as 254 bytes.

row_merge_create_fts_sort_index(): Set the actual maximum length of the column in bytes, similar to fts_create_one_index_table().

row_merge_fts_doc_tokenize(): Remove the redundant parameter word_dtype. Use the actual maximum length of the column. Calculate the extra_size in the same way as row_merge_buf_encode() does.
This commit is contained in:
168
mysql-test/suite/innodb_fts/r/create.result
Normal file
168
mysql-test/suite/innodb_fts/r/create.result
Normal file
@ -0,0 +1,168 @@
|
||||
SET NAMES utf8mb4;
|
||||
#
|
||||
# MDEV-11233 CREATE FULLTEXT INDEX with a token
|
||||
# longer than 127 bytes crashes server
|
||||
#
|
||||
CREATE TABLE t(t TEXT CHARACTER SET utf8mb3) ENGINE=InnoDB;
|
||||
INSERT INTO t SET t=REPEAT(CONCAT(REPEAT(_utf8mb3 0xE0B987, 4), REPEAT(_utf8mb3 0xE0B989, 5)), 5);
|
||||
INSERT INTO t SET t=REPEAT(_utf8 0xefbc90,84);
|
||||
INSERT INTO t SET t=REPEAT('befor',17);
|
||||
INSERT INTO t SET t='BeforeTheIndexCreation';
|
||||
CREATE FULLTEXT INDEX ft ON t(t);
|
||||
Warnings:
|
||||
Warning 124 InnoDB rebuilding table to add column FTS_DOC_ID
|
||||
INSERT INTO t SET t='this was inserted after creating the index';
|
||||
INSERT INTO t SET t=REPEAT(_utf8 0xefbc91,84);
|
||||
INSERT INTO t SET t=REPEAT('after',17);
|
||||
INSERT INTO t SET t=REPEAT(_utf8mb3 0xe794b2e9aaa8e69687, 15);
|
||||
# The data below is not 3-byte UTF-8, but 4-byte chars.
|
||||
INSERT INTO t SET t=REPEAT(_utf8mb4 0xf09f9695, 84);
|
||||
Warnings:
|
||||
Warning 1366 Incorrect string value: '\xF0\x9F\x96\x95\xF0\x9F...' for column 't' at row 1
|
||||
INSERT INTO t SET t=REPEAT(_utf8mb4 0xf09f9696, 85);
|
||||
Warnings:
|
||||
Warning 1366 Incorrect string value: '\xF0\x9F\x96\x96\xF0\x9F...' for column 't' at row 1
|
||||
SELECT COUNT(*) FROM t WHERE MATCH t AGAINST
|
||||
(REPEAT(CONCAT(REPEAT(_utf8mb3 0xE0B987, 4), REPEAT(_utf8mb3 0xE0B989, 5)), 5));
|
||||
COUNT(*)
|
||||
1
|
||||
SELECT COUNT(*) FROM t WHERE MATCH t AGAINST ('BeforeTheIndexCreation');
|
||||
COUNT(*)
|
||||
1
|
||||
SELECT COUNT(*) FROM t WHERE MATCH t AGAINST (REPEAT('befor',17));
|
||||
COUNT(*)
|
||||
0
|
||||
SELECT COUNT(*) FROM t WHERE MATCH t AGAINST ('after');
|
||||
COUNT(*)
|
||||
1
|
||||
SELECT COUNT(*) FROM t WHERE MATCH t AGAINST (REPEAT('after',17));
|
||||
COUNT(*)
|
||||
0
|
||||
SELECT COUNT(*) FROM t WHERE MATCH t AGAINST (REPEAT(_utf8 0xefbc90, 83));
|
||||
COUNT(*)
|
||||
0
|
||||
SELECT COUNT(*) FROM t WHERE MATCH t AGAINST (REPEAT(_utf8 0xefbc90, 84));
|
||||
COUNT(*)
|
||||
1
|
||||
SELECT COUNT(*) FROM t WHERE MATCH t AGAINST (REPEAT(_utf8 0xefbc90, 85));
|
||||
COUNT(*)
|
||||
0
|
||||
SELECT COUNT(*) FROM t WHERE MATCH t AGAINST (REPEAT(_utf8 0xefbc91, 83));
|
||||
COUNT(*)
|
||||
0
|
||||
SELECT COUNT(*) FROM t WHERE MATCH t AGAINST (REPEAT(_utf8 0xefbc91, 84));
|
||||
COUNT(*)
|
||||
1
|
||||
SELECT COUNT(*) FROM t WHERE MATCH t AGAINST (REPEAT(_utf8 0xefbc91, 85));
|
||||
COUNT(*)
|
||||
0
|
||||
SELECT COUNT(*) FROM t WHERE MATCH t AGAINST (REPEAT(_utf8mb4 0xf09f9695, 83));
|
||||
COUNT(*)
|
||||
0
|
||||
SELECT COUNT(*) FROM t WHERE MATCH t AGAINST (REPEAT(_utf8mb4 0xf09f9695, 84));
|
||||
COUNT(*)
|
||||
0
|
||||
SELECT COUNT(*) FROM t WHERE MATCH t AGAINST (REPEAT(_utf8mb4 0xf09f9696, 84));
|
||||
COUNT(*)
|
||||
0
|
||||
SELECT COUNT(*) FROM t WHERE MATCH t AGAINST (REPEAT(_utf8mb4 0xf09f9696, 85));
|
||||
COUNT(*)
|
||||
0
|
||||
SELECT * FROM t;
|
||||
t
|
||||
็็็็้้้้้็็็็้้้้้็็็็้้้้้็็็็้้้้้็็็็้้้้้
|
||||
000000000000000000000000000000000000000000000000000000000000000000000000000000000000
|
||||
beforbeforbeforbeforbeforbeforbeforbeforbeforbeforbeforbeforbeforbeforbeforbeforbefor
|
||||
BeforeTheIndexCreation
|
||||
this was inserted after creating the index
|
||||
111111111111111111111111111111111111111111111111111111111111111111111111111111111111
|
||||
afterafterafterafterafterafterafterafterafterafterafterafterafterafterafterafterafter
|
||||
甲骨文甲骨文甲骨文甲骨文甲骨文甲骨文甲骨文甲骨文甲骨文甲骨文甲骨文甲骨文甲骨文甲骨文甲骨文
|
||||
????????????????????????????????????????????????????????????????????????????????????
|
||||
?????????????????????????????????????????????????????????????????????????????????????
|
||||
SELECT len,COUNT(*) FROM INFORMATION_SCHEMA.INNODB_SYS_COLUMNS where name='word' GROUP BY len;
|
||||
len COUNT(*)
|
||||
252 6
|
||||
DROP TABLE t;
|
||||
CREATE TABLE t(t TEXT CHARACTER SET utf8mb4) ENGINE=InnoDB;
|
||||
INSERT INTO t SET t=REPEAT(_utf8mb3 0xe794b2e9aaa8e69687, 15);
|
||||
INSERT INTO t SET t=REPEAT(_utf8 0xefbc90,84);
|
||||
INSERT INTO t SET t=REPEAT('befor',17);
|
||||
INSERT INTO t SET t='BeforeTheIndexCreation';
|
||||
CREATE FULLTEXT INDEX ft ON t(t);
|
||||
Warnings:
|
||||
Warning 124 InnoDB rebuilding table to add column FTS_DOC_ID
|
||||
INSERT INTO t SET t='this was inserted after creating the index';
|
||||
INSERT INTO t SET t=REPEAT(_utf8 0xefbc91,84);
|
||||
INSERT INTO t SET t=REPEAT('after',17);
|
||||
INSERT INTO t SET t=REPEAT(concat(repeat(_utf8mb3 0xE0B987, 4), repeat(_utf8mb3 0xE0B989, 5)), 5);
|
||||
INSERT INTO t SET t=REPEAT(_utf8mb4 0xf09f9695, 84);
|
||||
# The token below exceeds the 84-character limit.
|
||||
INSERT INTO t SET t=REPEAT(_utf8mb4 0xf09f9696, 85);
|
||||
SELECT COUNT(*) FROM t WHERE MATCH t AGAINST (REPEAT(_utf8mb3 0xe794b2e9aaa8e69687, 15));
|
||||
COUNT(*)
|
||||
1
|
||||
SELECT COUNT(*) FROM t WHERE MATCH t AGAINST ('BeforeTheIndexCreation');
|
||||
COUNT(*)
|
||||
1
|
||||
SELECT COUNT(*) FROM t WHERE MATCH t AGAINST (REPEAT('befor',17));
|
||||
COUNT(*)
|
||||
0
|
||||
SELECT COUNT(*) FROM t WHERE MATCH t AGAINST ('after');
|
||||
COUNT(*)
|
||||
1
|
||||
SELECT COUNT(*) FROM t WHERE MATCH t AGAINST (REPEAT('after',17));
|
||||
COUNT(*)
|
||||
0
|
||||
SELECT COUNT(*) FROM t WHERE MATCH t AGAINST (REPEAT(_utf8 0xefbc90, 83));
|
||||
COUNT(*)
|
||||
0
|
||||
SELECT COUNT(*) FROM t WHERE MATCH t AGAINST (REPEAT(_utf8 0xefbc90, 84));
|
||||
COUNT(*)
|
||||
1
|
||||
SELECT COUNT(*) FROM t WHERE MATCH t AGAINST (REPEAT(_utf8 0xefbc90, 85));
|
||||
COUNT(*)
|
||||
0
|
||||
SELECT COUNT(*) FROM t WHERE MATCH t AGAINST (REPEAT(_utf8 0xefbc91, 83));
|
||||
COUNT(*)
|
||||
0
|
||||
SELECT COUNT(*) FROM t WHERE MATCH t AGAINST (REPEAT(_utf8 0xefbc91, 84));
|
||||
COUNT(*)
|
||||
1
|
||||
SELECT COUNT(*) FROM t WHERE MATCH t AGAINST (REPEAT(_utf8 0xefbc91, 85));
|
||||
COUNT(*)
|
||||
0
|
||||
SELECT COUNT(*) FROM t WHERE MATCH t AGAINST (REPEAT(_utf8mb4 0xf09f9695, 83));
|
||||
COUNT(*)
|
||||
0
|
||||
SELECT COUNT(*) FROM t WHERE MATCH t AGAINST (REPEAT(_utf8mb4 0xf09f9695, 84));
|
||||
COUNT(*)
|
||||
0
|
||||
SELECT COUNT(*) FROM t WHERE MATCH t AGAINST (REPEAT(_utf8mb4 0xf09f9696, 84));
|
||||
COUNT(*)
|
||||
0
|
||||
SELECT COUNT(*) FROM t WHERE MATCH t AGAINST (REPEAT(_utf8mb4 0xf09f9696, 85));
|
||||
COUNT(*)
|
||||
0
|
||||
SELECT * FROM t;
|
||||
t
|
||||
甲骨文甲骨文甲骨文甲骨文甲骨文甲骨文甲骨文甲骨文甲骨文甲骨文甲骨文甲骨文甲骨文甲骨文甲骨文
|
||||
000000000000000000000000000000000000000000000000000000000000000000000000000000000000
|
||||
beforbeforbeforbeforbeforbeforbeforbeforbeforbeforbeforbeforbeforbeforbeforbeforbefor
|
||||
BeforeTheIndexCreation
|
||||
this was inserted after creating the index
|
||||
111111111111111111111111111111111111111111111111111111111111111111111111111111111111
|
||||
afterafterafterafterafterafterafterafterafterafterafterafterafterafterafterafterafter
|
||||
็็็็้้้้้็็็็้้้้้็็็็้้้้้็็็็้้้้้็็็็้้้้้
|
||||
🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕🖕
|
||||
🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖🖖
|
||||
SELECT len,COUNT(*) FROM INFORMATION_SCHEMA.INNODB_SYS_COLUMNS where name='word' GROUP BY len;
|
||||
len COUNT(*)
|
||||
336 6
|
||||
DROP TABLE t;
|
||||
CREATE TABLE t(t TEXT CHARACTER SET latin1, FULLTEXT INDEX(t))
|
||||
ENGINE=InnoDB;
|
||||
SELECT len,COUNT(*) FROM INFORMATION_SCHEMA.INNODB_SYS_COLUMNS where name='word' GROUP BY len;
|
||||
len COUNT(*)
|
||||
84 6
|
||||
DROP TABLE t;
|
Reference in New Issue
Block a user