mirror of
https://github.com/MariaDB/server.git
synced 2025-08-07 00:04:31 +03:00
BUG#29299 - repeatable myisam fulltext index corruption
A fulltext index may become corrupted by certain gbk characters. The problem was that, when skipping leading non-true-word characters, we assumed that these characters are always 1 byte long. This is not the case with the gbk character set, where non-true-word characters may be 2 bytes long. Affects 5.0 only.
myisam/ft_parser.c: Leading non-true-word characters may also be multi-byte (e.g. in the gbk character set).
mysql-test/r/fulltext2.result: A test case for BUG#29299.
mysql-test/t/fulltext2.test: A test case for BUG#29299.
This commit is contained in:
@@ -111,7 +111,7 @@ byte ft_get_word(CHARSET_INFO *cs, byte **start, byte *end,
|
|||||||
|
|
||||||
while (doc<end)
|
while (doc<end)
|
||||||
{
|
{
|
||||||
for (;doc<end;doc++)
|
for (; doc < end; doc+= mbl)
|
||||||
{
|
{
|
||||||
if (true_word_char(cs,*doc)) break;
|
if (true_word_char(cs,*doc)) break;
|
||||||
if (*doc == FTB_RQUOT && param->quot)
|
if (*doc == FTB_RQUOT && param->quot)
|
||||||
@@ -120,6 +120,7 @@ byte ft_get_word(CHARSET_INFO *cs, byte **start, byte *end,
|
|||||||
*start=doc+1;
|
*start=doc+1;
|
||||||
return 3; /* FTB_RBR */
|
return 3; /* FTB_RBR */
|
||||||
}
|
}
|
||||||
|
mbl= my_mbcharlen(cs, *(uchar *)doc);
|
||||||
if (!param->quot)
|
if (!param->quot)
|
||||||
{
|
{
|
||||||
if (*doc == FTB_LBR || *doc == FTB_RBR || *doc == FTB_LQUOT)
|
if (*doc == FTB_LBR || *doc == FTB_RBR || *doc == FTB_LQUOT)
|
||||||
@@ -187,10 +188,11 @@ byte ft_simple_get_word(CHARSET_INFO *cs, byte **start, const byte *end,
|
|||||||
|
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
for (;; doc++)
|
for (;; doc+= mbl)
|
||||||
{
|
{
|
||||||
if (doc >= end) DBUG_RETURN(0);
|
if (doc >= end) DBUG_RETURN(0);
|
||||||
if (true_word_char(cs, *doc)) break;
|
if (true_word_char(cs, *doc)) break;
|
||||||
|
mbl= my_mbcharlen(cs, *(uchar *)doc);
|
||||||
}
|
}
|
||||||
|
|
||||||
mwc= length= 0;
|
mwc= length= 0;
|
||||||
|
@@ -241,3 +241,15 @@ select * from t1 where match a against('ab c' in boolean mode);
|
|||||||
a
|
a
|
||||||
drop table t1;
|
drop table t1;
|
||||||
set names latin1;
|
set names latin1;
|
||||||
|
CREATE TABLE t1(a VARCHAR(255) CHARACTER SET gbk, FULLTEXT(a));
|
||||||
|
SET NAMES utf8;
|
||||||
|
INSERT INTO t1 VALUES(0xF043616161),(0xBEF361616197C22061616161);
|
||||||
|
SELECT HEX(a) FROM t1 WHERE MATCH(a) AGAINST(0x97C22061616161 IN BOOLEAN MODE);
|
||||||
|
HEX(a)
|
||||||
|
BEF361616197C22061616161
|
||||||
|
DELETE FROM t1 LIMIT 1;
|
||||||
|
CHECK TABLE t1;
|
||||||
|
Table Op Msg_type Msg_text
|
||||||
|
test.t1 check status OK
|
||||||
|
SET NAMES latin1;
|
||||||
|
DROP TABLE t1;
|
||||||
|
@@ -220,4 +220,16 @@ select * from t1 where match a against('ab c' in boolean mode);
|
|||||||
drop table t1;
|
drop table t1;
|
||||||
set names latin1;
|
set names latin1;
|
||||||
|
|
||||||
|
#
|
||||||
|
# BUG#29299 - repeatable myisam fulltext index corruption
|
||||||
|
#
|
||||||
|
CREATE TABLE t1(a VARCHAR(255) CHARACTER SET gbk, FULLTEXT(a));
|
||||||
|
SET NAMES utf8;
|
||||||
|
INSERT INTO t1 VALUES(0xF043616161),(0xBEF361616197C22061616161);
|
||||||
|
SELECT HEX(a) FROM t1 WHERE MATCH(a) AGAINST(0x97C22061616161 IN BOOLEAN MODE);
|
||||||
|
DELETE FROM t1 LIMIT 1;
|
||||||
|
CHECK TABLE t1;
|
||||||
|
SET NAMES latin1;
|
||||||
|
DROP TABLE t1;
|
||||||
|
|
||||||
# End of 4.1 tests
|
# End of 4.1 tests
|
||||||
|
Reference in New Issue
Block a user