1
0
mirror of https://github.com/MariaDB/server.git synced 2025-08-07 00:04:31 +03:00

BUG#29464 - load data infile into table with big5 chinese fulltext index

hangs 100% cpu

The fulltext parser may fall into an infinite loop when it gets an illegal
multibyte sequence (or a sequence that doesn't have a mapping to Unicode).

Affects 5.1 only.


mysql-test/r/fulltext.result:
  A test case for BUG#29464.
mysql-test/t/fulltext.test:
  A test case for BUG#29464.
storage/myisam/ft_parser.c:
  ctype() may return a negative value, which was stored in an unsigned
  variable.
  
  Also, ctype() may return a negative length for a correct multibyte
  sequence that doesn't have a mapping to Unicode. These characters
  are skipped correctly with this patch.
This commit is contained in:
unknown
2007-07-05 15:17:06 +05:00
parent 435df1859f
commit 3bd52b0b7b
3 changed files with 23 additions and 6 deletions

View File

@@ -476,3 +476,7 @@ ALTER TABLE t1 DISABLE KEYS;
SELECT * FROM t1 WHERE MATCH(a) AGAINST('test'); SELECT * FROM t1 WHERE MATCH(a) AGAINST('test');
ERROR HY000: Can't find FULLTEXT index matching the column list ERROR HY000: Can't find FULLTEXT index matching the column list
DROP TABLE t1; DROP TABLE t1;
CREATE TABLE t1(a VARCHAR(2) CHARACTER SET big5 COLLATE big5_chinese_ci,
FULLTEXT(a));
INSERT INTO t1 VALUES(0xA3C2);
DROP TABLE t1;

View File

@@ -399,4 +399,13 @@ ALTER TABLE t1 DISABLE KEYS;
SELECT * FROM t1 WHERE MATCH(a) AGAINST('test'); SELECT * FROM t1 WHERE MATCH(a) AGAINST('test');
DROP TABLE t1; DROP TABLE t1;
#
# BUG#29464 - load data infile into table with big5 chinese fulltext index
# hangs 100% cpu
#
CREATE TABLE t1(a VARCHAR(2) CHARACTER SET big5 COLLATE big5_chinese_ci,
FULLTEXT(a));
INSERT INTO t1 VALUES(0xA3C2);
DROP TABLE t1;
# End of 4.1 tests # End of 4.1 tests

View File

@@ -111,7 +111,8 @@ uchar ft_get_word(CHARSET_INFO *cs, uchar **start, uchar *end,
{ {
uchar *doc=*start; uchar *doc=*start;
int ctype; int ctype;
uint mwc, length, mbl; uint mwc, length;
int mbl;
param->yesno=(FTB_YES==' ') ? 1 : (param->quot != 0); param->yesno=(FTB_YES==' ') ? 1 : (param->quot != 0);
param->weight_adjust= param->wasign= 0; param->weight_adjust= param->wasign= 0;
@@ -119,7 +120,7 @@ uchar ft_get_word(CHARSET_INFO *cs, uchar **start, uchar *end,
while (doc<end) while (doc<end)
{ {
for (; doc < end; doc+= (mbl > 0 ? mbl : 1)) for (; doc < end; doc+= (mbl > 0 ? mbl : (mbl < 0 ? -mbl : 1)))
{ {
mbl= cs->cset->ctype(cs, &ctype, (uchar*)doc, (uchar*)end); mbl= cs->cset->ctype(cs, &ctype, (uchar*)doc, (uchar*)end);
if (true_word_char(ctype, *doc)) if (true_word_char(ctype, *doc))
@@ -157,7 +158,8 @@ uchar ft_get_word(CHARSET_INFO *cs, uchar **start, uchar *end,
} }
mwc=length=0; mwc=length=0;
for (word->pos= doc; doc < end; length++, doc+= (mbl > 0 ? mbl : 1)) for (word->pos= doc; doc < end; length++,
doc+= (mbl > 0 ? mbl : (mbl < 0 ? -mbl : 1)))
{ {
mbl= cs->cset->ctype(cs, &ctype, (uchar*)doc, (uchar*)end); mbl= cs->cset->ctype(cs, &ctype, (uchar*)doc, (uchar*)end);
if (true_word_char(ctype, *doc)) if (true_word_char(ctype, *doc))
@@ -200,13 +202,14 @@ uchar ft_simple_get_word(CHARSET_INFO *cs, uchar **start, const uchar *end,
FT_WORD *word, my_bool skip_stopwords) FT_WORD *word, my_bool skip_stopwords)
{ {
uchar *doc= *start; uchar *doc= *start;
uint mwc, length, mbl; uint mwc, length;
int mbl;
int ctype; int ctype;
DBUG_ENTER("ft_simple_get_word"); DBUG_ENTER("ft_simple_get_word");
do do
{ {
for (;; doc+= (mbl > 0 ? mbl : 1)) for (;; doc+= (mbl > 0 ? mbl : (mbl < 0 ? -mbl : 1)))
{ {
if (doc >= end) if (doc >= end)
DBUG_RETURN(0); DBUG_RETURN(0);
@@ -216,7 +219,8 @@ uchar ft_simple_get_word(CHARSET_INFO *cs, uchar **start, const uchar *end,
} }
mwc= length= 0; mwc= length= 0;
for (word->pos= doc; doc < end; length++, doc+= (mbl > 0 ? mbl : 1)) for (word->pos= doc; doc < end; length++,
doc+= (mbl > 0 ? mbl : (mbl < 0 ? -mbl : 1)))
{ {
mbl= cs->cset->ctype(cs, &ctype, (uchar*)doc, (uchar*)end); mbl= cs->cset->ctype(cs, &ctype, (uchar*)doc, (uchar*)end);
if (true_word_char(ctype, *doc)) if (true_word_char(ctype, *doc))