mirror of
https://github.com/MariaDB/server.git
synced 2025-08-07 00:04:31 +03:00
BUG#29464 - load data infile into table with big5 chinese fulltext index
hangs 100% cpu

The fulltext parser may fall into an infinite loop when it encounters an illegal multibyte sequence (or a sequence that has no mapping to Unicode). Affects 5.1 only.

mysql-test/r/fulltext.result: A test case for BUG#29464.
mysql-test/t/fulltext.test: A test case for BUG#29464.
storage/myisam/ft_parser.c: ctype() may return a negative value, which was stored in an unsigned variable. ctype() may also return a negative length for a valid multibyte sequence that has no mapping to Unicode. With this patch, such characters are skipped correctly.
This commit is contained in:
@@ -476,3 +476,7 @@ ALTER TABLE t1 DISABLE KEYS;
|
|||||||
SELECT * FROM t1 WHERE MATCH(a) AGAINST('test');
|
SELECT * FROM t1 WHERE MATCH(a) AGAINST('test');
|
||||||
ERROR HY000: Can't find FULLTEXT index matching the column list
|
ERROR HY000: Can't find FULLTEXT index matching the column list
|
||||||
DROP TABLE t1;
|
DROP TABLE t1;
|
||||||
|
CREATE TABLE t1(a VARCHAR(2) CHARACTER SET big5 COLLATE big5_chinese_ci,
|
||||||
|
FULLTEXT(a));
|
||||||
|
INSERT INTO t1 VALUES(0xA3C2);
|
||||||
|
DROP TABLE t1;
|
||||||
|
@@ -399,4 +399,13 @@ ALTER TABLE t1 DISABLE KEYS;
|
|||||||
SELECT * FROM t1 WHERE MATCH(a) AGAINST('test');
|
SELECT * FROM t1 WHERE MATCH(a) AGAINST('test');
|
||||||
DROP TABLE t1;
|
DROP TABLE t1;
|
||||||
|
|
||||||
|
#
|
||||||
|
# BUG#29464 - load data infile into table with big5 chinese fulltext index
|
||||||
|
# hangs 100% cpu
|
||||||
|
#
|
||||||
|
CREATE TABLE t1(a VARCHAR(2) CHARACTER SET big5 COLLATE big5_chinese_ci,
|
||||||
|
FULLTEXT(a));
|
||||||
|
INSERT INTO t1 VALUES(0xA3C2);
|
||||||
|
DROP TABLE t1;
|
||||||
|
|
||||||
# End of 4.1 tests
|
# End of 4.1 tests
|
||||||
|
@@ -111,7 +111,8 @@ uchar ft_get_word(CHARSET_INFO *cs, uchar **start, uchar *end,
|
|||||||
{
|
{
|
||||||
uchar *doc=*start;
|
uchar *doc=*start;
|
||||||
int ctype;
|
int ctype;
|
||||||
uint mwc, length, mbl;
|
uint mwc, length;
|
||||||
|
int mbl;
|
||||||
|
|
||||||
param->yesno=(FTB_YES==' ') ? 1 : (param->quot != 0);
|
param->yesno=(FTB_YES==' ') ? 1 : (param->quot != 0);
|
||||||
param->weight_adjust= param->wasign= 0;
|
param->weight_adjust= param->wasign= 0;
|
||||||
@@ -119,7 +120,7 @@ uchar ft_get_word(CHARSET_INFO *cs, uchar **start, uchar *end,
|
|||||||
|
|
||||||
while (doc<end)
|
while (doc<end)
|
||||||
{
|
{
|
||||||
for (; doc < end; doc+= (mbl > 0 ? mbl : 1))
|
for (; doc < end; doc+= (mbl > 0 ? mbl : (mbl < 0 ? -mbl : 1)))
|
||||||
{
|
{
|
||||||
mbl= cs->cset->ctype(cs, &ctype, (uchar*)doc, (uchar*)end);
|
mbl= cs->cset->ctype(cs, &ctype, (uchar*)doc, (uchar*)end);
|
||||||
if (true_word_char(ctype, *doc))
|
if (true_word_char(ctype, *doc))
|
||||||
@@ -157,7 +158,8 @@ uchar ft_get_word(CHARSET_INFO *cs, uchar **start, uchar *end,
|
|||||||
}
|
}
|
||||||
|
|
||||||
mwc=length=0;
|
mwc=length=0;
|
||||||
for (word->pos= doc; doc < end; length++, doc+= (mbl > 0 ? mbl : 1))
|
for (word->pos= doc; doc < end; length++,
|
||||||
|
doc+= (mbl > 0 ? mbl : (mbl < 0 ? -mbl : 1)))
|
||||||
{
|
{
|
||||||
mbl= cs->cset->ctype(cs, &ctype, (uchar*)doc, (uchar*)end);
|
mbl= cs->cset->ctype(cs, &ctype, (uchar*)doc, (uchar*)end);
|
||||||
if (true_word_char(ctype, *doc))
|
if (true_word_char(ctype, *doc))
|
||||||
@@ -200,13 +202,14 @@ uchar ft_simple_get_word(CHARSET_INFO *cs, uchar **start, const uchar *end,
|
|||||||
FT_WORD *word, my_bool skip_stopwords)
|
FT_WORD *word, my_bool skip_stopwords)
|
||||||
{
|
{
|
||||||
uchar *doc= *start;
|
uchar *doc= *start;
|
||||||
uint mwc, length, mbl;
|
uint mwc, length;
|
||||||
|
int mbl;
|
||||||
int ctype;
|
int ctype;
|
||||||
DBUG_ENTER("ft_simple_get_word");
|
DBUG_ENTER("ft_simple_get_word");
|
||||||
|
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
for (;; doc+= (mbl > 0 ? mbl : 1))
|
for (;; doc+= (mbl > 0 ? mbl : (mbl < 0 ? -mbl : 1)))
|
||||||
{
|
{
|
||||||
if (doc >= end)
|
if (doc >= end)
|
||||||
DBUG_RETURN(0);
|
DBUG_RETURN(0);
|
||||||
@@ -216,7 +219,8 @@ uchar ft_simple_get_word(CHARSET_INFO *cs, uchar **start, const uchar *end,
|
|||||||
}
|
}
|
||||||
|
|
||||||
mwc= length= 0;
|
mwc= length= 0;
|
||||||
for (word->pos= doc; doc < end; length++, doc+= (mbl > 0 ? mbl : 1))
|
for (word->pos= doc; doc < end; length++,
|
||||||
|
doc+= (mbl > 0 ? mbl : (mbl < 0 ? -mbl : 1)))
|
||||||
{
|
{
|
||||||
mbl= cs->cset->ctype(cs, &ctype, (uchar*)doc, (uchar*)end);
|
mbl= cs->cset->ctype(cs, &ctype, (uchar*)doc, (uchar*)end);
|
||||||
if (true_word_char(ctype, *doc))
|
if (true_word_char(ctype, *doc))
|
||||||
|
Reference in New Issue
Block a user