mirror of
https://github.com/MariaDB/server.git
synced 2025-12-24 11:21:21 +03:00
BUG#29464 - LOAD DATA INFILE into a table with a big5 Chinese fulltext index
hangs at 100% CPU. The fulltext parser may fall into an infinite loop when it gets an illegal multibyte sequence (or a sequence that doesn't have a mapping to Unicode). Affects 5.1 only. mysql-test/r/fulltext.result: A test case for BUG#29464. mysql-test/t/fulltext.test: A test case for BUG#29464. storage/myisam/ft_parser.c: ctype() may return a negative value, which was stored in an unsigned variable. Also, ctype() may return a negative length for a correct multibyte sequence that doesn't have a mapping to Unicode. These characters are skipped correctly with this patch.
This commit is contained in:
@@ -111,7 +111,8 @@ uchar ft_get_word(CHARSET_INFO *cs, uchar **start, uchar *end,
|
||||
{
|
||||
uchar *doc=*start;
|
||||
int ctype;
|
||||
uint mwc, length, mbl;
|
||||
uint mwc, length;
|
||||
int mbl;
|
||||
|
||||
param->yesno=(FTB_YES==' ') ? 1 : (param->quot != 0);
|
||||
param->weight_adjust= param->wasign= 0;
|
||||
@@ -119,7 +120,7 @@ uchar ft_get_word(CHARSET_INFO *cs, uchar **start, uchar *end,
|
||||
|
||||
while (doc<end)
|
||||
{
|
||||
for (; doc < end; doc+= (mbl > 0 ? mbl : 1))
|
||||
for (; doc < end; doc+= (mbl > 0 ? mbl : (mbl < 0 ? -mbl : 1)))
|
||||
{
|
||||
mbl= cs->cset->ctype(cs, &ctype, (uchar*)doc, (uchar*)end);
|
||||
if (true_word_char(ctype, *doc))
|
||||
@@ -157,7 +158,8 @@ uchar ft_get_word(CHARSET_INFO *cs, uchar **start, uchar *end,
|
||||
}
|
||||
|
||||
mwc=length=0;
|
||||
for (word->pos= doc; doc < end; length++, doc+= (mbl > 0 ? mbl : 1))
|
||||
for (word->pos= doc; doc < end; length++,
|
||||
doc+= (mbl > 0 ? mbl : (mbl < 0 ? -mbl : 1)))
|
||||
{
|
||||
mbl= cs->cset->ctype(cs, &ctype, (uchar*)doc, (uchar*)end);
|
||||
if (true_word_char(ctype, *doc))
|
||||
@@ -200,13 +202,14 @@ uchar ft_simple_get_word(CHARSET_INFO *cs, uchar **start, const uchar *end,
|
||||
FT_WORD *word, my_bool skip_stopwords)
|
||||
{
|
||||
uchar *doc= *start;
|
||||
uint mwc, length, mbl;
|
||||
uint mwc, length;
|
||||
int mbl;
|
||||
int ctype;
|
||||
DBUG_ENTER("ft_simple_get_word");
|
||||
|
||||
do
|
||||
{
|
||||
for (;; doc+= (mbl > 0 ? mbl : 1))
|
||||
for (;; doc+= (mbl > 0 ? mbl : (mbl < 0 ? -mbl : 1)))
|
||||
{
|
||||
if (doc >= end)
|
||||
DBUG_RETURN(0);
|
||||
@@ -216,7 +219,8 @@ uchar ft_simple_get_word(CHARSET_INFO *cs, uchar **start, const uchar *end,
|
||||
}
|
||||
|
||||
mwc= length= 0;
|
||||
for (word->pos= doc; doc < end; length++, doc+= (mbl > 0 ? mbl : 1))
|
||||
for (word->pos= doc; doc < end; length++,
|
||||
doc+= (mbl > 0 ? mbl : (mbl < 0 ? -mbl : 1)))
|
||||
{
|
||||
mbl= cs->cset->ctype(cs, &ctype, (uchar*)doc, (uchar*)end);
|
||||
if (true_word_char(ctype, *doc))
|
||||
|
||||
Reference in New Issue
Block a user