1
0
mirror of https://github.com/MariaDB/server.git synced 2025-07-30 16:24:05 +03:00

BUG#29299 - repeatable myisam fulltext index corruption

Fulltext index may get corrupt by certain gbk characters.

The problem was that when skipping leading non-true-word-characters,
we assumed that these characters are always 1 byte long. This is not
the case with gbk character set, since non-true-word-characters may
be 2 bytes long.

Affects 5.0 only.


myisam/ft_parser.c:
  Leading non-true-word-characters may also be multi-byte (e.g. in
  gbk character set).
mysql-test/r/fulltext2.result:
  A test case for BUG#29299.
mysql-test/t/fulltext2.test:
  A test case for BUG#29299.
This commit is contained in:
unknown
2007-06-27 18:10:19 +05:00
parent b3b8d5165d
commit 030d98d397
3 changed files with 28 additions and 2 deletions

View File

@ -111,7 +111,7 @@ byte ft_get_word(CHARSET_INFO *cs, byte **start, byte *end,
while (doc<end)
{
for (;doc<end;doc++)
for (; doc < end; doc+= mbl)
{
if (true_word_char(cs,*doc)) break;
if (*doc == FTB_RQUOT && param->quot)
@ -120,6 +120,7 @@ byte ft_get_word(CHARSET_INFO *cs, byte **start, byte *end,
*start=doc+1;
return 3; /* FTB_RBR */
}
mbl= my_mbcharlen(cs, *(uchar *)doc);
if (!param->quot)
{
if (*doc == FTB_LBR || *doc == FTB_RBR || *doc == FTB_LQUOT)
@ -187,10 +188,11 @@ byte ft_simple_get_word(CHARSET_INFO *cs, byte **start, const byte *end,
do
{
for (;; doc++)
for (;; doc+= mbl)
{
if (doc >= end) DBUG_RETURN(0);
if (true_word_char(cs, *doc)) break;
mbl= my_mbcharlen(cs, *(uchar *)doc);
}
mwc= length= 0;