mirror of
https://github.com/MariaDB/server.git
synced 2025-08-07 00:04:31 +03:00
Merge svojtovich@bk-internal.mysql.com:/home/bk/mysql-5.1-engines
into may.pils.ru:/home/svoj/devel/mysql/BUG19580/mysql-5.1-new

storage/myisam/ft_parser.c: Auto merged
storage/myisam/ft_update.c: Auto merged
storage/myisam/ftdefs.h: Auto merged
This commit is contained in:
@@ -241,3 +241,11 @@ select * from t1 where match a against('ab c' in boolean mode);
|
||||
a
|
||||
drop table t1;
|
||||
set names latin1;
|
||||
SET NAMES utf8;
|
||||
CREATE TABLE t1(a VARCHAR(255), FULLTEXT(a)) ENGINE=MyISAM DEFAULT CHARSET=utf8;
|
||||
INSERT INTO t1 VALUES('„MySQL“');
|
||||
SELECT a FROM t1 WHERE MATCH a AGAINST('“MySQL„' IN BOOLEAN MODE);
|
||||
a
|
||||
„MySQL“
|
||||
DROP TABLE t1;
|
||||
SET NAMES latin1;
|
||||
|
@@ -221,3 +221,13 @@ drop table t1;
|
||||
set names latin1;
|
||||
|
||||
# End of 4.1 tests
|
||||
|
||||
#
|
||||
# BUG#19580 - FULLTEXT search produces wrong results on UTF-8 columns
|
||||
#
|
||||
SET NAMES utf8;
|
||||
CREATE TABLE t1(a VARCHAR(255), FULLTEXT(a)) ENGINE=MyISAM DEFAULT CHARSET=utf8;
|
||||
INSERT INTO t1 VALUES('„MySQL“');
|
||||
SELECT a FROM t1 WHERE MATCH a AGAINST('“MySQL„' IN BOOLEAN MODE);
|
||||
DROP TABLE t1;
|
||||
SET NAMES latin1;
|
||||
|
@@ -111,6 +111,7 @@ byte ft_get_word(CHARSET_INFO *cs, byte **start, byte *end,
|
||||
FT_WORD *word, MYSQL_FTPARSER_BOOLEAN_INFO *param)
|
||||
{
|
||||
byte *doc=*start;
|
||||
int ctype;
|
||||
uint mwc, length, mbl;
|
||||
|
||||
param->yesno=(FTB_YES==' ') ? 1 : (param->quot != 0);
|
||||
@@ -119,9 +120,11 @@ byte ft_get_word(CHARSET_INFO *cs, byte **start, byte *end,
|
||||
|
||||
while (doc<end)
|
||||
{
|
||||
for (;doc<end;doc++)
|
||||
for (; doc < end; doc+= (mbl > 0 ? mbl : 1))
|
||||
{
|
||||
if (true_word_char(cs,*doc)) break;
|
||||
mbl= cs->cset->ctype(cs, &ctype, (uchar*)doc, (uchar*)end);
|
||||
if (true_word_char(ctype, *doc))
|
||||
break;
|
||||
if (*doc == FTB_RQUOT && param->quot)
|
||||
{
|
||||
param->quot=doc;
|
||||
@@ -155,14 +158,16 @@ byte ft_get_word(CHARSET_INFO *cs, byte **start, byte *end,
|
||||
}
|
||||
|
||||
mwc=length=0;
|
||||
for (word->pos=doc; doc<end; length++, mbl=my_mbcharlen(cs, *(uchar *)doc), doc+=(mbl ? mbl : 1))
|
||||
if (true_word_char(cs,*doc))
|
||||
for (word->pos= doc; doc < end; length++, doc+= (mbl > 0 ? mbl : 1))
|
||||
{
|
||||
mbl= cs->cset->ctype(cs, &ctype, (uchar*)doc, (uchar*)end);
|
||||
if (true_word_char(ctype, *doc))
|
||||
mwc=0;
|
||||
else if (!misc_word_char(*doc) || mwc)
|
||||
break;
|
||||
else
|
||||
mwc++;
|
||||
|
||||
}
|
||||
param->prev='A'; /* be sure *prev is true_word_char */
|
||||
word->len= (uint)(doc-word->pos) - mwc;
|
||||
if ((param->trunc=(doc<end && *doc == FTB_TRUNC)))
|
||||
@@ -197,24 +202,31 @@ byte ft_simple_get_word(CHARSET_INFO *cs, byte **start, const byte *end,
|
||||
{
|
||||
byte *doc= *start;
|
||||
uint mwc, length, mbl;
|
||||
int ctype;
|
||||
DBUG_ENTER("ft_simple_get_word");
|
||||
|
||||
do
|
||||
{
|
||||
for (;; doc++)
|
||||
for (;; doc+= (mbl > 0 ? mbl : 1))
|
||||
{
|
||||
if (doc >= end) DBUG_RETURN(0);
|
||||
if (true_word_char(cs, *doc)) break;
|
||||
if (doc >= end)
|
||||
DBUG_RETURN(0);
|
||||
mbl= cs->cset->ctype(cs, &ctype, (uchar*)doc, (uchar*)end);
|
||||
if (true_word_char(ctype, *doc))
|
||||
break;
|
||||
}
|
||||
|
||||
mwc= length= 0;
|
||||
for (word->pos=doc; doc<end; length++, mbl=my_mbcharlen(cs, *(uchar *)doc), doc+=(mbl ? mbl : 1))
|
||||
if (true_word_char(cs,*doc))
|
||||
for (word->pos= doc; doc < end; length++, doc+= (mbl > 0 ? mbl : 1))
|
||||
{
|
||||
mbl= cs->cset->ctype(cs, &ctype, (uchar*)doc, (uchar*)end);
|
||||
if (true_word_char(ctype, *doc))
|
||||
mwc= 0;
|
||||
else if (!misc_word_char(*doc) || mwc)
|
||||
break;
|
||||
else
|
||||
mwc++;
|
||||
}
|
||||
|
||||
word->len= (uint)(doc-word->pos) - mwc;
|
||||
|
||||
|
@@ -174,11 +174,6 @@ int _mi_ft_cmp(MI_INFO *info, uint keynr, const byte *rec1, const byte *rec2)
|
||||
FT_SEG_ITERATOR ftsi1, ftsi2;
|
||||
CHARSET_INFO *cs=info->s->keyinfo[keynr].seg->charset;
|
||||
DBUG_ENTER("_mi_ft_cmp");
|
||||
#ifndef MYSQL_HAS_TRUE_CTYPE_IMPLEMENTATION
|
||||
if (cs->mbmaxlen > 1)
|
||||
DBUG_RETURN(THOSE_TWO_DAMN_KEYS_ARE_REALLY_DIFFERENT);
|
||||
#endif
|
||||
|
||||
_mi_ft_segiterator_init(info, keynr, rec1, &ftsi1);
|
||||
_mi_ft_segiterator_init(info, keynr, rec2, &ftsi2);
|
||||
|
||||
|
@@ -24,9 +24,10 @@
|
||||
#include <queues.h>
|
||||
#include <mysql/plugin.h>
|
||||
|
||||
#define true_word_char(s,X) (my_isalnum(s,X) || (X)=='_')
|
||||
#define true_word_char(ctype, character) \
|
||||
((ctype) & (_MY_U | _MY_L | _MY_NMR) || \
|
||||
(character) == '_')
|
||||
#define misc_word_char(X) 0
|
||||
#define word_char(s,X) (true_word_char(s,X) || misc_word_char(X))
|
||||
|
||||
#define FT_MAX_WORD_LEN_FOR_SORT 31
|
||||
|
||||
|
Reference in New Issue
Block a user