1
0
mirror of https://github.com/MariaDB/server.git synced 2025-10-24 07:13:33 +03:00
Files
mariadb/mysql-test/t/fulltext2.test
unknown 528e85a4c0 BUG#19580 - FULLTEXT search produces wrong results on UTF-8 columns
The problem was that MySQL hadn't true ctype implementation. As a
result many multibyte punctuation/whitespace characters were
treated as word characters.

This fix uses recently added CTYPE table for unicode character sets
(WL1386) to detect unicode punctuation/whitespace characters
correctly.

Note: this is incompatible change since it changes parser behavior.
One will have to use REPAIR TABLE statement to rebuild fulltext
indexes.


mysql-test/r/fulltext2.result:
  Testcase for BUG#19580.
mysql-test/t/fulltext2.test:
  Testcase for BUG#19580.
storage/myisam/ft_parser.c:
  Use WL1386 "CTYPE table for unicode character sets" functionality.
storage/myisam/ft_update.c:
  Use WL1386 "CTYPE table for unicode character sets" functionality.
  
  Reverse fix for BUG#16489 "utf8 + fulltext leads to corrupt index
  file.". It is not needed anymore, since we have true ctype
  implementation.
storage/myisam/ftdefs.h:
  Use WL1386 "CTYPE table for unicode character sets" functionality.
  
  Rework true_word_char macro so it accepts ctype instead of charset
  as first param. It doesn't use my_isalnum anymore, but instead
  directly checks ctype.
  Obsolete word_char macro removed.
2006-05-29 16:46:46 +05:00

234 lines
8.1 KiB
Plaintext
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#
# test of new fulltext search features
#
#
# two-level tree
#
--disable_warnings
DROP TABLE IF EXISTS t1;
--enable_warnings
CREATE TABLE t1 (
i int(10) unsigned not null auto_increment primary key,
a varchar(255) not null,
FULLTEXT KEY (a)
) ENGINE=MyISAM;
# two-level entry, second-level tree with depth 2
--disable_query_log
let $1=260;
while ($1)
{
eval insert t1 (a) values ('aaaxxx');
dec $1;
}
# two-level entry, second-level tree has only one page
let $1=255;
while ($1)
{
eval insert t1 (a) values ('aaazzz');
dec $1;
}
# one-level entry (entries)
let $1=250;
while ($1)
{
eval insert t1 (a) values ('aaayyy');
dec $1;
}
--enable_query_log
# converting to two-level
repair table t1 quick;
check table t1;
optimize table t1; # BUG#5327 - mi_sort_index() of 2-level tree
check table t1;
select count(*) from t1 where match a against ('aaaxxx');
select count(*) from t1 where match a against ('aaayyy');
select count(*) from t1 where match a against ('aaazzz');
select count(*) from t1 where match a against ('aaaxxx' in boolean mode);
select count(*) from t1 where match a against ('aaayyy' in boolean mode);
select count(*) from t1 where match a against ('aaazzz' in boolean mode);
select count(*) from t1 where match a against ('aaaxxx aaayyy aaazzz');
select count(*) from t1 where match a against ('aaaxxx aaayyy aaazzz' in boolean mode);
select count(*) from t1 where match a against ('aaax*' in boolean mode);
select count(*) from t1 where match a against ('aaay*' in boolean mode);
select count(*) from t1 where match a against ('aaa*' in boolean mode);
# mi_write:
insert t1 (a) values ('aaaxxx'),('aaayyy');
# call to enlarge_root() below
insert t1 (a) values ('aaazzz'),('aaazzz'),('aaazzz'),('aaazzz'),('aaazzz');
select count(*) from t1 where match a against ('aaaxxx');
select count(*) from t1 where match a against ('aaayyy');
select count(*) from t1 where match a against ('aaazzz');
# mi_delete
insert t1 (a) values ('aaaxxx 000000');
select count(*) from t1 where match a against ('000000');
delete from t1 where match a against ('000000');
select count(*) from t1 where match a against ('000000');
select count(*) from t1 where match a against ('aaaxxx');
delete from t1 where match a against ('aaazzz');
select count(*) from t1 where match a against ('aaaxxx' in boolean mode);
select count(*) from t1 where match a against ('aaayyy' in boolean mode);
select count(*) from t1 where match a against ('aaazzz' in boolean mode);
# double-check without index
select count(*) from t1 where a = 'aaaxxx';
select count(*) from t1 where a = 'aaayyy';
select count(*) from t1 where a = 'aaazzz';
# update
insert t1 (a) values ('aaaxxx 000000');
select count(*) from t1 where match a against ('000000');
update t1 set a='aaazzz' where match a against ('000000');
select count(*) from t1 where match a against ('aaaxxx' in boolean mode);
select count(*) from t1 where match a against ('aaazzz' in boolean mode);
update t1 set a='aaazzz' where a = 'aaaxxx';
update t1 set a='aaaxxx' where a = 'aaayyy';
select count(*) from t1 where match a against ('aaaxxx' in boolean mode);
select count(*) from t1 where match a against ('aaayyy' in boolean mode);
select count(*) from t1 where match a against ('aaazzz' in boolean mode);
drop table t1;
CREATE TABLE t1 (
i int(10) unsigned not null auto_increment primary key,
a varchar(255) not null,
FULLTEXT KEY (a)
) ENGINE=MyISAM;
#
# now same as about but w/o repair table
# 2-level tree created by mi_write
#
# two-level entry, second-level tree with depth 2
--disable_query_log
let $1=260;
while ($1)
{
eval insert t1 (a) values ('aaaxxx');
dec $1;
}
let $1=255;
while ($1)
{
eval insert t1 (a) values ('aaazzz');
dec $1;
}
let $1=250;
while ($1)
{
eval insert t1 (a) values ('aaayyy');
dec $1;
}
--enable_query_log
select count(*) from t1 where match a against ('aaaxxx');
select count(*) from t1 where match a against ('aaayyy');
select count(*) from t1 where match a against ('aaazzz');
select count(*) from t1 where match a against ('aaaxxx' in boolean mode);
select count(*) from t1 where match a against ('aaayyy' in boolean mode);
select count(*) from t1 where match a against ('aaazzz' in boolean mode);
select count(*) from t1 where match a against ('aaaxxx aaayyy aaazzz');
select count(*) from t1 where match a against ('aaaxxx aaayyy aaazzz' in boolean mode);
select count(*) from t1 where match a against ('aaax*' in boolean mode);
select count(*) from t1 where match a against ('aaay*' in boolean mode);
select count(*) from t1 where match a against ('aaa*' in boolean mode);
# mi_write:
insert t1 (a) values ('aaaxxx'),('aaayyy');
insert t1 (a) values ('aaazzz'),('aaazzz'),('aaazzz'),('aaazzz'),('aaazzz');
select count(*) from t1 where match a against ('aaaxxx');
select count(*) from t1 where match a against ('aaayyy');
select count(*) from t1 where match a against ('aaazzz');
# mi_delete
insert t1 (a) values ('aaaxxx 000000');
select count(*) from t1 where match a against ('000000');
delete from t1 where match a against ('000000');
select count(*) from t1 where match a against ('000000');
select count(*) from t1 where match a against ('aaaxxx');
delete from t1 where match a against ('aaazzz');
select count(*) from t1 where match a against ('aaaxxx' in boolean mode);
select count(*) from t1 where match a against ('aaayyy' in boolean mode);
select count(*) from t1 where match a against ('aaazzz' in boolean mode);
# double-check without index
select count(*) from t1 where a = 'aaaxxx';
select count(*) from t1 where a = 'aaayyy';
select count(*) from t1 where a = 'aaazzz';
# update
insert t1 (a) values ('aaaxxx 000000');
select count(*) from t1 where match a against ('000000');
update t1 set a='aaazzz' where match a against ('000000');
select count(*) from t1 where match a against ('aaaxxx' in boolean mode);
select count(*) from t1 where match a against ('aaazzz' in boolean mode);
update t1 set a='aaazzz' where a = 'aaaxxx';
update t1 set a='aaaxxx' where a = 'aaayyy';
select count(*) from t1 where match a against ('aaaxxx' in boolean mode);
select count(*) from t1 where match a against ('aaayyy' in boolean mode);
select count(*) from t1 where match a against ('aaazzz' in boolean mode);
drop table t1;
#
# BUG#11336
#
# for uca collation isalnum and strnncollsp don't agree on whether
# 0xC2A0 is a space (strnncollsp is right, isalnum is wrong).
#
# they still don't, the bug was fixed by avoiding strnncollsp
#
set names utf8;
create table t1(a text,fulltext(a)) collate=utf8_swedish_ci;
insert into t1 values('test test '),('test'),('test'),('test'),
('test'),('test'),('test'),('test'),('test'),('test'),('test'),('test'),
('test'),('test'),('test'),('test'),('test'),('test'),('test'),('test'),
('test'),('test'),('test'),('test'),('test'),('test'),('test'),('test'),
('test'),('test'),('test'),('test'),('test'),('test'),('test'),('test'),
('test'),('test'),('test'),('test'),('test'),('test'),('test'),('test'),
('test'),('test'),('test'),('test'),('test'),('test'),('test'),('test'),
('test'),('test'),('test'),('test'),('test'),('test'),('test'),('test'),
('test'),('test'),('test'),('test'),('test'),('test'),('test'),('test'),
('test'),('test'),('test'),('test'),('test'),('test'),('test'),('test'),
('test'),('test'),('test'),('test'),('test'),('test'),('test'),('test'),
('test'),('test'),('test'),('test'),('test'),('test'),('test'),('test'),
('test'),('test'),('test'),('test'),('test'),('test'),('test'),('test'),
('test'),('test'),('test'),('test'),('test'),('test'),('test'),('test'),
('test'),('test'),('test'),('test'),('test'),('test'),('test'),('test'),
('test'),('test'),('test'),('test'),('test'),('test'),('test'),('test');
delete from t1 limit 1;
#
# BUG#16489: utf8 + fulltext leads to corrupt index file.
#
truncate table t1;
insert into t1 values('ab c d');
update t1 set a='ab c d';
select * from t1 where match a against('ab c' in boolean mode);
drop table t1;
set names latin1;
# End of 4.1 tests
#
# BUG#19580 - FULLTEXT search produces wrong results on UTF-8 columns
#
SET NAMES utf8;
CREATE TABLE t1(a VARCHAR(255), FULLTEXT(a)) ENGINE=MyISAM DEFAULT CHARSET=utf8;
INSERT INTO t1 VALUES('„MySQL“');
SELECT a FROM t1 WHERE MATCH a AGAINST('“MySQL„' IN BOOLEAN MODE);
DROP TABLE t1;
SET NAMES latin1;