mirror of
				https://github.com/MariaDB/server.git
				synced 2025-10-28 17:15:19 +03:00 
			
		
		
		
	The problem was that MySQL did not have a true ctype implementation. As a result, many multibyte punctuation/whitespace characters were treated as word characters. This fix uses the recently added CTYPE table for unicode character sets (WL1386) to detect unicode punctuation/whitespace characters correctly. Note: this is an incompatible change, since it changes parser behavior. One will have to use the REPAIR TABLE statement to rebuild fulltext indexes. mysql-test/r/fulltext2.result: Testcase for BUG#19580. mysql-test/t/fulltext2.test: Testcase for BUG#19580. storage/myisam/ft_parser.c: Use WL1386 "CTYPE table for unicode character sets" functionality. storage/myisam/ft_update.c: Use WL1386 "CTYPE table for unicode character sets" functionality. Revert the fix for BUG#16489 "utf8 + fulltext leads to corrupt index file.". It is not needed anymore, since we now have a true ctype implementation. storage/myisam/ftdefs.h: Use WL1386 "CTYPE table for unicode character sets" functionality. Rework the true_word_char macro so that it accepts ctype instead of charset as its first parameter. It doesn't use my_isalnum anymore, but instead checks ctype directly. The obsolete word_char macro is removed.
		
			
				
	
	
		
			234 lines
		
	
	
		
			8.1 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
			
		
		
	
	
			234 lines
		
	
	
		
			8.1 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
| #
 | ||
| # test of new fulltext search features
 | ||
| #
 | ||
| 
 | ||
| #
 | ||
| # two-level tree
 | ||
| #
 | ||
| 
 | ||
| # Drop t1 if it already exists; warnings are switched off around the
 | ||
| # statement and switched back on right after it.
 | ||
| --disable_warnings
 | ||
| DROP TABLE IF EXISTS t1;
 | ||
| --enable_warnings
 | ||
| 
 | ||
| # MyISAM test table: auto-increment primary key i plus a NOT NULL
 | ||
| # varchar(255) column a that carries the FULLTEXT key searched by the
 | ||
| # MATCH a AGAINST (...) queries below.
 | ||
| CREATE TABLE t1 (
 | ||
|   i int(10) unsigned not null auto_increment primary key,
 | ||
|   a varchar(255) not null,
 | ||
|   FULLTEXT KEY (a)
 | ||
| ) ENGINE=MyISAM;
 | ||
| 
 | ||
| # two-level entry, second-level tree with depth 2
 | ||
| # $1 is the loop counter: it starts at 260 and is decremented once per pass,
 | ||
| # so 260 rows with the value 'aaaxxx' are inserted.
 | ||
| # Query logging is disabled from here on so the inserts are not echoed.
 | ||
| --disable_query_log
 | ||
| let $1=260;
 | ||
| while ($1)
 | ||
| {
 | ||
|   eval insert t1 (a) values ('aaaxxx');
 | ||
|   dec $1;
 | ||
| }
 | ||
| 
 | ||
| # two-level entry, second-level tree has only one page
 | ||
| # The counter $1 is reset to 255, so 255 rows with the value 'aaazzz'
 | ||
| # are inserted.
 | ||
| let $1=255;
 | ||
| while ($1)
 | ||
| {
 | ||
|   eval insert t1 (a) values ('aaazzz');
 | ||
|   dec $1;
 | ||
| }
 | ||
| 
 | ||
| # one-level entry (entries)
 | ||
| # The counter $1 is reset to 250, so 250 rows with the value 'aaayyy'
 | ||
| # are inserted; query logging is switched back on after the loop.
 | ||
| let $1=250;
 | ||
| while ($1)
 | ||
| {
 | ||
|   eval insert t1 (a) values ('aaayyy');
 | ||
|   dec $1;
 | ||
| }
 | ||
| --enable_query_log
 | ||
| 
 | ||
| # converting to two-level
 | ||
| # REPAIR TABLE ... QUICK and OPTIMIZE TABLE are each followed by a
 | ||
| # CHECK TABLE so the table state is verified after every maintenance step.
 | ||
| repair table t1 quick;
 | ||
| check table t1;
 | ||
| optimize table t1; # BUG#5327 - mi_sort_index() of 2-level tree
 | ||
| check table t1;
 | ||
| 
 | ||
| # Count the rows matching each of the three words, first without a search
 | ||
| # modifier and then in boolean mode; the last two queries search for all
 | ||
| # three words at once, again once per mode.
 | ||
| select count(*) from t1 where match a against ('aaaxxx');
 | ||
| select count(*) from t1 where match a against ('aaayyy');
 | ||
| select count(*) from t1 where match a against ('aaazzz');
 | ||
| select count(*) from t1 where match a against ('aaaxxx' in boolean mode);
 | ||
| select count(*) from t1 where match a against ('aaayyy' in boolean mode);
 | ||
| select count(*) from t1 where match a against ('aaazzz' in boolean mode);
 | ||
| select count(*) from t1 where match a against ('aaaxxx aaayyy aaazzz');
 | ||
| select count(*) from t1 where match a against ('aaaxxx aaayyy aaazzz' in boolean mode);
 | ||
| 
 | ||
| # Boolean-mode searches with a trailing '*' operator on the prefixes
 | ||
| # aaax, aaay and aaa (the last prefix is shared by all three inserted words).
 | ||
| select count(*) from t1 where match a against ('aaax*' in boolean mode);
 | ||
| select count(*) from t1 where match a against ('aaay*' in boolean mode);
 | ||
| select count(*) from t1 where match a against ('aaa*' in boolean mode);
 | ||
| 
 | ||
| # mi_write:
 | ||
| 
 | ||
| # Add one more 'aaaxxx' row and one more 'aaayyy' row, then five more
 | ||
| # 'aaazzz' rows, and re-count the matches for each word.
 | ||
| insert t1 (a) values ('aaaxxx'),('aaayyy');
 | ||
| # call to enlarge_root() below
 | ||
| insert t1 (a) values ('aaazzz'),('aaazzz'),('aaazzz'),('aaazzz'),('aaazzz');
 | ||
| select count(*) from t1 where match a against ('aaaxxx');
 | ||
| select count(*) from t1 where match a against ('aaayyy');
 | ||
| select count(*) from t1 where match a against ('aaazzz');
 | ||
| 
 | ||
| # mi_delete
 | ||
| # Insert a single row that also contains the word 000000, count it, delete
 | ||
| # it through a fulltext match on 000000, and count again. Then delete every
 | ||
| # row matching aaazzz and count the three words in boolean mode.
 | ||
| insert t1 (a) values ('aaaxxx 000000');
 | ||
| select count(*) from t1 where match a against ('000000');
 | ||
| delete from t1 where match a against ('000000');
 | ||
| select count(*) from t1 where match a against ('000000');
 | ||
| select count(*) from t1 where match a against ('aaaxxx');
 | ||
| delete from t1 where match a against ('aaazzz');
 | ||
| select count(*) from t1 where match a against ('aaaxxx' in boolean mode);
 | ||
| select count(*) from t1 where match a against ('aaayyy' in boolean mode);
 | ||
| select count(*) from t1 where match a against ('aaazzz' in boolean mode);
 | ||
| # double-check without index
 | ||
| # (plain equality on column a instead of MATCH ... AGAINST)
 | ||
| select count(*) from t1 where a = 'aaaxxx';
 | ||
| select count(*) from t1 where a = 'aaayyy';
 | ||
| select count(*) from t1 where a = 'aaazzz';
 | ||
| 
 | ||
| # update
 | ||
| # Insert a row containing 000000, then overwrite the row found by a fulltext
 | ||
| # match on 000000 with 'aaazzz' and count aaaxxx / aaazzz in boolean mode.
 | ||
| insert t1 (a) values ('aaaxxx 000000');
 | ||
| select count(*) from t1 where match a against ('000000');
 | ||
| update t1 set a='aaazzz' where match a against ('000000');
 | ||
| select count(*) from t1 where match a against ('aaaxxx' in boolean mode);
 | ||
| select count(*) from t1 where match a against ('aaazzz' in boolean mode);
 | ||
| # Rows selected by plain equality: 'aaaxxx' rows become 'aaazzz' first,
 | ||
| # then 'aaayyy' rows become 'aaaxxx'; all three words are counted afterwards.
 | ||
| update t1 set a='aaazzz' where a = 'aaaxxx';
 | ||
| update t1 set a='aaaxxx' where a = 'aaayyy';
 | ||
| select count(*) from t1 where match a against ('aaaxxx' in boolean mode);
 | ||
| select count(*) from t1 where match a against ('aaayyy' in boolean mode);
 | ||
| select count(*) from t1 where match a against ('aaazzz' in boolean mode);
 | ||
| 
 | ||
| # Drop t1; it is created again right below for the next scenario.
 | ||
| drop table t1;
 | ||
| 
 | ||
| # Same table definition as in the first scenario: auto-increment primary
 | ||
| # key i and a NOT NULL varchar(255) column a with a FULLTEXT key, MyISAM.
 | ||
| CREATE TABLE t1 (
 | ||
|   i int(10) unsigned not null auto_increment primary key,
 | ||
|   a varchar(255) not null,
 | ||
|   FULLTEXT KEY (a)
 | ||
| ) ENGINE=MyISAM;
 | ||
| 
 | ||
| #
 | ||
| # now same as above but w/o repair table
 | ||
| # 2-level tree created by mi_write
 | ||
| #
 | ||
| 
 | ||
| # two-level entry, second-level tree with depth 2
 | ||
| # Query logging is disabled for all three loops and re-enabled after the
 | ||
| # last one. $1 is the countdown counter: 260 rows of 'aaaxxx' first.
 | ||
| --disable_query_log
 | ||
| let $1=260;
 | ||
| while ($1)
 | ||
| {
 | ||
|   eval insert t1 (a) values ('aaaxxx');
 | ||
|   dec $1;
 | ||
| }
 | ||
| # 255 rows of 'aaazzz'
 | ||
| let $1=255;
 | ||
| while ($1)
 | ||
| {
 | ||
|   eval insert t1 (a) values ('aaazzz');
 | ||
|   dec $1;
 | ||
| }
 | ||
| # 250 rows of 'aaayyy'
 | ||
| let $1=250;
 | ||
| while ($1)
 | ||
| {
 | ||
|   eval insert t1 (a) values ('aaayyy');
 | ||
|   dec $1;
 | ||
| }
 | ||
| --enable_query_log
 | ||
| 
 | ||
| # Count the rows matching each of the three words, first without a search
 | ||
| # modifier and then in boolean mode; the last two queries search for all
 | ||
| # three words at once, again once per mode.
 | ||
| select count(*) from t1 where match a against ('aaaxxx');
 | ||
| select count(*) from t1 where match a against ('aaayyy');
 | ||
| select count(*) from t1 where match a against ('aaazzz');
 | ||
| select count(*) from t1 where match a against ('aaaxxx' in boolean mode);
 | ||
| select count(*) from t1 where match a against ('aaayyy' in boolean mode);
 | ||
| select count(*) from t1 where match a against ('aaazzz' in boolean mode);
 | ||
| select count(*) from t1 where match a against ('aaaxxx aaayyy aaazzz');
 | ||
| select count(*) from t1 where match a against ('aaaxxx aaayyy aaazzz' in boolean mode);
 | ||
| 
 | ||
| # Boolean-mode searches with a trailing '*' operator on the prefixes
 | ||
| # aaax, aaay and aaa (the last prefix is shared by all three inserted words).
 | ||
| select count(*) from t1 where match a against ('aaax*' in boolean mode);
 | ||
| select count(*) from t1 where match a against ('aaay*' in boolean mode);
 | ||
| select count(*) from t1 where match a against ('aaa*' in boolean mode);
 | ||
| 
 | ||
| # mi_write:
 | ||
| 
 | ||
| # Add one more 'aaaxxx' row and one more 'aaayyy' row, then five more
 | ||
| # 'aaazzz' rows, and re-count the matches for each word.
 | ||
| insert t1 (a) values ('aaaxxx'),('aaayyy');
 | ||
| insert t1 (a) values ('aaazzz'),('aaazzz'),('aaazzz'),('aaazzz'),('aaazzz');
 | ||
| select count(*) from t1 where match a against ('aaaxxx');
 | ||
| select count(*) from t1 where match a against ('aaayyy');
 | ||
| select count(*) from t1 where match a against ('aaazzz');
 | ||
| 
 | ||
| # mi_delete
 | ||
| # Insert a single row that also contains the word 000000, count it, delete
 | ||
| # it through a fulltext match on 000000, and count again. Then delete every
 | ||
| # row matching aaazzz and count the three words in boolean mode.
 | ||
| insert t1 (a) values ('aaaxxx 000000');
 | ||
| select count(*) from t1 where match a against ('000000');
 | ||
| delete from t1 where match a against ('000000');
 | ||
| select count(*) from t1 where match a against ('000000');
 | ||
| select count(*) from t1 where match a against ('aaaxxx');
 | ||
| delete from t1 where match a against ('aaazzz');
 | ||
| select count(*) from t1 where match a against ('aaaxxx' in boolean mode);
 | ||
| select count(*) from t1 where match a against ('aaayyy' in boolean mode);
 | ||
| select count(*) from t1 where match a against ('aaazzz' in boolean mode);
 | ||
| # double-check without index
 | ||
| # (plain equality on column a instead of MATCH ... AGAINST)
 | ||
| select count(*) from t1 where a = 'aaaxxx';
 | ||
| select count(*) from t1 where a = 'aaayyy';
 | ||
| select count(*) from t1 where a = 'aaazzz';
 | ||
| 
 | ||
| # update
 | ||
| # Insert a row containing 000000, then overwrite the row found by a fulltext
 | ||
| # match on 000000 with 'aaazzz' and count aaaxxx / aaazzz in boolean mode.
 | ||
| insert t1 (a) values ('aaaxxx 000000');
 | ||
| select count(*) from t1 where match a against ('000000');
 | ||
| update t1 set a='aaazzz' where match a against ('000000');
 | ||
| select count(*) from t1 where match a against ('aaaxxx' in boolean mode);
 | ||
| select count(*) from t1 where match a against ('aaazzz' in boolean mode);
 | ||
| # Rows selected by plain equality: 'aaaxxx' rows become 'aaazzz' first,
 | ||
| # then 'aaayyy' rows become 'aaaxxx'; all three words are counted afterwards.
 | ||
| update t1 set a='aaazzz' where a = 'aaaxxx';
 | ||
| update t1 set a='aaaxxx' where a = 'aaayyy';
 | ||
| select count(*) from t1 where match a against ('aaaxxx' in boolean mode);
 | ||
| select count(*) from t1 where match a against ('aaayyy' in boolean mode);
 | ||
| select count(*) from t1 where match a against ('aaazzz' in boolean mode);
 | ||
| # End of the second scenario: remove t1.
 | ||
| drop table t1;
 | ||
| 
 | ||
| #
 | ||
| # BUG#11336
 | ||
| #
 | ||
| # for uca collation isalnum and strnncollsp don't agree on whether
 | ||
| # 0xC2A0 is a space (strnncollsp is right, isalnum is wrong).
 | ||
| #
 | ||
| # they still don't, the bug was fixed by avoiding strnncollsp
 | ||
| #
 | ||
| 
 | ||
| # Switch the connection character set to utf8 and create t1 with a single
 | ||
| # fulltext-indexed TEXT column using the utf8_swedish_ci collation.
 | ||
| set names utf8;
 | ||
| create table t1(a text,fulltext(a)) collate=utf8_swedish_ci;
 | ||
| insert into t1 values('test test '),('test'),('test'),('test'),
 | ||
| ('test'),('test'),('test'),('test'),('test'),('test'),('test'),('test'),
 | ||
| ('test'),('test'),('test'),('test'),('test'),('test'),('test'),('test'),
 | ||
| ('test'),('test'),('test'),('test'),('test'),('test'),('test'),('test'),
 | ||
| ('test'),('test'),('test'),('test'),('test'),('test'),('test'),('test'),
 | ||
| ('test'),('test'),('test'),('test'),('test'),('test'),('test'),('test'),
 | ||
| ('test'),('test'),('test'),('test'),('test'),('test'),('test'),('test'),
 | ||
| ('test'),('test'),('test'),('test'),('test'),('test'),('test'),('test'),
 | ||
| ('test'),('test'),('test'),('test'),('test'),('test'),('test'),('test'),
 | ||
| ('test'),('test'),('test'),('test'),('test'),('test'),('test'),('test'),
 | ||
| ('test'),('test'),('test'),('test'),('test'),('test'),('test'),('test'),
 | ||
| ('test'),('test'),('test'),('test'),('test'),('test'),('test'),('test'),
 | ||
| ('test'),('test'),('test'),('test'),('test'),('test'),('test'),('test'),
 | ||
| ('test'),('test'),('test'),('test'),('test'),('test'),('test'),('test'),
 | ||
| ('test'),('test'),('test'),('test'),('test'),('test'),('test'),('test'),
 | ||
| ('test'),('test'),('test'),('test'),('test'),('test'),('test'),('test');
 | ||
| delete from t1 limit 1;
 | ||
| 
 | ||
| #
 | ||
| # BUG#16489: utf8 + fulltext leads to corrupt index file.
 | ||
| #
 | ||
| truncate table t1;
 | ||
| insert into t1 values('ab c d');
 | ||
| update t1 set a='ab c d';
 | ||
| select * from t1 where match a against('ab c' in boolean mode);
 | ||
| drop table t1;
 | ||
| set names latin1;
 | ||
| 
 | ||
| # End of 4.1 tests
 | ||
| 
 | ||
| #
 | ||
| # BUG#19580 - FULLTEXT search produces wrong results on UTF-8 columns
 | ||
| #
 | ||
| # Store the word MySQL wrapped in the multibyte quotation marks „ and “,
 | ||
| # then search for it in boolean mode with the two marks swapped. The
 | ||
| # connection is utf8 for this test and is set back to latin1 at the end.
 | ||
| SET NAMES utf8;
 | ||
| CREATE TABLE t1(a VARCHAR(255), FULLTEXT(a)) ENGINE=MyISAM DEFAULT CHARSET=utf8;
 | ||
| INSERT INTO t1 VALUES('„MySQL“');
 | ||
| SELECT a FROM t1 WHERE MATCH a AGAINST('“MySQL„' IN BOOLEAN MODE);
 | ||
| DROP TABLE t1;
 | ||
| SET NAMES latin1;
 |