mirror of
https://github.com/MariaDB/server.git
synced 2025-07-29 05:21:33 +03:00
Fulltext manual changed, MERGE bug fixed
This commit is contained in:
117
Docs/manual.texi
117
Docs/manual.texi
@ -29233,42 +29233,36 @@ index.
|
||||
Full-text search is performed with the @code{MATCH} function.
|
||||
|
||||
@example
|
||||
mysql> CREATE TABLE t (a VARCHAR(200), b TEXT, FULLTEXT (a,b));
|
||||
mysql> CREATE TABLE articles (
|
||||
-> id INT UNSIGNED AUTO_INCREMENT NOT NULL PRIMARY KEY,
|
||||
-> title VARCHAR(200),
|
||||
-> body TEXT,
|
||||
-> FULLTEXT (title,body)
|
||||
-> );
|
||||
Query OK, 0 rows affected (0.00 sec)
|
||||
|
||||
mysql> INSERT INTO t VALUES
|
||||
-> ('MySQL has now support', 'for full-text search'),
|
||||
-> ('Full-text indexes', 'are called collections'),
|
||||
-> ('Only MyISAM tables','support collections'),
|
||||
-> ('Function MATCH ... AGAINST()','is used to do a search'),
|
||||
-> ('Full-text search in MySQL', 'implements vector space model');
|
||||
mysql> INSERT INTO articles VALUES
|
||||
-> (0,'MySQL Tutorial', 'DBMS stands for DataBase Management ...'),
|
||||
-> (0,'How To Use MySQL Efficiently', 'After you went through a ...'),
|
||||
-> (0,'Optimizing MySQL','In this tutorial we will show how to ...'),
|
||||
-> (0,'1001 MySQL Trick','1. Never run mysqld as root. 2. Normalize ...'),
|
||||
-> (0,'MySQL vs. YourSQL', 'In the following database comparison we ...'),
|
||||
-> (0,'MySQL Security', 'When configured properly, MySQL could be ...');
|
||||
Query OK, 6 rows affected (0.00 sec)
|
||||
Records: 6 Duplicates: 0 Warnings: 0
|
||||
|
||||
mysql> SELECT * FROM t WHERE MATCH (a,b) AGAINST ('MySQL');
|
||||
+---------------------------+-------------------------------+
|
||||
| a | b |
|
||||
+---------------------------+-------------------------------+
|
||||
| MySQL has now support | for full-text search |
|
||||
| Full-text search in MySQL | implements vector-space-model |
|
||||
+---------------------------+-------------------------------+
|
||||
mysql> SELECT * FROM articles WHERE MATCH (title,body) AGAINST ('database');
|
||||
+----+-------------------+---------------------------------------------+
|
||||
| id | title | body |
|
||||
+----+-------------------+---------------------------------------------+
|
||||
| 5 | MySQL vs. YourSQL | In the following database comparison we ... |
|
||||
| 1 | MySQL Tutorial | DBMS stands for DataBase Management ... |
|
||||
+----+-------------------+---------------------------------------------+
|
||||
2 rows in set (0.00 sec)
|
||||
|
||||
mysql> SELECT *,MATCH a,b AGAINST ('collections support') as x FROM t;
|
||||
+------------------------------+-------------------------------+--------+
|
||||
| a | b | x |
|
||||
+------------------------------+-------------------------------+--------+
|
||||
| MySQL has now support | for full-text search | 0.3834 |
|
||||
| Full-text indexes | are called collections | 0.3834 |
|
||||
| Only MyISAM tables | support collections | 0.7668 |
|
||||
| Function MATCH ... AGAINST() | is used to do a search | 0 |
|
||||
| Full-text search in MySQL | implements vector space model | 0 |
|
||||
+------------------------------+-------------------------------+--------+
|
||||
5 rows in set (0.00 sec)
|
||||
@end example
|
||||
|
||||
The function @code{MATCH} matches a natural language query @code{AGAINST}
|
||||
a text collection (which is simply the columns that are covered by a
|
||||
a text collection (which is simply the set of columns covered by a
|
||||
@code{FULLTEXT} index). For every row in a table it returns relevance -
|
||||
a similarity measure between the text in that row (in the columns that are
|
||||
part of the collection) and the query. When it is used in a @code{WHERE}
|
||||
@ -29279,10 +29273,51 @@ number of words in the row, the number of unique words in that row, the
|
||||
total number of words in the collection, and the number of documents (rows)
|
||||
that contain a particular word.
|
||||
|
||||
MySQL uses a very simple parser to split text into words. A ``word'' is
|
||||
any sequence of letters, numbers, @samp{'}, and @samp{_}. Any ``word''
|
||||
that is present in the stopword list or just too short (3 characters
|
||||
or less) is ignored.
|
||||
The above is a basic example of using the @code{MATCH} function. Rows are
|
||||
returned in order of decreasing relevance.
|
||||
|
||||
@example
|
||||
mysql> SELECT id,MATCH (title,body) AGAINST ('Tutorial') FROM articles;
|
||||
+----+-----------------------------------------+
|
||||
| id | MATCH (title,body) AGAINST ('Tutorial') |
|
||||
+----+-----------------------------------------+
|
||||
| 1 | 0.64840710366884 |
|
||||
| 2 | 0 |
|
||||
| 3 | 0.66266459031789 |
|
||||
| 4 | 0 |
|
||||
| 5 | 0 |
|
||||
| 6 | 0 |
|
||||
+----+-----------------------------------------+
|
||||
6 rows in set (0.00 sec)
|
||||
@end example
|
||||
|
||||
This example shows how to retrieve the relevances. As neither @code{WHERE}
|
||||
nor @code{ORDER BY} clauses are present, returned rows are not ordered.
|
||||
|
||||
@example
|
||||
mysql> SELECT id, body, MATCH (title,body) AGAINST (
|
||||
-> 'Security implications of running MySQL as root') AS score
|
||||
-> FROM articles WHERE MATCH (title,body) AGAINST
|
||||
-> ('Security implications of running MySQL as root');
|
||||
+----+-----------------------------------------------+-----------------+
|
||||
| id | body | score |
|
||||
+----+-----------------------------------------------+-----------------+
|
||||
| 4 | 1. Never run mysqld as root. 2. Normalize ... | 1.5055546709332 |
|
||||
| 6 | When configured properly, MySQL could be ... | 1.31140957288 |
|
||||
+----+-----------------------------------------------+-----------------+
|
||||
2 rows in set (0.00 sec)
|
||||
@end example
|
||||
|
||||
This is a more complex example - the query returns the relevance and still
|
||||
sorts the rows in order of decreasing relevance. To achieve this, one should specify
|
||||
@code{MATCH} twice. Note that this will cause no additional overhead, as
|
||||
the @strong{MySQL} optimizer will notice that these two @code{MATCH} calls are
|
||||
identical and will call full-text search code only once.
|
||||
|
||||
@strong{MySQL} uses a very simple parser to split text into words. A
|
||||
``word'' is any sequence of letters, numbers, @samp{'}, and @samp{_}. Any
|
||||
``word'' that is present in the stopword list or just too short (3
|
||||
characters or less) is ignored.
|
||||
|
||||
Every correct word in the collection and in the query is weighted,
|
||||
according to its significance in the query or collection. This way, a
|
||||
@ -29297,17 +29332,22 @@ carefully tuned this way). For very small tables, word distribution
|
||||
does not reflect adequately their semantical value, and this model
|
||||
may sometimes produce bizarre results.
|
||||
|
||||
For example, search for the word "search" will produce no results in the
|
||||
above example. Word "search" is present in more than half of rows, and
|
||||
as such, is effectively treated as a stopword (that is, with semantical value
|
||||
zero). It is, really, the desired behavior - a natural language query
|
||||
should not return every other row in 1GB table.
|
||||
@example
|
||||
mysql> SELECT * FROM articles WHERE MATCH (title,body) AGAINST ('MySQL');
|
||||
Empty set (0.00 sec)
|
||||
@end example
|
||||
|
||||
A search for the word @code{MySQL} produces no results in the above example.
|
||||
The word @code{MySQL} is present in more than half of the rows, and as such, is
|
||||
effectively treated as a stopword (that is, with semantical value zero).
|
||||
It is, really, the desired behavior - a natural language query should not
|
||||
return every second row in a 1GB table.
|
||||
|
||||
A word that matches half of rows in a table is less likely to locate relevant
|
||||
documents. In fact, it will most likely find plenty of irrelevant documents.
|
||||
We all know this happens far too often when we are trying to find something on
|
||||
the Internet with a search engine. It is with this reasoning that such rows
|
||||
have been assigned a low semantical value in @strong{a particular dataset}.
|
||||
have been assigned a low semantical value in @strong{this particular dataset}.
|
||||
|
||||
@menu
|
||||
* Fulltext Fine-tuning::
|
||||
@ -44120,6 +44160,9 @@ not yet 100% confident in this code.
|
||||
@appendixsubsec Changes in release 3.23.38
|
||||
@itemize @bullet
|
||||
@item
|
||||
Fixed a bug where a @code{SELECT} from a @code{MERGE} table
|
||||
sometimes returned incorrectly ordered rows.
|
||||
@item
|
||||
Fixed a bug in @code{REPLACE()} when using the ujis character set.
|
||||
@item
|
||||
Applied Sleepycat BDB patches 3.2.9.1 and 3.2.9.2.
|
||||
|
@ -29,4 +29,3 @@ extern pthread_mutex_t THR_LOCK_open;
|
||||
#endif
|
||||
|
||||
int _myrg_init_queue(MYRG_INFO *info,int inx,enum ha_rkey_function search_flag);
|
||||
int _myrg_finish_scan(MYRG_INFO *info, int inx, enum ha_rkey_function type);
|
||||
|
@ -44,7 +44,6 @@ int myrg_rkey(MYRG_INFO *info,byte *record,int inx, const byte *key,
|
||||
MYRG_TABLE *table;
|
||||
MI_INFO *mi;
|
||||
int err;
|
||||
byte *buf=((search_flag == HA_READ_KEY_EXACT) ? record: 0);
|
||||
LINT_INIT(key_buff);
|
||||
LINT_INIT(pack_key_length);
|
||||
|
||||
@ -57,14 +56,14 @@ int myrg_rkey(MYRG_INFO *info,byte *record,int inx, const byte *key,
|
||||
|
||||
if (table == info->open_tables)
|
||||
{
|
||||
err=mi_rkey(mi,buf,inx,key,key_len,search_flag);
|
||||
err=mi_rkey(mi,0,inx,key,key_len,search_flag);
|
||||
key_buff=(byte*) mi->lastkey+mi->s->base.max_key_length;
|
||||
pack_key_length=mi->last_rkey_length;
|
||||
}
|
||||
else
|
||||
{
|
||||
mi->use_packed_key=1;
|
||||
err=mi_rkey(mi,buf,inx,key_buff,pack_key_length,search_flag);
|
||||
err=mi_rkey(mi,0,inx,key_buff,pack_key_length,search_flag);
|
||||
mi->use_packed_key=0;
|
||||
}
|
||||
info->last_used_table=table+1;
|
||||
@ -78,12 +77,6 @@ int myrg_rkey(MYRG_INFO *info,byte *record,int inx, const byte *key,
|
||||
/* adding to queue */
|
||||
queue_insert(&(info->by_key),(byte *)table);
|
||||
|
||||
/* if looking for KEY_EXACT, return first matched now */
|
||||
if (buf)
|
||||
{
|
||||
info->current_table=table;
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
if (!info->by_key.elements)
|
||||
|
@ -29,7 +29,11 @@ int myrg_rnext(MYRG_INFO *info, byte *buf, int inx)
|
||||
if ((err=mi_rnext(info->current_table->table,NULL,inx)))
|
||||
{
|
||||
if (err == HA_ERR_END_OF_FILE)
|
||||
{
|
||||
queue_remove(&(info->by_key),0);
|
||||
if (!info->by_key.elements)
|
||||
return HA_ERR_END_OF_FILE;
|
||||
}
|
||||
else
|
||||
return err;
|
||||
}
|
||||
@ -40,48 +44,7 @@ int myrg_rnext(MYRG_INFO *info, byte *buf, int inx)
|
||||
queue_replaced(&(info->by_key));
|
||||
}
|
||||
|
||||
/* next, let's finish myrg_rkey's initial scan */
|
||||
if ((err=_myrg_finish_scan(info, inx, HA_READ_KEY_OR_NEXT)))
|
||||
return err;
|
||||
|
||||
if (!info->by_key.elements)
|
||||
return HA_ERR_END_OF_FILE;
|
||||
|
||||
/* now, mymerge's read_next is as simple as one queue_top */
|
||||
mi=(info->current_table=(MYRG_TABLE *)queue_top(&(info->by_key)))->table;
|
||||
return mi_rrnd(mi,buf,mi->lastpos);
|
||||
}
|
||||
|
||||
|
||||
/* let's finish myrg_rkey's initial scan */
|
||||
|
||||
/*
  Continue a key lookup over the sub-tables that have not been searched yet.

  Starting at info->last_used_table and stopping at info->end_table, run
  mi_rkey() on each remaining table with search mode `type` on index `inx`.
  Every table where the lookup succeeds is inserted into the info->by_key
  queue.  No record buffer is passed to mi_rkey() (NULL).

  Returns 0 on success, including when there is nothing left to scan or no
  table matched.  Any mi_rkey() error other than HA_ERR_KEY_NOT_FOUND is
  returned immediately; in that case info->last_used_table is not updated.
*/
int _myrg_finish_scan(MYRG_INFO *info, int inx, enum ha_rkey_function type)
|
||||
{
|
||||
int err;
|
||||
MYRG_TABLE *table=info->last_used_table;
|
||||
if (table < info->end_table)
|
||||
{
|
||||
/*
  Take the packed key and its length from the table just before the first
  unscanned one; presumably an earlier mi_rkey() call on that table left
  them there (myrg_rkey reads them with the same expression) - confirm.
*/
MI_INFO *mi= table[-1].table;
|
||||
byte *key_buff=(byte*) mi->lastkey+mi->s->base.max_key_length;
|
||||
uint pack_key_length= mi->last_rkey_length;
|
||||
|
||||
for (; table < info->end_table ; table++)
|
||||
{
|
||||
mi=table->table;
|
||||
/* use_packed_key is set only for the duration of this mi_rkey() call */
mi->use_packed_key=1;
|
||||
err=mi_rkey(mi,NULL,inx,key_buff,pack_key_length,type);
|
||||
mi->use_packed_key=0;
|
||||
if (err)
|
||||
{
|
||||
if (err == HA_ERR_KEY_NOT_FOUND) /* No match in this table; try the next one */
|
||||
continue;
|
||||
return err;
|
||||
}
|
||||
/* Found here, adding to queue */
|
||||
queue_insert(&(info->by_key),(byte *) table);
|
||||
}
|
||||
/* All tables are now used */
|
||||
info->last_used_table=table;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
@ -29,7 +29,11 @@ int myrg_rprev(MYRG_INFO *info, byte *buf, int inx)
|
||||
if ((err=mi_rprev(info->current_table->table,NULL,inx)))
|
||||
{
|
||||
if (err == HA_ERR_END_OF_FILE)
|
||||
{
|
||||
queue_remove(&(info->by_key),0);
|
||||
if (!info->by_key.elements)
|
||||
return HA_ERR_END_OF_FILE;
|
||||
}
|
||||
else
|
||||
return err;
|
||||
}
|
||||
@ -40,16 +44,7 @@ int myrg_rprev(MYRG_INFO *info, byte *buf, int inx)
|
||||
queue_replaced(&(info->by_key));
|
||||
}
|
||||
|
||||
/* next, let's finish myrg_rkey's initial scan */
|
||||
if ((err=_myrg_finish_scan(info, inx, HA_READ_KEY_OR_PREV)))
|
||||
return err;
|
||||
|
||||
if (!info->by_key.elements)
|
||||
return HA_ERR_END_OF_FILE;
|
||||
|
||||
/* now, mymerge's read_prev is as simple as one queue_top */
|
||||
mi=(info->current_table=(MYRG_TABLE *)queue_top(&(info->by_key)))->table;
|
||||
return mi_rrnd(mi,buf,mi->lastpos);
|
||||
}
|
||||
|
||||
|
||||
|
@ -127,3 +127,6 @@ t3 CREATE TABLE `t3` (
|
||||
`othr` int(11) NOT NULL default '0'
|
||||
) TYPE=MRG_MyISAM UNION=(t1,t2)
|
||||
a
|
||||
a b
|
||||
1 1
|
||||
1 2
|
||||
|
@ -96,3 +96,16 @@ drop table t3,t2,t1;
|
||||
create table t1 (a int not null) type=merge;
|
||||
select * from t1;
|
||||
drop table t1;
|
||||
|
||||
#
|
||||
# Bug found by Monty.
|
||||
#
|
||||
|
||||
drop table if exists t3, t2, t1;
|
||||
create table t1 (a int not null, b int not null, key(a,b));
|
||||
create table t2 (a int not null, b int not null, key(a,b));
|
||||
create table t3 (a int not null, b int not null, key(a,b)) TYPE=MERGE UNION=(t1,t2);
|
||||
insert into t1 values (1,2),(2,1),(0,0),(4,4),(5,5),(6,6);
|
||||
insert into t2 values (1,1),(2,2),(0,0),(4,4),(5,5),(6,6);
|
||||
flush tables;
|
||||
select * from t3 where a=1 order by b limit 2;
|
||||
|
Reference in New Issue
Block a user