mirror of
https://github.com/MariaDB/server.git
synced 2025-07-29 05:21:33 +03:00
Fulltext manual changed, MERGE bug fixed
This commit is contained in:
117
Docs/manual.texi
117
Docs/manual.texi
@ -29233,42 +29233,36 @@ index.
|
|||||||
Full-text search is performed with the @code{MATCH} function.
|
Full-text search is performed with the @code{MATCH} function.
|
||||||
|
|
||||||
@example
|
@example
|
||||||
mysql> CREATE TABLE t (a VARCHAR(200), b TEXT, FULLTEXT (a,b));
|
mysql> CREATE TABLE articles (
|
||||||
|
-> id INT UNSIGNED AUTO_INCREMENT NOT NULL PRIMARY KEY,
|
||||||
|
-> title VARCHAR(200),
|
||||||
|
-> body TEXT,
|
||||||
|
-> FULLTEXT (title,body)
|
||||||
|
-> );
|
||||||
Query OK, 0 rows affected (0.00 sec)
|
Query OK, 0 rows affected (0.00 sec)
|
||||||
|
|
||||||
mysql> INSERT INTO t VALUES
|
mysql> INSERT INTO articles VALUES
|
||||||
-> ('MySQL has now support', 'for full-text search'),
|
-> (0,'MySQL Tutorial', 'DBMS stands for DataBase Management ...'),
|
||||||
-> ('Full-text indexes', 'are called collections'),
|
-> (0,'How To Use MySQL Efficiently', 'After you went through a ...'),
|
||||||
-> ('Only MyISAM tables','support collections'),
|
-> (0,'Optimizing MySQL','In this tutorial we will show how to ...'),
|
||||||
-> ('Function MATCH ... AGAINST()','is used to do a search'),
|
-> (0,'1001 MySQL Trick','1. Never run mysqld as root. 2. Normalize ...'),
|
||||||
-> ('Full-text search in MySQL', 'implements vector space model');
|
-> (0,'MySQL vs. YourSQL', 'In the following database comparison we ...'),
|
||||||
|
-> (0,'MySQL Security', 'When configured properly, MySQL could be ...');
|
||||||
Query OK, 5 rows affected (0.00 sec)
|
Query OK, 6 rows affected (0.00 sec)
|
||||||
Records: 5 Duplicates: 0 Warnings: 0
|
Records: 6 Duplicates: 0 Warnings: 0
|
||||||
|
|
||||||
mysql> SELECT * FROM t WHERE MATCH (a,b) AGAINST ('MySQL');
|
mysql> SELECT * FROM articles WHERE MATCH (title,body) AGAINST ('database');
|
||||||
+---------------------------+-------------------------------+
|
+----+-------------------+---------------------------------------------+
|
||||||
| a | b |
|
| id | title | body |
|
||||||
+---------------------------+-------------------------------+
|
+----+-------------------+---------------------------------------------+
|
||||||
| MySQL has now support | for full-text search |
|
| 5 | MySQL vs. YourSQL | In the following database comparison we ... |
|
||||||
| Full-text search in MySQL | implements vector-space-model |
|
| 1 | MySQL Tutorial | DBMS stands for DataBase Management ... |
|
||||||
+---------------------------+-------------------------------+
|
+----+-------------------+---------------------------------------------+
|
||||||
2 rows in set (0.00 sec)
|
2 rows in set (0.00 sec)
|
||||||
|
|
||||||
mysql> SELECT *,MATCH a,b AGAINST ('collections support') as x FROM t;
|
|
||||||
+------------------------------+-------------------------------+--------+
|
|
||||||
| a | b | x |
|
|
||||||
+------------------------------+-------------------------------+--------+
|
|
||||||
| MySQL has now support | for full-text search | 0.3834 |
|
|
||||||
| Full-text indexes | are called collections | 0.3834 |
|
|
||||||
| Only MyISAM tables | support collections | 0.7668 |
|
|
||||||
| Function MATCH ... AGAINST() | is used to do a search | 0 |
|
|
||||||
| Full-text search in MySQL | implements vector space model | 0 |
|
|
||||||
+------------------------------+-------------------------------+--------+
|
|
||||||
5 rows in set (0.00 sec)
|
|
||||||
@end example
|
@end example
|
||||||
|
|
||||||
The function @code{MATCH} matches a natural language query @code{AGAINST}
|
The function @code{MATCH} matches a natural language query @code{AGAINST}
|
||||||
a text collection (which is simply the columns that are covered by a
|
a text collection (which is simply the set of columns covered by a
|
||||||
@code{FULLTEXT} index). For every row in a table it returns relevance -
|
@code{FULLTEXT} index). For every row in a table it returns relevance -
|
||||||
a similarity measure between the text in that row (in the columns that are
|
a similarity measure between the text in that row (in the columns that are
|
||||||
part of the collection) and the query. When it is used in a @code{WHERE}
|
part of the collection) and the query. When it is used in a @code{WHERE}
|
||||||
@ -29279,10 +29273,51 @@ number of words in the row, the number of unique words in that row, the
|
|||||||
total number of words in the collection, and the number of documents (rows)
|
total number of words in the collection, and the number of documents (rows)
|
||||||
that contain a particular word.
|
that contain a particular word.
|
||||||
|
|
||||||
MySQL uses a very simple parser to split text into words. A ``word'' is
|
The above is a basic example of using the @code{MATCH} function. Rows are
|
||||||
any sequence of letters, numbers, @samp{'}, and @samp{_}. Any ``word''
|
returned in order of decreasing relevance.
|
||||||
that is present in the stopword list or just too short (3 characters
|
|
||||||
or less) is ignored.
|
@example
|
||||||
|
mysql> SELECT id,MATCH (title,body) AGAINST ('Tutorial') FROM articles;
|
||||||
|
+----+-----------------------------------------+
|
||||||
|
| id | MATCH (title,body) AGAINST ('Tutorial') |
|
||||||
|
+----+-----------------------------------------+
|
||||||
|
| 1 | 0.64840710366884 |
|
||||||
|
| 2 | 0 |
|
||||||
|
| 3 | 0.66266459031789 |
|
||||||
|
| 4 | 0 |
|
||||||
|
| 5 | 0 |
|
||||||
|
| 6 | 0 |
|
||||||
|
+----+-----------------------------------------+
|
||||||
|
6 rows in set (0.00 sec)
|
||||||
|
@end example
|
||||||
|
|
||||||
|
This example shows how to retrieve the relevance values. As neither @code{WHERE}
|
||||||
|
nor @code{ORDER BY} clauses are present, returned rows are not ordered.
|
||||||
|
|
||||||
|
@example
|
||||||
|
mysql> SELECT id, body, MATCH (title,body) AGAINST (
|
||||||
|
-> 'Security implications of running MySQL as root') AS score
|
||||||
|
-> FROM articles WHERE MATCH (title,body) AGAINST
|
||||||
|
-> ('Security implications of running MySQL as root');
|
||||||
|
+----+-----------------------------------------------+-----------------+
|
||||||
|
| id | body | score |
|
||||||
|
+----+-----------------------------------------------+-----------------+
|
||||||
|
| 4 | 1. Never run mysqld as root. 2. Normalize ... | 1.5055546709332 |
|
||||||
|
| 6 | When configured properly, MySQL could be ... | 1.31140957288 |
|
||||||
|
+----+-----------------------------------------------+-----------------+
|
||||||
|
2 rows in set (0.00 sec)
|
||||||
|
@end example
|
||||||
|
|
||||||
|
This is a more complex example - the query returns the relevance and still
|
||||||
|
sorts the rows in order of decreasing relevance. To achieve this, one should specify
|
||||||
|
@code{MATCH} twice. Note that this will cause no additional overhead, as the
|
||||||
|
@strong{MySQL} optimizer will notice that these two @code{MATCH} calls are
|
||||||
|
identical and will call full-text search code only once.
|
||||||
|
|
||||||
|
@strong{MySQL} uses a very simple parser to split text into words. A
|
||||||
|
``word'' is any sequence of letters, numbers, @samp{'}, and @samp{_}. Any
|
||||||
|
``word'' that is present in the stopword list or just too short (3
|
||||||
|
characters or less) is ignored.
|
||||||
|
|
||||||
Every correct word in the collection and in the query is weighted,
|
Every correct word in the collection and in the query is weighted,
|
||||||
according to its significance in the query or collection. This way, a
|
according to its significance in the query or collection. This way, a
|
||||||
@ -29297,17 +29332,22 @@ carefully tuned this way). For very small tables, word distribution
|
|||||||
does not reflect adequately their semantical value, and this model
|
does not reflect adequately their semantical value, and this model
|
||||||
may sometimes produce bizarre results.
|
may sometimes produce bizarre results.
|
||||||
|
|
||||||
For example, search for the word "search" will produce no results in the
|
@example
|
||||||
above example. Word "search" is present in more than half of rows, and
|
mysql> SELECT * FROM articles WHERE MATCH (title,body) AGAINST ('MySQL');
|
||||||
as such, is effectively treated as a stopword (that is, with semantical value
|
Empty set (0.00 sec)
|
||||||
zero). It is, really, the desired behavior - a natural language query
|
@end example
|
||||||
should not return every other row in 1GB table.
|
|
||||||
|
A search for the word @code{MySQL} produces no results in the above example.
|
||||||
|
The word @code{MySQL} is present in more than half of the rows, and as such, is
|
||||||
|
effectively treated as a stopword (that is, with semantical value zero).
|
||||||
|
This is really the desired behavior - a natural language query should not
|
||||||
|
return every second row in a 1GB table.
|
||||||
|
|
||||||
A word that matches half of rows in a table is less likely to locate relevant
|
A word that matches half of rows in a table is less likely to locate relevant
|
||||||
documents. In fact, it will most likely find plenty of irrelevant documents.
|
documents. In fact, it will most likely find plenty of irrelevant documents.
|
||||||
We all know this happens far too often when we are trying to find something on
|
We all know this happens far too often when we are trying to find something on
|
||||||
the Internet with a search engine. It is with this reasoning that such rows
|
the Internet with a search engine. It is with this reasoning that such rows
|
||||||
have been assigned a low semantical value in @strong{a particular dataset}.
|
have been assigned a low semantical value in @strong{this particular dataset}.
|
||||||
|
|
||||||
@menu
|
@menu
|
||||||
* Fulltext Fine-tuning::
|
* Fulltext Fine-tuning::
|
||||||
@ -44120,6 +44160,9 @@ not yet 100% confident in this code.
|
|||||||
@appendixsubsec Changes in release 3.23.38
|
@appendixsubsec Changes in release 3.23.38
|
||||||
@itemize @bullet
|
@itemize @bullet
|
||||||
@item
|
@item
|
||||||
|
Fixed a bug where @code{SELECT} from a @code{MERGE} table
|
||||||
|
sometimes resulted in incorrectly ordered rows.
|
||||||
|
@item
|
||||||
Fixed a bug in @code{REPLACE()} when using the ujis character set.
|
Fixed a bug in @code{REPLACE()} when using the ujis character set.
|
||||||
@item
|
@item
|
||||||
Applied Sleepycat BDB patches 3.2.9.1 and 3.2.9.2.
|
Applied Sleepycat BDB patches 3.2.9.1 and 3.2.9.2.
|
||||||
|
@ -29,4 +29,3 @@ extern pthread_mutex_t THR_LOCK_open;
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
int _myrg_init_queue(MYRG_INFO *info,int inx,enum ha_rkey_function search_flag);
|
int _myrg_init_queue(MYRG_INFO *info,int inx,enum ha_rkey_function search_flag);
|
||||||
int _myrg_finish_scan(MYRG_INFO *info, int inx, enum ha_rkey_function type);
|
|
||||||
|
@ -44,7 +44,6 @@ int myrg_rkey(MYRG_INFO *info,byte *record,int inx, const byte *key,
|
|||||||
MYRG_TABLE *table;
|
MYRG_TABLE *table;
|
||||||
MI_INFO *mi;
|
MI_INFO *mi;
|
||||||
int err;
|
int err;
|
||||||
byte *buf=((search_flag == HA_READ_KEY_EXACT) ? record: 0);
|
|
||||||
LINT_INIT(key_buff);
|
LINT_INIT(key_buff);
|
||||||
LINT_INIT(pack_key_length);
|
LINT_INIT(pack_key_length);
|
||||||
|
|
||||||
@ -57,14 +56,14 @@ int myrg_rkey(MYRG_INFO *info,byte *record,int inx, const byte *key,
|
|||||||
|
|
||||||
if (table == info->open_tables)
|
if (table == info->open_tables)
|
||||||
{
|
{
|
||||||
err=mi_rkey(mi,buf,inx,key,key_len,search_flag);
|
err=mi_rkey(mi,0,inx,key,key_len,search_flag);
|
||||||
key_buff=(byte*) mi->lastkey+mi->s->base.max_key_length;
|
key_buff=(byte*) mi->lastkey+mi->s->base.max_key_length;
|
||||||
pack_key_length=mi->last_rkey_length;
|
pack_key_length=mi->last_rkey_length;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
mi->use_packed_key=1;
|
mi->use_packed_key=1;
|
||||||
err=mi_rkey(mi,buf,inx,key_buff,pack_key_length,search_flag);
|
err=mi_rkey(mi,0,inx,key_buff,pack_key_length,search_flag);
|
||||||
mi->use_packed_key=0;
|
mi->use_packed_key=0;
|
||||||
}
|
}
|
||||||
info->last_used_table=table+1;
|
info->last_used_table=table+1;
|
||||||
@ -78,12 +77,6 @@ int myrg_rkey(MYRG_INFO *info,byte *record,int inx, const byte *key,
|
|||||||
/* adding to queue */
|
/* adding to queue */
|
||||||
queue_insert(&(info->by_key),(byte *)table);
|
queue_insert(&(info->by_key),(byte *)table);
|
||||||
|
|
||||||
/* if looking for KEY_EXACT, return first matched now */
|
|
||||||
if (buf)
|
|
||||||
{
|
|
||||||
info->current_table=table;
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!info->by_key.elements)
|
if (!info->by_key.elements)
|
||||||
|
@ -29,7 +29,11 @@ int myrg_rnext(MYRG_INFO *info, byte *buf, int inx)
|
|||||||
if ((err=mi_rnext(info->current_table->table,NULL,inx)))
|
if ((err=mi_rnext(info->current_table->table,NULL,inx)))
|
||||||
{
|
{
|
||||||
if (err == HA_ERR_END_OF_FILE)
|
if (err == HA_ERR_END_OF_FILE)
|
||||||
|
{
|
||||||
queue_remove(&(info->by_key),0);
|
queue_remove(&(info->by_key),0);
|
||||||
|
if (!info->by_key.elements)
|
||||||
|
return HA_ERR_END_OF_FILE;
|
||||||
|
}
|
||||||
else
|
else
|
||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
@ -40,48 +44,7 @@ int myrg_rnext(MYRG_INFO *info, byte *buf, int inx)
|
|||||||
queue_replaced(&(info->by_key));
|
queue_replaced(&(info->by_key));
|
||||||
}
|
}
|
||||||
|
|
||||||
/* next, let's finish myrg_rkey's initial scan */
|
|
||||||
if ((err=_myrg_finish_scan(info, inx, HA_READ_KEY_OR_NEXT)))
|
|
||||||
return err;
|
|
||||||
|
|
||||||
if (!info->by_key.elements)
|
|
||||||
return HA_ERR_END_OF_FILE;
|
|
||||||
|
|
||||||
/* now, mymerge's read_next is as simple as one queue_top */
|
/* now, mymerge's read_next is as simple as one queue_top */
|
||||||
mi=(info->current_table=(MYRG_TABLE *)queue_top(&(info->by_key)))->table;
|
mi=(info->current_table=(MYRG_TABLE *)queue_top(&(info->by_key)))->table;
|
||||||
return mi_rrnd(mi,buf,mi->lastpos);
|
return mi_rrnd(mi,buf,mi->lastpos);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/* let's finish myrg_rkey's initial scan */
|
|
||||||
|
|
||||||
int _myrg_finish_scan(MYRG_INFO *info, int inx, enum ha_rkey_function type)
|
|
||||||
{
|
|
||||||
int err;
|
|
||||||
MYRG_TABLE *table=info->last_used_table;
|
|
||||||
if (table < info->end_table)
|
|
||||||
{
|
|
||||||
MI_INFO *mi= table[-1].table;
|
|
||||||
byte *key_buff=(byte*) mi->lastkey+mi->s->base.max_key_length;
|
|
||||||
uint pack_key_length= mi->last_rkey_length;
|
|
||||||
|
|
||||||
for (; table < info->end_table ; table++)
|
|
||||||
{
|
|
||||||
mi=table->table;
|
|
||||||
mi->use_packed_key=1;
|
|
||||||
err=mi_rkey(mi,NULL,inx,key_buff,pack_key_length,type);
|
|
||||||
mi->use_packed_key=0;
|
|
||||||
if (err)
|
|
||||||
{
|
|
||||||
if (err == HA_ERR_KEY_NOT_FOUND) /* If end of file */
|
|
||||||
continue;
|
|
||||||
return err;
|
|
||||||
}
|
|
||||||
/* Found here, adding to queue */
|
|
||||||
queue_insert(&(info->by_key),(byte *) table);
|
|
||||||
}
|
|
||||||
/* All tables are now used */
|
|
||||||
info->last_used_table=table;
|
|
||||||
}
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
@ -29,7 +29,11 @@ int myrg_rprev(MYRG_INFO *info, byte *buf, int inx)
|
|||||||
if ((err=mi_rprev(info->current_table->table,NULL,inx)))
|
if ((err=mi_rprev(info->current_table->table,NULL,inx)))
|
||||||
{
|
{
|
||||||
if (err == HA_ERR_END_OF_FILE)
|
if (err == HA_ERR_END_OF_FILE)
|
||||||
|
{
|
||||||
queue_remove(&(info->by_key),0);
|
queue_remove(&(info->by_key),0);
|
||||||
|
if (!info->by_key.elements)
|
||||||
|
return HA_ERR_END_OF_FILE;
|
||||||
|
}
|
||||||
else
|
else
|
||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
@ -40,16 +44,7 @@ int myrg_rprev(MYRG_INFO *info, byte *buf, int inx)
|
|||||||
queue_replaced(&(info->by_key));
|
queue_replaced(&(info->by_key));
|
||||||
}
|
}
|
||||||
|
|
||||||
/* next, let's finish myrg_rkey's initial scan */
|
|
||||||
if ((err=_myrg_finish_scan(info, inx, HA_READ_KEY_OR_PREV)))
|
|
||||||
return err;
|
|
||||||
|
|
||||||
if (!info->by_key.elements)
|
|
||||||
return HA_ERR_END_OF_FILE;
|
|
||||||
|
|
||||||
/* now, mymerge's read_prev is as simple as one queue_top */
|
/* now, mymerge's read_prev is as simple as one queue_top */
|
||||||
mi=(info->current_table=(MYRG_TABLE *)queue_top(&(info->by_key)))->table;
|
mi=(info->current_table=(MYRG_TABLE *)queue_top(&(info->by_key)))->table;
|
||||||
return mi_rrnd(mi,buf,mi->lastpos);
|
return mi_rrnd(mi,buf,mi->lastpos);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -127,3 +127,6 @@ t3 CREATE TABLE `t3` (
|
|||||||
`othr` int(11) NOT NULL default '0'
|
`othr` int(11) NOT NULL default '0'
|
||||||
) TYPE=MRG_MyISAM UNION=(t1,t2)
|
) TYPE=MRG_MyISAM UNION=(t1,t2)
|
||||||
a
|
a
|
||||||
|
a b
|
||||||
|
1 1
|
||||||
|
1 2
|
||||||
|
@ -96,3 +96,16 @@ drop table t3,t2,t1;
|
|||||||
create table t1 (a int not null) type=merge;
|
create table t1 (a int not null) type=merge;
|
||||||
select * from t1;
|
select * from t1;
|
||||||
drop table t1;
|
drop table t1;
|
||||||
|
|
||||||
|
#
|
||||||
|
# Bug found by Monty.
|
||||||
|
#
|
||||||
|
|
||||||
|
drop table if exists t3, t2, t1;
|
||||||
|
create table t1 (a int not null, b int not null, key(a,b));
|
||||||
|
create table t2 (a int not null, b int not null, key(a,b));
|
||||||
|
create table t3 (a int not null, b int not null, key(a,b)) TYPE=MERGE UNION=(t1,t2);
|
||||||
|
insert into t1 values (1,2),(2,1),(0,0),(4,4),(5,5),(6,6);
|
||||||
|
insert into t2 values (1,1),(2,2),(0,0),(4,4),(5,5),(6,6);
|
||||||
|
flush tables;
|
||||||
|
select * from t3 where a=1 order by b limit 2;
|
||||||
|
Reference in New Issue
Block a user