1
0
mirror of https://github.com/MariaDB/server.git synced 2025-08-01 03:47:19 +03:00

ft_* variables added

This commit is contained in:
serg@serg.mysql.com
2001-04-17 21:30:02 +02:00
parent f751ab1a9a
commit 185c2beaea
13 changed files with 77 additions and 17 deletions

View File

@ -21342,6 +21342,9 @@ differ somewhat:
| delayed_queue_size | 1000 | | delayed_queue_size | 1000 |
| flush | OFF | | flush | OFF |
| flush_time | 0 | | flush_time | 0 |
| ft_min_word_len | 4 |
| ft_max_word_len | 254 |
| ft_max_word_len_for_sort| 20 |
| have_bdb | YES | | have_bdb | YES |
| have_gemini | NO | | have_gemini | NO |
| have_innodb | YES | | have_innodb | YES |
@ -21525,6 +21528,31 @@ tables will be closed (to free up resources and sync things to disk). We
only recommend this option on Win95, Win98, or on systems where you have only recommend this option on Win95, Win98, or on systems where you have
very little resources. very little resources.
@item @code{ft_min_word_len}
The minimum length of the word to be included in a @code{FULLTEXT} index.
@strong{Note: @code{FULLTEXT} indexes have to be rebuilt after changing
this variable.}
@item @code{ft_max_word_len}
The maximum length of the word to be included in a @code{FULLTEXT} index.
@strong{Note: @code{FULLTEXT} indexes have to be rebuilt after changing
this variable.}
@item @code{ft_max_word_len_for_sort}
The maximum length of the word in a @code{FULLTEXT} index
to be used in fast index recreation method in
@code{REPAIR}, @code{CREATE INDEX}, or
@code{ALTER TABLE}. Longer words are inserted the slow way.
The rule of thumb is as follows: with @code{ft_max_word_len_for_sort}
increasing, @strong{MySQL} will create bigger temporary files
(thus slowing the process down, due to disk I/O), and will put
fewer keys in one sort block (again, decreasing the efficiency).
When @code{ft_max_word_len_for_sort} is too small, instead,
@strong{MySQL} will insert a lot of words into the index the slow way -
but short words will be inserted very fast. It applies only to
index recreation during @code{REPAIR}, @code{CREATE INDEX}, or
@code{ALTER TABLE}.
@item @code{have_bdb} @item @code{have_bdb}
@code{YES} if @code{mysqld} supports Berkeley DB tables. @code{DISABLED} @code{YES} if @code{mysqld} supports Berkeley DB tables. @code{DISABLED}
if @code{--skip-bdb} is used. if @code{--skip-bdb} is used.
@ -28279,12 +28307,9 @@ unless you know what you are doing!
@itemize @itemize
@item @item
Minimal length of word to be indexed is defined in Minimal length of word to be indexed is defined by @strong{MySQL}
@code{myisam/ftdefs.h} file by the line variable @code{ft_min_word_len}. @xref{SHOW VARIABLES}.
@example Change it to the value you prefer, and rebuild
#define MIN_WORD_LEN 4
@end example
Change it to the value you prefer, recompile @strong{MySQL}, and rebuild
your @code{FULLTEXT} indexes. your @code{FULLTEXT} indexes.
@item @item
@ -42463,6 +42488,8 @@ Responsible for @strong{MySQL} configure.
Full-text search. Full-text search.
@item @item
Added keys to the @code{MERGE} library. Added keys to the @code{MERGE} library.
@item
@code{HANDLER} command.
@end itemize @end itemize
@item Jeremy Cole @item Jeremy Cole
@ -42801,6 +42828,8 @@ Our TODO section contains what we plan to have in 4.0. @xref{TODO MySQL 4.0}.
@itemize @bullet @itemize @bullet
@item @item
Added @code{HANDLER} command.
@item
Added @code{SQL_CALC_FOUND_ROWS} and @code{FOUND_ROWS()}. This makes it Added @code{SQL_CALC_FOUND_ROWS} and @code{FOUND_ROWS()}. This makes it
possible to know how many rows a query would have returned if one hadn't possible to know how many rows a query would have returned if one hadn't
used @code{LIMIT}. used @code{LIMIT}.
@ -42903,6 +42932,9 @@ not yet 100% confident in this code.
@appendixsubsec Changes in release 3.23.37 @appendixsubsec Changes in release 3.23.37
@itemize @bullet @itemize @bullet
@item @item
Added variables @code{ft_min_word_len}, @code{ft_max_word_len}, and
@code{ft_max_word_len_for_sort}.
@item
Changed @code{INNOBASE} to @code{INNODB} (because the @code{INNOBASE} Changed @code{INNOBASE} to @code{INNODB} (because the @code{INNOBASE}
name was already used). Note that all @code{configure} options and name was already used). Note that all @code{configure} options and
@code{mysqld} start options are now using @code{innodb} instead of @code{mysqld} start options are now using @code{innodb} instead of

View File

@ -27,6 +27,7 @@ extern "C" {
#endif #endif
#define FT_QUERY_MAXLEN 1024 #define FT_QUERY_MAXLEN 1024
#define HA_FT_MAXLEN 254
typedef struct ft_doc_rec { typedef struct ft_doc_rec {
my_off_t dpos; my_off_t dpos;
@ -42,6 +43,10 @@ typedef struct st_ft_doclist {
extern const char *ft_precompiled_stopwords[]; extern const char *ft_precompiled_stopwords[];
/*
  Full-text word-length limits, exported as variables so they can be
  changed at run time instead of being fixed at compile time.
*/
extern uint ft_min_word_len;            /* shortest word that is indexed */
extern uint ft_max_word_len;            /* upper bound on indexed word length */
extern uint ft_max_word_len_for_sort;   /* word length used by repair-by-sort */
int ft_init_stopwords(const char **); int ft_init_stopwords(const char **);
void ft_free_stopwords(void); void ft_free_stopwords(void);

View File

@ -25,7 +25,7 @@ static void complain(int val);
static int count=0, stats=0, dump=0, verbose=0; static int count=0, stats=0, dump=0, verbose=0;
static char *query=NULL; static char *query=NULL;
#define MAX (MAX_WORD_LEN+10) #define MAX (HA_FT_MAXLEN+10)
#define HOW_OFTEN_TO_WRITE 1000 #define HOW_OFTEN_TO_WRITE 1000
int main(int argc,char *argv[]) int main(int argc,char *argv[])

View File

@ -164,7 +164,7 @@ byte ft_get_word(byte **start, byte *end, FT_WORD *word, FTB_PARAM *param)
if ((param->trunc=(doc<end && *doc == FTB_TRUNC))) if ((param->trunc=(doc<end && *doc == FTB_TRUNC)))
doc++; doc++;
if (word->len >= MIN_WORD_LEN && word->len < MAX_WORD_LEN && if (word->len >= ft_min_word_len && word->len < ft_max_word_len &&
!is_stopword(word->pos, word->len)) !is_stopword(word->pos, word->len))
{ {
*start=doc; *start=doc;
@ -195,7 +195,7 @@ byte ft_simple_get_word(byte **start, byte *end, FT_WORD *word)
word->len= (uint)(doc-word->pos) - mwc; word->len= (uint)(doc-word->pos) - mwc;
if (word->len >= MIN_WORD_LEN && word->len < MAX_WORD_LEN && if (word->len >= ft_min_word_len && word->len < ft_max_word_len &&
!is_stopword(word->pos, word->len)) !is_stopword(word->pos, word->len))
{ {
*start=doc; *start=doc;

View File

@ -18,6 +18,10 @@
#include "ftdefs.h" #include "ftdefs.h"
/*
  Run-time tunable full-text limits; these take the place of the
  compile-time macros MIN_WORD_LEN, MAX_WORD_LEN and MAX_WORD_LEN_FOR_SORT.
*/
/* Shortest word that is indexed: the word parsers require len >= this. */
uint ft_min_word_len=4;
/* Upper word-length bound: the word parsers require len < this (strict). */
uint ft_max_word_len=HA_FT_MAXLEN;
/* Word length assumed when sizing keys for repair-by-sort of FULLTEXT keys. */
uint ft_max_word_len_for_sort=20;
const MI_KEYSEG ft_keysegs[FT_SEGS]={ const MI_KEYSEG ft_keysegs[FT_SEGS]={
{ {
HA_KEYTYPE_VARTEXT, /* type */ HA_KEYTYPE_VARTEXT, /* type */

View File

@ -48,7 +48,7 @@ int ft_init_stopwords(const char **sws)
for(;*sws;sws++) for(;*sws;sws++)
{ {
if( (sw.len= (uint) strlen(sw.pos=*sws)) < MIN_WORD_LEN) continue; if( (sw.len= (uint) strlen(sw.pos=*sws)) < ft_min_word_len) continue;
if(!tree_insert(stopwords3, &sw, 0)) if(!tree_insert(stopwords3, &sw, 0))
{ {
delete_tree(stopwords3); /* purecov: inspected */ delete_tree(stopwords3); /* purecov: inspected */

View File

@ -22,10 +22,6 @@
#include <m_ctype.h> #include <m_ctype.h>
#include <my_tree.h> #include <my_tree.h>
#define MIN_WORD_LEN 4
#define MAX_WORD_LEN HA_FT_MAXLEN
#define MAX_WORD_LEN_FOR_SORT 20
#define HYPHEN_IS_DELIM #define HYPHEN_IS_DELIM
#define HYPHEN_IS_CONCAT /* not used for now */ #define HYPHEN_IS_CONCAT /* not used for now */

View File

@ -24,7 +24,6 @@
/* shouldn't be def'ed when linking with mysql */ /* shouldn't be def'ed when linking with mysql */
#undef EVAL_RUN #undef EVAL_RUN
#define HA_FT_MAXLEN 254
#define HA_FT_WTYPE HA_KEYTYPE_FLOAT #define HA_FT_WTYPE HA_KEYTYPE_FLOAT
#define HA_FT_WLEN 4 #define HA_FT_WLEN 4
#ifdef EVAL_RUN #ifdef EVAL_RUN

View File

@ -1875,10 +1875,10 @@ int mi_repair_by_sort(MI_CHECK *param, register MI_INFO *info,
if (sort_info->keyinfo->flag & HA_FULLTEXT) if (sort_info->keyinfo->flag & HA_FULLTEXT)
{ {
sort_param.max_records=sort_info->max_records= sort_param.max_records=sort_info->max_records=
(ha_rows) (sort_info->filelength/MAX_WORD_LEN_FOR_SORT+1); (ha_rows) (sort_info->filelength/ft_max_word_len_for_sort+1);
sort_param.key_read=sort_ft_key_read; sort_param.key_read=sort_ft_key_read;
sort_param.key_length+=MAX_WORD_LEN_FOR_SORT-MAX_WORD_LEN; sort_param.key_length+=ft_max_word_len_for_sort-ft_max_word_len;
} }
else else
sort_param.key_read=sort_key_read; sort_param.key_read=sort_key_read;

View File

@ -152,6 +152,12 @@ static CHANGEABLE_VAR changeable_vars[] = {
{ "sort_key_blocks",(long*) &check_param.sort_key_blocks,BUFFERS_WHEN_SORTING,4L,100L,0L, { "sort_key_blocks",(long*) &check_param.sort_key_blocks,BUFFERS_WHEN_SORTING,4L,100L,0L,
1L }, 1L },
{ "decode_bits",(long*) &decode_bits,9L,4L,17L,0L,1L }, { "decode_bits",(long*) &decode_bits,9L,4L,17L,0L,1L },
{ "ft_min_word_len", (long*) &ft_min_word_len,
4, 1, HA_FT_MAXLEN, 0, 1 },
{ "ft_max_word_len", (long*) &ft_max_word_len,
HA_FT_MAXLEN, 10, HA_FT_MAXLEN, 0, 1 },
{ "ft_max_word_len_for_sort",(long*) &ft_max_word_len_for_sort,
20, 4, HA_FT_MAXLEN, 0, 1 },
{ NullS,(long*) 0,0L,0L,0L,0L,0L,} }; { NullS,(long*) 0,0L,0L,0L,0L,0L,} };
enum options {OPT_CHARSETS_DIR=256, OPT_SET_CHARSET,OPT_START_CHECK_POS}; enum options {OPT_CHARSETS_DIR=256, OPT_SET_CHARSET,OPT_START_CHECK_POS};

View File

@ -0,0 +1,4 @@
Variable_name Value
ft_min_word_len 4
ft_max_word_len 254
ft_max_word_len_for_sort 20

View File

@ -0,0 +1,5 @@
#
# Fulltext configurable parameters
#
show variables like "ft\_%";

View File

@ -2664,6 +2664,12 @@ CHANGEABLE_VAR changeable_vars[] = {
DELAYED_QUEUE_SIZE, 1, ~0L, 0, 1 }, DELAYED_QUEUE_SIZE, 1, ~0L, 0, 1 },
{ "flush_time", (long*) &flush_time, { "flush_time", (long*) &flush_time,
FLUSH_TIME, 0, ~0L, 0, 1 }, FLUSH_TIME, 0, ~0L, 0, 1 },
{ "ft_min_word_len", (long*) &ft_min_word_len,
4, 1, HA_FT_MAXLEN, 0, 1 },
{ "ft_max_word_len", (long*) &ft_max_word_len,
HA_FT_MAXLEN, 10, HA_FT_MAXLEN, 0, 1 },
{ "ft_max_word_len_for_sort",(long*) &ft_max_word_len_for_sort,
20, 4, HA_FT_MAXLEN, 0, 1 },
#ifdef HAVE_GEMINI_DB #ifdef HAVE_GEMINI_DB
{ "gemini_buffer_cache", (long*) &gemini_buffer_cache, { "gemini_buffer_cache", (long*) &gemini_buffer_cache,
128 * 8192, 16, LONG_MAX, 0, 1 }, 128 * 8192, 16, LONG_MAX, 0, 1 },
@ -2804,6 +2810,9 @@ struct show_var_st init_vars[]= {
{"delayed_queue_size", (char*) &delayed_queue_size, SHOW_LONG}, {"delayed_queue_size", (char*) &delayed_queue_size, SHOW_LONG},
{"flush", (char*) &myisam_flush, SHOW_MY_BOOL}, {"flush", (char*) &myisam_flush, SHOW_MY_BOOL},
{"flush_time", (char*) &flush_time, SHOW_LONG}, {"flush_time", (char*) &flush_time, SHOW_LONG},
{"ft_min_word_len", (char*) &ft_min_word_len, SHOW_LONG},
{"ft_max_word_len", (char*) &ft_max_word_len, SHOW_LONG},
{"ft_max_word_len_for_sort",(char*) &ft_max_word_len_for_sort, SHOW_LONG},
#ifdef HAVE_GEMINI_DB #ifdef HAVE_GEMINI_DB
{"gemini_buffer_cache", (char*) &gemini_buffer_cache, SHOW_LONG}, {"gemini_buffer_cache", (char*) &gemini_buffer_cache, SHOW_LONG},
{"gemini_connection_limit", (char*) &gemini_connection_limit, SHOW_LONG}, {"gemini_connection_limit", (char*) &gemini_connection_limit, SHOW_LONG},