diff --git a/include/ft_global.h b/include/ft_global.h index 9870e0854c1..52fb8d38a9a 100644 --- a/include/ft_global.h +++ b/include/ft_global.h @@ -49,6 +49,7 @@ extern const char *ft_precompiled_stopwords[]; extern ulong ft_min_word_len; extern ulong ft_max_word_len; extern ulong ft_max_word_len_for_sort; +extern char *ft_boolean_syntax; int ft_init_stopwords(const char **); void ft_free_stopwords(void); diff --git a/myisam/ft_boolean_search.c b/myisam/ft_boolean_search.c index de9d16e42c8..70ba7fc9df2 100644 --- a/myisam/ft_boolean_search.c +++ b/myisam/ft_boolean_search.c @@ -36,7 +36,7 @@ static double _wghts[11]={ 3.375000000000000, 5.062500000000000, 7.593750000000000}; -static double *wghts=_wghts+5; // wghts[i] = 1.5**i +static double *wghts=_wghts+5; /* wghts[i] = 1.5**i */ static double _nwghts[11]={ -0.065843621399177, @@ -50,7 +50,7 @@ static double _nwghts[11]={ -1.687500000000000, -2.531250000000000, -3.796875000000000}; -static double *nwghts=_nwghts+5; // nwghts[i] = -0.5*1.5**i +static double *nwghts=_nwghts+5; /* nwghts[i] = -0.5*1.5**i */ typedef struct st_ftb_expr FTB_EXPR; struct st_ftb_expr { @@ -114,20 +114,7 @@ void _ftb_parse_query(FTB *ftb, byte **start, byte *end, byte r=param.plusminus; float weight=(param.pmsign ? nwghts : wghts)[(r>5)?5:((r<-5)?-5:r)]; switch (res) { - case FTB_LBR: - ftbe=(FTB_EXPR *)alloc_root(&ftb->mem_root, sizeof(FTB_EXPR)); - ftbe->yesno=param.yesno; - ftbe->weight=weight; - ftbe->up=up; - ftbe->ythresh=0; - ftbe->docid=HA_POS_ERROR; - if (ftbe->yesno > 0) up->ythresh++; - _ftb_parse_query(ftb, start, end, ftbe, depth+1, - (param.yesno<0 ? depth+1 : ndepth)); - break; - case FTB_RBR: - return; - case 1: + case 1: /* word found */ ftbw=(FTB_WORD *)alloc_root(&ftb->mem_root, sizeof(FTB_WORD) + (param.trunc ? MI_MAX_KEY_BUFF : w.len+extra)); ftbw->len=w.len+1; @@ -142,6 +129,19 @@ void _ftb_parse_query(FTB *ftb, byte **start, byte *end, if (ftbw->yesno > 0) up->ythresh++; queue_insert(& ftb->queue, (byte *)ftbw); break; + case 2: /* left bracket */ + ftbe=(FTB_EXPR *)alloc_root(&ftb->mem_root, sizeof(FTB_EXPR)); + ftbe->yesno=param.yesno; + ftbe->weight=weight; + ftbe->up=up; + ftbe->ythresh=0; + ftbe->docid=HA_POS_ERROR; + if (ftbe->yesno > 0) up->ythresh++; + _ftb_parse_query(ftb, start, end, ftbe, depth+1, + (param.yesno<0 ? depth+1 : ndepth)); + break; + case 3: /* right bracket */ + return; } } return; @@ -339,7 +339,7 @@ int ft_boolean_read_next(FT_INFO *ftb, char *record) /* curdoc matched ! */ info->update&= (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED); /* why is this ? */ - /* info->lastpos=curdoc; */ /* do I need this ? */ + info->lastpos=curdoc; if (!(*info->read_record)(info,curdoc,record)) { info->update|= HA_STATE_AKTIV; /* Record is read */ @@ -348,6 +348,7 @@ int ft_boolean_read_next(FT_INFO *ftb, char *record) return my_errno; } } + ftb->state=INDEX_DONE; return my_errno=HA_ERR_END_OF_FILE; } @@ -359,17 +360,33 @@ float ft_boolean_find_relevance(FT_INFO *ftb, my_off_t docid, byte *record) FTB_EXPR *ftbe; uint i; - if (ftb->state == READY) + if (ftb->state == READY || ftb->state == INDEX_DONE) { + for (i=1; i<=ftb->queue.elements; i++) + { + ftbw=(FTB_WORD *)(ftb->queue.root[i]); + ftbw->docid=HA_POS_ERROR; + for (ftbe=ftbw->up; ftbe; ftbe=ftbe->up) + { + if (ftbe->docid != HA_POS_ERROR) + { + ftbe->cur_weight=ftbe->yesses=ftbe->nos=0; + ftbe->docid=HA_POS_ERROR; + } + else + break; + } + } + queue_fix(& ftb->queue); ftb->state=SCAN; } else if (ftb->state != SCAN) - return -1.0; + return -2.0; bzero(&ptree, sizeof(ptree)); if (_mi_ft_parse(& ptree, ftb->info, ftb->keynr, record)) - return -1.0; + return -3.0; for (i=1; i<=ftb->queue.elements; i++) { diff --git a/myisam/ft_nlq_search.c b/myisam/ft_nlq_search.c index 75249a1432a..2a55ff839ca 100644 --- a/myisam/ft_nlq_search.c +++ b/myisam/ft_nlq_search.c @@ -253,7 +253,7 @@ float ft_nlq_find_relevance(FT_INFO *handler, my_off_t docid, int a,b,c; FT_DOC *docs=handler->doc; - // Assuming docs[] is sorted by dpos... + /* Assuming docs[] is sorted by dpos... */ for (a=0, b=handler->ndocs, c=(a+b)/2; b-a>1; c=(a+b)/2) { diff --git a/myisam/ft_parser.c b/myisam/ft_parser.c index b09ede78364..35e5959b556 100644 --- a/myisam/ft_parser.c +++ b/myisam/ft_parser.c @@ -33,9 +33,6 @@ typedef struct st_ft_docstat { double max, nsum, nsum2; #endif /* EVAL_RUN */ -// MI_INFO *info; -// uint keynr; -// byte *keybuf; } FT_DOCSTAT; static int FT_WORD_cmp(void* cmp_arg, FT_WORD *w1, FT_WORD *w2) @@ -63,9 +60,7 @@ static int walk_and_copy(FT_WORD *word,uint32 count,FT_DOCSTAT *docstat) /* transforms tree of words into the array, applying normalization */ -FT_WORD * ft_linearize(//MI_INFO *info, uint keynr, - //byte *keybuf, - TREE *wtree) +FT_WORD * ft_linearize(TREE *wtree) { FT_WORD *wlist,*p; FT_DOCSTAT docstat; @@ -74,9 +69,6 @@ FT_WORD * ft_linearize(//MI_INFO *info, uint keynr, if ((wlist=(FT_WORD *) my_malloc(sizeof(FT_WORD)* (1+wtree->elements_in_tree),MYF(0)))) { -// docstat.info=info; -// docstat.keynr=keynr; -// docstat.keybuf=keybuf; docstat.list=wlist; docstat.uniq=wtree->elements_in_tree; #ifdef EVAL_RUN @@ -122,12 +114,20 @@ FT_WORD * ft_linearize(//MI_INFO *info, uint keynr, #endif #define word_char(X) (true_word_char(X) || misc_word_char(X)) + +/* returns: + * 0 - eof + * 1 - word found + * 2 - left bracket + * 3 - right bracket + */ byte ft_get_word(byte **start, byte *end, FT_WORD *word, FTB_PARAM *param) { byte *doc=*start; int mwc; - param->yesno=param->plusminus=param->pmsign=0; + param->yesno=(FTB_YES==' ')?1:0; + param->plusminus=param->pmsign=0; while (docprev=' '; */ *start=doc+1; - return *doc; + return (*doc == FTB_RBR)+2; } if (param->prev == ' ') { - switch (*doc) { - case FTB_YES: param->yesno=+1; continue; - case FTB_NO: param->yesno=-1; continue; - case FTB_INC: param->plusminus++; continue; - case FTB_DEC: param->plusminus--; continue; - case FTB_NEG: param->pmsign=!param->pmsign; continue; - default: break; - } + if (*doc == FTB_YES ) { param->yesno=+1; continue; } else + if (*doc == FTB_EGAL) { param->yesno= 0; continue; } else + if (*doc == FTB_NO ) { param->yesno=-1; continue; } else + if (*doc == FTB_INC ) { param->plusminus++; continue; } else + if (*doc == FTB_DEC ) { param->plusminus--; continue; } else + if (*doc == FTB_NEG ) { param->pmsign=!param->pmsign; continue; } } param->prev=*doc; param->yesno=param->plusminus=param->pmsign=0; @@ -162,7 +160,7 @@ byte ft_get_word(byte **start, byte *end, FT_WORD *word, FTB_PARAM *param) else if (!misc_word_char(*doc) || mwc++) break; - param->prev='A'; // be sure *prev is true_word_char + param->prev='A'; /* be sure *prev is true_word_char */ word->len= (uint)(doc-word->pos) - mwc; if ((param->trunc=(doc' -#define FTB_DEC '<' -#define FTB_LBR '(' -#define FTB_RBR ')' -#define FTB_NEG '~' -#define FTB_TRUNC '*' +#define FTB_YES (ft_boolean_syntax[0]) +#define FTB_EGAL (ft_boolean_syntax[1]) +#define FTB_NO (ft_boolean_syntax[2]) +#define FTB_INC (ft_boolean_syntax[3]) +#define FTB_DEC (ft_boolean_syntax[4]) +#define FTB_LBR (ft_boolean_syntax[5]) +#define FTB_RBR (ft_boolean_syntax[6]) +#define FTB_NEG (ft_boolean_syntax[7]) +#define FTB_TRUNC (ft_boolean_syntax[8]) typedef struct st_ft_word { byte * pos; diff --git a/mysql-test/r/fulltext_order_by.result b/mysql-test/r/fulltext_order_by.result index e096de2af1b..8d88b8d6ebf 100644 --- a/mysql-test/r/fulltext_order_by.result +++ b/mysql-test/r/fulltext_order_by.result @@ -11,17 +11,40 @@ SELECT a, MATCH (message) AGAINST ('steve') FROM t1 WHERE MATCH (message) AGAINS a MATCH (message) AGAINST ('steve') 4 0.90587323904037 7 0.89568990468979 +SELECT a, MATCH (message) AGAINST ('steve' IN BOOLEAN MODE) FROM t1 WHERE MATCH (message) AGAINST ('steve'); +a MATCH (message) AGAINST ('steve' IN BOOLEAN MODE) +4 1 +7 1 +SELECT a, MATCH (message) AGAINST ('steve') FROM t1 WHERE MATCH (message) AGAINST ('steve' IN BOOLEAN MODE); +a MATCH (message) AGAINST ('steve') +4 0.90587323904037 +7 0.89568990468979 +SELECT a, MATCH (message) AGAINST ('steve' IN BOOLEAN MODE) FROM t1 WHERE MATCH (message) AGAINST ('steve' IN BOOLEAN MODE); +a MATCH (message) AGAINST ('steve' IN BOOLEAN MODE) +4 1 +7 1 SELECT a, MATCH (message) AGAINST ('steve') FROM t1 WHERE MATCH (message) AGAINST ('steve') ORDER BY a; a MATCH (message) AGAINST ('steve') 4 0.90587323904037 7 0.89568990468979 +SELECT a, MATCH (message) AGAINST ('steve' IN BOOLEAN MODE) FROM t1 WHERE MATCH (message) AGAINST ('steve' IN BOOLEAN MODE) ORDER BY a; +a MATCH (message) AGAINST ('steve' IN BOOLEAN MODE) +4 1 +7 1 SELECT a, MATCH (message) AGAINST ('steve') FROM t1 WHERE a in (2,7,4) and MATCH (message) AGAINST ('steve') ORDER BY a DESC; a MATCH (message) AGAINST ('steve') 7 0.89568990468979 4 0.90587323904037 +SELECT a, MATCH (message) AGAINST ('steve' IN BOOLEAN MODE) FROM t1 WHERE a in (2,7,4) and MATCH (message) AGAINST ('steve' IN BOOLEAN MODE) ORDER BY a DESC; +a MATCH (message) AGAINST ('steve' IN BOOLEAN MODE) +7 1 +4 1 SELECT a, MATCH (message) AGAINST ('steve') FROM t1 WHERE a=7 and MATCH (message) AGAINST ('steve') ORDER BY 1; a MATCH (message) AGAINST ('steve') 7 0.89568990468979 +SELECT a, MATCH (message) AGAINST ('steve' IN BOOLEAN MODE) FROM t1 WHERE a=7 and MATCH (message) AGAINST ('steve' IN BOOLEAN MODE) ORDER BY 1; +a MATCH (message) AGAINST ('steve' IN BOOLEAN MODE) +7 1 SELECT a, MATCH (message) AGAINST ('steve') as rel FROM t1 ORDER BY rel; a rel 1 0 @@ -31,4 +54,13 @@ a rel 6 0 7 0.89568990468979 4 0.90587323904037 +SELECT a, MATCH (message) AGAINST ('steve' IN BOOLEAN MODE) as rel FROM t1 ORDER BY rel; +a rel +1 0 +2 0 +3 0 +5 0 +6 0 +4 1 +7 1 drop table t1; diff --git a/mysql-test/r/fulltext_var.result b/mysql-test/r/fulltext_var.result index ececb63d31f..d9c37df56be 100644 --- a/mysql-test/r/fulltext_var.result +++ b/mysql-test/r/fulltext_var.result @@ -3,3 +3,4 @@ Variable_name Value ft_min_word_len 4 ft_max_word_len 254 ft_max_word_len_for_sort 20 +ft_boolean_syntax + -><()~* diff --git a/mysql-test/t/fulltext_order_by.test b/mysql-test/t/fulltext_order_by.test index 9f35d58e699..d5cb99ef0ee 100644 --- a/mysql-test/t/fulltext_order_by.test +++ b/mysql-test/t/fulltext_order_by.test @@ -9,17 +9,24 @@ INSERT INTO t1 (message) VALUES ("Testing"),("table"),("testbug"), ("steve"),("is"),("cool"),("steve is cool"); # basic MATCH SELECT a, MATCH (message) AGAINST ('steve') FROM t1 WHERE MATCH (message) AGAINST ('steve'); +SELECT a, MATCH (message) AGAINST ('steve' IN BOOLEAN MODE) FROM t1 WHERE MATCH (message) AGAINST ('steve'); +SELECT a, MATCH (message) AGAINST ('steve') FROM t1 WHERE MATCH (message) AGAINST ('steve' IN BOOLEAN MODE); +SELECT a, MATCH (message) AGAINST ('steve' IN BOOLEAN MODE) FROM t1 WHERE MATCH (message) AGAINST ('steve' IN BOOLEAN MODE); # MATCH + ORDER BY (with ft-ranges) SELECT a, MATCH (message) AGAINST ('steve') FROM t1 WHERE MATCH (message) AGAINST ('steve') ORDER BY a; +SELECT a, MATCH (message) AGAINST ('steve' IN BOOLEAN MODE) FROM t1 WHERE MATCH (message) AGAINST ('steve' IN BOOLEAN MODE) ORDER BY a; # MATCH + ORDER BY (with normal ranges) + UNIQUE SELECT a, MATCH (message) AGAINST ('steve') FROM t1 WHERE a in (2,7,4) and MATCH (message) AGAINST ('steve') ORDER BY a DESC; +SELECT a, MATCH (message) AGAINST ('steve' IN BOOLEAN MODE) FROM t1 WHERE a in (2,7,4) and MATCH (message) AGAINST ('steve' IN BOOLEAN MODE) ORDER BY a DESC; # MATCH + ORDER BY + UNIQUE (const_table) SELECT a, MATCH (message) AGAINST ('steve') FROM t1 WHERE a=7 and MATCH (message) AGAINST ('steve') ORDER BY 1; +SELECT a, MATCH (message) AGAINST ('steve' IN BOOLEAN MODE) FROM t1 WHERE a=7 and MATCH (message) AGAINST ('steve' IN BOOLEAN MODE) ORDER BY 1; # ORDER BY MATCH SELECT a, MATCH (message) AGAINST ('steve') as rel FROM t1 ORDER BY rel; +SELECT a, MATCH (message) AGAINST ('steve' IN BOOLEAN MODE) as rel FROM t1 ORDER BY rel; drop table t1; diff --git a/sql/mysqld.cc b/sql/mysqld.cc index 2774e2a4050..77a562f0217 100644 --- a/sql/mysqld.cc +++ b/sql/mysqld.cc @@ -1892,7 +1892,7 @@ The server will not act as a slave."); if (opt_myisam_log) (void) mi_log( 1 ); - ft_init_stopwords(ft_precompiled_stopwords); /* SerG */ + ft_init_stopwords(ft_precompiled_stopwords); #ifdef __WIN__ #define MYSQL_ERR_FILE "mysql.err" @@ -2998,6 +2998,7 @@ struct show_var_st init_vars[]= { {"ft_min_word_len", (char*) &ft_min_word_len, SHOW_LONG}, {"ft_max_word_len", (char*) &ft_max_word_len, SHOW_LONG}, {"ft_max_word_len_for_sort",(char*) &ft_max_word_len_for_sort, SHOW_LONG}, + {"ft_boolean_syntax", ft_boolean_syntax, SHOW_CHAR}, {"have_bdb", (char*) &have_berkeley_db, SHOW_HAVE}, {"have_innodb", (char*) &have_innodb, SHOW_HAVE}, {"have_isam", (char*) &have_isam, SHOW_HAVE}, diff --git a/sql/opt_ft.h b/sql/opt_ft.h index dcbbb8abcec..b055edc107c 100644 --- a/sql/opt_ft.h +++ b/sql/opt_ft.h @@ -29,7 +29,7 @@ public: TABLE_REF *ref; FT_SELECT(TABLE *table, TABLE_REF *tref) : - QUICK_SELECT (table,tref->key,1), ref(tref) {} + QUICK_SELECT (table,tref->key,1), ref(tref) { init(); } int init() { return error=file->ft_init(); } int get_next() { return error=file->ft_read(record); } diff --git a/sql/opt_range.cc b/sql/opt_range.cc index 3849159ca39..4c22dc5b6a2 100644 --- a/sql/opt_range.cc +++ b/sql/opt_range.cc @@ -407,11 +407,6 @@ QUICK_SELECT::~QUICK_SELECT() } } -int QUICK_SELECT::init() -{ - return error=file->index_init(index); -} - QUICK_RANGE::QUICK_RANGE() :min_key(0),max_key(0),min_length(0),max_length(0), flag(NO_MIN_RANGE | NO_MAX_RANGE) diff --git a/sql/opt_range.h b/sql/opt_range.h index 50215b94be0..07d1216a42f 100644 --- a/sql/opt_range.h +++ b/sql/opt_range.h @@ -75,7 +75,7 @@ public: QUICK_SELECT(TABLE *table,uint index_arg,bool no_alloc=0); virtual ~QUICK_SELECT(); void reset(void) { next=0; it.rewind(); } - virtual int init(); + int init() { return error=file->index_init(index); } virtual int get_next(); int cmp_next(QUICK_RANGE *range); bool unique_key_range();