1
0
mirror of https://github.com/MariaDB/server.git synced 2025-08-01 03:47:19 +03:00

Two-level index structure for FULLTEXT indexes

This commit is contained in:
serg@serg.mysql.com
2003-01-21 19:24:34 +01:00
parent 241f4143e8
commit 0dff67cba2
25 changed files with 1556 additions and 354 deletions

View File

@ -21,6 +21,7 @@
#define FT_CORE
#include "ftdefs.h"
#include <queues.h>
#include <assert.h> /* for DBUG_ASSERT() */
/* search with boolean queries */
@ -63,42 +64,44 @@ struct st_ftb_expr
{
FTB_EXPR *up;
byte *quot, *qend;
float weight;
uint flags;
my_off_t docid[2]; /* for index search and for scan */
float weight;
float cur_weight;
int yesses; /* number of "yes" words matched */
int nos; /* number of "no" words matched */
int ythresh; /* number of "yes" words in expr */
int yweaks; /* number of "yes" words for scan only */
uint flags;
uint yesses; /* number of "yes" words matched */
uint nos; /* number of "no" words matched */
uint ythresh; /* number of "yes" words in expr */
uint yweaks; /* number of "yes" words for scan only */
};
typedef struct st_ftb_word
{
FTB_EXPR *up;
float weight;
uint flags;
my_off_t docid[2]; /* for index search and for scan */
uint ndepth;
int len;
/* ... docid cache can be added here. SerG */
byte word[1];
FTB_EXPR *up;
MI_KEYDEF *keyinfo;
my_off_t docid[2]; /* for index search and for scan */
my_off_t key_root;
float weight;
uint ndepth;
uint flags;
uint len;
uchar off;
byte word[1];
} FTB_WORD;
typedef struct st_ft_info
{
struct _ft_vft *please;
MI_INFO *info;
uint keynr;
CHARSET_INFO *charset;
enum { UNINITIALIZED, READY, INDEX_SEARCH, INDEX_DONE /*, SCAN*/ } state;
uint with_scan;
my_off_t lastpos;
FTB_EXPR *root;
QUEUE queue;
TREE no_dupes;
FTB_WORD **list;
MEM_ROOT mem_root;
QUEUE queue;
TREE no_dupes;
my_off_t lastpos;
uint keynr;
uchar with_scan;
enum { UNINITIALIZED, READY, INDEX_SEARCH, INDEX_DONE } state;
} FTB;
static int FTB_WORD_cmp(my_off_t *v, FTB_WORD *a, FTB_WORD *b)
@ -160,6 +163,7 @@ static void _ftb_parse_query(FTB *ftb, byte **start, byte *end,
ftbw->up=up;
ftbw->docid[0]=ftbw->docid[1]=HA_POS_ERROR;
ftbw->ndepth= (param.yesno<0) + depth;
ftbw->key_root=HA_POS_ERROR;
memcpy(ftbw->word+1, w.pos, w.len);
ftbw->word[0]=w.len;
if (param.yesno > 0) up->ythresh++;
@ -194,22 +198,98 @@ static int _ftb_no_dupes_cmp(void* not_used __attribute__((unused)),
return CMP_NUM((*((my_off_t*)a)), (*((my_off_t*)b)));
}
/* returns 1 if the search was finished (must-word wasn't found) */
static int _ft2_search(FTB *ftb, FTB_WORD *ftbw, my_bool init_search)
{
int r;
uint off;
int subkeys;
MI_INFO *info=ftb->info;
if (init_search)
{
ftbw->key_root=info->s->state.key_root[ftb->keynr];
ftbw->keyinfo=info->s->keyinfo+ftb->keynr;
ftbw->off=0;
r=_mi_search(info, ftbw->keyinfo, (uchar*) ftbw->word, ftbw->len,
SEARCH_FIND | SEARCH_BIGGER, ftbw->key_root);
}
else
{
r=_mi_search(info, ftbw->keyinfo, (uchar*) ftbw->word+ftbw->off,
USE_WHOLE_KEY, SEARCH_BIGGER, ftbw->key_root);
}
if (!r && !ftbw->off)
{
r= mi_compare_text(ftb->charset,
info->lastkey + (ftbw->flags & FTB_FLAG_TRUNC),
ftbw->len - (ftbw->flags & FTB_FLAG_TRUNC),
(uchar*) ftbw->word + (ftbw->flags & FTB_FLAG_TRUNC),
ftbw->len - (ftbw->flags & FTB_FLAG_TRUNC),
0);
}
if (r) /* not found */
{
if (!ftbw->off || !(ftbw->flags & FTB_FLAG_TRUNC))
{
ftbw->docid[0]=HA_POS_ERROR;
if ((ftbw->flags & FTB_FLAG_YES) && ftbw->up->up==0)
{
/*
This word MUST BE present in every document returned,
so we can stop the search right now
*/
ftb->state=INDEX_DONE;
return 1; /* search is done */
}
else
return 0;
}
/* going up to the first-level tree to continue search there */
_mi_dpointer(info, ftbw->word+ftbw->off+HA_FT_WLEN, ftbw->key_root);
ftbw->key_root=info->s->state.key_root[ftb->keynr];
ftbw->keyinfo=info->s->keyinfo+ftb->keynr;
ftbw->off=0;
return _ft2_search(ftb, ftbw, 0);
}
/* matching key found */
memcpy(ftbw->word+ftbw->off, info->lastkey, info->lastkey_length);
if (!ftbw->off && (init_search || (ftbw->flags & FTB_FLAG_TRUNC)))
{
/* going down ? */
get_key_full_length_rdonly(off, info->lastkey);
subkeys=ft_sintXkorr(info->lastkey+off);
if (subkeys<0)
{
/* yep, going down, to the second-level tree */
/* TODO here: subkey-based optimization */
ftbw->off=off;
ftbw->key_root=info->lastpos;
ftbw->keyinfo=& info->s->ft2_keyinfo;
r=_mi_search_first(info, ftbw->keyinfo, ftbw->key_root);
DBUG_ASSERT(r==0); /* found something */
memcpy(ftbw->word+off, info->lastkey, info->lastkey_length);
}
}
ftbw->docid[0]=info->lastpos;
return 0;
}
static void _ftb_init_index_search(FT_INFO *ftb)
{
int i, r;
int i;
FTB_WORD *ftbw;
MI_INFO *info=ftb->info;
MI_KEYDEF *keyinfo;
my_off_t keyroot;
if ((ftb->state != READY && ftb->state !=INDEX_DONE) ||
ftb->keynr == NO_SUCH_KEY)
return;
ftb->state=INDEX_SEARCH;
keyinfo=info->s->keyinfo+ftb->keynr;
keyroot=info->s->state.key_root[ftb->keynr];
for (i=ftb->queue.elements; i; i--)
{
ftbw=(FTB_WORD *)(ftb->queue.root[i]);
@ -248,34 +328,9 @@ static void _ftb_init_index_search(FT_INFO *ftb)
}
}
}
r=_mi_search(info, keyinfo, (uchar*) ftbw->word, ftbw->len,
SEARCH_FIND | SEARCH_BIGGER, keyroot);
if (!r)
{
r= mi_compare_text(ftb->charset,
info->lastkey + (ftbw->flags&FTB_FLAG_TRUNC),
ftbw->len - (ftbw->flags&FTB_FLAG_TRUNC),
(uchar*) ftbw->word + (ftbw->flags&FTB_FLAG_TRUNC),
ftbw->len - (ftbw->flags&FTB_FLAG_TRUNC),
0);
}
if (r) /* not found */
{
if (ftbw->flags&FTB_FLAG_YES && ftbw->up->up==0)
{
/*
This word MUST BE present in every document returned,
so we can abort the search right now
*/
ftb->state=INDEX_DONE;
return;
}
}
else
{
memcpy(ftbw->word, info->lastkey, info->lastkey_length);
ftbw->docid[0]=info->lastpos;
}
if (_ft2_search(ftb, ftbw, 1))
return;
}
queue_fix(& ftb->queue);
}
@ -436,10 +491,7 @@ int ft_boolean_read_next(FT_INFO *ftb, char *record)
FTB_EXPR *ftbe;
FTB_WORD *ftbw;
MI_INFO *info=ftb->info;
MI_KEYDEF *keyinfo=info->s->keyinfo+ftb->keynr;
my_off_t keyroot=info->s->state.key_root[ftb->keynr];
my_off_t curdoc;
int r;
if (ftb->state != INDEX_SEARCH && ftb->state != INDEX_DONE)
return -1;
@ -466,34 +518,7 @@ int ft_boolean_read_next(FT_INFO *ftb, char *record)
_ftb_climb_the_tree(ftb, ftbw, 0);
/* update queue */
r=_mi_search(info, keyinfo, (uchar*) ftbw->word, USE_WHOLE_KEY,
SEARCH_BIGGER , keyroot);
if (!r)
{
r= mi_compare_text(ftb->charset,
info->lastkey + (ftbw->flags&FTB_FLAG_TRUNC),
ftbw->len - (ftbw->flags&FTB_FLAG_TRUNC),
(uchar*) ftbw->word + (ftbw->flags&FTB_FLAG_TRUNC),
ftbw->len - (ftbw->flags&FTB_FLAG_TRUNC),
0);
}
if (r) /* not found */
{
ftbw->docid[0]=HA_POS_ERROR;
if (ftbw->flags&FTB_FLAG_YES && ftbw->up->up==0)
{
/*
This word MUST BE present in every document returned,
so we can stop the search right now
*/
ftb->state=INDEX_DONE;
}
}
else
{
memcpy(ftbw->word, info->lastkey, info->lastkey_length);
ftbw->docid[0]=info->lastpos;
}
_ft2_search(ftb, ftbw, 0);
queue_replaced(& ftb->queue);
}
@ -503,9 +528,9 @@ int ft_boolean_read_next(FT_INFO *ftb, char *record)
{
/* curdoc matched ! */
if (is_tree_inited(&ftb->no_dupes) &&
tree_insert(&ftb->no_dupes, &curdoc, 0,
tree_insert(&ftb->no_dupes, &curdoc, 0,
ftb->no_dupes.custom_arg)->count >1)
/* but it managed to get past this line once */
/* but it managed already to get past this line once */
continue;
info->lastpos=curdoc;