mirror of
https://github.com/MariaDB/server.git
synced 2025-08-01 03:47:19 +03:00
phrase search
This commit is contained in:
@ -59,6 +59,7 @@ static double *nwghts=_nwghts+5; /* nwghts[i] = -0.5*1.5**i */
|
||||
typedef struct st_ftb_expr FTB_EXPR;
|
||||
struct st_ftb_expr {
|
||||
FTB_EXPR *up;
|
||||
byte *quot, *qend;
|
||||
float weight;
|
||||
uint flags;
|
||||
my_off_t docid[2]; /* for index search and for scan */
|
||||
@ -113,7 +114,7 @@ int FTB_WORD_cmp_list(CHARSET_INFO *cs, FTB_WORD **a, FTB_WORD **b)
|
||||
}
|
||||
|
||||
void _ftb_parse_query(FTB *ftb, byte **start, byte *end,
|
||||
FTB_EXPR *up, uint depth)
|
||||
FTB_EXPR *up, uint depth)
|
||||
{
|
||||
byte res;
|
||||
FTB_PARAM param;
|
||||
@ -126,16 +127,17 @@ void _ftb_parse_query(FTB *ftb, byte **start, byte *end,
|
||||
return;
|
||||
|
||||
param.prev=' ';
|
||||
param.quot=up->quot;
|
||||
while ((res=ft_get_word(start,end,&w,¶m)))
|
||||
{
|
||||
int r=param.plusminus;
|
||||
int r=param.plusminus;
|
||||
float weight= (float) (param.pmsign ? nwghts : wghts)[(r>5)?5:((r<-5)?-5:r)];
|
||||
switch (res) {
|
||||
case 1: /* word found */
|
||||
ftbw=(FTB_WORD *)alloc_root(&ftb->mem_root,
|
||||
sizeof(FTB_WORD) +
|
||||
(param.trunc ? MI_MAX_KEY_BUFF :
|
||||
w.len+extra));
|
||||
sizeof(FTB_WORD) +
|
||||
(param.trunc ? MI_MAX_KEY_BUFF :
|
||||
w.len+extra));
|
||||
ftbw->len=w.len+1;
|
||||
ftbw->flags=0;
|
||||
if (param.yesno>0) ftbw->flags|=FTB_FLAG_YES;
|
||||
@ -149,7 +151,7 @@ void _ftb_parse_query(FTB *ftb, byte **start, byte *end,
|
||||
ftbw->word[0]=w.len;
|
||||
if (param.yesno > 0) up->ythresh++;
|
||||
queue_insert(& ftb->queue, (byte *)ftbw);
|
||||
ftb->with_scan|=param.trunc;
|
||||
ftb->with_scan|=(param.trunc & FTB_FLAG_TRUNC);
|
||||
break;
|
||||
case 2: /* left bracket */
|
||||
ftbe=(FTB_EXPR *)alloc_root(&ftb->mem_root, sizeof(FTB_EXPR));
|
||||
@ -160,10 +162,12 @@ void _ftb_parse_query(FTB *ftb, byte **start, byte *end,
|
||||
ftbe->up=up;
|
||||
ftbe->ythresh=ftbe->yweaks=0;
|
||||
ftbe->docid[0]=ftbe->docid[1]=HA_POS_ERROR;
|
||||
if ((ftbe->quot=param.quot)) ftb->with_scan|=2;
|
||||
if (param.yesno > 0) up->ythresh++;
|
||||
_ftb_parse_query(ftb, start, end, ftbe, depth+1);
|
||||
break;
|
||||
case 3: /* right bracket */
|
||||
if (up->quot) up->qend=param.quot;
|
||||
return;
|
||||
}
|
||||
}
|
||||
@ -209,7 +213,7 @@ void _ftb_init_index_search(FT_INFO *ftb)
|
||||
ftbw->len - (ftbw->flags&FTB_FLAG_TRUNC),
|
||||
ftbw->word + (ftbw->flags&FTB_FLAG_TRUNC),
|
||||
ftbw->len - (ftbw->flags&FTB_FLAG_TRUNC),
|
||||
0);
|
||||
0);
|
||||
}
|
||||
if (r) /* not found */
|
||||
{
|
||||
@ -260,7 +264,7 @@ FT_INFO * ft_init_boolean_search(MI_INFO *info, uint keynr, byte *query,
|
||||
ftbe->weight=1;
|
||||
ftbe->flags=FTB_FLAG_YES;
|
||||
ftbe->nos=1;
|
||||
ftbe->up=0;
|
||||
ftbe->quot=ftbe->up=0;
|
||||
ftbe->ythresh=ftbe->yweaks=0;
|
||||
ftbe->docid[0]=ftbe->docid[1]=HA_POS_ERROR;
|
||||
ftb->root=ftbe;
|
||||
@ -270,16 +274,39 @@ FT_INFO * ft_init_boolean_search(MI_INFO *info, uint keynr, byte *query,
|
||||
memcpy(ftb->list, ftb->queue.root+1, sizeof(FTB_WORD *)*ftb->queue.elements);
|
||||
qsort2(ftb->list, ftb->queue.elements, sizeof(FTB_WORD *),
|
||||
(qsort2_cmp)FTB_WORD_cmp_list, ftb->charset);
|
||||
if (ftb->queue.elements<2) ftb->with_scan=0;
|
||||
if (ftb->queue.elements<2) ftb->with_scan &= ~FTB_FLAG_TRUNC;
|
||||
ftb->state=READY;
|
||||
return ftb;
|
||||
}
|
||||
|
||||
void _ftb_climb_the_tree(FTB_WORD *ftbw, uint mode)
|
||||
/* returns 1 if str0 contain str1 */
|
||||
int _ftb_strstr(const byte *s0, const byte *e0,
|
||||
const byte *s1, const byte *e1,
|
||||
CHARSET_INFO *cs)
|
||||
{
|
||||
const byte *p;
|
||||
|
||||
while (s0 < e0)
|
||||
{
|
||||
while (s0 < e0 && cs->to_upper[*s0++] != cs->to_upper[*s1])
|
||||
/* no-op */;
|
||||
if (s0 >= e0)
|
||||
return 0;
|
||||
p=s1+1;
|
||||
while (s0 < e0 && p < e1 && cs->to_upper[*s0++] == cs->to_upper[*p++])
|
||||
/* no-op */;
|
||||
if (p >= e1)
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
void _ftb_climb_the_tree(FTB *ftb, FTB_WORD *ftbw, FT_SEG_ITERATOR *ftsi_orig)
|
||||
{
|
||||
FT_SEG_ITERATOR ftsi;
|
||||
FTB_EXPR *ftbe;
|
||||
float weight=ftbw->weight;
|
||||
int yn=ftbw->flags, ythresh;
|
||||
int yn=ftbw->flags, ythresh, mode=(ftsi_orig != 0);
|
||||
my_off_t curdoc=ftbw->docid[mode];
|
||||
|
||||
for (ftbe=ftbw->up; ftbe; ftbe=ftbe->up)
|
||||
@ -300,6 +327,20 @@ void _ftb_climb_the_tree(FTB_WORD *ftbw, uint mode)
|
||||
{
|
||||
yn=ftbe->flags;
|
||||
weight=ftbe->cur_weight*ftbe->weight;
|
||||
if (mode && ftbe->quot)
|
||||
{
|
||||
int not_found=1;
|
||||
|
||||
memcpy(&ftsi, ftsi_orig, sizeof(ftsi));
|
||||
while (_mi_ft_segiterator(&ftsi) && not_found)
|
||||
{
|
||||
if (!ftsi.pos)
|
||||
continue;
|
||||
not_found = ! _ftb_strstr(ftsi.pos, ftsi.pos+ftsi.len,
|
||||
ftbe->quot, ftbe->qend, ftb->charset);
|
||||
}
|
||||
if (not_found) break;
|
||||
} /* ftbe->quot */
|
||||
}
|
||||
else
|
||||
break;
|
||||
@ -356,7 +397,7 @@ int ft_boolean_read_next(FT_INFO *ftb, char *record)
|
||||
{
|
||||
while (curdoc==(ftbw=(FTB_WORD *)queue_top(& ftb->queue))->docid[0])
|
||||
{
|
||||
_ftb_climb_the_tree(ftbw,0);
|
||||
_ftb_climb_the_tree(ftb, ftbw, 0);
|
||||
|
||||
/* update queue */
|
||||
r=_mi_search(info, keyinfo, (uchar*) ftbw->word, USE_WHOLE_KEY,
|
||||
@ -367,7 +408,7 @@ int ft_boolean_read_next(FT_INFO *ftb, char *record)
|
||||
info->lastkey + (ftbw->flags&FTB_FLAG_TRUNC),
|
||||
ftbw->len - (ftbw->flags&FTB_FLAG_TRUNC),
|
||||
ftbw->word + (ftbw->flags&FTB_FLAG_TRUNC),
|
||||
ftbw->len - (ftbw->flags&FTB_FLAG_TRUNC),
|
||||
ftbw->len - (ftbw->flags&FTB_FLAG_TRUNC),
|
||||
0);
|
||||
}
|
||||
if (r) /* not found */
|
||||
@ -414,7 +455,7 @@ float ft_boolean_find_relevance(FT_INFO *ftb, byte *record, uint length)
|
||||
FT_WORD word;
|
||||
FTB_WORD *ftbw;
|
||||
FTB_EXPR *ftbe;
|
||||
FT_SEG_ITERATOR ftsi;
|
||||
FT_SEG_ITERATOR ftsi, ftsi2;
|
||||
const byte *end;
|
||||
my_off_t docid=ftb->info->lastpos;
|
||||
|
||||
@ -423,17 +464,11 @@ float ft_boolean_find_relevance(FT_INFO *ftb, byte *record, uint length)
|
||||
if (!ftb->queue.elements)
|
||||
return 0;
|
||||
|
||||
#if NOT_USED
|
||||
if (ftb->state == READY || ftb->state == INDEX_DONE)
|
||||
ftb->state=SCAN;
|
||||
else if (ftb->state != SCAN)
|
||||
return -3.0;
|
||||
#endif
|
||||
|
||||
if (ftb->keynr==NO_SUCH_KEY)
|
||||
_mi_ft_segiterator_dummy_init(record, length, &ftsi);
|
||||
else
|
||||
_mi_ft_segiterator_init(ftb->info, ftb->keynr, record, &ftsi);
|
||||
memcpy(&ftsi2, &ftsi, sizeof(ftsi));
|
||||
|
||||
while (_mi_ft_segiterator(&ftsi))
|
||||
{
|
||||
@ -464,7 +499,7 @@ float ft_boolean_find_relevance(FT_INFO *ftb, byte *record, uint length)
|
||||
if (ftbw->docid[1] == docid)
|
||||
continue;
|
||||
ftbw->docid[1]=docid;
|
||||
_ftb_climb_the_tree(ftbw,1);
|
||||
_ftb_climb_the_tree(ftb, ftbw, &ftsi2);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user