1
0
mirror of https://github.com/sqlite/sqlite.git synced 2025-08-05 15:55:57 +03:00

Add a Bloom filter to the automatic-index mechanism.

FossilOrigin-Name: 50ac4de1d7cbb586ea7969e1ae80ea8b021e194edc2fa7db19374b4ee9369bee
This commit is contained in:
drh
2021-12-01 16:31:02 +00:00
parent c1085ea412
commit 2db144c33b
9 changed files with 143 additions and 16 deletions

View File

@@ -671,6 +671,35 @@ static Mem *out2Prerelease(Vdbe *p, VdbeOp *pOp){
}
}
/*
** Default size of a bloom filter, in bytes
*/
#define SQLITE_BLOOM_SZ 10000
/*
** Compute a bloom filter hash using pOp->p4.i registers from aMem[] beginning
** with pOp->p3. Return the hash.
*/
static unsigned int filterHash(const Mem *aMem, const Op *pOp){
int i, mx;
u32 h = 0;
i = pOp->p3;
assert( pOp->p4type==P4_INT32 );
mx = i + pOp->p4.i;
for(i=pOp->p3, mx=i+pOp->p4.i; i<mx; i++){
const Mem *p = &aMem[i];
if( p->flags & (MEM_Int|MEM_IntReal) ){
h += (u32)(p->u.i&0xffffffff);
}else if( p->flags & MEM_Real ){
h += (u32)(sqlite3VdbeIntValue(p)&0xffffffff);
}else if( p->flags & (MEM_Str|MEM_Blob) ){
h += p->n;
}
}
return h % (SQLITE_BLOOM_SZ*8);
}
/*
** Return the symbolic name for the data type of a pMem
*/
@@ -8129,6 +8158,83 @@ case OP_Function: { /* group */
break;
}
/* Opcode: FilterInit P1 * * * *
** Synopsis: filter(P1) = empty
**
** Initialize register P1 so that is an empty bloom filter.
*/
case OP_FilterInit: {
assert( pOp->p1>0 && pOp->p1<=(p->nMem+1 - p->nCursor) );
pIn1 = &aMem[pOp->p1];
sqlite3VdbeMemSetZeroBlob(pIn1, SQLITE_BLOOM_SZ);
if( sqlite3VdbeMemExpandBlob(pIn1) ) goto no_mem;
break;
}
/* Opcode: FilterAdd P1 * P3 P4 *
** Synopsis: filter(P1) += key(P3@P4)
**
** Compute a hash on the P4 registers starting with r[P3] and
** add that hash to the bloom filter contained in r[P1].
*/
case OP_FilterAdd: {
u32 h;
assert( pOp->p1>0 && pOp->p1<=(p->nMem+1 - p->nCursor) );
pIn1 = &aMem[pOp->p1];
assert( pIn1->flags & MEM_Blob );
assert( pIn1->n==SQLITE_BLOOM_SZ );
h = filterHash(aMem, pOp);
#ifdef SQLITE_DEBUG
if( db->flags&SQLITE_VdbeTrace ){
int ii;
for(ii=pOp->p3; ii<pOp->p3+pOp->p4.i; ii++){
registerTrace(ii, &aMem[ii]);
}
printf("hash = %u\n", h);
}
#endif
assert( h>=0 && h<SQLITE_BLOOM_SZ*8 );
pIn1->z[h/8] |= 1<<(h&7);
break;
}
/* Opcode: Filter P1 P2 P3 P4 *
** Synopsis: if key(P3@P4) not in filter(P1) goto P2
**
** Compute a hash on the key contained in the P4 registers starting
** with r[P3]. Check to see if that hash is found in the
** bloom filter hosted by register P1. If it is not present then
** maybe jump to P2. Otherwise fall through.
**
** False negatives are harmless. It is always safe to fall through,
** even if the value is in the bloom filter. A false negative causes
** more CPU cycles to be used, but it should still yield the correct
** answer. However, an incorrect answer may well arise from a
** false positive - if the jump is taken when it should fall through.
*/
case OP_Filter: { /* jump */
u32 h;
assert( pOp->p1>0 && pOp->p1<=(p->nMem+1 - p->nCursor) );
pIn1 = &aMem[pOp->p1];
assert( pIn1->flags & MEM_Blob );
assert( pIn1->n==SQLITE_BLOOM_SZ );
h = filterHash(aMem, pOp);
#ifdef SQLITE_DEBUG
if( db->flags&SQLITE_VdbeTrace ){
int ii;
for(ii=pOp->p3; ii<pOp->p3+pOp->p4.i; ii++){
registerTrace(ii, &aMem[ii]);
}
printf("hash = %u\n", h);
}
#endif
assert( h>=0 && h<SQLITE_BLOOM_SZ*8 );
if( (pIn1->z[h/8] & (1<<(h&7)))==0 ) goto jump_to_p2;
break;
}
/* Opcode: Trace P1 P2 * P4 *
**
** Write P4 on the statement trace output if statement tracing is