mirror of
https://github.com/sqlite/sqlite.git
synced 2025-11-18 10:21:03 +03:00
Only choose to scan an IN operator rather than use an index if we have
real STAT1 data to suggest it is advantageous. FossilOrigin-Name: 30e874661dcc1a2ecb40df2ef74582151d85bb36c754a38548829a3b6285f18d
This commit is contained in:
60
src/where.c
60
src/where.c
@@ -2471,34 +2471,38 @@ static int whereLoopAddBtreeIndex(
|
||||
assert( nIn>0 ); /* RHS always has 2 or more terms... The parser
|
||||
** changes "x IN (?)" into "x=?". */
|
||||
}
|
||||
/* Let:
|
||||
** N = the total number of rows in the table
|
||||
** K = the number of entries on the right-hand side of the IN operator
|
||||
** M = the number of rows in the table that match terms to the
|
||||
** to the left in the same index. If the IN operator is on
|
||||
** the left-most index column, M==N.
|
||||
**
|
||||
** Given the definitions above, it is better to omit the IN operator
|
||||
** from the index lookup and instead do a scan of the M elements,
|
||||
** testing each scanned row against the IN operator separately, if:
|
||||
**
|
||||
** M*log(K) < K*log(N)
|
||||
**
|
||||
** Our estimates for M, K, and N might be inaccurate, so we build in
|
||||
** a safety margin of 2 (LogEst: 10) that favors using the IN operator
|
||||
** with the index, as using an index has better worst-case behavior.
|
||||
*/
|
||||
M = pProbe->aiRowLogEst[saved_nEq+1];
|
||||
logK = sqlite3LogEst(nIn);
|
||||
if( M + logK + 10 < nIn + rLogSize ){
|
||||
WHERETRACE(0x40,
|
||||
("IN operator costs more than scan on column %d of \"%s\" (%d<%d)\n",
|
||||
saved_nEq, pProbe->zName, M+logK+10, nIn+rLogSize));
|
||||
continue;
|
||||
}else{
|
||||
WHERETRACE(0x40,
|
||||
("IN operator cheaper than scan on column %d of \"%s\" (%d>=%d)\n",
|
||||
saved_nEq, pProbe->zName, M+logK+10, nIn+rLogSize));
|
||||
if( pProbe->hasStat1 ){
|
||||
/* Let:
|
||||
** N = the total number of rows in the table
|
||||
** K = the number of entries on the RHS of the IN operator
|
||||
** M = the number of rows in the table that match terms to the
|
||||
** to the left in the same index. If the IN operator is on
|
||||
** the left-most index column, M==N.
|
||||
**
|
||||
** Given the definitions above, it is better to omit the IN operator
|
||||
** from the index lookup and instead do a scan of the M elements,
|
||||
** testing each scanned row against the IN operator separately, if:
|
||||
**
|
||||
** M*log(K) < K*log(N)
|
||||
**
|
||||
** Our estimates for M, K, and N might be inaccurate, so we build in
|
||||
** a safety margin of 2 (LogEst: 10) that favors using the IN operator
|
||||
** with the index, as using an index has better worst-case behavior.
|
||||
** If we do not have real sqlite_stat1 data, always prefer to use
|
||||
** the index.
|
||||
*/
|
||||
M = pProbe->aiRowLogEst[saved_nEq];
|
||||
logK = estLog(nIn);
|
||||
if( M + logK + 10 < nIn + rLogSize ){
|
||||
WHERETRACE(0x40,
|
||||
("Scan preferred over IN operator on column %d of \"%s\" (%d<%d)\n",
|
||||
saved_nEq, pProbe->zName, M+logK+10, nIn+rLogSize));
|
||||
continue;
|
||||
}else{
|
||||
WHERETRACE(0x40,
|
||||
("IN operator preferred on column %d of \"%s\" (%d>=%d)\n",
|
||||
saved_nEq, pProbe->zName, M+logK+10, nIn+rLogSize));
|
||||
}
|
||||
}
|
||||
pNew->wsFlags |= WHERE_COLUMN_IN;
|
||||
}else if( eOp & (WO_EQ|WO_IS) ){
|
||||
|
||||
Reference in New Issue
Block a user