mirror of
https://github.com/sqlite/sqlite.git
synced 2025-11-16 23:02:26 +03:00
Improvements to cost estimation for evaluating the IN operator.
Ticket #3757. (CVS 6403) FossilOrigin-Name: 0c438e813c411e8f9e92d6c7405fccb7a36e110a
This commit is contained in:
13
manifest
13
manifest
@@ -1,5 +1,5 @@
|
|||||||
C Previous\scommit\s((6401))\sdid\snot\squite\sfix\sthe\sproblem.\s\sThis\sshould\swork\nbetter.\s(CVS\s6402)
|
C Improvements\sto\scost\sestimation\sfor\sevaluating\sthe\sIN\soperator.\nTicket\s#3757.\s(CVS\s6403)
|
||||||
D 2009-03-28T23:47:11
|
D 2009-03-29T00:13:03
|
||||||
F Makefile.arm-wince-mingw32ce-gcc fcd5e9cd67fe88836360bb4f9ef4cb7f8e2fb5a0
|
F Makefile.arm-wince-mingw32ce-gcc fcd5e9cd67fe88836360bb4f9ef4cb7f8e2fb5a0
|
||||||
F Makefile.in 583e87706abc3026960ed759aff6371faf84c211
|
F Makefile.in 583e87706abc3026960ed759aff6371faf84c211
|
||||||
F Makefile.linux-gcc d53183f4aa6a9192d249731c90dbdffbd2c68654
|
F Makefile.linux-gcc d53183f4aa6a9192d249731c90dbdffbd2c68654
|
||||||
@@ -209,7 +209,7 @@ F src/vdbeblob.c e67757450ae8581a8b354d9d7e467e41502dfe38
|
|||||||
F src/vdbemem.c 38615b5d4b1b3b5a1221a5623578e5e3864e4888
|
F src/vdbemem.c 38615b5d4b1b3b5a1221a5623578e5e3864e4888
|
||||||
F src/vtab.c f1aba5a6dc1f83b97a39fbbc58ff8cbc76311347
|
F src/vtab.c f1aba5a6dc1f83b97a39fbbc58ff8cbc76311347
|
||||||
F src/walker.c 42bd3f00ca2ef5ae842304ec0d59903ef051412d
|
F src/walker.c 42bd3f00ca2ef5ae842304ec0d59903ef051412d
|
||||||
F src/where.c d0a78f876593b596c1e97286921cdc746e2f65ce
|
F src/where.c 72b84f31a0bed42c665fb922b74e9aea5ae3ced2
|
||||||
F test/aggerror.test a867e273ef9e3d7919f03ef4f0e8c0d2767944f2
|
F test/aggerror.test a867e273ef9e3d7919f03ef4f0e8c0d2767944f2
|
||||||
F test/alias.test 597662c5d777a122f9a3df0047ea5c5bd383a911
|
F test/alias.test 597662c5d777a122f9a3df0047ea5c5bd383a911
|
||||||
F test/all.test 14165b3e32715b700b5f0cbf8f6e3833dda0be45
|
F test/all.test 14165b3e32715b700b5f0cbf8f6e3833dda0be45
|
||||||
@@ -631,6 +631,7 @@ F test/tkt35xx.test 53bca895091e968126a858ee7da186f59f328994
|
|||||||
F test/tkt3630.test 929f64852103054125200bc825c316d5f75d42f7
|
F test/tkt3630.test 929f64852103054125200bc825c316d5f75d42f7
|
||||||
F test/tkt3718.test 3ee5e25702f3f5a31340b2766d7a7fac2b5ce99c
|
F test/tkt3718.test 3ee5e25702f3f5a31340b2766d7a7fac2b5ce99c
|
||||||
F test/tkt3731.test 8a6e3732f5a8a24eb875a6faf287ef77bb8c0579
|
F test/tkt3731.test 8a6e3732f5a8a24eb875a6faf287ef77bb8c0579
|
||||||
|
F test/tkt3757.test 8f2208930655bbd4f92c14e19e72303a43e098ef
|
||||||
F test/tokenize.test ce430a7aed48fc98301611429595883fdfcab5d7
|
F test/tokenize.test ce430a7aed48fc98301611429595883fdfcab5d7
|
||||||
F test/trace.test 951cd0f5f571e7f36bf7bfe04be70f90fb16fb00
|
F test/trace.test 951cd0f5f571e7f36bf7bfe04be70f90fb16fb00
|
||||||
F test/trans.test 8b79967a7e085289ec64890c6fdf9d089e1b4a5f
|
F test/trans.test 8b79967a7e085289ec64890c6fdf9d089e1b4a5f
|
||||||
@@ -710,7 +711,7 @@ F tool/speedtest16.c c8a9c793df96db7e4933f0852abb7a03d48f2e81
|
|||||||
F tool/speedtest2.tcl ee2149167303ba8e95af97873c575c3e0fab58ff
|
F tool/speedtest2.tcl ee2149167303ba8e95af97873c575c3e0fab58ff
|
||||||
F tool/speedtest8.c 2902c46588c40b55661e471d7a86e4dd71a18224
|
F tool/speedtest8.c 2902c46588c40b55661e471d7a86e4dd71a18224
|
||||||
F tool/speedtest8inst1.c 293327bc76823f473684d589a8160bde1f52c14e
|
F tool/speedtest8inst1.c 293327bc76823f473684d589a8160bde1f52c14e
|
||||||
P fb35cff855e17771caee2a992e7b2b4105b94862
|
P 2e7d3cc9f04de1fe7ef95cd5736dbc409c209cef
|
||||||
R 6c5a32ed1dbda923c028324f6e381112
|
R f4372d788a5fa3a6a07058dbb62c2eb3
|
||||||
U drh
|
U drh
|
||||||
Z f8f9e15c31b81a3e0355740fc1bcee00
|
Z 1bf6a0cf4ef6100a42ff75750d967094
|
||||||
|
|||||||
@@ -1 +1 @@
|
|||||||
2e7d3cc9f04de1fe7ef95cd5736dbc409c209cef
|
0c438e813c411e8f9e92d6c7405fccb7a36e110a
|
||||||
33
src/where.c
33
src/where.c
@@ -16,7 +16,7 @@
|
|||||||
** so is applicable. Because this module is responsible for selecting
|
** so is applicable. Because this module is responsible for selecting
|
||||||
** indices, you might also think of this module as the "query optimizer".
|
** indices, you might also think of this module as the "query optimizer".
|
||||||
**
|
**
|
||||||
** $Id: where.c,v 1.377 2009/03/25 16:51:43 drh Exp $
|
** $Id: where.c,v 1.378 2009/03/29 00:13:03 drh Exp $
|
||||||
*/
|
*/
|
||||||
#include "sqliteInt.h"
|
#include "sqliteInt.h"
|
||||||
|
|
||||||
@@ -26,7 +26,7 @@
|
|||||||
#if defined(SQLITE_TEST) || defined(SQLITE_DEBUG)
|
#if defined(SQLITE_TEST) || defined(SQLITE_DEBUG)
|
||||||
int sqlite3WhereTrace = 0;
|
int sqlite3WhereTrace = 0;
|
||||||
#endif
|
#endif
|
||||||
#if 0
|
#if 1
|
||||||
# define WHERETRACE(X) if(sqlite3WhereTrace) sqlite3DebugPrintf X
|
# define WHERETRACE(X) if(sqlite3WhereTrace) sqlite3DebugPrintf X
|
||||||
#else
|
#else
|
||||||
# define WHERETRACE(X)
|
# define WHERETRACE(X)
|
||||||
@@ -1926,12 +1926,18 @@ static void bestIndex(
|
|||||||
pProbe = pSrc->pIndex;
|
pProbe = pSrc->pIndex;
|
||||||
}
|
}
|
||||||
for(; pProbe; pProbe=(pSrc->pIndex ? 0 : pProbe->pNext)){
|
for(; pProbe; pProbe=(pSrc->pIndex ? 0 : pProbe->pNext)){
|
||||||
double inMultiplier = 1;
|
double inMultiplier = 1; /* Number of equality look-ups needed */
|
||||||
|
int inMultIsEst = 0; /* True if inMultiplier is an estimate */
|
||||||
|
|
||||||
WHERETRACE(("... index %s:\n", pProbe->zName));
|
WHERETRACE(("... index %s:\n", pProbe->zName));
|
||||||
|
|
||||||
/* Count the number of columns in the index that are satisfied
|
/* Count the number of columns in the index that are satisfied
|
||||||
** by x=EXPR constraints or x IN (...) constraints.
|
** by x=EXPR constraints or x IN (...) constraints. For a term
|
||||||
|
** of the form x=EXPR we only have to do a single binary search.
|
||||||
|
** But for x IN (...) we have to do a number of binary searches
|
||||||
|
** equal to the number of entries on the RHS of the IN operator.
|
||||||
|
** The inMultiplier variable will try to estimate the number of
|
||||||
|
** binary searches needed.
|
||||||
*/
|
*/
|
||||||
wsFlags = 0;
|
wsFlags = 0;
|
||||||
for(i=0; i<pProbe->nColumn; i++){
|
for(i=0; i<pProbe->nColumn; i++){
|
||||||
@@ -1944,21 +1950,31 @@ static void bestIndex(
|
|||||||
wsFlags |= WHERE_COLUMN_IN;
|
wsFlags |= WHERE_COLUMN_IN;
|
||||||
if( ExprHasProperty(pExpr, EP_xIsSelect) ){
|
if( ExprHasProperty(pExpr, EP_xIsSelect) ){
|
||||||
inMultiplier *= 25;
|
inMultiplier *= 25;
|
||||||
|
inMultIsEst = 1;
|
||||||
}else if( pExpr->x.pList ){
|
}else if( pExpr->x.pList ){
|
||||||
inMultiplier *= pExpr->x.pList->nExpr + 1;
|
inMultiplier *= pExpr->x.pList->nExpr + 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
nRow = pProbe->aiRowEst[i] * inMultiplier;
|
nRow = pProbe->aiRowEst[i] * inMultiplier;
|
||||||
cost = nRow * estLog(inMultiplier);
|
/* If inMultiplier is an estimate and that estimate results in an
|
||||||
|
** nRow that is more than half the number of rows in the table,
|
||||||
|
** then reduce inMultiplier */
|
||||||
|
if( inMultIsEst && nRow*2 > pProbe->aiRowEst[0] ){
|
||||||
|
nRow = pProbe->aiRowEst[0]/2;
|
||||||
|
inMultiplier = nRow/pProbe->aiRowEst[i];
|
||||||
|
}
|
||||||
|
cost = nRow + inMultiplier*estLog(pProbe->aiRowEst[0]);
|
||||||
nEq = i;
|
nEq = i;
|
||||||
if( pProbe->onError!=OE_None && (wsFlags & WHERE_COLUMN_IN)==0
|
if( pProbe->onError!=OE_None && (wsFlags & WHERE_COLUMN_IN)==0
|
||||||
&& nEq==pProbe->nColumn ){
|
&& nEq==pProbe->nColumn ){
|
||||||
wsFlags |= WHERE_UNIQUE;
|
wsFlags |= WHERE_UNIQUE;
|
||||||
}
|
}
|
||||||
WHERETRACE(("...... nEq=%d inMult=%.9g cost=%.9g\n",nEq,inMultiplier,cost));
|
WHERETRACE(("...... nEq=%d inMult=%.9g nRow=%.9g cost=%.9g\n",
|
||||||
|
nEq, inMultiplier, nRow, cost));
|
||||||
|
|
||||||
/* Look for range constraints
|
/* Look for range constraints. Assume that each range constraint
|
||||||
|
** reduces the search space to 1/3rd of its previous size.
|
||||||
*/
|
*/
|
||||||
if( nEq<pProbe->nColumn ){
|
if( nEq<pProbe->nColumn ){
|
||||||
int j = pProbe->aiColumn[nEq];
|
int j = pProbe->aiColumn[nEq];
|
||||||
@@ -1975,7 +1991,8 @@ static void bestIndex(
|
|||||||
cost /= 3;
|
cost /= 3;
|
||||||
nRow /= 3;
|
nRow /= 3;
|
||||||
}
|
}
|
||||||
WHERETRACE(("...... range reduces cost to %.9g\n", cost));
|
WHERETRACE(("...... range reduces nRow to %.9g and cost to %.9g\n",
|
||||||
|
nRow, cost));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
60
test/tkt3757.test
Normal file
60
test/tkt3757.test
Normal file
@@ -0,0 +1,60 @@
|
|||||||
|
# 2009 March 28
|
||||||
|
#
|
||||||
|
# The author disclaims copyright to this source code. In place of
|
||||||
|
# a legal notice, here is a blessing:
|
||||||
|
#
|
||||||
|
# May you do good and not evil.
|
||||||
|
# May you find forgiveness for yourself and forgive others.
|
||||||
|
# May you share freely, never taking more than you give.
|
||||||
|
#
|
||||||
|
#***********************************************************************
|
||||||
|
#
|
||||||
|
# Ticket #3757: The cost functions on the query optimizer for the
|
||||||
|
# IN operator can be improved.
|
||||||
|
#
|
||||||
|
# $Id: tkt3757.test,v 1.1 2009/03/29 00:13:04 drh Exp $
|
||||||
|
|
||||||
|
set testdir [file dirname $argv0]
|
||||||
|
source $testdir/tester.tcl
|
||||||
|
|
||||||
|
# Evaluate SQL. Return the result set followed by the
|
||||||
|
# number of full-scan steps and the number of sort operations.
|
||||||
|
#
|
||||||
|
proc count_steps {sql} {
|
||||||
|
set r [db eval $sql]
|
||||||
|
lappend r scan [db status step] sort [db status sort]
|
||||||
|
}
|
||||||
|
|
||||||
|
# Construct tables
|
||||||
|
#
|
||||||
|
do_test tkt3757-1.1 {
|
||||||
|
db eval {
|
||||||
|
CREATE TABLE t1(x INTEGER, y INTEGER, z TEXT);
|
||||||
|
CREATE INDEX t1i1 ON t1(y,z);
|
||||||
|
INSERT INTO t1 VALUES(1,2,'three');
|
||||||
|
CREATE TABLE t2(a INTEGER, b TEXT);
|
||||||
|
INSERT INTO t2 VALUES(2, 'two');
|
||||||
|
ANALYZE;
|
||||||
|
SELECT * FROM sqlite_stat1;
|
||||||
|
}
|
||||||
|
} {t1 t1i1 {1 1 1}}
|
||||||
|
|
||||||
|
# Modify statistics in order to make the optimizer think that:
|
||||||
|
#
|
||||||
|
# (1) Table T1 has about 250K entries
|
||||||
|
# (2) There are only about 5 distinct values of T1.y
|
||||||
|
#
|
||||||
|
# Then run a query with "t1.y IN (SELECT ..)" in the WHERE clause.
|
||||||
|
# Make sure the index is used.
|
||||||
|
#
|
||||||
|
do_test tkt3757-1.2 {
|
||||||
|
db eval {
|
||||||
|
DELETE FROM sqlite_stat1;
|
||||||
|
INSERT INTO sqlite_stat1 VALUES('t1','t1i1','250000 50000 30');
|
||||||
|
}
|
||||||
|
count_steps {
|
||||||
|
SELECT * FROM t1 WHERE y IN (SELECT a FROM t2)
|
||||||
|
}
|
||||||
|
} {1 2 three scan 0 sort 0}
|
||||||
|
|
||||||
|
finish_test
|
||||||
Reference in New Issue
Block a user