1
0
mirror of https://github.com/sqlite/sqlite.git synced 2025-07-30 19:03:16 +03:00

Improved query planner cost estimates. Fix for ticket [e8b674241947eb3b].

FossilOrigin-Name: 1b779afa3ed2f35a110e460fc6ed13cba744db85b9924149ab028b100d1e1e12
This commit is contained in:
drh
2022-12-05 02:52:37 +00:00
7 changed files with 101 additions and 32 deletions

View File

@ -1,5 +1,5 @@
C Fix\ssafe\smode\sauthorizer\scallback\sto\sreject\sdisallowed\sUDFs.\sReported\sat\s[forum:/forumpost/07beac8056151b2f|Forum\spost\s07beac8056151b2f].
D 2022-12-04T23:20:38.999
C Improved\squery\splanner\scost\sestimates.\s\sFix\sfor\sticket\s[e8b674241947eb3b].
D 2022-12-05T02:52:37.959
F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1
F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea
F LICENSE.md df5091916dbb40e6e9686186587125e1b2ff51f022cc334e886c19a0e9982724
@ -732,7 +732,7 @@ F src/vxworks.h d2988f4e5a61a4dfe82c6524dd3d6e4f2ce3cdb9
F src/wal.c b9df133a705093da8977da5eb202eaadb844839f1c7297c08d33471f5491843d
F src/wal.h c3aa7825bfa2fe0d85bef2db94655f99870a285778baa36307c0a16da32b226a
F src/walker.c f890a3298418d7cba3b69b8803594fdc484ea241206a8dfa99db6dd36f8cbb3b
F src/where.c bf470b5d1ba03af8d558a0c98cc1fa97b330e03a198a7af61895e5a2e8d93f20
F src/where.c 20f4f51d2d5fb19b984e6ea381b26cf627cc93e64dd9b2ce6a94531aec2f5916
F src/whereInt.h e25203e5bfee149f5f1225ae0166cfb4f1e65490c998a024249e98bb0647377c
F src/wherecode.c ee52c2781c36004d23c85bf111063b78fc16e5e1b6a0d424326af8bf90babb0b
F src/whereexpr.c 05295b44b54eea76d1ba766f0908928d0e20e990c249344c9521454d3d09c7ae
@ -1368,7 +1368,7 @@ F test/openv2.test 0d3040974bf402e19b7df4b783e447289d7ab394
F test/optfuzz-db01.c 9f2fa80b8f84ebbf1f2e8b13421a4e0477fe300f6686fbd76cac1d2db66e0fdc
F test/optfuzz-db01.txt 21f6bdeadc701cf11528276e2a55c70bfcb846ba42df327f979bd9e7b6ce7041
F test/optfuzz.c 690430a0bf0ad047d5a168bf52b05b2ee97aedaad8c14337e9eb5050faa64994
F test/orderby1.test a4bba04b9c60a21e53486fbc173a596b29641a3b3a57a0f26a1cbef1360358e9
F test/orderby1.test 02cfd870127a7342170b829175c5c53e9e7405744451ac1aeb2f7e2b0c18ca76
F test/orderby2.test bc11009f7cd99d96b1b11e57b199b00633eb5b04
F test/orderby3.test 8619d06a3debdcd80a27c0fdea5c40b468854b99
F test/orderby4.test 4d39bfbaaa3ae64d026ca2ff166353d2edca4ba4
@ -1525,7 +1525,7 @@ F test/snapshot_fault.test f6c5ef7cb93bf92fbb4e864ecc5c87df7d3a250064838822db5b4
F test/snapshot_up.test a0a29c4cf33475fcef07c3f8e64af795e24ab91b4cc68295863402a393cdd41c
F test/soak.test 18944cf21b94a7fe0df02016a6ee1e9632bc4e8d095a0cb49d95e15d5cca2d5c
F test/softheap1.test 843cd84db9891b2d01b9ab64cef3e9020f98d087
F test/sort.test c2adc635c2564241fefec0b3a68391ef6868fd3b
F test/sort.test f86751134159abb5e5fd4381a0d7038c91013638cd1e3fa1d7850901f6df6196
F test/sort2.test cc23b7c19d684657559e8a55b02f7fcee03851d0
F test/sort3.test 1480ed7c4c157682542224e05e3b75faf4a149e5
F test/sort4.test 5c34d9623a4ae5921d956dfa2b70e77ed0fc6e5c
@ -1884,7 +1884,7 @@ F test/walthread.test 14b20fcfa6ae152f5d8e12f5dc8a8a724b7ef189f5d8ef1e2ceab79f2a
F test/walvfs.test e1a6ad0f3c78e98b55c3d5f0889cf366cc0d0a1cb2bccb44ac9ec67384adc4a1
F test/wapp.tcl b440cd8cf57953d3a49e7ee81e6a18f18efdaf113b69f7d8482b0710a64566ec
F test/wapptest.tcl 1bea58a6a8e68a73f542ee4fca28b771b84ed803bd0c9e385087070b3d747b3c x
F test/where.test d13cd7c24e80009d2b54e2f7a8893c457afa49c64f99359c9eb3fe668ba1d9d4
F test/where.test 98208c95b574269980132c347b4bdb8992c6d5fc30c1954938593336d12e7447
F test/where2.test 03c21a11e7b90e2845fc3c8b4002fc44cc2797fa74c86ee47d70bd7ea4f29ed6
F test/where3.test 5b4ffc0ac2ea0fe92f02b1244b7531522fe4d7bccf6fa8741d54e82c10e67753
F test/where4.test 4a371bfcc607f41d233701bdec33ac2972908ba8
@ -2040,7 +2040,7 @@ F tool/symbols-mingw.sh 4dbcea7e74768305384c9fd2ed2b41bbf9f0414d
F tool/symbols.sh 1612bd947750e21e7b47befad5f6b3825b06cce0705441f903bf35ced65ae9b9
F tool/varint.c 5d94cb5003db9dbbcbcc5df08d66f16071aee003
F tool/vdbe-compress.tcl 1dcb7632e57cf57105248029e6e162fddaf6c0fccb3bb9e6215603752c5a2d4a
F tool/vdbe_profile.tcl 246d0da094856d72d2c12efec03250d71639d19f
F tool/vdbe_profile.tcl 3ac5a4a9449f4baf77059358ea050db3e34395ccf59c5464d29b91746d5b961e
F tool/warnings-clang.sh bbf6a1e685e534c92ec2bfba5b1745f34fb6f0bc2a362850723a9ee87c1b31a7
F tool/warnings.sh d58dc38367cc776550f90327e205d7946802d4004fb9f291fd8b81256bc1eedd
F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f
@ -2065,9 +2065,9 @@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93
F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc
F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e
F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0
P a60e56627fc0ef8831429941d429ee02c6ee51ce5a2c1af581dc5bc5a00d911e
Q +4d934f00634fa31827c0bf2503a5011117c1fe9fd2f41c2a4bf954fb416d9b0f
R 7ec03feb65d26dfd70421ee745ff5ed3
U larrybr
Z e9f5a072853fed1de334998b75e8a6f0
P cefc032473ac5ad244c0b6402c541b2f76c0c65a041bda03bfbe7c0e2c11fac2 df3818997b822743ac407dde45c5fd75845ca40f461e31350d86963dffec6cd6
R 3c93957174c9bb89beba8a6d7c9fdaab
T +closed df3818997b822743ac407dde45c5fd75845ca40f461e31350d86963dffec6cd6
U drh
Z 2d83b2d52ce6d0637e1b84504b86ac00
# Remove this line to create a well-formed Fossil manifest.

View File

@ -1 +1 @@
cefc032473ac5ad244c0b6402c541b2f76c0c65a041bda03bfbe7c0e2c11fac2
1b779afa3ed2f35a110e460fc6ed13cba744db85b9924149ab028b100d1e1e12

View File

@ -3092,7 +3092,15 @@ static int whereLoopAddBtreeIndex(
** seek only. Then, if this is a non-covering index, add the cost of
** visiting the rows in the main table. */
assert( pSrc->pTab->szTabRow>0 );
rCostIdx = pNew->nOut + 1 + (15*pProbe->szIdxRow)/pSrc->pTab->szTabRow;
if( pProbe->idxType==SQLITE_IDXTYPE_IPK ){
/* The pProbe->szIdxRow is low for an IPK table since the interior
** pages are small. Thus szIdxRow gives a good estimate of seek cost.
** But the leaf pages are full-size, so pProbe->szIdxRow would badly
** under-estimate the scanning cost. */
rCostIdx = pNew->nOut + 16;
}else{
rCostIdx = pNew->nOut + 1 + (15*pProbe->szIdxRow)/pSrc->pTab->szTabRow;
}
pNew->rRun = sqlite3LogEstAdd(rLogSize, rCostIdx);
if( (pNew->wsFlags & (WHERE_IDX_ONLY|WHERE_IPK|WHERE_EXPRIDX))==0 ){
pNew->rRun = sqlite3LogEstAdd(pNew->rRun, pNew->nOut + 16);
@ -3472,7 +3480,7 @@ static int whereLoopAddBtree(
sPk.aiRowLogEst = aiRowEstPk;
sPk.onError = OE_Replace;
sPk.pTable = pTab;
sPk.szIdxRow = pTab->szTabRow;
sPk.szIdxRow = 3; /* TUNING: Interior rows of IPK table are very small */
sPk.idxType = SQLITE_IDXTYPE_IPK;
aiRowEstPk[0] = pTab->nRowLogEst;
aiRowEstPk[1] = 0;
@ -4803,37 +4811,56 @@ static const char *wherePathName(WherePath *pPath, int nLoop, WhereLoop *pLast){
** order.
*/
static LogEst whereSortingCost(
WhereInfo *pWInfo,
LogEst nRow,
int nOrderBy,
int nSorted
WhereInfo *pWInfo, /* Query planning context */
LogEst nRow, /* Estimated number of rows to sort */
int nOrderBy, /* Number of ORDER BY clause terms */
int nSorted /* Number of initial ORDER BY terms naturally in order */
){
/* TUNING: Estimated cost of a full external sort, where N is
/* Estimated cost of a full external sort, where N is
** the number of rows to sort, is:
**
** cost = (3.0 * N * log(N)).
** cost = (K * N * log(N)).
**
** Or, if the order-by clause has X terms but only the last Y
** terms are out of order, then block-sorting will reduce the
** sorting cost to:
**
** cost = (3.0 * N * log(N)) * (Y/X)
** cost = (K * N * log(N)) * (Y/X)
**
** The (Y/X) term is implemented using stack variable rScale
** below.
** The constant K is at least 2.0 but will be larger if there are a
** large number of columns to be sorted, as the sorting time is
** proportional to the amount of content to be sorted. The algorithm
** does not currently distinguish between fat columns (BLOBs and TEXTs)
** and skinny columns (INTs). It just uses the number of columns as
** an approximation for the row width.
**
** An extra factor of 2.0 or 3.0 is added to the sorting cost if the sort
** is built using OP_IdxInsert and OP_Sort rather than with OP_SorterInsert.
*/
LogEst rScale, rSortCost;
assert( nOrderBy>0 && 66==sqlite3LogEst(100) );
rScale = sqlite3LogEst((nOrderBy-nSorted)*100/nOrderBy) - 66;
rSortCost = nRow + rScale + 16;
LogEst rSortCost, nCol;
assert( pWInfo->pSelect!=0 );
assert( pWInfo->pSelect->pEList!=0 );
/* TUNING: sorting cost proportional to the number of output columns: */
nCol = sqlite3LogEst((pWInfo->pSelect->pEList->nExpr+59)/30);
rSortCost = nRow + nCol;
if( nSorted>0 ){
/* Scale the result by (Y/X) */
rSortCost += sqlite3LogEst((nOrderBy-nSorted)*100/nOrderBy) - 66;
}
/* Multiply by log(M) where M is the number of output rows.
** Use the LIMIT for M if it is smaller. Or if this sort is for
** a DISTINCT operator, M will be the number of distinct output
** rows, so fudge it downwards a bit.
*/
if( (pWInfo->wctrlFlags & WHERE_USE_LIMIT)!=0 && pWInfo->iLimit<nRow ){
nRow = pWInfo->iLimit;
if( (pWInfo->wctrlFlags & WHERE_USE_LIMIT)!=0 ){
rSortCost += 10; /* TUNING: Extra 2.0x if using LIMIT */
if( nSorted!=0 ){
rSortCost += 6; /* TUNING: Extra 1.5x if also using partial sort */
}
if( pWInfo->iLimit<nRow ){
nRow = pWInfo->iLimit;
}
}else if( (pWInfo->wctrlFlags & WHERE_WANT_DISTINCT) ){
/* TUNING: In the sort for a DISTINCT operator, assume that the DISTINCT
** reduces the number of output rows by a factor of 2 */
@ -4985,11 +5012,11 @@ static int wherePathSolver(WhereInfo *pWInfo, LogEst nRowEst){
pWInfo, nRowEst, nOrderBy, isOrdered
);
}
/* TUNING: Add a small extra penalty (5) to sorting as an
/* TUNING: Add a small extra penalty (3) to sorting as an
** extra encouragement to the query planner to select a plan
** where the rows emerge in the correct order without any sorting
** required. */
rCost = sqlite3LogEstAdd(rUnsorted, aSortCost[isOrdered]) + 5;
rCost = sqlite3LogEstAdd(rUnsorted, aSortCost[isOrdered]) + 3;
WHERETRACE(0x002,
("---- sort cost=%-3d (%d/%d) increases cost %3d to %-3d\n",

View File

@ -43,6 +43,7 @@ do_test 1.0 {
(NULL, 1, 3, 'one-c'),
(NULL, 2, 1, 'two-a'),
(NULL, 3, 1, 'three-a');
ANALYZE;
COMMIT;
}
} {}
@ -180,6 +181,7 @@ do_test 2.0 {
(1, 3, 'one-c'),
(20, 1, 'two-a'),
(3, 1, 'three-a');
ANALYZE;
COMMIT;
}
} {}
@ -327,6 +329,7 @@ do_test 3.0 {
(NULL, 1, 3, 'one-c'),
(NULL, 2, 1, 'two-a'),
(NULL, 3, 1, 'three-a');
ANALYZE;
COMMIT;
}
} {}

View File

@ -595,4 +595,36 @@ do_execsql_test 17.1 {
SELECT * FROM sqlite_master ORDER BY sql;
} {}
# 2022-12-03 Ticket e8b674241947eb3b
# Improve estimates for the cost of sorting relative
# to the cost of doing an index lookup, so as to get
# a better query plan. See the ticket for a detailed
# example.
#
reset_db
do_execsql_test 18.1 {
CREATE TABLE t1(a INTEGER PRIMARY KEY, b, c);
WITH RECURSIVE c(x) AS (VALUES(1) UNION ALL SELECT x+1 FROM c WHERE x<50)
-- increase to 5000 for actual test data ----^^
INSERT INTO t1(a,b,c) SELECT x, random()%5000, random()%5000 FROM c;
CREATE TABLE t2(d,e,f);
WITH RECURSIVE c(x) AS (VALUES(1) UNION ALL SELECT x+1 FROM c WHERE x<500)
-- increase to 50000 for actual test data -----^^^
INSERT INTO t2(d,e,f) SELECT
NULLIF(0, random()%2), random()%5000, random()%5000
FROM c;
ANALYZE;
UPDATE sqlite_stat1 SET stat='50000' WHERE tbl='t2';
UPDATE sqlite_stat1 SET stat='5000' WHERE tbl='t1';
ANALYZE sqlite_schema;
} {}
do_execsql_test 18.2 {
EXPLAIN QUERY PLAN
SELECT a FROM t1 JOIN t2
WHERE a IN (1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20)
AND a=CASE WHEN d IS NOT NULL THEN e ELSE f END
ORDER BY a;
} {/.*SCAN t2.*SEARCH t1.*/}
# ^^^^^^^--^^^^^^^^^--- t2 should be the outer loop.
finish_test

View File

@ -545,6 +545,7 @@ do_test where-6.1 {
CREATE INDEX t3acb ON t3(a,c,b);
INSERT INTO t3 SELECT w, 101-w, y FROM t1;
SELECT count(*), sum(a), sum(b), sum(c) FROM t3;
ANALYZE;
}
} {100 5050 5050 348550}
do_test where-6.2 {

View File

@ -66,6 +66,8 @@ foreach stmt $allstmt {
puts "********************************************************************"
puts [string trim $sql($stmt)]
puts "Execution count: $cnt($stmt)"
set tcx 0
set ttx 0
for {set i 0} {[info exists stat($stmt,$i)]} {incr i} {
foreach {cx tx detail} $stat($stmt,$i) break
if {$cx==0} {
@ -74,7 +76,11 @@ foreach stmt $allstmt {
set ax [expr {$tx/$cx}]
}
puts [format {%8d %12d %12d %4d %s} $cx $tx $ax $i $detail]
incr tcx $cx
incr ttx $tx
}
set tax [expr {$tcx>0?$ttx/$tcx:0}]
puts [format {%8d %12d %12d TOTAL} $tcx $ttx $tax]
}
puts "********************************************************************"
puts "OPCODES:"