mirror of
https://github.com/sqlite/sqlite.git
synced 2025-12-24 14:17:58 +03:00
Add a heuristic in between the two solver() passes of the query planner that
tries to prevent a very slow query plan in cases where the output row count estimate is imprecise. FossilOrigin-Name: 8018417b0143ea11535f2457bf3e4b3755717c554a17df1076425b4251b5f2c6
This commit is contained in:
16
manifest
16
manifest
@@ -1,5 +1,5 @@
|
||||
C Improved\scomments\sin\sthe\squery\splanner\slogic\sthat\scomputes\sthe\scost\sfor\sa\nparticular\sstep\sin\sa\squery\splan.\s\sNo\scode\schanges.
|
||||
D 2024-04-01T15:38:15.301
|
||||
C Add\sa\sheuristic\sin\sbetween\sthe\stwo\ssolver()\spasses\sof\sthe\squery\splanner\sthat\ntries\sto\sprevent\sa\svery\sslow\squery\splan\sin\scases\swhere\sthe\soutput\srow\scount\nestimate\sis\simprecise.
|
||||
D 2024-04-02T11:44:44.648
|
||||
F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1
|
||||
F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea
|
||||
F LICENSE.md df5091916dbb40e6e9686186587125e1b2ff51f022cc334e886c19a0e9982724
|
||||
@@ -835,7 +835,7 @@ F src/vxworks.h d2988f4e5a61a4dfe82c6524dd3d6e4f2ce3cdb9
|
||||
F src/wal.c 887fc4ca3f020ebb2e376f222069570834ac63bf50111ef0cbf3ae417048ed89
|
||||
F src/wal.h ba252daaa94f889f4b2c17c027e823d9be47ce39da1d3799886bbd51f0490452
|
||||
F src/walker.c 7c7ea0115345851c3da4e04e2e239a29983b61fb5b038b94eede6aba462640e2
|
||||
F src/where.c a15429ae9d42c7b824b9faad7e8234b006abf0a4526166dc4dfc0cc9cd2c9e6a
|
||||
F src/where.c ba9d22fda3850b508e075f8bdc4f4d4f1ff314551597a6f14842bb03aeec0d3a
|
||||
F src/whereInt.h 82a13766f13d1a53b05387c2e60726289ef26404bc7b9b1f7770204d97357fb8
|
||||
F src/wherecode.c 5d77db30a2a3dd532492ae882de114edba2fae672622056b1c7fd61f5917a8f1
|
||||
F src/whereexpr.c 7b64295f1d82ad0928df435925dd7bbd5997b44a026153113eace0d9e71ff435
|
||||
@@ -2017,6 +2017,7 @@ F test/whereJ.test fc05e374cc9f2dc204148d6c06822c380ad388895fe97a6d335b94a26a08a
|
||||
F test/whereK.test 0270ab7f04ba5436fb9156d31d642a1c82727f4c4bfe5ba90d435c78cf44684a
|
||||
F test/whereL.test 438a397fa883b77bb6361c08a8befa41b52e9cfbe15a2a43715d122f8cfa8649
|
||||
F test/whereM.test 0dbc9998783458ddcf3cc078ca7c2951d8b2677d472ecf0028f449ed327c0250
|
||||
F test/whereN.test 63a3584b71acfb6963416de82f26c6b1644abc5ca6080c76546b9246734c8803
|
||||
F test/wherefault.test 6cf2a9c5712952d463d3f45ebee7f6caf400984df51a195d884cfb7eb0e837a7
|
||||
F test/wherelfault.test 9012e4ef5259058b771606616bd007af5d154e64cc25fa9fd4170f6411db44e3
|
||||
F test/wherelimit.test afb46397c6d7e964e6e294ba3569864a0c570fe3807afc634236c2b752372f31
|
||||
@@ -2183,8 +2184,11 @@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93
|
||||
F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc
|
||||
F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e
|
||||
F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0
|
||||
P 090943dc31e7a3af5c11c1c0953cb82ae3ca07ba000189bb85deaecc76921504
|
||||
R 27a9be3de32bd888ad6632e320231186
|
||||
P 0b2ac2cdc767db764e3ea8bbc33898cac4e1ec27fe8c9b60ce08a1785f921e6d
|
||||
R bbe24560206aebee12aa3740963fd4bf
|
||||
T *branch * interstage-heuristic
|
||||
T *sym-interstage-heuristic *
|
||||
T -sym-trunk *
|
||||
U drh
|
||||
Z 13460dee49d687f64f99bd37193c204d
|
||||
Z f6898799f068ada043c459f74f460b9b
|
||||
# Remove this line to create a well-formed Fossil manifest.
|
||||
|
||||
@@ -1 +1 @@
|
||||
0b2ac2cdc767db764e3ea8bbc33898cac4e1ec27fe8c9b60ce08a1785f921e6d
|
||||
8018417b0143ea11535f2457bf3e4b3755717c554a17df1076425b4251b5f2c6
|
||||
64
src/where.c
64
src/where.c
@@ -5530,7 +5530,6 @@ static int wherePathSolver(WhereInfo *pWInfo, LogEst nRowEst){
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
pWInfo->nRowOut = pFrom->nRow;
|
||||
|
||||
/* Free temporary memory and return success */
|
||||
@@ -5538,6 +5537,68 @@ static int wherePathSolver(WhereInfo *pWInfo, LogEst nRowEst){
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** This routine implements a heuristic designed to improve query planning.
|
||||
** This routine is called in between the first and second call to
|
||||
** wherePathSolver(). Hence the name "Interstage" "Heuristic".
|
||||
**
|
||||
** The first call to wherePathSolver() (hereafter just "solver()") computes
|
||||
** the best path without regard to the order of the outputs. The second call
|
||||
** to the solver() builds upon the first call to try to find an alternative
|
||||
** path that satisfies the ORDER BY clause.
|
||||
**
|
||||
** This routine looks at the results of the first solver() run, and for
|
||||
** every FROM clause term in the resulting query plan that uses an equality
|
||||
** constraint against an index, disable other WhereLoops for that same
|
||||
** FROM clause term that would try to do a full-table scan. This prevents
|
||||
** an index search from being converted into a full-table scan in order to
|
||||
** satisfy an ORDER BY clause, since even though we might get slightly better
|
||||
** performance using the full-scan without sorting if the output size
|
||||
** estimates are very precise, we might also get severe performance
|
||||
** degradation using the full-scan if the output size estimate is too large.
|
||||
** It is better to err on the side of caution.
|
||||
**
|
||||
** Except, if the first solver() call generated a full-table scan in an outer
|
||||
** loop then stop this analysis at the first full-scan, since the second
|
||||
** solver() run might try to swap that full-scan for another in order to
|
||||
** get the output into the correct order. In other words, we do *not* want
|
||||
** to inhibit a rewrites like this:
|
||||
**
|
||||
** First Solver() Second Solver()
|
||||
** |-- SCAN t1 |-- SCAN t2
|
||||
** |-- SEARCH t2 `-- SEARCH t1
|
||||
** `-- SORT USING B-TREE
|
||||
**
|
||||
** Rather, the purpose of this routine is to inhibit rewrites such as:
|
||||
**
|
||||
** First Solver() Second Solver()
|
||||
** |-- SEARCH t1 |-- SCAN t2 <--- bad!
|
||||
** |-- SEARCH t2 `-- SEARCH t1
|
||||
** `-- SORT USING B-TREE
|
||||
**
|
||||
** See test cases in test/whereN.test for the real-world query that
|
||||
** originally provoked this heuristic.
|
||||
*/
|
||||
static SQLITE_NOINLINE void whereInterstageHeuristic(WhereInfo *pWInfo){
|
||||
int i;
|
||||
for(i=0; i<pWInfo->nLevel; i++){
|
||||
WhereLoop *p = pWInfo->a[i].pWLoop;
|
||||
if( p==0 ) break;
|
||||
if( (p->wsFlags & WHERE_VIRTUALTABLE)!=0 ) continue;
|
||||
if( (p->wsFlags & (WHERE_COLUMN_EQ|WHERE_COLUMN_NULL|WHERE_COLUMN_IN))!=0 ){
|
||||
u8 iTab = p->iTab;
|
||||
WhereLoop *pLoop;
|
||||
for(pLoop=pWInfo->pLoops; pLoop; pLoop=pLoop->pNextLoop){
|
||||
if( pLoop->iTab!=iTab ) continue;
|
||||
if( (pLoop->wsFlags & WHERE_CONSTRAINT)!=0 ) continue;
|
||||
pLoop->prereq = ALLBITS; /* Prevent 2nd solver() from using this one */
|
||||
}
|
||||
}else{
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
** Most queries use only a single table (they are not joins) and have
|
||||
** simple == constraints against indexed fields. This routine attempts
|
||||
@@ -6320,6 +6381,7 @@ WhereInfo *sqlite3WhereBegin(
|
||||
wherePathSolver(pWInfo, 0);
|
||||
if( db->mallocFailed ) goto whereBeginError;
|
||||
if( pWInfo->pOrderBy ){
|
||||
whereInterstageHeuristic(pWInfo);
|
||||
wherePathSolver(pWInfo, pWInfo->nRowOut+1);
|
||||
if( db->mallocFailed ) goto whereBeginError;
|
||||
}
|
||||
|
||||
103
test/whereN.test
Normal file
103
test/whereN.test
Normal file
@@ -0,0 +1,103 @@
|
||||
# 2024-04-02
|
||||
#
|
||||
# The author disclaims copyright to this source code. In place of
|
||||
# a legal notice, here is a blessing:
|
||||
#
|
||||
# May you do good and not evil.
|
||||
# May you find forgiveness for yourself and forgive others.
|
||||
# May you share freely, never taking more than you give.
|
||||
#
|
||||
#***********************************************************************
|
||||
# Tests for the whereInterstageHeuristic() routine in the query planner.
|
||||
#
|
||||
|
||||
set testdir [file dirname $argv0]
|
||||
source $testdir/tester.tcl
|
||||
set testprefix whereN
|
||||
|
||||
# The following is a simplified and "sanitized" version of the original
|
||||
# real-world query that brought the problem to light.
|
||||
#
|
||||
# The issue is a slow query. The answer is correct, but it was taking too
|
||||
# much time, because it was doing a full table scan rather than an indexed
|
||||
# lookup.
|
||||
#
|
||||
# The problem was that the query planner was overestimating the number of
|
||||
# output rows. The estimated number of output rows is accurate if the
|
||||
# DSNAME parameter is "ds-one". In that case, a large fraction of the rows
|
||||
# in "violation" end up being output. The query planner correctly deduces
|
||||
# that it is faster to do a full table scan of the large "violation" table
|
||||
# to avoid the after-query sort that implements the ORDER BY clause. However,
|
||||
# if the DSNAME is "ds-two", then only a few rows (about 6) are generated,
|
||||
# and it is much much faster to do an indexed lookup of "violation" followed
|
||||
# by a sort operation to implement ORDER BY
|
||||
#
|
||||
# The problem, of course, is that the query planner has no way of knowing
|
||||
# in advance how many rows will be generated. The query planner tries to
|
||||
# estimate a worst case, which is a large number of output rows, and it picks
|
||||
# the best plan for that case. However, the plan choosen is very inefficient
|
||||
# when the number of output rows is small.
|
||||
#
|
||||
# The whereInterstageHeuristic() routine in the query planner attempts to
|
||||
# correct this by adjusting the query plan such that it avoids the very bad
|
||||
# query plan for a small number of rows, at the expense of a slightly less
|
||||
# efficient plan for a large number of rows. The large number of rows case
|
||||
# is perhaps 5% slower with the revised plan, but the small number of
|
||||
# rows case is around 100 times faster. That seems like a good tradeoff.
|
||||
#
|
||||
do_execsql_test 1.0 {
|
||||
CREATE TABLE datasource(dsid INT, name TEXT);
|
||||
INSERT INTO datasource VALUES(1,'ds-one'),(2,'ds-two'),(3,'ds-three');
|
||||
CREATE INDEX ds1 ON datasource(name, dsid);
|
||||
|
||||
CREATE TABLE rule(rid INT, team_id INT, dsid INT);
|
||||
WITH RECURSIVE c(n) AS (VALUES(1) UNION ALL SELECT n+1 FROM c WHERE n<9)
|
||||
INSERT INTO rule(rid,team_id,dsid) SELECT n, 1, 1 FROM c;
|
||||
WITH RECURSIVE c(n) AS (VALUES(10) UNION ALL SELECT n+1 FROM c WHERE n<24)
|
||||
INSERT INTO rule(rid,team_id,dsid) SELECT n, 2, 2 FROM c;
|
||||
CREATE INDEX rule2 ON rule(dsid, rid);
|
||||
|
||||
CREATE TABLE violation(vid INT, rid INT, vx BLOB);
|
||||
/*** Uncomment to insert actual data
|
||||
WITH src(rid, cnt) AS (VALUES(1,3586),(2,1343),(3,6505),(5,76230),
|
||||
(6,740),(7,287794),(8,457),(12,1),
|
||||
(14,1),(16,1),(17,1),(18,1),(19,1))
|
||||
INSERT INTO violation(vid, rid, vx)
|
||||
SELECT rid*1000000+value, rid, randomblob(15)
|
||||
FROM src, generate_series(1,cnt);
|
||||
***/
|
||||
CREATE INDEX v1 ON violation(rid, vid);
|
||||
CREATE INDEX v2 ON violation(vid);
|
||||
ANALYZE;
|
||||
DELETE FROM sqlite_stat1;
|
||||
DROP TABLE IF EXISTS sqlite_stat4;
|
||||
INSERT INTO sqlite_stat1 VALUES
|
||||
('violation','v2','376661 1'),
|
||||
('violation','v1','376661 28974 1'),
|
||||
('rule','rule2','24 12 1'),
|
||||
('datasource','ds1','3 1 1');
|
||||
ANALYZE sqlite_schema;
|
||||
}
|
||||
set DSNAME ds-two ;# Only a few rows. Change to "ds-one" for many rows.
|
||||
do_eqp_test 1.1 {
|
||||
SELECT count(*), length(group_concat(vx)) FROM (
|
||||
SELECT V.*
|
||||
FROM datasource DS, rule R, violation V
|
||||
WHERE V.rid=R.rid
|
||||
AND R.dsid=DS.dsid
|
||||
AND DS.name=$DSNAME
|
||||
ORDER BY V.vid desc
|
||||
);
|
||||
} {
|
||||
QUERY PLAN
|
||||
|--CO-ROUTINE (subquery-xxxxxx)
|
||||
| |--SEARCH DS USING COVERING INDEX ds1 (name=?)
|
||||
| |--SEARCH R USING COVERING INDEX rule2 (dsid=?)
|
||||
| |--SEARCH V USING INDEX v1 (rid=?)
|
||||
| `--USE TEMP B-TREE FOR ORDER BY
|
||||
`--SCAN (subquery-xxxxxx)
|
||||
}
|
||||
# ^^^^---- We want to see three SEARCH terms. No SCAN terms.
|
||||
# The ORDER BY is implemented by a separate sorter pass.
|
||||
|
||||
finish_test
|
||||
Reference in New Issue
Block a user