1
0
mirror of https://github.com/sqlite/sqlite.git synced 2025-08-01 06:27:03 +03:00

Additional tweaks to the enhancement at [609fbb94b8f01d67] to further reduce
the cost estimate for constructing an automatic index on an ephemeral table,
in order to resolve the performance problem described by
[forum:/forumpost/1d571c0296|forum post 1d571c0296].

FossilOrigin-Name: bf1aae7a8c7f2c74681aa29baa35259d10ce6a1737d2607def6bf27fed592131
This commit is contained in:
drh
2023-01-30 20:44:54 +00:00
parent 26c7cff254
commit f7af9ba13c
4 changed files with 82 additions and 12 deletions

View File

@ -1633,4 +1633,73 @@ do_execsql_test where-29.1 {
1, 1, 1, 1;
} {xyz}
# 2023-01-30
# Test case for the query planner performance issue reported by
# https://sqlite.org/forum/forumpost/1d571c0296
#
# The fix was to adjust the cost of computing an automatic index for
# ephemeral tables, to help ensure that they are generated if they are
# needed. The test case below only looks at the query plan, but a 12x
# performance improvement has been verified by populating the "raw" table
# with 100K rows of random data and running actual speed tests.
#
# where-30.1: query-plan regression check for a chain of CTEs that
# repeatedly join back to the "raw" table and to one another.  The body
# collects one SCAN token for every top-level SCAN row of the
# EXPLAIN QUERY PLAN output, and the expected result is exactly one token.
do_test where-30.1 {
# Start from a clean, empty result list even if an earlier test left a
# variable named res behind.
unset -nocomplain res
set res {}
# Schema only: no rows are inserted in this test, so only the plan is
# inspected.  The UNIQUE constraint is the only index-like object declared
# on the table here.
db eval {CREATE TABLE raw(country,date,total,delta, UNIQUE(country,date));}
# Ask for the query plan and run the trailing script once per output row.
# The script reads the parent and detail output columns as Tcl variables.
db eval {
EXPLAIN QUERY PLAN
WITH
-- Find the country and min/max date
init(country, date, fin) AS (SELECT country, min(date), max(date)
FROM raw WHERE total > 0 GROUP BY country),
-- Generate the date stream for each country
src(country, date) AS (SELECT raw.country, raw.date
FROM raw JOIN init i on raw.country = i.country AND raw.date > i.date
ORDER BY raw.country, raw.date),
-- Generate the x & y for each entry in the country/date stream
vals(country, date, x, y) AS (SELECT src.country, src.date,
julianday(raw.date) - julianday(src.date), log(delta+1)
FROM src JOIN raw on raw.country = src.country
AND raw.date > date(src.date,'-7 days')
AND raw.date <= src.date AND delta >= 0),
-- Accumulate the data we need
sums(country, date, x2, x, n, xy, y) AS (SELECT country, date,
sum(x*x*1.0), sum(x*1.0), sum(1.0), sum(x*y*1.0), sum(y*1.0)
FROM vals GROUP BY 1, 2),
-- use these to calculate to divisor for the inverse matrix
mult(country, date, m) AS (SELECT country, date, 1.0/(x2 * n - x * x)
FROM sums),
-- Build the inverse matrix
inv(country, date, a,b,c,d) AS (SELECT mult.country, mult.date, n * m,
-x * m, -x * m, x2 * m
FROM mult JOIN sums on sums.country=mult.country
AND mult.date=sums.date),
-- Calculate the coefficients for the least squares fit
fit(country, date, a, b) AS (SELECT inv.country, inv.date,
a * xy + b * y, c * xy + d * y
FROM inv
JOIN mult on mult.country = inv.country AND mult.date = inv.date
JOIN sums on sums.country = mult.country AND sums.date = mult.date
)
SELECT *, nFin/nPrev - 1 AS growth, log(2)/log(nFin/nPrev) AS doubling
FROM (SELECT f.*, exp(b) - 1 AS nFin, exp(a* (-1) + b) - 1 AS nPrev
FROM fit f JOIN init i on i.country = f.country
AND f.date <= date(i.fin,'-3 days'))
WHERE nPrev > 0 AND nFin > 0;
} {
# Skip every plan row that is nested under another node; only rows with
# parent equal to 0 (the top level of the plan) are counted.
if {$parent!=0} continue
# Of those, skip anything whose detail text does not begin with SCAN.
if {![string match SCAN* $detail]} continue
# Record one token per remaining row so the count shows in the result.
lappend res SCAN
}
# The accumulated list is the value of the body; it is matched against the
# expected-result argument that follows the body.
set res
} {SCAN}
# ^^^^^^-- there should only be one top-level table scan in the query plan.
# NOTE(review): finish_test is not defined in the visible text; presumably it
# is supplied by the shared test harness and ends this script -- confirm.
finish_test