mirror of
https://github.com/sqlite/sqlite.git
synced 2025-08-01 06:27:03 +03:00
Additional tweaks to the enhancement at [609fbb94b8f01d67] to further reduce
the cost estimate for constructing an automatic index on an ephemeral table, in order to resolve the performance problem described by [forum:/forumpost/1d571c0296|forum post 1d571c0296].

FossilOrigin-Name: bf1aae7a8c7f2c74681aa29baa35259d10ce6a1737d2607def6bf27fed592131
This commit is contained in:
@ -1633,4 +1633,73 @@ do_execsql_test where-29.1 {
|
||||
1, 1, 1, 1;
|
||||
} {xyz}
|
||||
|
||||
# 2023-01-30
|
||||
# Test case for the query planner performance issue reported by
|
||||
# https://sqlite.org/forum/forumpost/1d571c0296
|
||||
#
|
||||
# The fix was to adjust the cost of computing an automatic index for
|
||||
# ephemeral tables, to help ensure that they are generated if they are
|
||||
# needed. The test case below only looks at the query plan. But 12x
|
||||
# improved performance has been verified by populating the "raw" table
|
||||
# with 100K rows of random data and running actual speed tests.
|
||||
#
|
||||
# where-30.1: run EXPLAIN QUERY PLAN on the multi-CTE query below and collect
# one SCAN marker for every top-level plan row whose detail starts with SCAN.
# The expected result is a single SCAN.
do_test where-30.1 {
|
||||
# Drop any existing variable named res, then start with an empty list.
unset -nocomplain res
|
||||
set res {}
|
||||
# "raw" is the only base table the query references. No rows are inserted
# here; only the query plan is examined.
db eval {CREATE TABLE raw(country,date,total,delta, UNIQUE(country,date));}
|
||||
# Evaluate the EXPLAIN QUERY PLAN statement. The script that follows the SQL
# runs once per output row and reads that row's "parent" and "detail" values.
db eval {
|
||||
EXPLAIN QUERY PLAN
|
||||
WITH
|
||||
-- Find the country and min/max date
|
||||
init(country, date, fin) AS (SELECT country, min(date), max(date)
|
||||
FROM raw WHERE total > 0 GROUP BY country),
|
||||
|
||||
-- Generate the date stream for each country
|
||||
src(country, date) AS (SELECT raw.country, raw.date
|
||||
FROM raw JOIN init i on raw.country = i.country AND raw.date > i.date
|
||||
ORDER BY raw.country, raw.date),
|
||||
|
||||
-- Generate the x & y for each entry in the country/date stream
|
||||
vals(country, date, x, y) AS (SELECT src.country, src.date,
|
||||
julianday(raw.date) - julianday(src.date), log(delta+1)
|
||||
FROM src JOIN raw on raw.country = src.country
|
||||
AND raw.date > date(src.date,'-7 days')
|
||||
AND raw.date <= src.date AND delta >= 0),
|
||||
|
||||
-- Accumulate the data we need
|
||||
sums(country, date, x2, x, n, xy, y) AS (SELECT country, date,
|
||||
sum(x*x*1.0), sum(x*1.0), sum(1.0), sum(x*y*1.0), sum(y*1.0)
|
||||
FROM vals GROUP BY 1, 2),
|
||||
|
||||
-- use these to calculate to divisor for the inverse matrix
|
||||
mult(country, date, m) AS (SELECT country, date, 1.0/(x2 * n - x * x)
|
||||
FROM sums),
|
||||
|
||||
-- Build the inverse matrix
|
||||
inv(country, date, a,b,c,d) AS (SELECT mult.country, mult.date, n * m,
|
||||
-x * m, -x * m, x2 * m
|
||||
FROM mult JOIN sums on sums.country=mult.country
|
||||
AND mult.date=sums.date),
|
||||
|
||||
-- Calculate the coefficients for the least squares fit
|
||||
fit(country, date, a, b) AS (SELECT inv.country, inv.date,
|
||||
a * xy + b * y, c * xy + d * y
|
||||
FROM inv
|
||||
JOIN mult on mult.country = inv.country AND mult.date = inv.date
|
||||
JOIN sums on sums.country = mult.country AND sums.date = mult.date
|
||||
)
|
||||
SELECT *, nFin/nPrev - 1 AS growth, log(2)/log(nFin/nPrev) AS doubling
|
||||
FROM (SELECT f.*, exp(b) - 1 AS nFin, exp(a* (-1) + b) - 1 AS nPrev
|
||||
FROM fit f JOIN init i on i.country = f.country
|
||||
AND f.date <= date(i.fin,'-3 days'))
|
||||
WHERE nPrev > 0 AND nFin > 0;
|
||||
} {
|
||||
# Ignore plan rows whose parent is non-zero; only top-level rows count.
if {$parent!=0} continue
|
||||
# Ignore rows whose detail text does not begin with SCAN.
if {![string match SCAN* $detail]} continue
|
||||
# Record one marker for each remaining row.
lappend res SCAN
|
||||
}
|
||||
# The collected list is the value of the test script.
set res
|
||||
} {SCAN}
|
||||
# ^^^^^^-- there should only be one top-level table scan in the query plan.
|
||||
|
||||
finish_test
|
||||
|
Reference in New Issue
Block a user