Additional tweaks to the enhancement at [609fbb94b8f01d67] to further reduce the cost estimate for constructing an automatic index on an ephemeral table, in order to resolve the performance problem described by [forum:/forumpost/1d571c0296|forum post 1d571c0296].

FossilOrigin-Name: bf1aae7a8c7f2c74681aa29baa35259d10ce6a1737d2607def6bf27fed592131
2025-08-01 06:27:03 +03:00 · 2023-01-30 20:44:54 +00:00
parent 26c7cff254
commit f7af9ba13c
4 changed files with 82 additions and 12 deletions
--- a/test/where.test
+++ b/test/where.test
@@ -1633,4 +1633,73 @@ do_execsql_test where-29.1 {
      1, 1, 1, 1;
 } {xyz}

+# 2023-01-30
+# Test case for the query planner performance issue reported by
+# https://sqlite.org/forum/forumpost/1d571c0296
+#
+# The fix was to adjust the cost of computing an automatic index for
+# ephemeral tables, to help ensure that they are generated if they are
+# needed.  The test case below only looks at the query plan.  But 12x
+# improved performance has been verified by populating the "raw" table
+# with 100K rows of random data and running actual speed tests.
+#
+do_test where-30.1 {
+  unset -nocomplain res
+  set res {}
+  db eval {CREATE TABLE raw(country,date,total,delta, UNIQUE(country,date));}
+  db eval {
+    EXPLAIN QUERY PLAN
+    WITH
+      -- Per country: the earliest and latest date having total > 0
+      init(country, date, fin) AS (SELECT country, min(date), max(date)
+         FROM raw WHERE total > 0 GROUP BY country),
+    
+      -- Per country: each raw date later than that country's earliest date
+      src(country, date) AS (SELECT raw.country, raw.date
+          FROM raw JOIN init i on raw.country = i.country AND raw.date > i.date
+          ORDER BY raw.country, raw.date),
+    
+      -- For each country/date entry: x = day offset of raw.date, y = log(delta+1)
+      vals(country, date, x, y) AS (SELECT src.country, src.date,
+                julianday(raw.date) - julianday(src.date), log(delta+1)
+          FROM src JOIN raw on raw.country = src.country
+                        AND raw.date > date(src.date,'-7 days')
+                        AND raw.date <= src.date AND delta >= 0),
+    
+      -- Accumulate the sums over x and y that we need for each country/date
+      sums(country, date, x2, x, n, xy, y) AS (SELECT country, date,
+              sum(x*x*1.0), sum(x*1.0), sum(1.0), sum(x*y*1.0), sum(y*1.0)
+         FROM vals GROUP BY 1, 2),
+    
+      -- Use these to calculate the divisor for the inverse matrix
+      mult(country, date, m) AS (SELECT country, date, 1.0/(x2 * n - x * x)
+         FROM sums),
+    
+      -- Build the inverse matrix
+      inv(country, date, a,b,c,d) AS (SELECT mult.country, mult.date, n * m,
+                -x * m, -x * m, x2 * m
+          FROM mult JOIN sums on sums.country=mult.country
+                         AND mult.date=sums.date),
+    
+      -- Calculate the coefficients for the least squares fit
+      fit(country, date, a, b) AS (SELECT inv.country, inv.date,
+               a * xy + b * y, c * xy + d * y
+          FROM inv
+          JOIN mult on mult.country = inv.country AND mult.date = inv.date
+          JOIN sums on sums.country = mult.country AND sums.date = mult.date
+    )
+    SELECT *, nFin/nPrev - 1 AS growth, log(2)/log(nFin/nPrev) AS doubling
+      FROM (SELECT f.*, exp(b) - 1 AS nFin, exp(a* (-1) + b) - 1 AS nPrev
+              FROM fit f JOIN init i on i.country = f.country
+                          AND f.date <= date(i.fin,'-3 days'))
+     WHERE nPrev > 0 AND nFin > 0;
+  } {
+    if {$parent!=0} continue
+    if {![string match SCAN* $detail]} continue
+    lappend res SCAN
+  }
+  set res
+} {SCAN}
+# ^^^^^^-- one entry per top-level (parent==0) SCAN row; there should be only one.
+
 finish_test