1
0
mirror of https://github.com/sqlite/sqlite.git synced 2025-08-08 14:02:16 +03:00

Fix some problems with using window-functions in aggregate queries.

FossilOrigin-Name: fe7081e0952950f577234fcbb58f3c1efa4579267654fd2f713dc4804e470e7e
This commit is contained in:
dan
2018-06-12 18:40:17 +00:00
parent 2a11bb23eb
commit 7392569f5e
6 changed files with 174 additions and 11 deletions

View File

@@ -12,6 +12,125 @@
*/
#include "sqliteInt.h"
/*
** SELECT REWRITING
**
** Any SELECT statement that contains one or more window functions in
** either the select list or ORDER BY clause (the only two places window
** functions may be used) is transformed by function sqlite3WindowRewrite()
** in order to support window function processing. For example, with the
** schema:
**
** CREATE TABLE t1(a, b, c, d, e, f, g);
**
** the statement:
**
** SELECT a+1, max(b) OVER (PARTITION BY c ORDER BY d) FROM t1 ORDER BY e;
**
** is transformed to:
**
** SELECT a+1, max(b) OVER (PARTITION BY c ORDER BY d) FROM (
** SELECT a, e, c, d, b FROM t1 ORDER BY c, d
** ) ORDER BY e;
**
** The flattening optimization is disabled when processing this transformed
** SELECT statement. This allows the implementation of the window function
** (in this case max()) to process rows sorted in order of (c, d), which
** makes things easier for obvious reasons. More generally:
**
** * FROM, WHERE, GROUP BY and HAVING clauses are all moved to
** the sub-query.
**
** * ORDER BY, LIMIT and OFFSET remain part of the parent query.
**
** * Terminals from each of the expression trees that make up the
** select-list and ORDER BY expressions in the parent query are
** selected by the sub-query. For the purposes of the transformation,
** terminals are column references and aggregate functions.
**
** If there is more than one window function in the SELECT that uses
** the same window declaration (the OVER bit), then a single scan may
** be used to process more than one window function. For example:
**
** SELECT max(b) OVER (PARTITION BY c ORDER BY d),
** min(e) OVER (PARTITION BY c ORDER BY d)
** FROM t1;
**
** is transformed in the same way as the example above. However:
**
** SELECT max(b) OVER (PARTITION BY c ORDER BY d),
** min(e) OVER (PARTITION BY a ORDER BY b)
** FROM t1;
**
** Must be transformed to:
**
** SELECT max(b) OVER (PARTITION BY c ORDER BY d) FROM (
** SELECT e, min(e) OVER (PARTITION BY a ORDER BY b), c, d, b FROM
** SELECT a, e, c, d, b FROM t1 ORDER BY a, b
** ) ORDER BY c, d
** ) ORDER BY e;
**
** so that both min() and max() may process rows in the order defined by
** their respective window declarations.
**
** INTERFACE WITH SELECT.C
**
** When processing the rewritten SELECT statement, code in select.c calls
** sqlite3WhereBegin() to begin iterating through the results of the
** sub-query, which is always implemented as a co-routine. It then calls
** sqlite3WindowCodeStep() to process rows and finish the scan by calling
** sqlite3WhereEnd().
**
** sqlite3WindowCodeStep() generates VM code so that, for each row returned
** by the sub-query a sub-routine (OP_Gosub) coded by select.c is invoked.
** When the sub-routine is invoked:
**
** * The results of all window-functions for the row are stored
** in the associated Window.regResult registers.
**
** * The required terminal values are stored in the current row of
** temp table Window.iEphCsr.
**
** In some cases, depending on the window frame and the specific window
** functions invoked, sqlite3WindowCodeStep() caches each entire partition
** in a temp table before returning any rows. In other cases it does not.
** This detail is encapsulated within this file, the code generated by
** select.c is the same in either case.
**
** BUILT-IN WINDOW FUNCTIONS
**
** This implementation features the following built-in window functions:
**
** row_number()
** rank()
** dense_rank()
** percent_rank()
** cume_dist()
** ntile(N)
** lead(expr [, offset [, default]])
** lag(expr [, offset [, default]])
** first_value(expr)
** last_value(expr)
** nth_value(expr, N)
**
** These are the same built-in window functions supported by Postgres.
** Although the behaviour of aggregate window functions (functions that
** can be used as either aggregates or window funtions) allows them to
** be implemented using an API, built-in window functions are much more
** esoteric. Additionally, some window functions (e.g. nth_value())
** may only be implemented by caching the entire partition in memory.
** As such, some built-in window functions use the same API as aggregate
** window functions and some are implemented directly using VDBE
** instructions. Additionally, for those functions that use the API, the
** window frame is sometimes modified before the SELECT statement is
** rewritten. For example, regardless of the specified window frame, the
** row_number() function always uses:
**
** ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
**
** See sqlite3WindowUpdate() for details.
*/
/*
** Implementation of built-in window function row_number(). Assumes that the
** window frame has been coerced to:
@@ -434,6 +553,7 @@ static int selectWindowRewriteExprCb(Walker *pWalker, Expr *pExpr){
}
/* Fall through. */
case TK_AGG_FUNCTION:
case TK_COLUMN: {
Expr *pDup = sqlite3ExprDup(pParse->db, pExpr, 0);
p->pSub = sqlite3ExprListAppend(pParse, p->pSub, pDup);
@@ -595,6 +715,8 @@ int sqlite3WindowRewrite(Parse *pParse, Select *p){
rc = SQLITE_NOMEM;
}else{
pSub->selFlags |= SF_Expanded;
p->selFlags &= ~SF_Aggregate;
sqlite3SelectPrep(pParse, pSub, 0);
}
}