1
0
mirror of https://github.com/sqlite/sqlite.git synced 2025-08-10 01:02:56 +03:00

Improve the star-query heuristic so that it does a better job of identifying
actual star queries.  Also includes improved diagnostic output from the
query planner.

FossilOrigin-Name: 7cfbe14d199bb631abd4d009698eeaee9b8450d5061ded612095ee4738ac6a1f
This commit is contained in:
drh
2025-01-24 16:37:31 +00:00
4 changed files with 117 additions and 42 deletions

View File

@@ -1,5 +1,5 @@
C Correct\sa\sversion\snumber\sin\sautosetup/README.md.
D 2025-01-24T15:41:33.001
C Improve\sthe\sstar-query\sheuristic\sso\sthat\sit\sdoes\sa\sbetter\sjob\sof\sidentifying\nactual\sstar\squeries.\s\sAlso\sincludes\simproved\sdiagnostic\soutput\sfrom\sthe\nquery\splanner.
D 2025-01-24T16:37:31.813
F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1
F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea
F LICENSE.md e108e1e69ae8e8a59e93c455654b8ac9356a11720d3345df2a4743e9590fb20d
@@ -863,8 +863,8 @@ F src/vxworks.h d2988f4e5a61a4dfe82c6524dd3d6e4f2ce3cdb9
F src/wal.c 4e6181d8780ab0af2e1388d0754cbe6f2f04593d2b1ab6c41699a89942fd8997
F src/wal.h ba252daaa94f889f4b2c17c027e823d9be47ce39da1d3799886bbd51f0490452
F src/walker.c d5006d6b005e4ea7302ad390957a8d41ed83faa177e412f89bc5600a7462a014
F src/where.c 6722991bece039c7094e9e31bb57b9eb155299f01d2209f1378a81c9605ded31
F src/whereInt.h 2b0804f300c7f65de4046a1d81c65f01b208d6c08950ccd1fa6b8c16162a8af7
F src/where.c 647dfba23202523c64ca204f3501a09d18670ea081acea680ddcb1e63beda163
F src/whereInt.h 3b2ef4617758174d00c6940850e4988c1b195d39bcc20f2965319a1f7bffc714
F src/wherecode.c 0c3d3199a2b769a5e2bb70feb5003dc85b3d86842ecaf903a47f2b4205ca5dab
F src/whereexpr.c 0f93a29cabd3a338d09a1f5c6770620a1ac51ec1157f3229502a7e7767c60b6f
F src/window.c 2bf01f9941a64fbcead61a0e3cb5db3fca5094b30d2ff0d23274c2a81d2e2385
@@ -2208,8 +2208,9 @@ F tool/version-info.c 3b36468a90faf1bbd59c65fd0eb66522d9f941eedd364fabccd7227350
F tool/warnings-clang.sh bbf6a1e685e534c92ec2bfba5b1745f34fb6f0bc2a362850723a9ee87c1b31a7
F tool/warnings.sh 49a486c5069de041aedcbde4de178293e0463ae9918ecad7539eedf0ec77a139
F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f
P 601636829b79888deda7d4dc5786f82b4f0b302d84663ee96b4d47fcb429abcf
R 7edf0e5d3b3392becf43b655692773e3
U stephan
Z 8a4c0606d9fc49f835c4115814c6ca15
P c338caf0fd4adc7baacf20f6d4274b1776b9487a5a976b8d85e490791be82b11 a280f5f5480e560fc2b80e8947d8062e8b3487d930e71cb60fc9ba90d87977c1
R 7125d6757cc5225f643dd2f0408b79ed
T +closed a280f5f5480e560fc2b80e8947d8062e8b3487d930e71cb60fc9ba90d87977c1
U drh
Z f818ed71ad97105ec5c66c8cb2f2a934
# Remove this line to create a well-formed Fossil manifest.

View File

@@ -1 +1 @@
c338caf0fd4adc7baacf20f6d4274b1776b9487a5a976b8d85e490791be82b11
7cfbe14d199bb631abd4d009698eeaee9b8450d5061ded612095ee4738ac6a1f

View File

@@ -2431,8 +2431,9 @@ void sqlite3WhereClausePrint(WhereClause *pWC){
** 1.002.001 t2.t2xy 2 f 010241 N 2 cost 0,56,31
*/
void sqlite3WhereLoopPrint(const WhereLoop *p, const WhereClause *pWC){
WhereInfo *pWInfo;
if( pWC ){
WhereInfo *pWInfo = pWC->pWInfo;
pWInfo = pWC->pWInfo;
int nb = 1+(pWInfo->pTabList->nSrc+3)/4;
SrcItem *pItem = pWInfo->pTabList->a + p->iTab;
Table *pTab = pItem->pSTab;
@@ -2442,6 +2443,7 @@ void sqlite3WhereLoopPrint(const WhereLoop *p, const WhereClause *pWC){
sqlite3DebugPrintf(" %12s",
pItem->zAlias ? pItem->zAlias : pTab->zName);
}else{
pWInfo = 0;
sqlite3DebugPrintf("%c%2d.%03llx.%03llx %c%d",
p->cId, p->iTab, p->maskSelf, p->prereq & 0xfff, p->cId, p->iTab);
}
@@ -2473,7 +2475,12 @@ void sqlite3WhereLoopPrint(const WhereLoop *p, const WhereClause *pWC){
}else{
sqlite3DebugPrintf(" f %06x N %d", p->wsFlags, p->nLTerm);
}
if( pWInfo && pWInfo->nOutStarDelta>0 && p->rStarDelta!=0 ){
sqlite3DebugPrintf(" cost %d,%d,%d delta=%d\n",
p->rSetup, p->rRun, p->nOut, -p->rStarDelta);
}else{
sqlite3DebugPrintf(" cost %d,%d,%d\n", p->rSetup, p->rRun, p->nOut);
}
if( p->nLTerm && (sqlite3WhereTrace & 0x4000)!=0 ){
int i;
for(i=0; i<p->nLTerm; i++){
@@ -5441,11 +5448,13 @@ static LogEst whereSortingCost(
** 18 for star queries
** 12 otherwise
**
** For the purposes of SQLite, a star-query is defined as a query
** with a large central table that is joined (using an INNER JOIN,
** not a LEFT JOIN) against four or more smaller tables. The central
** table is called the "fact" table. The smaller tables that get
** joined are "dimension tables".
** For the purposes of this heuristic, a star-query is defined as a query
** with a large central table that is joined using an INNER JOIN,
** not CROSS or OUTER JOINs, against four or more smaller tables.
** The central table is called the "fact" table. The smaller tables
** that get joined are "dimension tables". Also, any table that is
** self-joined cannot be a dimension table; we assume that dimension
** tables may only be joined against fact tables.
**
** SIDE EFFECT: (and really the whole point of this subroutine)
**
@@ -5456,52 +5465,109 @@ static LogEst whereSortingCost(
** resulting in poor query plans. The total amount of heuristic cost
** adjustment is stored in pWInfo->nOutStarDelta and the cost adjustment
** for each WhereLoop is stored in its rStarDelta field.
**
** This heuristic can be completely disabled, so that no query is
** considered a star-query, using SQLITE_TESTCTRL_OPTIMIZATION to
** disable the SQLITE_StarQuery optimization. In the CLI, the command
** to do that is: ".testctrl opt -starquery".
*/
static int computeMxChoice(WhereInfo *pWInfo, LogEst nRowEst){
static int computeMxChoice(WhereInfo *pWInfo){
int nLoop = pWInfo->nLevel; /* Number of terms in the join */
if( nRowEst==0
&& nLoop>=5
WhereLoop *pWLoop; /* For looping over WhereLoops */
#ifdef SQLITE_DEBUG
/* The star-query detection code below makes use of the following
** properties of the WhereLoop list, so verify them before
** continuing:
** (1) .maskSelf is the bitmask corresponding to .iTab
** (2) The WhereLoop list is in ascending .iTab order
*/
for(pWLoop=pWInfo->pLoops; pWLoop; pWLoop=pWLoop->pNextLoop){
assert( pWLoop->maskSelf==MASKBIT(pWLoop->iTab) );
assert( pWLoop->pNextLoop==0 || pWLoop->iTab<=pWLoop->pNextLoop->iTab );
}
#endif /* SQLITE_DEBUG */
if( nLoop>=5
&& !pWInfo->bStarDone
&& OptimizationEnabled(pWInfo->pParse->db, SQLITE_StarQuery)
){
SrcItem *aFromTabs; /* All terms of the FROM clause */
int iFromIdx; /* Term of FROM clause is the candidate fact-table */
Bitmask m; /* Bitmask for candidate fact-table */
Bitmask mSelfJoin = 0; /* Tables that cannot be dimension tables */
WhereLoop *pStart; /* Where to start searching for dimension-tables */
pWInfo->bStarDone = 1; /* Only do this computation once */
/* Check to see if we are dealing with a star schema and if so, reduce
** the cost of fact tables relative to dimension tables, as a heuristic
** to help keep the fact tables in outer loops.
*/
int iLoop; /* Counter over join terms */
Bitmask m; /* Bitmask for current loop */
assert( pWInfo->nOutStarDelta==0 );
for(iLoop=0, m=1; iLoop<nLoop; iLoop++, m<<=1){
WhereLoop *pWLoop; /* For looping over WhereLoops */
aFromTabs = pWInfo->pTabList->a;
pStart = pWInfo->pLoops;
for(iFromIdx=0, m=1; iFromIdx<nLoop; iFromIdx++, m<<=1){
int nDep = 0; /* Number of dimension tables */
LogEst rDelta; /* Heuristic cost adjustment */
Bitmask mSeen = 0; /* Mask of dimension tables */
for(pWLoop=pWInfo->pLoops; pWLoop; pWLoop=pWLoop->pNextLoop){
if( (pWLoop->prereq & m)!=0 /* pWInfo depends on iLoop */
SrcItem *pFactTab; /* The candidate fact table */
pFactTab = aFromTabs + iFromIdx;
if( (pFactTab->fg.jointype & (JT_OUTER|JT_CROSS))!=0 ){
/* If the candidate fact-table is the right table of an outer join,
** restrict the search for dimension-tables to tables to the right
** of the fact-table. */
if( iFromIdx+4 > nLoop ) break; /* Impossible to reach nDep>=4 */
while( ALWAYS(pStart) && pStart->iTab<=iFromIdx ){
pStart = pStart->pNextLoop;
}
}
for(pWLoop=pStart; pWLoop; pWLoop=pWLoop->pNextLoop){
if( (aFromTabs[pWLoop->iTab].fg.jointype & (JT_OUTER|JT_CROSS))!=0 ){
/* Fact-tables and dimension-tables cannot be separated by an
** outer join (at least for the definitions of fact-table and
** dimension-table used by this heuristic). */
break;
}
if( (pWLoop->prereq & m)!=0 /* pWInfo depends on iFromIdx */
&& (pWLoop->maskSelf & mSeen)==0 /* pWInfo not already a dependency */
&& (pWInfo->pTabList->a[pWLoop->iTab].fg.jointype & JT_LEFT)==0
/* ^- pWInfo isn't a LEFT JOIN */
&& (pWLoop->maskSelf & mSelfJoin)==0 /* Not a self-join */
){
if( aFromTabs[pWLoop->iTab].pSTab==pFactTab->pSTab ){
mSelfJoin |= m;
}else{
nDep++;
mSeen |= pWLoop->maskSelf;
}
}
}
if( nDep<=3 ) continue;
rDelta = 15*(nDep-3);
#ifdef WHERETRACE_ENABLED /* 0x4 */
if( sqlite3WhereTrace&0x4 ){
SrcItem *pItem = pWInfo->pTabList->a + iLoop;
Bitmask x;
int ii;
sqlite3DebugPrintf(
"Fact-table %s(%d): %d dimensions, cost reduced %d\n",
pItem->zAlias ? pItem->zAlias : pItem->pSTab->zName, iLoop,
nDep, rDelta);
"Fact-table %s(%d): cost reduced %d due to %d dimension tables:",
pFactTab->zAlias ? pFactTab->zAlias : pFactTab->pSTab->zName,
iFromIdx, rDelta, nDep
);
for(ii=0, x=1; ii<nLoop; ii++, x<<=1){
if( x & mSeen ){
SrcItem *pDim = aFromTabs + ii;
sqlite3DebugPrintf(" %s(%d)",
pDim->zAlias ? pDim->zAlias : pDim->pSTab->zName, ii
);
}
}
sqlite3DebugPrintf("\n");
}
#endif
if( pWInfo->nOutStarDelta==0 ){
for(pWLoop=pWInfo->pLoops; pWLoop; pWLoop=pWLoop->pNextLoop){
pWLoop->rStarDelta = 0;
}
}
pWInfo->nOutStarDelta += rDelta;
pWInfo->nOutStarDelta = rDelta;
for(pWLoop=pWInfo->pLoops; pWLoop; pWLoop=pWLoop->pNextLoop){
if( pWLoop->maskSelf==m ){
pWLoop->rRun -= rDelta;
@@ -5587,7 +5653,7 @@ static int wherePathSolver(WhereInfo *pWInfo, LogEst nRowEst){
}else if( nLoop==2 ){
mxChoice = 5;
}else{
mxChoice = computeMxChoice(pWInfo, nRowEst);
mxChoice = computeMxChoice(pWInfo);
}
assert( nLoop<=pWInfo->pTabList->nSrc );
@@ -5956,6 +6022,9 @@ static int wherePathSolver(WhereInfo *pWInfo, LogEst nRowEst){
}
pWInfo->nRowOut = pFrom->nRow + pWInfo->nOutStarDelta;
#ifdef WHERETRACE_ENABLED
pWInfo->rTotalCost = pFrom->rCost + pWInfo->nOutStarDelta;
#endif
/* Free temporary memory and return success */
sqlite3StackFreeNN(pParse->db, pSpace);
@@ -6836,7 +6905,8 @@ WhereInfo *sqlite3WhereBegin(
assert( db->mallocFailed==0 );
#ifdef WHERETRACE_ENABLED
if( sqlite3WhereTrace ){
sqlite3DebugPrintf("---- Solution nRow=%d", pWInfo->nRowOut);
sqlite3DebugPrintf("---- Solution cost=%d, nRow=%d",
pWInfo->rTotalCost, pWInfo->nRowOut);
if( pWInfo->nOBSat>0 ){
sqlite3DebugPrintf(" ORDERBY=%d,0x%llx", pWInfo->nOBSat, pWInfo->revMask);
}

View File

@@ -486,8 +486,12 @@ struct WhereInfo {
unsigned untestedTerms :1; /* Not all WHERE terms resolved by outer loop */
unsigned bOrderedInnerLoop:1;/* True if only the inner-most loop is ordered */
unsigned sorted :1; /* True if really sorted (not just grouped) */
unsigned bStarDone :1; /* True if check for star-query is complete */
LogEst nOutStarDelta; /* Artificial nOut reduction for star-query */
LogEst nRowOut; /* Estimated number of output rows */
#ifdef WHERETRACE_ENABLED
LogEst rTotalCost; /* Total cost of the solution */
#endif
int iTop; /* The very beginning of the WHERE loop */
int iEndWhere; /* End of the WHERE clause itself */
WhereLoop *pLoops; /* List of all WhereLoop objects */