diff --git a/manifest b/manifest index 3b7ea01db7..87f0cf85ca 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Correct\sa\sversion\snumber\sin\sautosetup/README.md. -D 2025-01-24T15:41:33.001 +C Improve\sthe\sstar-query\sheuristic\sso\sthat\sit\sdoes\sa\sbetter\sjob\sof\sidentifying\nactual\sstar\squeries.\s\sAlso\sincludes\simproved\sdiagnostic\soutput\sfrom\sthe\nquery\splanner. +D 2025-01-24T16:37:31.813 F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1 F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea F LICENSE.md e108e1e69ae8e8a59e93c455654b8ac9356a11720d3345df2a4743e9590fb20d @@ -863,8 +863,8 @@ F src/vxworks.h d2988f4e5a61a4dfe82c6524dd3d6e4f2ce3cdb9 F src/wal.c 4e6181d8780ab0af2e1388d0754cbe6f2f04593d2b1ab6c41699a89942fd8997 F src/wal.h ba252daaa94f889f4b2c17c027e823d9be47ce39da1d3799886bbd51f0490452 F src/walker.c d5006d6b005e4ea7302ad390957a8d41ed83faa177e412f89bc5600a7462a014 -F src/where.c 6722991bece039c7094e9e31bb57b9eb155299f01d2209f1378a81c9605ded31 -F src/whereInt.h 2b0804f300c7f65de4046a1d81c65f01b208d6c08950ccd1fa6b8c16162a8af7 +F src/where.c 647dfba23202523c64ca204f3501a09d18670ea081acea680ddcb1e63beda163 +F src/whereInt.h 3b2ef4617758174d00c6940850e4988c1b195d39bcc20f2965319a1f7bffc714 F src/wherecode.c 0c3d3199a2b769a5e2bb70feb5003dc85b3d86842ecaf903a47f2b4205ca5dab F src/whereexpr.c 0f93a29cabd3a338d09a1f5c6770620a1ac51ec1157f3229502a7e7767c60b6f F src/window.c 2bf01f9941a64fbcead61a0e3cb5db3fca5094b30d2ff0d23274c2a81d2e2385 @@ -2208,8 +2208,9 @@ F tool/version-info.c 3b36468a90faf1bbd59c65fd0eb66522d9f941eedd364fabccd7227350 F tool/warnings-clang.sh bbf6a1e685e534c92ec2bfba5b1745f34fb6f0bc2a362850723a9ee87c1b31a7 F tool/warnings.sh 49a486c5069de041aedcbde4de178293e0463ae9918ecad7539eedf0ec77a139 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 601636829b79888deda7d4dc5786f82b4f0b302d84663ee96b4d47fcb429abcf -R 7edf0e5d3b3392becf43b655692773e3 -U stephan -Z 8a4c0606d9fc49f835c4115814c6ca15 +P c338caf0fd4adc7baacf20f6d4274b1776b9487a5a976b8d85e490791be82b11 a280f5f5480e560fc2b80e8947d8062e8b3487d930e71cb60fc9ba90d87977c1 +R 7125d6757cc5225f643dd2f0408b79ed +T +closed a280f5f5480e560fc2b80e8947d8062e8b3487d930e71cb60fc9ba90d87977c1 +U drh +Z f818ed71ad97105ec5c66c8cb2f2a934 # Remove this line to create a well-formed Fossil manifest. diff --git a/manifest.uuid b/manifest.uuid index 12aa7b8716..38a6c8e782 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -c338caf0fd4adc7baacf20f6d4274b1776b9487a5a976b8d85e490791be82b11 +7cfbe14d199bb631abd4d009698eeaee9b8450d5061ded612095ee4738ac6a1f diff --git a/src/where.c b/src/where.c index cad66e70bc..14dc99e5c1 100644 --- a/src/where.c +++ b/src/where.c @@ -2431,8 +2431,9 @@ void sqlite3WhereClausePrint(WhereClause *pWC){ ** 1.002.001 t2.t2xy 2 f 010241 N 2 cost 0,56,31 */ void sqlite3WhereLoopPrint(const WhereLoop *p, const WhereClause *pWC){ + WhereInfo *pWInfo; if( pWC ){ - WhereInfo *pWInfo = pWC->pWInfo; + pWInfo = pWC->pWInfo; int nb = 1+(pWInfo->pTabList->nSrc+3)/4; SrcItem *pItem = pWInfo->pTabList->a + p->iTab; Table *pTab = pItem->pSTab; @@ -2442,6 +2443,7 @@ void sqlite3WhereLoopPrint(const WhereLoop *p, const WhereClause *pWC){ sqlite3DebugPrintf(" %12s", pItem->zAlias ? pItem->zAlias : pTab->zName); }else{ + pWInfo = 0; sqlite3DebugPrintf("%c%2d.%03llx.%03llx %c%d", p->cId, p->iTab, p->maskSelf, p->prereq & 0xfff, p->cId, p->iTab); } @@ -2473,7 +2475,12 @@ void sqlite3WhereLoopPrint(const WhereLoop *p, const WhereClause *pWC){ }else{ sqlite3DebugPrintf(" f %06x N %d", p->wsFlags, p->nLTerm); } - sqlite3DebugPrintf(" cost %d,%d,%d\n", p->rSetup, p->rRun, p->nOut); + if( pWInfo && pWInfo->nOutStarDelta>0 && p->rStarDelta!=0 ){ + sqlite3DebugPrintf(" cost %d,%d,%d delta=%d\n", + p->rSetup, p->rRun, p->nOut, -p->rStarDelta); + }else{ + sqlite3DebugPrintf(" cost %d,%d,%d\n", p->rSetup, p->rRun, p->nOut); + } if( p->nLTerm && (sqlite3WhereTrace & 0x4000)!=0 ){ int i; for(i=0; inLTerm; i++){ @@ -5441,11 +5448,13 @@ static LogEst whereSortingCost( ** 18 for star queries ** 12 otherwise ** -** For the purposes of SQLite, a star-query is defined as a query -** with a large central table that is joined (using an INNER JOIN, -** not a LEFT JOIN) against four or more smaller tables. The central -** table is called the "fact" table. The smaller tables that get -** joined are "dimension tables". +** For the purposes of this heuristic, a star-query is defined as a query +** with a large central table that is joined using an INNER JOIN, +** not CROSS or OUTER JOINs, against four or more smaller tables. +* The central table is called the "fact" table. The smaller tables +** that get joined are "dimension tables". Also, any table that is +** self-joined cannot be a dimension table; we assume that dimension +** tables may only be joined against fact tables. ** ** SIDE EFFECT: (and really the whole point of this subroutine) ** @@ -5456,52 +5465,109 @@ static LogEst whereSortingCost( ** resulting in poor query plans. The total amount of heuristic cost ** adjustment is stored in pWInfo->nOutStarDelta and the cost adjustment ** for each WhereLoop is stored in its rStarDelta field. +** +** This heuristic can be completely disabled, so that no query is +** considered a star-query, using SQLITE_TESTCTRL_OPTIMIZATION to +** disable the SQLITE_StarQuery optimization. In the CLI, the command +** to do that is: ".testctrl opt -starquery". */ -static int computeMxChoice(WhereInfo *pWInfo, LogEst nRowEst){ +static int computeMxChoice(WhereInfo *pWInfo){ int nLoop = pWInfo->nLevel; /* Number of terms in the join */ - if( nRowEst==0 - && nLoop>=5 + WhereLoop *pWLoop; /* For looping over WhereLoops */ + +#ifdef SQLITE_DEBUG + /* The star-query detection code below makes use of the following + ** properties of the WhereLoop list, so verifying them before + ** continuing: + ** (1) .maskSelf is the bitmask corresponding to .iTab + ** (2) The WhereLoop list is in ascending .iTab order + */ + for(pWLoop=pWInfo->pLoops; pWLoop; pWLoop=pWLoop->pNextLoop){ + assert( pWLoop->maskSelf==MASKBIT(pWLoop->iTab) ); + assert( pWLoop->pNextLoop==0 || pWLoop->iTab<=pWLoop->pNextLoop->iTab ); + } +#endif /* SQLITE_DEBUG */ + + if( nLoop>=5 + && !pWInfo->bStarDone && OptimizationEnabled(pWInfo->pParse->db, SQLITE_StarQuery) ){ + SrcItem *aFromTabs; /* All terms of the FROM clause */ + int iFromIdx; /* Term of FROM clause is the candidate fact-table */ + Bitmask m; /* Bitmask for candidate fact-table */ + Bitmask mSelfJoin = 0; /* Tables that cannot be dimension tables */ + WhereLoop *pStart; /* Where to start searching for dimension-tables */ + + pWInfo->bStarDone = 1; /* Only do this computation once */ + /* Check to see if we are dealing with a star schema and if so, reduce ** the cost of fact tables relative to dimension tables, as a heuristic ** to help keep the fact tables in outer loops. */ - int iLoop; /* Counter over join terms */ - Bitmask m; /* Bitmask for current loop */ assert( pWInfo->nOutStarDelta==0 ); - for(iLoop=0, m=1; iLooppTabList->a; + pStart = pWInfo->pLoops; + for(iFromIdx=0, m=1; iFromIdxpLoops; pWLoop; pWLoop=pWLoop->pNextLoop){ - if( (pWLoop->prereq & m)!=0 /* pWInfo depends on iLoop */ + SrcItem *pFactTab; /* The candidate fact table */ + + pFactTab = aFromTabs + iFromIdx; + if( (pFactTab->fg.jointype & (JT_OUTER|JT_CROSS))!=0 ){ + /* If the candidate fact-table is the right table of an outer join + ** restrict the search for dimension-tables to be tables to the right + ** of the fact-table. */ + if( iFromIdx+4 > nLoop ) break; /* Impossible to reach nDep>=4 */ + while( ALWAYS(pStart) && pStart->iTab<=iFromIdx ){ + pStart = pStart->pNextLoop; + } + } + for(pWLoop=pStart; pWLoop; pWLoop=pWLoop->pNextLoop){ + if( (aFromTabs[pWLoop->iTab].fg.jointype & (JT_OUTER|JT_CROSS))!=0 ){ + /* Fact-tables and dimension-tables cannot be separated by an + ** outer join (at least for the definition of fact- and dimension- + ** used by this heuristic). */ + break; + } + if( (pWLoop->prereq & m)!=0 /* pWInfo depends on iFromIdx */ && (pWLoop->maskSelf & mSeen)==0 /* pWInfo not already a dependency */ - && (pWInfo->pTabList->a[pWLoop->iTab].fg.jointype & JT_LEFT)==0 - /* ^- pWInfo isn't a LEFT JOIN */ + && (pWLoop->maskSelf & mSelfJoin)==0 /* Not a self-join */ ){ - nDep++; - mSeen |= pWLoop->maskSelf; + if( aFromTabs[pWLoop->iTab].pSTab==pFactTab->pSTab ){ + mSelfJoin |= m; + }else{ + nDep++; + mSeen |= pWLoop->maskSelf; + } } } if( nDep<=3 ) continue; rDelta = 15*(nDep-3); #ifdef WHERETRACE_ENABLED /* 0x4 */ if( sqlite3WhereTrace&0x4 ){ - SrcItem *pItem = pWInfo->pTabList->a + iLoop; - sqlite3DebugPrintf( - "Fact-table %s(%d): %d dimensions, cost reduced %d\n", - pItem->zAlias ? pItem->zAlias : pItem->pSTab->zName, iLoop, - nDep, rDelta); + Bitmask x; + int ii; + sqlite3DebugPrintf( + "Fact-table %s(%d): cost reduced %d due to %d dimension tables:", + pFactTab->zAlias ? pFactTab->zAlias : pFactTab->pSTab->zName, + iFromIdx, rDelta, nDep + ); + for(ii=0, x=1; iizAlias ? pDim->zAlias : pDim->pSTab->zName, ii + ); + } + } + sqlite3DebugPrintf("\n"); } #endif - if( pWInfo->nOutStarDelta==0 ){ - for(pWLoop=pWInfo->pLoops; pWLoop; pWLoop=pWLoop->pNextLoop){ - pWLoop->rStarDelta = 0; - } + for(pWLoop=pWInfo->pLoops; pWLoop; pWLoop=pWLoop->pNextLoop){ + pWLoop->rStarDelta = 0; } - pWInfo->nOutStarDelta += rDelta; + pWInfo->nOutStarDelta = rDelta; for(pWLoop=pWInfo->pLoops; pWLoop; pWLoop=pWLoop->pNextLoop){ if( pWLoop->maskSelf==m ){ pWLoop->rRun -= rDelta; @@ -5509,7 +5575,7 @@ static int computeMxChoice(WhereInfo *pWInfo, LogEst nRowEst){ pWLoop->rStarDelta = rDelta; } } - } + } } return pWInfo->nOutStarDelta>0 ? 18 : 12; } @@ -5587,7 +5653,7 @@ static int wherePathSolver(WhereInfo *pWInfo, LogEst nRowEst){ }else if( nLoop==2 ){ mxChoice = 5; }else{ - mxChoice = computeMxChoice(pWInfo, nRowEst); + mxChoice = computeMxChoice(pWInfo); } assert( nLoop<=pWInfo->pTabList->nSrc ); @@ -5956,6 +6022,9 @@ static int wherePathSolver(WhereInfo *pWInfo, LogEst nRowEst){ } pWInfo->nRowOut = pFrom->nRow + pWInfo->nOutStarDelta; +#ifdef WHERETRACE_ENABLED + pWInfo->rTotalCost = pFrom->rCost + pWInfo->nOutStarDelta; +#endif /* Free temporary memory and return success */ sqlite3StackFreeNN(pParse->db, pSpace); @@ -6836,7 +6905,8 @@ WhereInfo *sqlite3WhereBegin( assert( db->mallocFailed==0 ); #ifdef WHERETRACE_ENABLED if( sqlite3WhereTrace ){ - sqlite3DebugPrintf("---- Solution nRow=%d", pWInfo->nRowOut); + sqlite3DebugPrintf("---- Solution cost=%d, nRow=%d", + pWInfo->rTotalCost, pWInfo->nRowOut); if( pWInfo->nOBSat>0 ){ sqlite3DebugPrintf(" ORDERBY=%d,0x%llx", pWInfo->nOBSat, pWInfo->revMask); } diff --git a/src/whereInt.h b/src/whereInt.h index f44c040418..5bea70ba9f 100644 --- a/src/whereInt.h +++ b/src/whereInt.h @@ -486,8 +486,12 @@ struct WhereInfo { unsigned untestedTerms :1; /* Not all WHERE terms resolved by outer loop */ unsigned bOrderedInnerLoop:1;/* True if only the inner-most loop is ordered */ unsigned sorted :1; /* True if really sorted (not just grouped) */ + unsigned bStarDone :1; /* True if check for star-query is complete */ LogEst nOutStarDelta; /* Artifical nOut reduction for star-query */ LogEst nRowOut; /* Estimated number of output rows */ +#ifdef WHERETRACE_ENABLED + LogEst rTotalCost; /* Total cost of the solution */ +#endif int iTop; /* The very beginning of the WHERE loop */ int iEndWhere; /* End of the WHERE clause itself */ WhereLoop *pLoops; /* List of all WhereLoop objects */