1
0
mirror of https://github.com/sqlite/sqlite.git synced 2025-08-07 02:42:48 +03:00

Defer computing the MemPage.nFree value of an in-memory btree page

until it is actually needed, since for many pages it is never needed.
This checkin works sufficiently to prove the concept, but still has issues
with exception handling.

FossilOrigin-Name: 1d43ee4000b71f5c6d49244dee96358c567f09ba3451b9d22895a796d3f61ad6
This commit is contained in:
drh
2019-02-09 21:06:40 +00:00
parent 2fa619a3a8
commit b0ea9432a1
4 changed files with 159 additions and 85 deletions

View File

@@ -1,5 +1,5 @@
C Change\sa\sfew\sassert()\sstatements\sin\sfts3\sthat\smight\sfail\sif\sthe\sdatabase\sis\scorrupt.
D 2019-02-09T19:23:54.418
C Defer\scomputing\sthe\sMemPage.nFree\svalue\sof\san\sin-memory\sbtree\spage\nuntil\sit\sis\sactually\sneeded,\ssince\sfor\smany\spages\sit\sis\snever\sneeded.\nThis\scheckin\sworks\ssufficiently\sto\sprove\sthe\sconcept,\sbut\sstill\shas\sissues\nwith\sexception\shandling.
D 2019-02-09T21:06:40.308
F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1
F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea
F Makefile.in 178d8eb6840771149cee40b322d1b3be30d330198c522c903c1b66fb5a1bfca4
@@ -455,9 +455,9 @@ F src/auth.c 0fac71038875693a937e506bceb492c5f136dd7b1249fbd4ae70b4e8da14f9df
F src/backup.c 78d3cecfbe28230a3a9a1793e2ead609f469be43e8f486ca996006be551857ab
F src/bitvec.c 17ea48eff8ba979f1f5b04cc484c7bb2be632f33
F src/btmutex.c 8acc2f464ee76324bf13310df5692a262b801808984c1b79defb2503bbafadb6
F src/btree.c 18046bf14f0e3fa294ef3f7c2dc30ca7e95f3ac11ec222ad906e40b150051bde
F src/btree.c b2bb677fa5d2f18b7c38869761e0d7d1d66ff3a7bdac509865e0606b281d32bc
F src/btree.h 63b94fb38ce571c15eb6a3661815561b501d23d5948b2d1e951fbd7a2d04e8d3
F src/btreeInt.h cd82f0f08886078bf99b29e1a7045960b1ca5d9d5829c38607e1299c508eaf00
F src/btreeInt.h d7520b98e72f9a7e2a3140cc476df461fa8a34a3d56258184f8c26f70248cef9
F src/build.c 906ca6663b9dcd413e72ae9c44dd51e596d8336b04d52e678a7501e71c20cab2
F src/callback.c 25dda5e1c2334a367b94a64077b1d06b2553369f616261ca6783c48bcb6bda73
F src/complete.c a3634ab1e687055cd002e11b8f43eb75c17da23e
@@ -1804,7 +1804,10 @@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93
F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc
F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e
F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0
P 1969372ac72d25cc642a0268f4bb0ae4b59f2dca568c119ef61b67183b3a8bd9
R 337fdf56fb6120be7b5570f5a6b13c97
U dan
Z 3ee700c86d7982301bd59c67543d08c0
P db74a56af73d92b7a9d43ceda7e4540915c580c68a0266b4ddefb9e0d5cbcbeb
R 6351a98578ba74f863fbe86b96b0dce9
T *branch * deferred-free-space
T *sym-deferred-free-space *
T -sym-trunk *
U drh
Z 38c6e8e54a38cca2a22ac1c0553f905f

View File

@@ -1 +1 @@
db74a56af73d92b7a9d43ceda7e4540915c580c68a0266b4ddefb9e0d5cbcbeb
1d43ee4000b71f5c6d49244dee96358c567f09ba3451b9d22895a796d3f61ad6

View File

@@ -1506,6 +1506,7 @@ static int defragmentPage(MemPage *pPage, int nMaxFrag){
data[hdr+7] = 0;
defragment_out:
assert( pPage->nFree>=0 );
if( data[hdr+7]+cbrk-iCellFirst!=pPage->nFree ){
return SQLITE_CORRUPT_PAGE(pPage);
}
@@ -1657,6 +1658,7 @@ static int allocateSpace(MemPage *pPage, int nByte, int *pIdx){
testcase( gap+2+nByte==top );
if( gap+2+nByte>top ){
assert( pPage->nCell>0 || CORRUPT_DB );
assert( pPage->nFree>=0 );
rc = defragmentPage(pPage, MIN(4, pPage->nFree - (2+nByte)));
if( rc ) return rc;
top = get2byteNotZero(&data[hdr+5]);
@@ -1846,21 +1848,14 @@ static int decodeFlags(MemPage *pPage, int flagByte){
}
/*
** Initialize the auxiliary information for a disk block.
**
** Return SQLITE_OK on success. If we see that the page does
** not contain a well-formed database page, then return
** SQLITE_CORRUPT. Note that a return of SQLITE_OK does not
** guarantee that the page is well-formed. It only shows that
** we failed to detect any corruption.
** Compute the amount of freespace on the page. In other words, fill
** in the pPage->nFree field.
*/
static int btreeInitPage(MemPage *pPage){
static int btreeComputeFreeSpace(MemPage *pPage){
int pc; /* Address of a freeblock within pPage->aData[] */
u8 hdr; /* Offset to beginning of page header */
u8 *data; /* Equal to pPage->aData */
BtShared *pBt; /* The main btree structure */
int usableSize; /* Amount of usable space on each page */
u16 cellOffset; /* Offset from start of page to first cell pointer */
int nFree; /* Number of unused bytes on the page */
int top; /* First byte of the cell content area */
int iCellFirst; /* First allowable cell or freeblock offset */
@@ -1872,71 +1867,18 @@ static int btreeInitPage(MemPage *pPage){
assert( pPage->pgno==sqlite3PagerPagenumber(pPage->pDbPage) );
assert( pPage == sqlite3PagerGetExtra(pPage->pDbPage) );
assert( pPage->aData == sqlite3PagerGetData(pPage->pDbPage) );
assert( pPage->isInit==0 );
assert( pPage->isInit==1 );
assert( pPage->nFree<0 );
pBt = pPage->pBt;
usableSize = pPage->pBt->usableSize;
hdr = pPage->hdrOffset;
data = pPage->aData;
/* EVIDENCE-OF: R-28594-02890 The one-byte flag at offset 0 indicating
** the b-tree page type. */
if( decodeFlags(pPage, data[hdr]) ){
return SQLITE_CORRUPT_PAGE(pPage);
}
assert( pBt->pageSize>=512 && pBt->pageSize<=65536 );
pPage->maskPage = (u16)(pBt->pageSize - 1);
pPage->nOverflow = 0;
usableSize = pBt->usableSize;
pPage->cellOffset = cellOffset = hdr + 8 + pPage->childPtrSize;
pPage->aDataEnd = &data[usableSize];
pPage->aCellIdx = &data[cellOffset];
pPage->aDataOfst = &data[pPage->childPtrSize];
/* EVIDENCE-OF: R-58015-48175 The two-byte integer at offset 5 designates
** the start of the cell content area. A zero value for this integer is
** interpreted as 65536. */
top = get2byteNotZero(&data[hdr+5]);
/* EVIDENCE-OF: R-37002-32774 The two-byte integer at offset 3 gives the
** number of cells on the page. */
pPage->nCell = get2byte(&data[hdr+3]);
if( pPage->nCell>MX_CELL(pBt) ){
/* To many cells for a single page. The page must be corrupt */
return SQLITE_CORRUPT_PAGE(pPage);
}
testcase( pPage->nCell==MX_CELL(pBt) );
/* EVIDENCE-OF: R-24089-57979 If a page contains no cells (which is only
** possible for a root page of a table that contains no rows) then the
** offset to the cell content area will equal the page size minus the
** bytes of reserved space. */
assert( pPage->nCell>0 || top==usableSize || CORRUPT_DB );
/* A malformed database page might cause us to read past the end
** of page when parsing a cell.
**
** The following block of code checks early to see if a cell extends
** past the end of a page boundary and causes SQLITE_CORRUPT to be
** returned if it does.
*/
iCellFirst = cellOffset + 2*pPage->nCell;
iCellFirst = hdr + 8 + pPage->childPtrSize + 2*pPage->nCell;
iCellLast = usableSize - 4;
if( pBt->db->flags & SQLITE_CellSizeCk ){
int i; /* Index into the cell pointer array */
int sz; /* Size of a cell */
if( !pPage->leaf ) iCellLast--;
for(i=0; i<pPage->nCell; i++){
pc = get2byteAligned(&data[cellOffset+i*2]);
testcase( pc==iCellFirst );
testcase( pc==iCellLast );
if( pc<iCellFirst || pc>iCellLast ){
return SQLITE_CORRUPT_PAGE(pPage);
}
sz = pPage->xCellSize(pPage, &data[pc]);
testcase( pc+sz==usableSize );
if( pc+sz>usableSize ){
return SQLITE_CORRUPT_PAGE(pPage);
}
}
if( !pPage->leaf ) iCellLast++;
}
/* Compute the total free space on the page
** EVIDENCE-OF: R-23588-34450 The two-byte integer at offset 1 gives the
@@ -1984,6 +1926,98 @@ static int btreeInitPage(MemPage *pPage){
return SQLITE_CORRUPT_PAGE(pPage);
}
pPage->nFree = (u16)(nFree - iCellFirst);
return SQLITE_OK;
}
/*
** Initialize the auxiliary information for a disk block.
**
** Return SQLITE_OK on success. If we see that the page does
** not contain a well-formed database page, then return
** SQLITE_CORRUPT. Note that a return of SQLITE_OK does not
** guarantee that the page is well-formed. It only shows that
** we failed to detect any corruption.
*/
static int btreeInitPage(MemPage *pPage){
int pc; /* Address of a freeblock within pPage->aData[] */
u8 hdr; /* Offset to beginning of page header */
u8 *data; /* Equal to pPage->aData */
BtShared *pBt; /* The main btree structure */
int usableSize; /* Amount of usable space on each page */
u16 cellOffset; /* Offset from start of page to first cell pointer */
int iCellFirst; /* First allowable cell or freeblock offset */
int iCellLast; /* Last possible cell or freeblock offset */
assert( pPage->pBt!=0 );
assert( pPage->pBt->db!=0 );
assert( sqlite3_mutex_held(pPage->pBt->mutex) );
assert( pPage->pgno==sqlite3PagerPagenumber(pPage->pDbPage) );
assert( pPage == sqlite3PagerGetExtra(pPage->pDbPage) );
assert( pPage->aData == sqlite3PagerGetData(pPage->pDbPage) );
assert( pPage->isInit==0 );
pBt = pPage->pBt;
hdr = pPage->hdrOffset;
data = pPage->aData;
/* EVIDENCE-OF: R-28594-02890 The one-byte flag at offset 0 indicating
** the b-tree page type. */
if( decodeFlags(pPage, data[hdr]) ){
return SQLITE_CORRUPT_PAGE(pPage);
}
assert( pBt->pageSize>=512 && pBt->pageSize<=65536 );
pPage->maskPage = (u16)(pBt->pageSize - 1);
pPage->nOverflow = 0;
usableSize = pBt->usableSize;
pPage->cellOffset = cellOffset = hdr + 8 + pPage->childPtrSize;
pPage->aDataEnd = &data[usableSize];
pPage->aCellIdx = &data[cellOffset];
pPage->aDataOfst = &data[pPage->childPtrSize];
/* EVIDENCE-OF: R-37002-32774 The two-byte integer at offset 3 gives the
** number of cells on the page. */
pPage->nCell = get2byte(&data[hdr+3]);
if( pPage->nCell>MX_CELL(pBt) ){
/* To many cells for a single page. The page must be corrupt */
return SQLITE_CORRUPT_PAGE(pPage);
}
testcase( pPage->nCell==MX_CELL(pBt) );
/* EVIDENCE-OF: R-24089-57979 If a page contains no cells (which is only
** possible for a root page of a table that contains no rows) then the
** offset to the cell content area will equal the page size minus the
** bytes of reserved space. */
assert( pPage->nCell>0
|| get2byteNotZero(&data[hdr+5])==usableSize
|| CORRUPT_DB );
/* A malformed database page might cause us to read past the end
** of page when parsing a cell.
**
** The following block of code checks early to see if a cell extends
** past the end of a page boundary and causes SQLITE_CORRUPT to be
** returned if it does.
*/
iCellFirst = cellOffset + 2*pPage->nCell;
iCellLast = usableSize - 4;
if( pBt->db->flags & SQLITE_CellSizeCk ){
int i; /* Index into the cell pointer array */
int sz; /* Size of a cell */
if( !pPage->leaf ) iCellLast--;
for(i=0; i<pPage->nCell; i++){
pc = get2byteAligned(&data[cellOffset+i*2]);
testcase( pc==iCellFirst );
testcase( pc==iCellLast );
if( pc<iCellFirst || pc>iCellLast ){
return SQLITE_CORRUPT_PAGE(pPage);
}
sz = pPage->xCellSize(pPage, &data[pc]);
testcase( pc+sz==usableSize );
if( pc+sz>usableSize ){
return SQLITE_CORRUPT_PAGE(pPage);
}
}
if( !pPage->leaf ) iCellLast++;
}
pPage->nFree = -1; /* Indicate that this value is yet uncomputed */
pPage->isInit = 1;
return SQLITE_OK;
}
@@ -2127,19 +2161,18 @@ static int getAndInitPage(
if( pgno>btreePagecount(pBt) ){
rc = SQLITE_CORRUPT_BKPT;
goto getAndInitPage_error;
goto getAndInitPage_error1;
}
rc = sqlite3PagerGet(pBt->pPager, pgno, (DbPage**)&pDbPage, bReadOnly);
if( rc ){
goto getAndInitPage_error;
goto getAndInitPage_error1;
}
*ppPage = (MemPage*)sqlite3PagerGetExtra(pDbPage);
if( (*ppPage)->isInit==0 ){
btreePageFromDbPage(pDbPage, pgno, pBt);
rc = btreeInitPage(*ppPage);
if( rc!=SQLITE_OK ){
releasePage(*ppPage);
goto getAndInitPage_error;
goto getAndInitPage_error2;
}
}
assert( (*ppPage)->pgno==pgno );
@@ -2149,12 +2182,13 @@ static int getAndInitPage(
** compatible with the root page. */
if( pCur && ((*ppPage)->nCell<1 || (*ppPage)->intKey!=pCur->curIntKey) ){
rc = SQLITE_CORRUPT_PGNO(pgno);
releasePage(*ppPage);
goto getAndInitPage_error;
goto getAndInitPage_error2;
}
return SQLITE_OK;
getAndInitPage_error:
getAndInitPage_error2:
releasePage(*ppPage);
getAndInitPage_error1:
if( pCur ){
pCur->iPage--;
pCur->pPage = pCur->apPage[pCur->iPage];
@@ -6566,6 +6600,7 @@ static void dropCell(MemPage *pPage, int idx, int sz, int *pRC){
assert( CORRUPT_DB || sz==cellSize(pPage, idx) );
assert( sqlite3PagerIswriteable(pPage->pDbPage) );
assert( sqlite3_mutex_held(pPage->pBt->mutex) );
assert( pPage->nFree>=0 );
data = pPage->aData;
ptr = &pPage->aCellIdx[2*idx];
pc = get2byte(ptr);
@@ -6636,6 +6671,7 @@ static void insertCell(
** might be less than 8 (leaf-size + pointer) on the interior node. Hence
** the term after the || in the following assert(). */
assert( sz==pPage->xCellSize(pPage, pCell) || (sz==8 && iChild>0) );
assert( pPage->nFree>=0 );
if( pPage->nOverflow || sz+2>pPage->nFree ){
if( pTemp ){
memcpy(pTemp, pCell, sz);
@@ -7187,8 +7223,17 @@ static int balance_quick(MemPage *pParent, MemPage *pPage, u8 *pSpace){
assert( sqlite3_mutex_held(pPage->pBt->mutex) );
assert( sqlite3PagerIswriteable(pParent->pDbPage) );
assert( pPage->nOverflow==1 );
if( pPage->nCell==0 ) return SQLITE_CORRUPT_BKPT; /* dbfuzz001.test */
if( pPage->nFree<0 ){
rc = btreeComputeFreeSpace(pPage);
if( rc ) return rc;
}
if( pParent->nFree<0 ){
rc = btreeComputeFreeSpace(pParent);
if( rc ) return rc;
}
/* Allocate a new page. This page will become the right-sibling of
** pPage. Make the parent page writable, so that the new divider cell
@@ -7466,6 +7511,10 @@ static int balance_nonroot(
if( !aOvflSpace ){
return SQLITE_NOMEM_BKPT;
}
if( pParent->nFree<0 ){
rc = btreeComputeFreeSpace(pParent);
if( rc ) return rc;
}
/* Find the sibling pages to balance. Also locate the cells in pParent
** that divide the siblings. An attempt is made to find NN siblings on
@@ -7501,6 +7550,9 @@ static int balance_nonroot(
pgno = get4byte(pRight);
while( 1 ){
rc = getAndInitPage(pBt, pgno, &apOld[i], 0, 0);
if( rc==0 && apOld[i]->nFree<0 ){
rc = btreeComputeFreeSpace(apOld[i]);
}
if( rc ){
memset(apOld, 0, (i+1)*sizeof(MemPage*));
goto balance_cleanup;
@@ -7704,6 +7756,7 @@ static int balance_nonroot(
b.apEnd[k] = pParent->aDataEnd;
b.ixNx[k] = cntOld[i]+1;
}
assert( p->nFree>=0 );
szNew[i] = usableSpace - p->nFree;
for(j=0; j<p->nOverflow; j++){
szNew[i] += 2 + p->xCellSize(p, p->apOvfl[j]);
@@ -8247,6 +8300,10 @@ static int balance(BtCursor *pCur){
int iPage = pCur->iPage;
MemPage *pPage = pCur->pPage;
if( pPage->nFree<0 ){
rc = btreeComputeFreeSpace(pPage);
if( rc ) break;
}
if( iPage==0 ){
if( pPage->nOverflow ){
/* The root page of the b-tree is overfull. In this case call the
@@ -8621,6 +8678,10 @@ int sqlite3BtreeInsert(
pPage = pCur->pPage;
assert( pPage->intKey || pX->nKey>=0 );
assert( pPage->leaf || !pPage->intKey );
if( pPage->nFree<0 ){
rc = btreeComputeFreeSpace(pPage);
if( rc ) return rc;
}
TRACE(("INSERT: table=%d nkey=%lld ndata=%d page=%d %s\n",
pCur->pgnoRoot, pX->nKey, pX->nData, pPage->pgno,
@@ -8771,6 +8832,7 @@ int sqlite3BtreeDelete(BtCursor *pCur, u8 flags){
iCellIdx = pCur->ix;
pPage = pCur->pPage;
pCell = findCell(pPage, iCellIdx);
if( pPage->nFree<0 && btreeComputeFreeSpace(pPage) ) return SQLITE_CORRUPT;
/* If the bPreserve flag is set to true, then the cursor position must
** be preserved following this delete operation. If the current delete
@@ -8841,6 +8903,10 @@ int sqlite3BtreeDelete(BtCursor *pCur, u8 flags){
Pgno n;
unsigned char *pTmp;
if( pLeaf->nFree<0 ){
rc = btreeComputeFreeSpace(pLeaf);
if( rc ) return rc;
}
if( iCellDepth<pCur->iPage-1 ){
n = pCur->apPage[iCellDepth+1]->pgno;
}else{
@@ -9732,6 +9798,11 @@ static int checkTreePage(
"btreeInitPage() returns error code %d", rc);
goto end_of_check;
}
if( (rc = btreeComputeFreeSpace(pPage))!=0 ){
assert( rc==SQLITE_CORRUPT );
checkAppendMsg(pCheck, "free space corruption", rc);
goto end_of_check;
}
data = pPage->aData;
hdr = pPage->hdrOffset;

View File

@@ -286,7 +286,7 @@ struct MemPage {
u16 maxLocal; /* Copy of BtShared.maxLocal or BtShared.maxLeaf */
u16 minLocal; /* Copy of BtShared.minLocal or BtShared.minLeaf */
u16 cellOffset; /* Index in aData of first cell pointer */
u16 nFree; /* Number of free bytes on the page */
int nFree; /* Number of free bytes on the page */
u16 nCell; /* Number of cells on this page, local and ovfl */
u16 maskPage; /* Mask for page offset */
u16 aiOvfl[4]; /* Insert the i-th overflow cell before the aiOvfl-th