1
0
mirror of https://github.com/sqlite/sqlite.git synced 2025-08-07 02:42:48 +03:00

Change the balance_nonroot() routine to reduce the amount of memcpy work that takes place. This is a work in progress.

FossilOrigin-Name: 29304499ea4b72dbb6701e10cc19b5d41f7e5ac9
This commit is contained in:
dan
2014-10-09 19:35:37 +00:00
parent 588400b861
commit 33ea486603
5 changed files with 292 additions and 236 deletions

View File

@@ -1,5 +1,5 @@
C Reduce\sthe\samount\sof\smemcpy()\srequired\sby\sdefragmentPage().
D 2014-09-27T05:00:25.096
C Change\sthe\sbalance_nonroot()\sroutine\sto\sreduce\sthe\samount\sof\smemcpy\swork\sthat\stakes\splace.\sThis\sis\sa\swork\sin\sprogress.
D 2014-10-09T19:35:37.452
F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f
F Makefile.in cf57f673d77606ab0f2d9627ca52a9ba1464146a
F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23
@@ -172,7 +172,7 @@ F src/auth.c d8abcde53426275dab6243b441256fcd8ccbebb2
F src/backup.c a31809c65623cc41849b94d368917f8bb66e6a7e
F src/bitvec.c 19a4ba637bd85f8f63fc8c9bae5ade9fb05ec1cb
F src/btmutex.c 49ca66250c7dfa844a4d4cb8272b87420d27d3a5
F src/btree.c 95a942a6ebdb23eb2a5d925526d35169aa6742f6
F src/btree.c 7b89fde3bffa5b7300e94c4aeb69ccff926ef513
F src/btree.h a79aa6a71e7f1055f01052b7f821bd1c2dce95c8
F src/btreeInt.h 1bd7957161a1346a914f1f09231610e777a8e58d
F src/build.c bde83dd5cf812e310a7e5ad2846790a14745bef4
@@ -215,8 +215,8 @@ F src/os_setup.h c9d4553b5aaa6f73391448b265b89bed0b890faa
F src/os_unix.c fb587121840f690101336879adfa6d0b2cd0e8c7
F src/os_win.c 0a4042ef35f322e86fa01f6c8884c5e645b911e7
F src/os_win.h 09e751b20bbc107ffbd46e13555dc73576d88e21
F src/pager.c caab007743821d96752597c9cfd7351654697b06
F src/pager.h ffd5607f7b3e4590b415b007a4382f693334d428
F src/pager.c 0abcb0904a78d68b96357f360c6b160bcfc2a3e0
F src/pager.h 8b6707cb32c788cf36bfc3d63f6d4b4fa689e7c2
F src/parse.y b98772da2bb5415970085b707203f92569400aa8
F src/pcache.c 4121a0571c18581ee9f82f086d5e2030051ebd6a
F src/pcache.h 9b559127b83f84ff76d735c8262f04853be0c59a
@@ -1200,10 +1200,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1
F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4
F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32
F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f
P 83913515830aa850f9e38406f9422d7e88dcab66
R 66a1e1f00a844450677737824735607d
T *branch * defrag-opt
T *sym-defrag-opt *
T -sym-trunk *
U drh
Z f60a1f2e4650c574e91ad245c0c67dee
P 3edab9957cc7bb90b52fd40b02613c2cb03fc166
R 5fca1836b5a4d862df24682ecb47d048
U dan
Z 8527330c8f275358262176fc962502e2

View File

@@ -1 +1 @@
3edab9957cc7bb90b52fd40b02613c2cb03fc166
29304499ea4b72dbb6701e10cc19b5d41f7e5ac9

View File

@@ -5977,6 +5977,49 @@ static void assemblePage(
pPage->nCell = (u16)nCell;
}
/*
** Rewrite the cell-pointer array and cell-content area of page pPg so that
** the page holds exactly the nCell cells apCell[0..nCell-1], whose sizes in
** bytes are szCell[0..nCell-1]. Cells are packed downward from the end of
** the usable area in array order, and the page header fields at hdr+1,
** hdr+3, hdr+5 and hdr+7 are rewritten to match.
**
** Entries of apCell[] may point into pPg->aData itself. So that those cells
** stay readable while the page is being overwritten, the existing content
** area is first copied to the same offsets of the pager temp-space buffer,
** and any apCell[] pointer that lies inside the page is redirected to that
** copy. (Assumes the temp-space buffer holds at least usableSize bytes -
** TODO confirm against sqlite3PagerTempSpace().)
**
** The nRemove parameter is not used by this implementation.
*/
static void rebuildPage(
  MemPage *pPg,                   /* Edit this page */
  int nRemove,                    /* Cells to remove from start of page */
  int nCell,                      /* Final number of cells on page */
  u8 **apCell,                    /* Array of nCell final cells */
  u16 *szCell                     /* Array of nCell cell sizes */
){
  const int hdr = pPg->hdrOffset;          /* Offset of header on pPg */
  u8 * const aData = pPg->aData;           /* Pointer to data for pPg */
  const int usableSize = pPg->pBt->usableSize;
  u8 * const pEnd = &aData[usableSize];    /* One past the usable area */
  int i;                                   /* Loop counter over apCell[] */
  int iContent;                            /* Offset of old content area */
  u8 *pCellptr = pPg->aCellIdx;            /* Next cell-pointer slot to fill */
  u8 *pTmp = sqlite3PagerTempSpace(pPg->pBt->pPager);
  u8 *pData;                               /* Start of cells written so far */

  (void)nRemove;

  /* Snapshot the current content area. If the stored offset lies beyond the
  ** usable area (a damaged header), "usableSize - iContent" would be
  ** negative and become an enormous size_t inside memcpy(). Fall back to
  ** copying the entire usable area in that case. */
  iContent = get2byte(&aData[hdr+5]);
  if( iContent>usableSize ) iContent = 0;
  memcpy(&pTmp[iContent], &aData[iContent], usableSize - iContent);

  pData = pEnd;
  for(i=0; i<nCell; i++){
    u8 *pCell = apCell[i];
    if( pCell>aData && pCell<pEnd ){
      /* The cell lives on this page: read it from the snapshot instead. */
      pCell = &pTmp[pCell - aData];
    }
    pData -= szCell[i];
    memcpy(pData, pCell, szCell[i]);
    put2byte(pCellptr, (pData - aData));
    pCellptr += 2;
    assert( szCell[i]==cellSizePtr(pPg, pCell) );
  }

  /* Free space is the gap between the end of the cell-pointer array and the
  ** start of the cell content just written. */
  pPg->nFree = (pData - pCellptr);
  pPg->nCell = (u16)nCell;
  pPg->nOverflow = 0;

  put2byte(&aData[hdr+1], 0);              /* Bytes hdr+1..2 cleared */
  put2byte(&aData[hdr+3], pPg->nCell);     /* Number of cells on the page */
  put2byte(&aData[hdr+5], pData - aData);  /* Start of cell content area */
  aData[hdr+7] = 0x00;                     /* Byte hdr+7 cleared */
}
/*
** The following parameters determine how many adjacent pages get involved
** in a balancing operation. NN is the number of neighbors on either side
@@ -6098,7 +6141,7 @@ static int balance_quick(MemPage *pParent, MemPage *pPage, u8 *pSpace){
}
#endif /* SQLITE_OMIT_QUICKBALANCE */
#if 0
#if 1
/*
** This function does not contribute anything to the operation of SQLite.
** It is sometimes activated temporarily while debugging code responsible
@@ -6265,7 +6308,6 @@ static int balance_nonroot(
int iOvflSpace = 0; /* First unused byte of aOvflSpace[] */
int szScratch; /* Size of scratch memory requested */
MemPage *apOld[NB]; /* pPage and up to two siblings */
MemPage *apCopy[NB]; /* Private copies of apOld[] pages */
MemPage *apNew[NB+2]; /* pPage and up to NB siblings after balancing */
u8 *pRight; /* Location in parent of right-sibling pointer */
u8 *apDiv[NB-1]; /* Divider cells in pParent */
@@ -6276,6 +6318,13 @@ static int balance_nonroot(
u8 *aSpace1; /* Space for copies of dividers cells */
Pgno pgno; /* Temp var to store a page number in */
int aShiftLeft[NB+2];
int aShiftRight[NB+2];
u8 abDone[NB+2];
Pgno aPgno[NB+2];
u16 aPgFlags[NB+2];
memset(abDone, 0, sizeof(abDone));
pBt = pParent->pBt;
assert( sqlite3_mutex_held(pBt->mutex) );
assert( sqlite3PagerIswriteable(pParent->pDbPage) );
@@ -6384,12 +6433,10 @@ static int balance_nonroot(
/*
** Allocate space for memory structures
*/
k = pBt->pageSize + ROUND8(sizeof(MemPage));
szScratch =
nMaxCells*sizeof(u8*) /* apCell */
+ nMaxCells*sizeof(u16) /* szCell */
+ pBt->pageSize /* aSpace1 */
+ k*nOld; /* Page copies (apCopy) */
+ pBt->pageSize; /* aSpace1 */
apCell = sqlite3ScratchMalloc( szScratch );
if( apCell==0 ){
rc = SQLITE_NOMEM;
@@ -6402,8 +6449,8 @@ static int balance_nonroot(
/*
** Load pointers to all cells on sibling pages and the divider cells
** into the local apCell[] array. Make copies of the divider cells
** into space obtained from aSpace1[] and remove the divider cells
** from pParent.
** into space obtained from aSpace1[]. The divider cells have already
** been removed from pParent.
**
** If the siblings are on leaf pages, then the child pointers of the
** divider cells are stripped from the cells before they are copied
@@ -6419,15 +6466,7 @@ static int balance_nonroot(
leafData = apOld[0]->intKeyLeaf;
for(i=0; i<nOld; i++){
int limit;
/* Before doing anything else, take a copy of the i'th original sibling
** The rest of this function will use data from the copies rather
** that the original pages since the original pages will be in the
** process of being overwritten. */
MemPage *pOld = apCopy[i] = (MemPage*)&aSpace1[pBt->pageSize + k*i];
memcpy(pOld, apOld[i], sizeof(MemPage));
pOld->aData = (void*)&pOld[1];
memcpy(pOld->aData, apOld[i]->aData, pBt->pageSize);
MemPage *pOld = apOld[i];
limit = pOld->nCell+pOld->nOverflow;
if( pOld->nOverflow>0 ){
@@ -6556,10 +6595,10 @@ static int balance_nonroot(
assert( cntNew[0]>0 || (pParent->pgno==1 && pParent->nCell==0) );
#endif
TRACE(("BALANCE: old: %d %d %d ",
apOld[0]->pgno,
nOld>=2 ? apOld[1]->pgno : 0,
nOld>=3 ? apOld[2]->pgno : 0
TRACE(("BALANCE: old: %d(nc=%d) %d(nc=%d) %d(nc=%d)\n",
apOld[0]->pgno, apOld[0]->nCell,
nOld>=2 ? apOld[1]->pgno : 0, nOld>=2 ? apOld[1]->nCell : 0,
nOld>=3 ? apOld[2]->pgno : 0, nOld>=3 ? apOld[2]->nCell : 0
));
/*
@@ -6582,6 +6621,7 @@ static int balance_nonroot(
assert( i>0 );
rc = allocateBtreePage(pBt, &pNew, &pgno, (bBulk ? 1 : pgno), 0);
if( rc ) goto balance_cleanup;
zeroPage(pNew, pageFlags);
apNew[i] = pNew;
nNew++;
@@ -6595,135 +6635,223 @@ static int balance_nonroot(
}
}
/* Free any old pages that were not reused as new pages.
*/
while( i<nOld ){
freePage(apOld[i], &rc);
if( rc ) goto balance_cleanup;
releasePage(apOld[i]);
apOld[i] = 0;
i++;
}
/*
** Put the new pages in ascending order. This helps to
** keep entries in the disk file in order so that a scan
** of the table is a linear scan through the file. That
** in turn helps the operating system to deliver pages
** from the disk more rapidly.
** Reassign page numbers so that the new pages are in ascending order.
** This helps to keep entries in the disk file in order so that a scan
** of the table is closer to a linear scan through the file. That in turn
** helps the operating system to deliver pages from the disk more rapidly.
**
** An O(n^2) insertion sort algorithm is used, but since
** n is never more than NB (a small constant), that should
** not be a problem.
** An O(n^2) insertion sort algorithm is used, but since n is never more
** than (NB+2) (a small constant), that should not be a problem.
**
** When NB==3, this one optimization makes the database
** about 25% faster for large insertions and deletions.
** When NB==3, this one optimization makes the database about 25% faster
** for large insertions and deletions.
*/
for(i=0; i<k-1; i++){
int minV = apNew[i]->pgno;
int minI = i;
for(j=i+1; j<k; j++){
if( apNew[j]->pgno<(unsigned)minV ){
minI = j;
minV = apNew[j]->pgno;
for(i=0; i<nNew; i++){
aPgno[i] = apNew[i]->pgno;
aPgFlags[i] = apNew[i]->pDbPage->flags;
}
for(i=0; i<nNew; i++){
Pgno iGt = (i==0 ? 0 : apNew[i-1]->pgno);
Pgno iMin = 0;
u16 flags = 0;
for(j=0; j<nNew; j++){
Pgno iPgno = aPgno[j];
if( iPgno>iGt && (iMin==0 || iPgno<iMin) ){
iMin = iPgno;
flags = aPgFlags[j];
}
}
if( minI>i ){
MemPage *pT;
pT = apNew[i];
apNew[i] = apNew[minI];
apNew[minI] = pT;
if( apNew[i]->pgno!=iMin ){
apNew[i]->pDbPage->flags = flags;
sqlite3PagerRekey(apNew[i]->pDbPage, iMin);
apNew[i]->pgno = iMin;
}
}
TRACE(("new: %d(%d) %d(%d) %d(%d) %d(%d) %d(%d)\n",
apNew[0]->pgno, szNew[0],
TRACE(("BALANCE: new: %d(%d nc=%d) %d(%d nc=%d) %d(%d nc=%d) "
"%d(%d nc=%d) %d(%d nc=%d)\n",
apNew[0]->pgno, szNew[0], cntNew[0],
nNew>=2 ? apNew[1]->pgno : 0, nNew>=2 ? szNew[1] : 0,
nNew>=2 ? cntNew[1] - cntNew[0] - !leafData : 0,
nNew>=3 ? apNew[2]->pgno : 0, nNew>=3 ? szNew[2] : 0,
nNew>=3 ? cntNew[2] - cntNew[1] - !leafData : 0,
nNew>=4 ? apNew[3]->pgno : 0, nNew>=4 ? szNew[3] : 0,
nNew>=5 ? apNew[4]->pgno : 0, nNew>=5 ? szNew[4] : 0));
nNew>=4 ? cntNew[3] - cntNew[2] - !leafData : 0,
nNew>=5 ? apNew[4]->pgno : 0, nNew>=5 ? szNew[4] : 0,
nNew>=5 ? cntNew[4] - cntNew[3] - !leafData : 0
));
assert( sqlite3PagerIswriteable(pParent->pDbPage) );
put4byte(pRight, apNew[nNew-1]->pgno);
/*
** Evenly distribute the data in apCell[] across the new pages.
** Insert divider cells into pParent as necessary.
*/
j = 0;
for(i=0; i<nNew; i++){
/* Assemble the new sibling page. */
MemPage *pNew = apNew[i];
assert( j<nMaxCells );
zeroPage(pNew, pageFlags);
assemblePage(pNew, cntNew[i]-j, &apCell[j], &szCell[j]);
assert( pNew->nCell>0 || (nNew==1 && cntNew[0]==0) );
assert( pNew->nOverflow==0 );
/* At this point, "j" is the apCell[] index of the first cell currently
** stored on page apNew[i]. Or, if apNew[i] was not one of the original
** sibling pages, "j" should be set to nCell. Variable iFirst is set
** to the apCell[] index of the first cell that will appear on the
** page following this balancing operation. */
int iFirst = (i==0 ? 0 : cntNew[i-1] + !leafData); /* new first cell */
assert( i<nOld || j==nCell );
aShiftLeft[i] = j - iFirst;
j += apNew[i]->nCell + apNew[i]->nOverflow;
aShiftRight[i] = cntNew[i] - j;
assert( i!=nOld-1 || j==nCell );
if( j<nCell ) j += !leafData;
}
j = cntNew[i];
/* If the sibling pages are not leaves, ensure that the right-child pointer
** of the right-most new sibling page is set to the value that was
** originally in the same field of the right-most old sibling page. */
if( (pageFlags & PTF_LEAF)==0 && nOld!=nNew ){
MemPage *pOld = (nNew>nOld ? apNew : apOld)[nOld-1];
memcpy(&apNew[nNew-1]->aData[8], &pOld->aData[8], 4);
}
/* If the sibling page assembled above was not the right-most sibling,
** insert a divider cell into the parent page.
*/
assert( i<nNew-1 || j==nCell );
if( j<nCell ){
u8 *pCell;
u8 *pTemp;
int sz;
/* Make any required updates to pointer map entries associated with
** cells stored on sibling pages following the balance operation. Pointer
** map entries associated with divider cells are set by the insertCell()
** routine. The associated pointer map entries are:
**
** a) if the cell contains a reference to an overflow chain, the
** entry associated with the first page in the overflow chain, and
**
** b) if the sibling pages are not leaves, the child page associated
** with the cell.
**
** If the sibling pages are not leaves, then the pointer map entry
** associated with the right-child of each sibling may also need to be
** updated. This happens below, after the sibling pages have been
** populated, not here.
*/
if( ISAUTOVACUUM ){
MemPage *pNew = apNew[0];
u8 *aOld = pNew->aData;
int cntOldNext = pNew->nCell + pNew->nOverflow;
int usableSize = pBt->usableSize;
int iNew = 0;
int iOld = 0;
assert( j<nMaxCells );
pCell = apCell[j];
sz = szCell[j] + leafCorrection;
pTemp = &aOvflSpace[iOvflSpace];
if( !pNew->leaf ){
memcpy(&pNew->aData[8], pCell, 4);
}else if( leafData ){
/* If the tree is a leaf-data tree, and the siblings are leaves,
** then there is no divider cell in apCell[]. Instead, the divider
** cell consists of the integer key for the right-most cell of
** the sibling-page assembled above only.
*/
CellInfo info;
j--;
btreeParseCellPtr(pNew, apCell[j], &info);
pCell = pTemp;
sz = 4 + putVarint(&pCell[4], info.nKey);
pTemp = 0;
}else{
pCell -= 4;
/* Obscure case for non-leaf-data trees: If the cell at pCell was
** previously stored on a leaf node, and its reported size was 4
** bytes, then it may actually be smaller than this
** (see btreeParseCellPtr(), 4 bytes is the minimum size of
** any cell). But it is important to pass the correct size to
** insertCell(), so reparse the cell now.
**
** Note that this can never happen in an SQLite data file, as all
** cells are at least 4 bytes. It only happens in b-trees used
** to evaluate "IN (SELECT ...)" and similar clauses.
*/
if( szCell[j]==4 ){
assert(leafCorrection==4);
sz = cellSizePtr(pParent, pCell);
for(i=0; i<nCell; i++){
u8 *pCell = apCell[i];
if( i==cntOldNext ){
MemPage *pOld = (++iOld)<nNew ? apNew[iOld] : apOld[iOld];
cntOldNext += pOld->nCell + pOld->nOverflow + !leafData;
aOld = pOld->aData;
}
if( i==cntNew[iNew] ){
pNew = apNew[++iNew];
if( !leafData ) continue;
}
/* Cell pCell is destined for new sibling page pNew. Originally, it
** was either part of sibling page iOld (possibly an overflow page),
** or else the divider cell to the left of sibling page iOld. So,
** if sibling page iOld had the same page number as pNew, and if
** pCell really was a part of sibling page iOld (not a divider or
** overflow cell), we can skip updating the pointer map entries. */
if( pNew->pgno!=aPgno[iOld] || pCell<aOld || pCell>=&aOld[usableSize] ){
if( !leafCorrection ){
ptrmapPut(pBt, get4byte(pCell), PTRMAP_BTREE, pNew->pgno, &rc);
}
if( szCell[i]>pNew->minLocal ){
ptrmapPutOvflPtr(pNew, pCell, &rc);
}
}
iOvflSpace += sz;
assert( sz<=pBt->maxLocal+23 );
assert( iOvflSpace <= (int)pBt->pageSize );
insertCell(pParent, nxDiv, pCell, sz, pTemp, pNew->pgno, &rc);
if( rc!=SQLITE_OK ) goto balance_cleanup;
assert( sqlite3PagerIswriteable(pParent->pDbPage) );
j++;
nxDiv++;
}
}
assert( j==nCell );
/* Insert new divider cells into pParent. */
for(i=0; i<nNew-1; i++){
u8 *pCell;
u8 *pTemp;
int sz;
MemPage *pNew = apNew[i];
j = cntNew[i];
assert( j<nMaxCells );
pCell = apCell[j];
sz = szCell[j] + leafCorrection;
pTemp = &aOvflSpace[iOvflSpace];
if( !pNew->leaf ){
memcpy(&pNew->aData[8], pCell, 4);
}else if( leafData ){
/* If the tree is a leaf-data tree, and the siblings are leaves,
** then there is no divider cell in apCell[]. Instead, the divider
** cell consists of the integer key for the right-most cell of
** the sibling-page assembled above only.
*/
CellInfo info;
j--;
btreeParseCellPtr(pNew, apCell[j], &info);
pCell = pTemp;
sz = 4 + putVarint(&pCell[4], info.nKey);
pTemp = 0;
}else{
pCell -= 4;
/* Obscure case for non-leaf-data trees: If the cell at pCell was
** previously stored on a leaf node, and its reported size was 4
** bytes, then it may actually be smaller than this
** (see btreeParseCellPtr(), 4 bytes is the minimum size of
** any cell). But it is important to pass the correct size to
** insertCell(), so reparse the cell now.
**
** Note that this can never happen in an SQLite data file, as all
** cells are at least 4 bytes. It only happens in b-trees used
** to evaluate "IN (SELECT ...)" and similar clauses.
*/
if( szCell[j]==4 ){
assert(leafCorrection==4);
sz = cellSizePtr(pParent, pCell);
}
}
iOvflSpace += sz;
assert( sz<=pBt->maxLocal+23 );
assert( iOvflSpace <= (int)pBt->pageSize );
insertCell(pParent, nxDiv+i, pCell, sz, pTemp, pNew->pgno, &rc);
if( rc!=SQLITE_OK ) goto balance_cleanup;
assert( sqlite3PagerIswriteable(pParent->pDbPage) );
}
/* Now update the actual sibling pages. The order in which they are updated
** is important, as this code needs to avoid disrupting any page from which
** cells may still need to be read. In practice, this means:
**
** 1) If the aShiftLeft[] entry is less than 0, it is not safe to
** update the page until the page to the left of the current page
** (apNew[i-1]) has already been updated.
**
** 2) If the aShiftRight[] entry is less than 0, it is not safe to
** update the page until the page to the right of the current page
** (apNew[i+1]) has already been updated.
**
** If neither of the above apply, the page is safe to update.
*/
assert( aShiftRight[nNew-1]>=0 && aShiftLeft[0]==0 );
for(i=0; i<nNew*2; i++){
int iPg = (i>=nNew ? i-nNew : nNew-1-i);
if( abDone[iPg]==0
&& (aShiftLeft[iPg]>=0 || abDone[iPg-1])
&& (aShiftRight[iPg]>=0 || abDone[iPg+1])
){
MemPage *pNew = apNew[iPg];
int iLeft = ((iPg==0) ? 0 : cntNew[iPg-1] + !leafData);
rebuildPage(pNew,
aShiftLeft[iPg] < 0 ? (aShiftLeft[iPg]*-1) : 0,
cntNew[iPg] - iLeft,
&apCell[iLeft],
&szCell[iLeft]
);
abDone[iPg] = 1;
assert( pNew->nOverflow==0 );
assert( pNew->nCell==(cntNew[iPg] - (iPg==0?0:cntNew[iPg-1]+!leafData)) );
}
}
assert( memcmp(abDone, "\01\01\01\01\01", nNew)==0 );
assert( nOld>0 );
assert( nNew>0 );
if( (pageFlags & PTF_LEAF)==0 ){
u8 *zChild = &apCopy[nOld-1]->aData[8];
memcpy(&apNew[nNew-1]->aData[8], zChild, 4);
}
if( isRoot && pParent->nCell==0 && pParent->hdrOffset<=apNew[0]->nFree ){
/* The root page of the b-tree now contains no cells. The only sibling
@@ -6746,116 +6874,36 @@ static int balance_nonroot(
);
copyNodeContent(apNew[0], pParent, &rc);
freePage(apNew[0], &rc);
}else if( ISAUTOVACUUM ){
/* Fix the pointer-map entries for all the cells that were shifted around.
** There are several different types of pointer-map entries that need to
** be dealt with by this routine. Some of these have been set already, but
** many have not. The following is a summary:
**
** 1) The entries associated with new sibling pages that were not
** siblings when this function was called. These have already
** been set. We don't need to worry about old siblings that were
** moved to the free-list - the freePage() code has taken care
** of those.
**
** 2) The pointer-map entries associated with the first overflow
** page in any overflow chains used by new divider cells. These
** have also already been taken care of by the insertCell() code.
**
** 3) If the sibling pages are not leaves, then the child pages of
** cells stored on the sibling pages may need to be updated.
**
** 4) If the sibling pages are not internal intkey nodes, then any
** overflow pages used by these cells may need to be updated
** (internal intkey nodes never contain pointers to overflow pages).
**
** 5) If the sibling pages are not leaves, then the pointer-map
** entries for the right-child pages of each sibling may need
** to be updated.
**
** Cases 1 and 2 are dealt with above by other code. The next
** block deals with cases 3 and 4 and the one after that, case 5. Since
** setting a pointer map entry is a relatively expensive operation, this
** code only sets pointer map entries for child or overflow pages that have
** actually moved between pages. */
MemPage *pNew = apNew[0];
MemPage *pOld = apCopy[0];
int nOverflow = pOld->nOverflow;
int iNextOld = pOld->nCell + nOverflow;
int iOverflow = (nOverflow ? pOld->aiOvfl[0] : -1);
j = 0; /* Current 'old' sibling page */
k = 0; /* Current 'new' sibling page */
for(i=0; i<nCell; i++){
int isDivider = 0;
while( i==iNextOld ){
/* Cell i is the cell immediately following the last cell on old
** sibling page j. If the siblings are not leaf pages of an
** intkey b-tree, then cell i was a divider cell. */
assert( j+1 < ArraySize(apCopy) );
assert( j+1 < nOld );
pOld = apCopy[++j];
iNextOld = i + !leafData + pOld->nCell + pOld->nOverflow;
if( pOld->nOverflow ){
nOverflow = pOld->nOverflow;
iOverflow = i + !leafData + pOld->aiOvfl[0];
}
isDivider = !leafData;
}
assert(nOverflow>0 || iOverflow<i );
assert(nOverflow<2 || pOld->aiOvfl[0]==pOld->aiOvfl[1]-1);
assert(nOverflow<3 || pOld->aiOvfl[1]==pOld->aiOvfl[2]-1);
if( i==iOverflow ){
isDivider = 1;
if( (--nOverflow)>0 ){
iOverflow++;
}
}
if( i==cntNew[k] ){
/* Cell i is the cell immediately following the last cell on new
** sibling page k. If the siblings are not leaf pages of an
** intkey b-tree, then cell i is a divider cell. */
pNew = apNew[++k];
if( !leafData ) continue;
}
assert( j<nOld );
assert( k<nNew );
/* If the cell was originally divider cell (and is not now) or
** an overflow cell, or if the cell was located on a different sibling
** page before the balancing, then the pointer map entries associated
** with any child or overflow pages need to be updated. */
if( isDivider || pOld->pgno!=pNew->pgno ){
if( !leafCorrection ){
ptrmapPut(pBt, get4byte(apCell[i]), PTRMAP_BTREE, pNew->pgno, &rc);
}
if( szCell[i]>pNew->minLocal ){
ptrmapPutOvflPtr(pNew, apCell[i], &rc);
}
}
}else if( ISAUTOVACUUM && !leafCorrection ){
/* Fix the pointer map entries associated with the right-child of each
** sibling page. All other pointer map entries have already been taken
** care of. */
for(i=0; i<nNew; i++){
u32 key = get4byte(&apNew[i]->aData[8]);
ptrmapPut(pBt, key, PTRMAP_BTREE, apNew[i]->pgno, &rc);
}
}
if( !leafCorrection ){
for(i=0; i<nNew; i++){
u32 key = get4byte(&apNew[i]->aData[8]);
ptrmapPut(pBt, key, PTRMAP_BTREE, apNew[i]->pgno, &rc);
}
}
assert( pParent->isInit );
TRACE(("BALANCE: finished: old=%d new=%d cells=%d\n",
nOld, nNew, nCell));
#if 0
/* Free any old pages that were not reused as new pages.
*/
for(i=nNew; i<nOld; i++){
freePage(apOld[i], &rc);
}
#if 1
if( ISAUTOVACUUM && rc==SQLITE_OK && apNew[0]->isInit ){
/* The ptrmapCheckPages() contains assert() statements that verify that
** all pointer map pages are set correctly. This is helpful while
** debugging. This is usually disabled because a corrupt database may
** cause an assert() statement to fail. */
ptrmapCheckPages(apNew, nNew);
ptrmapCheckPages(&pParent, 1);
#endif
}
assert( pParent->isInit );
TRACE(("BALANCE: finished: old=%d new=%d cells=%d\n",
nOld, nNew, nCell));
#endif
/*
** Cleanup before returning.

View File

@@ -6835,6 +6835,14 @@ int sqlite3PagerMovepage(Pager *pPager, DbPage *pPg, Pgno pgno, int isCommit){
return SQLITE_OK;
}
/*
** Hand page pPage to sqlite3PcacheMove() together with the page number
** iNew. In debug builds this first asserts that the page carries the
** PGHDR_DIRTY flag and that subjRequiresPage() reports false for it.
*/
void sqlite3PagerRekey(DbPage *pPage, Pgno iNew){
  PgHdr * const pHdr = (PgHdr*)pPage;      /* Same page, viewed as a PgHdr */

  assert( (pHdr->flags & PGHDR_DIRTY)!=0 );
  assert( subjRequiresPage(pHdr)==0 );

  sqlite3PcacheMove(pHdr, iNew);
}
#endif
/*
@@ -7235,4 +7243,5 @@ int sqlite3PagerWalFramesize(Pager *pPager){
}
#endif
#endif /* SQLITE_OMIT_DISKIO */

View File

@@ -188,6 +188,8 @@ int sqlite3SectorSize(sqlite3_file *);
/* Functions used to truncate the database file. */
void sqlite3PagerTruncateImage(Pager*,Pgno);
void sqlite3PagerRekey(DbPage*, Pgno);
#if defined(SQLITE_HAS_CODEC) && !defined(SQLITE_OMIT_WAL)
void *sqlite3PagerCodec(DbPage *);
#endif