1
0
mirror of https://github.com/sqlite/sqlite.git synced 2025-08-05 15:55:57 +03:00

When searching the wal file for a frame, do not search that part that was already checkpointed when the snapshot being read was at the head of the wal file.

FossilOrigin-Name: 90760e72327eb0593cbfa6d7058b554198cd8044
This commit is contained in:
dan
2015-08-13 20:23:46 +00:00
parent 6df003c7a5
commit b8c7cfb834
3 changed files with 31 additions and 22 deletions

View File

@@ -428,6 +428,7 @@ struct Wal {
u8 syncHeader; /* Fsync the WAL header if true */
u8 padToSectorBoundary; /* Pad transactions out to the next sector */
WalIndexHdr hdr; /* Wal-index header for current transaction */
u32 minFrame; /* Ignore wal frames before this one */
const char *zWalName; /* Name of WAL file */
u32 nCkpt; /* Checkpoint sequence counter in the wal-header */
#ifdef SQLITE_DEBUG
@@ -2296,12 +2297,27 @@ static int walTryBeginRead(Wal *pWal, int *pChanged, int useWal, int cnt){
** pWal->hdr.mxFrame risks reading a corrupted snapshot. So, retry
** instead.
**
** This does not guarantee that the copy of the wal-index header is up to
** date before proceeding. That would not be possible without somehow
** blocking writers. It only guarantees that a dangerous checkpoint or
** log-wrap (either of which would require an exclusive lock on
** WAL_READ_LOCK(mxI)) has not occurred since the snapshot was valid.
** Before checking that the live wal-index header has not changed
** since it was read, set Wal.minFrame to the first frame in the wal
** file that has not yet been checkpointed. This client will not need
** to read any frames earlier than minFrame from the wal file - they
** can be safely read directly from the database file.
**
** Because a ShmBarrier() call is made between taking the copy of
** nBackfill and checking that the wal-header in shared-memory still
** matches the one cached in pWal->hdr, it is guaranteed that the
** checkpointer that set nBackfill was not working with a wal-index
** header newer than that cached in pWal->hdr. If it were, that could
** cause a problem. The checkpointer could omit to checkpoint
** a version of page X that lies before pWal->minFrame (call that version
** A) on the basis that there is a newer version (version B) of the same
** page later in the wal file. But if version B happens to like past
** frame pWal->hdr.mxFrame - then the client would incorrectly assume
** that it can read version A from the database file. However, since
** we can guarantee that the checkpointer that set nBackfill could not
** see any pages past pWal->hdr.mxFrame, this problem does not come up.
*/
pWal->minFrame = pInfo->nBackfill+1;
walShmBarrier(pWal);
if( pInfo->aReadMark[mxI]!=mxReadMark
|| memcmp((void *)walIndexHdr(pWal), &pWal->hdr, sizeof(WalIndexHdr))
@@ -2372,7 +2388,6 @@ int sqlite3WalFindFrame(
u32 iRead = 0; /* If !=0, WAL frame to return data from */
u32 iLast = pWal->hdr.mxFrame; /* Last page in WAL for this reader */
int iHash; /* Used to loop through N hash tables */
u32 iFirst;
int iMinHash;
/* This routine is only be called from within a read transaction. */
@@ -2384,10 +2399,7 @@ int sqlite3WalFindFrame(
** then the WAL is ignored by the reader so return early, as if the
** WAL were empty.
*/
if( iLast==0
|| pWal->readLock==0
|| iLast==(iFirst = walCkptInfo(pWal)->nBackfill)
){
if( iLast==0 || pWal->readLock==0 ){
*piRead = 0;
return SQLITE_OK;
}
@@ -2417,7 +2429,7 @@ int sqlite3WalFindFrame(
** This condition filters out entries that were added to the hash
** table after the current read-transaction had started.
*/
iMinHash = walFramePage(iFirst);
iMinHash = walFramePage(pWal->minFrame);
for(iHash=walFramePage(iLast); iHash>=iMinHash && iRead==0; iHash--){
volatile ht_slot *aHash; /* Pointer to hash table */
volatile u32 *aPgno; /* Pointer to array of page numbers */
@@ -2433,7 +2445,7 @@ int sqlite3WalFindFrame(
nCollide = HASHTABLE_NSLOT;
for(iKey=walHash(pgno); aHash[iKey]; iKey=walNextHash(iKey)){
u32 iFrame = aHash[iKey] + iZero;
if( iFrame<=iLast && iFrame>iFirst && aPgno[aHash[iKey]]==pgno ){
if( iFrame<=iLast && iFrame>=pWal->minFrame && aPgno[aHash[iKey]]==pgno ){
assert( iFrame>iRead || CORRUPT_DB );
iRead = iFrame;
}