1
0
mirror of https://github.com/sqlite/sqlite.git synced 2025-08-10 01:02:56 +03:00

Merge latest wal2 changes into this branch.

FossilOrigin-Name: 06bb80eeb84f57b1e8109a35f5b14992a2e23f2bf5a8921fa29087f96cb44d10
This commit is contained in:
dan
2018-12-15 20:59:14 +00:00
72 changed files with 2490 additions and 902 deletions

229
src/wal.c
View File

@@ -362,7 +362,6 @@
** recovery procedure still takes the same exclusive lock on the entire
** range of SQLITE_SHM_NLOCK shm-locks. This works because the read-locks
** above use four of the six read-locking slots used by legacy wal mode.
** See the header comment for function walLockReader() for details.
**
** STARTUP/RECOVERY
**
@@ -473,16 +472,20 @@ int sqlite3WalTrace = 0;
** is held, or else is the index of the read-mark on which a lock is
** held.
**
** In wal2 mode, Wal.readLock must be set to one of the following values.
** A value of -1 still indicates that no read-lock is held, but the other
** values are symbolic. See the implementation of walLockReader() for
** details of how the symbols map to OS level locks.
** In wal2 mode, a value of -1 still indicates that no read-lock is held.
** And a non-negative value still represents the index of the read-mark on
** which a lock is held. There are two differences:
**
** 1. wal2 mode never uses read-mark 0.
**
** 2. locks on each read-mark have a different interpretation, as
** indicated by the symbolic names below.
*/
/*
** Symbolic values for Wal.readLock. Apart from WAL_LOCK_NONE, each value
** is the index of the read-lock slot held by a wal2 mode reader:
**
**   WAL_LOCK_PART1        "part" lock on the first wal file only.
**   WAL_LOCK_PART1_FULL2  "part" lock on the first wal file, "full" lock
**                         on the second.
**   WAL_LOCK_PART2_FULL1  "part" lock on the second wal file, "full" lock
**                         on the first.
**   WAL_LOCK_PART2        "part" lock on the second wal file only.
**
** Each symbol is defined exactly once. The numeric order matters: the
** wal2 locking helpers lock contiguous ranges of these slots and assert()
** on the exact values below.
*/
#define WAL_LOCK_NONE        (-1)
#define WAL_LOCK_PART1       1
#define WAL_LOCK_PART1_FULL2 2
#define WAL_LOCK_PART2_FULL1 3
#define WAL_LOCK_PART2       4
/*
** This constant is used in wal2 mode only.
@@ -1112,36 +1115,6 @@ static void walUnlockExclusive(Wal *pWal, int lockIdx, int n){
walLockName(lockIdx), n));
}
/*
** Take (bLock!=0) or release (bLock==0) a SHARED read-lock in wal2 mode.
**
** Parameter eLock is one of the WAL_LOCK_PARTx / WAL_LOCK_PARTx_FULLy
** symbols. Each symbol selects a run of WAL_READ_LOCK(x) slots, where the
** slots (1<=x<=4) are used as follows:
**
**   1) Partial read of *-wal-1 (blocks checkpointer from checkpointing)
**   2) Full read of *-wal-2 (blocks writer from writing)
**   3) Partial read of *-wal-2 (blocks checkpointer from checkpointing)
**   4) Full read of *-wal-1 (blocks writer from writing)
**
** A PARTx symbol covers one slot; a PARTx_FULLy symbol covers that slot
** and the one following it.
**
** If the connection is in exclusive mode no lock is touched and SQLITE_OK
** is returned. Otherwise the value returned by sqlite3OsShmLock() is
** passed back to the caller.
*/
static int walLockReader(Wal *pWal, int eLock, int bLock){
  int iFirst;                     /* Index of first read-lock slot */
  int nSlot;                      /* Number of consecutive slots */
  int flags = SQLITE_SHM_SHARED;  /* Flags for sqlite3OsShmLock() */

  assert( pWal->hdr.iVersion==WAL_VERSION2 );
  if( pWal->exclusiveMode ){
    return SQLITE_OK;
  }

  switch( eLock ){
    case WAL_LOCK_PART1:
      iFirst = 1;
      nSlot = 1;
      break;
    case WAL_LOCK_PART1_FULL2:
      iFirst = 1;
      nSlot = 2;
      break;
    case WAL_LOCK_PART2:
      iFirst = 3;
      nSlot = 1;
      break;
    case WAL_LOCK_PART2_FULL1:
      iFirst = 3;
      nSlot = 2;
      break;
    default:
      /* NOTE(review): if assert() is compiled out and this arm is ever
      ** reached, iFirst and nSlot are used below without being set. */
      assert( !"cannot happen" );
  }

  flags |= (bLock ? SQLITE_SHM_LOCK : SQLITE_SHM_UNLOCK);
  return sqlite3OsShmLock(pWal->pDbFd, WAL_READ_LOCK(iFirst), nSlot, flags);
}
/*
** Compute a hash on a page number. The resulting hash value must land
** between 0 and (HASHTABLE_NSLOT-1). The walHashNext() function advances
@@ -1232,14 +1205,9 @@ static int walExternalDecode(u32 iExternal, u32 *piRead){
iExternal - (iHash/2) * HASHTABLE_NPAGE;
return 0;
}
if( iHash==0 ){
*piRead = iExternal;
return 0;
}else{
*piRead = iExternal - HASHTABLE_NPAGE_ONE - ((iHash-1)/2) * HASHTABLE_NPAGE;
}
return (iHash % 2);
*piRead = iExternal - HASHTABLE_NPAGE_ONE - ((iHash-1)/2) * HASHTABLE_NPAGE;
return 1;
}
/*
@@ -1565,7 +1533,7 @@ static int walIndexRecoverOne(Wal *pWal, int iWal, u32 *pnCkpt, int *pbZero){
}
sqlite3_free(aFrame);
}else if( pbZero && nSize==0 ){
}else if( pbZero ){
*pbZero = 1;
}
}
@@ -1689,8 +1657,8 @@ static int walIndexRecover(Wal *pWal){
/* The case where *-wal2 may follow *-wal */
if( nCkpt2<=0x0F && nCkpt2==nCkpt1+1 ){
if( sqlite3Get4byte((u8*)(&pWal->hdr.aSalt[0]))==hdr.aFrameCksum[0]
&& sqlite3Get4byte((u8*)(&pWal->hdr.aSalt[1]))==hdr.aFrameCksum[1]
){
&& sqlite3Get4byte((u8*)(&pWal->hdr.aSalt[1]))==hdr.aFrameCksum[1]
){
walidxSetFile(&pWal->hdr, 1);
walidxSetMxFrame(&pWal->hdr, 1, pWal->hdr.mxFrame);
walidxSetMxFrame(&pWal->hdr, 0, hdr.mxFrame);
@@ -1748,9 +1716,9 @@ static int walIndexRecover(Wal *pWal){
if( pWal->hdr.nPage ){
if( isWalMode2(pWal) ){
sqlite3_log(SQLITE_NOTICE_RECOVER_WAL,
"recovered (%d,%d) frames from WAL files %s[2] (%s mode)",
"recovered (%d,%d) frames from WAL files %s[2] (wal2 mode)",
walidxGetMxFrame(&pWal->hdr, 0), walidxGetMxFrame(&pWal->hdr, 1),
pWal->zWalName, isWalMode2(pWal) ? "wal2" : "wal"
pWal->zWalName
);
}else{
sqlite3_log(SQLITE_NOTICE_RECOVER_WAL,
@@ -2252,6 +2220,68 @@ static void walRestartHdr(Wal *pWal, u32 salt1){
assert( pInfo->aReadMark[0]==0 );
}
/*
** This function is used in wal2 mode.
**
** It is called when writer pWal is about to begin writing frames.
** Parameter iApp identifies the current wal file; the other wal file
** (wal file !iApp) has already been fully checkpointed. Return SQLITE_OK
** if no reader prevents the writer from switching over to the other wal
** file, or SQLITE_BUSY if one does.
**
** The check is made by taking an exclusive lock on three consecutive
** read-lock slots. This function does not release that lock.
*/
static int wal2RestartOk(Wal *pWal, int iApp){
  int iFirst;                     /* First of the three slots to lock */

  /* The arithmetic below depends on these exact values. */
  assert( WAL_LOCK_PART1==1 );
  assert( WAL_LOCK_PART1_FULL2==2 );
  assert( WAL_LOCK_PART2_FULL1==3 );
  assert( WAL_LOCK_PART2==4 );

  /* Wal file !iApp may be overwritten only if nobody is reading it - no
  ** "full" and no "part" locks. Strictly speaking a "part" lock cannot
  ** exist, as it would have stopped the file from being checkpointed,
  ** but including it in the test is harmless.
  **
  **   iApp==0:  slots start at WAL_LOCK_PART1_FULL2
  **   otherwise: slots start at WAL_LOCK_PART1
  */
  if( iApp==0 ){
    iFirst = 2;
  }else{
    iFirst = 1;
  }
  assert( iApp!=0 || iFirst==WAL_LOCK_PART1_FULL2 );
  assert( iApp!=1 || iFirst==WAL_LOCK_PART1 );

  return walLockExclusive(pWal, WAL_READ_LOCK(iFirst), 3);
}
/*
** This function is used in wal2 mode.
**
** Release the exclusive lock on the three read-lock slots that
** wal2RestartOk() locks for the same value of iApp.
*/
static void wal2RestartFinished(Wal *pWal, int iApp){
  int iFirst = (iApp==0) ? 2 : 1; /* First of the three slots to unlock */
  walUnlockExclusive(pWal, WAL_READ_LOCK(iFirst), 3);
}
/*
** This function is used in wal2 mode.
**
** A checkpointer calls this function before checkpointing wal file iCkpt.
** It attempts to take the lock required for that - an exclusive lock on
** two consecutive read-lock slots. SQLITE_OK is returned on success, or
** an SQLite error code (e.g. SQLITE_BUSY) otherwise.
**
** If SQLITE_OK is returned, the caller is responsible for releasing the
** lock by invoking wal2CheckpointFinished().
*/
static int wal2CheckpointOk(Wal *pWal, int iCkpt){
  int iFirst = (iCkpt*2) + 1;     /* First of the two slots to lock */

  /* The arithmetic above depends on these exact values. */
  assert( WAL_LOCK_PART1==1 );
  assert( WAL_LOCK_PART1_FULL2==2 );
  assert( WAL_LOCK_PART2_FULL1==3 );
  assert( WAL_LOCK_PART2==4 );

  assert( iCkpt!=0 || iFirst==WAL_LOCK_PART1 );
  assert( iCkpt!=1 || iFirst==WAL_LOCK_PART2_FULL1 );

  return walLockExclusive(pWal, WAL_READ_LOCK(iFirst), 2);
}
/*
** This function is used in wal2 mode.
**
** Release the exclusive lock on the two read-lock slots that
** wal2CheckpointOk() locks for the same value of iCkpt.
*/
static void wal2CheckpointFinished(Wal *pWal, int iCkpt){
  int iFirst = (iCkpt*2) + 1;     /* First of the two slots to unlock */
  walUnlockExclusive(pWal, WAL_READ_LOCK(iFirst), 2);
}
/*
** Copy as much content as we can from the WAL back into the database file
** in response to an sqlite3_wal_checkpoint() request or the equivalent.
@@ -2319,7 +2349,7 @@ static int walCheckpoint(
** preventing this checkpoint operation. If one is found, return
** early. */
if( bWal2 ){
rc = walLockExclusive(pWal, WAL_READ_LOCK(1 + iCkpt*2), 1);
rc = wal2CheckpointOk(pWal, iCkpt);
if( rc!=SQLITE_OK ) return rc;
}
@@ -2368,9 +2398,9 @@ static int walCheckpoint(
assert( rc==SQLITE_OK || pIter==0 );
}
if( pIter
&& (rc = walBusyLock(pWal, xBusy, pBusyArg, WAL_READ_LOCK(0),1))==SQLITE_OK
){
if( pIter && (bWal2
|| (rc = walBusyLock(pWal, xBusy, pBusyArg,WAL_READ_LOCK(0),1))==SQLITE_OK
)){
u32 nBackfill = pInfo->nBackfill;
assert( bWal2==0 || nBackfill==0 );
@@ -2435,10 +2465,9 @@ static int walCheckpoint(
}
/* Release the reader lock held while backfilling */
if( bWal2 ){
walUnlockExclusive(pWal, WAL_READ_LOCK(1 + iCkpt*2), 1);
if( bWal2==0 ){
walUnlockExclusive(pWal, WAL_READ_LOCK(0), 1);
}
walUnlockExclusive(pWal, WAL_READ_LOCK(0), 1);
}
if( rc==SQLITE_BUSY ){
@@ -2446,6 +2475,7 @@ static int walCheckpoint(
** just because there are active readers. */
rc = SQLITE_OK;
}
if( bWal2 ) wal2CheckpointFinished(pWal, iCkpt);
}
/* If this is an SQLITE_CHECKPOINT_RESTART or TRUNCATE operation, and the
@@ -3080,18 +3110,27 @@ static int walTryBeginRead(Wal *pWal, int *pChanged, int useWal, int cnt){
assert( pWal->apWiData[0]!=0 );
pInfo = walCkptInfo(pWal);
if( isWalMode2(pWal) ){
int eLock = 1 + (walidxGetFile(&pWal->hdr)*2);
if( pInfo->nBackfill==0 ){
eLock += walidxGetMxFrame(&pWal->hdr, !walidxGetFile(&pWal->hdr))>0;
}
rc = walLockReader(pWal, eLock, 1);
if( rc!=SQLITE_OK ){
return rc;
}
/* This connection needs a "part" lock on the current wal file and,
** unless pInfo->nBackfill is set to indicate that it has already been
** checkpointed, a "full" lock on the other wal file. */
int iWal = walidxGetFile(&pWal->hdr);
int nBackfill = pInfo->nBackfill || walidxGetMxFrame(&pWal->hdr, !iWal)==0;
int eLock = 1 + (iWal*2) + (nBackfill==iWal);
assert( nBackfill==0 || nBackfill==1 );
assert( iWal==0 || iWal==1 );
assert( iWal!=0 || nBackfill!=1 || eLock==WAL_LOCK_PART1 );
assert( iWal!=0 || nBackfill!=0 || eLock==WAL_LOCK_PART1_FULL2 );
assert( iWal!=1 || nBackfill!=1 || eLock==WAL_LOCK_PART2 );
assert( iWal!=1 || nBackfill!=0 || eLock==WAL_LOCK_PART2_FULL1 );
rc = walLockShared(pWal, WAL_READ_LOCK(eLock));
if( rc!=SQLITE_OK ){
return (rc==SQLITE_BUSY ? WAL_RETRY : rc);
}
walShmBarrier(pWal);
if( memcmp((void *)walIndexHdr(pWal), &pWal->hdr, sizeof(WalIndexHdr)) ){
walLockReader(pWal, eLock, 0);
walUnlockShared(pWal, WAL_READ_LOCK(eLock));
return WAL_RETRY;
}else{
pWal->readLock = eLock;
@@ -3424,11 +3463,7 @@ int sqlite3WalBeginReadTransaction(Wal *pWal, int *pChanged){
void sqlite3WalEndReadTransaction(Wal *pWal){
sqlite3WalEndWriteTransaction(pWal);
if( pWal->readLock!=WAL_LOCK_NONE ){
if( isWalMode2(pWal) ){
(void)walLockReader(pWal, pWal->readLock, 0);
}else{
walUnlockShared(pWal, WAL_READ_LOCK(pWal->readLock));
}
walUnlockShared(pWal, WAL_READ_LOCK(pWal->readLock));
pWal->readLock = WAL_LOCK_NONE;
}
}
@@ -4063,33 +4098,6 @@ int sqlite3WalSavepointUndo(Wal *pWal, u32 *aWalData){
return rc;
}
/*
** This function is used in wal2 mode.
**
** It is called when writer pWal is about to begin writing frames. The
** "other" wal file (the one that is not the current file according to
** pWal->hdr) has been fully checkpointed. Return SQLITE_OK if no reader
** prevents the writer from switching to the other wal file, or
** SQLITE_BUSY if one does.
**
** The test is a probe only: the exclusive lock is dropped again before
** this function returns.
*/
static int walRestartOk(Wal *pWal){
  int iSlot;                      /* Read-lock slot to probe */
  int rc;                         /* Return code */

  /* No reader can be doing a "partial" read of the other wal file - had
  ** one existed the file could not have been checkpointed. So only
  ** "full" readers need to be tested for: slot 2 when the current wal
  ** file is 0, slot 4 otherwise. */
  if( walidxGetFile(&pWal->hdr)==0 ){
    iSlot = 2;
  }else{
    iSlot = 4;
  }

  rc = walLockExclusive(pWal, WAL_READ_LOCK(iSlot), 1);
  if( rc!=SQLITE_OK ){
    return rc;
  }
  walUnlockExclusive(pWal, WAL_READ_LOCK(iSlot), 1);
  return SQLITE_OK;
}
/*
** This function is called just before writing a set of frames to the log
** file (see sqlite3WalFrames()). It checks to see if, instead of appending
@@ -4117,19 +4125,20 @@ static int walRestartLog(Wal *pWal){
if( walidxGetMxFrame(&pWal->hdr, iApp)>=nWalSize ){
volatile WalCkptInfo *pInfo = walCkptInfo(pWal);
if( walidxGetMxFrame(&pWal->hdr, !iApp)==0 || pInfo->nBackfill ){
rc = walRestartOk(pWal);
rc = wal2RestartOk(pWal, iApp);
if( rc==SQLITE_OK ){
iApp = !iApp;
int iNew = !iApp;
pWal->nCkpt++;
walidxSetFile(&pWal->hdr, iApp);
walidxSetMxFrame(&pWal->hdr, iApp, 0);
walidxSetFile(&pWal->hdr, iNew);
walidxSetMxFrame(&pWal->hdr, iNew, 0);
sqlite3Put4byte((u8*)&pWal->hdr.aSalt[0], pWal->hdr.aFrameCksum[0]);
sqlite3Put4byte((u8*)&pWal->hdr.aSalt[1], pWal->hdr.aFrameCksum[1]);
walIndexWriteHdr(pWal);
pInfo->nBackfill = 0;
walLockReader(pWal, pWal->readLock, 0);
pWal->readLock = iApp ? WAL_LOCK_PART2_FULL1 : WAL_LOCK_PART1_FULL2;
rc = walLockReader(pWal, pWal->readLock, 1);
wal2RestartFinished(pWal, iApp);
walUnlockShared(pWal, WAL_READ_LOCK(pWal->readLock));
pWal->readLock = iNew ? WAL_LOCK_PART2_FULL1 : WAL_LOCK_PART1_FULL2;
rc = walLockShared(pWal, WAL_READ_LOCK(pWal->readLock));
}else if( rc==SQLITE_BUSY ){
rc = SQLITE_OK;
}
@@ -4758,11 +4767,7 @@ int sqlite3WalExclusiveMode(Wal *pWal, int op){
if( op==0 ){
if( pWal->exclusiveMode ){
pWal->exclusiveMode = WAL_NORMAL_MODE;
if( isWalMode2(pWal) ){
rc = walLockReader(pWal, pWal->readLock, 1);
}else{
rc = walLockShared(pWal, WAL_READ_LOCK(pWal->readLock));
}
rc = walLockShared(pWal, WAL_READ_LOCK(pWal->readLock));
if( rc!=SQLITE_OK ){
pWal->exclusiveMode = WAL_EXCLUSIVE_MODE;
}
@@ -4774,11 +4779,7 @@ int sqlite3WalExclusiveMode(Wal *pWal, int op){
}else if( op>0 ){
assert( pWal->exclusiveMode==WAL_NORMAL_MODE );
assert( pWal->readLock>=0 );
if( isWalMode2(pWal) ){
walLockReader(pWal, pWal->readLock, 0);
}else{
walUnlockShared(pWal, WAL_READ_LOCK(pWal->readLock));
}
walUnlockShared(pWal, WAL_READ_LOCK(pWal->readLock));
pWal->exclusiveMode = WAL_EXCLUSIVE_MODE;
rc = 1;
}else{