mirror of
https://github.com/sqlite/sqlite.git
synced 2025-08-07 02:42:48 +03:00
Changes to make WAL more robust against SQLITE_PROTOCOL errors.
FossilOrigin-Name: fd578a32f8e9f04957359e73f244a60f02afe08c
This commit is contained in:
49
src/wal.c
49
src/wal.c
@@ -2001,10 +2001,28 @@ static int walTryBeginRead(Wal *pWal, int *pChanged, int useWal, int cnt){
|
||||
|
||||
assert( pWal->readLock<0 ); /* Not currently locked */
|
||||
|
||||
/* Take steps to avoid spinning forever if there is a protocol error. */
|
||||
/* Take steps to avoid spinning forever if there is a protocol error.
|
||||
**
|
||||
** Circumstances that cause a RETRY should only last for the briefest
|
||||
** instances of time. No I/O or other system calls are done while the
|
||||
** locks are held, so the locks should not be held for very long. But
|
||||
** if we are unlucky, another process that is holding a lock might get
|
||||
** paged out or take a page-fault that is time-consuming to resolve,
|
||||
** during the few nanoseconds that it is holding the lock. In that case,
|
||||
** it might take longer than normal for the lock to free.
|
||||
**
|
||||
** After 5 RETRYs, we begin calling sqlite3OsSleep(). The first few
|
||||
** calls to sqlite3OsSleep() have a delay of 1 microsecond. Really this
|
||||
** is more of a scheduler yield than an actual delay. But on the 10th
|
||||
** an subsequent retries, the delays start becoming longer and longer,
|
||||
** so that on the 100th (and last) RETRY we delay for 21 milliseconds.
|
||||
** The total delay time before giving up is less than 1 second.
|
||||
*/
|
||||
if( cnt>5 ){
|
||||
int nDelay = 1; /* Pause time in microseconds */
|
||||
if( cnt>100 ) return SQLITE_PROTOCOL;
|
||||
sqlite3OsSleep(pWal->pVfs, 1);
|
||||
if( cnt>=10 ) nDelay = (cnt-9)*238; /* Max delay 21ms. Total delay 996ms */
|
||||
sqlite3OsSleep(pWal->pVfs, nDelay);
|
||||
}
|
||||
|
||||
if( !useWal ){
|
||||
@@ -2086,22 +2104,9 @@ static int walTryBeginRead(Wal *pWal, int *pChanged, int useWal, int cnt){
|
||||
mxI = i;
|
||||
}
|
||||
}
|
||||
if( mxI==0 ){
|
||||
/* If we get here, it means that all of the aReadMark[] entries between
|
||||
** 1 and WAL_NREADER-1 are zero. Try to initialize aReadMark[1] to
|
||||
** be mxFrame, then retry.
|
||||
*/
|
||||
rc = walLockExclusive(pWal, WAL_READ_LOCK(1), 1);
|
||||
if( rc==SQLITE_OK ){
|
||||
pInfo->aReadMark[1] = pWal->hdr.mxFrame;
|
||||
walUnlockExclusive(pWal, WAL_READ_LOCK(1), 1);
|
||||
rc = WAL_RETRY;
|
||||
}else if( rc==SQLITE_BUSY ){
|
||||
rc = WAL_RETRY;
|
||||
}
|
||||
return rc;
|
||||
}else{
|
||||
if( mxReadMark < pWal->hdr.mxFrame ){
|
||||
/* There was once an "if" here. The extra "{" is to preserve indentation. */
|
||||
{
|
||||
if( mxReadMark < pWal->hdr.mxFrame || mxI==0 ){
|
||||
for(i=1; i<WAL_NREADER; i++){
|
||||
rc = walLockExclusive(pWal, WAL_READ_LOCK(i), 1);
|
||||
if( rc==SQLITE_OK ){
|
||||
@@ -2114,6 +2119,10 @@ static int walTryBeginRead(Wal *pWal, int *pChanged, int useWal, int cnt){
|
||||
}
|
||||
}
|
||||
}
|
||||
if( mxI==0 ){
|
||||
assert( rc==SQLITE_BUSY );
|
||||
return WAL_RETRY;
|
||||
}
|
||||
|
||||
rc = walLockShared(pWal, WAL_READ_LOCK(mxI));
|
||||
if( rc ){
|
||||
@@ -2491,6 +2500,8 @@ static int walRestartLog(Wal *pWal){
|
||||
volatile WalCkptInfo *pInfo = walCkptInfo(pWal);
|
||||
assert( pInfo->nBackfill==pWal->hdr.mxFrame );
|
||||
if( pInfo->nBackfill>0 ){
|
||||
u32 salt1;
|
||||
sqlite3_randomness(4, &salt1);
|
||||
rc = walLockExclusive(pWal, WAL_READ_LOCK(1), WAL_NREADER-1);
|
||||
if( rc==SQLITE_OK ){
|
||||
/* If all readers are using WAL_READ_LOCK(0) (in other words if no
|
||||
@@ -2508,7 +2519,7 @@ static int walRestartLog(Wal *pWal){
|
||||
pWal->nCkpt++;
|
||||
pWal->hdr.mxFrame = 0;
|
||||
sqlite3Put4byte((u8*)&aSalt[0], 1 + sqlite3Get4byte((u8*)&aSalt[0]));
|
||||
sqlite3_randomness(4, &aSalt[1]);
|
||||
aSalt[1] = salt1;
|
||||
walIndexWriteHdr(pWal);
|
||||
pInfo->nBackfill = 0;
|
||||
for(i=1; i<WAL_NREADER; i++) pInfo->aReadMark[i] = READMARK_NOT_USED;
|
||||
|
Reference in New Issue
Block a user