1
0
mirror of https://github.com/sqlite/sqlite.git synced 2025-08-07 02:42:48 +03:00

Enhancements to wal-mode locking scheme.

FossilOrigin-Name: 8549c286497f3d2cd118be1334fce00d6f8a26c4
This commit is contained in:
dan
2010-04-17 12:31:37 +00:00
parent 97a313554d
commit 3de777fd8c
6 changed files with 356 additions and 260 deletions

View File

@@ -1,5 +1,5 @@
C Change\sthe\slog\sfile\sformat\sto\sinclude\sa\ssmall\s(12\sbyte)\sheader\sat\sthe\sstart\sof\sthe\sfile. C Enhancements\sto\swal-mode\slocking\sscheme.
D 2010-04-16T13:59:31 D 2010-04-17T12:31:37
F Makefile.arm-wince-mingw32ce-gcc fcd5e9cd67fe88836360bb4f9ef4cb7f8e2fb5a0 F Makefile.arm-wince-mingw32ce-gcc fcd5e9cd67fe88836360bb4f9ef4cb7f8e2fb5a0
F Makefile.in 4f2f967b7e58a35bb74fb7ec8ae90e0f4ca7868b F Makefile.in 4f2f967b7e58a35bb74fb7ec8ae90e0f4ca7868b
F Makefile.linux-gcc d53183f4aa6a9192d249731c90dbdffbd2c68654 F Makefile.linux-gcc d53183f4aa6a9192d249731c90dbdffbd2c68654
@@ -131,7 +131,7 @@ F src/journal.c b0ea6b70b532961118ab70301c00a33089f9315c
F src/legacy.c a199d7683d60cef73089e892409113e69c23a99f F src/legacy.c a199d7683d60cef73089e892409113e69c23a99f
F src/lempar.c 7f026423f4d71d989e719a743f98a1cbd4e6d99e F src/lempar.c 7f026423f4d71d989e719a743f98a1cbd4e6d99e
F src/loadext.c 1c7a61ce1281041f437333f366a96aa0d29bb581 F src/loadext.c 1c7a61ce1281041f437333f366a96aa0d29bb581
F src/log.c 11f683a3429319fb2731aa357717bf9117cdcba4 F src/log.c 6ac96c84ca4564f156de564ceddb3c1a2319ac6e
F src/log.h a2654af46ce7b5732f4d5a731abfdd180f0a06d9 F src/log.h a2654af46ce7b5732f4d5a731abfdd180f0a06d9
F src/main.c c0e7192bad5b90544508b241eb2487ac661de890 F src/main.c c0e7192bad5b90544508b241eb2487ac661de890
F src/malloc.c a08f16d134f0bfab6b20c3cd142ebf3e58235a6a F src/malloc.c a08f16d134f0bfab6b20c3cd142ebf3e58235a6a
@@ -154,7 +154,7 @@ F src/os_common.h 240c88b163b02c21a9f21f87d49678a0aa21ff30
F src/os_os2.c 75a8c7b9a00a2cf1a65f9fa4afbc27d46634bb2f F src/os_os2.c 75a8c7b9a00a2cf1a65f9fa4afbc27d46634bb2f
F src/os_unix.c 5bf0015cebe2f21635da2af983c348eb88b3b4c1 F src/os_unix.c 5bf0015cebe2f21635da2af983c348eb88b3b4c1
F src/os_win.c 1c7453c2df4dab26d90ff6f91272aea18bcf7053 F src/os_win.c 1c7453c2df4dab26d90ff6f91272aea18bcf7053
F src/pager.c 35c7e3b5bbad76f04e7143d2d4676a269a8ba9fc F src/pager.c 674d6558a618ca7714ba28741e6dc681692bffe2
F src/pager.h ce5d076f3860a5f2d7460c582cd68383343b33cf F src/pager.h ce5d076f3860a5f2d7460c582cd68383343b33cf
F src/parse.y ace5c7a125d9f2a410e431ee3209034105045f7e F src/parse.y ace5c7a125d9f2a410e431ee3209034105045f7e
F src/pcache.c ace8f6a5ecd4711cc66a1b23053be7109bd437cf F src/pcache.c ace8f6a5ecd4711cc66a1b23053be7109bd437cf
@@ -757,10 +757,10 @@ F test/vtabE.test 7c4693638d7797ce2eda17af74292b97e705cc61
F test/vtab_alter.test 9e374885248f69e251bdaacf480b04a197f125e5 F test/vtab_alter.test 9e374885248f69e251bdaacf480b04a197f125e5
F test/vtab_err.test 0d4d8eb4def1d053ac7c5050df3024fd47a3fbd8 F test/vtab_err.test 0d4d8eb4def1d053ac7c5050df3024fd47a3fbd8
F test/vtab_shared.test 0eff9ce4f19facbe0a3e693f6c14b80711a4222d F test/vtab_shared.test 0eff9ce4f19facbe0a3e693f6c14b80711a4222d
F test/wal.test bb1fa35fc03353a1a154f583f01e5093e25ba001 F test/wal.test 5fa3cdf2e93e79b0891af6cf7fab8ef9e07a23c6
F test/walcrash.test 45cfbab30bb7cbe0b2e9d5cabe90dbcad10cb89b F test/walcrash.test 45cfbab30bb7cbe0b2e9d5cabe90dbcad10cb89b
F test/walslow.test 38076d5fad49e3678027be0f8110e6a32d531dc2 F test/walslow.test 38076d5fad49e3678027be0f8110e6a32d531dc2
F test/walthread.test 58cd64b06f186251f09f64e4918fb74a7e52c963 F test/walthread.test 27e44ee6fd02f1f494a24f999c97086af3ab739d
F test/where.test de337a3fe0a459ec7c93db16a519657a90552330 F test/where.test de337a3fe0a459ec7c93db16a519657a90552330
F test/where2.test 45eacc126aabb37959a387aa83e59ce1f1f03820 F test/where2.test 45eacc126aabb37959a387aa83e59ce1f1f03820
F test/where3.test aa44a9b29e8c9f3d7bb94a3bb3a95b31627d520d F test/where3.test aa44a9b29e8c9f3d7bb94a3bb3a95b31627d520d
@@ -804,7 +804,7 @@ F tool/speedtest2.tcl ee2149167303ba8e95af97873c575c3e0fab58ff
F tool/speedtest8.c 2902c46588c40b55661e471d7a86e4dd71a18224 F tool/speedtest8.c 2902c46588c40b55661e471d7a86e4dd71a18224
F tool/speedtest8inst1.c 293327bc76823f473684d589a8160bde1f52c14e F tool/speedtest8inst1.c 293327bc76823f473684d589a8160bde1f52c14e
F tool/vdbe-compress.tcl d70ea6d8a19e3571d7ab8c9b75cba86d1173ff0f F tool/vdbe-compress.tcl d70ea6d8a19e3571d7ab8c9b75cba86d1173ff0f
P 67d2a89ec2d593a077eb19a6ea2b06cb1c2e9ba8 P 9865d14d6041874bc1239ce7a061d5c75f2d33c9
R 2c35134540abe427220a5914099a1c09 R f6bb150cf1f3c0e2bfeac5bd76d233a7
U dan U dan
Z a25fb37e86e611cc50a9ddc963124e5a Z ea6a1c06d626e2f5d8edb2ab802e8ea3

View File

@@ -1 +1 @@
9865d14d6041874bc1239ce7a061d5c75f2d33c9 8549c286497f3d2cd118be1334fce00d6f8a26c4

482
src/log.c
View File

@@ -11,19 +11,19 @@
** The log header is 12 bytes in size and consists of the following three ** The log header is 12 bytes in size and consists of the following three
** big-endian 32-bit unsigned integer values: ** big-endian 32-bit unsigned integer values:
** **
** 0: Database page size, ** 0: Database page size,
** 4: Randomly selected salt value 1, ** 4: Randomly selected salt value 1,
** 8: Randomly selected salt value 2. ** 8: Randomly selected salt value 2.
** **
** Immediately following the log header are zero or more log frames. Each ** Immediately following the log header are zero or more log frames. Each
** frame itself consists of a 16-byte header followed by a <page-size> bytes ** frame itself consists of a 16-byte header followed by a <page-size> bytes
** of page data. The header is broken into 4 big-endian 32-bit unsigned ** of page data. The header is broken into 4 big-endian 32-bit unsigned
** integer values, as follows: ** integer values, as follows:
** **
** 0: Page number. ** 0: Page number.
** 4: For commit records, the size of the database image in pages ** 4: For commit records, the size of the database image in pages
** after the commit. For all other records, zero. ** after the commit. For all other records, zero.
** 8: Checksum value 1. ** 8: Checksum value 1.
** 12: Checksum value 2. ** 12: Checksum value 2.
*/ */
@@ -106,13 +106,19 @@ struct LogSummary {
/* /*
** The four lockable regions associated with each log-summary. A connection ** The four lockable regions associated with each log-summary. A connection
** may take either a SHARED or EXCLUSIVE lock on each. ** may take either a SHARED or EXCLUSIVE lock on each. An ORed combination
** of the following bitmasks is passed as the second argument to the
** logLockRegion() function.
*/ */
#define LOG_REGION_A 0x01 #define LOG_REGION_A 0x01
#define LOG_REGION_B 0x02 #define LOG_REGION_B 0x02
#define LOG_REGION_C 0x04 #define LOG_REGION_C 0x04
#define LOG_REGION_D 0x08 #define LOG_REGION_D 0x08
/* Byte offsets within the log-summary file that are used as the start of
** fcntl() lock ranges. LOG_LOCK_MUTEX and LOG_LOCK_DMH are each locked as
** a single byte (see logLockMutex() and logLockDMH()). LOG_LOCK_REGION is
** the base for the four lockable regions: logLockRegion() locks bytes
** (LOG_LOCK_REGION+1) through (LOG_LOCK_REGION+4). */
#define LOG_LOCK_MUTEX 12
#define LOG_LOCK_DMH 13
#define LOG_LOCK_REGION 14
/* /*
** A single instance of this structure is allocated as part of each ** A single instance of this structure is allocated as part of each
** connection to a database log. All structures associated with the ** connection to a database log. All structures associated with the
@@ -316,14 +322,19 @@ static int logSummaryMap(LogSummary *pSummary, int nByte){
** Regardless of the value of isTruncate, close the file-descriptor ** Regardless of the value of isTruncate, close the file-descriptor
** opened on the log-summary file. ** opened on the log-summary file.
*/ */
static int logSummaryUnmap(LogSummary *pSummary, int isTruncate){ static int logSummaryUnmap(LogSummary *pSummary, int isUnlink){
int rc = SQLITE_OK; int rc = SQLITE_OK;
if( pSummary->aData ){ if( pSummary->aData ){
assert( pSummary->fd>0 ); assert( pSummary->fd>0 );
munmap(pSummary->aData, pSummary->nData); munmap(pSummary->aData, pSummary->nData);
pSummary->aData = 0; pSummary->aData = 0;
if( isTruncate ){ if( isUnlink ){
rc = (ftruncate(pSummary->fd, 0) ? SQLITE_IOERR : SQLITE_OK); char *zFile = sqlite3_mprintf("%s-summary", pSummary->zPath);
if( !zFile ){
rc = SQLITE_NOMEM;
}
unlink(zFile);
sqlite3_free(zFile);
} }
} }
if( pSummary->fd>0 ){ if( pSummary->fd>0 ){
@@ -589,12 +600,197 @@ finished:
return rc; return rc;
} }
/*
** Values for the third parameter to logLockRegion(). The same values are
** passed as the "op" argument of logLockFd(), which uses them directly as
** indexes into its lookup tables of fcntl() lock types and commands. They
** must therefore remain consecutive integers starting at zero.
*/
#define LOG_UNLOCK 0 /* Release the lock: F_UNLCK using F_SETLK */
#define LOG_RDLOCK 1 /* Shared lock: F_RDLCK using F_SETLK (non-blocking) */
#define LOG_WRLOCK 2 /* Exclusive lock: F_WRLCK using F_SETLK (non-blocking) */
#define LOG_WRLOCKW 3 /* Exclusive lock: F_WRLCK using F_SETLKW (blocking) */
/*
** Apply a single fcntl() byte-range locking operation to the file
** descriptor of the log-summary file.
**
**   pSummary  Log-summary whose file descriptor (pSummary->fd) is locked.
**   iStart    Offset of the first byte of the range, from start of file.
**   nByte     Number of bytes in the range.
**   op        One of LOG_UNLOCK, LOG_RDLOCK, LOG_WRLOCK or LOG_WRLOCKW.
**
** Returns SQLITE_OK if fcntl() reports success. Any fcntl() failure,
** whatever the cause, is reported as SQLITE_BUSY.
*/
static int logLockFd(LogSummary *pSummary, int iStart, int nByte, int op){
  /* One row per LOG_xxx operation. The row index is the operation code. */
  static const struct LockOp {
    int eType;                    /* Value for flock.l_type */
    int eCmd;                     /* Command passed to fcntl() */
  } aLockOp[] = {
    { F_UNLCK, F_SETLK  },        /* LOG_UNLOCK */
    { F_RDLCK, F_SETLK  },        /* LOG_RDLOCK */
    { F_WRLCK, F_SETLK  },        /* LOG_WRLOCK */
    { F_WRLCK, F_SETLKW }         /* LOG_WRLOCKW */
  };
  struct flock lock;              /* Description of the range to lock */

  assert( op>=0 && op<ArraySize(aLockOp) );

  memset(&lock, 0, sizeof(lock));
  lock.l_whence = SEEK_SET;
  lock.l_start = iStart;
  lock.l_len = nByte;
  lock.l_type = aLockOp[op].eType;

  if( fcntl(pSummary->fd, aLockOp[op].eCmd, &lock)!=0 ){
    return SQLITE_BUSY;
  }
  return SQLITE_OK;
}
/*
** Take or release locks on the regions named by bitmask mRegion (an ORed
** combination of LOG_REGION_A..LOG_REGION_D) on behalf of connection pLog.
** Parameter op is LOG_UNLOCK, LOG_RDLOCK or LOG_WRLOCK (LOG_WRLOCKW is not
** accepted here). Only the (op, mRegion) pairs listed in the first assert()
** below are supported.
**
** Two levels of locking are maintained:
**
**   + pLog->lock.mLock records the locks held by this connection. The low
**     byte holds SHARED bits, the next byte holds EXCLUSIVE bits.
**
**   + The log-summary file descriptor, which is shared by every connection
**     in this process, carries the fcntl() locks that are visible to other
**     processes.
**
** Returns SQLITE_BUSY if another connection in this process holds a
** conflicting lock. If the fcntl() operation fails, the error code from
** logLockFd() is returned. In both cases pLog->lock.mLock is left
** unchanged. Otherwise pLog->lock.mLock is updated and SQLITE_OK returned.
*/
static int logLockRegion(Log *pLog, u32 mRegion, int op){
LogSummary *pSummary = pLog->pSummary;
LogLock *p; /* Used to iterate through in-process locks */
u32 mOther; /* Locks held by other connections */
u32 mNew; /* New mask for pLog */
assert(
/* Writer lock operations */
(op==LOG_WRLOCK && mRegion==(LOG_REGION_C|LOG_REGION_D))
|| (op==LOG_UNLOCK && mRegion==(LOG_REGION_C|LOG_REGION_D))
/* Normal reader lock operations */
|| (op==LOG_RDLOCK && mRegion==(LOG_REGION_A|LOG_REGION_B))
|| (op==LOG_UNLOCK && mRegion==(LOG_REGION_A))
|| (op==LOG_UNLOCK && mRegion==(LOG_REGION_B))
/* Region D reader lock operations */
|| (op==LOG_RDLOCK && mRegion==(LOG_REGION_D))
|| (op==LOG_RDLOCK && mRegion==(LOG_REGION_A))
|| (op==LOG_UNLOCK && mRegion==(LOG_REGION_D))
/* Checkpointer lock operations */
|| (op==LOG_WRLOCK && mRegion==(LOG_REGION_B|LOG_REGION_C))
|| (op==LOG_WRLOCK && mRegion==(LOG_REGION_A))
|| (op==LOG_UNLOCK && mRegion==(LOG_REGION_B|LOG_REGION_C))
|| (op==LOG_UNLOCK && mRegion==(LOG_REGION_A|LOG_REGION_B|LOG_REGION_C))
);
/* Assert that a connection never tries to go from an EXCLUSIVE to a
** SHARED lock on a region. Moving from SHARED to EXCLUSIVE sometimes
** happens though (when a region D reader upgrades to a writer).
*/
assert( op!=LOG_RDLOCK || 0==(pLog->lock.mLock & (mRegion<<8)) );
sqlite3_mutex_enter(pSummary->mutex);
/* Calculate a mask of locks held by all connections in this process apart
** from this one. The least significant byte of the mask contains a mask
** of the SHARED locks held. The next least significant byte of the mask
** indicates the EXCLUSIVE locks held. For example, to test if some other
** connection is holding a SHARED lock on region A, or an EXCLUSIVE lock
** on region C, do:
**
** hasSharedOnA = (mOther & (LOG_REGION_A<<0));
** hasExclusiveOnC = (mOther & (LOG_REGION_C<<8));
**
** In all masks, if the bit in the EXCLUSIVE byte mask is set, so is the
** corresponding bit in the SHARED mask.
*/
mOther = 0;
for(p=pSummary->pLock; p; p=p->pNext){
/* Check the invariant: every EXCLUSIVE bit has its SHARED bit set too. */
assert( (p->mLock & (p->mLock<<8))==(p->mLock&0x0000FF00) );
if( p!=&pLog->lock ){
mOther |= p->mLock;
}
}
/* If this call is to lock a region (not to unlock one), test if locks held
** by any other connection in this process prevent the new locks from
** being granted. If so, exit the summary mutex and return SQLITE_BUSY.
**
** A LOG_RDLOCK request conflicts only with EXCLUSIVE locks, so mRegion is
** shifted into the EXCLUSIVE byte before testing. A LOG_WRLOCK request
** conflicts with any lock at all, so the SHARED byte is tested (it is a
** superset of the EXCLUSIVE byte, per the invariant asserted above).
*/
if( op && (mOther & (mRegion << (op==LOG_RDLOCK ? 8 : 0))) ){
sqlite3_mutex_leave(pSummary->mutex);
return SQLITE_BUSY;
}
/* Figure out the new lock mask for this connection. */
switch( op ){
case LOG_UNLOCK:
mNew = (pLog->lock.mLock & ~(mRegion|(mRegion<<8)));
break;
case LOG_RDLOCK:
mNew = (pLog->lock.mLock | mRegion);
break;
default:
assert( op==LOG_WRLOCK );
mNew = (pLog->lock.mLock | (mRegion<<8) | mRegion);
break;
}
/* Now modify the locks held on the log-summary file descriptor. This
** file descriptor is shared by all log connections in this process.
** Therefore:
**
** + If one or more log connections in this process hold a SHARED lock
** on a region, the file-descriptor should hold a SHARED lock on
** the file region.
**
** + If a log connection in this process holds an EXCLUSIVE lock on a
** region, the file-descriptor should also hold an EXCLUSIVE lock on
** the region in question.
**
** If this is a LOG_UNLOCK operation, only regions for which no other
** connection holds a lock should actually be unlocked. And if this
** is a LOG_RDLOCK operation and other connections already hold all
** the required SHARED locks, then no system call is required.
*/
if( op==LOG_UNLOCK ){
/* Keep only the regions that no other connection still has locked. */
mRegion = (mRegion & ~mOther);
}
if( (op==LOG_WRLOCK)
|| (op==LOG_UNLOCK && mRegion)
|| (op==LOG_RDLOCK && (mOther&mRegion)!=mRegion)
){
/* Lookup table indexed by the 4-bit region mask. Region A (0x01) is the
** byte at offset 4+LOG_LOCK_REGION, B (0x02) at 3+LOG_LOCK_REGION,
** C (0x04) at 2+LOG_LOCK_REGION and D (0x08) at 1+LOG_LOCK_REGION, so
** that adjacent regions can be locked with a single byte range. Entries
** of {0, 0} are masks that either do not form one contiguous byte range
** or are never requested; the assert() below rejects them.
*/
struct LockMap {
int iStart; /* Byte offset to start locking operation */
int iLen; /* Length field for locking operation */
} aMap[] = {
/* 0000 */ {0, 0}, /* 0001 */ {4+LOG_LOCK_REGION, 1},
/* 0010 */ {3+LOG_LOCK_REGION, 1}, /* 0011 */ {3+LOG_LOCK_REGION, 2},
/* 0100 */ {2+LOG_LOCK_REGION, 1}, /* 0101 */ {0, 0},
/* 0110 */ {2+LOG_LOCK_REGION, 2}, /* 0111 */ {2+LOG_LOCK_REGION, 3},
/* 1000 */ {1+LOG_LOCK_REGION, 1}, /* 1001 */ {0, 0},
/* 1010 */ {0, 0}, /* 1011 */ {0, 0},
/* 1100 */ {1+LOG_LOCK_REGION, 2}, /* 1101 */ {0, 0},
/* 1110 */ {0, 0}, /* 1111 */ {0, 0}
};
int rc; /* Return code of logLockFd() */
assert( mRegion<ArraySize(aMap) && aMap[mRegion].iStart!=0 );
rc = logLockFd(pSummary, aMap[mRegion].iStart, aMap[mRegion].iLen, op);
if( rc!=0 ){
/* The file lock was not obtained: leave pLog->lock.mLock unchanged. */
sqlite3_mutex_leave(pSummary->mutex);
return rc;
}
}
/* Record the new lock state only once the file-level operation (if one
** was required) has succeeded. */
pLog->lock.mLock = mNew;
sqlite3_mutex_leave(pSummary->mutex);
return SQLITE_OK;
}
/*
** Attempt to take a lock on the single byte at offset LOG_LOCK_DMH (the
** "dead-mans-hand" byte) of the log-summary file. Parameter eLock must be
** LOG_RDLOCK or LOG_WRLOCK. The request is non-blocking. The return value
** is whatever logLockFd() returns: SQLITE_OK or SQLITE_BUSY.
*/
static int logLockDMH(LogSummary *pSummary, int eLock){
  int rc;                         /* Result of the locking attempt */

  assert( eLock==LOG_WRLOCK || eLock==LOG_RDLOCK );
  rc = logLockFd(pSummary, LOG_LOCK_DMH, 1, eLock);
  return rc;
}
/*
** Lock or unlock the single byte at offset LOG_LOCK_MUTEX of the
** log-summary file. This byte serves as a mutex on the log-summary.
**
** Parameter eLock must be either:
**
**   LOG_WRLOCKW  Take an exclusive lock, blocking until it is available.
**   LOG_UNLOCK   Release the lock.
**
** Returns SQLITE_OK on success. If the underlying fcntl() call fails (a
** blocking F_SETLKW request can still fail, for example when interrupted
** by a signal), the error code from logLockFd() is returned so that the
** caller does not proceed as though it held the lock. Callers that take
** the lock already test the return value; callers that release it may
** ignore it.
*/
static int logLockMutex(LogSummary *pSummary, int eLock){
  assert( eLock==LOG_WRLOCKW || eLock==LOG_UNLOCK );
  return logLockFd(pSummary, LOG_LOCK_MUTEX, 1, eLock);
}
/* /*
** This function initializes the connection to the log-summary identified ** This function initializes the connection to the log-summary identified
** by struct pSummary. ** by struct pSummary.
*/ */
static int logSummaryInit(LogSummary *pSummary, sqlite3_file *pFd){ static int logSummaryInit(
LogSummary *pSummary, /* Log summary object to initialize */
sqlite3_file *pFd /* File descriptor open on log file */
){
int rc; /* Return Code */ int rc; /* Return Code */
char *zFile; /* File name for summary file */ char *zFile; /* File name for summary file */
@@ -614,36 +810,35 @@ static int logSummaryInit(LogSummary *pSummary, sqlite3_file *pFd){
return SQLITE_IOERR; return SQLITE_IOERR;
} }
/* Grab an exclusive lock on the summary file. Then mmap() it. TODO: This /* Grab an exclusive lock on the summary file. Then mmap() it.
** code needs to be enhanced to support a growable mapping. For now, just **
** make the mapping very large to start with. ** TODO: This code needs to be enhanced to support a growable mapping.
** For now, just make the mapping very large to start with. The
** pages should not be allocated until they are first accessed anyhow,
** so using a large mapping consumes no more resources than a smaller
** one would.
*/ */
rc = logSummaryLock(pSummary); assert( sqlite3_mutex_held(pSummary->mutex) );
rc = logLockMutex(pSummary, LOG_WRLOCKW);
if( rc!=SQLITE_OK ) return rc; if( rc!=SQLITE_OK ) return rc;
rc = logSummaryMap(pSummary, 512*1024); rc = logSummaryMap(pSummary, 512*1024);
if( rc!=SQLITE_OK ) goto out; if( rc!=SQLITE_OK ) goto out;
/* Grab a SHARED lock on the log file. Then try to upgrade to an EXCLUSIVE /* Try to obtain an EXCLUSIVE lock on the dead-mans-hand region. If this
** lock. If successful, then this is the first (and only) connection to ** is possible, the contents of the log-summary file (if any) may not
** the database. In this case assume the contents of the log-summary ** be trusted. Zero the log-summary header before continuing.
** cannot be trusted. Zero the log-summary header to make sure.
**
** The SHARED lock on the log file is not released until the connection
** to the database is closed.
*/ */
rc = sqlite3OsLock(pFd, SQLITE_LOCK_SHARED); rc = logLockDMH(pSummary, LOG_WRLOCK);
if( rc!=SQLITE_OK ) goto out;
rc = sqlite3OsLock(pFd, SQLITE_LOCK_EXCLUSIVE);
if( rc==SQLITE_OK ){ if( rc==SQLITE_OK ){
/* This is the first and only connection. */
memset(pSummary->aData, 0, (LOGSUMMARY_HDR_NFIELD+2)*sizeof(u32) ); memset(pSummary->aData, 0, (LOGSUMMARY_HDR_NFIELD+2)*sizeof(u32) );
rc = sqlite3OsUnlock(pFd, SQLITE_LOCK_SHARED); }
}else if( rc==SQLITE_BUSY ){ rc = logLockDMH(pSummary, LOG_RDLOCK);
rc = SQLITE_OK; if( rc!=SQLITE_OK ){
return SQLITE_IOERR;
} }
out: out:
logSummaryUnlock(pSummary); logLockMutex(pSummary, LOG_UNLOCK);
return rc; return rc;
} }
@@ -652,6 +847,12 @@ static int logSummaryInit(LogSummary *pSummary, sqlite3_file *pFd){
** database file does not actually have to exist. zDb is used only to ** database file does not actually have to exist. zDb is used only to
** figure out the name of the log file to open. If the log file does not ** figure out the name of the log file to open. If the log file does not
** exist it is created by this call. ** exist it is created by this call.
**
** A SHARED lock should be held on the database file when this function
** is called. The purpose of this SHARED lock is to prevent any other
** client from unlinking the log or log-summary file. If another process
** were to do this just after this client opened one of these files, the
** system would be badly broken.
*/ */
int sqlite3LogOpen( int sqlite3LogOpen(
sqlite3_vfs *pVfs, /* vfs module to open log file with */ sqlite3_vfs *pVfs, /* vfs module to open log file with */
@@ -666,11 +867,10 @@ int sqlite3LogOpen(
char *zWal = 0; /* Path to WAL file */ char *zWal = 0; /* Path to WAL file */
int nWal; /* Length of zWal in bytes */ int nWal; /* Length of zWal in bytes */
/* Zero output variables */
assert( zDb ); assert( zDb );
*ppLog = 0;
/* Allocate an instance of struct Log to return. */ /* Allocate an instance of struct Log to return. */
*ppLog = 0;
pRet = (Log *)sqlite3MallocZero(sizeof(Log) + pVfs->szOsFile); pRet = (Log *)sqlite3MallocZero(sizeof(Log) + pVfs->szOsFile);
if( !pRet ) goto out; if( !pRet ) goto out;
pRet->pVfs = pVfs; pRet->pVfs = pVfs;
@@ -726,15 +926,12 @@ int sqlite3LogOpen(
/* Object pSummary is shared between all connections to the database made /* Object pSummary is shared between all connections to the database made
** by this process. So at this point it may or may not be connected to ** by this process. So at this point it may or may not be connected to
** the log-summary. If it is not, connect it. Otherwise, just take the ** the log-summary. If it is not, connect it.
** SHARED lock on the log file.
*/ */
sqlite3_mutex_enter(pSummary->mutex); sqlite3_mutex_enter(pSummary->mutex);
mutex = pSummary->mutex; mutex = pSummary->mutex;
if( pSummary->fd<0 ){ if( pSummary->fd<0 ){
rc = logSummaryInit(pSummary, pRet->pFd); rc = logSummaryInit(pSummary, pRet->pFd);
}else{
rc = sqlite3OsLock(pRet->pFd, SQLITE_LOCK_SHARED);
} }
pRet->lock.pNext = pSummary->pLock; pRet->lock.pNext = pSummary->pLock;
@@ -940,45 +1137,43 @@ int sqlite3LogClose(
**/ **/
pSummary->nRef--; pSummary->nRef--;
if( pSummary->nRef==0 ){ if( pSummary->nRef==0 ){
int rc;
LogSummary **pp; LogSummary **pp;
rc = logSummaryLock(pSummary);
if( rc==SQLITE_OK ){
int isTruncate = 0;
int rc2 = sqlite3OsLock(pLog->pFd, SQLITE_LOCK_EXCLUSIVE);
if( rc2==SQLITE_OK ){
/* This is the last connection to the database (including other
** processes). Do three things:
**
** 1. Checkpoint the db.
** 2. Truncate the log file to zero bytes.
** 3. Truncate the log-summary file to zero bytes.
*/
rc2 = logCheckpoint(pLog, pFd, zBuf);
if( rc2==SQLITE_OK ){
rc2 = sqlite3OsTruncate(pLog->pFd, 0);
}
isTruncate = 1;
}else if( rc2==SQLITE_BUSY ){
rc2 = SQLITE_OK;
}
logSummaryUnmap(pSummary, isTruncate);
sqlite3OsUnlock(pLog->pFd, SQLITE_LOCK_NONE);
rc = logSummaryUnlock(pSummary);
if( rc2!=SQLITE_OK ) rc = rc2;
}
/* Remove the LogSummary object from the global list. Then free the
** mutex and the object itself.
*/
for(pp=&pLogSummary; *pp!=pSummary; pp=&(*pp)->pNext); for(pp=&pLogSummary; *pp!=pSummary; pp=&(*pp)->pNext);
*pp = (*pp)->pNext; *pp = (*pp)->pNext;
sqlite3_mutex_leave(mutex);
rc = sqlite3OsLock(pFd, SQLITE_LOCK_EXCLUSIVE);
if( rc==SQLITE_OK ){
/* This is the last connection to the database (including other
** processes). Do three things:
**
** 1. Checkpoint the db.
** 2. Truncate the log file.
** 3. Unlink the log-summary file.
*/
rc = logCheckpoint(pLog, pFd, zBuf);
if( rc==SQLITE_OK ){
rc = sqlite3OsDelete(pLog->pVfs, pSummary->zPath, 0);
}
logSummaryUnmap(pSummary, 1);
}else{
if( rc==SQLITE_BUSY ){
rc = SQLITE_OK;
}
logSummaryUnmap(pSummary, 0);
}
sqlite3OsUnlock(pFd, SQLITE_LOCK_NONE);
sqlite3_mutex_free(pSummary->mutex); sqlite3_mutex_free(pSummary->mutex);
sqlite3_free(pSummary); sqlite3_free(pSummary);
}else{
sqlite3_mutex_leave(mutex);
} }
sqlite3_mutex_leave(mutex);
/* Close the connection to the log file and free the Log handle. */ /* Close the connection to the log file and free the Log handle. */
sqlite3OsClose(pLog->pFd); sqlite3OsClose(pLog->pFd);
sqlite3_free(pLog); sqlite3_free(pLog);
@@ -1012,7 +1207,7 @@ static int logEnterMutex(Log *pLog){
int rc; int rc;
sqlite3_mutex_enter(pSummary->mutex); sqlite3_mutex_enter(pSummary->mutex);
rc = logSummaryLock(pSummary); rc = logLockMutex(pSummary, LOG_WRLOCKW);
if( rc!=SQLITE_OK ){ if( rc!=SQLITE_OK ){
sqlite3_mutex_leave(pSummary->mutex); sqlite3_mutex_leave(pSummary->mutex);
} }
@@ -1020,157 +1215,10 @@ static int logEnterMutex(Log *pLog){
} }
static void logLeaveMutex(Log *pLog){ static void logLeaveMutex(Log *pLog){
LogSummary *pSummary = pLog->pSummary; LogSummary *pSummary = pLog->pSummary;
logSummaryUnlock(pSummary); logLockMutex(pSummary, LOG_UNLOCK);
sqlite3_mutex_leave(pSummary->mutex); sqlite3_mutex_leave(pSummary->mutex);
} }
/*
** Values for the third parameter ("op") to logLockRegion().
*/
#define LOG_UNLOCK 0
#define LOG_RDLOCK 1
#define LOG_WRLOCK 2
/*
** Take or release locks on the regions named by bitmask mRegion (an ORed
** combination of LOG_REGION_A..LOG_REGION_D) on behalf of connection pLog.
** Parameter op is LOG_UNLOCK, LOG_RDLOCK or LOG_WRLOCK. Only the
** (op, mRegion) pairs listed in the first assert() below are supported.
**
** pLog->lock.mLock records the locks held by this connection: the low byte
** holds SHARED bits and the next byte holds EXCLUSIVE bits. The fcntl()
** locks themselves are taken on the log-summary file descriptor, which is
** shared by every connection in this process.
**
** Returns SQLITE_BUSY if another connection in this process holds a
** conflicting lock, or if the fcntl() call fails for any reason. In that
** case pLog->lock.mLock is left unchanged. Otherwise pLog->lock.mLock is
** updated and SQLITE_OK is returned.
*/
static int logLockRegion(Log *pLog, u32 mRegion, int op){
LogSummary *pSummary = pLog->pSummary;
LogLock *p; /* Used to iterate through in-process locks */
u32 mOther; /* Locks held by other connections */
u32 mNew; /* New mask for pLog */
assert(
/* Writer lock operations */
(op==LOG_WRLOCK && mRegion==(LOG_REGION_C|LOG_REGION_D))
|| (op==LOG_UNLOCK && mRegion==(LOG_REGION_C|LOG_REGION_D))
/* Normal reader lock operations */
|| (op==LOG_RDLOCK && mRegion==(LOG_REGION_A|LOG_REGION_B))
|| (op==LOG_UNLOCK && mRegion==(LOG_REGION_A))
|| (op==LOG_UNLOCK && mRegion==(LOG_REGION_B))
/* Region D reader lock operations */
|| (op==LOG_RDLOCK && mRegion==(LOG_REGION_D))
|| (op==LOG_RDLOCK && mRegion==(LOG_REGION_A))
|| (op==LOG_UNLOCK && mRegion==(LOG_REGION_D))
/* Checkpointer lock operations */
|| (op==LOG_WRLOCK && mRegion==(LOG_REGION_B|LOG_REGION_C))
|| (op==LOG_WRLOCK && mRegion==(LOG_REGION_A))
|| (op==LOG_UNLOCK && mRegion==(LOG_REGION_B|LOG_REGION_C))
|| (op==LOG_UNLOCK && mRegion==(LOG_REGION_A|LOG_REGION_B|LOG_REGION_C))
);
/* Assert that a connection never tries to go from an EXCLUSIVE to a
** SHARED lock on a region. Moving from SHARED to EXCLUSIVE sometimes
** happens though (when a region D reader upgrades to a writer).
*/
assert( op!=LOG_RDLOCK || 0==(pLog->lock.mLock & (mRegion<<8)) );
sqlite3_mutex_enter(pSummary->mutex);
/* Calculate a mask of locks held by all connections in this process apart
** from this one. The least significant byte of the mask contains a mask
** of the SHARED locks held. The next least significant byte of the mask
** indicates the EXCLUSIVE locks held. For example, to test if some other
** connection is holding a SHARED lock on region A, or an EXCLUSIVE lock
** on region C, do:
**
** hasSharedOnA = (mOther & (LOG_REGION_A<<0));
** hasExclusiveOnC = (mOther & (LOG_REGION_C<<8));
**
** In all masks, if the bit in the EXCLUSIVE byte mask is set, so is the
** corresponding bit in the SHARED mask.
*/
mOther = 0;
for(p=pSummary->pLock; p; p=p->pNext){
/* Check the invariant: every EXCLUSIVE bit has its SHARED bit set too. */
assert( (p->mLock & (p->mLock<<8))==(p->mLock&0x0000FF00) );
if( p!=&pLog->lock ){
mOther |= p->mLock;
}
}
/* If this call is to lock a region (not to unlock one), test if locks held
** by any other connection in this process prevent the new locks from
** being granted. If so, exit the summary mutex and return SQLITE_BUSY.
**
** A LOG_RDLOCK request conflicts only with EXCLUSIVE locks, so mRegion is
** shifted into the EXCLUSIVE byte before testing. A LOG_WRLOCK request
** conflicts with any lock at all, so the SHARED byte is tested (it is a
** superset of the EXCLUSIVE byte, per the invariant asserted above).
*/
if( op && (mOther & (mRegion << (op==LOG_RDLOCK ? 8 : 0))) ){
sqlite3_mutex_leave(pSummary->mutex);
return SQLITE_BUSY;
}
/* Figure out the new lock mask for this connection. */
switch( op ){
case LOG_UNLOCK:
mNew = (pLog->lock.mLock & ~(mRegion|(mRegion<<8)));
break;
case LOG_RDLOCK:
mNew = (pLog->lock.mLock | mRegion);
break;
default:
assert( op==LOG_WRLOCK );
mNew = (pLog->lock.mLock | (mRegion<<8) | mRegion);
break;
}
/* Now modify the locks held on the log-summary file descriptor. This
** file descriptor is shared by all log connections in this process.
** Therefore:
**
** + If one or more log connections in this process hold a SHARED lock
** on a region, the file-descriptor should hold a SHARED lock on
** the file region.
**
** + If a log connection in this process holds an EXCLUSIVE lock on a
** region, the file-descriptor should also hold an EXCLUSIVE lock on
** the region in question.
**
** If this is a LOG_UNLOCK operation, only regions for which no other
** connection holds a lock should actually be unlocked. And if this
** is a LOG_RDLOCK operation and other connections already hold all
** the required SHARED locks, then no system call is required.
*/
if( op==LOG_UNLOCK ){
/* Keep only the regions that no other connection still has locked. */
mRegion = (mRegion & ~mOther);
}
if( (op==LOG_WRLOCK)
|| (op==LOG_UNLOCK && mRegion)
|| (op==LOG_RDLOCK && (mOther&mRegion)!=mRegion)
){
/* Lookup table indexed by the 4-bit region mask. Region A (0x01) maps
** to relative byte 4, B (0x02) to 3, C (0x04) to 2 and D (0x08) to 1, so
** that adjacent regions can be locked with a single byte range. Entries
** of {0, 0} are masks that either do not form one contiguous byte range
** or are never requested; the assert() below rejects them.
*/
struct LockMap {
int iStart; /* Byte offset to start locking operation */
int iLen; /* Length field for locking operation */
} aMap[] = {
/* 0000 */ {0, 0}, /* 0001 */ {4, 1},
/* 0010 */ {3, 1}, /* 0011 */ {3, 2},
/* 0100 */ {2, 1}, /* 0101 */ {0, 0},
/* 0110 */ {2, 2}, /* 0111 */ {2, 3},
/* 1000 */ {1, 1}, /* 1001 */ {0, 0},
/* 1010 */ {0, 0}, /* 1011 */ {0, 0},
/* 1100 */ {1, 2}, /* 1101 */ {0, 0},
/* 1110 */ {0, 0}, /* 1111 */ {0, 0}
};
int rc; /* Return code of fcntl() */
struct flock f; /* Locking operation */
assert( mRegion<ArraySize(aMap) && aMap[mRegion].iStart!=0 );
memset(&f, 0, sizeof(f));
f.l_type = (op==LOG_WRLOCK?F_WRLCK:(op==LOG_RDLOCK?F_RDLCK:F_UNLCK));
f.l_whence = SEEK_SET;
/* The region lock bytes are placed at file offset 32 plus the relative
** byte number taken from aMap[]. */
f.l_start = 32 + aMap[mRegion].iStart;
f.l_len = aMap[mRegion].iLen;
/* F_SETLK is non-blocking: fcntl() fails at once if the lock is held. */
rc = fcntl(pSummary->fd, F_SETLK, &f);
if( rc!=0 ){
/* The file lock was not obtained: leave pLog->lock.mLock unchanged. */
sqlite3_mutex_leave(pSummary->mutex);
return SQLITE_BUSY;
}
}
/* Record the new lock state only once the file-level operation (if one
** was required) has succeeded. */
pLog->lock.mLock = mNew;
sqlite3_mutex_leave(pSummary->mutex);
return SQLITE_OK;
}
/* /*
** Try to read the log-summary header. Attempt to verify the header ** Try to read the log-summary header. Attempt to verify the header
** checksum. If the checksum can be verified, copy the log-summary ** checksum. If the checksum can be verified, copy the log-summary

View File

@@ -491,6 +491,7 @@ static int assert_pager_state(Pager *pPager){
} }
#endif #endif
/* /*
** Return true if it is necessary to write page *pPg into the sub-journal. ** Return true if it is necessary to write page *pPg into the sub-journal.
** A page needs to be written into the sub-journal if there exists one ** A page needs to be written into the sub-journal if there exists one
@@ -1187,19 +1188,6 @@ static int addToSavepointBitvecs(Pager *pPager, Pgno pgno){
return rc; return rc;
} }
/*
** Ensure that pager pPager has an open connection to its write-ahead log.
** If pPager->pLog is already set this is a no-op that returns SQLITE_OK.
** Otherwise sqlite3LogOpen() is called and its return code is returned.
*/
static int pagerOpenLog(Pager *pPager){
  int rc = SQLITE_OK;             /* Return code */

  if( pPager->pLog==0 ){
    rc = sqlite3LogOpen(pPager->pVfs, pPager->zFilename, &pPager->pLog);
  }
  return rc;
}
/* /*
** Return true if this pager uses a write-ahead log instead of the usual ** Return true if this pager uses a write-ahead log instead of the usual
** rollback journal. Otherwise false. ** rollback journal. Otherwise false.
@@ -1241,8 +1229,9 @@ static void pager_unlock(Pager *pPager){
if( pagerUseLog(pPager) ){ if( pagerUseLog(pPager) ){
sqlite3LogCloseSnapshot(pPager->pLog); sqlite3LogCloseSnapshot(pPager->pLog);
}else{
rc = osUnlock(pPager->fd, NO_LOCK);
} }
rc = osUnlock(pPager->fd, NO_LOCK);
if( rc ){ if( rc ){
pPager->errCode = rc; pPager->errCode = rc;
} }
@@ -3733,6 +3722,54 @@ static int hasHotJournal(Pager *pPager, int *pExists){
return rc; return rc;
} }
/*
** Make sure pager pPager has an open connection to its write-ahead log
** file. If the connection already exists (pPager->pLog is set), nothing
** is opened and SQLITE_OK is returned.
**
** Otherwise a SHARED lock is taken on the database file and then the log
** is opened. If taking the lock fails, the result of pager_error() is
** returned. If sqlite3LogOpen() fails, the database file is unlocked, the
** pager state is reset to PAGER_UNLOCK and the error code is returned.
*/
static int pagerOpenLog(Pager *pPager){
  int rc;                         /* Return code */

  if( pPager->pLog ){
    /* The log is already open. In debug builds, verify that the pager is
    ** still holding the required SHARED lock on the database file.
    */
#ifdef SQLITE_DEBUG
    int eHeld;
    sqlite3OsFileControl(pPager->fd, SQLITE_FCNTL_LOCKSTATE, &eHeld);
    assert( eHeld==SQLITE_LOCK_SHARED );
#endif
    return SQLITE_OK;
  }

  /* Before opening the log file, obtain a SHARED lock on the database
  ** file. This lock is held until after the log connection has been
  ** closed. Its purpose is to prevent any other process from unlinking
  ** the log or log-summary files while this connection has them open,
  ** since unlinking either file requires an EXCLUSIVE lock on the
  ** database file.
  */
  assert( pPager->state==PAGER_UNLOCK );
  rc = pager_wait_on_lock(pPager, SHARED_LOCK);
  if( rc!=SQLITE_OK ){
    assert( pPager->state==PAGER_UNLOCK );
    return pager_error(pPager, rc);
  }
  assert( pPager->state>=SHARED_LOCK );

  /* Open the log connection. Should this fail (for example because a
  ** malloc() failed), drop the database file lock again before returning
  ** the error code to the caller.
  */
  rc = sqlite3LogOpen(pPager->pVfs, pPager->zFilename, &pPager->pLog);
  if( rc!=SQLITE_OK ){
    osUnlock(pPager->fd, SQLITE_LOCK_NONE);
    pPager->state = PAGER_UNLOCK;
  }
  return rc;
}
/* /*
** This function is called to obtain a shared lock on the database file. ** This function is called to obtain a shared lock on the database file.
** It is illegal to call sqlite3PagerAcquire() until after this function ** It is illegal to call sqlite3PagerAcquire() until after this function
@@ -3786,17 +3823,25 @@ int sqlite3PagerSharedLock(Pager *pPager){
pager_reset(pPager); pager_reset(pPager);
} }
if( pagerUseLog(pPager) ){
int changed = 0;
if( pPager->journalMode==PAGER_JOURNALMODE_WAL ){
int changed = 0; /* True if the cache must be flushed */
/* Open the log file, if it is not already open. */
rc = pagerOpenLog(pPager);
if( rc!=SQLITE_OK ){
return rc;
}
/* Open a log snapshot to read from. */
rc = sqlite3LogOpenSnapshot(pPager->pLog, &changed); rc = sqlite3LogOpenSnapshot(pPager->pLog, &changed);
if( rc==SQLITE_OK ){ if( rc==SQLITE_OK ){
int dummy;
if( changed ){ if( changed ){
pager_reset(pPager); pager_reset(pPager);
assert( pPager->errCode || pPager->dbSizeValid==0 ); assert( pPager->errCode || pPager->dbSizeValid==0 );
} }
pPager->state = PAGER_SHARED; /* TODO: Is this right? */ rc = sqlite3PagerPagecount(pPager, &dummy);
rc = sqlite3PagerPagecount(pPager, &changed);
} }
}else if( pPager->state==PAGER_UNLOCK || isErrorReset ){ }else if( pPager->state==PAGER_UNLOCK || isErrorReset ){
sqlite3_vfs * const pVfs = pPager->pVfs; sqlite3_vfs * const pVfs = pPager->pVfs;
@@ -5611,15 +5656,13 @@ int sqlite3PagerJournalMode(Pager *pPager, int eMode){
sqlite3OsDelete(pPager->pVfs, pPager->zJournal, 0); sqlite3OsDelete(pPager->pVfs, pPager->zJournal, 0);
} }
if( eMode==PAGER_JOURNALMODE_WAL ){ /* Switching into WAL mode can only take place when no
int rc = pagerOpenLog(pPager); ** locks are held on the database file.
if( rc!=SQLITE_OK ){ */
/* TODO: The error code should not just get dropped here. Change if( eMode==PAGER_JOURNALMODE_WAL && pPager->state!=PAGER_UNLOCK ){
** this to set a flag to force the log to be opened the first time return (int)pPager->journalMode;
** it is actually required. */
return (int)pPager->journalMode;
}
} }
pPager->journalMode = (u8)eMode; pPager->journalMode = (u8)eMode;
} }
return (int)pPager->journalMode; return (int)pPager->journalMode;

View File

@@ -32,6 +32,10 @@ proc log_file_size {nFrame pgsz} {
expr {12 + ($pgsz+16)*$nFrame} expr {12 + ($pgsz+16)*$nFrame}
} }
# Return 1 if no file named $logfile exists, or 0 if the file is present.
proc log_deleted {logfile} {
  if {[file exists $logfile]} {
    return 0
  }
  return 1
}
# #
# These are 'warm-body' tests used while developing the WAL code. They # These are 'warm-body' tests used while developing the WAL code. They
# serve to prove that a few really simple cases work: # serve to prove that a few really simple cases work:
@@ -179,7 +183,6 @@ do_test wal-5.5 {
} {1 2 3 4} } {1 2 3 4}
db close db close
foreach sector {512 4096} { foreach sector {512 4096} {
sqlite3_simulate_device -sectorsize $sector sqlite3_simulate_device -sectorsize $sector
foreach pgsz {512 1024 2048 4096} { foreach pgsz {512 1024 2048 4096} {
@@ -198,8 +201,8 @@ foreach sector {512 4096} {
} [expr $pgsz*2] } [expr $pgsz*2]
do_test wal-6.$sector.$pgsz.2 { do_test wal-6.$sector.$pgsz.2 {
file size test.db-wal log_deleted test.db-wal
} {0} } {1}
} }
} }
@@ -591,8 +594,8 @@ do_test wal-11.8 {
do_test wal-11.9 { do_test wal-11.9 {
db close db close
sqlite3_wal db test.db sqlite3_wal db test.db
list [expr [file size test.db]/1024] [file size test.db-wal] list [expr [file size test.db]/1024] [log_deleted test.db-wal]
} {37 0} } {37 1}
do_test wal-11.10 { do_test wal-11.10 {
execsql { execsql {

View File

@@ -21,12 +21,14 @@ if {[run_thread_tests]==0} { finish_test ; return }
do_test walthread-1.1 { do_test walthread-1.1 {
execsql { execsql {
PRAGMA journal_mode = WAL; PRAGMA journal_mode = WAL;
PRAGMA lock_status;
CREATE TABLE t1(x PRIMARY KEY); CREATE TABLE t1(x PRIMARY KEY);
PRAGMA lock_status;
INSERT INTO t1 VALUES(randomblob(100)); INSERT INTO t1 VALUES(randomblob(100));
INSERT INTO t1 VALUES(randomblob(100)); INSERT INTO t1 VALUES(randomblob(100));
INSERT INTO t1 SELECT md5sum(x) FROM t1; INSERT INTO t1 SELECT md5sum(x) FROM t1;
} }
} {wal} } {wal main unlocked temp closed main shared temp closed}
do_test walthread-1.2 { do_test walthread-1.2 {
execsql { execsql {
SELECT (SELECT count(*) FROM t1), ( SELECT (SELECT count(*) FROM t1), (
@@ -41,7 +43,7 @@ do_test walthread-1.3 {
} {ok} } {ok}
do_test walthread-1.4 { do_test walthread-1.4 {
execsql { PRAGMA lock_status } execsql { PRAGMA lock_status }
} {main unlocked temp unknown} } {main shared temp unknown}
#-------------------------------------------------------------------------- #--------------------------------------------------------------------------
# Start N threads. Each thread performs both read and write transactions. # Start N threads. Each thread performs both read and write transactions.