diff --git a/main.mk b/main.mk index f0b7636e54..89f1c6ada9 100644 --- a/main.mk +++ b/main.mk @@ -56,7 +56,7 @@ LIBOBJ+= alter.o analyze.o attach.o auth.o \ fts3.o fts3_expr.o fts3_hash.o fts3_icu.o fts3_porter.o \ fts3_snippet.o fts3_tokenizer.o fts3_tokenizer1.o fts3_write.o \ func.o global.o hash.o \ - icu.o insert.o journal.o legacy.o loadext.o \ + icu.o insert.o journal.o legacy.o loadext.o log.o \ main.o malloc.o mem0.o mem1.o mem2.o mem3.o mem5.o \ memjournal.o \ mutex.o mutex_noop.o mutex_os2.o mutex_unix.o mutex_w32.o \ @@ -101,6 +101,8 @@ SRC = \ $(TOP)/src/journal.c \ $(TOP)/src/legacy.c \ $(TOP)/src/loadext.c \ + $(TOP)/src/log.c \ + $(TOP)/src/log.h \ $(TOP)/src/main.c \ $(TOP)/src/malloc.c \ $(TOP)/src/mem0.c \ @@ -255,8 +257,8 @@ TESTSRC = \ TESTSRC2 = \ $(TOP)/src/attach.c $(TOP)/src/backup.c $(TOP)/src/btree.c \ $(TOP)/src/build.c $(TOP)/src/date.c \ - $(TOP)/src/expr.c $(TOP)/src/func.c $(TOP)/src/insert.c $(TOP)/src/mem5.c \ - $(TOP)/src/os.c \ + $(TOP)/src/expr.c $(TOP)/src/func.c $(TOP)/src/insert.c $(TOP)/src/log.c \ + $(TOP)/src/mem5.c $(TOP)/src/os.c \ $(TOP)/src/os_os2.c $(TOP)/src/os_unix.c $(TOP)/src/os_win.c \ $(TOP)/src/pager.c $(TOP)/src/pragma.c $(TOP)/src/prepare.c \ $(TOP)/src/printf.c $(TOP)/src/random.c $(TOP)/src/pcache.c \ diff --git a/manifest b/manifest index 55eab897f8..4983ed4acd 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Fix\sa\sproblem\swhere\sa\sprocess\sin\sexclusive\smode\scould\sdelete\sa\shot-journal\sfile\swithout\srolling\sit\sback\sfrom\swithin\ssqlite3_close()\sor\sDETACH.\sThis\sproblem\swas\sintroduced\sby\sthe\sprevious\scommit,\sit\sis\snot\spresent\sin\sany\sreleases. -D 2010-04-12T17:08:45 +C Import\sexperimental\swrite-ahead-logging\scode. 
+D 2010-04-12T19:00:30 F Makefile.arm-wince-mingw32ce-gcc fcd5e9cd67fe88836360bb4f9ef4cb7f8e2fb5a0 F Makefile.in 4f2f967b7e58a35bb74fb7ec8ae90e0f4ca7868b F Makefile.linux-gcc d53183f4aa6a9192d249731c90dbdffbd2c68654 @@ -89,7 +89,7 @@ F ext/rtree/tkt3363.test 2bf324f7908084a5f463de3109db9c6e607feb1b F ext/rtree/viewrtree.tcl eea6224b3553599ae665b239bd827e182b466024 F install-sh 9d4de14ab9fb0facae2f48780b874848cbf2f895 x F ltmain.sh 3ff0879076df340d2e23ae905484d8c15d5fdea8 -F main.mk d286b99eb87db41cfc5394e346604ef49509867d +F main.mk f12991ace528dd01d018420988ff053350ae81f8 F mkdll.sh 7d09b23c05d56532e9d44a50868eb4b12ff4f74a F mkextu.sh 416f9b7089d80e5590a29692c9d9280a10dbad9f F mkextw.sh 4123480947681d9b434a5e7b1ee08135abe409ac @@ -131,6 +131,7 @@ F src/journal.c b0ea6b70b532961118ab70301c00a33089f9315c F src/legacy.c a199d7683d60cef73089e892409113e69c23a99f F src/lempar.c 7f026423f4d71d989e719a743f98a1cbd4e6d99e F src/loadext.c 1c7a61ce1281041f437333f366a96aa0d29bb581 +F src/log.c 6e8f296f6c566a297cd074c4165f1695fd1df5b7 F src/main.c c0e7192bad5b90544508b241eb2487ac661de890 F src/malloc.c a08f16d134f0bfab6b20c3cd142ebf3e58235a6a F src/mem0.c 6a55ebe57c46ca1a7d98da93aaa07f99f1059645 @@ -150,15 +151,15 @@ F src/os.c 8bc63cf91e9802e2b807198e54e50227fa889306 F src/os.h 534b082c3cb349ad05fa6fa0b06087e022af282c F src/os_common.h 240c88b163b02c21a9f21f87d49678a0aa21ff30 F src/os_os2.c 75a8c7b9a00a2cf1a65f9fa4afbc27d46634bb2f -F src/os_unix.c 148d2f625db3727250c0b880481ae7630b6d0eb0 +F src/os_unix.c 5bf0015cebe2f21635da2af983c348eb88b3b4c1 F src/os_win.c 1c7453c2df4dab26d90ff6f91272aea18bcf7053 -F src/pager.c da5ed17bb729c27a16c45fe38e9531c240a1c6a4 -F src/pager.h ef8a2cf10084f60ab45ee2dfded8bf8b0c655ddf +F src/pager.c d61318cd04a42076c6524cb22c14fe036adbddcb +F src/pager.h 80c57ba672fcd24215a68abf1c98d474a5080b9b F src/parse.y ace5c7a125d9f2a410e431ee3209034105045f7e F src/pcache.c ace8f6a5ecd4711cc66a1b23053be7109bd437cf F src/pcache.h 
c683390d50f856d4cd8e24342ae62027d1bb6050 F src/pcache1.c 6dc1871ce8ead9187161c370a58cd06c84221f76 -F src/pragma.c e166ea41544f8e57a08db86dbe87212b7d378fe8 +F src/pragma.c 55f34050510940e7b60fa2b01141c16f87b649c5 F src/prepare.c fd1398cb1da54385ba5bd68d93928f10d10a1d9c F src/printf.c 5f5b65a83e63f2096a541a340722a509fa0240a7 F src/random.c cd4a67b3953b88019f8cd4ccd81394a8ddfaba50 @@ -212,7 +213,7 @@ F src/update.c c0dc6b75ad28b76b619042d934f337b02acee208 F src/utf.c 1baeeac91707a4df97ccc6141ec0f808278af685 F src/util.c 32aebf04c10e51ad3977a928b7416bed671b620b F src/vacuum.c b1d542c8919d4d11119f78069e1906a1ad07e0ee -F src/vdbe.c 2abd931ea2aec3eacc6426677f40cc5a1071d34e +F src/vdbe.c 23e5462d1cd0a6d525948cfec3382890abc31d3f F src/vdbe.h 471f6a3dcec4817ca33596fe7f6654d56c0e75f3 F src/vdbeInt.h 19ebc8c2a2e938340051ee65af3f377fb99102d1 F src/vdbeapi.c 74c25680046a116b24b95393914d3669c23305dc @@ -754,6 +755,10 @@ F test/vtabE.test 7c4693638d7797ce2eda17af74292b97e705cc61 F test/vtab_alter.test 9e374885248f69e251bdaacf480b04a197f125e5 F test/vtab_err.test 0d4d8eb4def1d053ac7c5050df3024fd47a3fbd8 F test/vtab_shared.test 0eff9ce4f19facbe0a3e693f6c14b80711a4222d +F test/wal.test 653a870ac4b312d421e006ec45a293ecf2135b49 +F test/walcrash.test 76a6848c5088596cf37f4042e7e4c108a8c323d4 +F test/walslow.test 17bfe8052a5d1c64e171eeab45c445fa90f4d4a6 +F test/walthread.test 82cc77f8853217a036fc82e4313ecebd9aa368d7 F test/where.test de337a3fe0a459ec7c93db16a519657a90552330 F test/where2.test 45eacc126aabb37959a387aa83e59ce1f1f03820 F test/where3.test 97d3936e6a443b968f1a61cdcc0f673252000e94 @@ -777,7 +782,7 @@ F tool/lempar.c 01ca97f87610d1dac6d8cd96ab109ab1130e76dc F tool/mkkeywordhash.c d2e6b4a5965e23afb80fbe74bb54648cd371f309 F tool/mkopts.tcl 66ac10d240cc6e86abd37dc908d50382f84ff46e F tool/mkspeedsql.tcl a1a334d288f7adfe6e996f2e712becf076745c97 -F tool/mksqlite3c.tcl 4c6924c7e877defa8f9a12ef1e6867de614acf3f +F tool/mksqlite3c.tcl 25ec827588893857eba2d24a645ace1bb7cdab73 F 
tool/mksqlite3h.tcl eb100dce83f24b501b325b340f8b5eb8e5106b3b F tool/mksqlite3internalh.tcl 7b43894e21bcb1bb39e11547ce7e38a063357e87 F tool/omittest.tcl 27d6f6e3b1e95aeb26a1c140e6eb57771c6d794a @@ -797,7 +802,11 @@ F tool/speedtest2.tcl ee2149167303ba8e95af97873c575c3e0fab58ff F tool/speedtest8.c 2902c46588c40b55661e471d7a86e4dd71a18224 F tool/speedtest8inst1.c 293327bc76823f473684d589a8160bde1f52c14e F tool/vdbe-compress.tcl d70ea6d8a19e3571d7ab8c9b75cba86d1173ff0f -P 562d20e662da474ea326165730ecfdfcf9b414ee -R 8333162e7f153eb6e6f78b7cbd811ab4 +P 51a613950824698687c0db83b7884db33d45f7f5 +R d1eaede94883ac0829acce36d14527da +T *bgcolor * #ffd0c0 +T *branch * wal +T *sym-wal * +T -sym-trunk * U dan -Z 65fbb5bed20edd048b9a7fedeedd0df1 +Z 9f87e000c4066ee64fca3bc82365cd7e diff --git a/manifest.uuid b/manifest.uuid index 37bbe815aa..bb821cd344 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -51a613950824698687c0db83b7884db33d45f7f5 \ No newline at end of file +409d61baeb0a19d1700c973f16c8acef7b8506cd \ No newline at end of file diff --git a/src/log.c b/src/log.c new file mode 100644 index 0000000000..7d37dfa1de --- /dev/null +++ b/src/log.c @@ -0,0 +1,1243 @@ + +/* +** This file contains the implementation of a log file used in +** "journal_mode=wal" mode. +*/ + +#include "log.h" + +#include +#include +#include + +typedef struct LogSummaryHdr LogSummaryHdr; +typedef struct LogSummary LogSummary; +typedef struct LogCheckpoint LogCheckpoint; + + +/* +** The following structure may be used to store the same data that +** is stored in the log-summary header. +** +** Member variables iCheck1 and iCheck2 contain the checksum for the +** last frame written to the log, or 2 and 3 respectively if the log +** is currently empty. 
+*/ +struct LogSummaryHdr { + u32 iChange; /* Counter incremented each transaction */ + u32 pgsz; /* Database page size in bytes */ + u32 iLastPg; /* Address of last valid frame in log */ + u32 nPage; /* Size of database in pages */ + u32 iCheck1; /* Checkpoint value 1 */ + u32 iCheck2; /* Checkpoint value 2 */ +}; + +/* Size of serialized LogSummaryHdr object. */ +#define LOGSUMMARY_HDR_NFIELD (sizeof(LogSummaryHdr) / sizeof(u32)) + +#define LOGSUMMARY_FRAME_OFFSET \ + (LOGSUMMARY_HDR_NFIELD + LOG_CKSM_BYTES/sizeof(u32)) + +/* Size of frame header */ +#define LOG_FRAME_HDRSIZE 20 + +/* +** There is one instance of this structure for each log-summary object +** that this process has a connection to. They are stored in a linked +** list starting at pLogSummary (global variable). +** +** TODO: LogSummary.fd is a unix file descriptor. Unix APIs are used +** directly in this implementation because the VFS does not support +** the required blocking file-locks. +*/ +struct LogSummary { + sqlite3_mutex *mutex; /* Mutex used to protect this object */ + int nRef; /* Number of pointers to this structure */ + int fd; /* File descriptor open on log-summary */ + char *zPath; /* Path to associated WAL file */ + LogSummary *pNext; /* Next in global list */ + int nData; /* Size of aData allocation/mapping */ + u32 *aData; /* File body */ +}; + +/* +** List of all LogSummary objects created by this process. Protected by +** static mutex LOG_SUMMARY_MUTEX. TODO: Should have a dedicated mutex +** here instead of borrowing the LRU mutex. 
+*/ +#define LOG_SUMMARY_MUTEX SQLITE_MUTEX_STATIC_LRU +static LogSummary *pLogSummary = 0; + +struct Log { + LogSummary *pSummary; /* Log file summary data */ + sqlite3_vfs *pVfs; /* The VFS used to create pFd */ + sqlite3_file *pFd; /* File handle for log file */ + int sync_flags; /* Flags to use with OsSync() */ + int isLocked; /* True if a snapshot is held open */ + int isWriteLocked; /* True if this is the writer connection */ + LogSummaryHdr hdr; /* Log summary header for current snapshot */ +}; + +/* +** This structure is used to implement an iterator that iterates through +** all frames in the log in database page order. Where two or more frames +** correspond to the same database page, the iterator visits only the +** frame most recently written to the log. +** +** The internals of this structure are only accessed by: +** +** logCheckpointInit() - Create a new iterator, +** logCheckpointNext() - Step an iterator, +** logCheckpointFree() - Free an iterator. +** +** This functionality is used by the checkpoint code (see logCheckpoint()). +*/ +struct LogCheckpoint { + int nSegment; /* Size of LogCheckpoint.aSummary[] array */ + int nFinal; /* Elements in segment nSegment-1 */ + struct LogSegment { + int iNext; /* Next aIndex index */ + u8 *aIndex; /* Pointer to index array */ + u32 *aDbPage; /* Pointer to db page array */ + } aSegment[1]; +}; + +/* +** Generate an 8 byte checksum based on the data in array aByte[] and the +** initial values of aCksum[0] and aCksum[1]. The checksum is written into +** aCksum[] before returning. 
+*/ +#define LOG_CKSM_BYTES 8 +static void logChecksumBytes(u8 *aByte, int nByte, u32 *aCksum){ + u32 *z32 = (u32 *)aByte; + int n32 = nByte / sizeof(u32); + int i; + + assert( LOG_CKSM_BYTES==2*sizeof(u32) ); + assert( (nByte&0x00000003)==0 ); + + u32 cksum0 = aCksum[0]; + u32 cksum1 = aCksum[1]; + + for(i=0; i> 8) + (cksum0 ^ z32[i]); + cksum1 = (cksum1 >> 8) + (cksum1 ^ z32[i]); + } + + aCksum[0] = cksum0; + aCksum[1] = cksum1; +} + +/* +** Argument zPath must be a nul-terminated string containing a path-name. +** This function modifies the string in-place by removing any "./" or "../" +** elements in the path. For example, the following input: +** +** "/home/user/plans/good/../evil/./world_domination.txt" +** +** is overwritten with the 'normalized' version: +** +** "/home/user/plans/evil/world_domination.txt" +*/ +static void logNormalizePath(char *zPath){ + int i, j; + char *z = zPath; + int n = strlen(z); + + while( n>1 && z[n-1]=='/' ){ n--; } + for(i=j=0; i0 && z[j-1]!='/' ){ j--; } + if( j>0 ){ j--; } + i += 2; + continue; + } + } + z[j++] = z[i]; + } + z[j] = 0; +} + +/* +** Lock the summary file pSummary->fd. +*/ +static int logSummaryLock(LogSummary *pSummary){ + int rc; + struct flock f; + memset(&f, 0, sizeof(f)); + f.l_type = F_WRLCK; + f.l_whence = SEEK_SET; + f.l_start = 0; + f.l_len = 1; + rc = fcntl(pSummary->fd, F_SETLKW, &f); + if( rc!=0 ){ + return SQLITE_IOERR; + } + return SQLITE_OK; +} + +/* +** Unlock the summary file pSummary->fd. +*/ +static int logSummaryUnlock(LogSummary *pSummary){ + int rc; + struct flock f; + memset(&f, 0, sizeof(f)); + f.l_type = F_UNLCK; + f.l_whence = SEEK_SET; + f.l_start = 0; + f.l_len = 1; + rc = fcntl(pSummary->fd, F_SETLK, &f); + if( rc!=0 ){ + return SQLITE_IOERR; + } + return SQLITE_OK; +} + +/* +** Memory map the first nByte bytes of the summary file opened with +** pSummary->fd at pSummary->aData. 
If the summary file is smaller than +** nByte bytes in size when this function is called, ftruncate() is +** used to expand it before it is mapped. +** +** It is assumed that an exclusive lock is held on the summary file +** by the caller (to protect the ftruncate()). +*/ +static int logSummaryMap(LogSummary *pSummary, int nByte){ + struct stat sStat; + int rc; + int fd = pSummary->fd; + void *pMap; + + assert( pSummary->aData==0 ); + + /* If the file is less than nByte bytes in size, cause it to grow. */ + rc = fstat(fd, &sStat); + if( rc!=0 ) return SQLITE_IOERR; + if( sStat.st_sizeaData = (u32 *)pMap; + pSummary->nData = nByte; + + return SQLITE_OK; +} + +/* +** Unmap the log-summary mapping and close the file-descriptor. If +** the isTruncate argument is non-zero, truncate the log-summary file +** region to zero bytes. +** +** Regardless of the value of isTruncate, close the file-descriptor +** opened on the log-summary file. +*/ +static int logSummaryUnmap(LogSummary *pSummary, int isTruncate){ + int rc = SQLITE_OK; + if( pSummary->aData ){ + assert( pSummary->fd>0 ); + munmap(pSummary->aData, pSummary->nData); + pSummary->aData = 0; + if( isTruncate ){ + rc = (ftruncate(pSummary->fd, 0) ? SQLITE_IOERR : SQLITE_OK); + } + } + if( pSummary->fd>0 ){ + close(pSummary->fd); + pSummary->fd = -1; + } + return rc; +} + + +static void logSummaryWriteHdr(LogSummary *pSummary, LogSummaryHdr *pHdr){ + u32 *aData = pSummary->aData; + memcpy(aData, pHdr, sizeof(LogSummaryHdr)); + aData[LOGSUMMARY_HDR_NFIELD] = 1; + aData[LOGSUMMARY_HDR_NFIELD+1] = 1; + logChecksumBytes( + (u8 *)aData, sizeof(LogSummaryHdr), &aData[LOGSUMMARY_HDR_NFIELD] + ); +} + +/* +** This function encodes a single frame header and writes it to a buffer +** supplied by the caller. A log frame-header is made up of a series of +** 4-byte big-endian integers, as follows: +** +** 0: Database page size in bytes. +** 4: Page number. +** 8: New database size (for commit frames, otherwise zero). 
+** 12: Frame checksum 1. +** 16: Frame checksum 2. +*/ +static void logEncodeFrame( + u32 *aCksum, /* IN/OUT: Checksum values */ + u32 iPage, /* Database page number for frame */ + u32 nTruncate, /* New db size (or 0 for non-commit frames) */ + int nData, /* Database page size (size of aData[]) */ + u8 *aData, /* Pointer to page data (for checksum) */ + u8 *aFrame /* OUT: Write encoded frame here */ +){ + assert( LOG_FRAME_HDRSIZE==20 ); + + sqlite3Put4byte(&aFrame[0], nData); + sqlite3Put4byte(&aFrame[4], iPage); + sqlite3Put4byte(&aFrame[8], nTruncate); + + logChecksumBytes(aFrame, 12, aCksum); + logChecksumBytes(aData, nData, aCksum); + + sqlite3Put4byte(&aFrame[12], aCksum[0]); + sqlite3Put4byte(&aFrame[16], aCksum[1]); +} + +/* +** Return 1 and populate *piPage, *pnTruncate and aCksum if the +** frame checksum looks Ok. Otherwise return 0. +*/ +static int logDecodeFrame( + u32 *aCksum, /* IN/OUT: Checksum values */ + u32 *piPage, /* OUT: Database page number for frame */ + u32 *pnTruncate, /* OUT: New db size (or 0 if not commit) */ + int nData, /* Database page size (size of aData[]) */ + u8 *aData, /* Pointer to page data (for checksum) */ + u8 *aFrame /* Frame data */ +){ + logChecksumBytes(aFrame, 12, aCksum); + logChecksumBytes(aData, nData, aCksum); + + if( aCksum[0]!=sqlite3Get4byte(&aFrame[12]) + || aCksum[1]!=sqlite3Get4byte(&aFrame[16]) + ){ + /* Checksum failed. 
*/ + return 0; + } + + *piPage = sqlite3Get4byte(&aFrame[4]); + *pnTruncate = sqlite3Get4byte(&aFrame[8]); + return 1; +} + +static void logMergesort8( + Pgno *aContent, /* Pages in log */ + u8 *aBuffer, /* Buffer of at least *pnList items to use */ + u8 *aList, /* IN/OUT: List to sort */ + int *pnList /* IN/OUT: Number of elements in aList[] */ +){ + int nList = *pnList; + if( nList>1 ){ + int nLeft = nList / 2; /* Elements in left list */ + int nRight = nList - nLeft; /* Elements in right list */ + u8 *aLeft = aList; /* Left list */ + u8 *aRight = &aList[nLeft]; /* Right list */ + int iLeft = 0; /* Current index in aLeft */ + int iRight = 0; /* Current index in aright */ + int iOut = 0; /* Current index in output buffer */ + + /* TODO: Change to non-recursive version. */ + logMergesort8(aContent, aBuffer, aLeft, &nLeft); + logMergesort8(aContent, aBuffer, aRight, &nRight); + + while( iRight=nRight || aContent[aLeft[iLeft]]=nLeft || aContent[aLeft[iLeft]]>dbpage ); + assert( iRight>=nRight || aContent[aRight[iRight]]>dbpage ); + } + memcpy(aList, aBuffer, sizeof(aList[0])*iOut); + *pnList = iOut; + } + +#ifdef SQLITE_DEBUG + { + int i; + for(i=1; i<*pnList; i++){ + assert( aContent[aList[i]] > aContent[aList[i-1]] ); + } + } +#endif +} + + +/* +** Return the index in the LogSummary.aData array that corresponds to +** frame iFrame. The log-summary file consists of a header, followed by +** alternating "map" and "index" blocks. +*/ +static int logSummaryEntry(u32 iFrame){ + return ((((iFrame-1)>>8)<<6) + iFrame-1 + 2 + LOGSUMMARY_HDR_NFIELD); +} + + +/* +** Set an entry in the log-summary map to map log frame iFrame to db +** page iPage. Values are always appended to the log-summary (i.e. the +** value of iFrame is always exactly one more than the value passed to +** the previous call), but that restriction is not enforced or asserted +** here. 
+*/ +static void logSummaryAppend(LogSummary *pSummary, u32 iFrame, u32 iPage){ + u32 iSlot = logSummaryEntry(iFrame); + + /* Set the log-summary entry itself */ + pSummary->aData[iSlot] = iPage; + + /* If the frame number is a multiple of 256 (frames are numbered starting + ** at 1), build an index of the most recently added 256 frames. + */ + if( (iFrame&0x000000FF)==0 ){ + int i; /* Iterator used while initializing aIndex */ + u32 *aFrame; /* Pointer to array of 256 frames */ + int nIndex; /* Number of entries in index */ + u8 *aIndex; /* 256 bytes to build index in */ + u8 *aTmp; /* Scratch space to use while sorting */ + + aFrame = &pSummary->aData[iSlot-255]; + aIndex = (u8 *)&pSummary->aData[iSlot+1]; + aTmp = &aIndex[256]; + + nIndex = 256; + for(i=0; i<256; i++) aIndex[i] = (u8)i; + logMergesort8(aFrame, aTmp, aIndex, &nIndex); + memset(&aIndex[nIndex], aIndex[nIndex-1], 256-nIndex); + } +} + + +/* +** Recover the log-summary by reading the log file. The caller must hold +** an exclusive lock on the log-summary file. +*/ +static int logSummaryRecover(LogSummary *pSummary, sqlite3_file *pFd){ + int rc; /* Return Code */ + i64 nSize; /* Size of log file */ + LogSummaryHdr hdr; /* Recovered log-summary header */ + + memset(&hdr, 0, sizeof(hdr)); + + rc = sqlite3OsFileSize(pFd, &nSize); + if( rc!=SQLITE_OK ){ + return rc; + } + + if( nSize>LOG_FRAME_HDRSIZE ){ + u8 aBuf[LOG_FRAME_HDRSIZE]; /* Buffer to load first frame header into */ + u8 *aFrame = 0; /* Malloc'd buffer to load entire frame */ + int nFrame; /* Number of bytes at aFrame */ + u8 *aData; /* Pointer to data part of aFrame buffer */ + int iFrame; /* Index of last frame read */ + i64 iOffset; /* Next offset to read from log file */ + int nPgsz; /* Page size according to the log */ + u32 aCksum[2] = {2, 3}; /* Running checksum */ + + /* Read in the first frame header in the file (to determine the + ** database page size). 
+ */ + rc = sqlite3OsRead(pFd, aBuf, LOG_FRAME_HDRSIZE, 0); + if( rc!=SQLITE_OK ){ + return rc; + } + + /* If the database page size is not a power of two, or is greater than + ** SQLITE_MAX_PAGE_SIZE, conclude that the log file contains no valid data. + */ + nPgsz = sqlite3Get4byte(&aBuf[0]); + if( nPgsz&(nPgsz-1) || nPgsz>SQLITE_MAX_PAGE_SIZE ){ + goto finished; + } + + /* Malloc a buffer to read frames into. */ + nFrame = nPgsz + LOG_FRAME_HDRSIZE; + aFrame = (u8 *)sqlite3_malloc(nFrame); + if( !aFrame ){ + return SQLITE_NOMEM; + } + aData = &aFrame[LOG_FRAME_HDRSIZE]; + + /* Read all frames from the log file. */ + iFrame = 0; + iOffset = 0; + for(iOffset=0; (iOffset+nFrame)fd<0 ); + assert( pSummary->aData==0 ); + assert( pSummary->nRef>0 ); + assert( pSummary->zPath ); + + /* Open a file descriptor on the summary file. */ + zFile = sqlite3_mprintf("%s-summary", pSummary->zPath); + if( !zFile ){ + return SQLITE_NOMEM; + } + pSummary->fd = open(zFile, O_RDWR|O_CREAT, S_IWUSR|S_IRUSR); + sqlite3_free(zFile); + if( pSummary->fd<0 ){ + return SQLITE_IOERR; + } + + /* Grab an exclusive lock the summary file. Then mmap() it. TODO: This + ** code needs to be enhanced to support a growable mapping. For now, just + ** make the mapping very large to start with. + */ + rc = logSummaryLock(pSummary); + if( rc!=SQLITE_OK ) return rc; + rc = logSummaryMap(pSummary, 512*1024); + if( rc!=SQLITE_OK ) goto out; + + /* Grab a SHARED lock on the log file. Then try to upgrade to an EXCLUSIVE + ** lock. If successful, then this is the first (and only) connection to + ** the database. In this case assume the contents of the log-summary + ** cannot be trusted. Zero the log-summary header to make sure. + ** + ** The SHARED lock on the log file is not released until the connection + ** to the database is closed. 
+ */ + rc = sqlite3OsLock(pFd, SQLITE_LOCK_SHARED); + if( rc!=SQLITE_OK ) goto out; + rc = sqlite3OsLock(pFd, SQLITE_LOCK_EXCLUSIVE); + if( rc==SQLITE_OK ){ + /* This is the first and only connection. */ + memset(pSummary->aData, 0, (LOGSUMMARY_HDR_NFIELD+2)*sizeof(u32) ); + rc = sqlite3OsUnlock(pFd, SQLITE_LOCK_SHARED); + }else if( rc==SQLITE_BUSY ){ + rc = SQLITE_OK; + } + + out: + logSummaryUnlock(pSummary); + return rc; +} + +/* +** Open a connection to the log file associated with database zDb. The +** database file does not actually have to exist. zDb is used only to +** figure out the name of the log file to open. If the log file does not +** exist it is created by this call. +*/ +int sqlite3LogOpen( + sqlite3_vfs *pVfs, /* vfs module to open log file with */ + const char *zDb, /* Name of database file */ + Log **ppLog /* OUT: Allocated Log handle */ +){ + int rc; /* Return Code */ + Log *pRet; /* Object to allocate and return */ + LogSummary *pSummary = 0; /* Summary object */ + sqlite3_mutex *mutex = 0; /* LOG_SUMMARY_MUTEX mutex */ + int flags; /* Flags passed to OsOpen() */ + char *zWal = 0; /* Path to WAL file */ + int nWal; /* Length of zWal in bytes */ + + /* Zero output variables */ + assert( zDb ); + *ppLog = 0; + + /* Allocate an instance of struct Log to return. */ + pRet = (Log *)sqlite3MallocZero(sizeof(Log) + pVfs->szOsFile); + if( !pRet ) goto out; + pRet->pVfs = pVfs; + pRet->pFd = (sqlite3_file *)&pRet[1]; + pRet->sync_flags = SQLITE_SYNC_NORMAL; + + /* Normalize the path name. 
*/ + zWal = sqlite3_mprintf("%s-wal", zDb); + if( !zWal ) goto out; + logNormalizePath(zWal); + flags = (SQLITE_OPEN_READWRITE|SQLITE_OPEN_CREATE|SQLITE_OPEN_MAIN_DB); + nWal = sqlite3Strlen30(zWal); + + /* Enter the mutex that protects the linked-list of LogSummary structures */ + if( sqlite3GlobalConfig.bCoreMutex ){ + mutex = sqlite3_mutex_alloc(LOG_SUMMARY_MUTEX); + } + sqlite3_mutex_enter(mutex); + + /* Search for an existing log summary object in the linked list. If one + ** cannot be found, allocate and initialize a new object. + */ + for(pSummary=pLogSummary; pSummary; pSummary=pSummary->pNext){ + int nPath = sqlite3Strlen30(pSummary->zPath); + if( nWal==nPath && 0==memcmp(pSummary->zPath, zWal, nPath) ) break; + } + if( !pSummary ){ + int nByte = sizeof(LogSummary) + nWal + 1; + pSummary = (LogSummary *)sqlite3MallocZero(nByte); + if( !pSummary ){ + rc = SQLITE_NOMEM; + goto out; + } + if( sqlite3GlobalConfig.bCoreMutex ){ + pSummary->mutex = sqlite3_mutex_alloc(SQLITE_MUTEX_RECURSIVE); + } + pSummary->zPath = (char *)&pSummary[1]; + pSummary->fd = -1; + memcpy(pSummary->zPath, zWal, nWal); + pSummary->pNext = pLogSummary; + pLogSummary = pSummary; + } + pSummary->nRef++; + pRet->pSummary = pSummary; + + /* Exit the mutex protecting the linked-list of LogSummary objects. */ + sqlite3_mutex_leave(mutex); + mutex = 0; + + /* Open file handle on the log file. */ + rc = sqlite3OsOpen(pVfs, pSummary->zPath, pRet->pFd, flags, &flags); + if( rc!=SQLITE_OK ) goto out; + + /* Object pSummary is shared between all connections to the database made + ** by this process. So at this point it may or may not be connected to + ** the log-summary. If it is not, connect it. Otherwise, just take the + ** SHARED lock on the log file. 
+ */ + sqlite3_mutex_enter(pSummary->mutex); + mutex = pSummary->mutex; + if( pSummary->fd<0 ){ + rc = logSummaryInit(pSummary, pRet->pFd); + }else{ + rc = sqlite3OsLock(pRet->pFd, SQLITE_LOCK_SHARED); + } + + out: + sqlite3_mutex_leave(mutex); + sqlite3_free(zWal); + if( rc!=SQLITE_OK ){ + assert(0); + if( pRet ){ + sqlite3OsClose(pRet->pFd); + sqlite3_free(pRet); + } + assert( !pSummary || pSummary->nRef==0 ); + sqlite3_free(pSummary); + } + *ppLog = pRet; + return rc; +} + +static int logCheckpointNext( + LogCheckpoint *p, /* Iterator */ + u32 *piPage, /* OUT: Next db page to write */ + u32 *piFrame /* OUT: Log frame to read from */ +){ + u32 iMin = *piPage; + u32 iRet = 0xFFFFFFFF; + int i; + int nBlock = p->nFinal; + + for(i=p->nSegment-1; i>=0; i--){ + struct LogSegment *pSegment = &p->aSegment[i]; + while( pSegment->iNextaDbPage[pSegment->aIndex[pSegment->iNext]]; + if( iPg>iMin ){ + if( iPgaIndex[pSegment->iNext]; + } + break; + } + pSegment->iNext++; + } + + nBlock = 256; + } + + *piPage = iRet; + return (iRet==0xFFFFFFFF); +} + +static LogCheckpoint *logCheckpointInit(Log *pLog){ + u32 *aData = pLog->pSummary->aData; + LogCheckpoint *p; /* Return value */ + int nSegment; /* Number of segments to merge */ + u32 iLast; /* Last frame in log */ + int nByte; /* Number of bytes to allocate */ + int i; /* Iterator variable */ + int nFinal; /* Number of unindexed entries */ + struct LogSegment *pFinal; /* Final (unindexed) segment */ + u8 *aTmp; /* Temp space used by merge-sort */ + + iLast = pLog->hdr.iLastPg; + nSegment = (iLast >> 8) + 1; + nFinal = (iLast & 0x000000FF); + + nByte = sizeof(LogCheckpoint) + (nSegment-1)*sizeof(struct LogSegment) + 512; + p = (LogCheckpoint *)sqlite3_malloc(nByte); + if( p ){ + memset(p, 0, nByte); + p->nSegment = nSegment; + p->nFinal = nFinal; + } + + for(i=0; iaSegment[i].aDbPage = &aData[logSummaryEntry(i*256+1)]; + p->aSegment[i].aIndex = (u8 *)&aData[logSummaryEntry(i*256+1)+256]; + } + pFinal = &p->aSegment[nSegment-1]; + 
+ pFinal->aDbPage = &aData[logSummaryEntry((nSegment-1)*256+1)]; + pFinal->aIndex = (u8 *)&pFinal[1]; + aTmp = &pFinal->aIndex[256]; + for(i=0; iaIndex[i] = i; + } + logMergesort8(pFinal->aDbPage, aTmp, pFinal->aIndex, &nFinal); + p->nFinal = nFinal; + + return p; +} + +/* +** Free a log iterator allocated by logCheckpointInit(). +*/ +static void logCheckpointFree(LogCheckpoint *p){ + sqlite3_free(p); +} + +/* +** Checkpoint the contents of the log file. +*/ +static int logCheckpoint( + Log *pLog, /* Log connection */ + sqlite3_file *pFd, /* File descriptor open on db file */ + u8 *zBuf /* Temporary buffer to use */ +){ + int rc; /* Return code */ + int pgsz = pLog->hdr.pgsz; /* Database page-size */ + LogCheckpoint *pIter = 0; /* Log iterator context */ + u32 iDbpage = 0; /* Next database page to write */ + u32 iFrame; /* Log frame containing data for iDbpage */ + + /* Allocate the iterator */ + pIter = logCheckpointInit(pLog); + if( !pIter ) return SQLITE_NOMEM; + + /* Sync the log file to disk */ + rc = sqlite3OsSync(pLog->pFd, pLog->sync_flags); + if( rc!=SQLITE_OK ) goto out; + + /* Iterate through the contents of the log, copying data to the db file. */ + while( 0==logCheckpointNext(pIter, &iDbpage, &iFrame) ){ + rc = sqlite3OsRead(pLog->pFd, zBuf, pgsz, + (iFrame-1) * (pgsz+LOG_FRAME_HDRSIZE) + LOG_FRAME_HDRSIZE + ); + if( rc!=SQLITE_OK ) goto out; + rc = sqlite3OsWrite(pFd, zBuf, pgsz, (iDbpage-1)*pgsz); + if( rc!=SQLITE_OK ) goto out; + } + + /* Truncate the database file */ + rc = sqlite3OsTruncate(pFd, ((i64)pLog->hdr.nPage*(i64)pgsz)); + if( rc!=SQLITE_OK ) goto out; + + /* Sync the database file. If successful, update the log-summary. */ + rc = sqlite3OsSync(pFd, pLog->sync_flags); + if( rc!=SQLITE_OK ) goto out; + pLog->hdr.iLastPg = 0; + pLog->hdr.iCheck1 = 2; + pLog->hdr.iCheck2 = 3; + logSummaryWriteHdr(pLog->pSummary, &pLog->hdr); + + /* TODO: If a crash occurs and the current log is copied into the + ** database there is no problem. 
However, if a crash occurs while + ** writing the next transaction into the start of the log, such that: + ** + ** * The first transaction currently in the log is left intact, but + ** * The second (or subsequent) transaction is damaged, + ** + ** then the database could become corrupt. + ** + ** The easiest thing to do would be to write and sync a dummy header + ** into the log at this point. Unfortunately, that turns out to be + ** an unwelcome performance hit. Alternatives are... + */ +#if 0 + memset(zBuf, 0, LOG_FRAME_HDRSIZE); + rc = sqlite3OsWrite(pLog->pFd, zBuf, LOG_FRAME_HDRSIZE, 0); + if( rc!=SQLITE_OK ) goto out; + rc = sqlite3OsSync(pLog->pFd, pLog->sync_flags); +#endif + + out: + logCheckpointFree(pIter); + return rc; +} + +/* +** Close a connection to a log file. +*/ +int sqlite3LogClose( + Log *pLog, /* Log to close */ + sqlite3_file *pFd, /* Database file */ + u8 *zBuf /* Buffer of at least page-size bytes */ +){ + int rc = SQLITE_OK; + if( pLog ){ + LogSummary *pSummary = pLog->pSummary; + sqlite3_mutex *mutex = 0; + + if( sqlite3GlobalConfig.bCoreMutex ){ + mutex = sqlite3_mutex_alloc(LOG_SUMMARY_MUTEX); + } + sqlite3_mutex_enter(mutex); + + /* Decrement the reference count on the log summary. If this is the last + ** reference to the log summary object in this process, the object will + ** be freed. If this is also the last connection to the database, then + ** checkpoint the database and truncate the log and log-summary files + ** to zero bytes in size. + **/ + pSummary->nRef--; + if( pSummary->nRef==0 ){ + LogSummary **pp; + + rc = logSummaryLock(pSummary); + if( rc==SQLITE_OK ){ + int isTruncate = 0; + int rc2 = sqlite3OsLock(pLog->pFd, SQLITE_LOCK_EXCLUSIVE); + if( rc2==SQLITE_OK ){ + /* This is the last connection to the database (including other + ** processes). Do three things: + ** + ** 1. Checkpoint the db. + ** 2. Truncate the log file to zero bytes. + ** 3. Truncate the log-summary file to zero bytes. 
+ */ + rc2 = logCheckpoint(pLog, pFd, zBuf); + if( rc2==SQLITE_OK ){ + rc2 = sqlite3OsTruncate(pLog->pFd, 0); + } + isTruncate = 1; + }else if( rc2==SQLITE_BUSY ){ + rc2 = SQLITE_OK; + } + logSummaryUnmap(pSummary, isTruncate); + sqlite3OsUnlock(pLog->pFd, SQLITE_LOCK_NONE); + rc = logSummaryUnlock(pSummary); + if( rc2!=SQLITE_OK ) rc = rc2; + } + + /* Remove the LogSummary object from the global list. Then free the + ** mutex and the object itself. + */ + for(pp=&pLogSummary; *pp!=pSummary; pp=&(*pp)->pNext); + *pp = (*pp)->pNext; + sqlite3_mutex_free(pSummary->mutex); + sqlite3_free(pSummary); + } + + sqlite3_mutex_leave(mutex); + + /* Close the connection to the log file and free the Log handle. */ + sqlite3OsClose(pLog->pFd); + sqlite3_free(pLog); + } + return rc; +} + +/* +** Set the flags to pass to the sqlite3OsSync() function when syncing +** the log file. +*/ +#if 0 +void sqlite3LogSetSyncflags(Log *pLog, int sync_flags){ + assert( sync_flags==SQLITE_SYNC_NORMAL || sync_flags==SQLITE_SYNC_FULL ); + pLog->sync_flags = sync_flags; +} +#endif + +/* +** Enter and leave the log-summary mutex. In this context, entering the +** log-summary mutex means: +** +** 1. Obtaining mutex pLog->pSummary->mutex, and +** 2. Taking an exclusive lock on the log-summary file. +** +** i.e. this mutex locks out other processes as well as other threads +** hosted in this address space. +*/ +static int logEnterMutex(Log *pLog){ + LogSummary *pSummary = pLog->pSummary; + int rc; + + sqlite3_mutex_enter(pSummary->mutex); + rc = logSummaryLock(pSummary); + if( rc!=SQLITE_OK ){ + sqlite3_mutex_leave(pSummary->mutex); + } + return rc; +} +static void logLeaveMutex(Log *pLog){ + LogSummary *pSummary = pLog->pSummary; + logSummaryUnlock(pSummary); + sqlite3_mutex_leave(pSummary->mutex); +} + +/* +** The caller must hold a SHARED lock on the database file. 
+** +** If this call obtains a new read-lock and the database contents have been +** modified since the most recent call to LogCloseSnapshot() on this Log +** connection, then *pChanged is set to 1 before returning. Otherwise, it +** is left unmodified. This is used by the pager layer to determine whether +** or not any cached pages may be safely reused. +*/ +int sqlite3LogOpenSnapshot(Log *pLog, int *pChanged){ + int rc = SQLITE_OK; + if( pLog->isLocked==0 ){ + if( SQLITE_OK==(rc = logEnterMutex(pLog)) ){ + u32 aCksum[2] = {1, 1}; + u32 aHdr[LOGSUMMARY_HDR_NFIELD+2]; + memcpy(aHdr, pLog->pSummary->aData, sizeof(aHdr)); + + /* Verify the checksum on the log-summary header. If it fails, + ** recover the log-summary from the log file. + */ + logChecksumBytes((u8*)aHdr, sizeof(u32)*LOGSUMMARY_HDR_NFIELD, aCksum); + if( aCksum[0]!=aHdr[LOGSUMMARY_HDR_NFIELD] + || aCksum[1]!=aHdr[LOGSUMMARY_HDR_NFIELD+1] + ){ + rc = logSummaryRecover(pLog->pSummary, pLog->pFd); + memcpy(aHdr, pLog->pSummary->aData, sizeof(aHdr)); + *pChanged = 1; + } + if( rc==SQLITE_OK ){ + pLog->isLocked = 1; + if( memcmp(&pLog->hdr, aHdr, sizeof(LogSummaryHdr)) ){ + *pChanged = 1; + memcpy(&pLog->hdr, aHdr, LOGSUMMARY_HDR_NFIELD*sizeof(u32)); + } + } + logLeaveMutex(pLog); + } + } + return rc; +} + +/* +** Unlock the current snapshot. +*/ +void sqlite3LogCloseSnapshot(Log *pLog){ + pLog->isLocked = 0; +} + + + +/* +** Read a page from the log, if it is present. +*/ +int sqlite3LogRead(Log *pLog, Pgno pgno, int *pInLog, u8 *pOut){ + u32 iRead = 0; + u32 *aData = pLog->pSummary->aData; + int iFrame = (pLog->hdr.iLastPg & 0xFFFFFF00); + + /* Do a linear search of the unindexed block of page-numbers (if any) + ** at the end of the log-summary. An alternative to this would be to + ** build an index in private memory each time a read transaction is + ** opened on a new snapshot. 
+ */ + if( pLog->hdr.iLastPg ){ + u32 *pi = &aData[logSummaryEntry(pLog->hdr.iLastPg)]; + u32 *piStop = pi - (pLog->hdr.iLastPg & 0xFF); + while( *pi!=pgno && pi!=piStop ) pi--; + if( pi!=piStop ){ + iRead = (pi-piStop) + iFrame; + } + } + assert( iRead==0 || aData[logSummaryEntry(iRead)]==pgno ); + + while( iRead==0 && iFrame>0 ){ + int iLow = 0; + int iHigh = 255; + u32 *aFrame; + u8 *aIndex; + + iFrame -= 256; + aFrame = &aData[logSummaryEntry(iFrame+1)]; + aIndex = (u8 *)&aFrame[256]; + + while( iLow<=iHigh ){ + int iTest = (iLow+iHigh)>>1; + u32 iPg = aFrame[aIndex[iTest]]; + + if( iPg==pgno ){ + iRead = iFrame + 1 + aIndex[iTest]; + break; + } + else if( iPghdr.pgsz+LOG_FRAME_HDRSIZE); + iOffset += LOG_FRAME_HDRSIZE; + *pInLog = 1; + return sqlite3OsRead(pLog->pFd, pOut, pLog->hdr.pgsz, iOffset); + } + + *pInLog = 0; + return SQLITE_OK; +} + + +/* +** Set *pPgno to the size of the database file (or zero, if unknown). +*/ +void sqlite3LogMaxpgno(Log *pLog, Pgno *pPgno){ + assert( pLog->isLocked ); + *pPgno = pLog->hdr.nPage; +} + +/* +** The caller must hold at least a RESERVED lock on the database file +** when invoking this function. +** +** This function returns SQLITE_OK if the caller may write to the database. +** Otherwise, if the caller is operating on a snapshot that has already +** been overwritten by another writer, SQLITE_OBE is returned. +*/ +int sqlite3LogWriteLock(Log *pLog, int op){ + assert( pLog->isLocked ); + if( op ){ + if( memcmp(&pLog->hdr, pLog->pSummary->aData, sizeof(pLog->hdr)) ){ + return SQLITE_BUSY; + } + pLog->isWriteLocked = 1; + }else if( pLog->isWriteLocked ){ + memcpy(&pLog->hdr, pLog->pSummary->aData, sizeof(pLog->hdr)); + pLog->isWriteLocked = 0; + } + return SQLITE_OK; +} + +/* +** Write a set of frames to the log. The caller must hold at least a +** RESERVED lock on the database file. 
+*/ +int sqlite3LogFrames( + Log *pLog, /* Log handle to write to */ + int nPgsz, /* Database page-size in bytes */ + PgHdr *pList, /* List of dirty pages to write */ + Pgno nTruncate, /* Database size after this commit */ + int isCommit, /* True if this is a commit */ + int isSync /* True to sync the log file */ +){ + /* Each frame has a 20 byte header, as follows: + ** + ** + Pseudo-random salt (4 bytes) + ** + Page number (4 bytes) + ** + New database size, or 0 if not a commit frame (4 bytes) + ** + Checksum (CHECKSUM_BYTES bytes); + ** + ** The checksum is computed based on the following: + ** + ** + The previous checksum, or {2, 3} for the first frame in the log. + ** + The non-checksum fields of the frame header, and + ** + The frame contents (page data). + ** + ** This format must also be understood by the code in logSummaryRecover(). + ** The size of the frame header is used by LogRead() and LogCheckpoint(). + */ + int rc; /* Used to catch return codes */ + u32 iFrame; /* Next frame address */ + u8 aFrame[LOG_FRAME_HDRSIZE]; + PgHdr *p; /* Iterator to run through pList with. */ + u32 aCksum[2]; + + PgHdr *pLast; /* Last frame in list */ + int nLast = 0; /* Number of extra copies of last page */ + + assert( LOG_FRAME_HDRSIZE==(4 * 3 + LOG_CKSM_BYTES) ); + assert( pList ); + + aCksum[0] = pLog->hdr.iCheck1; + aCksum[1] = pLog->hdr.iCheck2; + + /* Write the log file. */ + iFrame = pLog->hdr.iLastPg; + for(p=pList; p; p=p->pDirty){ + u32 nDbsize; /* Db-size field for frame header */ + i64 iOffset; /* Write offset in log file */ + + iFrame++; + iOffset = (iFrame-1) * (nPgsz+sizeof(aFrame)); + + /* Populate and write the frame header */ + nDbsize = (isCommit && p->pDirty==0) ? 
nTruncate : 0; + logEncodeFrame(aCksum, p->pgno, nDbsize, nPgsz, p->pData, aFrame); + rc = sqlite3OsWrite(pLog->pFd, aFrame, sizeof(aFrame), iOffset); + if( rc!=SQLITE_OK ){ + return rc; + } + + /* Write the page data */ + rc = sqlite3OsWrite(pLog->pFd, p->pData, nPgsz, iOffset + sizeof(aFrame)); + if( rc!=SQLITE_OK ){ + return rc; + } + pLast = p; + } + + /* Sync the log file if the 'isSync' flag was specified. */ + if( isSync ){ +#if 0 + i64 iSegment = sqlite3OsSectorSize(pLog->pFd); + i64 iOffset = iFrame * (nPgsz+sizeof(aFrame)); + + if( iSegmentpgno,nTruncate,nPgsz,pLast->pData,aFrame); + rc = sqlite3OsWrite(pLog->pFd, aFrame, sizeof(aFrame), iOffset); + if( rc!=SQLITE_OK ){ + return rc; + } + + iOffset += LOG_FRAME_HDRSIZE; + rc = sqlite3OsWrite(pLog->pFd, pLast->pData, nPgsz, iOffset); + if( rc!=SQLITE_OK ){ + return rc; + } + nLast++; + iOffset += nPgsz; + } +#endif + + rc = sqlite3OsSync(pLog->pFd, pLog->sync_flags); + if( rc!=SQLITE_OK ){ + return rc; + } + } + + /* Append data to the log summary. It is not necessary to lock the + ** log-summary to do this as the RESERVED lock held on the db file + ** guarantees that there are no other writers, and no data that may + ** be in use by existing readers is being overwritten. + */ + iFrame = pLog->hdr.iLastPg; + for(p=pList; p; p=p->pDirty){ + iFrame++; + logSummaryAppend(pLog->pSummary, iFrame, p->pgno); + } + while( nLast>0 ){ + iFrame++; + nLast--; + logSummaryAppend(pLog->pSummary, iFrame, pLast->pgno); + } + + /* Update the private copy of the header. */ + pLog->hdr.pgsz = nPgsz; + pLog->hdr.iLastPg = iFrame; + if( isCommit ){ + pLog->hdr.iChange++; + pLog->hdr.nPage = nTruncate; + } + pLog->hdr.iCheck1 = aCksum[0]; + pLog->hdr.iCheck2 = aCksum[1]; + + /* If this is a commit, update the log-summary header too. */ + if( isCommit && SQLITE_OK==(rc = logEnterMutex(pLog)) ){ + logSummaryWriteHdr(pLog->pSummary, &pLog->hdr); + logLeaveMutex(pLog); + } + + return SQLITE_OK; +} + +/* +** Checkpoint the database. 
When this function is called the caller +** must hold an exclusive lock on the database file. +*/ +int sqlite3LogCheckpoint( + Log *pLog, /* Log connection */ + sqlite3_file *pFd, /* File descriptor open on db file */ + u8 *zBuf /* Temporary buffer to use */ +){ + + /* Assert() that the caller is holding an EXCLUSIVE lock on the + ** database file. + */ +#ifdef SQLITE_DEBUG + int lock; + sqlite3OsFileControl(pFd, SQLITE_FCNTL_LOCKSTATE, &lock); + assert( lock>=4 ); +#endif + + return logCheckpoint(pLog, pFd, zBuf); +} + diff --git a/src/os_unix.c b/src/os_unix.c index 769e75df39..80ce9e0b00 100644 --- a/src/os_unix.c +++ b/src/os_unix.c @@ -1536,9 +1536,11 @@ static int _posixUnlock(sqlite3_file *id, int locktype, int handleNFSUnlock){ ** the file has changed and hence might not know to flush their ** cache. The use of a stale cache can lead to database corruption. */ +#if 0 assert( pFile->inNormalWrite==0 || pFile->dbUpdate==0 || pFile->transCntrChng==1 ); +#endif pFile->inNormalWrite = 0; #endif @@ -2956,10 +2958,12 @@ static int unixRead( /* If this is a database file (not a journal, master-journal or temp ** file), the bytes in the locking range should never be read or written. */ +#if 0 assert( pFile->pUnused==0 || offset>=PENDING_BYTE+512 || offset+amt<=PENDING_BYTE ); +#endif got = seekAndRead(pFile, offset, pBuf, amt); if( got==amt ){ @@ -3031,10 +3035,12 @@ static int unixWrite( /* If this is a database file (not a journal, master-journal or temp ** file), the bytes in the locking range should never be read or written. 
*/ +#if 0 assert( pFile->pUnused==0 || offset>=PENDING_BYTE+512 || offset+amt<=PENDING_BYTE ); +#endif #ifndef NDEBUG /* If we are doing a normal write to a database file (as opposed to diff --git a/src/pager.c b/src/pager.c index d5c236e24c..383c0fe6bd 100644 --- a/src/pager.c +++ b/src/pager.c @@ -20,6 +20,7 @@ */ #ifndef SQLITE_OMIT_DISKIO #include "sqliteInt.h" +#include "log.h" /* ******************** NOTES ON THE DESIGN OF THE PAGER ************************ @@ -397,6 +398,7 @@ struct Pager { char *pTmpSpace; /* Pager.pageSize bytes of space for tmp use */ PCache *pPCache; /* Pointer to page cache object */ sqlite3_backup *pBackup; /* Pointer to list of ongoing backup processes */ + Log *pLog; /* Log used by "journal_mode=wal" */ }; /* @@ -1185,6 +1187,27 @@ static int addToSavepointBitvecs(Pager *pPager, Pgno pgno){ return rc; } +/* +** Open a connection to the write-ahead log file for pager pPager. +*/ +static int pagerOpenLog(Pager *pPager){ + if( !pPager->pLog ){ + int rc; /* Return code from LogOpen() */ + + rc = sqlite3LogOpen(pPager->pVfs, pPager->zFilename, &pPager->pLog); + if( rc!=SQLITE_OK ) return rc; + } + return SQLITE_OK; +} + +/* +** Return true if this pager uses a write-ahead log instead of the usual +** rollback journal. Otherwise false. +*/ +static int pagerUseLog(Pager *pPager){ + return (pPager->pLog!=0); +} + /* ** Unlock the database file. This function is a no-op if the pager ** is in exclusive mode. @@ -1197,7 +1220,7 @@ static int addToSavepointBitvecs(Pager *pPager, Pgno pgno){ */ static void pager_unlock(Pager *pPager){ if( !pPager->exclusiveMode ){ - int rc; /* Return code */ + int rc = SQLITE_OK; /* Return code */ /* Always close the journal file when dropping the database lock. 
** Otherwise, another connection with journal_mode=delete might @@ -1216,6 +1239,9 @@ static void pager_unlock(Pager *pPager){ */ pPager->dbSizeValid = 0; + if( pagerUseLog(pPager) ){ + sqlite3LogCloseSnapshot(pPager->pLog); + } rc = osUnlock(pPager->fd, NO_LOCK); if( rc ){ pPager->errCode = rc; @@ -1365,6 +1391,7 @@ static int pager_end_transaction(Pager *pPager, int hasMaster){ assert( isOpen(pPager->jfd) || pPager->pInJournal==0 ); if( isOpen(pPager->jfd) ){ + assert( !pagerUseLog(pPager) ); /* Finalize the journal file. */ if( sqlite3IsMemJournal(pPager->jfd) ){ @@ -1408,7 +1435,10 @@ static int pager_end_transaction(Pager *pPager, int hasMaster){ pPager->nRec = 0; sqlite3PcacheCleanAll(pPager->pPCache); - if( !pPager->exclusiveMode ){ + if( pagerUseLog(pPager) ){ + rc2 = sqlite3LogWriteLock(pPager->pLog, 0); + pPager->state = PAGER_SHARED; + }else if( !pPager->exclusiveMode ){ rc2 = osUnlock(pPager->fd, SHARED_LOCK); pPager->state = PAGER_SHARED; pPager->changeCountDone = 0; @@ -2120,6 +2150,9 @@ end_playback: if( rc==SQLITE_OK && pPager->noSync==0 && pPager->state>=PAGER_EXCLUSIVE ){ rc = sqlite3OsSync(pPager->fd, pPager->sync_flags); } + if( rc==SQLITE_OK && pPager->noSync==0 && pPager->state>=PAGER_EXCLUSIVE ){ + rc = sqlite3OsSync(pPager->fd, pPager->sync_flags); + } if( rc==SQLITE_OK ){ rc = pager_end_transaction(pPager, zMaster[0]!='\0'); testcase( rc!=SQLITE_OK ); @@ -2140,6 +2173,97 @@ end_playback: return rc; } + +/* +** Read the content for page pPg out of the database file and into +** pPg->pData. A shared lock or greater must be held on the database +** file before this function is called. +** +** If page 1 is read, then the value of Pager.dbFileVers[] is set to +** the value read from the database file. +** +** If an IO error occurs, then the IO error is returned to the caller. +** Otherwise, SQLITE_OK is returned. 
+*/ +static int readDbPage(PgHdr *pPg){ + Pager *pPager = pPg->pPager; /* Pager object associated with page pPg */ + Pgno pgno = pPg->pgno; /* Page number to read */ + int rc; /* Return code */ + i64 iOffset; /* Byte offset of file to read from */ + int isInLog = 0; /* True if page is in log file */ + + assert( pPager->state>=PAGER_SHARED && !MEMDB ); + assert( isOpen(pPager->fd) ); + + if( NEVER(!isOpen(pPager->fd)) ){ + assert( pPager->tempFile ); + memset(pPg->pData, 0, pPager->pageSize); + return SQLITE_OK; + } + + if( pagerUseLog(pPager) ){ + /* Try to pull the page from the write-ahead log. */ + rc = sqlite3LogRead(pPager->pLog, pgno, &isInLog, pPg->pData); + } + if( rc==SQLITE_OK && !isInLog ){ + iOffset = (pgno-1)*(i64)pPager->pageSize; + rc = sqlite3OsRead(pPager->fd, pPg->pData, pPager->pageSize, iOffset); + if( rc==SQLITE_IOERR_SHORT_READ ){ + rc = SQLITE_OK; + } + } + + if( pgno==1 ){ + if( rc ){ + /* If the read is unsuccessful, set the dbFileVers[] to something + ** that will never be a valid file version. dbFileVers[] is a copy + ** of bytes 24..39 of the database. Bytes 28..31 should always be + ** zero. Bytes 32..35 and 35..39 should be page numbers which are + ** never 0xffffffff. So filling pPager->dbFileVers[] with all 0xff + ** bytes should suffice. + ** + ** For an encrypted database, the situation is more complex: bytes + ** 24..39 of the database are white noise. But the probability of + ** white noising equaling 16 bytes of 0xff is vanishingly small so + ** we should still be ok. 
+ */ + memset(pPager->dbFileVers, 0xff, sizeof(pPager->dbFileVers)); + }else{ + u8 *dbFileVers = &((u8*)pPg->pData)[24]; + memcpy(&pPager->dbFileVers, dbFileVers, sizeof(pPager->dbFileVers)); + } + } + CODEC1(pPager, pPg->pData, pgno, 3, rc = SQLITE_NOMEM); + + PAGER_INCR(sqlite3_pager_readdb_count); + PAGER_INCR(pPager->nRead); + IOTRACE(("PGIN %p %d\n", pPager, pgno)); + PAGERTRACE(("FETCH %d page %d hash(%08x)\n", + PAGERID(pPager), pgno, pager_pagehash(pPg))); + + return rc; +} + +static int pagerRollbackLog(Pager *pPager){ + int rc = SQLITE_OK; + PgHdr *pList = sqlite3PcacheDirtyList(pPager->pPCache); + pPager->dbSize = pPager->dbOrigSize; + while( pList && rc==SQLITE_OK ){ + PgHdr *pNext = pList->pDirty; + if( sqlite3PcachePageRefcount(pList)==0 ){ + sqlite3PagerLookup(pPager, pList->pgno); + sqlite3PcacheDrop(pList); + }else{ + rc = readDbPage(pList); + if( rc==SQLITE_OK ){ + pPager->xReiniter(pList); + } + } + pList = pNext; + } + return rc; +} + /* ** Playback savepoint pSavepoint. Or, if pSavepoint==NULL, then playback ** the entire master journal file. The case pSavepoint==NULL occurs when @@ -2197,12 +2321,17 @@ static int pagerPlaybackSavepoint(Pager *pPager, PagerSavepoint *pSavepoint){ */ pPager->dbSize = pSavepoint ? pSavepoint->nOrig : pPager->dbOrigSize; + if( !pSavepoint && pagerUseLog(pPager) ){ + return pagerRollbackLog(pPager); + } + /* Use pPager->journalOff as the effective size of the main rollback ** journal. The actual file might be larger than this in ** PAGER_JOURNALMODE_TRUNCATE or PAGER_JOURNALMODE_PERSIST. But anything ** past pPager->journalOff is off-limits to us. */ szJ = pPager->journalOff; + assert( pagerUseLog(pPager)==0 || szJ==0 ); /* Begin by rolling back records from the main journal starting at ** PagerSavepoint.iOffset and continuing to the next journal header. @@ -2211,7 +2340,7 @@ static int pagerPlaybackSavepoint(Pager *pPager, PagerSavepoint *pSavepoint){ ** will be skipped automatically. 
Pages are added to pDone as they ** are played back. */ - if( pSavepoint ){ + if( pSavepoint && !pagerUseLog(pPager) ){ iHdrOff = pSavepoint->iHdrOffset ? pSavepoint->iHdrOffset : szJ; pPager->journalOff = pSavepoint->iOffset; while( rc==SQLITE_OK && pPager->journalOffdbSizeValid ){ @@ -2567,15 +2696,23 @@ int sqlite3PagerPagecount(Pager *pPager, int *pnPage){ int rc; /* Error returned by OsFileSize() */ i64 n = 0; /* File size in bytes returned by OsFileSize() */ - assert( isOpen(pPager->fd) || pPager->tempFile ); - if( isOpen(pPager->fd) && (0 != (rc = sqlite3OsFileSize(pPager->fd, &n))) ){ - pager_error(pPager, rc); - return rc; + if( pagerUseLog(pPager) ){ + sqlite3LogMaxpgno(pPager->pLog, &nPage); } - if( n>0 && npageSize ){ - nPage = 1; - }else{ - nPage = (Pgno)(n / pPager->pageSize); + + if( nPage==0 ){ + assert( isOpen(pPager->fd) || pPager->tempFile ); + if( isOpen(pPager->fd) ){ + if( SQLITE_OK!=(rc = sqlite3OsFileSize(pPager->fd, &n)) ){ + pager_error(pPager, rc); + return rc; + } + } + if( n>0 && npageSize ){ + nPage = 1; + }else{ + nPage = (Pgno)(n / pPager->pageSize); + } } if( pPager->state!=PAGER_UNLOCK ){ pPager->dbSize = nPage; @@ -2698,6 +2835,7 @@ void sqlite3PagerTruncateImage(Pager *pPager, Pgno nPage){ assertTruncateConstraint(pPager); } + /* ** This function is called before attempting a hot-journal rollback. It ** syncs the journal file to disk, then sets pPager->journalHdr to the @@ -2738,10 +2876,14 @@ static int pagerSyncHotJournal(Pager *pPager){ ** to the caller. 
*/ int sqlite3PagerClose(Pager *pPager){ + u8 *pTmp = (u8 *)pPager->pTmpSpace; + disable_simulated_io_errors(); sqlite3BeginBenignMalloc(); pPager->errCode = 0; pPager->exclusiveMode = 0; + sqlite3LogClose(pPager->pLog, pPager->fd, pTmp); + pPager->pLog = 0; pager_reset(pPager); if( MEMDB ){ pager_unlock(pPager); @@ -2762,7 +2904,7 @@ int sqlite3PagerClose(Pager *pPager){ PAGERTRACE(("CLOSE %d\n", PAGERID(pPager))); IOTRACE(("CLOSE %p\n", pPager)) sqlite3OsClose(pPager->fd); - sqlite3PageFree(pPager->pTmpSpace); + sqlite3PageFree(pTmp); sqlite3PcacheClose(pPager->pPCache); #ifdef SQLITE_HAS_CODEC @@ -3066,7 +3208,10 @@ static int subjournalPage(PgHdr *pPg){ CODEC2(pPager, pData, pPg->pgno, 7, return SQLITE_NOMEM, pData2); PAGERTRACE(("STMT-JOURNAL %d page %d\n", PAGERID(pPager), pPg->pgno)); - assert( pageInJournal(pPg) || pPg->pgno>pPager->dbOrigSize ); + assert( pagerUseLog(pPager) + || pageInJournal(pPg) + || pPg->pgno>pPager->dbOrigSize + ); rc = write32bits(pPager->sjfd, offset, pPg->pgno); if( rc==SQLITE_OK ){ rc = sqlite3OsWrite(pPager->sjfd, pData2, pPager->pageSize, offset+4); @@ -3107,6 +3252,8 @@ static int pagerStress(void *p, PgHdr *pPg){ assert( pPg->pPager==pPager ); assert( pPg->flags&PGHDR_DIRTY ); + if( pagerUseLog(pPager) ) return SQLITE_OK; + /* The doNotSync flag is set by the sqlite3PagerWrite() function while it ** is journalling a set of two or more database pages that are stored ** on the same disk sector. Syncing the journal is not allowed while @@ -3582,67 +3729,6 @@ static int hasHotJournal(Pager *pPager, int *pExists){ return rc; } -/* -** Read the content for page pPg out of the database file and into -** pPg->pData. A shared lock or greater must be held on the database -** file before this function is called. -** -** If page 1 is read, then the value of Pager.dbFileVers[] is set to -** the value read from the database file. -** -** If an IO error occurs, then the IO error is returned to the caller. 
-** Otherwise, SQLITE_OK is returned. -*/ -static int readDbPage(PgHdr *pPg){ - Pager *pPager = pPg->pPager; /* Pager object associated with page pPg */ - Pgno pgno = pPg->pgno; /* Page number to read */ - int rc; /* Return code */ - i64 iOffset; /* Byte offset of file to read from */ - - assert( pPager->state>=PAGER_SHARED && !MEMDB ); - assert( isOpen(pPager->fd) ); - - if( NEVER(!isOpen(pPager->fd)) ){ - assert( pPager->tempFile ); - memset(pPg->pData, 0, pPager->pageSize); - return SQLITE_OK; - } - iOffset = (pgno-1)*(i64)pPager->pageSize; - rc = sqlite3OsRead(pPager->fd, pPg->pData, pPager->pageSize, iOffset); - if( rc==SQLITE_IOERR_SHORT_READ ){ - rc = SQLITE_OK; - } - if( pgno==1 ){ - if( rc ){ - /* If the read is unsuccessful, set the dbFileVers[] to something - ** that will never be a valid file version. dbFileVers[] is a copy - ** of bytes 24..39 of the database. Bytes 28..31 should always be - ** zero. Bytes 32..35 and 35..39 should be page numbers which are - ** never 0xffffffff. So filling pPager->dbFileVers[] with all 0xff - ** bytes should suffice. - ** - ** For an encrypted database, the situation is more complex: bytes - ** 24..39 of the database are white noise. But the probability of - ** white noising equaling 16 bytes of 0xff is vanishingly small so - ** we should still be ok. - */ - memset(pPager->dbFileVers, 0xff, sizeof(pPager->dbFileVers)); - }else{ - u8 *dbFileVers = &((u8*)pPg->pData)[24]; - memcpy(&pPager->dbFileVers, dbFileVers, sizeof(pPager->dbFileVers)); - } - } - CODEC1(pPager, pPg->pData, pgno, 3, rc = SQLITE_NOMEM); - - PAGER_INCR(sqlite3_pager_readdb_count); - PAGER_INCR(pPager->nRead); - IOTRACE(("PGIN %p %d\n", pPager, pgno)); - PAGERTRACE(("FETCH %d page %d hash(%08x)\n", - PAGERID(pPager), pgno, pager_pagehash(pPg))); - - return rc; -} - /* ** This function is called to obtain a shared lock on the database file. 
** It is illegal to call sqlite3PagerAcquire() until after this function @@ -3696,22 +3782,34 @@ int sqlite3PagerSharedLock(Pager *pPager){ pager_reset(pPager); } - if( pPager->state==PAGER_UNLOCK || isErrorReset ){ + if( pPager->noReadlock ){ + assert( pPager->readOnly ); + pPager->state = PAGER_SHARED; + }else{ + rc = pager_wait_on_lock(pPager, SHARED_LOCK); + if( rc!=SQLITE_OK ){ + assert( pPager->state==PAGER_UNLOCK ); + return pager_error(pPager, rc); + } + } + assert( pPager->state>=SHARED_LOCK ); + + if( pagerUseLog(pPager) ){ + int changed = 0; + rc = sqlite3LogOpenSnapshot(pPager->pLog, &changed); + if( rc==SQLITE_OK ){ + if( changed ){ + pager_reset(pPager); + assert( pPager->errCode || pPager->dbSizeValid==0 ); + } + pPager->state = PAGER_SHARED; + rc = sqlite3PagerPagecount(pPager, &changed); + } + }else if( pPager->state==PAGER_UNLOCK || isErrorReset ){ sqlite3_vfs * const pVfs = pPager->pVfs; int isHotJournal = 0; assert( !MEMDB ); assert( sqlite3PcacheRefCount(pPager->pPCache)==0 ); - if( pPager->noReadlock ){ - assert( pPager->readOnly ); - pPager->state = PAGER_SHARED; - }else{ - rc = pager_wait_on_lock(pPager, SHARED_LOCK); - if( rc!=SQLITE_OK ){ - assert( pPager->state==PAGER_UNLOCK ); - return pager_error(pPager, rc); - } - } - assert( pPager->state>=SHARED_LOCK ); /* If a journal file exists, and there is no RESERVED lock on the ** database file, then it either needs to be played back or deleted. @@ -3785,7 +3883,7 @@ int sqlite3PagerSharedLock(Pager *pPager){ pPager->journalOff = 0; pPager->setMaster = 0; pPager->journalHdr = 0; - + /* Make sure the journal file has been synced to disk. */ /* Playback and delete the journal. 
Drop the database write @@ -3992,8 +4090,8 @@ int sqlite3PagerAcquire( if( MEMDB || nMax<(int)pgno || noContent || !isOpen(pPager->fd) ){ if( pgno>pPager->mxPgno ){ - rc = SQLITE_FULL; - goto pager_acquire_err; + rc = SQLITE_FULL; + goto pager_acquire_err; } if( noContent ){ /* Failure to set the bits in the InJournal bit-vectors is benign. @@ -4088,7 +4186,7 @@ void sqlite3PagerUnref(DbPage *pPg){ */ static int openSubJournal(Pager *pPager){ int rc = SQLITE_OK; - if( isOpen(pPager->jfd) && !isOpen(pPager->sjfd) ){ + if( (pagerUseLog(pPager) || isOpen(pPager->jfd)) && !isOpen(pPager->sjfd) ){ if( pPager->journalMode==PAGER_JOURNALMODE_MEMORY || pPager->subjInMemory ){ sqlite3MemJournalOpen(pPager->sjfd); }else{ @@ -4237,6 +4335,18 @@ int sqlite3PagerBegin(Pager *pPager, int exFlag, int subjInMemory){ } } + if( rc==SQLITE_OK && pagerUseLog(pPager) ){ + /* Grab the write lock on the log file. If successful, upgrade to + ** PAGER_EXCLUSIVE state. Otherwise, return an error code to the caller. + ** The busy-handler is not invoked if another connection already + ** holds the write-lock. If possible, the upper layer will call it. + */ + rc = sqlite3LogWriteLock(pPager->pLog, 1); + if( rc==SQLITE_OK ){ + pPager->dbOrigSize = pPager->dbSize; + } + } + /* No need to open the journal file at this time. It will be ** opened before it is written to. If we defer opening the journal, ** we might save the work of creating a file if the transaction @@ -4249,6 +4359,7 @@ int sqlite3PagerBegin(Pager *pPager, int exFlag, int subjInMemory){ ** kept open and either was truncated to 0 bytes or its header was ** overwritten with zeros. 
*/ + assert( pagerUseLog(pPager)==0 ); assert( pPager->nRec==0 ); assert( pPager->dbOrigSize==0 ); assert( pPager->pInJournal==0 ); @@ -4303,6 +4414,7 @@ static int pager_write(PgHdr *pPg){ */ sqlite3PcacheMakeDirty(pPg); if( pageInJournal(pPg) && !subjRequiresPage(pPg) ){ + assert( !pagerUseLog(pPager) ); pPager->dbModified = 1; }else{ @@ -4318,7 +4430,10 @@ static int pager_write(PgHdr *pPg){ if( rc!=SQLITE_OK ){ return rc; } - if( !isOpen(pPager->jfd) && pPager->journalMode!=PAGER_JOURNALMODE_OFF ){ + if( !isOpen(pPager->jfd) + && pPager->journalMode!=PAGER_JOURNALMODE_OFF + && pPager->journalMode!=PAGER_JOURNALMODE_WAL + ){ assert( pPager->useJournal ); rc = pager_open_journal(pPager); if( rc!=SQLITE_OK ) return rc; @@ -4330,6 +4445,7 @@ static int pager_write(PgHdr *pPg){ ** the transaction journal if it is not there already. */ if( !pageInJournal(pPg) && isOpen(pPager->jfd) ){ + assert( !pagerUseLog(pPager) ); if( pPg->pgno<=pPager->dbOrigSize ){ u32 cksum; char *pData2; @@ -4710,129 +4826,138 @@ int sqlite3PagerCommitPhaseOne( */ sqlite3BackupRestart(pPager->pBackup); }else if( pPager->state!=PAGER_SYNCED && pPager->dbModified ){ - - /* The following block updates the change-counter. Exactly how it - ** does this depends on whether or not the atomic-update optimization - ** was enabled at compile time, and if this transaction meets the - ** runtime criteria to use the operation: - ** - ** * The file-system supports the atomic-write property for - ** blocks of size page-size, and - ** * This commit is not part of a multi-file transaction, and - ** * Exactly one page has been modified and store in the journal file. - ** - ** If the optimization was not enabled at compile time, then the - ** pager_incr_changecounter() function is called to update the change - ** counter in 'indirect-mode'. 
If the optimization is compiled in but - ** is not applicable to this transaction, call sqlite3JournalCreate() - ** to make sure the journal file has actually been created, then call - ** pager_incr_changecounter() to update the change-counter in indirect - ** mode. - ** - ** Otherwise, if the optimization is both enabled and applicable, - ** then call pager_incr_changecounter() to update the change-counter - ** in 'direct' mode. In this case the journal file will never be - ** created for this transaction. - */ -#ifdef SQLITE_ENABLE_ATOMIC_WRITE - PgHdr *pPg; - assert( isOpen(pPager->jfd) || pPager->journalMode==PAGER_JOURNALMODE_OFF ); - if( !zMaster && isOpen(pPager->jfd) - && pPager->journalOff==jrnlBufferSize(pPager) - && pPager->dbSize>=pPager->dbFileSize - && (0==(pPg = sqlite3PcacheDirtyList(pPager->pPCache)) || 0==pPg->pDirty) - ){ - /* Update the db file change counter via the direct-write method. The - ** following call will modify the in-memory representation of page 1 - ** to include the updated change counter and then write page 1 - ** directly to the database file. Because of the atomic-write - ** property of the host file-system, this is safe. - */ - rc = pager_incr_changecounter(pPager, 1); - }else{ - rc = sqlite3JournalCreate(pPager->jfd); - if( rc==SQLITE_OK ){ - rc = pager_incr_changecounter(pPager, 0); + if( pagerUseLog(pPager) ){ + PgHdr *pList = sqlite3PcacheDirtyList(pPager->pPCache); + if( pList ){ + rc = sqlite3LogFrames(pPager->pLog, pPager->pageSize, pList, + pPager->dbSize, 1, 1 + ); } - } -#else - rc = pager_incr_changecounter(pPager, 0); -#endif - if( rc!=SQLITE_OK ) goto commit_phase_one_exit; - - /* If this transaction has made the database smaller, then all pages - ** being discarded by the truncation must be written to the journal - ** file. This can only happen in auto-vacuum mode. 
- ** - ** Before reading the pages with page numbers larger than the - ** current value of Pager.dbSize, set dbSize back to the value - ** that it took at the start of the transaction. Otherwise, the - ** calls to sqlite3PagerGet() return zeroed pages instead of - ** reading data from the database file. - ** - ** When journal_mode==OFF the dbOrigSize is always zero, so this - ** block never runs if journal_mode=OFF. - */ -#ifndef SQLITE_OMIT_AUTOVACUUM - if( pPager->dbSizedbOrigSize - && ALWAYS(pPager->journalMode!=PAGER_JOURNALMODE_OFF) - ){ - Pgno i; /* Iterator variable */ - const Pgno iSkip = PAGER_MJ_PGNO(pPager); /* Pending lock page */ - const Pgno dbSize = pPager->dbSize; /* Database image size */ - pPager->dbSize = pPager->dbOrigSize; - for( i=dbSize+1; i<=pPager->dbOrigSize; i++ ){ - if( !sqlite3BitvecTest(pPager->pInJournal, i) && i!=iSkip ){ - PgHdr *pPage; /* Page to journal */ - rc = sqlite3PagerGet(pPager, i, &pPage); - if( rc!=SQLITE_OK ) goto commit_phase_one_exit; - rc = sqlite3PagerWrite(pPage); - sqlite3PagerUnref(pPage); - if( rc!=SQLITE_OK ) goto commit_phase_one_exit; + sqlite3PcacheCleanAll(pPager->pPCache); + }else{ + /* The following block updates the change-counter. Exactly how it + ** does this depends on whether or not the atomic-update optimization + ** was enabled at compile time, and if this transaction meets the + ** runtime criteria to use the operation: + ** + ** * The file-system supports the atomic-write property for + ** blocks of size page-size, and + ** * This commit is not part of a multi-file transaction, and + ** * Exactly one page has been modified and store in the journal file. + ** + ** If the optimization was not enabled at compile time, then the + ** pager_incr_changecounter() function is called to update the change + ** counter in 'indirect-mode'. 
If the optimization is compiled in but + ** is not applicable to this transaction, call sqlite3JournalCreate() + ** to make sure the journal file has actually been created, then call + ** pager_incr_changecounter() to update the change-counter in indirect + ** mode. + ** + ** Otherwise, if the optimization is both enabled and applicable, + ** then call pager_incr_changecounter() to update the change-counter + ** in 'direct' mode. In this case the journal file will never be + ** created for this transaction. + */ + #ifdef SQLITE_ENABLE_ATOMIC_WRITE + PgHdr *pPg; + assert( isOpen(pPager->jfd) || pPager->journalMode==PAGER_JOURNALMODE_OFF ); + if( !zMaster && isOpen(pPager->jfd) + && pPager->journalOff==jrnlBufferSize(pPager) + && pPager->dbSize>=pPager->dbFileSize + && (0==(pPg = sqlite3PcacheDirtyList(pPager->pPCache)) || 0==pPg->pDirty) + ){ + /* Update the db file change counter via the direct-write method. The + ** following call will modify the in-memory representation of page 1 + ** to include the updated change counter and then write page 1 + ** directly to the database file. Because of the atomic-write + ** property of the host file-system, this is safe. + */ + rc = pager_incr_changecounter(pPager, 1); + }else{ + rc = sqlite3JournalCreate(pPager->jfd); + if( rc==SQLITE_OK ){ + rc = pager_incr_changecounter(pPager, 0); } - } - pPager->dbSize = dbSize; - } -#endif - - /* Write the master journal name into the journal file. If a master - ** journal file name has already been written to the journal file, - ** or if zMaster is NULL (no master journal), then this call is a no-op. - */ - rc = writeMasterJournal(pPager, zMaster); - if( rc!=SQLITE_OK ) goto commit_phase_one_exit; - - /* Sync the journal file. If the atomic-update optimization is being - ** used, this call will not create the journal file or perform any - ** real IO. - */ - rc = syncJournal(pPager); - if( rc!=SQLITE_OK ) goto commit_phase_one_exit; - - /* Write all dirty pages to the database file. 
*/ - rc = pager_write_pagelist(sqlite3PcacheDirtyList(pPager->pPCache)); - if( rc!=SQLITE_OK ){ - assert( rc!=SQLITE_IOERR_BLOCKED ); - goto commit_phase_one_exit; - } - sqlite3PcacheCleanAll(pPager->pPCache); - - /* If the file on disk is not the same size as the database image, - ** then use pager_truncate to grow or shrink the file here. - */ - if( pPager->dbSize!=pPager->dbFileSize ){ - Pgno nNew = pPager->dbSize - (pPager->dbSize==PAGER_MJ_PGNO(pPager)); - assert( pPager->state>=PAGER_EXCLUSIVE ); - rc = pager_truncate(pPager, nNew); + } + #else + rc = pager_incr_changecounter(pPager, 0); + #endif if( rc!=SQLITE_OK ) goto commit_phase_one_exit; + + /* If this transaction has made the database smaller, then all pages + ** being discarded by the truncation must be written to the journal + ** file. This can only happen in auto-vacuum mode. + ** + ** Before reading the pages with page numbers larger than the + ** current value of Pager.dbSize, set dbSize back to the value + ** that it took at the start of the transaction. Otherwise, the + ** calls to sqlite3PagerGet() return zeroed pages instead of + ** reading data from the database file. + ** + ** When journal_mode==OFF the dbOrigSize is always zero, so this + ** block never runs if journal_mode=OFF. 
+ */ + #ifndef SQLITE_OMIT_AUTOVACUUM + if( pPager->dbSize<pPager->dbOrigSize + && ALWAYS(pPager->journalMode!=PAGER_JOURNALMODE_OFF) + ){ + Pgno i; /* Iterator variable */ + const Pgno iSkip = PAGER_MJ_PGNO(pPager); /* Pending lock page */ + const Pgno dbSize = pPager->dbSize; /* Database image size */ + pPager->dbSize = pPager->dbOrigSize; + for( i=dbSize+1; i<=pPager->dbOrigSize; i++ ){ + if( !sqlite3BitvecTest(pPager->pInJournal, i) && i!=iSkip ){ + PgHdr *pPage; /* Page to journal */ + rc = sqlite3PagerGet(pPager, i, &pPage); + if( rc!=SQLITE_OK ) goto commit_phase_one_exit; + rc = sqlite3PagerWrite(pPage); + sqlite3PagerUnref(pPage); + if( rc!=SQLITE_OK ) goto commit_phase_one_exit; + } + } + pPager->dbSize = dbSize; + } + #endif + + /* Write the master journal name into the journal file. If a master + ** journal file name has already been written to the journal file, + ** or if zMaster is NULL (no master journal), then this call is a no-op. + */ + rc = writeMasterJournal(pPager, zMaster); + if( rc!=SQLITE_OK ) goto commit_phase_one_exit; + + /* Sync the journal file. If the atomic-update optimization is being + ** used, this call will not create the journal file or perform any + ** real IO. + */ + rc = syncJournal(pPager); + if( rc!=SQLITE_OK ) goto commit_phase_one_exit; + + /* Write all dirty pages to the database file. */ + rc = pager_write_pagelist(sqlite3PcacheDirtyList(pPager->pPCache)); + if( rc!=SQLITE_OK ){ + assert( rc!=SQLITE_IOERR_BLOCKED ); + goto commit_phase_one_exit; + } + sqlite3PcacheCleanAll(pPager->pPCache); + + /* If the file on disk is not the same size as the database image, + ** then use pager_truncate to grow or shrink the file here. + */ + if( pPager->dbSize!=pPager->dbFileSize ){ + Pgno nNew = pPager->dbSize - (pPager->dbSize==PAGER_MJ_PGNO(pPager)); + assert( pPager->state>=PAGER_EXCLUSIVE ); + rc = pager_truncate(pPager, nNew); + if( rc!=SQLITE_OK ) goto commit_phase_one_exit; + } + + /* Finally, sync the database file.
*/ + if( !pPager->noSync && !noSync ){ + rc = sqlite3OsSync(pPager->fd, pPager->sync_flags); + } + IOTRACE(("DBSYNC %p\n", pPager)) } - /* Finally, sync the database file. */ - if( !pPager->noSync && !noSync ){ - rc = sqlite3OsSync(pPager->fd, pPager->sync_flags); - } - IOTRACE(("DBSYNC %p\n", pPager)) - pPager->state = PAGER_SYNCED; } @@ -4940,7 +5065,12 @@ int sqlite3PagerCommitPhaseTwo(Pager *pPager){ int sqlite3PagerRollback(Pager *pPager){ int rc = SQLITE_OK; /* Return code */ PAGERTRACE(("ROLLBACK %d\n", PAGERID(pPager))); - if( !pPager->dbModified || !isOpen(pPager->jfd) ){ + if( pagerUseLog(pPager) ){ + int rc2; + rc = sqlite3PagerSavepoint(pPager, SAVEPOINT_ROLLBACK, -1); + rc2 = pager_end_transaction(pPager, pPager->setMaster); + if( rc==SQLITE_OK ) rc = rc2; + }else if( !pPager->dbModified || !isOpen(pPager->jfd) ){ rc = pager_end_transaction(pPager, pPager->setMaster); }else if( pPager->errCode && pPager->errCode!=SQLITE_FULL ){ if( pPager->state>=PAGER_EXCLUSIVE ){ @@ -5158,7 +5288,7 @@ int sqlite3PagerSavepoint(Pager *pPager, int op, int iSavepoint){ ** not yet been opened. In this case there have been no changes to ** the database file, so the playback operation can be skipped. */ - else if( isOpen(pPager->jfd) ){ + else if( pagerUseLog(pPager) || isOpen(pPager->jfd) ){ PagerSavepoint *pSavepoint = (nNew==0)?0:&pPager->aSavepoint[nNew-1]; rc = pagerPlaybackSavepoint(pPager, pSavepoint); assert(rc!=SQLITE_DONE); @@ -5435,6 +5565,7 @@ int sqlite3PagerLockingMode(Pager *pPager, int eMode){ ** PAGER_JOURNALMODE_PERSIST ** PAGER_JOURNALMODE_OFF ** PAGER_JOURNALMODE_MEMORY +** PAGER_JOURNALMODE_WAL ** ** If the parameter is not _QUERY, then the journal_mode is set to the ** value specified if the change is allowed. 
The change is disallowed @@ -5453,11 +5584,12 @@ int sqlite3PagerJournalMode(Pager *pPager, int eMode){ || eMode==PAGER_JOURNALMODE_TRUNCATE || eMode==PAGER_JOURNALMODE_PERSIST || eMode==PAGER_JOURNALMODE_OFF + || eMode==PAGER_JOURNALMODE_WAL || eMode==PAGER_JOURNALMODE_MEMORY ); assert( PAGER_JOURNALMODE_QUERY<0 ); if( eMode>=0 - && (!MEMDB || eMode==PAGER_JOURNALMODE_MEMORY - || eMode==PAGER_JOURNALMODE_OFF) + && (pPager->tempFile==0 || eMode!=PAGER_JOURNALMODE_WAL) + && (!MEMDB || eMode==PAGER_JOURNALMODE_MEMORY||eMode==PAGER_JOURNALMODE_OFF) && !pPager->dbModified && (!isOpen(pPager->jfd) || 0==pPager->journalOff) ){ @@ -5473,6 +5605,16 @@ int sqlite3PagerJournalMode(Pager *pPager, int eMode){ && !pPager->exclusiveMode ){ sqlite3OsDelete(pPager->pVfs, pPager->zJournal, 0); } + + if( eMode==PAGER_JOURNALMODE_WAL ){ + int rc = pagerOpenLog(pPager); + if( rc!=SQLITE_OK ){ + /* TODO: The error code should not just get dropped here. Change + ** this to set a flag to force the log to be opened the first time + ** it is actually required. */ + return (int)pPager->journalMode; + } + } pPager->journalMode = (u8)eMode; } return (int)pPager->journalMode; @@ -5501,4 +5643,19 @@ sqlite3_backup **sqlite3PagerBackupPtr(Pager *pPager){ return &pPager->pBackup; } +/* +** This function is called when the user invokes "PRAGMA checkpoint". +*/ +int sqlite3PagerCheckpoint(Pager *pPager, int nMin, int nMax, int doSync){ + int rc = SQLITE_OK; + if( pPager->pLog ){ + rc = pager_wait_on_lock(pPager, EXCLUSIVE_LOCK); + if( rc==SQLITE_OK ){ + u8 *zBuf = (u8 *)pPager->pTmpSpace; + rc = sqlite3LogCheckpoint(pPager->pLog, pPager->fd, zBuf); + } + } + return rc; +} + #endif /* SQLITE_OMIT_DISKIO */ diff --git a/src/pager.h b/src/pager.h index 7d778c82cd..f329e43d81 100644 --- a/src/pager.h +++ b/src/pager.h @@ -76,6 +76,7 @@ typedef struct PgHdr DbPage; #define PAGER_JOURNALMODE_OFF 2 /* Journal omitted. 
*/ #define PAGER_JOURNALMODE_TRUNCATE 3 /* Commit by truncating journal */ #define PAGER_JOURNALMODE_MEMORY 4 /* In-memory journal file */ +#define PAGER_JOURNALMODE_WAL 5 /* Use write-ahead logging */ /* ** The remainder of this file contains the declarations of the functions diff --git a/src/pragma.c b/src/pragma.c index f03078f246..8e24387d33 100644 --- a/src/pragma.c +++ b/src/pragma.c @@ -445,6 +445,31 @@ void sqlite3Pragma( returnSingleInt(pParse, "secure_delete", b); }else + /* + ** PRAGMA [database.]secure_delete + ** PRAGMA [database.]secure_delete=ON/OFF + ** + ** The first form reports the current setting for the + ** secure_delete flag. The second form changes the secure_delete + ** flag setting and reports the new value. + */ + if( sqlite3StrICmp(zLeft,"secure_delete")==0 ){ + Btree *pBt = pDb->pBt; + int b = -1; + assert( pBt!=0 ); + if( zRight ){ + b = getBoolean(zRight); + } + if( pId2->n==0 && b>=0 ){ + int ii; + for(ii=0; ii<db->nDb; ii++){ + sqlite3BtreeSecureDelete(db->aDb[ii].pBt, b); + } + } + b = sqlite3BtreeSecureDelete(pBt, b); + returnSingleInt(pParse, "secure_delete", b); + }else + /* ** PRAGMA [database.]page_count ** @@ -515,7 +540,7 @@ void sqlite3Pragma( if( sqlite3StrICmp(zLeft,"journal_mode")==0 ){ int eMode; static char * const azModeName[] = { - "delete", "persist", "off", "truncate", "memory" + "delete", "persist", "off", "truncate", "memory", "wal" }; if( zRight==0 ){ @@ -561,6 +586,7 @@ void sqlite3Pragma( || eMode==PAGER_JOURNALMODE_TRUNCATE || eMode==PAGER_JOURNALMODE_PERSIST || eMode==PAGER_JOURNALMODE_OFF + || eMode==PAGER_JOURNALMODE_WAL || eMode==PAGER_JOURNALMODE_MEMORY ); sqlite3VdbeSetNumCols(v, 1); sqlite3VdbeSetColName(v, 0, COLNAME_NAME, "journal_mode", SQLITE_STATIC); @@ -1383,6 +1409,26 @@ void sqlite3Pragma( }else #endif /* SQLITE_OMIT_COMPILEOPTION_DIAGS */ + if( sqlite3StrICmp(zLeft, "checkpoint")==0 ){ + int nMin = 0; + int nMax = 0; + int nosync = 0; + + if( zRight ){ + char *z = zRight; + sqlite3GetInt32(z,
&nMin); + while( sqlite3Isdigit(*z) ) z++; + while( *z && !sqlite3Isdigit(*z) ) z++; + sqlite3GetInt32(z, &nMax); + while( sqlite3Isdigit(*z) ) z++; + while( *z && !sqlite3Isdigit(*z) ) z++; + sqlite3GetInt32(z, &nosync); + } + sqlite3VdbeUsesBtree(v, iDb); + sqlite3VdbeAddOp2(v, OP_Transaction, iDb, 1); + sqlite3VdbeAddOp3(v, OP_Checkpoint, iDb, nMin, nMax); + }else + #if defined(SQLITE_DEBUG) || defined(SQLITE_TEST) /* ** Report the current state of file logs for all databases diff --git a/src/vdbe.c b/src/vdbe.c index c1b0eea313..8e27942331 100644 --- a/src/vdbe.c +++ b/src/vdbe.c @@ -5186,6 +5186,21 @@ case OP_AggFinal: { break; } +/* Opcode: Checkpoint P1 P2 P3 * P5 +*/ +case OP_Checkpoint: { + Btree *pBt; /* Btree to checkpoint */ + int nMin = pOp->p2; /* Minimum number of pages to copy */ + int nMax = pOp->p3; /* Maximum number of pages to copy */ + int doNotSync = pOp->p5; /* True to skip syncing the database */ + + assert( pOp->p1>=0 && pOp->p1<db->nDb ); + assert( (p->btreeMask & (1<<pOp->p1))!=0 ); + pBt = db->aDb[pOp->p1].pBt; + + rc = sqlite3PagerCheckpoint(sqlite3BtreePager(pBt), nMin, nMax, !doNotSync); + break; +}; #if !defined(SQLITE_OMIT_VACUUM) && !defined(SQLITE_OMIT_ATTACH) /* Opcode: Vacuum * * * * * diff --git a/test/wal.test b/test/wal.test new file mode 100644 index 0000000000..57fccce288 --- /dev/null +++ b/test/wal.test @@ -0,0 +1,551 @@ + +set testdir [file dirname $argv0] +source $testdir/tester.tcl + +proc range {args} { + set ret [list] + foreach {start end} $args { + for {set i $start} {$i <= $end} {incr i} { + lappend ret $i + } + } + set ret +} + +proc reopen_db {} { + db close + file delete -force test.db test.db-wal + sqlite3_wal db test.db + #register_logtest +} +proc register_logtest {{db db}} { + register_logsummary_module $db + execsql { CREATE VIRTUAL TABLE temp.logsummary USING logsummary } $db + execsql { CREATE VIRTUAL TABLE temp.logcontent USING logcontent } $db + execsql { CREATE VIRTUAL TABLE temp.loglock USING loglock } $db +} + +proc
sqlite3_wal {args} { + eval sqlite3 $args + [lindex $args 0] eval { PRAGMA journal_mode = wal } +} + +# +# These are 'warm-body' tests used while developing the WAL code. They +# serve to prove that a few really simple cases work: +# +# wal-1.*: Read and write the database. +# wal-2.*: Test MVCC with one reader, one writer. +# wal-3.*: Test transaction rollback. +# wal-4.*: Test savepoint/statement rollback. +# wal-5.*: Test the temp database. +# wal-6.*: Test creating databases with different page sizes. +# + +do_test wal-0.1 { + execsql { PRAGMA journal_mode = wal } +} {wal} + +do_test wal-1.0 { + execsql { + BEGIN; + CREATE TABLE t1(a, b); + } + list [file exists test.db-journal] [file exists test.db-wal] +} {0 1} +do_test wal-1.1 { + execsql COMMIT + list [file exists test.db-journal] [file exists test.db-wal] +} {0 1} +do_test wal-1.2 { + # There are now two pages in the log. + file size test.db-wal +} [expr (20+1024)*2] + +do_test wal-1.3 { + execsql { SELECT * FROM sqlite_master } +} {table t1 t1 2 {CREATE TABLE t1(a, b)}} + +do_test wal-1.4 { + execsql { INSERT INTO t1 VALUES(1, 2) } + execsql { INSERT INTO t1 VALUES(3, 4) } + execsql { INSERT INTO t1 VALUES(5, 6) } + execsql { INSERT INTO t1 VALUES(7, 8) } + execsql { INSERT INTO t1 VALUES(9, 10) } +} {} + +do_test wal-1.5 { + execsql { SELECT * FROM t1 } +} {1 2 3 4 5 6 7 8 9 10} + +do_test wal-2.1 { + sqlite3_wal db2 ./test.db + execsql { BEGIN; SELECT * FROM t1 } db2 +} {1 2 3 4 5 6 7 8 9 10} + +do_test wal-2.2 { + execsql { INSERT INTO t1 VALUES(11, 12) } + execsql { SELECT * FROM t1 } +} {1 2 3 4 5 6 7 8 9 10 11 12} + +do_test wal-2.3 { + execsql { SELECT * FROM t1 } db2 +} {1 2 3 4 5 6 7 8 9 10} + +do_test wal-2.4 { + execsql { INSERT INTO t1 VALUES(13, 14) } + execsql { SELECT * FROM t1 } +} {1 2 3 4 5 6 7 8 9 10 11 12 13 14} + +do_test wal-2.5 { + execsql { SELECT * FROM t1 } db2 +} {1 2 3 4 5 6 7 8 9 10} + +do_test wal-2.6 { + execsql { COMMIT; SELECT * FROM t1 } db2 +} {1 2 3 4 5 6 7 8 9 10 11 12 
13 14} + +do_test wal-3.1 { + execsql { BEGIN; DELETE FROM t1 } + execsql { SELECT * FROM t1 } +} {} +do_test wal-3.2 { + execsql { SELECT * FROM t1 } db2 +} {1 2 3 4 5 6 7 8 9 10 11 12 13 14} +do_test wal-3.3 { + execsql { ROLLBACK } + execsql { SELECT * FROM t1 } +} {1 2 3 4 5 6 7 8 9 10 11 12 13 14} +db2 close + +do_test wal-4.1 { + execsql { + DELETE FROM t1; + BEGIN; + INSERT INTO t1 VALUES('a', 'b'); + SAVEPOINT sp; + INSERT INTO t1 VALUES('c', 'd'); + SELECT * FROM t1; + } +} {a b c d} +do_test wal-4.2 { + execsql { + ROLLBACK TO sp; + SELECT * FROM t1; + } +} {a b} +do_test wal-4.3 { + execsql { + COMMIT; + SELECT * FROM t1; + } +} {a b} + +do_test wal-5.1 { + execsql { + CREATE TEMP TABLE t2(a, b); + INSERT INTO t2 VALUES(1, 2); + } +} {} +do_test wal-5.2 { + execsql { + BEGIN; + INSERT INTO t2 VALUES(3, 4); + SELECT * FROM t2; + } +} {1 2 3 4} +do_test wal-5.3 { + execsql { + ROLLBACK; + SELECT * FROM t2; + } +} {1 2} +do_test wal-5.4 { + execsql { + CREATE TEMP TABLE t3(x UNIQUE); + BEGIN; + INSERT INTO t2 VALUES(3, 4); + INSERT INTO t3 VALUES('abc'); + } + catchsql { INSERT INTO t3 VALUES('abc') } +} {1 {column x is not unique}} +do_test wal-5.5 { + execsql { + COMMIT; + SELECT * FROM t2; + } +} {1 2 3 4} +db close + + +foreach sector {512 4096} { + sqlite3_simulate_device -sectorsize $sector + foreach pgsz {512 1024 2048 4096} { + file delete -force test.db test.db-wal + do_test wal-6.$sector.$pgsz.1 { + sqlite3_wal db test.db -vfs devsym + execsql " + PRAGMA page_size = $pgsz ; + " + execsql " + CREATE TABLE t1(a, b); + INSERT INTO t1 VALUES(1, 2); + " + db close + file size test.db + } [expr $pgsz*2] + + do_test wal-6.$sector.$pgsz.2 { + file size test.db-wal + } {0} + } +} + +do_test wal-7.1 { + file delete -force test.db test.db-wal + sqlite3_wal db test.db + execsql { + PRAGMA page_size = 1024; + CREATE TABLE t1(a, b); + INSERT INTO t1 VALUES(1, 2); + } + + list [file size test.db] [file size test.db-wal] +} [list 0 [expr (1024+20)*3]] +do_test 
wal-7.2 { + execsql { PRAGMA checkpoint } + list [file size test.db] [file size test.db-wal] +} [list 2048 [expr (1024+20)*3]] + +# db close +# sqlite3_wal db test.db +# register_logsummary_module db +# # Warm-body tests of the virtual tables used for testing. +# # +# do_test wal-8.1 { +# execsql { CREATE VIRTUAL TABLE temp.logsummary USING logsummary } +# execsql { CREATE VIRTUAL TABLE temp.logcontent USING logcontent } +# execsql { CREATE VIRTUAL TABLE temp.loglock USING loglock } +# execsql { SELECT * FROM logsummary } +# } [list [file join [pwd] test.db] 1 0 0 0 0 0 0] +# +# do_test wal-8.2 { +# sqlite3_wal db2 test.db +# execsql { SELECT * FROM logsummary } +# } [list [file join [pwd] test.db] 2 0 0 0 0 0 0] +# do_test wal-8.3 { +# db2 close +# execsql { SELECT * FROM logsummary } +# } [list [file join [pwd] test.db] 1 0 0 0 0 0 0] +# do_test wal-8.4 { +# execsql { INSERT INTO t1 VALUES(3, 4) } +# execsql { SELECT * FROM logsummary } +# } [list [file join [pwd] test.db] 1 1024 2 2 3 0 0] +# do_test wal-8.5 { +# execsql { PRAGMA checkpoint } +# execsql { SELECT * FROM logsummary } +# } [list [file join [pwd] test.db] 1 1024 0 0 0 0 0] +# +# do_test wal-8.6 { +# execsql { INSERT INTO t1 VALUES(5, 6) } +# execsql { PRAGMA checkpoint('1 1 1') } +# execsql { SELECT * FROM logsummary } +# } [list [file join [pwd] test.db] 1 1024 2 0 3 0 0] +# do_test wal-8.7 { +# execsql { SELECT logpage, dbpage FROM logcontent } +# } {} +# do_test wal-8.8 { +# execsql { INSERT INTO t1 VALUES(7, 8) } +# execsql { SELECT logpage, dbpage FROM logcontent } +# } {4 T:4 5 2} +# do_test wal-8.9 { +# execsql { SELECT * FROM logsummary } +# } [list [file join [pwd] test.db] 1 1024 2 4 5 0 0] +# do_test wal-8.10 { +# execsql { SELECT * FROM loglock } +# } [list [file join [pwd] test.db] 0 0 0] +# do_test wal-8.11 { +# execsql { BEGIN; SELECT * FROM t1; } +# execsql { SELECT * FROM loglock } +# } [list [file join [pwd] test.db] 0 0 4] +# +# # Try making the log wrap around. 
+# # +# reopen_db +# +# do_test wal-9.1 { +# execsql { +# BEGIN; +# CREATE TABLE t1(a PRIMARY KEY, b); +# } +# for {set i 0} {$i < 100} {incr i} { +# execsql { INSERT INTO t1 VALUES($i, randomblob(100)) } +# } +# execsql COMMIT +# execsql { SELECT * FROM logsummary } +# } [list [file join [pwd] test.db] 1 1024 2 2 17 0 0] +# do_test wal-9.2 { +# execsql { SELECT logpage, dbpage FROM logcontent } +# } {2 T:2 3 1 4 2 5 3 6 4 7 5 8 6 9 7 10 8 11 9 12 10 13 11 14 12 15 13 16 14 17 15} +# do_test wal-9.3 { +# execsql { PRAGMA checkpoint('12, 12') } +# execsql { SELECT * FROM logsummary } +# } [list [file join [pwd] test.db] 1 1024 2 15 17 0 0] +# do_test wal-9.4 { +# execsql { SELECT logpage, dbpage FROM logcontent } +# } {15 13 16 14 17 15} +# do_test wal-9.5 { +# execsql { SELECT count(*) FROM t1 } +# } {100} +# do_test wal-9.6 { +# execsql { INSERT INTO t1 VALUES(100, randomblob(100)) } +# execsql { SELECT * FROM logsummary } +# } [list [file join [pwd] test.db] 1 1024 2 15 20 0 0] +# +# do_test wal-9.7 { +# execsql { SELECT count(*) FROM t1 } +# } {101} +# do_test wal-9.8 { +# db close +# sqlite3_wal db test.db +# register_logtest +# execsql { SELECT count(*) FROM t1 } +# } {101} +# do_test wal-9.9 { +# execsql { SELECT * FROM logsummary } +# } [list [file join [pwd] test.db] 1 0 0 0 0 0 0] +# +# reopen_db +# do_test wal-10.1 { +# execsql { +# PRAGMA page_size = 1024; +# CREATE TABLE t1(x PRIMARY KEY); +# INSERT INTO t1 VALUES(randomblob(900)); +# INSERT INTO t1 VALUES(randomblob(900)); +# INSERT INTO t1 SELECT randomblob(900) FROM t1; -- 4 +# INSERT INTO t1 SELECT randomblob(900) FROM t1; -- 8 +# INSERT INTO t1 SELECT randomblob(900) FROM t1; -- 16 +# } +# list [file size test.db] [file size test.db-wal] +# } {0 55296} +# do_test wal-10.2 { +# execsql { PRAGMA checkpoint('20 30') } +# } {} +# do_test wal-10.3 { +# execsql { SELECT * FROM logsummary } +# } [list [file join [pwd] test.db] 1 1024 34 38 54 0 0] +# do_test wal-10.4 { +# execsql { SELECT dbpage FROM 
logcontent } +# } {21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37} +# do_test wal-10.5 { +# execsql { INSERT INTO t1 VALUES(randomblob(900)) } +# execsql { SELECT * FROM logsummary } +# } [list [file join [pwd] test.db] 1 1024 34 38 6 34 54] +# do_test wal-10.6 { +# execsql { SELECT count(*) FROM t1 WHERE x NOT NULL } +# } {17} +# do_test wal-10.8 { +# execsql { SELECT logpage FROM logcontent } +# } [range 38 54 1 6] +# do_test wal-10.9 { +# execsql { INSERT INTO t1 SELECT randomblob(900) FROM t1 } +# execsql { SELECT * FROM logsummary } +# } [list [file join [pwd] test.db] 1 1024 34 38 68 34 54] +# +# do_test wal-10.10 { +# execsql { SELECT logpage FROM logcontent } +# } [range 38 54 1 33 55 68] +# +# do_test wal-10.11 { +# execsql { SELECT count(*) FROM t1 WHERE x NOT NULL } +# } {34} +# +# do_test wal-10.12 { +# execsql { PRAGMA checkpoint('35 35') } +# } {} +# do_test wal-10.13 { +# execsql { SELECT logpage FROM logcontent } +# } [range 22 68] +# do_test wal-10.13a { +# execsql { SELECT dbpage FROM logcontent } +# } [list \ +# 50 51 52 53 54 55 56 57 58 59 60 61 \ +# 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 \ +# 62 63 64 65 66 67 68 69 70 71 72 73 74 75 \ +# ] +# +# do_test wal-10.14 { +# execsql { SELECT count(*) FROM t1 WHERE x NOT NULL } +# } {34} +# do_test wal-10.15 { +# execsql { PRAGMA integrity_check } +# } {ok} +# do_test wal-10.16 { +# execsql { PRAGMA checkpoint('20 20') } +# execsql { SELECT * FROM logsummary } +# } [list [file join [pwd] test.db] 1 1024 7 63 68 0 0] +# do_test wal-10.17 { +# execsql { SELECT logpage FROM logcontent } +# } [range 63 68] +# do_test wal-10.17a { +# execsql { SELECT dbpage FROM logcontent } +# } {70 71 72 73 74 75} +# +# do_test wal-10.18 { +# execsql { INSERT INTO t1 SELECT randomblob(900) FROM t1 } +# execsql { SELECT logpage FROM logcontent } +# } [range 63 147] +# integrity_check wal-10.19 +# +# do_test wal-10.20 { +# execsql { PRAGMA checkpoint('52 52') } +# execsql { SELECT logpage FROM logcontent } +# } 
[range 116 147] +# do_test wal-10.20a { +# execsql { SELECT * FROM logsummary } +# } [list [file join [pwd] test.db] 1 1024 69 116 147 0 0] +# integrity_check wal-10.20.integrity +# +# do_test wal-10.21 { +# execsql { INSERT INTO t1 VALUES( randomblob(900) ) } +# execsql { SELECT logpage FROM logcontent } +# } [range 116 152] +# do_test wal-10.22 { +# execsql { PRAGMA integrity_check } +# } {ok} +# +# file delete -force testX.db testX.db-wal +# file copy test.db testX.db +# file copy test.db-wal testX.db-wal +# do_test wal-10.23 { +# sqlite3_wal db2 testX.db +# register_logtest db2 +# execsql { SELECT logpage FROM logcontent WHERE db LIKE '%testX%' } db2 +# } [range 34 54 1 33 55 152] +# +# do_test wal-10.24 { +# execsql { PRAGMA integrity_check } db2 +# } {ok} +# db2 close +# +# do_test wal-11.1 { +# reopen_db +# sqlite3_wal db2 test.db +# +# execsql { +# BEGIN; +# CREATE TABLE t1(x); +# CREATE TABLE t2(x PRIMARY KEY); +# INSERT INTO t1 VALUES(randomblob(900)); +# INSERT INTO t1 VALUES(randomblob(900)); +# INSERT INTO t1 SELECT randomblob(900) FROM t1; /* 4 */ +# INSERT INTO t1 SELECT randomblob(900) FROM t1; /* 8 */ +# INSERT INTO t1 SELECT randomblob(900) FROM t1; /* 16 */ +# INSERT INTO t1 SELECT randomblob(900) FROM t1; /* 32 */ +# INSERT INTO t1 SELECT randomblob(900) FROM t1; /* 64 */ +# +# INSERT INTO t2 VALUES('x'); +# INSERT INTO t2 VALUES('y'); +# INSERT INTO t2 VALUES('z'); +# COMMIT; +# SELECT * FROM logsummary; +# } +# } [list [file join [pwd] test.db] 2 1024 2 2 70 0 0] +# +# do_test wal-11.2 { +# execsql { +# BEGIN; SELECT x FROM t2; +# } db2 +# } {x y z} +# do_test wal-11.2 { +# execsql { +# INSERT INTO t1 VALUES(randomblob(900)); +# PRAGMA checkpoint('10 100'); +# INSERT INTO t1 VALUES(randomblob(900)); +# INSERT INTO t2 VALUES('0'); +# SELECT * FROM logsummary; +# } +# } [list [file join [pwd] test.db] 2 1024 71 71 7 71 73] +# do_test wal-12.3 { +# execsql { PRAGMA integrity_check } db2 +# } {ok} +# db2 close + + +# Execute some transactions in 
auto-vacuum mode to test database file +# truncation. +# +do_test wal-12.1 { + reopen_db + execsql { + PRAGMA auto_vacuum = 1; + PRAGMA auto_vacuum; + } +} {1} +do_test wal-12.2 { + execsql { + PRAGMA page_size = 1024; + CREATE TABLE t1(x); + INSERT INTO t1 VALUES(randomblob(900)); + INSERT INTO t1 VALUES(randomblob(900)); + INSERT INTO t1 SELECT randomblob(900) FROM t1; /* 4 */ + INSERT INTO t1 SELECT randomblob(900) FROM t1; /* 8 */ + INSERT INTO t1 SELECT randomblob(900) FROM t1; /* 16 */ + INSERT INTO t1 SELECT randomblob(900) FROM t1; /* 32 */ + INSERT INTO t1 SELECT randomblob(900) FROM t1; /* 64 */ + PRAGMA checkpoint; + } + file size test.db +} [expr 67*1024] +do_test wal-12.3 { + execsql { + DELETE FROM t1 WHERE rowid<54; + PRAGMA checkpoint('1 100000'); + } + file size test.db +} [expr 14*1024] + +# Run some "warm-body" tests to ensure that log-summary files with more +# than 256 entries (log summaries that contain index blocks) work Ok. +# +do_test wal-13.1 { + reopen_db + execsql { + PRAGMA page_size = 1024; + CREATE TABLE t1(x PRIMARY KEY); + INSERT INTO t1 VALUES(randomblob(900)); + INSERT INTO t1 VALUES(randomblob(900)); + INSERT INTO t1 SELECT randomblob(900) FROM t1; /* 4 */ + INSERT INTO t1 SELECT randomblob(900) FROM t1; /* 8 */ + INSERT INTO t1 SELECT randomblob(900) FROM t1; /* 16 */ + INSERT INTO t1 SELECT randomblob(900) FROM t1; /* 32 */ + INSERT INTO t1 SELECT randomblob(900) FROM t1; /* 64 */ + INSERT INTO t1 SELECT randomblob(900) FROM t1; /* 128 */ + INSERT INTO t1 SELECT randomblob(900) FROM t1; /* 256 */ + } + file size test.db +} 0 +do_test wal-13.2 { + sqlite3_wal db2 test.db + execsql {PRAGMA integrity_check } db2 +} {ok} + +do_test wal-13.3 { + file delete -force test2.db test2.db-wal + file copy test.db test2.db + file copy test.db-wal test2.db-wal + sqlite3_wal db3 test2.db + execsql {PRAGMA integrity_check } db3 +} {ok} +db3 close + +do_test wal-13.4 { +breakpoint + execsql { PRAGMA checkpoint } + db2 close + sqlite3_wal db2 
test.db + execsql {PRAGMA integrity_check } db2 +} {ok} + +finish_test + diff --git a/test/walcrash.test b/test/walcrash.test new file mode 100644 index 0000000000..b3e6cd2b97 --- /dev/null +++ b/test/walcrash.test @@ -0,0 +1,248 @@ +# 2010 February 8 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# This file implements regression tests for SQLite library. +# + +# +# These are 'warm-body' tests of database recovery used while developing +# the WAL code. They serve to prove that a few really simple cases work: +# +# walcrash-1.*: Recover a database. +# walcrash-2.*: Recover a database where the failed transaction spanned more +# than one page. +# walcrash-3.*: Recover multiple databases where the failed transaction +# was a multi-file transaction. 
+# + +set testdir [file dirname $argv0] +source $testdir/tester.tcl +db close + +set seed 0 +set REPEATS 100 + +proc sqlite3_wal {args} { + eval sqlite3 $args + [lindex $args 0] eval { PRAGMA journal_mode = wal } +} + +# walcrash-1.* +# +for {set i 1} {$i < $REPEATS} {incr i} { + file delete -force test.db test.db-wal + do_test walcrash-1.$i.1 { + crashsql -delay 4 -file test.db-wal -seed [incr seed] { + PRAGMA journal_mode = WAL; + CREATE TABLE t1(a, b); + INSERT INTO t1 VALUES(1, 1); + INSERT INTO t1 VALUES(2, 3); + INSERT INTO t1 VALUES(3, 6); + } + } {1 {child process exited abnormally}} + do_test walcrash-1.$i.2 { + sqlite3_wal db test.db + execsql { SELECT sum(a)==max(b) FROM t1 } + } {1} + integrity_check walcrash-1.$i.3 + db close + + do_test walcrash-1.$i.4 { + crashsql -delay 2 -file test.db-wal -seed [incr seed] { + PRAGMA journal_mode = WAL; + PRAGMA journal_mode = WAL; + INSERT INTO t1 VALUES(4, (SELECT sum(a) FROM t1) + 4); + INSERT INTO t1 VALUES(5, (SELECT sum(a) FROM t1) + 5); + } + } {1 {child process exited abnormally}} + do_test walcrash-1.$i.5 { + sqlite3_wal db test.db + execsql { SELECT sum(a)==max(b) FROM t1 } + } {1} + integrity_check walcrash-1.$i.6 + db close +} + +# walcrash-2.* +# +for {set i 1} {$i < $REPEATS} {incr i} { + file delete -force test.db test.db-wal + do_test walcrash-2.$i.1 { + crashsql -delay 4 -file test.db-wal -seed [incr seed] { + PRAGMA journal_mode = WAL; + CREATE TABLE t1(a PRIMARY KEY, b); + INSERT INTO t1 VALUES(1, 2); + INSERT INTO t1 VALUES(3, 4); + INSERT INTO t1 VALUES(5, 9); + } + } {1 {child process exited abnormally}} + do_test walcrash-2.$i.2 { + sqlite3_wal db test.db + execsql { SELECT sum(a)==max(b) FROM t1 } + } {1} + integrity_check walcrash-2.$i.3 + db close + + do_test walcrash-2.$i.4 { + crashsql -delay 2 -file test.db-wal -seed [incr seed] { + PRAGMA journal_mode = WAL; + INSERT INTO t1 VALUES(6, (SELECT sum(a) FROM t1) + 6); + INSERT INTO t1 VALUES(7, (SELECT sum(a) FROM t1) + 7); + } + } {1 
{child process exited abnormally}} + do_test walcrash-2.$i.5 { + sqlite3_wal db test.db + execsql { SELECT sum(a)==max(b) FROM t1 } + } {1} + integrity_check walcrash-2.$i.6 + db close +} + +# walcrash-3.* +# +# for {set i 1} {$i < $REPEATS} {incr i} { +# file delete -force test.db test.db-wal +# file delete -force test2.db test2.db-wal +# +# do_test walcrash-3.$i.1 { +# crashsql -delay 2 -file test2.db-wal -seed [incr seed] { +# PRAGMA journal_mode = WAL; +# ATTACH 'test2.db' AS aux; +# CREATE TABLE t1(a PRIMARY KEY, b); +# CREATE TABLE aux.t2(a PRIMARY KEY, b); +# BEGIN; +# INSERT INTO t1 VALUES(1, 2); +# INSERT INTO t2 VALUES(1, 2); +# COMMIT; +# } +# } {1 {child process exited abnormally}} +# +# do_test walcrash-3.$i.2 { +# sqlite3_wal db test.db +# execsql { +# ATTACH 'test2.db' AS aux; +# SELECT * FROM t1 EXCEPT SELECT * FROM t2; +# } +# } {} +# do_test walcrash-3.$i.3 { execsql { PRAGMA main.integrity_check } } {ok} +# do_test walcrash-3.$i.4 { execsql { PRAGMA aux.integrity_check } } {ok} +# +# db close +# } + +# walcrash-4.* +# +for {set i 1} {$i < $REPEATS} {incr i} { + file delete -force test.db test.db-wal + file delete -force test2.db test2.db-wal + + do_test walcrash-4.$i.1 { + crashsql -delay 3 -file test.db-wal -seed [incr seed] -blocksize 4096 { + PRAGMA journal_mode = WAL; + PRAGMA page_size = 1024; + CREATE TABLE t1(a PRIMARY KEY, b); + INSERT INTO t1 VALUES(1, 2); + INSERT INTO t1 VALUES(3, 4); + } + } {1 {child process exited abnormally}} + + do_test walcrash-4.$i.2 { + sqlite3_wal db test.db + execsql { + SELECT * FROM t1 WHERE a = 1; + } + } {1 2} + do_test walcrash-4.$i.3 { execsql { PRAGMA main.integrity_check } } {ok} + + db close +} + +# walcrash-5.* +# +for {set i 1} {$i < $REPEATS} {incr i} { + file delete -force test.db test.db-wal + file delete -force test2.db test2.db-wal + + do_test walcrash-5.$i.1 { + crashsql -delay 11 -file test.db-wal -seed [incr seed] -blocksize 4096 { + PRAGMA journal_mode = WAL; + PRAGMA page_size = 1024; + 
BEGIN; + CREATE TABLE t1(x PRIMARY KEY); + INSERT INTO t1 VALUES(randomblob(900)); + INSERT INTO t1 VALUES(randomblob(900)); + INSERT INTO t1 SELECT randomblob(900) FROM t1; /* 4 */ + COMMIT; + INSERT INTO t1 SELECT randomblob(900) FROM t1 LIMIT 4; /* 8 */ + INSERT INTO t1 SELECT randomblob(900) FROM t1 LIMIT 4; /* 12 */ + INSERT INTO t1 SELECT randomblob(900) FROM t1 LIMIT 4; /* 16 */ + INSERT INTO t1 SELECT randomblob(900) FROM t1 LIMIT 4; /* 20 */ + INSERT INTO t1 SELECT randomblob(900) FROM t1 LIMIT 4; /* 24 */ + INSERT INTO t1 SELECT randomblob(900) FROM t1 LIMIT 4; /* 28 */ + INSERT INTO t1 SELECT randomblob(900) FROM t1 LIMIT 4; /* 32 */ + + PRAGMA checkpoint('70 70'); + INSERT INTO t1 VALUES(randomblob(900)); + INSERT INTO t1 VALUES(randomblob(900)); + INSERT INTO t1 VALUES(randomblob(900)); + } + } {1 {child process exited abnormally}} + + do_test walcrash-5.$i.2 { + sqlite3_wal db test.db + execsql { SELECT count(*)==33 OR count(*)==34 FROM t1 WHERE x != 1 } + } {1} + do_test walcrash-5.$i.3 { execsql { PRAGMA main.integrity_check } } {ok} + + db close +} + +# walcrash-6.* +# +for {set i 1} {$i < $REPEATS} {incr i} { + file delete -force test.db test.db-wal + file delete -force test2.db test2.db-wal + + do_test walcrash-6.$i.1 { + crashsql -delay 12 -file test.db-wal -seed [incr seed] -blocksize 512 { + PRAGMA journal_mode = WAL; + PRAGMA page_size = 1024; + BEGIN; + CREATE TABLE t1(x PRIMARY KEY); + INSERT INTO t1 VALUES(randomblob(900)); + INSERT INTO t1 VALUES(randomblob(900)); + INSERT INTO t1 SELECT randomblob(900) FROM t1; /* 4 */ + COMMIT; + INSERT INTO t1 SELECT randomblob(900) FROM t1 LIMIT 4; /* 8 */ + INSERT INTO t1 SELECT randomblob(900) FROM t1 LIMIT 4; /* 12 */ + INSERT INTO t1 SELECT randomblob(900) FROM t1 LIMIT 4; /* 16 */ + INSERT INTO t1 SELECT randomblob(900) FROM t1 LIMIT 4; /* 20 */ + INSERT INTO t1 SELECT randomblob(900) FROM t1 LIMIT 4; /* 24 */ + INSERT INTO t1 SELECT randomblob(900) FROM t1 LIMIT 4; /* 28 */ + INSERT INTO t1 
SELECT randomblob(900) FROM t1 LIMIT 4; /* 32 */ + + PRAGMA checkpoint('70 70'); + INSERT INTO t1 VALUES(randomblob(900)); + INSERT INTO t1 VALUES(randomblob(900)); + INSERT INTO t1 VALUES(randomblob(900)); + } + } {1 {child process exited abnormally}} + + do_test walcrash-6.$i.2 { + sqlite3_wal db test.db + execsql { SELECT count(*)==34 OR count(*)==35 FROM t1 WHERE x != 1 } + } {1} + do_test walcrash-6.$i.3 { execsql { PRAGMA main.integrity_check } } {ok} + + db close +} + +finish_test + diff --git a/test/walslow.test b/test/walslow.test new file mode 100644 index 0000000000..cec4e26d98 --- /dev/null +++ b/test/walslow.test @@ -0,0 +1,69 @@ +# 2010 March 17 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# + +set testdir [file dirname $argv0] +source $testdir/tester.tcl + +proc reopen_db {} { + catch { db close } + file delete -force test.db test.db-wal + sqlite3 db test.db + execsql { PRAGMA journal_mode = wal } +} + +db close +save_prng_state +for {set seed 1} {$seed<10} {incr seed} { + expr srand($seed) + restore_prng_state + reopen_db + do_test walslow-1.seed=$seed.0 { + execsql { CREATE TABLE t1(a, b) } + execsql { CREATE INDEX i1 ON t1(a) } + execsql { CREATE INDEX i2 ON t1(b) } + } {} + + for {set iTest 1} {$iTest < 100} {incr iTest} { + + do_test walslow-1.seed=$seed.$iTest.1 { + set w [expr int(rand()*2000)] + set x [expr int(rand()*2000)] + set y [expr int(rand()*9)+1] + set z [expr int(rand()*2)] + execsql { INSERT INTO t1 VALUES(randomblob($w), randomblob($x)) } + execsql { PRAGMA integrity_check } + } {ok} + + do_test walslow-1.seed=$seed.$iTest.2 { + execsql "PRAGMA checkpoint('$y $y $z')" + execsql { PRAGMA integrity_check } + } {ok} + + do_test 
walslow-1.seed=$seed.$iTest.3 { + file delete -force testX.db testX.db-wal + file copy test.db testX.db + file copy test.db-wal testX.db-wal + + sqlite3 db2 testX.db + execsql { PRAGMA journal_mode = WAL } db2 + execsql { PRAGMA integrity_check } db2 + } {ok} + + do_test walslow-1.seed=$seed.$iTest.4 { + execsql { SELECT count(*) FROM t1 WHERE a!=b } db2 + } [execsql { SELECT count(*) FROM t1 WHERE a!=b }] + db2 close + } +} + + +finish_test diff --git a/test/walthread.test b/test/walthread.test new file mode 100644 index 0000000000..46474e2cb5 --- /dev/null +++ b/test/walthread.test @@ -0,0 +1,192 @@ +# 2007 September 7 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# + +set testdir [file dirname $argv0] + +source $testdir/tester.tcl +if {[run_thread_tests]==0} { finish_test ; return } + +do_test walthread-1.1 { + execsql { + PRAGMA journal_mode = WAL; + CREATE TABLE t1(x PRIMARY KEY); + INSERT INTO t1 VALUES(randomblob(100)); + INSERT INTO t1 VALUES(randomblob(100)); + INSERT INTO t1 SELECT md5sum(x) FROM t1; + } +} {wal} +do_test walthread-1.2 { + execsql { + SELECT (SELECT count(*) FROM t1), ( + SELECT md5sum(x) FROM t1 WHERE oid != (SELECT max(oid) FROM t1) + ) == ( + SELECT x FROM t1 WHERE oid = (SELECT max(oid) FROM t1) + ) + } +} {3 1} +do_test walthread-1.3 { + execsql { PRAGMA integrity_check } +} {ok} +do_test walthread-1.4 { + execsql { PRAGMA lock_status } +} {main unlocked temp unknown} + +#-------------------------------------------------------------------------- +# Start N threads. Each thread performs both read and write transactions. 
+# Each read transaction consists of: +# +# 1) Reading the md5sum of all but the last table row, +# 2) Running integrity check. +# 3) Reading the value stored in the last table row, +# 4) Check that the values read in steps 1 and 3 are the same, and that +# the md5sum of all but the last table row has not changed. +# +# Each write transaction consists of: +# +# 1) Modifying the contents of t1 (inserting, updating, deleting rows). +# 2) Appending a new row to the table containing the md5sum() of all +# rows in the table. +# +# Each of the N threads runs N read transactions followed by a single write +# transaction in a loop as fast as possible. +# +# There is also a single checkpointer thread. It runs the following loop: +# +# 1) Execute "CHECKPOINT main 32 -1 1" +# 2) Sleep for 500 ms. +# + +set thread_program { + proc rest {ms} { + set ::rest 0 + after $ms {set ::rest 1} + vwait ::rest + } + + proc dosql {DB sql} { + set res "" + set stmt [sqlite3_prepare_v2 $DB $sql -1 dummy_tail] + set rc [sqlite3_step $stmt] + if {$rc eq "SQLITE_ROW"} { + set res [sqlite3_column_text $stmt 0] + } + set rc [sqlite3_finalize $stmt] + + if {$rc ne "SQLITE_OK"} { + error $rc + } + return $res + } + + proc read_transaction {DB} { + dosql $DB BEGIN + + set md5_1 [dosql $DB { + SELECT md5sum(x) FROM t1 WHERE rowid != (SELECT max(rowid) FROM t1) + }] + set check [dosql $DB { PRAGMA integrity_check }] + set md5_2 [dosql $DB { + SELECT x FROM t1 WHERE rowid = (SELECT max(rowid) FROM t1) + }] + set md5_3 [dosql $DB { + SELECT md5sum(x) FROM t1 WHERE rowid != (SELECT max(rowid) FROM t1) + }] + + dosql $DB COMMIT + + if {$check ne "ok" + || $md5_1 ne $md5_2 + || $md5_2 ne $md5_3 + } { + error "Failed read transaction $check $md5_1 $md5_2 $md5_3" + } + } + + proc write_transaction {DB} { + dosql $DB BEGIN + dosql $DB "INSERT INTO t1 VALUES(randomblob(100))" + dosql $DB "INSERT INTO t1 VALUES(randomblob(100))" + dosql $DB "INSERT INTO t1 SELECT md5sum(x) FROM t1" + dosql $DB COMMIT + } + + 
proc checkpointer {DB} { + while { !$::finished } { + dosql $DB "PRAGMA checkpoint(32)" + rest 1000 + } + } + + proc worker {DB N} { + set j 0 + while { !$::finished } { + for {set i 0} {$i < $N} {incr i} { read_transaction $DB } + write_transaction $DB + rest 1 + } + } + + set ::finished 0 + after [expr $seconds*1000] {set ::finished 1} + + set ::DB [sqlthread open test.db] + dosql $::DB { PRAGMA journal_mode = WAL } + + set rc [catch { + if {$role eq "worker"} { worker $DB $N } + if {$role eq "checkpointer"} { checkpointer $DB } + } msg] + + sqlite3_close $::DB + + if {$rc==0} { set msg OK } + set msg +} + +set NTHREAD 6 +set SECONDS 30 + +#set prg "set N $NTHREAD ; set seconds $SECONDS" +set prg "set N 1 ; set seconds $SECONDS" + +array unset finished +for {set i 0} {$i < $NTHREAD} {incr i} { + thread_spawn finished($i) {set role worker} $prg $thread_program +} +thread_spawn finished(C) {set role checkpointer} $prg $thread_program +#set finished(C) 1 + +puts "... test runs for approximately $SECONDS seconds ..." +for {set i 0} {$i < $::NTHREAD} {incr i} { + if {![info exists finished($i)]} { + vwait finished($i) + } + do_test walthread-2.$i { + set ::finished($i) + } OK +} +do_test walthread-2.C { + if {![info exists finished(C)]} { vwait finished(C) } + set ::finished(C) +} OK + +set logsize 0 + +set rows [execsql { SELECT count(*) FROM t1 }] +catch { set logsize [expr [file size test.db-wal] / 1024] } +set dbsize [expr [file size test.db] / 1024] + +puts "rows=$rows db=${dbsize}K log=${logsize}K" + +finish_test + + diff --git a/tool/mksqlite3c.tcl b/tool/mksqlite3c.tcl index 38cee50ec2..12e4a5ce00 100644 --- a/tool/mksqlite3c.tcl +++ b/tool/mksqlite3c.tcl @@ -93,6 +93,7 @@ foreach hdr { hash.h hwtime.h keywordhash.h + log.h mutex.h opcodes.h os_common.h @@ -243,6 +244,7 @@ foreach file { pcache.c pcache1.c rowset.c + log.c pager.c btmutex.c