diff --git a/manifest b/manifest index c374482ea8..9c9da2bd3d 100644 --- a/manifest +++ b/manifest @@ -1,8 +1,5 @@ ------BEGIN PGP SIGNED MESSAGE----- -Hash: SHA1 - -C Refactor\sand\ssimplify\sthe\slogic\sused\sto\schange\sjournalmode. -D 2010-06-11T17:01:25 +C Experimental\schange\sto\sthe\sxShmXXX\sparts\sof\sthe\sVFS\sinterface. +D 2010-06-11T19:04:21 F Makefile.arm-wince-mingw32ce-gcc fcd5e9cd67fe88836360bb4f9ef4cb7f8e2fb5a0 F Makefile.in a5cad1f8f3e021356bfcc6c77dc16f6f1952bbc3 F Makefile.linux-gcc d53183f4aa6a9192d249731c90dbdffbd2c68654 @@ -153,11 +150,11 @@ F src/mutex_os2.c 6a62583e374ba3ac1a3fcc0da2bfdac7d3942689 F src/mutex_unix.c cf84466b4fdd2baa0d5a10bb19f08b2abc1ce42e F src/mutex_w32.c 1fe0e735897be20e09dd6f53c3fb516c6b48c0eb F src/notify.c cbfa66a836da3a51567209636e6a94059c137930 -F src/os.c 1516984144e26734f97748f891f1a04f9e294c2e -F src/os.h 6f529984a29511c7a3479cfe549c10bfa131532f +F src/os.c 00ab9dcdee6e33ff3d060744c86af25200c51e0f +F src/os.h a0d2c1436cb6003e6da16001499a0b828f1edb34 F src/os_common.h a8f95b81eca8a1ab8593d23e94f8a35f35d4078f F src/os_os2.c 665876d5eec7585226b0a1cf5e18098de2b2da19 -F src/os_unix.c 12051d37e533cdaa8bb13c9d9fe2a13e08552187 +F src/os_unix.c 29dac62790ccea7db1516be3abb007988accb165 F src/os_win.c 0cf1f571546f165001e2391b5d4a4a16d86977d3 F src/pager.c 2964185d4356d0dc159b8340e52d2538d32394e5 F src/pager.h ca1f23c0cf137ac26f8908df2427c8b308361efd @@ -173,7 +170,7 @@ F src/resolve.c ac5f1a713cd1ae77f08b83cc69581e11bf5ae6f9 F src/rowset.c 69afa95a97c524ba6faf3805e717b5b7ae85a697 F src/select.c c03d8a0565febcde8c6a12c5d77d065fddae889b F src/shell.c fd4ccdb37c3b68de0623eb938a649e0990710714 -F src/sqlite.h.in b6a64327e174cf725e57dd93ddf1e97c52dd41e2 +F src/sqlite.h.in 092df034f4b426ffbb9e5bb905958fa35bbb7f7a F src/sqlite3ext.h 69dfb8116af51b84a029cddb3b35062354270c89 F src/sqliteInt.h 242987ebd2366ea36650a09cdab04a9163c62109 F src/sqliteLimit.h 196e2f83c3b444c4548fc1874f52f84fdbda40f3 @@ -195,7 +192,7 @@ F src/test_backup.c c129c91127e9b46e335715ae2e75756e25ba27de F src/test_btree.c 47cd771250f09cdc6e12dda5bc71bc0b3abc96e2 F src/test_config.c 6210f501d358bde619ae761f06f123529c6ba24f F src/test_demovfs.c da81a5f7785bb352bda7911c332a983ec4f17f27 -F src/test_devsym.c 709712f5157667410cd0dad1b7b1b54319c122c5 +F src/test_devsym.c cf64a4b602ccde10c9261283d1b9be12f4c4a0ea F src/test_func.c 13b582345fb1185a93e46c53310fae8547dcce20 F src/test_hexio.c 1237f000ec7a491009b1233f5c626ea71bce1ea2 F src/test_init.c 5d624ffd0409d424cf9adbfe1f056b200270077c @@ -212,7 +209,7 @@ F src/test_schema.c 8c06ef9ddb240c7a0fcd31bc221a6a2aade58bf0 F src/test_server.c bbba05c144b5fc4b52ff650a4328027b3fa5fcc6 F src/test_tclvar.c f4dc67d5f780707210d6bb0eb6016a431c04c7fa F src/test_thread.c aa9919c885a1fe53eafc73492f0898ee6c0a0726 -F src/test_vfs.c d329e3ea93624f65d7b6a46209861ddecea4e21d +F src/test_vfs.c b83206d2c04b3ba84d8d85420c4c7573c58feba5 F src/test_wsd.c 41cadfd9d97fe8e3e4e44f61a4a8ccd6f7ca8fe9 F src/tokenize.c 25ceb0f0a746ea1d0f9553787f3f0a56853cfaeb F src/trigger.c 8927588cb9e6d47f933b53bfe74200fbb504100d @@ -229,7 +226,7 @@ F src/vdbeblob.c 5327132a42a91e8b7acfb60b9d2c3b1c5c863e0e F src/vdbemem.c 2a82f455f6ca6f78b59fb312f96054c04ae0ead1 F src/vdbetrace.c 864cef96919323482ebd9986f2132435115e9cc2 F src/vtab.c a0f8a40274e4261696ef57aa806de2776ab72cda -F src/wal.c 2cdfea9a5e50e4dde48767e69e1fead2ff1781cd +F src/wal.c 0aa364734d6daca75771944fc2b4a8f36e63fc4e F src/wal.h 4ace25262452d17e7d3ec970c89ee17794004008 F src/walker.c 3112bb3afe1d85dc52317cb1d752055e9a781f8f F src/where.c 1c895bef33d0dfc7ed90fb1f74120435d210ea56 @@ -540,7 +537,7 @@ F test/pageropt.test 8146bf448cf09e87bb1867c2217b921fb5857806 F test/pagesize.test 76aa9f23ecb0741a4ed9d2e16c5fa82671f28efb F test/pcache.test eebc4420b37cb07733ae9b6e99c9da7c40dd6d58 F test/pcache2.test 0d85f2ab6963aee28c671d4c71bec038c00a1d16 -F test/permutations.test ad10d7b31b4a585977380886c832e2ac13c41237 +F test/permutations.test 64fbafa685149be54a1ceb545942911f998c604d F test/pragma.test 6960f9efbce476f70ba9ee2171daf5042f9e3d8a F test/pragma2.test 5364893491b9231dd170e3459bfc2e2342658b47 F test/printf.test 05970cde31b1a9f54bd75af60597be75a5c54fea @@ -768,8 +765,8 @@ F test/vtab_alter.test 9e374885248f69e251bdaacf480b04a197f125e5 F test/vtab_err.test 0d4d8eb4def1d053ac7c5050df3024fd47a3fbd8 F test/vtab_shared.test 0eff9ce4f19facbe0a3e693f6c14b80711a4222d F test/wal.test 0a599c3c4812ed92bc7ad9efcc2c4007fe4cc99a -F test/wal2.test 854a2b409450f1cb756c2bbd1e87e30740094357 -F test/wal3.test ae876ff988af5b2b34d27474e0dd1a8c84e9bbcb +F test/wal2.test f9dce93acecff697fc1935869b1ae4cb7dc14587 +F test/wal3.test 1d3aee1a0295db941a0323c0ce5ac16bd5b7689d F test/wal_common.tcl 3e953ae60919281688ea73e4d0aa0e1bc94becd9 F test/walbak.test e7650a26eb4b8abeca9b145b1af1e63026dde432 F test/walcksum.test 4efa8fb88c32bed8288ea4385a9cc113a5c8f0bf @@ -823,14 +820,10 @@ F tool/speedtest2.tcl ee2149167303ba8e95af97873c575c3e0fab58ff F tool/speedtest8.c 2902c46588c40b55661e471d7a86e4dd71a18224 F tool/speedtest8inst1.c 293327bc76823f473684d589a8160bde1f52c14e F tool/vdbe-compress.tcl d70ea6d8a19e3571d7ab8c9b75cba86d1173ff0f -P af353bd89e5ec89f113d217225cc59cbc8373d64 -R 3e2d7a9cc84ff16ee3e947d8ce602cce -U drh -Z e14f3acbecbf2df0abe6fde12480ec15 ------BEGIN PGP SIGNATURE----- -Version: GnuPG v1.4.6 (GNU/Linux) - -iD8DBQFMEmvooxKgR168RlERAg5RAJ9VmP08NyW1of8QWkDMnAiHK8A/xgCeLBcs -hbn4o1Zj2vKA/g5KFcfNycs= -=Li8t ------END PGP SIGNATURE----- +P 95cc3f6fdec5494560c3cd4439d06870d1c62506 +R 05bb7be31606a33c54980de0a121cf7e +T *branch * experimental +T *sym-experimental * +T -sym-trunk * +U dan +Z 7e046a655e9f9112461a59731e31d3dc diff --git a/manifest.uuid b/manifest.uuid index cc28afffbf..447d7ce948 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -95cc3f6fdec5494560c3cd4439d06870d1c62506 \ No newline at end of file +ca68472db01c14a899892007d1cbaff5e86ae193 \ No newline at end of file diff --git a/src/os.c b/src/os.c index 0b17a6b633..697233be0c 100644 --- a/src/os.c +++ b/src/os.c @@ -119,6 +119,15 @@ void sqlite3OsShmBarrier(sqlite3_file *id){ int sqlite3OsShmClose(sqlite3_file *id, int deleteFlag){ return id->pMethods->xShmClose(id, deleteFlag); } +int sqlite3OsShmPage( + sqlite3_file *id, + int iPage, + int pgsz, + int isWrite, + void volatile **pp +){ + return id->pMethods->xShmPage(id, iPage, pgsz, isWrite, pp); +} /* ** The next group of routines are convenience wrappers around the diff --git a/src/os.h b/src/os.h index 001732798e..d1d6bde058 100644 --- a/src/os.h +++ b/src/os.h @@ -254,6 +254,7 @@ int sqlite3OsShmRelease(sqlite3_file *id); int sqlite3OsShmLock(sqlite3_file *id, int, int, int); void sqlite3OsShmBarrier(sqlite3_file *id); int sqlite3OsShmClose(sqlite3_file *id, int); +int sqlite3OsShmPage(sqlite3_file *,int,int,int,void volatile **); /* ** Functions for accessing sqlite3_vfs methods diff --git a/src/os_unix.c b/src/os_unix.c index dadc3c98af..612d5c4366 100644 --- a/src/os_unix.c +++ b/src/os_unix.c @@ -3141,8 +3141,14 @@ struct unixShmNode { sqlite3_mutex *mutexBuf; /* Mutex to access zBuf[] */ char *zFilename; /* Name of the mmapped file */ int h; /* Open file descriptor */ + int szMap; /* Size of the mapping into memory */ char *pMMapBuf; /* Where currently mmapped(). NULL if unmapped */ + + int pgsz; /* Size of shared-memory pages */ + int nPage; /* Size of array apPage */ + char **apPage; /* Array of mapped shared-memory pages */ + int nRef; /* Number of unixShm objects pointing to this */ unixShm *pFirst; /* All unixShm objects pointing to this */ #ifdef SQLITE_DEBUG @@ -3266,10 +3272,15 @@ static void unixShmPurge(unixFile *pFd){ unixShmNode *p = pFd->pInode->pShmNode; assert( unixMutexHeld() ); if( p && p->nRef==0 ){ + int i; assert( p->pInode==pFd->pInode ); if( p->mutex ) sqlite3_mutex_free(p->mutex); if( p->mutexBuf ) sqlite3_mutex_free(p->mutexBuf); if( p->pMMapBuf ) munmap(p->pMMapBuf, p->szMap); + for(i=0; inPage; i++){ + munmap(p->apPage[i], p->pgsz); + } + sqlite3_free(p->apPage); if( p->h>=0 ) close(p->h); p->pInode->pShmNode = 0; sqlite3_free(p); @@ -3706,6 +3717,71 @@ static void unixShmBarrier( unixLeaveMutex(); } +static int unixShmPage( + sqlite3_file *fd, /* Handle open on database file */ + int iPage, /* Page to retrieve */ + int pgsz, /* Size of pages */ + int isWrite, /* True to extend file if necessary */ + void volatile **pp /* OUT: Mapped memory */ +){ + unixFile *pDbFd = (unixFile*)fd; + unixShm *p = pDbFd->pShm; + unixShmNode *pShmNode = p->pShmNode; + int rc = SQLITE_OK; + + assert( p->hasMutexBuf==0 ); + sqlite3_mutex_enter(pShmNode->mutexBuf); + assert( pgsz==pShmNode->pgsz || pShmNode->nPage==0 ); + + if( pShmNode->nPage<=iPage ){ + char **apNew; /* New apPage[] array */ + int nByte = (iPage+1)*pgsz; /* Minimum required file size */ + struct stat sStat; + + pShmNode->pgsz = pgsz; + + /* Make sure the underlying file is large enough (or fail) */ + if( fstat(pShmNode->h, &sStat) ){ + rc = SQLITE_IOERR_SHMSIZE; + goto shmpage_out; + }else if( sStat.st_sizeh, nByte) ){ + rc = SQLITE_IOERR_SHMSIZE; + goto shmpage_out; + } + } + + apNew = (char**)sqlite3_realloc(pShmNode->apPage, (iPage+1)*sizeof(char *)); + if( !apNew ){ + rc = SQLITE_IOERR_NOMEM; + goto shmpage_out; + } + pShmNode->apPage = apNew; + + while(pShmNode->nPage<=iPage){ + void *pMem = mmap( + 0, pgsz, PROT_READ|PROT_WRITE, MAP_SHARED, pShmNode->h, iPage*pgsz + ); + if( pMem==MAP_FAILED ){ + assert(0); + rc = SQLITE_IOERR; + goto shmpage_out; + } + pShmNode->apPage[pShmNode->nPage] = pMem; + pShmNode->nPage++; + } + } + +shmpage_out: + if( pShmNode->nPage>iPage ){ + *pp = pShmNode->apPage[iPage]; + }else{ + *pp = 0; + } + sqlite3_mutex_leave(pShmNode->mutexBuf); + return rc; +} #else # define unixShmOpen 0 @@ -3715,6 +3791,7 @@ static void unixShmBarrier( # define unixShmLock 0 # define unixShmBarrier 0 # define unixShmClose 0 +# define unixShmPage 0 #endif /* #ifndef SQLITE_OMIT_WAL */ /* @@ -3778,7 +3855,8 @@ static const sqlite3_io_methods METHOD = { \ unixShmRelease, /* xShmRelease */ \ unixShmLock, /* xShmLock */ \ unixShmBarrier, /* xShmBarrier */ \ - unixShmClose /* xShmClose */ \ + unixShmClose, /* xShmClose */ \ + unixShmPage /* xShmPage */ \ }; \ static const sqlite3_io_methods *FINDER##Impl(const char *z, unixFile *p){ \ UNUSED_PARAMETER(z); UNUSED_PARAMETER(p); \ diff --git a/src/sqlite.h.in b/src/sqlite.h.in index e583e47d95..94b994ee39 100644 --- a/src/sqlite.h.in +++ b/src/sqlite.h.in @@ -666,6 +666,7 @@ struct sqlite3_io_methods { int (*xShmLock)(sqlite3_file*, int offset, int n, int flags); void (*xShmBarrier)(sqlite3_file*); int (*xShmClose)(sqlite3_file*, int deleteFlag); + int (*xShmPage)(sqlite3_file*, int iPage, int pgsz, int, void volatile**); /* Methods above are valid for version 2 */ /* Additional methods may be added in future releases */ }; diff --git a/src/test_devsym.c b/src/test_devsym.c index 0464804939..e27c00176e 100644 --- a/src/test_devsym.c +++ b/src/test_devsym.c @@ -57,6 +57,7 @@ static int devsymShmRelease(sqlite3_file*); static int devsymShmLock(sqlite3_file*,int,int,int); static void devsymShmBarrier(sqlite3_file*); static int devsymShmClose(sqlite3_file*,int); +static int devsymShmPage(sqlite3_file*,int,int,int, void volatile **); /* ** Method declarations for devsym_vfs. @@ -125,7 +126,8 @@ static sqlite3_io_methods devsym_io_methods = { devsymShmRelease, /* xShmRelease */ devsymShmLock, /* xShmLock */ devsymShmBarrier, /* xShmBarrier */ - devsymShmClose /* xShmClose */ + devsymShmClose, /* xShmClose */ + devsymShmPage /* xShmPage */ }; struct DevsymGlobal { @@ -275,6 +277,16 @@ static int devsymShmClose(sqlite3_file *pFile, int delFlag){ devsym_file *p = (devsym_file *)pFile; return sqlite3OsShmClose(p->pReal, delFlag); } +static int devsymShmPage( + sqlite3_file *pFile, + int iPage, + int pgsz, + int isWrite, + void volatile **pp +){ + devsym_file *p = (devsym_file *)pFile; + return sqlite3OsShmPage(p->pReal, iPage, pgsz, isWrite, pp); +} diff --git a/src/test_vfs.c b/src/test_vfs.c index 1083080eb9..745a417a7a 100644 --- a/src/test_vfs.c +++ b/src/test_vfs.c @@ -75,10 +75,14 @@ struct Testvfs { #define TESTVFS_SHMLOCK_MASK 0x00000010 #define TESTVFS_SHMBARRIER_MASK 0x00000020 #define TESTVFS_SHMCLOSE_MASK 0x00000040 +#define TESTVFS_SHMPAGE_MASK 0x00000080 -#define TESTVFS_OPEN_MASK 0x00000080 -#define TESTVFS_SYNC_MASK 0x00000100 -#define TESTVFS_ALL_MASK 0x000001FF +#define TESTVFS_OPEN_MASK 0x00000100 +#define TESTVFS_SYNC_MASK 0x00000200 +#define TESTVFS_ALL_MASK 0x000003FF + + +#define TESTVFS_MAX_PAGES 256 /* ** A shared-memory buffer. There is one of these objects for each shared @@ -87,8 +91,8 @@ struct Testvfs { */ struct TestvfsBuffer { char *zFile; /* Associated file name */ - int n; /* Size of allocated buffer in bytes */ - u8 *a; /* Buffer allocated using ckalloc() */ + int pgsz; /* Page size */ + u8 *aPage[TESTVFS_MAX_PAGES]; /* Array of ckalloc'd pages */ TestvfsFile *pFile; /* List of open handles */ TestvfsBuffer *pNext; /* Next in linked list of all buffers */ }; @@ -139,6 +143,7 @@ static int tvfsShmRelease(sqlite3_file*); static int tvfsShmLock(sqlite3_file*, int , int, int); static void tvfsShmBarrier(sqlite3_file*); static int tvfsShmClose(sqlite3_file*, int); +static int tvfsShmPage(sqlite3_file*,int,int,int, void volatile **); static sqlite3_io_methods tvfs_io_methods = { 2, /* iVersion */ @@ -160,7 +165,8 @@ static sqlite3_io_methods tvfs_io_methods = { tvfsShmRelease, /* xShmRelease */ tvfsShmLock, /* xShmLock */ tvfsShmBarrier, /* xShmBarrier */ - tvfsShmClose /* xShmClose */ + tvfsShmClose, /* xShmClose */ + tvfsShmPage /* xShmPage */ }; static int tvfsResultCode(Testvfs *p, int *pRc){ @@ -547,16 +553,6 @@ static int tvfsCurrentTime(sqlite3_vfs *pVfs, double *pTimeOut){ return PARENTVFS(pVfs)->xCurrentTime(PARENTVFS(pVfs), pTimeOut); } -static void tvfsGrowBuffer(TestvfsFile *pFd, int reqSize, int *pNewSize){ - TestvfsBuffer *pBuffer = pFd->pShm; - if( reqSize>pBuffer->n ){ - pBuffer->a = (u8 *)ckrealloc((char *)pBuffer->a, reqSize); - memset(&pBuffer->a[pBuffer->n], 0x55, reqSize-pBuffer->n); - pBuffer->n = reqSize; - } - *pNewSize = pBuffer->n; -} - static int tvfsInjectIoerr(Testvfs *p){ int ret = 0; if( p->ioerr ){ @@ -624,66 +620,66 @@ static int tvfsShmSize( int reqSize, int *pNewSize ){ - int rc = SQLITE_OK; - TestvfsFile *pFd = (TestvfsFile *)pFile; - Testvfs *p = (Testvfs *)(pFd->pVfs->pAppData); - - if( p->pScript && p->mask&TESTVFS_SHMSIZE_MASK ){ - tvfsExecTcl(p, "xShmSize", - Tcl_NewStringObj(pFd->pShm->zFile, -1), pFd->pShmId, 0 - ); - tvfsResultCode(p, &rc); - } - if( rc==SQLITE_OK && p->mask&TESTVFS_SHMSIZE_MASK && tvfsInjectIoerr(p) ){ - rc = SQLITE_IOERR; - } - if( rc==SQLITE_OK ){ - tvfsGrowBuffer(pFd, reqSize, pNewSize); - } - return rc; + assert(0); + return SQLITE_OK; } - static int tvfsShmGet( sqlite3_file *pFile, int reqMapSize, int *pMapSize, volatile void **pp +){ + assert(0); + return SQLITE_OK; +} +static int tvfsShmRelease(sqlite3_file *pFile){ + assert(0); + return SQLITE_OK; +} + +static void tvfsAllocPage(TestvfsBuffer *p, int iPage, int pgsz){ + assert( iPageaPage[iPage]==0 ){ + p->aPage[iPage] = ckalloc(pgsz); + memset(p->aPage[iPage], 0, pgsz); + p->pgsz = pgsz; + } +} + +static int tvfsShmPage( + sqlite3_file *pFile, /* Handle open on database file */ + int iPage, /* Page to retrieve */ + int pgsz, /* Size of pages */ + int isWrite, /* True to extend file if necessary */ + void volatile **pp /* OUT: Mapped memory */ ){ int rc = SQLITE_OK; TestvfsFile *pFd = (TestvfsFile *)pFile; Testvfs *p = (Testvfs *)(pFd->pVfs->pAppData); - if( p->pScript && p->mask&TESTVFS_SHMGET_MASK ){ - tvfsExecTcl(p, "xShmGet", - Tcl_NewStringObj(pFd->pShm->zFile, -1), pFd->pShmId, - Tcl_NewIntObj(reqMapSize) + if( p->pScript && p->mask&TESTVFS_SHMPAGE_MASK ){ + Tcl_Obj *pArg = Tcl_NewObj(); + Tcl_ListObjAppendElement(p->interp, pArg, Tcl_NewIntObj(iPage)); + Tcl_ListObjAppendElement(p->interp, pArg, Tcl_NewIntObj(pgsz)); + Tcl_ListObjAppendElement(p->interp, pArg, Tcl_NewIntObj(isWrite)); + tvfsExecTcl(p, "xShmPage", + Tcl_NewStringObj(pFd->pShm->zFile, -1), pFd->pShmId, pArg ); tvfsResultCode(p, &rc); } - if( rc==SQLITE_OK && p->mask&TESTVFS_SHMGET_MASK && tvfsInjectIoerr(p) ){ + if( rc==SQLITE_OK && p->mask&TESTVFS_SHMPAGE_MASK && tvfsInjectIoerr(p) ){ rc = SQLITE_IOERR; } - *pMapSize = pFd->pShm->n; - *pp = pFd->pShm->a; - return rc; -} - -static int tvfsShmRelease(sqlite3_file *pFile){ - int rc = SQLITE_OK; - TestvfsFile *pFd = (TestvfsFile *)pFile; - Testvfs *p = (Testvfs *)(pFd->pVfs->pAppData); - - if( p->pScript && p->mask&TESTVFS_SHMRELEASE_MASK ){ - tvfsExecTcl(p, "xShmRelease", - Tcl_NewStringObj(pFd->pShm->zFile, -1), pFd->pShmId, 0 - ); - tvfsResultCode(p, &rc); + if( rc==SQLITE_OK && isWrite && !pFd->pShm->aPage[iPage] ){ + tvfsAllocPage(pFd->pShm, iPage, pgsz); } + *pp = (void volatile *)pFd->pShm->aPage[iPage]; return rc; } + static int tvfsShmLock( sqlite3_file *pFile, int ofst, @@ -782,10 +778,13 @@ static int tvfsShmClose( *ppFd = pFd->pNext; if( pBuffer->pFile==0 ){ + int i; TestvfsBuffer **pp; for(pp=&p->pBuffer; *pp!=pBuffer; pp=&((*pp)->pNext)); *pp = (*pp)->pNext; - ckfree((char *)pBuffer->a); + for(i=0; pBuffer->aPage[i]; i++){ + ckfree((char *)pBuffer->aPage[i]); + } ckfree((char *)pBuffer); } pFd->pShm = 0; @@ -821,6 +820,8 @@ static int testvfs_obj_cmd( switch( (enum DB_enum)i ){ case CMD_SHM: { + Tcl_Obj *pObj; + int i; TestvfsBuffer *pBuffer; char *zName; if( objc!=3 && objc!=4 ){ @@ -838,11 +839,22 @@ static int testvfs_obj_cmd( if( objc==4 ){ int n; u8 *a = Tcl_GetByteArrayFromObj(objv[3], &n); - pBuffer->a = (u8 *)ckrealloc((char *)pBuffer->a, n); - pBuffer->n = n; - memcpy(pBuffer->a, a, n); + assert( pBuffer->pgsz==0 || pBuffer->pgsz==32768 ); + for(i=0; i*32768aPage[i], &a[i*32768], nByte); + } } - Tcl_SetObjResult(interp, Tcl_NewByteArrayObj(pBuffer->a, pBuffer->n)); + + pObj = Tcl_NewObj(); + for(i=0; pBuffer->aPage[i]; i++){ + Tcl_AppendObjToObj(pObj, Tcl_NewByteArrayObj(pBuffer->aPage[i], 32768)); + } + Tcl_SetObjResult(interp, pObj); break; } diff --git a/src/wal.c b/src/wal.c index 017b844908..a05eec0b82 100644 --- a/src/wal.c +++ b/src/wal.c @@ -370,8 +370,8 @@ struct Wal { sqlite3_file *pDbFd; /* File handle for the database file */ sqlite3_file *pWalFd; /* File handle for WAL file */ u32 iCallback; /* Value to pass to log callback (or 0) */ - int szWIndex; /* Size of the wal-index that is mapped in mem */ - volatile u32 *pWiData; /* Pointer to wal-index content in memory */ + int nWiData; /* Size of array apWiData */ + volatile u32 **apWiData; /* Pointer to wal-index content in memory */ u16 szPage; /* Database page size */ i16 readLock; /* Which read lock is being held. -1 for none */ u8 exclusiveMode; /* Non-zero if connection is in exclusive mode */ @@ -386,15 +386,77 @@ struct Wal { #endif }; +/* +** Define the parameters of the hash tables in the wal-index file. There +** is a hash-table following every HASHTABLE_NPAGE page numbers in the +** wal-index. +** +** Changing any of these constants will alter the wal-index format and +** create incompatibilities. +*/ +#define HASHTABLE_NPAGE 4096 /* Must be power of 2 and multiple of 256 */ +#define HASHTABLE_DATATYPE u16 +#define HASHTABLE_HASH_1 383 /* Should be prime */ +#define HASHTABLE_NSLOT (HASHTABLE_NPAGE*2) /* Must be a power of 2 */ +#define HASHTABLE_NBYTE (sizeof(HASHTABLE_DATATYPE)*HASHTABLE_NSLOT) + +/* The block of page numbers associated with the first hash-table in a +** wal-index is smaller than usual. This is so that there is a complete +** hash-table on each aligned 32KB page of the wal-index. +*/ +#define HASHTABLE_NPAGE_ONE (4096 - (WALINDEX_HDR_SIZE/sizeof(u32))) + +/* The wal-index is divided into pages of HASHTABLE_PAGESIZE bytes each. */ +#define HASHTABLE_PAGESIZE (HASHTABLE_NBYTE + HASHTABLE_NPAGE*sizeof(u32)) + +/* +** Obtain a pointer to the iPage'th page of the wal-index. The wal-index +** is broken into pages of HASHTABLE_PAGESIZE bytes. Wal-index pages are +** numbered from zero. +** +** If this call is successful, *ppPage is set to point to the wal-index +** page and SQLITE_OK is returned. If an error (an OOM or VFS error) occurs, +** then an SQLite error code is returned and *ppPage is set to 0. +*/ +static int walIndexPage(Wal *pWal, int iPage, volatile u32 **ppPage){ + int rc = SQLITE_OK; + + /* Enlarge the pWal->apWiData[] array if required */ + if( pWal->nWiData<=iPage ){ + int nByte = sizeof(u32 *)*(iPage+1); + volatile u32 **apNew; + apNew = (volatile u32 **)sqlite3_realloc(pWal->apWiData, nByte); + if( !apNew ){ + *ppPage = 0; + return SQLITE_NOMEM; + } + memset(&apNew[pWal->nWiData], 0, sizeof(u32 *)*(iPage+1-pWal->nWiData)); + pWal->apWiData = apNew; + pWal->nWiData = iPage+1; + } + + /* Request a pointer to the required page from the VFS */ + if( pWal->apWiData[iPage]==0 ){ + rc = sqlite3OsShmPage(pWal->pDbFd, iPage, HASHTABLE_PAGESIZE, + pWal->writeLock, (void volatile **)&pWal->apWiData[iPage] + ); + } + + *ppPage = pWal->apWiData[iPage]; + assert( iPage==0 || *ppPage || rc!=SQLITE_OK ); + return rc; +} + /* ** Return a pointer to the WalCkptInfo structure in the wal-index. */ static volatile WalCkptInfo *walCkptInfo(Wal *pWal){ - assert( pWal->pWiData!=0 ); - return (volatile WalCkptInfo*)&pWal->pWiData[sizeof(WalIndexHdr)/2]; + volatile u32 *page1 = 0; + walIndexPage(pWal, 0, &page1); + assert( page1 ); + return (volatile WalCkptInfo*)&page1[sizeof(WalIndexHdr)/2]; } - /* ** This structure is used to implement an iterator that loops through ** all frames in the WAL in database page order. Where two or more frames @@ -413,12 +475,13 @@ static volatile WalCkptInfo *walCkptInfo(Wal *pWal){ struct WalIterator { int iPrior; /* Last result returned from the iterator */ int nSegment; /* Size of the aSegment[] array */ - int nFinal; /* Elements in aSegment[nSegment-1] */ struct WalSegment { - int iNext; /* Next slot in aIndex[] not previously returned */ - u8 *aIndex; /* i0, i1, i2... such that aPgno[iN] ascending */ - u32 *aPgno; /* 256 page numbers. Pointer to Wal.pWiData */ - } aSegment[1]; /* One for every 256 entries in the WAL */ + int iNext; /* Next slot in aIndex[] not yet returned */ + HASHTABLE_DATATYPE *aIndex; /* i0, i1, i2... such that aPgno[iN] ascend */ + u32 *aPgno; /* Array of page numbers. */ + int nEntry; /* Max size of aPgno[] and aIndex[] arrays */ + int iZero; /* Frame number associated with aPgno[0] */ + } aSegment[1]; /* One for every 32KB page in the WAL */ }; /* @@ -492,7 +555,7 @@ static void walIndexWriteHdr(Wal *pWal){ pWal->hdr.isInit = 1; walChecksumBytes(1, (u8*)&pWal->hdr, offsetof(WalIndexHdr, aCksum), 0, pWal->hdr.aCksum); - aHdr = (WalIndexHdr*)pWal->pWiData; + walIndexPage(pWal, 0, (volatile u32 **)&aHdr); memcpy(&aHdr[1], &pWal->hdr, sizeof(WalIndexHdr)); sqlite3OsShmBarrier(pWal->pDbFd); memcpy(&aHdr[0], &pWal->hdr, sizeof(WalIndexHdr)); @@ -586,19 +649,6 @@ static int walDecodeFrame( return 1; } -/* -** Define the parameters of the hash tables in the wal-index file. There -** is a hash-table following every HASHTABLE_NPAGE page numbers in the -** wal-index. -** -** Changing any of these constants will alter the wal-index format and -** create incompatibilities. -*/ -#define HASHTABLE_NPAGE 4096 /* Must be power of 2 and multiple of 256 */ -#define HASHTABLE_DATATYPE u16 -#define HASHTABLE_HASH_1 383 /* Should be prime */ -#define HASHTABLE_NSLOT (HASHTABLE_NPAGE*2) /* Must be a power of 2 */ -#define HASHTABLE_NBYTE (sizeof(HASHTABLE_DATATYPE)*HASHTABLE_NSLOT) #if defined(SQLITE_TEST) && defined(SQLITE_DEBUG) /* @@ -663,96 +713,6 @@ static void walUnlockExclusive(Wal *pWal, int lockIdx, int n){ walLockName(lockIdx), n)); } -/* -** Return the index in the Wal.pWiData array that corresponds to -** frame iFrame. -** -** Wal.pWiData is an array of u32 elements that is the wal-index. -** The array begins with a header and is then followed by alternating -** "map" and "hash-table" blocks. Each "map" block consists of -** HASHTABLE_NPAGE u32 elements which are page numbers corresponding -** to frames in the WAL file. -** -** This routine returns an index X such that Wal.pWiData[X] is part -** of a "map" block that contains the page number of the iFrame-th -** frame in the WAL file. -*/ -static int walIndexEntry(u32 iFrame){ - return ( - (WALINDEX_LOCK_OFFSET+WALINDEX_LOCK_RESERVED)/sizeof(u32) - + (((iFrame-1)/HASHTABLE_NPAGE) * HASHTABLE_NBYTE)/sizeof(u32) - + (iFrame-1) - ); -} - -/* -** Return the minimum size of the shared-memory, in bytes, that is needed -** to support a wal-index containing frame iFrame. The value returned -** includes the wal-index header and the complete "block" containing iFrame, -** including the hash table segment that follows the block. -*/ -static int walMappingSize(u32 iFrame){ - const int nByte = (sizeof(u32)*HASHTABLE_NPAGE + HASHTABLE_NBYTE) ; - return ( WALINDEX_LOCK_OFFSET - + WALINDEX_LOCK_RESERVED - + nByte * ((iFrame + HASHTABLE_NPAGE - 1)/HASHTABLE_NPAGE) - ); -} - -/* -** Release our reference to the wal-index memory map, if we are holding -** it. -*/ -static void walIndexUnmap(Wal *pWal){ - if( pWal->pWiData ){ - sqlite3OsShmRelease(pWal->pDbFd); - } - pWal->pWiData = 0; - pWal->szWIndex = -1; -} - -/* -** Map the wal-index file into memory if it isn't already. -** -** The reqSize parameter is the requested size of the mapping. The -** mapping will be at least this big if the underlying storage is -** that big. But the mapping will never grow larger than the underlying -** storage. Use the walIndexRemap() to enlarget the storage space. -*/ -static int walIndexMap(Wal *pWal, int reqSize){ - int rc = SQLITE_OK; - if( pWal->pWiData==0 || reqSize>pWal->szWIndex ){ - walIndexUnmap(pWal); - rc = sqlite3OsShmGet(pWal->pDbFd, reqSize, &pWal->szWIndex, - (void volatile**)(char volatile*)&pWal->pWiData); - if( rc!=SQLITE_OK ){ - walIndexUnmap(pWal); - } - } - return rc; -} - -/* -** Enlarge the wal-index to be at least enlargeTo bytes in size and -** Remap the wal-index so that the mapping covers the full size -** of the underlying file. -** -** If enlargeTo is non-negative, then increase the size of the underlying -** storage to be at least as big as enlargeTo before remapping. -*/ -static int walIndexRemap(Wal *pWal, int enlargeTo){ - int rc; - int sz; - assert( pWal->writeLock ); - rc = sqlite3OsShmSize(pWal->pDbFd, enlargeTo, &sz); - if( rc==SQLITE_OK && sz>pWal->szWIndex ){ - walIndexUnmap(pWal); - rc = walIndexMap(pWal, sz); - } - assert( pWal->szWIndex>=enlargeTo || rc!=SQLITE_OK ); - return rc; -} - /* ** Compute a hash on a page number. The resulting hash value must land ** between 0 and (HASHTABLE_NSLOT-1). The walHashNext() function advances @@ -767,6 +727,54 @@ static int walNextHash(int iPriorHash){ return (iPriorHash+1)&(HASHTABLE_NSLOT-1); } +static void walHashGet( + Wal *pWal, /* WAL handle */ + int iHash, /* Find the iHash'th table */ + volatile HASHTABLE_DATATYPE **paHash, /* OUT: Pointer to hash index */ + volatile u32 **paPgno, /* OUT: Pointer to page number array */ + u32 *piZero /* OUT: Frame associated with *paPgno[0] */ +){ + u32 iZero; + volatile u32 *aPgno; + volatile HASHTABLE_DATATYPE *aHash; + + walIndexPage(pWal, iHash, &aPgno); + aHash = (volatile HASHTABLE_DATATYPE *)&aPgno[HASHTABLE_NPAGE]; + + if( iHash==0 ){ + aPgno = &aPgno[WALINDEX_HDR_SIZE/sizeof(u32)-1]; + iZero = 0; + }else{ + iZero = HASHTABLE_NPAGE_ONE + (iHash-1)*HASHTABLE_NPAGE; + aPgno = &aPgno[-1*iZero-1]; + } + + *paPgno = aPgno; + *paHash = aHash; + *piZero = iZero; +} + +static int walFramePage(u32 iFrame){ + int iHash = (iFrame+HASHTABLE_NPAGE-HASHTABLE_NPAGE_ONE-1) / HASHTABLE_NPAGE; + assert( (iHash==0 || iFrame>HASHTABLE_NPAGE_ONE) + && (iHash>=1 || iFrame<=HASHTABLE_NPAGE_ONE) + && (iHash<=1 || iFrame>(HASHTABLE_NPAGE_ONE+HASHTABLE_NPAGE)) + && (iHash>=2 || iFrame<=HASHTABLE_NPAGE_ONE+HASHTABLE_NPAGE) + && (iHash<=2 || iFrame>(HASHTABLE_NPAGE_ONE+2*HASHTABLE_NPAGE)) + ); + return iHash; +} + +/* +** Return the page number associated with frame iFrame in this WAL. +*/ +static u32 walFramePgno(Wal *pWal, u32 iFrame){ + int iHash = walFramePage(iFrame); + if( iHash==0 ){ + return pWal->apWiData[0][WALINDEX_HDR_SIZE/sizeof(u32) + iFrame - 1]; + } + return pWal->apWiData[iHash][(iFrame-1-HASHTABLE_NPAGE_ONE)%HASHTABLE_NPAGE]; +} /* ** Find the hash table and (section of the) page number array used to @@ -789,27 +797,8 @@ static void walHashFind( volatile u32 **paPgno, /* OUT: Pointer to page number array */ u32 *piZero /* OUT: Frame associated with *paPgno[0] */ ){ - u32 iZero; - volatile u32 *aPgno; - volatile HASHTABLE_DATATYPE *aHash; - - iZero = ((iFrame-1)/HASHTABLE_NPAGE) * HASHTABLE_NPAGE; - aPgno = &pWal->pWiData[walIndexEntry(iZero+1)-iZero-1]; - aHash = (HASHTABLE_DATATYPE *)&aPgno[iZero+HASHTABLE_NPAGE+1]; - - /* Assert that: - ** - ** + the mapping is large enough for this hash-table, and - ** - ** + that aPgno[iZero+1] really is the database page number associated - ** with the first frame indexed by this hash table. - */ - assert( (u32*)(&aHash[HASHTABLE_NSLOT])<=&pWal->pWiData[pWal->szWIndex/4] ); - assert( walIndexEntry(iZero+1)==(&aPgno[iZero+1] - pWal->pWiData) ); - - *paHash = aHash; - *paPgno = aPgno; - *piZero = iZero; + int iHash = walFramePage(iFrame); + walHashGet(pWal, iHash, paHash, paPgno, piZero); } /* @@ -829,16 +818,16 @@ static void walCleanupHash(Wal *pWal){ volatile u32 *aPgno; /* Unused return from walHashFind() */ u32 iZero; /* frame == (aHash[x]+iZero) */ int iLimit = 0; /* Zero values greater than this */ + int nByte; /* Number of bytes to zero in aPgno[] */ + int i; /* Used to iterate through aHash[] */ assert( pWal->writeLock ); testcase( pWal->hdr.mxFrame==HASHTABLE_NPAGE-1 ); testcase( pWal->hdr.mxFrame==HASHTABLE_NPAGE ); testcase( pWal->hdr.mxFrame==HASHTABLE_NPAGE+1 ); - if( (pWal->hdr.mxFrame % HASHTABLE_NPAGE)>0 ){ - int nByte; /* Number of bytes to zero in aPgno[] */ - int i; /* Used to iterate through aHash[] */ - walHashFind(pWal, pWal->hdr.mxFrame+1, &aHash, &aPgno, &iZero); + walHashFind(pWal, pWal->hdr.mxFrame+1, &aHash, &aPgno, &iZero); + if( iZero!=pWal->hdr.mxFrame ){ iLimit = pWal->hdr.mxFrame - iZero; assert( iLimit>0 ); for(i=0; ihdr.mxFrame. */ - nByte = sizeof(u32) * (HASHTABLE_NPAGE-iLimit); - memset((void *)&aPgno[iZero+iLimit+1], 0, nByte); - assert( &((u8 *)&aPgno[iZero+iLimit+1])[nByte]==(u8 *)aHash ); + nByte = ((char *)aHash - (char *)&aPgno[pWal->hdr.mxFrame+1]); + memset((void *)&aPgno[pWal->hdr.mxFrame+1], 0, nByte); } #ifdef SQLITE_ENABLE_EXPENSIVE_ASSERT @@ -878,15 +866,7 @@ static void walCleanupHash(Wal *pWal){ ** pPage into WAL frame iFrame. */ static int walIndexAppend(Wal *pWal, u32 iFrame, u32 iPage){ - int rc; /* Return code */ - int nMapping; /* Required mapping size in bytes */ - - /* Make sure the wal-index is mapped. Enlarge the mapping if required. */ - nMapping = walMappingSize(iFrame); - rc = walIndexMap(pWal, nMapping); - while( rc==SQLITE_OK && nMapping>pWal->szWIndex ){ - rc = walIndexRemap(pWal, nMapping); - } + int rc = SQLITE_OK; /* Return code */ /* Assuming the wal-index file was successfully mapped, find the hash ** table and section of of the page number array that pertain to frame @@ -904,8 +884,8 @@ static int walIndexAppend(Wal *pWal, u32 iFrame, u32 iPage){ walHashFind(pWal, iFrame, &aHash, &aPgno, &iZero); idx = iFrame - iZero; if( idx==1 ){ - memset((void*)&aPgno[iZero+1], 0, HASHTABLE_NPAGE*sizeof(u32)); - memset((void*)aHash, 0, HASHTABLE_NBYTE); + int nByte = (u8 *)&aHash[HASHTABLE_NSLOT] - (u8 *)&aPgno[1+iZero]; + memset((void*)&aPgno[1+iZero], 0, nByte); } assert( idx <= HASHTABLE_NSLOT/2 + 1 ); @@ -1076,9 +1056,6 @@ static int walIndexRecover(Wal *pWal){ } finished: - if( rc==SQLITE_OK && pWal->hdr.mxFrame==0 ){ - rc = walIndexRemap(pWal, walMappingSize(1)); - } if( rc==SQLITE_OK ){ volatile WalCkptInfo *pInfo; int i; @@ -1164,7 +1141,6 @@ int sqlite3WalOpen( pRet->pVfs = pVfs; pRet->pWalFd = (sqlite3_file *)&pRet[1]; pRet->pDbFd = pDbFd; - pRet->szWIndex = -1; pRet->readLock = -1; sqlite3_randomness(8, &pRet->hdr.aSalt); pRet->zWalName = zWal = pVfs->szOsFile + (char*)pRet->pWalFd; @@ -1207,24 +1183,22 @@ static int walIteratorNext( u32 iMin; /* Result pgno must be greater than iMin */ u32 iRet = 0xFFFFFFFF; /* 0xffffffff is never a valid page number */ int i; /* For looping through segments */ - int nBlock = p->nFinal; /* Number of entries in current segment */ iMin = p->iPrior; assert( iMin<0xffffffff ); for(i=p->nSegment-1; i>=0; i--){ struct WalSegment *pSegment = &p->aSegment[i]; - while( pSegment->iNextiNextnEntry ){ u32 iPg = pSegment->aPgno[pSegment->aIndex[pSegment->iNext]]; if( iPg>iMin ){ if( iPgaIndex[pSegment->iNext]; + *piFrame = pSegment->iZero + pSegment->aIndex[pSegment->iNext]; } break; } pSegment->iNext++; } - nBlock = 256; } *piPage = p->iPrior = iRet; @@ -1232,28 +1206,28 @@ static int walIteratorNext( } -static void walMergesort8( - Pgno *aContent, /* Pages in wal */ - u8 *aBuffer, /* Buffer of at least *pnList items to use */ - u8 *aList, /* IN/OUT: List to sort */ +static void walMergesort( + u32 *aContent, /* Pages in wal */ + HASHTABLE_DATATYPE *aBuffer, /* Buffer of at least *pnList items to use */ + HASHTABLE_DATATYPE *aList, /* IN/OUT: List to sort */ int *pnList /* IN/OUT: Number of elements in aList[] */ ){ int nList = *pnList; if( nList>1 ){ int nLeft = nList / 2; /* Elements in left list */ int nRight = nList - nLeft; /* Elements in right list */ - u8 *aLeft = aList; /* Left list */ - u8 *aRight = &aList[nLeft]; /* Right list */ int iLeft = 0; /* Current index in aLeft */ int iRight = 0; /* Current index in aright */ int iOut = 0; /* Current index in output buffer */ + HASHTABLE_DATATYPE *aLeft = aList; /* Left list */ + HASHTABLE_DATATYPE *aRight = &aList[nLeft]; /* Right list */ /* TODO: Change to non-recursive version. */ - walMergesort8(aContent, aBuffer, aLeft, &nLeft); - walMergesort8(aContent, aBuffer, aRight, &nRight); + walMergesort(aContent, aBuffer, aLeft, &nLeft); + walMergesort(aContent, aBuffer, aRight, &nRight); while( iRightpWiData && pWal->szWIndex>=walMappingSize(pWal->hdr.mxFrame) ); + HASHTABLE_DATATYPE *aTmp; /* Temp space used by merge-sort */ + HASHTABLE_DATATYPE *aSpace; /* Space at the end of the allocation */ /* This routine only runs while holding SQLITE_SHM_CHECKPOINT. No other ** thread is able to write to shared memory while this routine is ** running (or, indeed, while the WalIterator object exists). Hence, - ** we can cast off the volatile qualifacation from shared memory + ** we can cast off the volatile qualification from shared memory */ assert( pWal->ckptLock ); - aData = (u32*)pWal->pWiData; + iLast = pWal->hdr.mxFrame; /* Allocate space for the WalIterator object */ - iLast = pWal->hdr.mxFrame; - nSegment = (iLast >> 8) + 1; - nFinal = (iLast & 0x000000FF); - nByte = sizeof(WalIterator) + (nSegment+1)*(sizeof(struct WalSegment)+256); + nSegment = walFramePage(iLast) + 1; + nByte = sizeof(WalIterator) + + nSegment*(sizeof(struct WalSegment)) + + (nSegment+1)*(HASHTABLE_NPAGE * sizeof(HASHTABLE_DATATYPE)); p = (WalIterator *)sqlite3_malloc(nByte); if( !p ){ return SQLITE_NOMEM; } memset(p, 0, nByte); - /* Initialize the WalIterator object. Each 256-entry segment is - ** presorted in order to make iterating through all entries much - ** faster. - */ + /* Allocate space for the WalIterator object */ p->nSegment = nSegment; - aSpace = (u8 *)&p->aSegment[nSegment]; - aTmp = &aSpace[nSegment*256]; + aSpace = (HASHTABLE_DATATYPE *)&p->aSegment[nSegment]; + aTmp = &aSpace[HASHTABLE_NPAGE*nSegment]; for(i=0; iaSegment[i].aPgno = &aData[walIndexEntry(i*256+1)]; - p->aSegment[i].aIndex = aSpace; - for(j=0; jaSegment[i].aPgno, aTmp, aSpace, &nIndex); - memset(&aSpace[nIndex], aSpace[nIndex-1], 256-nIndex); - aSpace += 256; - p->nFinal = nIndex; + walMergesort((u32 *)aPgno, aTmp, aSpace, &nEntry); + p->aSegment[i].iZero = iZero; + p->aSegment[i].nEntry = nEntry; + p->aSegment[i].aIndex = aSpace; + p->aSegment[i].aPgno = (u32 *)aPgno; + aSpace += HASHTABLE_NPAGE; } + assert( aSpace==aTmp ); - /* Return the fully initializd WalIterator object */ + /* Return the fully initialized WalIterator object */ *pp = p; return SQLITE_OK ; } @@ -1430,8 +1412,8 @@ static int walCheckpoint( ** cannot be backfilled from the WAL. */ mxSafeFrame = pWal->hdr.mxFrame; - pHdr = (volatile WalIndexHdr*)pWal->pWiData; - pInfo = (volatile WalCkptInfo*)&pHdr[2]; + walIndexPage(pWal, 0, (volatile u32 **)&pHdr); + pInfo = walCkptInfo(pWal); assert( pInfo==walCkptInfo(pWal) ); for(i=1; iaReadMark[i]; @@ -1461,6 +1443,7 @@ static int walCheckpoint( /* Iterate through the contents of the WAL, copying data to the db file. */ while( rc==SQLITE_OK && 0==walIteratorNext(pIter, &iDbpage, &iFrame) ){ + assert( walFramePgno(pWal, iFrame)==iDbpage ); if( iFrame<=nBackfill || iFrame>mxSafeFrame ) continue; rc = sqlite3OsRead(pWal->pWalFd, zBuf, szPage, walFrameOffset(iFrame, szPage) + WAL_FRAME_HDRSIZE @@ -1525,7 +1508,6 @@ int sqlite3WalClose( if( rc==SQLITE_OK ){ isDelete = 1; } - walIndexUnmap(pWal); } walIndexClose(pWal, isDelete); @@ -1534,6 +1516,7 @@ int sqlite3WalClose( sqlite3OsDelete(pWal->pVfs, pWal->zWalName, 0); } WALTRACE(("WAL%p: closed\n", pWal)); + sqlite3_free(pWal->apWiData); sqlite3_free(pWal); } return rc; @@ -1560,13 +1543,14 @@ int walIndexTryHdr(Wal *pWal, int *pChanged){ u32 aCksum[2]; /* Checksum on the header content */ WalIndexHdr h1, h2; /* Two copies of the header content */ WalIndexHdr *aHdr; /* Header in shared memory */ + volatile u32 *page1 = 0; - if( pWal->szWIndex < WALINDEX_HDR_SIZE ){ + walIndexPage(pWal, 0, &page1); + if( !page1 ){ /* The wal-index is not large enough to hold the header, then assume ** header is invalid. */ return 1; } - assert( pWal->pWiData ); /* Read the header. This might happen currently with a write to the ** same area of shared memory on a different CPU in a SMP, @@ -1578,7 +1562,7 @@ int walIndexTryHdr(Wal *pWal, int *pChanged){ ** Memory barriers are used to prevent the compiler or the hardware from ** reordering the reads and writes. */ - aHdr = (WalIndexHdr*)pWal->pWiData; + aHdr = (WalIndexHdr*)page1; memcpy(&h1, &aHdr[0], sizeof(h1)); sqlite3OsShmBarrier(pWal->pDbFd); memcpy(&h2, &aHdr[1], sizeof(h2)); @@ -1625,9 +1609,10 @@ int walIndexTryHdr(Wal *pWal, int *pChanged){ static int walIndexReadHdr(Wal *pWal, int *pChanged){ int rc; /* Return code */ int badHdr; /* True if a header read failed */ + volatile u32 *dummy; assert( pChanged ); - rc = walIndexMap(pWal, walMappingSize(1)); + rc = walIndexPage(pWal, 0, &dummy); if( rc!=SQLITE_OK ){ return rc; } @@ -1659,14 +1644,6 @@ static int walIndexReadHdr(Wal *pWal, int *pChanged){ } } - /* Make sure the mapping is large enough to cover the entire wal-index */ - if( rc==SQLITE_OK ){ - int szWanted = walMappingSize(pWal->hdr.mxFrame); - if( pWal->szWIndexreadLock<0 ); /* Not currently locked */ @@ -1739,16 +1716,14 @@ static int walTryBeginRead(Wal *pWal, int *pChanged, int useWal, int cnt){ rc = SQLITE_BUSY_RECOVERY; } } - }else{ - rc = walIndexMap(pWal, walMappingSize(pWal->hdr.mxFrame)); } if( rc!=SQLITE_OK ){ return rc; } - pHdr = (volatile WalIndexHdr*)pWal->pWiData; - pInfo = (volatile WalCkptInfo*)&pHdr[2]; - assert( pInfo==walCkptInfo(pWal) ); + walIndexPage(pWal, 0, (volatile u32 **)&pHdr); + pInfo = walCkptInfo(pWal); + assert( pInfo==(volatile WalCkptInfo *)&pHdr[2] ); if( !useWal && pInfo->nBackfill==pWal->hdr.mxFrame ){ /* The WAL has been completely backfilled (or it is empty). ** and can be safely ignored. @@ -1883,7 +1858,6 @@ int sqlite3WalBeginReadTransaction(Wal *pWal, int *pChanged){ do{ rc = walTryBeginRead(pWal, pChanged, 0, ++cnt); }while( rc==WAL_RETRY ); - walIndexUnmap(pWal); return rc; } @@ -1913,7 +1887,6 @@ int sqlite3WalRead( int nOut, /* Size of buffer pOut in bytes */ u8 *pOut /* Buffer to write page data to */ ){ - int rc; /* Return code */ u32 iRead = 0; /* If !=0, WAL frame to return data from */ u32 iLast = pWal->hdr.mxFrame; /* Last page in WAL for this reader */ int iHash; /* Used to loop through N hash tables */ @@ -1932,12 +1905,6 @@ int sqlite3WalRead( return SQLITE_OK; } - /* Ensure the wal-index is mapped. */ - rc = walIndexMap(pWal, walMappingSize(iLast)); - if( rc!=SQLITE_OK ){ - return rc; - } - /* Search the hash table or tables for an entry matching page number ** pgno. Each iteration of the following for() loop searches one ** hash table (each hash table indexes up to HASHTABLE_NPAGE frames). @@ -1963,16 +1930,13 @@ int sqlite3WalRead( ** This condition filters out entries that were added to the hash ** table after the current read-transaction had started. */ - for(iHash=iLast; iHash>0 && iRead==0; iHash-=HASHTABLE_NPAGE){ + for(iHash=walFramePage(iLast); iHash>=0 && iRead==0; iHash--){ volatile HASHTABLE_DATATYPE *aHash; /* Pointer to hash table */ volatile u32 *aPgno; /* Pointer to array of page numbers */ u32 iZero; /* Frame number corresponding to aPgno[0] */ int iKey; /* Hash slot index */ - int mxHash; /* upper bound on aHash[] values */ - walHashFind(pWal, iHash, &aHash, &aPgno, &iZero); - mxHash = iLast - iZero; - if( mxHash > HASHTABLE_NPAGE ) mxHash = HASHTABLE_NPAGE; + walHashGet(pWal, iHash, &aHash, &aPgno, &iZero); for(iKey=walHash(pgno); aHash[iKey]; iKey=walNextHash(iKey)){ u32 iFrame = aHash[iKey] + iZero; if( iFrame<=iLast && aPgno[iFrame]==pgno ){ @@ -1981,7 +1945,6 @@ int sqlite3WalRead( } } } - assert( iRead==0 || pWal->pWiData[walIndexEntry(iRead)]==pgno ); #ifdef SQLITE_ENABLE_EXPENSIVE_ASSERT /* If expensive assert() statements are available, do a linear search @@ -1991,7 +1954,7 @@ int sqlite3WalRead( u32 iRead2 = 0; u32 iTest; for(iTest=iLast; iTest>0; iTest--){ - if( pWal->pWiData[walIndexEntry(iTest)]==pgno ){ + if( walFramePgno(pWal, iTest)==pgno ){ iRead2 = iTest; break; } @@ -2003,7 +1966,6 @@ int sqlite3WalRead( /* If iRead is non-zero, then it is the log frame number that contains the ** required page. Read and return data from the log file. */ - walIndexUnmap(pWal); if( iRead ){ i64 iOffset = walFrameOffset(iRead, pWal->hdr.szPage) + WAL_FRAME_HDRSIZE; *pInWal = 1; @@ -2039,6 +2001,7 @@ void sqlite3WalDbsize(Wal *pWal, Pgno *pPgno){ */ int sqlite3WalBeginWriteTransaction(Wal *pWal){ int rc; + volatile u32 *page1; /* Cannot start a write transaction without first holding a read ** transaction. */ @@ -2057,19 +2020,13 @@ int sqlite3WalBeginWriteTransaction(Wal *pWal){ ** time the read transaction on this connection was started, then ** the write is disallowed. */ - rc = walIndexMap(pWal, walMappingSize(pWal->hdr.mxFrame)); - if( rc ){ - walUnlockExclusive(pWal, WAL_WRITE_LOCK, 1); - pWal->writeLock = 0; - return rc; - } - if( memcmp(&pWal->hdr, (void*)pWal->pWiData, sizeof(WalIndexHdr))!=0 ){ + walIndexPage(pWal, 0, &page1); + if( memcmp(&pWal->hdr, (void*)page1, sizeof(WalIndexHdr))!=0 ){ walUnlockExclusive(pWal, WAL_WRITE_LOCK, 1); pWal->writeLock = 0; rc = SQLITE_BUSY; } - walIndexUnmap(pWal); return rc; } @@ -2102,11 +2059,7 @@ int sqlite3WalUndo(Wal *pWal, int (*xUndo)(void *, Pgno), void *pUndoCtx){ Pgno iMax = pWal->hdr.mxFrame; Pgno iFrame; - assert( pWal->pWiData==0 ); rc = walIndexReadHdr(pWal, &unused); - if( rc==SQLITE_OK ){ - rc = walIndexMap(pWal, walMappingSize(iMax)); - } if( rc==SQLITE_OK ){ for(iFrame=pWal->hdr.mxFrame+1; ALWAYS(rc==SQLITE_OK) && iFrame<=iMax; @@ -2124,12 +2077,11 @@ int sqlite3WalUndo(Wal *pWal, int (*xUndo)(void *, Pgno), void *pUndoCtx){ ** committed. As a result, the call to xUndo may not fail. */ assert( pWal->writeLock ); - assert( pWal->pWiData[walIndexEntry(iFrame)]!=1 ); - rc = xUndo(pUndoCtx, pWal->pWiData[walIndexEntry(iFrame)]); + assert( walFramePgno(pWal, iFrame)!=1 ); + rc = xUndo(pUndoCtx, walFramePgno(pWal, iFrame)); } walCleanupHash(pWal); } - walIndexUnmap(pWal); } return rc; } @@ -2170,7 +2122,6 @@ int sqlite3WalSavepointUndo(Wal *pWal, u32 *aWalData){ } if( aWalData[0]hdr.mxFrame ){ - rc = walIndexMap(pWal, walMappingSize(pWal->hdr.mxFrame)); pWal->hdr.mxFrame = aWalData[0]; pWal->hdr.aFrameCksum[0] = aWalData[1]; pWal->hdr.aFrameCksum[1] = aWalData[2]; @@ -2179,7 +2130,6 @@ int sqlite3WalSavepointUndo(Wal *pWal, u32 *aWalData){ } } - walIndexUnmap(pWal); return rc; } @@ -2199,9 +2149,7 @@ static int walRestartLog(Wal *pWal){ int rc = SQLITE_OK; int cnt; - if( pWal->readLock==0 - && SQLITE_OK==(rc = walIndexMap(pWal, walMappingSize(pWal->hdr.mxFrame))) - ){ + if( pWal->readLock==0 ){ volatile WalCkptInfo *pInfo = walCkptInfo(pWal); assert( pInfo->nBackfill==pWal->hdr.mxFrame ); if( pInfo->nBackfill>0 ){ @@ -2237,11 +2185,6 @@ static int walRestartLog(Wal *pWal){ int notUsed; rc = walTryBeginRead(pWal, ¬Used, 1, ++cnt); }while( rc==WAL_RETRY ); - - /* Unmap the wal-index before returning. Otherwise the VFS layer may - ** hold a mutex for the duration of the IO performed by WalFrames(). - */ - walIndexUnmap(pWal); } return rc; } @@ -2267,7 +2210,6 @@ int sqlite3WalFrames( assert( pList ); assert( pWal->writeLock ); - assert( pWal->pWiData==0 ); #if defined(SQLITE_TEST) && defined(SQLITE_DEBUG) { int cnt; for(cnt=0, p=pList; p; p=p->pDirty, cnt++){} @@ -2280,10 +2222,8 @@ int sqlite3WalFrames( ** log file, instead of appending to it at pWal->hdr.mxFrame. */ if( SQLITE_OK!=(rc = walRestartLog(pWal)) ){ - assert( pWal->pWiData==0 ); return rc; } - assert( pWal->pWiData==0 && pWal->readLock>0 ); /* If this is the first frame written into the log, write the WAL ** header to the start of the WAL file. See comments at the top of @@ -2358,7 +2298,6 @@ int sqlite3WalFrames( rc = sqlite3OsSync(pWal->pWalFd, sync_flags); } - assert( pWal->pWiData==0 ); /* Append data to the wal-index. It is not necessary to lock the ** wal-index to do this as the SQLITE_SHM_WRITE lock held on the wal-index @@ -2391,7 +2330,6 @@ int sqlite3WalFrames( } } - walIndexUnmap(pWal); WALTRACE(("WAL%p: frame write %s\n", pWal, rc ? "failed" : "ok")); return rc; } @@ -2412,7 +2350,6 @@ int sqlite3WalCheckpoint( int rc; /* Return code */ int isChanged = 0; /* True if a new wal-index header is loaded */ - assert( pWal->pWiData==0 ); assert( pWal->ckptLock==0 ); WALTRACE(("WAL%p: checkpoint begins\n", pWal)); @@ -2441,7 +2378,6 @@ int sqlite3WalCheckpoint( } /* Release the locks. */ - walIndexUnmap(pWal); walUnlockExclusive(pWal, WAL_CKPT_LOCK, 1); pWal->ckptLock = 0; WALTRACE(("WAL%p: checkpoint %s\n", pWal, rc ? "failed" : "ok")); diff --git a/test/permutations.test b/test/permutations.test index 13c1ae4c63..00329acb1f 100644 --- a/test/permutations.test +++ b/test/permutations.test @@ -13,6 +13,7 @@ set testdir [file dirname $argv0] source $testdir/tester.tcl +db close # Argument processing. # diff --git a/test/wal2.test b/test/wal2.test index c3f98a4b24..fdbcae7d1e 100644 --- a/test/wal2.test +++ b/test/wal2.test @@ -75,9 +75,14 @@ proc incr_tvfs_hdr {file idx incrval} { # database content. # do_test wal2-1.0 { - proc tvfs_cb {method args} { return SQLITE_OK } + proc tvfs_cb {method filename args} { + set ::filename $filename + return SQLITE_OK + } + testvfs tvfs tvfs script tvfs_cb + tvfs filter xShmOpen sqlite3 db test.db -vfs tvfs sqlite3 db2 test.db -vfs tvfs @@ -123,21 +128,15 @@ foreach {tn iInsert res wal_index_hdr_mod wal_locks} " do_test wal2-1.$tn.1 { execsql { INSERT INTO t1 VALUES($iInsert) } - set ::locks [list] - set ::cb_done 0 - proc tvfs_cb {method args} { - if {$::cb_done == 0 && $method == "xShmGet"} { - set ::cb_done 1 - if {$::wal_index_hdr_mod >= 0} { - incr_tvfs_hdr [lindex $args 0] $::wal_index_hdr_mod 1 - } - } - if {$method == "xShmLock"} { lappend ::locks [lindex $args 2] } + lappend ::locks [lindex $args 2] return SQLITE_OK } - + tvfs filter xShmLock + if {$::wal_index_hdr_mod >= 0} { + incr_tvfs_hdr $::filename $::wal_index_hdr_mod 1 + } execsql { SELECT count(a), sum(a) FROM t1 } db2 } $res @@ -174,8 +173,9 @@ do_test wal2-2.0 { testvfs tvfs tvfs script tvfs_cb + tvfs filter xShmOpen proc tvfs_cb {method args} { - if {$method == "xShmOpen"} { set ::shm_file [lindex $args 0] } + set ::filename [lindex $args 0] return SQLITE_OK } @@ -208,32 +208,28 @@ foreach {tn iInsert res0 res1 wal_index_hdr_mod} { 8 11 {10 55} {11 66} 6 9 12 {11 66} {12 78} 7 } { + tvfs filter xShmLock + do_test wal2-2.$tn.1 { - set oldhdr [set_tvfs_hdr $::shm_file] + set oldhdr [set_tvfs_hdr $::filename] execsql { INSERT INTO t1 VALUES($iInsert) } execsql { SELECT count(a), sum(a) FROM t1 } } $res1 do_test wal2-2.$tn.2 { set ::locks [list] - set ::cb_done 0 proc tvfs_cb {method args} { - if {$::cb_done == 0 && $method == "xShmGet"} { - set ::cb_done 1 - if {$::wal_index_hdr_mod >= 0} { - incr_tvfs_hdr $::shm_file $::wal_index_hdr_mod 1 - } - } - if {$method == "xShmLock"} { - set lock [lindex $args 2] - lappend ::locks $lock - if {$lock == $::WRITER} { - set_tvfs_hdr $::shm_file $::oldhdr - } + set lock [lindex $args 2] + lappend ::locks $lock + if {$lock == $::WRITER} { + set_tvfs_hdr $::filename $::oldhdr } return SQLITE_OK } + if {$::wal_index_hdr_mod >= 0} { + incr_tvfs_hdr $::filename $::wal_index_hdr_mod 1 + } execsql { SELECT count(a), sum(a) FROM t1 } db2 } $res0 @@ -243,21 +239,15 @@ foreach {tn iInsert res0 res1 wal_index_hdr_mod} { do_test wal2-2.$tn.4 { set ::locks [list] - set ::cb_done 0 proc tvfs_cb {method args} { - if {$::cb_done == 0 && $method == "xShmGet"} { - set ::cb_done 1 - if {$::wal_index_hdr_mod >= 0} { - incr_tvfs_hdr $::shm_file $::wal_index_hdr_mod 1 - } - } - if {$method == "xShmLock"} { - set lock [lindex $args 2] - lappend ::locks $lock - } + set lock [lindex $args 2] + lappend ::locks $lock return SQLITE_OK } + if {$::wal_index_hdr_mod >= 0} { + incr_tvfs_hdr $::filename $::wal_index_hdr_mod 1 + } execsql { SELECT count(a), sum(a) FROM t1 } db2 } $res1 } diff --git a/test/wal3.test b/test/wal3.test index 6a5898297d..bc4ffa8b8f 100644 --- a/test/wal3.test +++ b/test/wal3.test @@ -353,7 +353,7 @@ T script method_callback proc method_callback {method args} { if {$method == "xShmBarrier"} { incr ::barrier_count - if {$::barrier_count == 1} { + if {$::barrier_count == 2} { # This code is executed within the xShmBarrier() callback invoked # by the client running recovery as part of writing the recovered # wal-index header. If a second client attempts to access the