diff --git a/manifest b/manifest index acc30194f2..e73614d2b4 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C When\scopying\sa\srollback\smode\sdatabase\sover\sa\sWAL\sdatabase\susing\sthe\sbackup\sAPI,\sleave\sthe\sdestination\sdatabase\sin\sWAL\smode\s(instead\sof\sswitching\sit\sto\srollback\smode).\sFix\sfor\s[af95b8c609]. -D 2011-08-23T11:47:06.849 +C Merge\sthe\sPAGECACHE_BLOCKALLOC\schanges\sinto\strunk. +D 2011-08-23T12:50:09.565 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 8c930e7b493d59099ea1304bd0f2aed152eb3315 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -130,7 +130,7 @@ F src/btreeInt.h 67978c014fa4f7cc874032dd3aacadd8db656bc3 F src/build.c 6c60478a1e23f7cdd30ae0251b46f9adbe0e98c2 F src/callback.c 0425c6320730e6d3981acfb9202c1bed9016ad1a F src/complete.c dc1d136c0feee03c2f7550bafc0d29075e36deac -F src/ctime.c 7f283795650dd4122cc07bd02193c40193b32cc6 +F src/ctime.c caf51429be3e0d4114056a8273b0fff812ff8ae9 F src/date.c a3c6842bad7ae632281811de112a8ba63ff08ab3 F src/delete.c ff68e5ef23aee08c0ff528f699a19397ed8bbed8 F src/expr.c 4bbdfaf66bc614be9254ce0c26a17429067a3e07 @@ -165,14 +165,14 @@ F src/os.c fcc717427a80b2ed225373f07b642dc1aad7490b F src/os.h 9dbed8c2b9c1f2f2ebabc09e49829d4777c26bf9 F src/os_common.h 65a897143b64667d23ed329a7984b9b405accb58 F src/os_os2.c 4a75888ba3dfc820ad5e8177025972d74d7f2440 -F src/os_unix.c 81f15448f112e77bd2bd3a1bcf531430616de918 +F src/os_unix.c 87ced852f8dc3da0c43621e4fed7d9021e43af6a F src/os_win.c 4eb6fa00ee28f6d7bad0526edcbe5a60d297c67a F src/pager.c 120550e7ef01dafaa2cbb4a0528c0d87c8f12b41 F src/pager.h 3f8c783de1d4706b40b1ac15b64f5f896bcc78d1 F src/parse.y 12b7ebd61ea54f0e1b1083ff69cc2c8ce9353d58 F src/pcache.c 49e718c095810c6b3334e3a6d89970aceaddefce F src/pcache.h c683390d50f856d4cd8e24342ae62027d1bb6050 -F src/pcache1.c 912bd5687d6df344698d8e69560f347b6e21c18a +F src/pcache1.c 0a131cf9999ba5baeb82fe1b073924ef115af853 F src/pragma.c ebcd20f1e654f5cb3aeef864ed69c4697719fbaa F src/prepare.c e64261559a3187698a3e7e6c8b001a4f4f98dab4 F src/printf.c 585a36b6a963df832cfb69505afa3a34ed5ef8a1 @@ -201,7 +201,7 @@ F src/test_async.c 0612a752896fad42d55c3999a5122af10dcf22ad F src/test_autoext.c 30e7bd98ab6d70a62bb9ba572e4c7df347fe645e F src/test_backup.c c129c91127e9b46e335715ae2e75756e25ba27de F src/test_btree.c 47cd771250f09cdc6e12dda5bc71bc0b3abc96e2 -F src/test_config.c 9bc44df77f22cd0648c651fcd459353b8a984d7b +F src/test_config.c e342660556d64365aacea2b23cbb5e6654d7278f F src/test_demovfs.c 20a4975127993f4959890016ae9ce5535a880094 F src/test_devsym.c e7498904e72ba7491d142d5c83b476c4e76993bc F src/test_func.c cbdec5cededa0761daedde5baf06004a9bf416b5 @@ -358,7 +358,7 @@ F test/crash8.test 38767cb504bbe491de6be4a7006b154973a2309f F test/crashtest1.c 09c1c7d728ccf4feb9e481671e29dda5669bbcc2 F test/createtab.test b5de160630b209c4b8925bdcbbaf48cc90b67fe8 F test/cse.test 277350a26264495e86b1785f34d2d0c8600e021c -F test/ctime.test 7bd009071e242aac4f18521581536b652b789a47 +F test/ctime.test 7f0bd5084d9dd7da9ad46901810896edd2ebb463 F test/date.test a18a2ce81add84b17b06559e82ad7bb91bc6ddff F test/dbstatus.test a719af0f226bd280748a4bb9054c0a5a9fc1b16c F test/default.test 6faf23ccb300114924353007795aa9a8ec0aa9dc @@ -583,9 +583,9 @@ F test/mallocK.test d79968641d1b70d88f6c01bdb9a7eb4a55582cc9 F test/malloc_common.tcl 2930895b0962823ec679853e67e58dd6d8198b3c F test/manydb.test 28385ae2087967aa05c38624cec7d96ec74feb3e F test/mem5.test c6460fba403c5703141348cd90de1c294188c68f -F test/memdb.test 708a028d6d373e5b3842e4bdc8ba80998c9a4da6 +F test/memdb.test 4b5d2671588ed59cb08642adc67fd78c666dc9c2 F test/memleak.test 10b9c6c57e19fc68c32941495e9ba1c50123f6e2 -F test/memsubsys1.test 16ce163ac1ace3d71bf0eaa6a821ed153addd91f +F test/memsubsys1.test 39f1ddddf76ce51a3232aab0279668e23cf00f83 F test/memsubsys2.test 3a1c1a9de48e5726faa85108b02459fae8cb9ee9 F test/minmax.test 722d80816f7e096bf2c04f4111f1a6c1ba65453d F test/minmax2.test 33504c01a03bd99226144e4b03f7631a274d66e0 @@ -618,7 +618,7 @@ F test/pagerfault3.test f16e2efcb5fc9996d1356f7cbc44c998318ae1d7 F test/pageropt.test 8146bf448cf09e87bb1867c2217b921fb5857806 F test/pagesize.test 1dd51367e752e742f58e861e65ed7390603827a0 F test/pcache.test 065aa286e722ab24f2e51792c1f093bf60656b16 -F test/pcache2.test 9f9357bb0f463b87bdf695646024ed2031a0c85a +F test/pcache2.test bc67c6802989dba05cdf3a4574fd882e238c7ecf F test/permutations.test ad17319066a90e2db71823c3ff104795ffc71b31 F test/pragma.test c8108e01da04f16e67e5754e610bc62c1b993f6c F test/pragma2.test 3a55f82b954242c642f8342b17dffc8b47472947 @@ -961,7 +961,7 @@ F tool/symbols.sh caaf6ccc7300fd43353318b44524853e222557d5 F tool/tostr.awk 11760e1b94a5d3dcd42378f3cc18544c06cfa576 F tool/vdbe-compress.tcl d70ea6d8a19e3571d7ab8c9b75cba86d1173ff0f F tool/warnings.sh 2ebae31e1eb352696f3c2f7706a34c084b28c262 -P a65681926d8acf779ad39be21b74537c21777783 -R 4fa828c7c042de4b81d2337ffc4e400a -U dan -Z 6839a6474a17b2a69c514b02de64259d +P 35e6ac18e3d030095da57fff87a288939e9be93f ca47da2a1f6e6d221470e7f02b129fc21c288d7b +R 403e9fa9f5f657d069e88180eb3874b3 +U drh +Z ec65a11b28f52408e8ceb2cf85827722 diff --git a/manifest.uuid b/manifest.uuid index 46d3d76682..387dd4abbe 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -35e6ac18e3d030095da57fff87a288939e9be93f \ No newline at end of file +768c1846d48a555054f07edeabdae8817a2c0a8e \ No newline at end of file diff --git a/src/ctime.c b/src/ctime.c index 9d31596bba..77174d0dae 100644 --- a/src/ctime.c +++ b/src/ctime.c @@ -326,6 +326,9 @@ static const char * const azCompileOpt[] = { #ifdef SQLITE_OMIT_XFER_OPT "OMIT_XFER_OPT", #endif +#ifdef SQLITE_PAGECACHE_BLOCKALLOC + "PAGECACHE_BLOCKALLOC", +#endif #ifdef SQLITE_PERFORMANCE_TRACE "PERFORMANCE_TRACE", #endif diff --git a/src/os_unix.c b/src/os_unix.c index 83bf7b142b..2f068dc58a 100644 --- a/src/os_unix.c +++ b/src/os_unix.c @@ -3510,7 +3510,11 @@ static int unixFileControl(sqlite3_file *id, int op, void *pArg){ return SQLITE_OK; } case SQLITE_FCNTL_SIZE_HINT: { - return fcntlSizeHint(pFile, *(i64 *)pArg); + int rc; + SimulateIOErrorBenign(1); + rc = fcntlSizeHint(pFile, *(i64 *)pArg); + SimulateIOErrorBenign(0); + return rc; } case SQLITE_FCNTL_PERSIST_WAL: { int bPersist = *(int*)pArg; diff --git a/src/pcache1.c b/src/pcache1.c index e47265a225..b39e453ddd 100644 --- a/src/pcache1.c +++ b/src/pcache1.c @@ -24,6 +24,9 @@ typedef struct PgHdr1 PgHdr1; typedef struct PgFreeslot PgFreeslot; typedef struct PGroup PGroup; +typedef struct PGroupBlock PGroupBlock; +typedef struct PGroupBlockList PGroupBlockList; + /* Each page cache (or PCache) belongs to a PGroup. A PGroup is a set ** of one or more PCaches that are able to recycle each others unpinned ** pages when they are under memory pressure. A PGroup is an instance of @@ -53,8 +56,63 @@ struct PGroup { int mxPinned; /* nMaxpage + 10 - nMinPage */ int nCurrentPage; /* Number of purgeable pages allocated */ PgHdr1 *pLruHead, *pLruTail; /* LRU list of unpinned pages */ + PGroupBlockList *pBlockList; /* List of block-lists for this group */ }; +/* +** If SQLITE_PAGECACHE_BLOCKALLOC is defined when the library is built, +** each PGroup structure has a linked list of the the following starting +** at PGroup.pBlockList. There is one entry for each distinct page-size +** currently used by members of the PGroup (i.e. 1024 bytes, 4096 bytes +** etc.). Variable PGroupBlockList.nByte is set to the actual allocation +** size requested by each pcache, which is the database page-size plus +** the various header structures used by the pcache, pager and btree layers. +** Usually around (pgsz+200) bytes. +** +** This size (pgsz+200) bytes is not allocated efficiently by some +** implementations of malloc. In particular, some implementations are only +** able to allocate blocks of memory chunks of 2^N bytes, where N is some +** integer value. Since the page-size is a power of 2, this means we +** end up wasting (pgsz-200) bytes in each allocation. +** +** If SQLITE_PAGECACHE_BLOCKALLOC is defined, the (pgsz+200) byte blocks +** are not allocated directly. Instead, blocks of roughly M*(pgsz+200) bytes +** are requested from malloc allocator. After a block is returned, +** sqlite3MallocSize() is used to determine how many (pgsz+200) byte +** allocations can fit in the space returned by malloc(). This value may +** be more than M. +** +** The blocks are stored in a doubly-linked list. Variable PGroupBlock.nEntry +** contains the number of allocations that will fit in the aData[] space. +** nEntry is limited to the number of bits in bitmask mUsed. If a slot +** within aData is in use, the corresponding bit in mUsed is set. Thus +** when (mUsed+1==(1 << nEntry)) the block is completely full. +** +** Each time a slot within a block is freed, the block is moved to the start +** of the linked-list. And if a block becomes completely full, then it is +** moved to the end of the list. As a result, when searching for a free +** slot, only the first block in the list need be examined. If it is full, +** then it is guaranteed that all blocks are full. +*/ +struct PGroupBlockList { + int nByte; /* Size of each allocation in bytes */ + PGroupBlock *pFirst; /* First PGroupBlock in list */ + PGroupBlock *pLast; /* Last PGroupBlock in list */ + PGroupBlockList *pNext; /* Next block-list attached to group */ +}; + +struct PGroupBlock { + Bitmask mUsed; /* Mask of used slots */ + int nEntry; /* Maximum number of allocations in aData[] */ + u8 *aData; /* Pointer to data block */ + PGroupBlock *pNext; /* Next PGroupBlock in list */ + PGroupBlock *pPrev; /* Previous PGroupBlock in list */ + PGroupBlockList *pList; /* Owner list */ +}; + +/* Minimum value for PGroupBlock.nEntry */ +#define PAGECACHE_BLOCKALLOC_MINENTRY 15 + /* Each page cache is an instance of the following object. Every ** open database file (including each in-memory database and each ** temporary or transient database) has a single page cache which @@ -158,6 +216,17 @@ static SQLITE_WSD struct PCacheGlobal { #define PGHDR1_TO_PAGE(p) (void*)(((char*)p) - p->pCache->szPage) #define PAGE_TO_PGHDR1(c, p) (PgHdr1*)(((char*)p) + c->szPage) +/* +** Blocks used by the SQLITE_PAGECACHE_BLOCKALLOC blocks to store/retrieve +** a PGroupBlock pointer based on a pointer to a page buffer. +*/ +#define PAGE_SET_BLOCKPTR(pCache, pPg, pBlock) \ + ( *(PGroupBlock **)&(((u8*)pPg)[sizeof(PgHdr1) + pCache->szPage]) = pBlock ) + +#define PAGE_GET_BLOCKPTR(pCache, pPg) \ + ( *(PGroupBlock **)&(((u8*)pPg)[sizeof(PgHdr1) + pCache->szPage]) ) + + /* ** Macros to enter and leave the PCache LRU mutex. */ @@ -283,13 +352,143 @@ static int pcache1MemSize(void *p){ } #endif /* SQLITE_ENABLE_MEMORY_MANAGEMENT */ +/* +** The block pBlock belongs to list pList but is not currently linked in. +** Insert it into the start of the list. +*/ +static void addBlockToList(PGroupBlockList *pList, PGroupBlock *pBlock){ + pBlock->pPrev = 0; + pBlock->pNext = pList->pFirst; + pList->pFirst = pBlock; + if( pBlock->pNext ){ + pBlock->pNext->pPrev = pBlock; + }else{ + assert( pList->pLast==0 ); + pList->pLast = pBlock; + } +} + +/* +** If there are no blocks in the list headed by pList, remove pList +** from the pGroup->pBlockList list and free it with sqlite3_free(). +*/ +static void freeListIfEmpty(PGroup *pGroup, PGroupBlockList *pList){ + assert( sqlite3_mutex_held(pGroup->mutex) ); + if( pList->pFirst==0 ){ + PGroupBlockList **pp; + for(pp=&pGroup->pBlockList; *pp!=pList; pp=&(*pp)->pNext); + *pp = (*pp)->pNext; + sqlite3_free(pList); + } +} + /* ** Allocate a new page object initially associated with cache pCache. */ static PgHdr1 *pcache1AllocPage(PCache1 *pCache){ int nByte = sizeof(PgHdr1) + pCache->szPage; - void *pPg = pcache1Alloc(nByte); + void *pPg = 0; PgHdr1 *p; + +#ifdef SQLITE_PAGECACHE_BLOCKALLOC + PGroup *pGroup = pCache->pGroup; + PGroupBlockList *pList; + PGroupBlock *pBlock; + int i; + + nByte += sizeof(PGroupBlockList *); + nByte = ROUND8(nByte); + + do{ + for(pList=pGroup->pBlockList; pList; pList=pList->pNext){ + if( pList->nByte==nByte ) break; + } + if( pList==0 ){ + PGroupBlockList *pNew; + pcache1LeaveMutex(pCache->pGroup); + pNew = (PGroupBlockList *)sqlite3MallocZero(sizeof(PGroupBlockList)); + pcache1EnterMutex(pCache->pGroup); + if( pNew==0 ){ + /* malloc() failure. Return early. */ + return 0; + } + for(pList=pGroup->pBlockList; pList; pList=pList->pNext){ + if( pList->nByte==nByte ) break; + } + if( pList ){ + sqlite3_free(pNew); + }else{ + pNew->nByte = nByte; + pNew->pNext = pGroup->pBlockList; + pGroup->pBlockList = pNew; + pList = pNew; + } + } + }while( pList==0 ); + + pBlock = pList->pFirst; + if( pBlock==0 || pBlock->mUsed==(((Bitmask)1<nEntry)-1) ){ + int sz; + + /* Allocate a new block. Try to allocate enough space for the PGroupBlock + ** structure and MINENTRY allocations of nByte bytes each. If the + ** allocator returns more memory than requested, then more than MINENTRY + ** allocations may fit in it. */ + pcache1LeaveMutex(pCache->pGroup); + sz = sizeof(PGroupBlock) + PAGECACHE_BLOCKALLOC_MINENTRY * nByte; + pBlock = (PGroupBlock *)sqlite3Malloc(sz); + pcache1EnterMutex(pCache->pGroup); + + if( !pBlock ){ + freeListIfEmpty(pGroup, pList); + return 0; + } + pBlock->nEntry = (sqlite3MallocSize(pBlock) - sizeof(PGroupBlock)) / nByte; + if( pBlock->nEntry>=BMS ){ + pBlock->nEntry = BMS-1; + } + pBlock->pList = pList; + pBlock->mUsed = 0; + pBlock->aData = (u8 *)&pBlock[1]; + addBlockToList(pList, pBlock); + + sz = sqlite3MallocSize(pBlock); + sqlite3_mutex_enter(pcache1.mutex); + sqlite3StatusAdd(SQLITE_STATUS_PAGECACHE_OVERFLOW, sz); + sqlite3_mutex_leave(pcache1.mutex); + } + + for(i=0; pPg==0 && ALWAYS(inEntry); i++){ + if( 0==(pBlock->mUsed & ((Bitmask)1<mUsed |= ((Bitmask)1<aData[pList->nByte * i]; + } + } + assert( pPg ); + PAGE_SET_BLOCKPTR(pCache, pPg, pBlock); + + /* If the block is now full, shift it to the end of the list */ + if( pBlock->mUsed==(((Bitmask)1<nEntry)-1) && pList->pLast!=pBlock ){ + assert( pList->pFirst==pBlock ); + assert( pBlock->pPrev==0 ); + assert( pList->pLast->pNext==0 ); + pList->pFirst = pBlock->pNext; + pList->pFirst->pPrev = 0; + pBlock->pPrev = pList->pLast; + pBlock->pNext = 0; + pList->pLast->pNext = pBlock; + pList->pLast = pBlock; + } +#else + /* The group mutex must be released before pcache1Alloc() is called. This + ** is because it may call sqlite3_release_memory(), which assumes that + ** this mutex is not held. */ + assert( sqlite3_mutex_held(pCache->pGroup->mutex) ); + pcache1LeaveMutex(pCache->pGroup); + pPg = pcache1Alloc(nByte); + pcache1EnterMutex(pCache->pGroup); +#endif + if( pPg ){ p = PAGE_TO_PGHDR1(pCache, pPg); if( pCache->bPurgeable ){ @@ -311,10 +510,52 @@ static PgHdr1 *pcache1AllocPage(PCache1 *pCache){ static void pcache1FreePage(PgHdr1 *p){ if( ALWAYS(p) ){ PCache1 *pCache = p->pCache; + void *pPg = PGHDR1_TO_PAGE(p); + +#ifdef SQLITE_PAGECACHE_BLOCKALLOC + PGroupBlock *pBlock = PAGE_GET_BLOCKPTR(pCache, pPg); + PGroupBlockList *pList = pBlock->pList; + int i = ((u8 *)pPg - pBlock->aData) / pList->nByte; + + assert( pPg==(void *)&pBlock->aData[i*pList->nByte] ); + assert( pBlock->mUsed & ((Bitmask)1<mUsed &= ~((Bitmask)1<pFirst==pBlock ){ + pList->pFirst = pBlock->pNext; + if( pList->pFirst ) pList->pFirst->pPrev = 0; + }else{ + pBlock->pPrev->pNext = pBlock->pNext; + } + if( pList->pLast==pBlock ){ + pList->pLast = pBlock->pPrev; + if( pList->pLast ) pList->pLast->pNext = 0; + }else{ + pBlock->pNext->pPrev = pBlock->pPrev; + } + + if( pBlock->mUsed==0 ){ + PGroup *pGroup = p->pCache->pGroup; + + int sz = sqlite3MallocSize(pBlock); + sqlite3_mutex_enter(pcache1.mutex); + sqlite3StatusAdd(SQLITE_STATUS_PAGECACHE_OVERFLOW, -sz); + sqlite3_mutex_leave(pcache1.mutex); + freeListIfEmpty(pGroup, pList); + sqlite3_free(pBlock); + }else{ + addBlockToList(pList, pBlock); + } +#else + assert( sqlite3_mutex_held(p->pCache->pGroup->mutex) ); + pcache1Free(pPg); +#endif if( pCache->bPurgeable ){ pCache->pGroup->nCurrentPage--; } - pcache1Free(PGHDR1_TO_PAGE(p)); } } @@ -752,9 +993,7 @@ static void *pcache1Fetch(sqlite3_pcache *p, unsigned int iKey, int createFlag){ */ if( !pPage ){ if( createFlag==1 ) sqlite3BeginBenignMalloc(); - pcache1LeaveMutex(pGroup); pPage = pcache1AllocPage(pCache); - pcache1EnterMutex(pGroup); if( createFlag==1 ) sqlite3EndBenignMalloc(); } diff --git a/src/test_config.c b/src/test_config.c index 5b0ffaa697..5af60bafd0 100644 --- a/src/test_config.c +++ b/src/test_config.c @@ -555,6 +555,12 @@ Tcl_SetVar2(interp, "sqlite_options", "long_double", Tcl_SetVar2(interp, "sqlite_options", "yytrackmaxstackdepth", "0", TCL_GLOBAL_ONLY); #endif +#ifdef SQLITE_PAGECACHE_BLOCKALLOC + Tcl_SetVar2(interp, "sqlite_options", "blockalloc", "1", TCL_GLOBAL_ONLY); +#else + Tcl_SetVar2(interp, "sqlite_options", "blockalloc", "0", TCL_GLOBAL_ONLY); +#endif + #define LINKVAR(x) { \ static const int cv_ ## x = SQLITE_ ## x; \ Tcl_LinkVar(interp, "SQLITE_" #x, (char *)&(cv_ ## x), \ diff --git a/test/ctime.test b/test/ctime.test index e4cb156168..11216f9016 100644 --- a/test/ctime.test +++ b/test/ctime.test @@ -223,5 +223,16 @@ do_test ctime-2.5.$tc { } ] } {0 {{}}} +ifcapable blockalloc { + do_test ctime-3.1a { + db eval {SELECT sqlite_compileoption_used('PAGECACHE_BLOCKALLOC')} + } {1} +} else { + do_test ctime-3.1b { + db eval {SELECT sqlite_compileoption_used('PAGECACHE_BLOCKALLOC')} + } {0} +} + + finish_test diff --git a/test/memdb.test b/test/memdb.test index 1da3d7c58b..ec3accffa6 100644 --- a/test/memdb.test +++ b/test/memdb.test @@ -407,7 +407,7 @@ do_test memdb-8.2 { # Test that auto-vacuum works with in-memory databases. # -ifcapable autovacuum { +ifcapable autovacuum&&!blockalloc { do_test memdb-9.1 { db close sqlite3 db test.db diff --git a/test/memsubsys1.test b/test/memsubsys1.test index 7eecf083a7..dbd4f233ec 100644 --- a/test/memsubsys1.test +++ b/test/memsubsys1.test @@ -25,6 +25,13 @@ if {[permutation] == "memsubsys1"} { return } +# Nor will it work if the pager is allocating memory in blocks. +# +ifcapable blockalloc { + finish_test + return +} + # This procedure constructs a new database in test.db. It fills # this database with many small records (enough to force multiple # rebalance operations in the btree-layer and to require a large diff --git a/test/pcache2.test b/test/pcache2.test index 20cb2a8466..bbfb90d29c 100644 --- a/test/pcache2.test +++ b/test/pcache2.test @@ -16,6 +16,13 @@ set testdir [file dirname $argv0] source $testdir/tester.tcl +# If compiled with blockalloc, pagecache memory is not used. Which +# causes these tests to fail. +# +ifcapable blockalloc { + finish_test + return +} # Set up a pcache memory pool so that we can easily track how many # pages are being used for cache.