From f74b8d9b89ef1bfd0bcd826284e763530fd9991f Mon Sep 17 00:00:00 2001 From: drh Date: Sun, 1 Sep 2002 23:20:45 +0000 Subject: [PATCH] Relax the locking requirements on BTree cursors. Any number of read and write cursors can be open at the same time now, but a write cannot occur as long as one or more read cursors are open. Before this change, one or more read cursors could be open on a table, or a single write cursor, but not both. Both policies have the same desirable effect: they prevent writes to a table while a sequential scan of that table is underway. But the new policy is a little less restrictive. Both policies prevent an UPDATE from occurring inside a SELECT (which is what we want) but the new policy allows a SELECT to occur inside an UPDATE. (CVS 739) FossilOrigin-Name: 8c2a0836980341faa479cfe6c716409e6057367d --- manifest | 14 ++--- manifest.uuid | 2 +- src/btree.c | 161 ++++++++++++++++++++++++++++++++------------------ src/vdbe.c | 6 +- 4 files changed, 118 insertions(+), 65 deletions(-) diff --git a/manifest b/manifest index 3a2855344a..9ddca9e8ba 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Parse\sforeign\skey\sconstraints\sand\spopulate\sinternal\sdata\sstructures\nappropriately.\s\sConstraints\sare\sstill\snot\senforced.\s(CVS\s738) -D 2002-08-31T18:53:06 +C Relax\sthe\slocking\srequirements\son\sBTree\scursors.\s\sAny\snumber\sof\sread\sand\nwrite\scursors\scan\sbe\sopen\sat\sthe\ssame\stime\snow,\sbut\sa\swrite\scannot\soccur\nas\slong\sas\sone\sor\smore\sread\scursors\sare\sopen.\n\nBefore\sthis\schange,\sone\sor\smore\sread\scursors\scould\sbe\sopen\son\sa\stable,\nor\sa\ssingle\swrite\scursor,\sbut\snot\sboth.\s\sBoth\spolicies\shave\sthe\ssame\ndesirable\seffect:\sthey\sprevent\swrites\sto\sa\stable\swhile\sa\ssequential\sscan\nof\sthat\stable\sis\sunderway.\s\sBut\sthe\snew\spolicy\sis\sa\slittle\sless\srestrictive.\nBoth\spolicies\sprevent\san\sUPDATE\sfrom\soccurring\sinside\sa\sSELECT\s(which\sis\nwhat\swe\swant)\sbut\sthe\snew\spolicy\sallows\sa\sSELECT\sto\soccur\sinside\san\nUPDATE.\s(CVS\s739) +D 2002-09-01T23:20:45 F Makefile.in 420fada882179cb72ffd07313f3fd693f9f06640 F Makefile.linux-gcc b86a99c493a5bfb402d1d9178dcdc4bd4b32f906 F README f1de682fbbd94899d50aca13d387d1b3fd3be2dd @@ -18,7 +18,7 @@ F main.mk 8e34134476c039c89bb404f2712bcbb2f044bdfe F publish.sh a7a8d23e6525bd25d4f5ba9b0fc6edc107d94050 F spec.template 238f7db425a78dc1bb7682e56e3834c7270a3f5e F sqlite.1 83f4a9d37bdf2b7ef079a82d54eaf2e3509ee6ea -F src/btree.c 9e21606581a5a4a5b18ad304d7a4f433101f1538 +F src/btree.c 8024b87635c2adf133f153f1bb595125ec1c7d7b F src/btree.h 0ca6c2631338df62e4f7894252d9347ae234eda9 F src/build.c 0116afe4f67687206364c4d1e88dc07aefc661de F src/delete.c c9f59ee217e062eb9de7b64b76b5cfff42b2f028 @@ -52,7 +52,7 @@ F src/tokenize.c 62c98842447effe92eba9622bb2f9a2a8a4b97ad F src/trigger.c c90a292a4bef25e478fd5deda6d300319be6a023 F src/update.c f07e6ed2c517c92871e54d3f5886d1cf56121b11 F src/util.c c70d5da5357e01b58392faebae3c3620c1d71f14 -F src/vdbe.c d4969d78fdd2408706a329c7d4554b9596d1be94 +F src/vdbe.c 4a1744c9054965ee34c034ed4673231124b571c9 F src/vdbe.h 7cfeb3aab6a901336532d93494cdedbddf30b7ec F src/where.c 53959c9d94adaf93b409271815e26eafa6ddd515 F test/all.test efd958d048c70a3247997c482f0b33561f7759f0 @@ -150,7 +150,7 @@ F www/speed.tcl a20a792738475b68756ea7a19321600f23d1d803 F www/sqlite.tcl ae3dcfb077e53833b59d4fcc94d8a12c50a44098 F www/tclsqlite.tcl 1db15abeb446aad0caf0b95b8b9579720e4ea331 F www/vdbe.tcl 2013852c27a02a091d39a766bc87cff329f21218 -P 5f51e13d56a58d7c263043cae9898d796017a369 -R 5f57a541da1cc0eea6b8b3a2339bca70 +P 170711ca65dc894d0486b9d575edb8f1708250fb +R 665b6d3bd425ef91563749e2f71e8997 U drh -Z 53eaaf70a55382e9bd46cf1caade9346 +Z c368bcd74fab4b1fea02ec6dd259bce0 diff --git a/manifest.uuid b/manifest.uuid index 5fb922ae93..2e21aa5362 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -170711ca65dc894d0486b9d575edb8f1708250fb \ No newline at end of file +8c2a0836980341faa479cfe6c716409e6057367d \ No newline at end of file diff --git a/src/btree.c b/src/btree.c index 55d0969a6a..8e599809a5 100644 --- a/src/btree.c +++ b/src/btree.c @@ -9,7 +9,7 @@ ** May you share freely, never taking more than you give. ** ************************************************************************* -** $Id: btree.c,v 1.71 2002/08/15 13:50:49 drh Exp $ +** $Id: btree.c,v 1.72 2002/09/01 23:20:45 drh Exp $ ** ** This file implements a external (disk-based) database using BTrees. ** For a detailed discussion of BTrees, refer to @@ -345,7 +345,6 @@ struct Btree { u8 inCkpt; /* True if there is a checkpoint on the transaction */ u8 readOnly; /* True if the underlying file is readonly */ u8 needSwab; /* Need to byte-swapping */ - Hash locks; /* Key: root page number. Data: lock count */ }; typedef Btree Bt; @@ -357,6 +356,7 @@ typedef Btree Bt; struct BtCursor { Btree *pBt; /* The Btree to which this cursor belongs */ BtCursor *pNext, *pPrev; /* Forms a linked list of all cursors */ + BtCursor *pShared; /* Loop of cursors with the same root page */ Pgno pgnoRoot; /* The root page of this tree */ MemPage *pPage; /* Page that contains the entry */ int idx; /* Index of the entry in pPage->apCell[] */ @@ -682,7 +682,6 @@ int sqliteBtreeOpen( pBt->pCursor = 0; pBt->page1 = 0; pBt->readOnly = sqlitepager_isreadonly(pBt->pPager); - sqliteHashInit(&pBt->locks, SQLITE_HASH_INT, 0); *ppBtree = pBt; return SQLITE_OK; } @@ -695,7 +694,6 @@ int sqliteBtreeClose(Btree *pBt){ sqliteBtreeCloseCursor(pBt->pCursor); } sqlitepager_close(pBt->pPager); - sqliteHashClear(&pBt->locks); sqliteFree(pBt); return SQLITE_OK; } @@ -825,19 +823,16 @@ static int newDatabase(Btree *pBt){ int sqliteBtreeBeginTrans(Btree *pBt){ int rc; if( pBt->inTrans ) return SQLITE_ERROR; + if( pBt->readOnly ) return SQLITE_READONLY; if( pBt->page1==0 ){ rc = lockBtree(pBt); if( rc!=SQLITE_OK ){ return rc; } } - if( pBt->readOnly ){ - rc = SQLITE_OK; - }else{ - rc = sqlitepager_begin(pBt->page1); - if( rc==SQLITE_OK ){ - rc = newDatabase(pBt); - } + rc = sqlitepager_begin(pBt->page1); + if( rc==SQLITE_OK ){ + rc = newDatabase(pBt); } if( rc==SQLITE_OK ){ pBt->inTrans = 1; @@ -856,7 +851,6 @@ int sqliteBtreeBeginTrans(Btree *pBt){ */ int sqliteBtreeCommit(Btree *pBt){ int rc; - if( pBt->inTrans==0 ) return SQLITE_ERROR; rc = pBt->readOnly ? SQLITE_OK : sqlitepager_commit(pBt->pPager); pBt->inTrans = 0; pBt->inCkpt = 0; @@ -903,7 +897,7 @@ int sqliteBtreeRollback(Btree *pBt){ int sqliteBtreeBeginCkpt(Btree *pBt){ int rc; if( !pBt->inTrans || pBt->inCkpt ){ - return SQLITE_ERROR; + return pBt->readOnly ? SQLITE_READONLY : SQLITE_ERROR; } rc = pBt->readOnly ? SQLITE_OK : sqlitepager_ckpt_begin(pBt->pPager); pBt->inCkpt = 1; @@ -955,20 +949,39 @@ int sqliteBtreeRollbackCkpt(Btree *pBt){ ** the database file. ** ** If wrFlag==0, then the cursor can only be used for reading. -** If wrFlag==1, then the cursor can be used for reading or writing. -** A read/write cursor requires exclusive access to its table. There -** cannot be two or more cursors open on the same table if any one of -** cursors is a read/write cursor. But there can be two or more -** read-only cursors open on the same table. +** If wrFlag==1, then the cursor can be used for reading or for +** writing if other conditions for writing are also met. These +** are the conditions that must be met in order for writing to +** be allowed: ** +** 1: The cursor must have been opened with wrFlag==1 +** +** 2: No other cursors may be open with wrFlag==0 on the same table +** +** 3: The database must be writable (not on read-only media) +** +** 4: There must be an active transaction. +** +** Condition 2 warrants further discussion. If any cursor is opened +** on a table with wrFlag==0, that prevents all other cursors from +** writing to that table. This is a kind of "read-lock". When a cursor +** is opened with wrFlag==0 it is guaranteed that the table will not +** change as long as the cursor is open. This allows the cursor to +** do a sequential scan of the table without having to worry about +** entries being inserted or deleted during the scan. Cursors should +** be opened with wrFlag==0 only if this read-lock property is needed. +** That is to say, cursors should be opened with wrFlag==0 only if they +** intend to use the sqliteBtreeNext() system call. All other cursors +** should be opened with wrFlag==1 even if they never really intend +** to write. +** ** No checking is done to make sure that page iTable really is the ** root page of a b-tree. If it is not, then the cursor acquired ** will not work correctly. */ int sqliteBtreeCursor(Btree *pBt, int iTable, int wrFlag, BtCursor **ppCur){ int rc; - BtCursor *pCur; - ptr nLock; + BtCursor *pCur, *pRing; if( pBt->page1==0 ){ rc = lockBtree(pBt); @@ -977,10 +990,6 @@ int sqliteBtreeCursor(Btree *pBt, int iTable, int wrFlag, BtCursor **ppCur){ return rc; } } - if( wrFlag && pBt->readOnly ){ - *ppCur = 0; - return SQLITE_READONLY; - } pCur = sqliteMalloc( sizeof(*pCur) ); if( pCur==0 ){ rc = SQLITE_NOMEM; @@ -995,13 +1004,6 @@ int sqliteBtreeCursor(Btree *pBt, int iTable, int wrFlag, BtCursor **ppCur){ if( rc!=SQLITE_OK ){ goto create_cursor_exception; } - nLock = (ptr)sqliteHashFind(&pBt->locks, 0, iTable); - if( nLock<0 || (nLock>0 && wrFlag) ){ - rc = SQLITE_LOCKED; - goto create_cursor_exception; - } - nLock = wrFlag ? -1 : nLock+1; - sqliteHashInsert(&pBt->locks, 0, iTable, (void*)nLock); pCur->pBt = pBt; pCur->wrFlag = wrFlag; pCur->idx = 0; @@ -1010,6 +1012,14 @@ int sqliteBtreeCursor(Btree *pBt, int iTable, int wrFlag, BtCursor **ppCur){ pCur->pNext->pPrev = pCur; } pCur->pPrev = 0; + pRing = pBt->pCursor; + while( pRing && pRing->pgnoRoot!=pCur->pgnoRoot ){ pRing = pRing->pNext; } + if( pRing ){ + pCur->pShared = pRing->pShared; + pRing->pShared = pCur; + }else{ + pCur->pShared = pCur; + } pBt->pCursor = pCur; *ppCur = pCur; return SQLITE_OK; @@ -1029,7 +1039,6 @@ create_cursor_exception: ** when the last cursor is closed. */ int sqliteBtreeCloseCursor(BtCursor *pCur){ - ptr nLock; Btree *pBt = pCur->pBt; if( pCur->pPrev ){ pCur->pPrev->pNext = pCur->pNext; @@ -1042,11 +1051,12 @@ int sqliteBtreeCloseCursor(BtCursor *pCur){ if( pCur->pPage ){ sqlitepager_unref(pCur->pPage); } + if( pCur->pShared!=pCur ){ + BtCursor *pRing = pCur->pShared; + while( pRing->pShared!=pCur ){ pRing = pRing->pShared; } + pRing->pShared = pCur->pShared; + } unlockBtreeIfUnused(pBt); - nLock = (ptr)sqliteHashFind(&pBt->locks, 0, pCur->pgnoRoot); - assert( nLock!=0 || sqlite_malloc_failed ); - nLock = nLock<0 ? 0 : nLock-1; - sqliteHashInsert(&pBt->locks, 0, pCur->pgnoRoot, (void*)nLock); sqliteFree(pCur); return SQLITE_OK; } @@ -2388,6 +2398,35 @@ balance_cleanup: return rc; } +/* +** This routine checks all cursors that point to the same table +** as pCur points to. If any of those cursors were opened with +** wrFlag==0 then this routine returns SQLITE_LOCKED. If all +** cursors point to the same table were opened with wrFlag==1 +** then this routine returns SQLITE_OK. +** +** In addition to checking for read-locks (where a read-lock +** means a cursor opened with wrFlag==0) this routine also moves +** all cursors other than pCur so that they are pointing to the +** first Cell on root page. This is necessary because an insert +** or delete might change the number of cells on a page or delete +** a page entirely and we do not want to leave any cursors +** pointing to non-existant pages or cells. +*/ +static int checkReadLocks(BtCursor *pCur){ + BtCursor *p; + assert( pCur->wrFlag ); + for(p=pCur->pShared; p!=pCur; p=p->pShared){ + assert( p ); + assert( p->pgnoRoot==pCur->pgnoRoot ); + if( p->wrFlag==0 ) return SQLITE_LOCKED; + if( sqlitepager_pagenumber(p->pPage)!=p->pgnoRoot ){ + moveToRoot(p); + } + } + return SQLITE_OK; +} + /* ** Insert a new record into the BTree. The key is given by (pKey,nKey) ** and the data is given by (pData,nData). The cursor is used only to @@ -2409,12 +2448,17 @@ int sqliteBtreeInsert( if( pCur->pPage==0 ){ return SQLITE_ABORT; /* A rollback destroyed this cursor */ } - if( !pCur->pBt->inTrans || nKey+nData==0 ){ - return SQLITE_ERROR; /* Must start a transaction first */ + if( !pBt->inTrans || nKey+nData==0 ){ + /* Must start a transaction before doing an insert */ + return pBt->readOnly ? SQLITE_READONLY : SQLITE_ERROR; } + assert( !pBt->readOnly ); if( !pCur->wrFlag ){ return SQLITE_PERM; /* Cursor not open for writing */ } + if( checkReadLocks(pCur) ){ + return SQLITE_LOCKED; /* The table pCur points to has a read lock */ + } rc = sqliteBtreeMoveto(pCur, pKey, nKey, &loc); if( rc ) return rc; pPage = pCur->pPage; @@ -2463,15 +2507,20 @@ int sqliteBtreeDelete(BtCursor *pCur){ if( pCur->pPage==0 ){ return SQLITE_ABORT; /* A rollback destroyed this cursor */ } - if( !pCur->pBt->inTrans ){ - return SQLITE_ERROR; /* Must start a transaction first */ + if( !pBt->inTrans ){ + /* Must start a transaction before doing a delete */ + return pBt->readOnly ? SQLITE_READONLY : SQLITE_ERROR; } + assert( !pBt->readOnly ); if( pCur->idx >= pPage->nCell ){ return SQLITE_ERROR; /* The cursor is not pointing to anything */ } if( !pCur->wrFlag ){ return SQLITE_PERM; /* Did not open this cursor for writing */ } + if( checkReadLocks(pCur) ){ + return SQLITE_LOCKED; /* The table pCur points to has a read lock */ + } rc = sqlitepager_write(pPage); if( rc ) return rc; pCell = pPage->apCell[pCur->idx]; @@ -2538,7 +2587,8 @@ int sqliteBtreeCreateTable(Btree *pBt, int *piTable){ Pgno pgnoRoot; int rc; if( !pBt->inTrans ){ - return SQLITE_ERROR; /* Must start a transaction first */ + /* Must start a transaction first */ + return pBt->readOnly ? SQLITE_READONLY : SQLITE_ERROR; } if( pBt->readOnly ){ return SQLITE_READONLY; @@ -2610,16 +2660,15 @@ static int clearDatabasePage(Btree *pBt, Pgno pgno, int freePageFlag){ */ int sqliteBtreeClearTable(Btree *pBt, int iTable){ int rc; - ptr nLock; + BtCursor *pCur; if( !pBt->inTrans ){ - return SQLITE_ERROR; /* Must start a transaction first */ + return pBt->readOnly ? SQLITE_READONLY : SQLITE_ERROR; } - if( pBt->readOnly ){ - return SQLITE_READONLY; - } - nLock = (ptr)sqliteHashFind(&pBt->locks, 0, iTable); - if( nLock ){ - return SQLITE_LOCKED; + for(pCur=pBt->pCursor; pCur; pCur=pCur->pNext){ + if( pCur->pgnoRoot==(Pgno)iTable ){ + if( pCur->wrFlag==0 ) return SQLITE_LOCKED; + moveToRoot(pCur); + } } rc = clearDatabasePage(pBt, (Pgno)iTable, 0); if( rc ){ @@ -2636,11 +2685,14 @@ int sqliteBtreeClearTable(Btree *pBt, int iTable){ int sqliteBtreeDropTable(Btree *pBt, int iTable){ int rc; MemPage *pPage; + BtCursor *pCur; if( !pBt->inTrans ){ - return SQLITE_ERROR; /* Must start a transaction first */ + return pBt->readOnly ? SQLITE_READONLY : SQLITE_ERROR; } - if( pBt->readOnly ){ - return SQLITE_READONLY; + for(pCur=pBt->pCursor; pCur; pCur=pCur->pNext){ + if( pCur->pgnoRoot==(Pgno)iTable ){ + return SQLITE_LOCKED; /* Cannot drop a table that has a cursor */ + } } rc = sqlitepager_get(pBt->pPager, (Pgno)iTable, (void**)&pPage); if( rc ) return rc; @@ -2680,10 +2732,7 @@ int sqliteBtreeUpdateMeta(Btree *pBt, int *aMeta){ PageOne *pP1; int rc, i; if( !pBt->inTrans ){ - return SQLITE_ERROR; /* Must start a transaction first */ - } - if( pBt->readOnly ){ - return SQLITE_READONLY; + return pBt->readOnly ? SQLITE_READONLY : SQLITE_ERROR; } pP1 = pBt->page1; rc = sqlitepager_write(pP1); diff --git a/src/vdbe.c b/src/vdbe.c index e7831f274d..f45777917f 100644 --- a/src/vdbe.c +++ b/src/vdbe.c @@ -30,7 +30,7 @@ ** But other routines are also provided to help in building up ** a program instruction by instruction. ** -** $Id: vdbe.c,v 1.173 2002/08/28 03:01:00 drh Exp $ +** $Id: vdbe.c,v 1.174 2002/09/01 23:20:46 drh Exp $ */ #include "sqliteInt.h" #include @@ -2959,6 +2959,10 @@ case OP_Transaction: { } break; } + case SQLITE_READONLY: { + rc = SQLITE_OK; + /* Fall thru into the next case */ + } case SQLITE_OK: { busy = 0; break;