From 8b18dd4fb5e1af588b5531769e7dc37ebc9fa3e7 Mon Sep 17 00:00:00 2001 From: drh Date: Wed, 12 May 2004 19:18:15 +0000 Subject: [PATCH] Implement a B+tree option (all data stored on leaves). (CVS 1365) FossilOrigin-Name: b8f70d17f06531269caa0a127efb2d25ad0f3e1c --- manifest | 17 +++--- manifest.uuid | 2 +- src/btree.c | 135 ++++++++++++++++++++++++++++++++--------------- src/btree.h | 7 +-- test/btree5.test | 11 ++-- test/btree6.test | 127 ++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 237 insertions(+), 62 deletions(-) create mode 100644 test/btree6.test diff --git a/manifest b/manifest index a5090be74d..33aacd2ab3 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Btree\suses\ssigned\sintegers\sfor\sthe\srowid.\s\sThe\sintToKey()\sand\skeyToInt()\smacros\nare\snow\sno-ops.\s(CVS\s1364) -D 2004-05-12T15:15:47 +C Implement\sa\sB+tree\soption\s(all\sdata\sstored\son\sleaves).\s(CVS\s1365) +D 2004-05-12T19:18:16 F Makefile.in ab7b0d5118e2da97bac66be8684a1034e3500f5a F Makefile.linux-gcc b86a99c493a5bfb402d1d9178dcdc4bd4b32f906 F README f1de682fbbd94899d50aca13d387d1b3fd3be2dd @@ -23,8 +23,8 @@ F sqlite.def fc4f5734786fe4743cfe2aa98eb2da4b089edb5f F sqlite.pc.in 30552343140c53304c2a658c080fbe810cd09ca2 F src/attach.c c315c58cb16fd6e913b3bfa6412aedecb4567fa5 F src/auth.c 5c2f0bea4729c98c2be3b69d6b466fc51448fe79 -F src/btree.c 62a870f24d3fa067d206596c7a8686192edf8deb -F src/btree.h 5549569274a78d31c941845e0771b878755b07e5 +F src/btree.c dc347ebd014e4adff75bffe890eb771393fad6c1 +F src/btree.h 6f51ad0ffebfba71295fcacdbe86007512200050 F src/btree_rb.c 9d7973e266ee6f9c61ce592f68742ce9cd5b10e5 F src/build.c f25e4ac9f102efd70188bc09a459c2b461fe2135 F src/copy.c 4d2038602fd0549d80c59bda27d96f13ea9b5e29 @@ -78,7 +78,8 @@ F test/bind.test 56a57043b42c4664ca705f6050e56717a8a6699a F test/btree.test ed5781db83b6c1de02e62781d44915a9abe3450a F test/btree2.test aa4a6d05b1ea90b1acaf83ba89039dd302a88635 F test/btree4.test 3797b4305694c7af6828675b0f4b1424b8ca30e4 -F 
test/btree5.test 56977bd84ec64a8bc6ffdaa36b6621c2103c10e2 +F test/btree5.test 13763ea0aa768dfbcef02d93b0711601e03f84b4 +F test/btree6.test ebcd1b56d500f208fa58ffc8110b4ae56039c6f4 F test/capi2.test ec96e0e235d87b53cbaef3d8e3e0f8ccf32c71ca F test/conflict.test 0911bb2f079046914a6e9c3341b36658c4e2103e F test/copy.test f07ea8d60878da7a67416ab62f78e9706b9d3c45 @@ -190,7 +191,7 @@ F www/sqlite.tcl 3c83b08cf9f18aa2d69453ff441a36c40e431604 F www/tclsqlite.tcl b9271d44dcf147a93c98f8ecf28c927307abd6da F www/vdbe.tcl 9b9095d4495f37697fd1935d10e14c6015e80aa1 F www/whentouse.tcl a8335bce47cc2fddb07f19052cb0cb4d9129a8e4 -P 97de9f7ceebab859ef984d155808575ad321afc0 -R 2679bc087b647fe5d90e5a7c20264be0 +P fb3c80301441f0d255164578601439db3e0c7a61 +R 8c6e7272f61fac86442d80d07609d870 U drh -Z bf6ca6f575d739fe1938b329ef825f50 +Z 1129f764b16985b0218b1b152f1cd264 diff --git a/manifest.uuid b/manifest.uuid index 124f8f1505..41e8a06db6 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -fb3c80301441f0d255164578601439db3e0c7a61 \ No newline at end of file +b8f70d17f06531269caa0a127efb2d25ad0f3e1c \ No newline at end of file diff --git a/src/btree.c b/src/btree.c index ff970fbaca..652ea67532 100644 --- a/src/btree.c +++ b/src/btree.c @@ -9,7 +9,7 @@ ** May you share freely, never taking more than you give. ** ************************************************************************* -** $Id: btree.c,v 1.128 2004/05/12 15:15:47 drh Exp $ +** $Id: btree.c,v 1.129 2004/05/12 19:18:16 drh Exp $ ** ** This file implements a external (disk-based) database using BTrees. 
** For a detailed discussion of BTrees, refer to @@ -196,8 +196,8 @@ static const char zMagicHeader[] = "SQLite format 3"; */ #define PTF_INTKEY 0x01 #define PTF_ZERODATA 0x02 -#define PTF_LEAF 0x04 -/* Idea for the future: PTF_LEAFDATA */ +#define PTF_LEAFDATA 0x04 +#define PTF_LEAF 0x08 /* ** As each page of the file is loaded into memory, an instance of the following @@ -216,7 +216,9 @@ struct MemPage { u8 isOverfull; /* Some aCell[] do not fit on page */ u8 intKey; /* True if intkey flag is set */ u8 leaf; /* True if leaf flag is set */ - u8 zeroData; /* True if zero data flag is set */ + u8 zeroData; /* True if table stores keys only */ + u8 leafData; /* True if tables stores data on leaves only */ + u8 hasData; /* True if this page stores data */ u8 hdrOffset; /* 100 for page 1. 0 otherwise */ u8 needRelink; /* True if need to run relinkCellList() */ int idxParent; /* Index in pParent->aCell[] of this node */ @@ -345,10 +347,10 @@ static void parseCellHeader( }else{ n = 6; } - if( pPage->zeroData ){ - *pnData = 0; - }else{ + if( pPage->hasData ){ n += getVarint(&pCell[n], pnData); + }else{ + *pnData = 0; } n += getVarint(&pCell[n], (u64*)pnKey); *pnHeader = n; @@ -402,7 +404,10 @@ static void _pageIntegrity(MemPage *pPage){ if( pPage->isInit ){ assert( pPage->leaf == ((c & PTF_LEAF)!=0) ); assert( pPage->zeroData == ((c & PTF_ZERODATA)!=0) ); - assert( pPage->intKey == ((c & PTF_INTKEY)!=0) ); + assert( pPage->leafData == ((c & PTF_LEAFDATA)!=0) ); + assert( pPage->intKey == ((c & (PTF_INTKEY|PTF_LEAFDATA))!=0) ); + assert( pPage->hasData == + !(pPage->zeroData || (!pPage->leaf && pPage->leafData)) ); } data = pPage->aData; memset(used, 0, pageSize); @@ -689,9 +694,11 @@ static int initPage( hdr = pPage->hdrOffset; data = pPage->aData; c = data[hdr]; - pPage->intKey = (c & PTF_INTKEY)!=0; + pPage->intKey = (c & (PTF_INTKEY|PTF_LEAFDATA))!=0; pPage->zeroData = (c & PTF_ZERODATA)!=0; + pPage->leafData = (c & PTF_LEAFDATA)!=0; pPage->leaf = (c & PTF_LEAF)!=0; + 
pPage->hasData = !(pPage->zeroData || (!pPage->leaf && pPage->leafData)); pPage->isOverfull = 0; pPage->needRelink = 0; pPage->idxShift = 0; @@ -763,9 +770,11 @@ static void zeroPage(MemPage *pPage, int flags){ pPage->nCell = 0; pPage->nCellAlloc = 0; pPage->nFree = pBt->pageSize - first; - pPage->intKey = (flags & PTF_INTKEY)!=0; - pPage->leaf = (flags & PTF_LEAF)!=0; + pPage->intKey = (flags & (PTF_INTKEY|PTF_LEAFDATA))!=0; pPage->zeroData = (flags & PTF_ZERODATA)!=0; + pPage->leafData = (flags & PTF_LEAFDATA)!=0; + pPage->leaf = (flags & PTF_LEAF)!=0; + pPage->hasData = !(pPage->zeroData || (!pPage->leaf && pPage->leafData)); pPage->hdrOffset = hdr; pPage->isOverfull = 0; pPage->needRelink = 0; @@ -1033,7 +1042,7 @@ static int newDatabase(Btree *pBt){ data[18] = 1; data[19] = 1; put2byte(&data[22], (SQLITE_PAGE_SIZE-10)/4-12); - zeroPage(pP1, PTF_INTKEY|PTF_LEAF); + zeroPage(pP1, PTF_INTKEY|PTF_LEAF ); return SQLITE_OK; } @@ -1410,7 +1419,7 @@ int sqlite3BtreeKeySize(BtCursor *pCur, i64 *pSize){ if( !pPage->leaf ){ cell += 4; /* Skip the child pointer */ } - if( !pPage->zeroData ){ + if( pPage->hasData ){ while( (0x80&*(cell++))!=0 ){} /* Skip the data size number */ } getVarint(cell, pSize); @@ -1437,7 +1446,7 @@ int sqlite3BtreeDataSize(BtCursor *pCur, u32 *pSize){ assert( pPage!=0 ); assert( pPage->isInit ); pageIntegrity(pPage); - if( pPage->zeroData ){ + if( !pPage->hasData ){ *pSize = 0; }else{ assert( pCur->idx>=0 && pCur->idx<pPage->nCell ); @@ -1488,10 +1497,10 @@ static int getPayload( if( !pPage->leaf ){ aPayload += 4; /* Skip the child pointer */ } - if( pPage->zeroData ){ - nData = 0; - }else{ + if( pPage->hasData ){ aPayload += getVarint(aPayload, &nData); + }else{ + nData = 0; } aPayload += getVarint(aPayload, (u64*)&nKey); if( pPage->intKey ){ @@ -1635,10 +1644,10 @@ static const unsigned char *fetchPayload( if( !pPage->leaf ){ aPayload += 4; /* Skip the child pointer */ } - if( pPage->zeroData ){ - nData = 0; - }else{ + if( pPage->hasData ){ aPayload += 
getVarint(aPayload, &nData); + }else{ + nData = 0; } aPayload += getVarint(aPayload, (u64*)&nKey); if( pPage->intKey ){ @@ -1977,9 +1986,13 @@ int sqlite3BtreeMoveto(BtCursor *pCur, const void *pKey, i64 nKey, int *pRes){ if( rc ) return rc; } if( c==0 ){ - pCur->iMatch = c; - if( pRes ) *pRes = 0; - return SQLITE_OK; + if( pPage->leafData && !pPage->leaf ){ + break; + }else{ + pCur->iMatch = c; + if( pRes ) *pRes = 0; + return SQLITE_OK; + } } if( c<0 ){ lwr = pCur->idx+1; @@ -2031,6 +2044,7 @@ int sqlite3BtreeEof(BtCursor *pCur){ int sqlite3BtreeNext(BtCursor *pCur, int *pRes){ int rc; MemPage *pPage = pCur->pPage; + assert( pRes!=0 ); if( pCur->isValid==0 ){ *pRes = 1; @@ -2057,7 +2071,12 @@ int sqlite3BtreeNext(BtCursor *pCur, int *pRes){ pPage = pCur->pPage; }while( pCur->idx>=pPage->nCell ); *pRes = 0; - return SQLITE_OK; + if( pPage->leafData ){ + rc = sqlite3BtreeNext(pCur, pRes); + }else{ + rc = SQLITE_OK; + } + return rc; } *pRes = 0; if( pPage->leaf ){ @@ -2100,7 +2119,11 @@ int sqlite3BtreePrevious(BtCursor *pCur, int *pRes){ pPage = pCur->pPage; } pCur->idx--; - rc = SQLITE_OK; + if( pPage->leafData ){ + rc = sqlite3BtreePrevious(pCur, pRes); + }else{ + rc = SQLITE_OK; + } } *pRes = 0; return rc; @@ -2335,13 +2358,13 @@ static int fillInCell( if( !pPage->leaf ){ nHeader += 4; } - if( !pPage->zeroData ){ + if( pPage->hasData ){ nHeader += putVarint(&pCell[nHeader], nData); } nHeader += putVarint(&pCell[nHeader], *(u64*)&nKey); /* Fill in the payload */ - if( pPage->zeroData ){ + if( !pPage->hasData ){ nData = 0; } nPayload = nData; @@ -2687,6 +2710,7 @@ static int balance(MemPage *pPage){ int nxDiv; /* Next divider slot in pParent->aCell[] */ int rc; /* The return code */ int leafCorrection; /* 4 if pPage is a leaf. 
0 if not */ + int leafData; /* True if pPage is a leaf of a LEAFDATA tree */ int usableSpace; /* Bytes in pPage beyond the header */ int pageFlags; /* Value of pPage->aData[0] */ int subtotal; /* Subtotal of bytes in cells on one page */ @@ -2713,7 +2737,7 @@ static int balance(MemPage *pPage){ assert( pPage->isInit ); assert( sqlite3pager_iswriteable(pPage->aData) ); pBt = pPage->pBt; - if( !pPage->isOverfull && pPage->nFree<pBt->pageSize/2 && pPage->nCell>=2){ + if( !pPage->isOverfull && pPage->nFree<pBt->pageSize*2/3 && pPage->nCell>=2){ relinkCellList(pPage); return SQLITE_OK; } @@ -2912,6 +2936,7 @@ static int balance(MemPage *pPage){ */ nCell = 0; leafCorrection = pPage->leaf*4; + leafData = pPage->leafData && pPage->leaf; for(i=0; i<nOld; i++){ MemPage *pOld = apCopy[i]; for(j=0; j<pOld->nCell; j++){ @@ -2920,21 +2945,26 @@ static int balance(MemPage *pPage){ nCell++; } if( i<nOld-1 ){ - szCell[nCell] = cellSize(pParent, apDiv[i]); - memcpy(aTemp[i], apDiv[i], szCell[nCell]); - apCell[nCell] = &aTemp[i][leafCorrection]; - dropCell(pParent, nxDiv, szCell[nCell]); - szCell[nCell] -= leafCorrection; - assert( get4byte(&aTemp[i][2])==pgnoOld[i] ); - if( !pOld->leaf ){ - assert( leafCorrection==0 ); - /* The right pointer of the child page pOld becomes the left - ** pointer of the divider cell */ - memcpy(&apCell[nCell][2], &pOld->aData[pOld->hdrOffset+6], 4); + if( leafData ){ + int sz = cellSize(pParent, apDiv[i]); + dropCell(pParent, nxDiv, sz); }else{ - assert( leafCorrection==4 ); + szCell[nCell] = cellSize(pParent, apDiv[i]); + memcpy(aTemp[i], apDiv[i], szCell[nCell]); + apCell[nCell] = &aTemp[i][leafCorrection]; + dropCell(pParent, nxDiv, szCell[nCell]); + szCell[nCell] -= leafCorrection; + assert( get4byte(&aTemp[i][2])==pgnoOld[i] ); + if( !pOld->leaf ){ + assert( leafCorrection==0 ); + /* The right pointer of the child page pOld becomes the left + ** pointer of the divider cell */ + memcpy(&apCell[nCell][2], &pOld->aData[pOld->hdrOffset+6], 4); + }else{ + assert( leafCorrection==4 ); + } + nCell++; } - nCell++; } } @@ -2954,6 +2984,7 @@ static int balance(MemPage *pPage){ if( subtotal > usableSpace ){ szNew[k] = subtotal - szCell[i]; cntNew[k] = i; + if( leafData ){ i--; } subtotal = 0; k++; } @@ -3064,16 +3095,28 @@ static int balance(MemPage *pPage){ assert( !pNew->isOverfull ); 
relinkCellList(pNew); if( i<nNew-1 && j<nCell ){ - u8 *pCell = apCell[j]; + u8 *pCell; u8 *pTemp; + int sz; + pCell = apCell[j]; + sz = szCell[j] + leafCorrection; if( !pNew->leaf ){ memcpy(&pNew->aData[6], pCell+2, 4); pTemp = 0; + }else if( leafData ){ + i64 nKey; + u64 nData; + int nHeader; + j--; + parseCellHeader(pNew, apCell[j], &nData, &nKey, &nHeader); + pCell = aInsBuf[i]; + fillInCell(pParent, pCell, 0, nKey, 0, 0, &sz); + pTemp = 0; }else{ pCell -= 4; pTemp = aInsBuf[i]; } - insertCell(pParent, nxDiv, pCell, szCell[j]+leafCorrection, pTemp); + insertCell(pParent, nxDiv, pCell, sz, pTemp); put4byte(&pParent->aCell[nxDiv][2], pNew->pgno); j++; nxDiv++; @@ -3200,6 +3243,7 @@ int sqlite3BtreeInsert( if( rc ) return rc; pPage = pCur->pPage; assert( pPage->intKey || nKey>=0 ); + assert( pPage->leaf || !pPage->leafData ); TRACE(("INSERT: table=%d nkey=%lld ndata=%d page=%d %s\n", pCur->pgnoRoot, nKey, nData, pPage->pgno, loc==0 ? "overwrite" : "new entry")); @@ -3284,6 +3328,7 @@ int sqlite3BtreeDelete(BtCursor *pCur){ int szNext; int notUsed; unsigned char tempCell[MX_CELL_SIZE]; + assert( !pPage->leafData ); getTempCursor(pCur, &leafCur); rc = sqlite3BtreeNext(&leafCur, &notUsed); if( rc!=SQLITE_OK ){ @@ -3518,9 +3563,11 @@ int sqlite3BtreePageDump(Btree *pBt, int pgno, int recursive){ hdr = pPage->hdrOffset; data = pPage->aData; c = data[hdr]; - pPage->intKey = (c & PTF_INTKEY)!=0; + pPage->intKey = (c & (PTF_INTKEY|PTF_LEAFDATA))!=0; pPage->zeroData = (c & PTF_ZERODATA)!=0; + pPage->leafData = (c & PTF_LEAFDATA)!=0; pPage->leaf = (c & PTF_LEAF)!=0; + pPage->hasData = !(pPage->zeroData || (!pPage->leaf && pPage->leafData)); printf("PAGE %d: flags=0x%02x frag=%d parent=%d\n", pgno, data[hdr], data[hdr+5], (pPage->isInit && pPage->pParent) ? pPage->pParent->pgno : 0); diff --git a/src/btree.h b/src/btree.h index 99a68126af..d36f3aa52b 100644 --- a/src/btree.h +++ b/src/btree.h @@ -13,7 +13,7 @@ ** subsystem. See comments in the source code for a detailed description ** of what each interface routine does. 
** -** @(#) $Id: btree.h,v 1.46 2004/05/12 15:15:47 drh Exp $ +** @(#) $Id: btree.h,v 1.47 2004/05/12 19:18:17 drh Exp $ */ #ifndef _BTREE_H_ #define _BTREE_H_ @@ -55,8 +55,9 @@ int sqlite3BtreeCopyFile(Btree *, Btree *); /* The flags parameter to sqlite3BtreeCreateTable can be the bitwise OR ** of the following flags: */ -#define BTREE_INTKEY 1 /* Table has only 64-bit integer keys */ -#define BTREE_ZERODATA 2 /* Table has keys only - no data */ +#define BTREE_INTKEY 1 /* Table has only 64-bit signed integer keys */ +#define BTREE_ZERODATA 2 /* Table has keys only - no data */ +#define BTREE_LEAFDATA 4 /* Data stored in leaves only. Implies INTKEY */ int sqlite3BtreeDropTable(Btree*, int); int sqlite3BtreeClearTable(Btree*, int); diff --git a/test/btree5.test b/test/btree5.test index f89ccf8ae5..8a99a1718f 100644 --- a/test/btree5.test +++ b/test/btree5.test @@ -11,7 +11,7 @@ # This file implements regression tests for SQLite library. The # focus of this script is btree database backend # -# $Id: btree5.test,v 1.2 2004/05/11 00:58:56 drh Exp $ +# $Id: btree5.test,v 1.3 2004/05/12 19:18:17 drh Exp $ set testdir [file dirname $argv0] @@ -123,7 +123,6 @@ proc check_table {N} { set fdata1 [btree_fetch_data $c1 $n] set fdata2 [btree_fetch_data $c1 -1] if {$fdata1 ne "" && $fdata1 ne $data} { -puts "fdata1=[list $fdata1] data=[list $data]" return "DataFetch returned the wrong value with amt=$n" } if {$fdata1 ne $fdata2} { @@ -154,20 +153,20 @@ set btree_trace 0 # set cnt 0 for {set i 1} {$i<=100} {incr i} { - do_test test5-2.$i.1 { + do_test btree5-2.$i.1 { random_inserts 200 incr cnt 200 check_table $cnt } {} - do_test test5-2.$i.2 { + do_test btree5-2.$i.2 { btree_integrity_check $b1 1 } {} - do_test test5-2.$i.3 { + do_test btree5-2.$i.3 { random_deletes 190 incr cnt -190 check_table $cnt } {} - do_test test5-2.$i.4 { + do_test btree5-2.$i.4 { btree_integrity_check $b1 1 } {} } diff --git a/test/btree6.test b/test/btree6.test new file mode 100644 index 
0000000000..4f23ca44a5 --- /dev/null +++ b/test/btree6.test @@ -0,0 +1,127 @@ +# 2004 May 10 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# This file implements regression tests for SQLite library. The +# focus of this script is btree database backend - specifically +# the B+tree tables. B+trees store all data on the leaves rather +# that storing data with keys on interior nodes. +# +# $Id: btree6.test,v 1.1 2004/05/12 19:18:17 drh Exp $ + + +set testdir [file dirname $argv0] +source $testdir/tester.tcl + + +# Insert many entries into the table that cursor $cur points to. +# The table should be an INTKEY table. +# +# Stagger the inserts. After the inserts complete, go back and do +# deletes. Stagger the deletes too. Repeat this several times. +# + +# Do N inserts into table $tab using random keys between 0 and 1000000 +# +proc random_inserts {cur N} { + global inscnt + while {$N>0} { + set k [expr {int(rand()*1000000)}] + if {[btree_move_to $cur $k]==0} { + continue; # entry already exists + } + incr inscnt + btree_insert $cur $k data-for-$k + incr N -1 + } +} +set inscnt 0 + +# Do N delete from the table that $cur points to. +# +proc random_deletes {cur N} { + while {$N>0} { + set k [expr {int(rand()*1000000)}] + btree_move_to $cur $k + btree_delete $cur + incr N -1 + } +} + +# Make sure the table that $cur points to has exactly N entries. +# Make sure the data for each entry agrees with its key. 
+# +proc check_table {cur N} { + btree_first $cur + set cnt 0 + while {![btree_eof $cur]} { + if {[set data [btree_data $cur]] ne "data-for-[btree_key $cur]"} { + return "wrong data for entry $cnt" + } + set n [string length $data] + set fdata1 [btree_fetch_data $cur $n] + set fdata2 [btree_fetch_data $cur -1] + if {$fdata1 ne "" && $fdata1 ne $data} { + return "DataFetch returned the wrong value with amt=$n" + } + if {$fdata1 ne $fdata2} { + return "DataFetch returned the wrong value when amt=-1" + } + if {$n>10} { + set fdata3 [btree_fetch_data $cur 10] + if {$fdata3 ne [string range $data 0 9]} { + return "DataFetch returned the wrong value when amt=10" + } + } + incr cnt + btree_next $cur + } + if {$cnt!=$N} { + return "wrong number of entries. Got $cnt. Looking for $N" + } + return {} +} + +# Initialize the database +# +file delete -force test1.bt +file delete -force test1.bt-journal +set b1 [btree_open test1.bt 2000 0] +btree_begin_transaction $b1 +set tab [btree_create_table $b1 5] +set cur [btree_cursor $b1 $tab 1] +set btree_trace 0 +expr srand(1) + +# Do the tests. +# +set cnt 0 +for {set i 1} {$i<=100} {incr i} { + do_test btree6-1.$i.1 { + random_inserts $cur 200 + incr cnt 200 + check_table $cur $cnt + } {} + do_test btree6-1.$i.2 { + btree_integrity_check $b1 1 $tab + } {} + do_test btree6-1.$i.3 { + random_deletes $cur 190 + incr cnt -190 + check_table $cur $cnt + } {} + do_test btree6-1.$i.4 { + btree_integrity_check $b1 1 $tab + } {} +} + +btree_close_cursor $cur +btree_commit $b1 + +finish_test