From 2af926b95a512d500f2e41e04828cce12249d16b Mon Sep 17 00:00:00 2001 From: drh Date: Tue, 15 May 2001 00:39:25 +0000 Subject: [PATCH] :-) (CVS 217) FossilOrigin-Name: ee6760fb62e81af95796c0fcf1e65e5dc0701194 --- manifest | 12 +- manifest.uuid | 2 +- src/btree.c | 456 ++++++++++++++++++++++++++++++++++---------------- 3 files changed, 322 insertions(+), 148 deletions(-) diff --git a/manifest b/manifest index 8a56566118..389b4b6d91 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C :-)\s(CVS\s216) -D 2001-05-11T11:02:47 +C :-)\s(CVS\s217) +D 2001-05-15T00:39:25 F COPYRIGHT 74a8a6531a42e124df07ab5599aad63870fa0bd4 F Makefile.in acef0f0275a5ca8e68bda165f7f05d810a207664 F README 51f6a4e7408b34afa5bc1c0485f61b6a4efb6958 @@ -12,7 +12,7 @@ F notes/notes1.txt b7c0812b704a022e88c621146ae50955c923d464 F notes/notes2.txt 49b4d7ba35f183feb1fb098a27272b82f5c76eca F notes/notes3.txt cd5e7bd2167d7ef89b1077abdfa68f0af6337744 F src/TODO 38a68a489e56e9fd4a96263e0ff9404a47368ad4 -F src/btree.c 9b985eede929ce3f1a6ad510ecd04c197ce092f7 +F src/btree.c f6d724fccaf108c4452dba6589e6027ba8f3e88a F src/btree.h f21c240d0c95f93e2a128106d04a6c448ed0eb94 F src/build.c 4f6a2d551c56342cd4a0420654835be3ad179651 F src/dbbe.c b18259f99d87240cbe751021cf14dd3aa83a48af @@ -106,7 +106,7 @@ F www/opcode.tcl cb3a1abf8b7b9be9f3a228d097d6bf8b742c2b6f F www/sqlite.tcl cb0d23d8f061a80543928755ec7775da6e4f362f F www/tclsqlite.tcl 06f81c401f79a04f2c5ebfb97e7c176225c0aef2 F www/vdbe.tcl 0c8aaa529dd216ccbf7daaabd80985e413d5f9ad -P 624ccbca98be33a26c2af72d2d91c78f8a06ae9c -R dd7612c12a95a523d5ea5cc83b2e1f75 +P c3e521190f02120a34f1e9244fe1ea3a975a6caa +R d3ffc66c90d6f5774a5954932275df58 U drh -Z 7bf915d3af7096b69289ffd2ecea3428 +Z 0f43bc31472b172caca9953860c2e8b6 diff --git a/manifest.uuid b/manifest.uuid index 5a727989b2..9a322bf7e7 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -c3e521190f02120a34f1e9244fe1ea3a975a6caa \ No newline at end of file +ee6760fb62e81af95796c0fcf1e65e5dc0701194 \ No newline at end of file diff --git a/src/btree.c b/src/btree.c index 0048edd525..89d9f0c24c 100644 --- a/src/btree.c +++ b/src/btree.c @@ -21,13 +21,43 @@ ** http://www.hwaci.com/drh/ ** ************************************************************************* -** $Id: btree.c,v 1.4 2001/05/11 11:02:47 drh Exp $ +** $Id: btree.c,v 1.5 2001/05/15 00:39:25 drh Exp $ */ #include "sqliteInt.h" #include "pager.h" #include "btree.h" #include +/* +** The maximum number of database entries that can be held in a single +** page of the database. +*/ +#define MX_CELL ((SQLITE_PAGE_SIZE-sizeof(PageHdr))/sizeof(Cell)) + +/* +** The maximum amount of data (in bytes) that can be stored locally for a +** database entry. If the entry contains more data than this, the +** extra goes onto overflow pages. +*/ +#define MX_LOCAL_PAYLOAD ((SQLITE_PAGE_SIZE-sizeof(PageHdr)-4*sizeof(Cell))/4) + + +/* +** The in-memory image of a disk page has the auxiliary information appended +** to the end. EXTRA_SIZE is the number of bytes of space needed to hold +** that extra information. +*/ +#define EXTRA_SIZE (sizeof(MemPage)-SQLITE_PAGE_SIZE) + +/* +** Number of bytes on a single overflow page. +*/ +#define OVERFLOW_SIZE (SQLITE_PAGE_SIZE-sizeof(Pgno)) + + +/* +** Primitive data types. u32 must be 4 bytes and u16 must be 2 bytes. +*/ typedef unsigned int u32; typedef unsigned short int u16; @@ -35,37 +65,43 @@ typedef unsigned short int u16; ** Forward declarations of structures used only in this file. */ typedef struct Page1Header Page1Header; +typedef struct MemPage MemPage; typedef struct PageHdr PageHdr; typedef struct Cell Cell; typedef struct FreeBlk FreeBlk; +typedef struct OverflowPage OverflowPage; + +/* +** All structures on a database page are aligned to 4-byte boundries. +** This routine rounds up a number of bytes to the next multiple of 4. +*/ +#define ROUNDUP(X) ((X+3) & ~3) /* -** The first page contains the following additional information: -** -** MAGIC-1 -** MAGIC-2 -** First free block +** The first pages of the database file contains some additional +** information used for housekeeping and sanity checking. Otherwise, +** the first page is just like any other. The additional information +** found on the first page is described by the following structure. */ -#define EXTRA_PAGE_1_CELLS 3 +struct Page1Header { + u32 magic1; /* A magic number for sanity checking */ + u32 magic2; /* A second magic number for sanity checking */ + Pgno firstList; /* First free page in a list of all free pages */ +}; #define MAGIC_1 0x7264dc61 #define MAGIC_2 0x54e55d9e -struct Page1Header { - u32 magic1; - u32 magic2; - Pgno firstList; -}; /* ** Each database page has a header as follows: ** ** page1_header Extra numbers found on page 1 only. -** leftmost_pgno Page number of the leftmost child +** rightmost_pgno Page number of the right-most child page ** first_cell Index into MemPage.aPage of first cell ** first_free Index of first free block ** -** MemPage.pStart always points to the leftmost_pgno. First_free is +** MemPage.pStart always points to the rightmost_pgno. First_free is ** 0 if there is no free space on this page. Otherwise it points to ** an area like this: ** @@ -88,42 +124,10 @@ struct FreeBlk { u16 iSize; /* Number of u32-sized slots in the block of free space */ u16 iNext; /* Index in MemPage.aPage[] of the next free block */ }; - -/* -** The maximum number of database entries that can be held in a single -** page of the database. Each entry has a 16-byte header consisting of -** 4 unsigned 32-bit numbers, as follows: -** -** nKey Number of byte in the key -** nData Number of byte in the data -** pgno Page number of the right child block -** next index in MemPage.aPage[] of the next entry in sorted order -** -** The key and data follow this header. The key and data are packed together -** and the total rounded up to the next multiple of 4 bytes. There must -** be at least 4 bytes in the key/data packet, so each entry consumes at -** least 20 bytes of space on the page. -*/ -#define MX_CELL ((SQLITE_PAGE_SIZE-sizeof(PageHdr))/sizeof(Cell)) - -/* -** The maximum amount of data (in bytes) that can be stored locally for a -** database entry. If the entry contains more data than this, the -** extra goes onto overflow pages. -*/ -#define MX_LOCAL_PAYLOAD ((SQLITE_PAGE_SIZE-20-4*24)/4) - -/* -** On a single disk page, there are sections of the page that are used -** to hold data and sections that are unused and available for holding -** new data. A single instance of this structure describes a contiguous -** block of free space on a disk page. -*/ -struct FreeBlk { - int idx; /* Index into MemPage.aPage[] of the start of freeblock */ - int size; /* Number of MemPage.aPage[] slots used by this block */ +struct OverflowPage { + Pgno next; + char aData[SQLITE_PAGE_SIZE-sizeof(Pgno)]; }; -typedef struct FreeBlk; /* ** For every page in the database file, an instance of the following structure @@ -131,27 +135,20 @@ typedef struct FreeBlk; ** the disk. The rest is auxiliary data that held in memory only. */ struct MemPage { - u32 aPage[SQLITE_PAGE_SIZE/sizeof(u32)]; /* Page data stored on disk */ - unsigned char isInit; /* True if sequel is initialized */ - unsigned char validUp; /* True if MemPage.up is valid */ - unsigned char validLeft; /* True if MemPage.left is valid */ - unsigned char validRight; /* True if MemPage.right is valid */ + char aPage[SQLITE_PAGE_SIZE]; /* Page data stored on disk */ + unsigned char isInit; /* True if sequel is initialized */ + unsigned char validUp; /* True if MemPage.up is valid */ + unsigned char validLeft; /* True if MemPage.left is valid */ + unsigned char validRight; /* True if MemPage.right is valid */ Pgno up; /* The parent page. 0 means this is the root */ Pgno left; /* Left sibling page. 0==none */ Pgno right; /* Right sibling page. 0==none */ int idxStart; /* Index in aPage[] of real data */ - int nFree; /* Number of free slots of aPage[] */ + PageHdr *pStart; /* Points to aPage[idxStart] */ + int nFree; /* Number of free bytes in aPage[] */ int nCell; /* Number of entries on this page */ u32 *aCell[MX_CELL]; /* All entires in sorted order */ } -typedef struct MemPage; - -/* -** The in-memory image of a disk page has the auxiliary information appended -** to the end. EXTRA_SIZE is the number of bytes of space needed to hold -** that extra information. -*/ -#define EXTRA_SIZE (sizeof(MemPage)-SQLITE_PAGE_SIZE) /* ** Everything we need to know about an open database @@ -170,96 +167,163 @@ typedef Btree Bt; ** MemPage.aCell[] of the entry. */ struct Cursor { - Btree *pBt; /* The pointer back to the BTree */ - MemPage *pPage; /* Page that contains the entry */ - int idx; /* Index of the entry in pPage->aCell[] */ - int skip_incr; /* */ + Btree *pBt; /* The pointer back to the BTree */ + Cursor *pPrev, *pNext; /* List of all cursors */ + MemPage *pPage; /* Page that contains the entry */ + int idx; /* Index of the entry in pPage->aCell[] */ + int skip_incr; /* */ }; -/* -** The maximum depth of a cursor -*/ -#define MX_LEVEL 20 - -/* -** Within a cursor, each level off the search tree is an instance of -** this structure. -*/ -typedef struct BtIdxpt BtIdxpt; -struct BtIdxpt { - Pgno pgno; /* The page number */ - u32 *aPage; /* The page data */ - int idx; /* Index into pPage[] */ - u32 *aIdx; /* Pointer to pPage[idx] */ -}; - -/* -** Everything we need to know about a cursor -*/ -struct BtCursor { - Btree *pBt; /* The whole database */ - BtCursor *pPrev, *pNext; /* Linked list of all cursors */ - int valid; /* True if the cursor points to something */ - int nLevel; /* Number of levels of indexing used */ - BtIdxpt *pLevel; /* Pointer to aLevel[nLevel] */ - BtIdxpt aLevel[MX_LEVEL]; /* The index levels */ -}; - - /* ** Defragment the page given. All of the free space ** is collected into one big block at the end of the ** page. */ static void defragmentPage(MemPage *pPage){ + int pc; + int i, n; + FreeBlk *pFBlk; + char newPage[SQLITE_PAGE_SIZE]; + + pc = ROUNDUP(pPage->idxStart + sizeof(PageHdr)); + pPage->pStart->firstCell = pc; + memcpy(newPage, pPage->aPage, pc); + for(i=0; inCell; i++){ + Cell *pCell = &pPage->aCell[i]; + n = pCell->nKey + pCell->nData; + if( n>MAX_LOCAL_PAYLOAD ) n = MAX_LOCAL_PAYLOAD + sizeof(Pgno); + n = ROUNDUP(n); + n += sizeof(Cell) - sizeof(pCell->aData); + pCell->iNext = inCell ? pc + n : 0; + memcpy(&newPage[pc], pCell, n); + pPage->aCell[i] = (Cell*)&pPage->aPage[pc]; + pc += n; + } + assert( pPage->nFree==pc ); + memcpy(pPage->aPage, newPage, pc); + pFBlk = &pPage->aPage[pc]; + pFBlk->iSize = SQLITE_PAGE_SIZE - pc; + pFBlk->iNext = 0; + pPage->pStart->firstFree = pc; + memset(&pFBlk[1], 0, SQLITE_PAGE_SIZE - pc - sizeof(FreeBlk)); } /* -** Mark a section of the memory block as in-use. +** Allocate space on a page. The space needs to be at least +** nByte bytes in size. (Actually, all allocations are rounded +** up to the next even multiple of 4.) Return the index into +** pPage->aPage[] of the first byte of the new allocation. +** Or return 0 if there is not enough space. +** +** This routine will call defragmentPage if necessary to consolidate +** free space. */ -static void useSpace(MemPage *pPage, int start, int size){ +static int allocSpace(MemPage *pPage, int nByte){ + FreeBlk *p; + u16 *pIdx; + int start; + nByte = ROUNDUP(nByte); + if( pPage->nFreepStart->firstFree; + p = (FreeBlk*)&pPage->aPage[*pIdx]; + while( p->iSizeiNext==0 ){ + defragmentPage(pPage); + pIdx = &pPage->pStart->firstFree; + }else{ + pIdx = &p->iNext; + } + p = (FreeBlk*)&pPage->aPage[*pIdx]; + } + if( p->iSize==nByte ){ + start = *pIdx; + *pIdx = p->iNext; + }else{ + p->iSize -= nByte; + start = *pIdx + p->iSize; + } + pPage->nFree -= nByte; + return start; } /* ** Return a section of the MemPage.aPage[] to the freelist. +** The first byte of the new free block is pPage->aPage[start] +** and there are a told of size bytes to be freed. */ static void freeSpace(MemPage *pPage, int start, int size){ + int end = start + size; + u16 *pIdx, idx; + FreeBlk *pFBlk; + FreeBlk *pNew; + FreeBlk *pNext; + + assert( size == ROUNDUP(size) ); + assert( start == ROUNDUP(start) ); + pIdx = &pPage->pStart->firstFree; + idx = *pIdx; + while( idx!=0 && idxaPage[idx]; + if( idx + pFBlk->iSize == start ){ + pFBlk->iSize += size; + if( idx + pFBlk->iSize == pFBlk->iNext ){ + pNext = (FreeBlk*)&pPage->aPage[pFblk->iNext]; + pFBlk->iSize += pNext->iSize; + pFBlk->iNext = pNext->iNext; + } + pPage->nFree += size; + return; + } + pIdx = &pFBlk->iNext; + idx = *pIdx; + } + pNew = (FreeBlk*)&pPage->aPage[start]; + if( idx != end ){ + pNew->iSize = size; + pNew->iNext = idx; + }else{ + pNext = (FreeBlk*)&pPage->aPage[idx]; + pNew->iSize = size + pNext->iSize; + pNew->iNext = pNext->iNext; + } + *pIdx = start; + pPage->nFree += size; } /* ** Initialize the auxiliary information for a disk block. */ static int initPage(MemPage *pPage, Pgno pgnoThis, Pgno pgnoParent){ - u32 idx; + int idx; + Cell *pCell; + FreeBlk *pFBlk; + + pPage->idxStart = (pgnoThis==1) ? sizeof(Page1Header) : 0; + pPage->pStart = (PageHdr*)&pPage->aPage[pPage->idxStart]; pPage->isInit = 1; pPage->validUp = 1; pPage->up = pgnoParent; - pPage->nFreeSlot = SQLITE_PAGE_SIZE/sizeof(pPage->aPage[0]) - 2; - pPage->nFree = 1; - if( pgnoThis==1 ){ - pPage->idxStart = EXTRA_PAGE_1_CELLS; - pPage->nFreeByte -= EXTRA_PAGE_1_CELLS; - } - pPage->aFree[0].idx = pPage->idxStart + 2; - pPage->aFree[0].size = pPage->nFreeByte; pPage->nCell = 0; - idx = pPage->aPage[pPage->idxStart+1]; + idx = pPage->pStart->firstCell; while( idx!=0 ){ - int size; - pPage->aCell[pPage->nCell++] = idx; - size = pPage->aPage[idx] + pPage->aPage[idx+1]; - if( size>MX_LOCAL_PAYLOAD ){ - if( size>MX_DIRECT_PAYLOAD ){ - size = MX_LOCAL_PAYLOAD + 2*sizeof(u32); - }else{ - size = MX_LOCAL_PAYLOAD + sizeof(u32); - } - } - size = (size + sizeof(u32) - 1)/sizeof(u32) + 4; - useSpace(pPage, idx, size); - idx = pPage->aPage[idx+3]; + if( idx>SQLITE_PAGE_SIZE-sizeof(Cell) ) goto page_format_error; + pCell = (Cell*)&pPage->aPage[idx]; + pPage->aCell[pPage->nCell++] = pCell; + idx = pCell->iNext; + } + pPage->nFree = 0; + idx = pPage->pStart->firstFree; + while( idx!=0 ){ + if( idx>SQLITE_PAGE_SIZE-sizeof(FreeBlk) ) goto page_format_error; + pFBlk = (FreeBlk*)&pPage->aPage[idx]; + pPage->nFree += pFBlk->iSize; + if( pFBlk->iNext <= idx ) goto page_format_error; + idx = pFBlk->iNext; } return SQLITE_OK; + +page_format_error: + return SQLITE_CORRUPT; } /* @@ -324,7 +388,7 @@ static int lockBtree(Btree *pBt){ if( pBt->page1 ) return SQLITE_OK; rc = sqlitepager_get(pBt->pPager, 1, &pBt->page1); if( rc!=SQLITE_OK ) return rc; - rc = initPage(pBt->page1); + rc = initPage(pBt->page1, 1, 0); if( rc!=SQLITE_OK ){ sqlitepager_unref(pBt->page1); pBt->page1 = 0; @@ -397,10 +461,18 @@ int sqliteBtreeCursor(Btree *pBt, BtCursor **ppCur){ } pBt->pCursor = pCur; pCur->pBt = pBt; - pCur->nLevel = 1; - pCur->aLevel[0].pgno = 1; - pCur->aLevel[0].aPage = pBt->page1; - pCur->aLevel[0].idx = 0; + rc = sqlitepager_get(pBt->pPager, 1, &pCur->pPage); + if( rc!=SQLITE_OK ){ + sqliteFree(pCur); + *ppCur = 0; + return rc; + } + if( !pCur->pPage->isInit ){ + initPage(pCur->pPage); + } + pCur->idx = 0; + *ppCur = pCur; + return SQLITE_OK; } /* @@ -417,9 +489,7 @@ int sqliteBtreeCloseCursor(BtCursor *pCur){ if( pCur->pNext ){ pCur->pNext->pPrev = pCur->pPrev; } - for(i=pCur->nLevel-1; i>0; i--){ - sqlitepager_unref(pCur->aLevel[i].aPage); - } + sqlitepager_unref(pCur->pPage); if( pBt->pCursor==0 && pBt->inTrans==0 ){ unlockBtree(pBt); } @@ -432,23 +502,127 @@ int sqliteBtreeCloseCursor(BtCursor *pCur){ ** initialized or is pointed to a deleted entry, then return 0. */ int sqliteBtreeKeySize(BtCursor *pCur){ - int nEntry; - u32 *aPage; - BtIdxpt *pIdx; - int offset; - if( !pCur->valid ) return 0; - pIdx = &pCur->aLevel[pCur->nLevel-1]; - aPage = pIdx->aPage; - offset = (pIdx->pgno==1)*EXTRA_PAGE_1_CELLS; - nEntry = aPage[offset]; - if( pIdx->idxpPage; + if( pCur->idx >= pPage->nCell ) return 0; + pCell = pPage->aCell[pCur->idx]; + return pCell->nKey; } + +static int getPayload(BtCursor *pCur, int offset, int amt, char *zBuf){ + char *aData; + Pgno nextPage; + aData = pCur->pPage->aCell[pCur->idx].aData; + if( offsetMX_LOCAL_PAYLOAD ){ + a = MX_LOCAL_PAYLOAD - offset; + } + memcpy(zBuf, &aData[offset], a); + if( a==amt ){ + return SQLITE_OK; + } + offset += a; + zBuf += a; + amt -= a; + if( amt>0 ){ + assert( a==ROUNDUP(a) ); + nextPage = *(Pgno*)&aData[a]; + } + } + while( amt>0 && nextPage ){ + OverflowPage *pOvfl; + rc = sqlitepager_get(pCur->pBt->pPager, nextPage, &pOvfl); + if( rc!=0 ){ + return rc; + } + nextPage = pOvfl->next; + if( offset OVERFLOW_SIZE ){ + a = OVERFLOW_SIZE - offset; + } + memcpy(zBuf, &pOvfl->aData[offset], a); + offset += a; + amt -= a; + zBuf += a; + } + sqlitepager_unref(pOvfl); + } + return amt==0 ? SQLITE_OK : SQLITE_CORRUPT; +} + int sqliteBtreeKey(BtCursor*, int offset, int amt, char *zBuf); int sqliteBtreeDataSize(BtCursor*); int sqliteBtreeData(BtCursor*, int offset, int amt, char *zBuf); +/* +** Compare the key for the entry that pCur points to against the +** given key (pKey,nKeyOrig). Put the comparison result in *pResult. +** The result is negative if pCurpKey. +** +** SQLITE_OK is returned on success. If part of the cursor key +** is on overflow pages and we are unable to access those overflow +** pages, then some other value might be returned to indicate the +** reason for the error. +*/ +static int compareKey(BtCursor *pCur, char *pKey, int nKeyOrig, int *pResult){ + Pgno nextPage; + int nKey = nKeyOrig; + int n; + Cell *pCell; + + assert( pCur->pPage ); + assert( pCur->idx>=0 && pCur->idxpPage->nCell ); + pCell = &pCur->pPage->aCell[pCur->idx]; + if( nKey > pCell->nKey ){ + nKey = pCell->nKey; + } + n = nKey; + if( n>MX_LOCAL_PAYLOAD ){ + n = MX_LOCAL_PAYLOAD; + } + c = memcmp(pCell->aData, pKey, n); + if( c!=0 ){ + *pResult = c; + return SQLITE_OK; + } + pKey += n; + nKey -= n; + nextPage = *(Pgno*)&pCell->aData[MX_LOCAL_PAYLOAD]; + while( nKey>0 ){ + OverflowPage *pOvfl; + if( nextPage==0 ){ + return SQLITE_CORRUPT; + } + rc = sqlitepager_get(pCur->pBt->pPager, nextPage, &pOvfl); + if( rc!=0 ){ + return rc; + } + nextPage = pOvfl->next; + n = nKey; + if( n>OVERFLOW_SIZE ){ + n = OVERFLOW_SIZE; + } + c = memcmp(pOvfl->aData, pKey, n); + sqlitepager_unref(pOvfl); + if( c!=0 ){ + *pResult = c; + return SQLITE_OK; + } + nKey -= n; + pKey += n; + } + c = pCell->nKey - nKeyOrig; + *pResult = c; + return SQLITE_OK; +} + + /* Move the cursor so that it points to an entry near pKey. ** Return 0 if the cursor is left pointing exactly at pKey. ** Return -1 if the cursor points to the largest entry less than pKey.