From debbcdfeadf59f64cee8b9cbc717f2f374a10afb Mon Sep 17 00:00:00 2001 From: shess Date: Mon, 13 Nov 2006 21:00:54 +0000 Subject: [PATCH] Require a minimum fanout for interior nodes. This prevents cases where excessively large terms keep the tree from finding a single root. A downside is that this could result in large interior nodes in the presence of large terms, which may be prone to fragmentation, though if the nodes were smaller that would translate into more levels in the tree, which would also have that problem. (CVS 3510) FossilOrigin-Name: 64b7e3406134ac4891113b9bb432ad97504268bb --- ext/fts2/fts2.c | 28 +++++++++++++++++++++++----- manifest | 12 ++++++------ manifest.uuid | 2 +- 3 files changed, 30 insertions(+), 12 deletions(-) diff --git a/ext/fts2/fts2.c b/ext/fts2/fts2.c index 3cd1ad59bf..4689db65c9 100644 --- a/ext/fts2/fts2.c +++ b/ext/fts2/fts2.c @@ -159,9 +159,11 @@ ** iBlockid+i. ** ** New data is spilled to a new interior node at the same height when -** the current node exceeds INTERIOR_MAX bytes (default 2048). The -** interior nodes at a given height are naturally tracked by interior -** nodes at height+1, and so on. +** the current node exceeds INTERIOR_MAX bytes (default 2048). +** INTERIOR_MIN_TERMS (default 7) keeps large terms from monopolizing +** interior nodes and making the tree too skinny. The interior nodes +** at a given height are naturally tracked by interior nodes at +** height+1, and so on. ** ** **** Segment directory **** @@ -3598,6 +3600,16 @@ static int index_update(fulltext_vtab *v, sqlite_int64 iRow, /* How large interior nodes can grow. */ #define INTERIOR_MAX 2048 +/* Minimum number of terms per interior node (except the root). This +** prevents large terms from making the tree too skinny - must be >0 +** so that the tree always makes progress. Note that the min tree +** fanout will be INTERIOR_MIN_TERMS+1. +*/ +#define INTERIOR_MIN_TERMS 7 +#if INTERIOR_MIN_TERMS<1 +# error INTERIOR_MIN_TERMS must be greater than 0. +#endif + /* ROOT_MAX controls how much data is stored inline in the segment ** directory. */ @@ -3642,6 +3654,7 @@ typedef struct InteriorWriter { InteriorBlock *first, *last; struct InteriorWriter *parentWriter; + sqlite_int64 iOpeningChildBlock; /* First child block in block "last". */ #ifndef NDEBUG sqlite_int64 iLastChildBlock; /* for consistency checks. */ #endif @@ -3659,6 +3672,7 @@ static void interiorWriterInit(int iHeight, const char *pTerm, int nTerm, CLEAR(pWriter); pWriter->iHeight = iHeight; + pWriter->iOpeningChildBlock = iChildBlock; #ifndef NDEBUG pWriter->iLastChildBlock = iChildBlock; #endif @@ -3680,11 +3694,15 @@ static void interiorWriterAppend(InteriorWriter *pWriter, #endif assert( pWriter->iLastChildBlock==iChildBlock ); - if( pWriter->last->data.nData+n+nTerm>INTERIOR_MAX ){ - /* Overflow to a new block. */ + /* Overflow to a new block if the new term makes the current block + ** too big, and the current block already has enough terms. + */ + if( pWriter->last->data.nData+n+nTerm>INTERIOR_MAX && + iChildBlock-pWriter->iOpeningChildBlock>INTERIOR_MIN_TERMS ){ pWriter->last->next = interiorBlockNew(pWriter->iHeight, iChildBlock, pTerm, nTerm); pWriter->last = pWriter->last->next; + pWriter->iOpeningChildBlock = iChildBlock; }else{ dataBufferAppend2(&pWriter->last->data, c, n, pTerm, nTerm); } diff --git a/manifest b/manifest index 6f947555e1..787fdc05bc 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Allow\sbacking\stables\sto\sbe\smissing\son\sdropping\sfts\stable.\s\sFixes\nhttp://www.sqlite.org/cvstrac/tktview?tn=1992,35\s.\s(CVS\s3509) -D 2006-11-13T20:15:27 +C Require\sa\sminimum\sfanout\sfor\sinterior\snodes.\s\sThis\sprevents\scases\nwhere\sexcessively\slarge\sterms\skeep\sthe\stree\sfrom\sfinding\sa\ssingle\nroot.\s\sA\sdownside\sis\sthat\sthis\scould\sresult\sin\slarge\sinterior\snodes\sin\nthe\spresence\sof\slarge\sterms,\swhich\smay\sbe\sprone\sto\sfragmentation,\nthough\sif\sthe\snodes\swere\ssmaller\sthat\swould\stranslate\sinto\smore\slevels\nin\sthe\stree,\swhich\swould\salso\shave\sthat\sproblem.\s(CVS\s3510) +D 2006-11-13T21:00:55 F Makefile.in 8e14898d41a53033ecb687d93c9cd5d109fb9ae3 F Makefile.linux-gcc 2d8574d1ba75f129aba2019f0b959db380a90935 F README 9c4e2d6706bdcc3efdd773ce752a8cdab4f90028 @@ -33,7 +33,7 @@ F ext/fts1/fulltext.h 08525a47852d1d62a0be81d3fc3fe2d23b094efd F ext/fts1/simple_tokenizer.c 1844d72f7194c3fd3d7e4173053911bf0661b70d F ext/fts1/tokenizer.h 0c53421b832366d20d720d21ea3e1f6e66a36ef9 F ext/fts2/README.txt 8c18f41574404623b76917b9da66fcb0ab38328d -F ext/fts2/fts2.c 9b28f218c003319c7cddc374e4eae62229ab4f83 +F ext/fts2/fts2.c 667a93b3fe079f20870a3042bd4b4c3841925c01 F ext/fts2/fts2.h bbdab26d34f91974d5b9ade8b7836c140a7c4ce1 F ext/fts2/fts2_hash.c b3f22116d4ef0bc8f2da6e3fdc435c86d0951a9b F ext/fts2/fts2_hash.h e283308156018329f042816eb09334df714e105e @@ -421,7 +421,7 @@ F www/tclsqlite.tcl bb0d1357328a42b1993d78573e587c6dcbc964b9 F www/vdbe.tcl 87a31ace769f20d3627a64fa1fade7fed47b90d0 F www/version3.tcl 890248cf7b70e60c383b0e84d77d5132b3ead42b F www/whentouse.tcl 97e2b5cd296f7d8057e11f44427dea8a4c2db513 -P b9dd16ef3df0785d8d0eea9a6f2ad580a7289834 -R fe80027f82af0692a9ff86e7596c1cb4 +P 9628a61a6f33b7bec3455086534b76437d2622b4 +R 2b75980ff8122f283fe2f8c11a712490 U shess -Z fc29ce5f806e2ffd152c1e84f8a98a96 +Z 5a39d4513967a7196d065949839c93cd diff --git a/manifest.uuid b/manifest.uuid index 9d1bc15954..bb7665bdd3 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -9628a61a6f33b7bec3455086534b76437d2622b4 \ No newline at end of file +64b7e3406134ac4891113b9bb432ad97504268bb \ No newline at end of file