1
0
mirror of https://github.com/sqlite/sqlite.git synced 2025-07-30 19:03:16 +03:00

Add "doclist index" records to the database. These are to make navigating within very large doclists faster. They are not yet used by queries.

FossilOrigin-Name: 89377421ff69f2450364987afe781b6d8bcbf087
This commit is contained in:
dan
2014-08-01 11:16:25 +00:00
parent 3dbfc8d8e8
commit a29284d65f
4 changed files with 177 additions and 27 deletions

View File

@ -47,6 +47,8 @@
#define FTS5_WORK_UNIT 64 /* Number of leaf pages in unit of work */
#define FTS5_MIN_MERGE 4 /* Minimum number of segments to merge */
#define FTS5_MIN_DLIDX_SIZE 4 /* Add dlidx if this many empty pages */
/*
** Details:
**
@ -184,8 +186,10 @@
** 5. Segment doclist indexes:
**
** A list of varints - the first docid on each page (starting with the
** second) of the doclist. First element in the list is a literal docid.
** Each docid thereafter is a (negative) delta.
** first termless page) of the doclist. First element in the list is a
** literal docid. Each docid thereafter is a (negative) delta. If there
** are no docids at all on a page, a 0x00 byte takes the place of the
** delta value.
*/
/*
@ -235,7 +239,7 @@
** (1<<HEIGHT_BITS). This is because the rowid address space for nodes
** with such a height is used by doclist indexes.
*/
#define FTS5_SEGMENT_MAX_HEIGHT ((1 << FTS5_SEGMENT_HEIGHT_BITS)-1)
#define FTS5_SEGMENT_MAX_HEIGHT ((1 << FTS5_DATA_HEIGHT_B)-1)
/*
** The rowid for the doclist index associated with leaf page pgno of segment
@ -377,7 +381,6 @@ struct Fts5PageWriter {
Fts5Buffer buf; /* Buffer containing page data */
Fts5Buffer term; /* Buffer containing previous term on page */
};
struct Fts5SegWriter {
int iIdx; /* Index to write to */
int iSegid; /* Segid to write to */
@ -388,6 +391,9 @@ struct Fts5SegWriter {
u8 bFirstRowidInPage; /* True if next rowid is first in page */
int nLeafWritten; /* Number of leaf pages written */
int nEmpty; /* Number of contiguous term-less nodes */
Fts5Buffer dlidx; /* Doclist index */
i64 iDlidxPrev; /* Previous rowid appended to dlidx */
int bDlidxPrevValid; /* True if iDlidxPrev is valid */
};
/*
@ -534,7 +540,7 @@ struct Fts5NodeIter {
**
** iLeaf: The page number of the leaf page the entry points to.
**
** term: A split-key that all terms on leaf page $leaf must be greater
** term: A split-key that all terms on leaf page $iLeaf must be greater
** than or equal to. The "term" associated with the first b-tree
** hierarchy entry (the one that points to leaf page 1) is always
** an empty string.
@ -1082,6 +1088,15 @@ static void fts5SegIterInit(
Fts5StructureSegment *pSeg, /* Description of segment */
Fts5SegIter *pIter /* Object to populate */
){
if( pSeg->pgnoFirst==0 ){
/* This happens if the segment is being used as an input to an incremental
** merge and all data has already been "trimmed". See function
** fts5TrimSegments() for details. In this case leave the iterator empty.
** The caller will see the (pIter->pLeaf==0) and assume the iterator is
** at EOF already. */
assert( pIter->pLeaf==0 );
return;
}
if( p->rc==SQLITE_OK ){
memset(pIter, 0, sizeof(*pIter));
@ -2061,6 +2076,33 @@ static int fts5PrefixCompress(
return i;
}
/*
** If the writer has accumulated a non-zero count of term-less leaf pages
** (pWriter->nEmpty), flush that count to the aWriter[1] page buffer as a
** pair of varints: a flag followed by the count. The flag is 1 if the
** accumulated doclist index was also written to the database, which is
** done once the count reaches FTS5_MIN_DLIDX_SIZE, or 0 otherwise.
**
** The doclist-index buffer is reset on every call, whether or not its
** contents were written out.
*/
static void fts5WriteBtreeNEmpty(Fts5Index *p, Fts5SegWriter *pWriter){
  const int nEmpty = pWriter->nEmpty;

  if( nEmpty!=0 ){
    Fts5PageWriter *pNode = &pWriter->aWriter[1];
    int bDlidx = (nEmpty>=FTS5_MIN_DLIDX_SIZE);

    if( bDlidx ){
      /* Enough empty pages to justify a doclist index. Store it under
      ** the rowid derived from the current leaf page number and the
      ** number of empty pages. */
      i64 iRowid = FTS5_DOCLIST_IDX_ROWID(
          pWriter->iIdx, pWriter->iSegid,
          pWriter->aWriter[0].pgno - 1 - nEmpty
      );
      fts5DataWrite(p, iRowid, pWriter->dlidx.p, pWriter->dlidx.n);
    }

    fts5BufferAppendVarint(&p->rc, &pNode->buf, bDlidx);
    fts5BufferAppendVarint(&p->rc, &pNode->buf, nEmpty);
    pWriter->nEmpty = 0;
  }

  /* Start a fresh doclist index for whatever is written next. */
  pWriter->bDlidxPrevValid = 0;
  sqlite3Fts5BufferZero(&pWriter->dlidx);
}
/*
** This is called once for each leaf page except the first that contains
@ -2097,12 +2139,7 @@ static void fts5WriteBtreeTerm(
}
pPage = &pWriter->aWriter[iHeight];
if( pWriter->nEmpty ){
assert( iHeight==1 );
fts5BufferAppendVarint(&p->rc, &pPage->buf, 0);
fts5BufferAppendVarint(&p->rc, &pPage->buf, pWriter->nEmpty);
pWriter->nEmpty = 0;
}
fts5WriteBtreeNEmpty(p, pWriter);
if( pPage->buf.n>=p->pgsz ){
/* pPage will be written to disk. The term will be written into the
@ -2130,9 +2167,34 @@ static void fts5WriteBtreeNoTerm(
Fts5Index *p, /* FTS5 backend object */
Fts5SegWriter *pWriter /* Writer object */
){
if( pWriter->bFirstRowidInPage ){
/* bFirstRowidInPage is still set, so no rowid was written to this
** page. Append a 0x00 byte to the current doclist-index to stand in
** for the page. */
sqlite3Fts5BufferAppendVarint(&p->rc, &pWriter->dlidx, 0);
}
/* Count the page as one more term-less page. */
pWriter->nEmpty++;
}
/*
** Rowid iRowid is the first rowid written to the current leaf page. Add
** a matching entry to the doclist-index buffer of the writer.
**
** The first entry appended after the buffer was last reset is the rowid
** itself. Each later entry is the previously recorded rowid minus iRowid.
** Either way, iRowid is remembered as the new "previous" value.
*/
static void fts5WriteDlidxAppend(
  Fts5Index *p,
  Fts5SegWriter *pWriter,
  i64 iRowid
){
  i64 iEntry = iRowid;              /* Value to append to the dlidx buffer */

  if( pWriter->bDlidxPrevValid ){
    iEntry = pWriter->iDlidxPrev - iRowid;
  }
  sqlite3Fts5BufferAppendVarint(&p->rc, &pWriter->dlidx, iEntry);

  pWriter->iDlidxPrev = iRowid;
  pWriter->bDlidxPrevValid = 1;
}
static void fts5WriteFlushLeaf(Fts5Index *p, Fts5SegWriter *pWriter){
static const u8 zero[] = { 0x00, 0x00, 0x00, 0x00 };
Fts5PageWriter *pPage = &pWriter->aWriter[0];
@ -2226,8 +2288,12 @@ static void fts5WriteAppendRowid(
Fts5PageWriter *pPage = &pWriter->aWriter[0];
/* If this is to be the first docid written to the page, set the
** docid-pointer in the page-header. */
if( pWriter->bFirstRowidInPage ) fts5PutU16(pPage->buf.p, pPage->buf.n);
** docid-pointer in the page-header. Also append a value to the dlidx
** buffer, in case a doclist-index is required. */
if( pWriter->bFirstRowidInPage ){
fts5PutU16(pPage->buf.p, pPage->buf.n);
fts5WriteDlidxAppend(p, pWriter, iRowid);
}
/* Write the docid. */
if( pWriter->bFirstRowidInDoclist || pWriter->bFirstRowidInPage ){
@ -2301,20 +2367,22 @@ static void fts5WritePendingDoclist(
fts5WriteAppendZerobyte(p, pWriter);
}
/*
** Flush any data cached by the writer object to the database. Free any
** allocations associated with the writer.
*/
static void fts5WriteFinish(
Fts5Index *p,
Fts5SegWriter *pWriter,
int *pnHeight,
int *pnLeaf
Fts5SegWriter *pWriter, /* Writer object */
int *pnHeight, /* OUT: Height of the b-tree */
int *pnLeaf /* OUT: Number of leaf pages in b-tree */
){
int i;
*pnLeaf = pWriter->aWriter[0].pgno;
*pnHeight = pWriter->nWriter;
fts5WriteFlushLeaf(p, pWriter);
if( pWriter->nWriter>1 && pWriter->nEmpty ){
Fts5PageWriter *pPg = &pWriter->aWriter[1];
fts5BufferAppendVarint(&p->rc, &pPg->buf, 0);
fts5BufferAppendVarint(&p->rc, &pPg->buf, pWriter->nEmpty);
if( pWriter->nWriter>1 ){
fts5WriteBtreeNEmpty(p, pWriter);
}
for(i=1; i<pWriter->nWriter; i++){
Fts5PageWriter *pPg = &pWriter->aWriter[i];
@ -2327,6 +2395,7 @@ static void fts5WriteFinish(
fts5BufferFree(&pPg->buf);
}
sqlite3_free(pWriter->aWriter);
sqlite3Fts5BufferFree(&pWriter->dlidx);
}
static void fts5WriteInit(
@ -3145,6 +3214,28 @@ static void fts5DecodeFunction(
a = sqlite3_value_blob(apVal[1]);
fts5DecodeRowid(iRowid, &iIdx, &iSegid, &iHeight, &iPgno);
if( iHeight==FTS5_SEGMENT_MAX_HEIGHT ){
  /* This record is a doclist index. Print a header identifying it,
  ** followed by the decoded rowids. The first varint is a literal rowid.
  ** Each subsequent varint is either 0, printed as "x", or a delta that
  ** is subtracted from the previous rowid. */
  int i = 0;
  i64 iPrev = 0;

  /* The format string has exactly three conversions: idx, segid, pgno.
  ** Passing iHeight as well would cause it to be printed as "pgno". */
  sqlite3Fts5BufferAppendPrintf(&rc, &s, "(dlidx idx=%d segid=%d pgno=%d)",
      iIdx, iSegid, iPgno
  );
  if( n>0 ){
    i = getVarint(&a[i], (u64*)&iPrev);
    sqlite3Fts5BufferAppendPrintf(&rc, &s, " %lld", iPrev);
  }
  while( i<n ){
    i64 iVal;
    i += getVarint(&a[i], (u64*)&iVal);
    if( iVal==0 ){
      sqlite3Fts5BufferAppendPrintf(&rc, &s, " x");
    }else{
      iPrev = iPrev - iVal;
      sqlite3Fts5BufferAppendPrintf(&rc, &s, " %lld", iPrev);
    }
  }
}else
if( iSegid==0 ){
if( iRowid==FTS5_AVERAGES_ROWID ){
sqlite3Fts5BufferAppendPrintf(&rc, &s, "{averages} ");

View File

@ -1,5 +1,5 @@
C Add\sa\scomment\sexplaining\swhy\sfts5\scannot\scache\s"sorter\sstatements".
D 2014-07-31T17:53:03.405
C Add\s"doclist\sindex"\srecords\sto\sthe\sdatabase.\sThese\sare\sto\smake\snavigating\swithin\svery\slarge\sdoclists\sfaster.\sThey\sare\snot\syet\sused\sby\squeries.
D 2014-08-01T11:16:25.207
F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f
F Makefile.in b03432313a3aad96c706f8164fb9f5307eaf19f5
F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23
@ -110,7 +110,7 @@ F ext/fts5/fts5_aux.c 366057c7186bc3615deb5ecc0ff61de50b6d2dbc
F ext/fts5/fts5_buffer.c 248c61ac9fec001602efc72a45704f3b8d367c00
F ext/fts5/fts5_config.c f4ebf143e141b8c77355e3b15aba81b7be51d710
F ext/fts5/fts5_expr.c e764d75c58a3accda795f1da1b45960ac87dc77a
F ext/fts5/fts5_index.c 68d2d41b5c6d2f8838c3d6ebdc8b242718b8e997
F ext/fts5/fts5_index.c 618d54ecf41887b6db59491b71e654ae3315f8c9
F ext/fts5/fts5_storage.c 2866e7e1de9dc851756c3a9c76b6e1d75e0facb7
F ext/fts5/fts5parse.y 777da8e5819f75c217982c79c29d014c293acac9
F ext/icu/README.txt d9fbbad0c2f647c3fdf715fc9fd64af53aedfc43
@ -602,6 +602,7 @@ F test/fts5ad.test 2ed38bbc865678cb2905247120d02ebba7f20e07
F test/fts5ae.test cb37b3135a00d3afd5492ec534ecf654be5ff69e
F test/fts5af.test 9ebe23aa3875896076952c7bc6e8308813a63c74
F test/fts5ag.test 0747bf3bade16d5165810cf891f875933b28b420
F test/fts5ah.test bfa6ebd7ee87f73c4146b9e316a105fd0e43d01a
F test/fts5ea.test ff43b40f8879ba50b82def70f2ab67c195d1a1d4
F test/full.test 6b3c8fb43c6beab6b95438c1675374b95fab245d
F test/func.test ae97561957aba6ca9e3a7b8a13aac41830d701ef
@ -1198,7 +1199,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1
F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4
F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32
F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f
P 37a417d27e4ebafd4783f62728d7467316b75b17
R e0b14b9e45e7f8113c4d7a699a937c5a
P e6af3b7a3cf331210f4c87848e2af007dbd5ef30
R a017a4de54c141d4f4f840978af83e33
U dan
Z cd0e862a57439796abd2a3aa1ce5c8f8
Z 90f2786a7e9f28e43c6798f77c65d6dc

View File

@ -1 +1 @@
e6af3b7a3cf331210f4c87848e2af007dbd5ef30
89377421ff69f2450364987afe781b6d8bcbf087

58
test/fts5ah.test Normal file
View File

@ -0,0 +1,58 @@
# 2014 June 17
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#*************************************************************************
# This file implements regression tests for SQLite library. The
# focus of this script is testing the FTS5 module.
#
set testdir [file dirname $argv0]
source $testdir/tester.tcl
set testprefix fts5ah
# If SQLITE_ENABLE_FTS3 is not defined, omit this file.
ifcapable !fts3 {
finish_test
return
}
#-------------------------------------------------------------------------
# This file contains tests for very large doclists.
#
# Populate t1 with 10000 rows, each consisting of twenty "x" tokens. In
# every 2139th row the element at index 3 is replaced by "Y", and in every
# 1577th row the element at index 5 is replaced by "W". The values of i
# for those rows are collected in the lists Y and W.
# NOTE(review): pgsz=128 presumably selects a small page size so that the
# doclist for "x" spans many leaf pages - confirm.
do_test 1.0 {
execsql { CREATE VIRTUAL TABLE t1 USING fts5(a) }
execsql { INSERT INTO t1(t1) VALUES('pgsz=128') }
for {set i 1} {$i <= 10000} {incr i} {
set v {x x x x x x x x x x x x x x x x x x x x}
if {($i % 2139)==0} {lset v 3 Y ; lappend Y $i}
if {($i % 1577)==0} {lset v 5 W ; lappend W $i}
execsql { INSERT INTO t1 VALUES($v) }
}
} {}
# The expected result is the list W sorted in descending integer order.
do_execsql_test 1.1 {
SELECT rowid FROM t1 WHERE t1 MATCH 'x AND w'
} [lsort -integer -decr $W]
# Same check for the list Y, with the rarer term listed first in the query.
do_execsql_test 1.2 {
SELECT rowid FROM t1 WHERE t1 MATCH 'y AND x'
} [lsort -integer -decr $Y]
# Issue the 'integrity-check' special INSERT against the table.
do_execsql_test 1.3 {
INSERT INTO t1(t1) VALUES('integrity-check');
}
# NOTE(review): no expected result is supplied for this row count of
# t1_data - confirm how the harness treats the omitted argument.
do_execsql_test 1.4 {
SELECT count(*) FROM t1_data
}
finish_test