Merge the latest trunk enhancements into the begin-concurrent branch.

FossilOrigin-Name: faef90eac3925341df958825d80466d737b478d4f0bdaead73f0e069770e923a
2025-08-08 14:02:16 +03:00 · 2024-12-16 11:19:00 +00:00
parent a835f61c01 bba1953426
commit bf943176d0
65 changed files with 2301 additions and 879 deletions
--- a/ext/expert/sqlite3expert.c
+++ b/ext/expert/sqlite3expert.c
@@ -1491,7 +1491,7 @@ static int idxCreateVtabSchema(sqlite3expert *p, char **pzErrmsg){
    }else{
      IdxTable *pTab;
      rc = idxGetTableInfo(p->db, zName, &pTab, pzErrmsg);
-      if( rc==SQLITE_OK ){
+      if( rc==SQLITE_OK && ALWAYS(pTab!=0) ){
        int i;
        char *zInner = 0;
        char *zOuter = 0;
--- a/ext/fts3/fts3_expr.c
+++ b/ext/fts3/fts3_expr.c
@@ -319,10 +319,11 @@ static int getNextString(
        Fts3PhraseToken *pToken;

        p = fts3ReallocOrFree(p, nSpace + ii*sizeof(Fts3PhraseToken));
-        if( !p ) goto no_mem;
-
        zTemp = fts3ReallocOrFree(zTemp, nTemp + nByte);
-        if( !zTemp ) goto no_mem;
+        if( !zTemp || !p ){
+          rc = SQLITE_NOMEM;
+          goto getnextstring_out;
+        }

        assert( nToken==ii );
        pToken = &((Fts3Phrase *)(&p[1]))->aToken[ii];
@@ -337,9 +338,6 @@ static int getNextString(
        nToken = ii+1;
      }
    }
-
-    pModule->xClose(pCursor);
-    pCursor = 0;
  }

  if( rc==SQLITE_DONE ){
@@ -347,7 +345,10 @@ static int getNextString(
    char *zBuf = 0;

    p = fts3ReallocOrFree(p, nSpace + nToken*sizeof(Fts3PhraseToken) + nTemp);
-    if( !p ) goto no_mem;
+    if( !p ){
+      rc = SQLITE_NOMEM;
+      goto getnextstring_out;
+    }
    memset(p, 0, (char *)&(((Fts3Phrase *)&p[1])->aToken[0])-(char *)p);
    p->eType = FTSQUERY_PHRASE;
    p->pPhrase = (Fts3Phrase *)&p[1];
@@ -355,11 +356,9 @@ static int getNextString(
    p->pPhrase->nToken = nToken;

    zBuf = (char *)&p->pPhrase->aToken[nToken];
+    assert( nTemp==0 || zTemp );
    if( zTemp ){
      memcpy(zBuf, zTemp, nTemp);
-      sqlite3_free(zTemp);
-    }else{
-      assert( nTemp==0 );
    }

    for(jj=0; jj<p->pPhrase->nToken; jj++){
@@ -369,17 +368,17 @@ static int getNextString(
    rc = SQLITE_OK;
  }

-  *ppExpr = p;
-  return rc;
-no_mem:
-
+ getnextstring_out:
  if( pCursor ){
    pModule->xClose(pCursor);
  }
  sqlite3_free(zTemp);
-  sqlite3_free(p);
-  *ppExpr = 0;
-  return SQLITE_NOMEM;
+  if( rc!=SQLITE_OK ){
+    sqlite3_free(p);
+    p = 0;
+  }
+  *ppExpr = p;
+  return rc;
 }

 /*
--- a/ext/fts5/fts5.h
+++ b/ext/fts5/fts5.h
@@ -298,16 +298,31 @@ struct Fts5PhraseIter {
 **   value returned by xInstCount(), SQLITE_RANGE is returned.  Otherwise,
 **   output variable (*ppToken) is set to point to a buffer containing the
 **   matching document token, and (*pnToken) to the size of that buffer in 
-**   bytes. This API is not available if the specified token matches a 
-**   prefix query term. In that case both output variables are always set 
-**   to 0.
+**   bytes. 
 **
 **   The output text is not a copy of the document text that was tokenized.
 **   It is the output of the tokenizer module. For tokendata=1 tables, this 
 **   includes any embedded 0x00 and trailing data.
 **
+**   This API may be slow in some cases if the token identified by parameters 
+**   iIdx and iToken matched a prefix token in the query. In most cases, the
+**   first call to this API for each prefix token in the query is forced
+**   to scan the portion of the full-text index that matches the prefix
+**   token to collect the extra data required by this API. If the prefix
+**   token matches a large number of token instances in the document set,
+**   this may be a performance problem. 
+**
+**   If the user knows in advance that a query may use this API for a
+**   prefix token, FTS5 may be configured to collect all required data as part
+**   of the initial querying of the full-text index, avoiding the second scan
+**   entirely. This also causes prefix queries that do not use this API to 
+**   run more slowly and use more memory. FTS5 may be configured in this way
+**   either on a per-table basis using the [FTS5 insttoken | 'insttoken'] 
+**   option, or on a per-query basis using the 
+**   [fts5_insttoken | fts5_insttoken()] user function.
+**
 **   This API can be quite slow if used with an FTS5 table created with the
-**   "detail=none" or "detail=column" option.
+**   "detail=none" or "detail=column" option. 
 **
 ** xColumnLocale(pFts5, iIdx, pzLocale, pnLocale)
 **   If parameter iCol is less than zero, or greater than or equal to the
--- a/ext/fts5/fts5Int.h
+++ b/ext/fts5/fts5Int.h
@@ -247,7 +247,8 @@ struct Fts5Config {
  char *zRank;                    /* Name of rank function */
  char *zRankArgs;                /* Arguments to rank function */
  int bSecureDelete;              /* 'secure-delete' */
-  int nDeleteMerge;           /* 'deletemerge' */
+  int nDeleteMerge;               /* 'deletemerge' */
+  int bPrefixInsttoken;           /* 'insttoken' */

  /* If non-NULL, points to sqlite3_vtab.base.zErrmsg. Often NULL. */
  char **pzErrmsg;
@@ -504,7 +505,14 @@ int sqlite3Fts5StructureTest(Fts5Index*, void*);
 /*
 ** Used by xInstToken():
 */
-int sqlite3Fts5IterToken(Fts5IndexIter*, i64, int, int, const char**, int*);
+int sqlite3Fts5IterToken(
+  Fts5IndexIter *pIndexIter, 
+  const char *pToken, int nToken,
+  i64 iRowid,
+  int iCol, 
+  int iOff, 
+  const char **ppOut, int *pnOut
+);

 /*
 ** Insert or remove data to or from the index. Each time a document is 
--- a/ext/fts5/fts5_config.c
+++ b/ext/fts5/fts5_config.c
@@ -1026,6 +1026,19 @@ int sqlite3Fts5ConfigSetValue(
    }else{
      pConfig->bSecureDelete = (bVal ? 1 : 0);
    }
+  }
+
+  else if( 0==sqlite3_stricmp(zKey, "insttoken") ){
+    int bVal = -1;
+    if( SQLITE_INTEGER==sqlite3_value_numeric_type(pVal) ){
+      bVal = sqlite3_value_int(pVal);
+    }
+    if( bVal<0 ){
+      *pbBadkey = 1;
+    }else{
+      pConfig->bPrefixInsttoken = (bVal ? 1 : 0);
+    }
+
  }else{
    *pbBadkey = 1;
  }
--- a/ext/fts5/fts5_expr.c
+++ b/ext/fts5/fts5_expr.c
@@ -3046,7 +3046,7 @@ static int fts5ExprPopulatePoslistsCb(
        int rc = sqlite3Fts5PoslistWriterAppend(
            &pExpr->apExprPhrase[i]->poslist, &p->aPopulator[i].writer, p->iOff
        );
-        if( rc==SQLITE_OK && pExpr->pConfig->bTokendata && !pT->bPrefix ){
+        if( rc==SQLITE_OK && (pExpr->pConfig->bTokendata || pT->bPrefix) ){
          int iCol = p->iOff>>32;
          int iTokOff = p->iOff & 0x7FFFFFFF;
          rc = sqlite3Fts5IndexIterWriteTokendata(
@@ -3239,15 +3239,14 @@ int sqlite3Fts5ExprInstToken(
    return SQLITE_RANGE;
  }
  pTerm = &pPhrase->aTerm[iToken];
-  if( pTerm->bPrefix==0 ){
-    if( pExpr->pConfig->bTokendata ){
-      rc = sqlite3Fts5IterToken(
-          pTerm->pIter, iRowid, iCol, iOff+iToken, ppOut, pnOut
-      );
-    }else{
-      *ppOut = pTerm->pTerm;
-      *pnOut = pTerm->nFullTerm;
-    }
+  if( pExpr->pConfig->bTokendata || pTerm->bPrefix ){
+    rc = sqlite3Fts5IterToken(
+        pTerm->pIter, pTerm->pTerm, pTerm->nQueryTerm, 
+        iRowid, iCol, iOff+iToken, ppOut, pnOut
+    );
+  }else{
+    *ppOut = pTerm->pTerm;
+    *pnOut = pTerm->nFullTerm;
  }
  return rc;
 }
--- a/ext/fts5/fts5_index.c
+++ b/ext/fts5/fts5_index.c
@@ -6203,6 +6203,383 @@ static void fts5MergePrefixLists(
  *p1 = out;
 }

+
+/*
+** Iterate through a range of entries in the FTS index, invoking the xVisit
+** callback for each of them.
+**
+** Parameter pToken points to an nToken byte buffer containing an FTS index term
+** (i.e. a document term with the preceding 1 byte index identifier -
+** FTS5_MAIN_PREFIX or similar). If bPrefix is true, then the call visits
+** all entries for terms that have pToken/nToken as a prefix. If bPrefix
+** is false, then only entries with pToken/nToken as the entire key are
+** visited. 
+**
+** If the current table is a tokendata=1 table, then if bPrefix is true then
+** each index term is treated separately. However, if bPrefix is false, then
+** all index terms corresponding to pToken/nToken are collapsed into a single
+** term before the callback is invoked.
+**
+** The callback invoked for each entry visited is specified by parameter xVisit.
+** Each time it is invoked, it is passed a pointer to the Fts5Index object,
+** a copy of the 7th parameter to this function (pCtx) and a pointer to the
+** iterator that indicates the current entry. If the current entry is the
+** first with a new term (i.e. different from that of the previous entry,
+** including the very first term), then the final two parameters are passed
+** a pointer to the term and its size in bytes, respectively. If the current
+** entry is not the first associated with its term, these two parameters
+** are passed 0.
+**
+** If parameter pColset is not NULL, then it is used to filter entries before
+** the callback is invoked.
+*/
+static int fts5VisitEntries(
+  Fts5Index *p,                   /* Fts5 index object */
+  Fts5Colset *pColset,            /* Columns filter to apply, or NULL */
+  u8 *pToken,                     /* Buffer containing token */
+  int nToken,                     /* Size of buffer pToken in bytes */
+  int bPrefix,                    /* True for a prefix scan */
+  void (*xVisit)(Fts5Index*, void *pCtx, Fts5Iter *pIter, const u8*, int),
+  void *pCtx                      /* Passed as second argument to xVisit() */
+){
+  const int flags = (bPrefix ? FTS5INDEX_QUERY_SCAN : 0)
+                  | FTS5INDEX_QUERY_SKIPEMPTY 
+                  | FTS5INDEX_QUERY_NOOUTPUT;
+  Fts5Iter *p1 = 0;     /* Iterator used to gather data from index */
+  int bNewTerm = 1;
+  Fts5Structure *pStruct = fts5StructureRead(p);
+
+  fts5MultiIterNew(p, pStruct, flags, pColset, pToken, nToken, -1, 0, &p1);
+  fts5IterSetOutputCb(&p->rc, p1);
+  for( /* no-op */ ;
+      fts5MultiIterEof(p, p1)==0;
+      fts5MultiIterNext2(p, p1, &bNewTerm)
+  ){
+    Fts5SegIter *pSeg = &p1->aSeg[ p1->aFirst[1].iFirst ];
+    int nNew = 0;
+    const u8 *pNew = 0;
+
+    p1->xSetOutputs(p1, pSeg);
+    if( p->rc ) break;
+
+    if( bNewTerm ){
+      nNew = pSeg->term.n;
+      pNew = pSeg->term.p;
+      if( nNew<nToken || memcmp(pToken, pNew, nToken) ) break;
+    }
+
+    xVisit(p, pCtx, p1, pNew, nNew);
+  }
+  fts5MultiIterFree(p1);
+
+  fts5StructureRelease(pStruct);
+  return p->rc;
+}
+
+
+/*
+** Usually, a tokendata=1 iterator (struct Fts5TokenDataIter) accumulates an
+** array of these for each row it visits (so all iRowid fields are the same).
+** Or, for an iterator used by an "ORDER BY rank" query, it accumulates an
+** array of these for the entire query (in which case iRowid fields may take
+** a variety of values).
+**
+** Each instance in the array indicates the iterator (and therefore term)
+** associated with position iPos of rowid iRowid. This is used by the
+** xInstToken() API.
+**
+** iRowid:
+**   Rowid for the current entry.
+**
+** iPos:
+**   Position of current entry within row. In the usual ((iCol<<32)+iOff)
+**   format (e.g. see macros FTS5_POS2COLUMN() and FTS5_POS2OFFSET()).
+**
+** iIter:
+**   If the Fts5TokenDataIter iterator that the entry is part of is
+**   actually an iterator (i.e. with nIter>0, not just a container for
+**   Fts5TokenDataMap structures), then this variable is an index into
+**   the apIter[] array. The corresponding term is that which the iterator
+**   at apIter[iIter] currently points to.
+**
+**   Or, if the Fts5TokenDataIter iterator is just a container object
+**   (nIter==0), then iIter is an index into the term.p[] buffer where
+**   the term is stored.
+**
+** nByte:
+**   In the case where iIter is an index into term.p[], this variable
+**   is the size of the term in bytes. If iIter is an index into apIter[],
+**   this variable is unused.
+*/
+struct Fts5TokenDataMap {
+  i64 iRowid;                     /* Row this token is located in */
+  i64 iPos;                       /* Position of token */
+  int iIter;                      /* Iterator token was read from */
+  int nByte;                      /* Length of token in bytes (or 0) */
+};
+
+/*
+** An object used to supplement Fts5Iter for tokendata=1 iterators.
+**
+** This object serves two purposes. The first is as a container for the array
+** of Fts5TokenDataMap structures (nMapAlloc, nMap, aMap[]) used to find the
+** token required by the xInstToken() API. The second, for full-token
+** tokendata queries, is to hold the sub-iterators in apIter[] (nIter>0).
+*/
+struct Fts5TokenDataIter {
+  int nMapAlloc;                  /* Allocated size of aMap[] in entries */
+  int nMap;                       /* Number of valid entries in aMap[] */
+  Fts5TokenDataMap *aMap;         /* Array of (rowid+pos -> token) mappings */
+
+  /* The following are used for prefix-queries only. */
+  Fts5Buffer terms;
+
+  /* The following are used for other full-token tokendata queries only. */
+  int nIter;
+  int nIterAlloc;
+  Fts5PoslistReader *aPoslistReader;
+  int *aPoslistToIter;
+  Fts5Iter *apIter[1];
+};
+
+/*
+** The two input arrays - a1[] and a2[] - are in sorted order. This function
+** merges the two arrays together and writes the result to output array 
+** aOut[]. aOut[] is guaranteed to be large enough to hold the result.
+**
+** Duplicate entries are copied into the output. So the size of the output
+** array is always (n1+n2) entries.
+*/
+static void fts5TokendataMerge(
+  Fts5TokenDataMap *a1, int n1,   /* Input array 1 */
+  Fts5TokenDataMap *a2, int n2,   /* Input array 2 */
+  Fts5TokenDataMap *aOut          /* Output array */
+){
+  int i1 = 0;
+  int i2 = 0;
+
+  assert( n1>=0 && n2>=0 );
+  while( i1<n1 || i2<n2 ){
+    Fts5TokenDataMap *pOut = &aOut[i1+i2];
+    if( i2>=n2 || (i1<n1 && (
+        a1[i1].iRowid<a2[i2].iRowid
+     || (a1[i1].iRowid==a2[i2].iRowid && a1[i1].iPos<=a2[i2].iPos)
+    ))){
+      memcpy(pOut, &a1[i1], sizeof(Fts5TokenDataMap));
+      i1++;
+    }else{
+      memcpy(pOut, &a2[i2], sizeof(Fts5TokenDataMap));
+      i2++;
+    }
+  }
+}
+
+
+/*
+** Append a mapping to the token-map belonging to object pT.
+*/
+static void fts5TokendataIterAppendMap(
+  Fts5Index *p, 
+  Fts5TokenDataIter *pT, 
+  int iIter,
+  int nByte,
+  i64 iRowid, 
+  i64 iPos
+){
+  if( p->rc==SQLITE_OK ){
+    if( pT->nMap==pT->nMapAlloc ){
+      int nNew = pT->nMapAlloc ? pT->nMapAlloc*2 : 64;
+      int nAlloc = nNew * sizeof(Fts5TokenDataMap);
+      Fts5TokenDataMap *aNew;
+
+      aNew = (Fts5TokenDataMap*)sqlite3_realloc(pT->aMap, nAlloc);
+      if( aNew==0 ){
+        p->rc = SQLITE_NOMEM;
+        return;
+      }
+
+      pT->aMap = aNew;
+      pT->nMapAlloc = nNew;
+    }
+
+    pT->aMap[pT->nMap].iRowid = iRowid;
+    pT->aMap[pT->nMap].iPos = iPos;
+    pT->aMap[pT->nMap].iIter = iIter;
+    pT->aMap[pT->nMap].nByte = nByte;
+    pT->nMap++;
+  }
+}
+
+/*
+** Sort the contents of the pT->aMap[] array.
+**
+** The sorting algorithm requires a malloc(). If this fails, an error code
+** is left in Fts5Index.rc before returning.
+*/
+static void fts5TokendataIterSortMap(Fts5Index *p, Fts5TokenDataIter *pT){
+  Fts5TokenDataMap *aTmp = 0;
+  int nByte = pT->nMap * sizeof(Fts5TokenDataMap);
+
+  aTmp = (Fts5TokenDataMap*)sqlite3Fts5MallocZero(&p->rc, nByte);
+  if( aTmp ){
+    Fts5TokenDataMap *a1 = pT->aMap;
+    Fts5TokenDataMap *a2 = aTmp;
+    i64 nHalf;
+
+    for(nHalf=1; nHalf<pT->nMap; nHalf=nHalf*2){
+      int i1;
+      for(i1=0; i1<pT->nMap; i1+=(nHalf*2)){
+        int n1 = MIN(nHalf, pT->nMap-i1);
+        int n2 = MIN(nHalf, pT->nMap-i1-n1);
+        fts5TokendataMerge(&a1[i1], n1, &a1[i1+n1], n2, &a2[i1]);
+      }
+      SWAPVAL(Fts5TokenDataMap*, a1, a2);
+    }
+
+    if( a1!=pT->aMap ){
+      memcpy(pT->aMap, a1, pT->nMap*sizeof(Fts5TokenDataMap));
+    }
+    sqlite3_free(aTmp);
+
+#ifdef SQLITE_DEBUG
+    {
+      int ii;
+      for(ii=1; ii<pT->nMap; ii++){
+        Fts5TokenDataMap *p1 = &pT->aMap[ii-1];
+        Fts5TokenDataMap *p2 = &pT->aMap[ii];
+        assert( p1->iRowid<p2->iRowid 
+             || (p1->iRowid==p2->iRowid && p1->iPos<=p2->iPos)
+        );
+      }
+    }
+#endif
+  }
+}
+
+/*
+** Delete an Fts5TokenDataIter structure and its contents.
+*/
+static void fts5TokendataIterDelete(Fts5TokenDataIter *pSet){
+  if( pSet ){
+    int ii;
+    for(ii=0; ii<pSet->nIter; ii++){
+      fts5MultiIterFree(pSet->apIter[ii]);
+    }
+    fts5BufferFree(&pSet->terms);
+    sqlite3_free(pSet->aPoslistReader);
+    sqlite3_free(pSet->aMap);
+    sqlite3_free(pSet);
+  }
+}
+
+
+/*
+** fts5VisitEntries() context object used by fts5SetupPrefixIterTokendata()
+** to pass data to prefixIterSetupTokendataCb().
+*/
+typedef struct TokendataSetupCtx TokendataSetupCtx;
+struct TokendataSetupCtx {
+  Fts5TokenDataIter *pT;          /* Object being populated with mappings */
+  int iTermOff;                   /* Offset of current term in terms.p[] */
+  int nTermByte;                  /* Size of current term in bytes */
+};
+
+/*
+** fts5VisitEntries() callback used by fts5SetupPrefixIterTokendata(). This
+** callback adds an entry to the Fts5TokenDataIter.aMap[] array for each
+** position in the current position-list. It doesn't matter that some of
+** these may be out of order - they will be sorted later.
+*/
+static void prefixIterSetupTokendataCb(
+  Fts5Index *p, 
+  void *pCtx, 
+  Fts5Iter *p1, 
+  const u8 *pNew,
+  int nNew
+){
+  TokendataSetupCtx *pSetup = (TokendataSetupCtx*)pCtx;
+  int iPosOff = 0;
+  i64 iPos = 0;
+
+  if( pNew ){
+    pSetup->nTermByte = nNew-1;
+    pSetup->iTermOff = pSetup->pT->terms.n;
+    fts5BufferAppendBlob(&p->rc, &pSetup->pT->terms, nNew-1, pNew+1);
+  }
+
+  while( 0==sqlite3Fts5PoslistNext64(
+     p1->base.pData, p1->base.nData, &iPosOff, &iPos
+  ) ){
+    fts5TokendataIterAppendMap(p, 
+        pSetup->pT, pSetup->iTermOff, pSetup->nTermByte, p1->base.iRowid, iPos
+    );
+  }
+}
+
+
+/*
+** Context object passed by fts5SetupPrefixIter() to fts5VisitEntries().
+*/
+typedef struct PrefixSetupCtx PrefixSetupCtx;
+struct PrefixSetupCtx {
+  void (*xMerge)(Fts5Index*, Fts5Buffer*, int, Fts5Buffer*);
+  void (*xAppend)(Fts5Index*, u64, Fts5Iter*, Fts5Buffer*);
+  i64 iLastRowid;
+  int nMerge;
+  Fts5Buffer *aBuf;
+  int nBuf;
+  Fts5Buffer doclist;
+  TokendataSetupCtx *pTokendata;
+};
+
+/*
+** fts5VisitEntries() callback used by fts5SetupPrefixIter()
+*/
+static void prefixIterSetupCb(
+  Fts5Index *p, 
+  void *pCtx, 
+  Fts5Iter *p1, 
+  const u8 *pNew,
+  int nNew
+){
+  PrefixSetupCtx *pSetup = (PrefixSetupCtx*)pCtx;
+  const int nMerge = pSetup->nMerge;
+
+  if( p1->base.nData>0 ){
+    if( p1->base.iRowid<=pSetup->iLastRowid && pSetup->doclist.n>0 ){
+      int i;
+      for(i=0; p->rc==SQLITE_OK && pSetup->doclist.n; i++){
+        int i1 = i*nMerge;
+        int iStore;
+        assert( i1+nMerge<=pSetup->nBuf );
+        for(iStore=i1; iStore<i1+nMerge; iStore++){
+          if( pSetup->aBuf[iStore].n==0 ){
+            fts5BufferSwap(&pSetup->doclist, &pSetup->aBuf[iStore]);
+            fts5BufferZero(&pSetup->doclist);
+            break;
+          }
+        }
+        if( iStore==i1+nMerge ){
+          pSetup->xMerge(p, &pSetup->doclist, nMerge, &pSetup->aBuf[i1]);
+          for(iStore=i1; iStore<i1+nMerge; iStore++){
+            fts5BufferZero(&pSetup->aBuf[iStore]);
+          }
+        }
+      }
+      pSetup->iLastRowid = 0;
+    }
+
+    pSetup->xAppend(
+        p, (u64)p1->base.iRowid-(u64)pSetup->iLastRowid, p1, &pSetup->doclist
+    );
+    pSetup->iLastRowid = p1->base.iRowid;
+  }
+
+  if( pSetup->pTokendata ){
+    prefixIterSetupTokendataCb(p, (void*)pSetup->pTokendata, p1, pNew, nNew);
+  }
+}
+
 static void fts5SetupPrefixIter(
  Fts5Index *p,                   /* Index to read from */
  int bDesc,                      /* True for "ORDER BY rowid DESC" */
@@ -6213,38 +6590,41 @@ static void fts5SetupPrefixIter(
  Fts5Iter **ppIter               /* OUT: New iterator */
 ){
  Fts5Structure *pStruct;
-  Fts5Buffer *aBuf;
-  int nBuf = 32;
-  int nMerge = 1;
+  PrefixSetupCtx s;
+  TokendataSetupCtx s2;

-  void (*xMerge)(Fts5Index*, Fts5Buffer*, int, Fts5Buffer*);
-  void (*xAppend)(Fts5Index*, u64, Fts5Iter*, Fts5Buffer*);
-  if( p->pConfig->eDetail==FTS5_DETAIL_NONE ){
-    xMerge = fts5MergeRowidLists;
-    xAppend = fts5AppendRowid;
-  }else{
-    nMerge = FTS5_MERGE_NLIST-1;
-    nBuf = nMerge*8;   /* Sufficient to merge (16^8)==(2^32) lists */
-    xMerge = fts5MergePrefixLists;
-    xAppend = fts5AppendPoslist;
+  memset(&s, 0, sizeof(s));
+  memset(&s2, 0, sizeof(s2));
+
+  s.nMerge = 1;
+  s.iLastRowid = 0;
+  s.nBuf = 32;
+  if( iIdx==0 
+   && p->pConfig->eDetail==FTS5_DETAIL_FULL 
+   && p->pConfig->bPrefixInsttoken 
+  ){
+    s.pTokendata = &s2;
+    s2.pT = (Fts5TokenDataIter*)fts5IdxMalloc(p, sizeof(*s2.pT));
  }

-  aBuf = (Fts5Buffer*)fts5IdxMalloc(p, sizeof(Fts5Buffer)*nBuf);
+  if( p->pConfig->eDetail==FTS5_DETAIL_NONE ){
+    s.xMerge = fts5MergeRowidLists;
+    s.xAppend = fts5AppendRowid;
+  }else{
+    s.nMerge = FTS5_MERGE_NLIST-1;
+    s.nBuf = s.nMerge*8;   /* Sufficient to merge (16^8)==(2^32) lists */
+    s.xMerge = fts5MergePrefixLists;
+    s.xAppend = fts5AppendPoslist;
+  }
+
+  s.aBuf = (Fts5Buffer*)fts5IdxMalloc(p, sizeof(Fts5Buffer)*s.nBuf);
  pStruct = fts5StructureRead(p);
-  assert( p->rc!=SQLITE_OK || (aBuf && pStruct) );
+  assert( p->rc!=SQLITE_OK || (s.aBuf && pStruct) );

  if( p->rc==SQLITE_OK ){
-    const int flags = FTS5INDEX_QUERY_SCAN 
-                    | FTS5INDEX_QUERY_SKIPEMPTY 
-                    | FTS5INDEX_QUERY_NOOUTPUT;
+    void *pCtx = (void*)&s;
    int i;
-    i64 iLastRowid = 0;
-    Fts5Iter *p1 = 0;     /* Iterator used to gather data from index */
    Fts5Data *pData;
-    Fts5Buffer doclist;
-    int bNewTerm = 1;
-
-    memset(&doclist, 0, sizeof(doclist));

    /* If iIdx is non-zero, then it is the number of a prefix-index for
    ** prefixes 1 character longer than the prefix being queried for. That
@@ -6252,94 +6632,43 @@ static void fts5SetupPrefixIter(
    ** corresponding to the prefix itself. That one is extracted from the
    ** main term index here.  */
    if( iIdx!=0 ){
-      int dummy = 0;
-      const int f2 = FTS5INDEX_QUERY_SKIPEMPTY|FTS5INDEX_QUERY_NOOUTPUT;
      pToken[0] = FTS5_MAIN_PREFIX;
-      fts5MultiIterNew(p, pStruct, f2, pColset, pToken, nToken, -1, 0, &p1);
-      fts5IterSetOutputCb(&p->rc, p1);
-      for(;
-        fts5MultiIterEof(p, p1)==0;
-        fts5MultiIterNext2(p, p1, &dummy)
-      ){
-        Fts5SegIter *pSeg = &p1->aSeg[ p1->aFirst[1].iFirst ];
-        p1->xSetOutputs(p1, pSeg);
-        if( p1->base.nData ){
-          xAppend(p, (u64)p1->base.iRowid-(u64)iLastRowid, p1, &doclist);
-          iLastRowid = p1->base.iRowid;
-        }
-      }
-      fts5MultiIterFree(p1);
+      fts5VisitEntries(p, pColset, pToken, nToken, 0, prefixIterSetupCb, pCtx);
    }

    pToken[0] = FTS5_MAIN_PREFIX + iIdx;
-    fts5MultiIterNew(p, pStruct, flags, pColset, pToken, nToken, -1, 0, &p1);
-    fts5IterSetOutputCb(&p->rc, p1);
+    fts5VisitEntries(p, pColset, pToken, nToken, 1, prefixIterSetupCb, pCtx);

-    for( /* no-op */ ;
-        fts5MultiIterEof(p, p1)==0;
-        fts5MultiIterNext2(p, p1, &bNewTerm)
-    ){
-      Fts5SegIter *pSeg = &p1->aSeg[ p1->aFirst[1].iFirst ];
-      int nTerm = pSeg->term.n;
-      const u8 *pTerm = pSeg->term.p;
-      p1->xSetOutputs(p1, pSeg);
-
-      assert_nc( memcmp(pToken, pTerm, MIN(nToken, nTerm))<=0 );
-      if( bNewTerm ){
-        if( nTerm<nToken || memcmp(pToken, pTerm, nToken) ) break;
-      }
-
-      if( p1->base.nData==0 ) continue;
-      if( p1->base.iRowid<=iLastRowid && doclist.n>0 ){
-        for(i=0; p->rc==SQLITE_OK && doclist.n; i++){
-          int i1 = i*nMerge;
-          int iStore;
-          assert( i1+nMerge<=nBuf );
-          for(iStore=i1; iStore<i1+nMerge; iStore++){
-            if( aBuf[iStore].n==0 ){
-              fts5BufferSwap(&doclist, &aBuf[iStore]);
-              fts5BufferZero(&doclist);
-              break;
-            }
-          }
-          if( iStore==i1+nMerge ){
-            xMerge(p, &doclist, nMerge, &aBuf[i1]);
-            for(iStore=i1; iStore<i1+nMerge; iStore++){
-              fts5BufferZero(&aBuf[iStore]);
-            }
-          }
-        }
-        iLastRowid = 0;
-      }
-
-      xAppend(p, (u64)p1->base.iRowid-(u64)iLastRowid, p1, &doclist);
-      iLastRowid = p1->base.iRowid;
-    }
-
-    assert( (nBuf%nMerge)==0 );
-    for(i=0; i<nBuf; i+=nMerge){
+    assert( (s.nBuf%s.nMerge)==0 );
+    for(i=0; i<s.nBuf; i+=s.nMerge){
      int iFree;
      if( p->rc==SQLITE_OK ){
-        xMerge(p, &doclist, nMerge, &aBuf[i]);
+        s.xMerge(p, &s.doclist, s.nMerge, &s.aBuf[i]);
      }
-      for(iFree=i; iFree<i+nMerge; iFree++){
-        fts5BufferFree(&aBuf[iFree]);
+      for(iFree=i; iFree<i+s.nMerge; iFree++){
+        fts5BufferFree(&s.aBuf[iFree]);
      }
    }
-    fts5MultiIterFree(p1);

-    pData = fts5IdxMalloc(p, sizeof(*pData)+doclist.n+FTS5_DATA_ZERO_PADDING);
+    pData = fts5IdxMalloc(p, sizeof(*pData)+s.doclist.n+FTS5_DATA_ZERO_PADDING);
    if( pData ){
      pData->p = (u8*)&pData[1];
-      pData->nn = pData->szLeaf = doclist.n;
-      if( doclist.n ) memcpy(pData->p, doclist.p, doclist.n);
+      pData->nn = pData->szLeaf = s.doclist.n;
+      if( s.doclist.n ) memcpy(pData->p, s.doclist.p, s.doclist.n);
      fts5MultiIterNew2(p, pData, bDesc, ppIter);
    }
-    fts5BufferFree(&doclist);
+
+    if( p->rc==SQLITE_OK && s.pTokendata ){
+      fts5TokendataIterSortMap(p, s2.pT);
+      (*ppIter)->pTokenDataIter = s2.pT;
+      s2.pT = 0;
+    }
  }

+  fts5TokendataIterDelete(s2.pT);
+  fts5BufferFree(&s.doclist);
  fts5StructureRelease(pStruct);
-  sqlite3_free(aBuf);
+  sqlite3_free(s.aBuf);
 }


@@ -6593,38 +6922,6 @@ static void fts5SegIterSetEOF(Fts5SegIter *pSeg){
  pSeg->pLeaf = 0;
 }

-/*
-** Usually, a tokendata=1 iterator (struct Fts5TokenDataIter) accumulates an
-** array of these for each row it visits. Or, for an iterator used by an
-** "ORDER BY rank" query, it accumulates an array of these for the entire
-** query.
-**
-** Each instance in the array indicates the iterator (and therefore term)
-** associated with position iPos of rowid iRowid. This is used by the
-** xInstToken() API.
-*/
-struct Fts5TokenDataMap {
-  i64 iRowid;                     /* Row this token is located in */
-  i64 iPos;                       /* Position of token */
-  int iIter;                      /* Iterator token was read from */
-};
-
-/*
-** An object used to supplement Fts5Iter for tokendata=1 iterators.
-*/
-struct Fts5TokenDataIter {
-  int nIter;
-  int nIterAlloc;
-
-  int nMap;
-  int nMapAlloc;
-  Fts5TokenDataMap *aMap;
-
-  Fts5PoslistReader *aPoslistReader;
-  int *aPoslistToIter;
-  Fts5Iter *apIter[1];
-};
-
 /*
 ** This function appends iterator pAppend to Fts5TokenDataIter pIn and 
 ** returns the result.
@@ -6661,54 +6958,6 @@ static Fts5TokenDataIter *fts5AppendTokendataIter(
  return pRet;
 }

-/*
-** Delete an Fts5TokenDataIter structure and its contents.
-*/
-static void fts5TokendataIterDelete(Fts5TokenDataIter *pSet){
-  if( pSet ){
-    int ii;
-    for(ii=0; ii<pSet->nIter; ii++){
-      fts5MultiIterFree(pSet->apIter[ii]);
-    }
-    sqlite3_free(pSet->aPoslistReader);
-    sqlite3_free(pSet->aMap);
-    sqlite3_free(pSet);
-  }
-}
-
-/*
-** Append a mapping to the token-map belonging to object pT.
-*/
-static void fts5TokendataIterAppendMap(
-  Fts5Index *p, 
-  Fts5TokenDataIter *pT, 
-  int iIter,
-  i64 iRowid, 
-  i64 iPos
-){
-  if( p->rc==SQLITE_OK ){
-    if( pT->nMap==pT->nMapAlloc ){
-      int nNew = pT->nMapAlloc ? pT->nMapAlloc*2 : 64;
-      int nByte = nNew * sizeof(Fts5TokenDataMap);
-      Fts5TokenDataMap *aNew;
-
-      aNew = (Fts5TokenDataMap*)sqlite3_realloc(pT->aMap, nByte);
-      if( aNew==0 ){
-        p->rc = SQLITE_NOMEM;
-        return;
-      }
-
-      pT->aMap = aNew;
-      pT->nMapAlloc = nNew;
-    }
-
-    pT->aMap[pT->nMap].iRowid = iRowid;
-    pT->aMap[pT->nMap].iPos = iPos;
-    pT->aMap[pT->nMap].iIter = iIter;
-    pT->nMap++;
-  }
-}
-
 /*
 ** The iterator passed as the only argument must be a tokendata=1 iterator
 ** (pIter->pTokenDataIter!=0). This function sets the iterator output
@@ -6749,7 +6998,7 @@ static void fts5IterSetOutputsTokendata(Fts5Iter *pIter){
    pIter->base.iRowid = iRowid;

    if( nHit==1 && eDetail==FTS5_DETAIL_FULL ){
-      fts5TokendataIterAppendMap(pIter->pIndex, pT, iMin, iRowid, -1);
+      fts5TokendataIterAppendMap(pIter->pIndex, pT, iMin, 0, iRowid, -1);
    }else
    if( nHit>1 && eDetail!=FTS5_DETAIL_NONE ){
      int nReader = 0;
@@ -7002,6 +7251,7 @@ static Fts5Iter *fts5SetupTokendataIter(
    pRet = fts5MultiIterAlloc(p, 0);
  }
  if( pRet ){
+    pRet->nSeg = 0;
    pRet->pTokenDataIter = pSet;
    if( pSet ){
      fts5IterSetOutputsTokendata(pRet);
@@ -7017,7 +7267,6 @@ static Fts5Iter *fts5SetupTokendataIter(
  return pRet;
 }

-
 /*
 ** Open a new iterator to iterate though all rowid that match the 
 ** specified token or token prefix.
@@ -7042,6 +7291,11 @@ int sqlite3Fts5IndexQuery(
    int bTokendata = pConfig->bTokendata;
    if( nToken>0 ) memcpy(&buf.p[1], pToken, nToken);

+    /* The NOTOKENDATA flag is set when each token in a tokendata=1 table
+    ** should be treated individually, instead of merging all those with
+    ** a common prefix into a single entry. This is used, for example, by
+    ** queries performed as part of an integrity-check, or by the fts5vocab
+    ** module.  */
    if( flags & (FTS5INDEX_QUERY_NOTOKENDATA|FTS5INDEX_QUERY_SCAN) ){
      bTokendata = 0;
    }
@@ -7072,7 +7326,7 @@ int sqlite3Fts5IndexQuery(
    }

    if( bTokendata && iIdx==0 ){
-      buf.p[0] = '0';
+      buf.p[0] = FTS5_MAIN_PREFIX;
      pRet = fts5SetupTokendataIter(p, buf.p, nToken+1, pColset);
    }else if( iIdx<=pConfig->nPrefix ){
      /* Straight index lookup */
@@ -7085,7 +7339,7 @@ int sqlite3Fts5IndexQuery(
        fts5StructureRelease(pStruct);
      }
    }else{
-      /* Scan multiple terms in the main index */
+      /* Scan multiple terms in the main index for a prefix query. */
      int bDesc = (flags & FTS5INDEX_QUERY_DESC)!=0;
      fts5SetupPrefixIter(p, bDesc, iPrefixIdx, buf.p, nToken+1, pColset,&pRet);
      if( pRet==0 ){
@@ -7121,7 +7375,8 @@ int sqlite3Fts5IndexQuery(
 int sqlite3Fts5IterNext(Fts5IndexIter *pIndexIter){
  Fts5Iter *pIter = (Fts5Iter*)pIndexIter;
  assert( pIter->pIndex->rc==SQLITE_OK );
-  if( pIter->pTokenDataIter ){
+  if( pIter->nSeg==0 ){
+    assert( pIter->pTokenDataIter );
    fts5TokendataIterNext(pIter, 0, 0);
  }else{
    fts5MultiIterNext(pIter->pIndex, pIter, 0, 0);
@@ -7158,7 +7413,8 @@ int sqlite3Fts5IterNextScan(Fts5IndexIter *pIndexIter){
 */
 int sqlite3Fts5IterNextFrom(Fts5IndexIter *pIndexIter, i64 iMatch){
  Fts5Iter *pIter = (Fts5Iter*)pIndexIter;
-  if( pIter->pTokenDataIter ){
+  if( pIter->nSeg==0 ){
+    assert( pIter->pTokenDataIter );
    fts5TokendataIterNext(pIter, 1, iMatch);
  }else{
    fts5MultiIterNextFrom(pIter->pIndex, pIter, iMatch);
@@ -7177,14 +7433,60 @@ const char *sqlite3Fts5IterTerm(Fts5IndexIter *pIndexIter, int *pn){
  return (z ? &z[1] : 0);
 }

+/*
+** pIter is a prefix query. This function populates pIter->pTokenDataIter
+** with an Fts5TokenDataIter object containing mappings for all rows matched
+** by the query. On error the object is deleted and pIter is left unchanged.
+*/
+static int fts5SetupPrefixIterTokendata(
+  Fts5Iter *pIter,                /* Prefix-query iterator to populate */
+  const char *pToken,             /* Token prefix to search for */
+  int nToken                      /* Size of pToken in bytes */
+){
+  Fts5Index *p = pIter->pIndex;
+  Fts5Buffer token = {0, 0, 0};
+  TokendataSetupCtx ctx;
+
+  memset(&ctx, 0, sizeof(ctx));
+
+  fts5BufferGrow(&p->rc, &token, nToken+1);     /* +1 for the prefix byte */
+  ctx.pT = (Fts5TokenDataIter*)sqlite3Fts5MallocZero(&p->rc, sizeof(*ctx.pT));
+
+  if( p->rc==SQLITE_OK ){
+
+    /* Fill in the token prefix to search for */
+    token.p[0] = FTS5_MAIN_PREFIX;
+    memcpy(&token.p[1], pToken, nToken);
+    token.n = nToken+1;
+
+    fts5VisitEntries(
+        p, 0, token.p, token.n, 1, prefixIterSetupTokendataCb, (void*)&ctx
+    );
+
+    fts5TokendataIterSortMap(p, ctx.pT);
+  }
+
+  if( p->rc==SQLITE_OK ){
+    pIter->pTokenDataIter = ctx.pT;     /* Success: pIter takes the object */
+  }else{
+    fts5TokendataIterDelete(ctx.pT);    /* Failure: discard partial result */
+  }
+  fts5BufferFree(&token);
+
+  return fts5IndexReturn(p);
+}
+
 /*
 ** This is used by xInstToken() to access the token at offset iOff, column
 ** iCol of row iRowid. The token is returned via output variables *ppOut
 ** and *pnOut. The iterator passed as the first argument must be a tokendata=1
 ** iterator (pIter->pTokenDataIter!=0).
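+** pToken/nToken: Query prefix. Used to build pTokenDataIter on demand when
+** pIter is a prefix iterator (pIter->nSeg>0) that does not have one yet.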
 */
 int sqlite3Fts5IterToken(
  Fts5IndexIter *pIndexIter, 
+  const char *pToken, int nToken,
  i64 iRowid,
  int iCol, 
  int iOff, 
@@ -7192,13 +7494,22 @@ int sqlite3Fts5IterToken(
 ){
  Fts5Iter *pIter = (Fts5Iter*)pIndexIter;
  Fts5TokenDataIter *pT = pIter->pTokenDataIter;
-  Fts5TokenDataMap *aMap = pT->aMap;
  i64 iPos = (((i64)iCol)<<32) + iOff;
-
+  Fts5TokenDataMap *aMap = 0;
  int i1 = 0;
-  int i2 = pT->nMap;
+  int i2 = 0;
  int iTest = 0;

+  assert( pT || (pToken && pIter->nSeg>0) );
+  if( pT==0 ){
+    int rc = fts5SetupPrefixIterTokendata(pIter, pToken, nToken);
+    if( rc!=SQLITE_OK ) return rc;
+    pT = pIter->pTokenDataIter;
+  }
+
+  i2 = pT->nMap;
+  aMap = pT->aMap;
+
  while( i2>i1 ){
    iTest = (i1 + i2) / 2;

@@ -7221,9 +7532,15 @@ int sqlite3Fts5IterToken(
  }

  if( i2>i1 ){
-    Fts5Iter *pMap = pT->apIter[aMap[iTest].iIter];
-    *ppOut = (const char*)pMap->aSeg[0].term.p+1;
-    *pnOut = pMap->aSeg[0].term.n-1;
+    if( pIter->nSeg==0 ){
+      Fts5Iter *pMap = pT->apIter[aMap[iTest].iIter];
+      *ppOut = (const char*)pMap->aSeg[0].term.p+1;
+      *pnOut = pMap->aSeg[0].term.n-1;
+    }else{
+      Fts5TokenDataMap *p = &aMap[iTest];
+      *ppOut = (const char*)&pT->terms.p[p->iIter];
+      *pnOut = aMap[iTest].nByte;
+    }
  }

  return SQLITE_OK;
@@ -7235,7 +7552,9 @@ int sqlite3Fts5IterToken(
 */
 void sqlite3Fts5IndexIterClearTokendata(Fts5IndexIter *pIndexIter){
  Fts5Iter *pIter = (Fts5Iter*)pIndexIter;
-  if( pIter && pIter->pTokenDataIter ){
+  if( pIter && pIter->pTokenDataIter 
+   && (pIter->nSeg==0 || pIter->pIndex->pConfig->eDetail!=FTS5_DETAIL_FULL)
+  ){
    pIter->pTokenDataIter->nMap = 0;
  }
 }
@@ -7255,17 +7574,29 @@ int sqlite3Fts5IndexIterWriteTokendata(
  Fts5Iter *pIter = (Fts5Iter*)pIndexIter;
  Fts5TokenDataIter *pT = pIter->pTokenDataIter;
  Fts5Index *p = pIter->pIndex;
-  int ii;
+  i64 iPos = (((i64)iCol)<<32) + iOff;

  assert( p->pConfig->eDetail!=FTS5_DETAIL_FULL );
-  assert( pIter->pTokenDataIter );
-
-  for(ii=0; ii<pT->nIter; ii++){
-    Fts5Buffer *pTerm = &pT->apIter[ii]->aSeg[0].term;
-    if( nToken==pTerm->n-1 && memcmp(pToken, pTerm->p+1, nToken)==0 ) break;
-  }
-  if( ii<pT->nIter ){
-    fts5TokendataIterAppendMap(p, pT, ii, iRowid, (((i64)iCol)<<32) + iOff);
+  assert( pIter->pTokenDataIter || pIter->nSeg>0 );
+  if( pIter->nSeg>0 ){
+    /* This is a prefix term iterator. */
+    if( pT==0 ){
+      pT = (Fts5TokenDataIter*)sqlite3Fts5MallocZero(&p->rc, sizeof(*pT));
+      pIter->pTokenDataIter = pT;
+    }
+    if( pT ){
+      fts5TokendataIterAppendMap(p, pT, pT->terms.n, nToken, iRowid, iPos);
+      fts5BufferAppendBlob(&p->rc, &pT->terms, nToken, (const u8*)pToken);
+    }
+  }else{
+    int ii;
+    for(ii=0; ii<pT->nIter; ii++){
+      Fts5Buffer *pTerm = &pT->apIter[ii]->aSeg[0].term;
+      if( nToken==pTerm->n-1 && memcmp(pToken, pTerm->p+1, nToken)==0 ) break;
+    }
+    if( ii<pT->nIter ){
+      fts5TokendataIterAppendMap(p, pT, ii, 0, iRowid, iPos);
+    }
  }
  return fts5IndexReturn(p);
 }
--- a/ext/fts5/fts5_main.c
+++ b/ext/fts5/fts5_main.c
@@ -93,6 +93,7 @@ struct Fts5Global {
 #define FTS5_LOCALE_HDR_SIZE ((int)sizeof( ((Fts5Global*)0)->aLocaleHdr ))
 #define FTS5_LOCALE_HDR(pConfig) ((const u8*)(pConfig->pGlobal->aLocaleHdr))

+#define FTS5_INSTTOKEN_SUBTYPE 73

 /*
 ** Each auxiliary function registered with the FTS5 module is represented
@@ -1418,6 +1419,7 @@ static int fts5FilterMethod(
  sqlite3_value *pRowidGe = 0;    /* rowid >= ? expression (or NULL) */
  int iCol;                       /* Column on LHS of MATCH operator */
  char **pzErrmsg = pConfig->pzErrmsg;
+  int bPrefixInsttoken = pConfig->bPrefixInsttoken;
  int i;
  int iIdxStr = 0;
  Fts5Expr *pExpr = 0;
@@ -1453,6 +1455,9 @@ static int fts5FilterMethod(
        rc = fts5ExtractExprText(pConfig, apVal[i], &zText, &bFreeAndReset);
        if( rc!=SQLITE_OK ) goto filter_out;
        if( zText==0 ) zText = "";
+        if( sqlite3_value_subtype(apVal[i])==FTS5_INSTTOKEN_SUBTYPE ){
+          pConfig->bPrefixInsttoken = 1;
+        }

        iCol = 0;
        do{
@@ -1593,6 +1598,7 @@ static int fts5FilterMethod(
 filter_out:
  sqlite3Fts5ExprFree(pExpr);
  pConfig->pzErrmsg = pzErrmsg;
+  pConfig->bPrefixInsttoken = bPrefixInsttoken;
  return rc;
 }

@@ -3652,6 +3658,20 @@ static void fts5LocaleFunc(
  }
 }

+/*
+** fts5_insttoken(X): return X with subtype FTS5_INSTTOKEN_SUBTYPE set on it.
+*/
+static void fts5InsttokenFunc(
+  sqlite3_context *pCtx,          /* Function call context */
+  int nArg,                       /* Number of args */
+  sqlite3_value **apArg           /* Function arguments */
+){
+  assert( nArg==1 );
+  (void)nArg;                     /* Only referenced by the assert() above */
+  sqlite3_result_value(pCtx, apArg[0]);
+  sqlite3_result_subtype(pCtx, FTS5_INSTTOKEN_SUBTYPE);
+}
+
 /*
 ** Return true if zName is the extension on one of the shadow tables used
 ** by this module.
@@ -3781,10 +3801,17 @@ static int fts5Init(sqlite3 *db){
    if( rc==SQLITE_OK ){
      rc = sqlite3_create_function(
          db, "fts5_locale", 2, 
-          SQLITE_UTF8|SQLITE_INNOCUOUS|SQLITE_RESULT_SUBTYPE,
+          SQLITE_UTF8|SQLITE_INNOCUOUS|SQLITE_RESULT_SUBTYPE|SQLITE_SUBTYPE,
          p, fts5LocaleFunc, 0, 0
      );
    }
+    if( rc==SQLITE_OK ){
+      rc = sqlite3_create_function(
+          db, "fts5_insttoken", 1, 
+          SQLITE_UTF8|SQLITE_INNOCUOUS|SQLITE_RESULT_SUBTYPE,
+          p, fts5InsttokenFunc, 0, 0
+      );
+    }
  }

  /* If SQLITE_FTS5_ENABLE_TEST_MI is defined, assume that the file
--- a/ext/fts5/test/fts5origintext.test
+++ b/ext/fts5/test/fts5origintext.test
@@ -22,34 +22,40 @@ ifcapable !fts5 {
 }

 foreach_detail_mode $testprefix {
+foreach {tn insttoken} {
+  1 0
+  2 1
+} {
+reset_db

 sqlite3_fts5_register_origintext db
-do_execsql_test 1.0 {
+do_execsql_test $tn.1.0 {
  CREATE VIRTUAL TABLE ft USING fts5(
      x, tokenize="origintext unicode61", detail=%DETAIL%
  );
+  INSERT INTO ft(ft, rank) VALUES('insttoken', $insttoken);
  CREATE VIRTUAL TABLE vocab USING fts5vocab(ft, instance);
 }

-do_execsql_test 1.1 {
+do_execsql_test $tn.1.1 {
  INSERT INTO ft VALUES('Hello world');
 }

-do_execsql_test 1.2 {
+do_execsql_test $tn.1.2 {
  INSERT INTO ft(ft) VALUES('integrity-check');
 }

 proc b {x} { string map [list "\0" "."] $x }
 db func b b

-do_execsql_test 1.3 {
+do_execsql_test $tn.1.3 {
  select b(term) from vocab;
 } {
  hello.Hello
  world
 }

-do_execsql_test 1.4 {
+do_execsql_test $tn.1.4 {
  SELECT rowid FROM ft('Hello');
 } {1}

@@ -88,33 +94,34 @@ proc document {} {
 db func document document

 sqlite3_fts5_register_origintext db
-do_execsql_test 2.0 {
+do_execsql_test $tn.2.0 {
  CREATE VIRTUAL TABLE ft USING fts5(
      x, tokenize="origintext unicode61", detail=%DETAIL%
  );
+  INSERT INTO ft(ft, rank) VALUES('insttoken', $insttoken);
  INSERT INTO ft(ft, rank) VALUES('pgsz', 128);
  CREATE VIRTUAL TABLE vocab USING fts5vocab(ft, instance);
 }

-do_test 2.1 {
+do_test $tn.2.1 {
  for {set ii 0} {$ii < 500} {incr ii} {
    execsql { INSERT INTO ft VALUES( document() ) }
  }
 } {}

-do_execsql_test 2.2 {
+do_execsql_test $tn.2.2 {
  INSERT INTO ft(ft) VALUES('integrity-check');
 }

-do_execsql_test 2.3 {
+do_execsql_test $tn.2.3 {
  INSERT INTO ft(ft, rank) VALUES('merge', 16);
 }

-do_execsql_test 2.4 {
+do_execsql_test $tn.2.4 {
  INSERT INTO ft(ft) VALUES('integrity-check');
 }

-do_execsql_test 2.5 {
+do_execsql_test $tn.2.5 {
  INSERT INTO ft(ft) VALUES('optimize');
 }

@@ -122,10 +129,11 @@ do_execsql_test 2.5 {
 reset_db

 sqlite3_fts5_register_origintext db
-do_execsql_test 3.0 {
+do_execsql_test $tn.3.0 {
  CREATE VIRTUAL TABLE ft USING fts5(
      x, tokenize="origintext unicode61", detail=%DETAIL%
  );
+  INSERT INTO ft(ft, rank) VALUES('insttoken', $insttoken);
  CREATE VIRTUAL TABLE vocab USING fts5vocab(ft, instance);

  INSERT INTO ft(rowid, x) VALUES(1, 'hello');
@@ -137,16 +145,17 @@ do_execsql_test 3.0 {
 #db func b b
 #execsql_pp { SELECT b(term) FROM vocab }

-do_execsql_test 3.1.1 { SELECT rowid FROM ft('hello') } 1
-do_execsql_test 3.1.2 { SELECT rowid FROM ft('Hello') } 2
-do_execsql_test 3.1.3 { SELECT rowid FROM ft('HELLO') } 3
+do_execsql_test $tn.3.1.1 { SELECT rowid FROM ft('hello') } 1
+do_execsql_test $tn.3.1.2 { SELECT rowid FROM ft('Hello') } 2
+do_execsql_test $tn.3.1.3 { SELECT rowid FROM ft('HELLO') } 3

-do_execsql_test 3.2 {
+do_execsql_test $tn.3.2 {
  CREATE VIRTUAL TABLE ft2 USING fts5(x, 
      tokenize="origintext unicode61", 
      tokendata=1,
      detail=%DETAIL%
  );
+  INSERT INTO ft2(ft2, rank) VALUES('insttoken', $insttoken);
  CREATE VIRTUAL TABLE vocab2 USING fts5vocab(ft2, instance);

  INSERT INTO ft2(rowid, x) VALUES(1, 'hello');
@@ -160,11 +169,18 @@ do_execsql_test 3.2 {
 #db func b b
 #execsql_pp { SELECT b(term) FROM vocab }

-do_execsql_test 3.3.1 { SELECT rowid FROM ft2('hello') } {1 2 3}
-do_execsql_test 3.3.2 { SELECT rowid FROM ft2('Hello') } {1 2 3}
-do_execsql_test 3.3.3 { SELECT rowid FROM ft2('HELLO') } {1 2 3}
+do_execsql_test $tn.3.3.1 { SELECT rowid FROM ft2('hello') } {1 2 3}
+do_execsql_test $tn.3.3.2 { SELECT rowid FROM ft2('Hello') } {1 2 3}
+do_execsql_test $tn.3.3.3 { SELECT rowid FROM ft2('HELLO') } {1 2 3}

-do_execsql_test 3.3.4 { SELECT rowid FROM ft2('hello*') } {1 2 3 10}
+do_execsql_test $tn.3.3.4 { SELECT rowid FROM ft2('hello*') } {1 2 3 10}
+
+do_execsql_test $tn.3.3.5.1 { SELECT rowid FROM ft2('HELLO') ORDER BY rowid DESC} {
+  3 2 1
+}
+do_execsql_test $tn.3.3.5.2 { SELECT rowid FROM ft2('HELLO') ORDER BY +rowid DESC} {
+  3 2 1
+}

 #-------------------------------------------------------------------------
 #
@@ -176,36 +192,37 @@ proc querytoken {cmd iPhrase iToken} {
 }
 sqlite3_fts5_create_function db querytoken querytoken

-do_execsql_test 4.0 {
+do_execsql_test $tn.4.0 {
  CREATE VIRTUAL TABLE ft USING fts5(
      x, tokenize='origintext unicode61', tokendata=1, detail=%DETAIL%
  );
+  INSERT INTO ft(ft, rank) VALUES('insttoken', $insttoken);
  INSERT INTO ft VALUES('one two three four');
 }

-do_execsql_test 4.1 {
+do_execsql_test $tn.4.1 {
  SELECT rowid, querytoken(ft, 0, 0) FROM ft('TwO')
 } {1 two.TwO}
-do_execsql_test 4.2 {
+do_execsql_test $tn.4.2 {
  SELECT rowid, querytoken(ft, 0, 0) FROM ft('one TWO ThreE')
 } {1 one}
-do_execsql_test 4.3 {
+do_execsql_test $tn.4.3 {
  SELECT rowid, querytoken(ft, 1, 0) FROM ft('one TWO ThreE')
 } {1 two.TWO}

 if {"%DETAIL%"=="full"} {
  # Phrase queries are only supported for detail=full.
  #
-  do_execsql_test 4.4 {
+  do_execsql_test $tn.4.4 {
    SELECT rowid, querytoken(ft, 0, 2) FROM ft('"one TWO ThreE"')
  } {1 three.ThreE}
-  do_catchsql_test 4.5 {
+  do_catchsql_test $tn.4.5 {
    SELECT rowid, querytoken(ft, 0, 3) FROM ft('"one TWO ThreE"')
  } {1 SQLITE_RANGE}
-  do_catchsql_test 4.6 {
+  do_catchsql_test $tn.4.6 {
    SELECT rowid, querytoken(ft, 1, 0) FROM ft('"one TWO ThreE"')
  } {1 SQLITE_RANGE}
-  do_catchsql_test 4.7 {
+  do_catchsql_test $tn.4.7 {
    SELECT rowid, querytoken(ft, -1, 0) FROM ft('"one TWO ThreE"')
  } {1 SQLITE_RANGE}
 }
@@ -221,14 +238,15 @@ proc insttoken {cmd iIdx iToken} {
 sqlite3_fts5_create_function db insttoken insttoken
 fts5_aux_test_functions db

-do_execsql_test 5.0 {
+do_execsql_test $tn.5.0 {
  CREATE VIRTUAL TABLE ft USING fts5(
      x, tokenize='origintext unicode61', tokendata=1, detail=%DETAIL%
  );
+  INSERT INTO ft(ft, rank) VALUES('insttoken', $insttoken);
  INSERT INTO ft VALUES('one ONE One oNe oNE one');
 }

-do_execsql_test 5.1 {
+do_execsql_test $tn.5.1 {
  SELECT insttoken(ft, 0, 0), 
         insttoken(ft, 1, 0),
         insttoken(ft, 2, 0),
@@ -240,13 +258,37 @@ do_execsql_test 5.1 {
  one one.ONE one.One one.oNe one.oNE one
 }

-do_execsql_test 5.2 {
+do_execsql_test $tn.5.2 {
+  SELECT insttoken(ft, 0, 0), 
+         insttoken(ft, 1, 0),
+         insttoken(ft, 2, 0),
+         insttoken(ft, 3, 0),
+         insttoken(ft, 4, 0),
+         insttoken(ft, 5, 0)
+  FROM ft('on*');
+} {
+  one one.ONE one.One one.oNe one.oNE one
+}
+
+do_execsql_test $tn.5.3 {
+  SELECT insttoken(ft, 0, 0), 
+         insttoken(ft, 1, 0),
+         insttoken(ft, 2, 0),
+         insttoken(ft, 3, 0),
+         insttoken(ft, 4, 0),
+         insttoken(ft, 5, 0)
+  FROM ft(fts5_insttoken('on*'));
+} {
+  one one.ONE one.One one.oNe one.oNE one
+}
+
+do_execsql_test $tn.5.4 {
  SELECT insttoken(ft, 1, 0) FROM ft('one');
 } {
  one.ONE
 }

-do_execsql_test 5.3 {
+do_execsql_test $tn.5.5 {
  SELECT fts5_test_poslist(ft) FROM ft('one');
 } {
  {0.0.0 0.0.1 0.0.2 0.0.3 0.0.4 0.0.5}
@@ -260,10 +302,11 @@ do_execsql_test 5.3 {
 #
 reset_db
 sqlite3_fts5_register_origintext db
-do_execsql_test 6.0 {
+do_execsql_test $tn.6.0 {
  CREATE VIRTUAL TABLE ft USING fts5( 
      x, y, tokenize='origintext unicode61', detail=%DETAIL%, tokendata=0
  );
+  INSERT INTO ft(ft, rank) VALUES('insttoken', $insttoken);

  INSERT INTO ft VALUES('One Two', 'Three two');
  INSERT INTO ft VALUES('three Three', 'one One');
@@ -279,34 +322,35 @@ proc tokens {cmd} {
 }
 sqlite3_fts5_create_function db tokens tokens

-do_execsql_test 6.1 {
+do_execsql_test $tn.6.1 {
  SELECT rowid, tokens(ft) FROM ft('One');
 } {1 one.One 2 one.One}

-do_execsql_test 6.2 {
+do_execsql_test $tn.6.2 {
  SELECT rowid, tokens(ft) FROM ft('on*');
-} {1 {{}} 2 {{} {}}}
+} {1 one.One 2 {one one.One}}

-do_execsql_test 6.3 {
+do_execsql_test $tn.6.3 {
  SELECT rowid, tokens(ft) FROM ft('Three*');
-} {1 {{}} 2 {{}}}
+} {1 three.Three 2 three.Three}

 fts5_aux_test_functions db
-do_catchsql_test 6.4 {
+do_catchsql_test $tn.6.4 {
  SELECT fts5_test_insttoken(ft, -1, 0) FROM ft('one');
 } {1 SQLITE_RANGE}

-do_catchsql_test 6.5 {
+do_catchsql_test $tn.6.5 {
  SELECT fts5_test_insttoken(ft, 1, 0) FROM ft('one');
 } {1 SQLITE_RANGE}

-do_catchsql_test 6.6 {
+do_catchsql_test $tn.6.6 {
  CREATE VIRTUAL TABLE ft2 USING fts5(x, tokendata=2);
 } {1 {malformed tokendata=... directive}}
-do_catchsql_test 6.7 {
+do_catchsql_test $tn.6.7 {
  CREATE VIRTUAL TABLE ft2 USING fts5(x, content='', tokendata=11);
 } {1 {malformed tokendata=... directive}}

+}
 }

 finish_test
--- a/ext/fts5/test/fts5origintext3.test
+++ b/ext/fts5/test/fts5origintext3.test
@@ -22,6 +22,11 @@ ifcapable !fts5 {
 }

 foreach_detail_mode $testprefix {
+  foreach {tn insttoken} {
+    1 0
+    2 1
+  } {
+
  reset_db

  sqlite3_fts5_register_origintext db
@@ -32,21 +37,25 @@ foreach_detail_mode $testprefix {
  }
  sqlite3_fts5_create_function db insttoken insttoken
  
-  do_execsql_test 1.0 {
+  do_execsql_test $tn.1.0 {
    CREATE VIRTUAL TABLE ft USING fts5(
        x, tokenize="origintext unicode61", tokendata=1, detail=%DETAIL%
    );
  }
+  # Apply this iteration's 'insttoken' setting (0 or 1) from the outer loop.
+  do_execsql_test $tn.1.0.1 {
+    INSERT INTO ft(ft, rank) VALUES('insttoken', $insttoken);
+  }
  
-  do_execsql_test 1.1 {
+  do_execsql_test $tn.1.1 {
    INSERT INTO ft VALUES('Hello world HELLO WORLD hello');
  }
  
-  do_execsql_test 1.2 {
+  do_execsql_test $tn.1.2 {
    SELECT fts5_test_poslist(ft) FROM ft('hello');
  } {{0.0.0 0.0.2 0.0.4}}

-  do_execsql_test 1.3 {
+  do_execsql_test $tn.1.3 {
    SELECT 
      insttoken(ft, 0, 0),
      insttoken(ft, 1, 0),
@@ -54,7 +63,15 @@ foreach_detail_mode $testprefix {
    FROM ft('hello');
  } {hello.Hello hello.HELLO hello}

-  do_execsql_test 1.4 {
+  do_execsql_test $tn.1.3.1 {
+    SELECT 
+      insttoken(ft, 0, 0),
+      insttoken(ft, 1, 0),
+      insttoken(ft, 2, 0)
+    FROM ft('hel*');
+  } {hello.Hello hello.HELLO hello}
+
+  do_execsql_test $tn.1.4 {
    SELECT 
      insttoken(ft, 0, 0),
      insttoken(ft, 1, 0),
@@ -62,7 +79,7 @@ foreach_detail_mode $testprefix {
    FROM ft('hello') ORDER BY rank;
  } {hello.Hello hello.HELLO hello}

-  do_execsql_test 1.5 {
+  do_execsql_test $tn.1.5 {
    CREATE VIRTUAL TABLE ft2 USING fts5(
        x, tokenize="origintext unicode61", tokendata=1, detail=%DETAIL%
    );
@@ -71,11 +88,11 @@ foreach_detail_mode $testprefix {
    INSERT INTO ft2(rowid, x) VALUES(3, 'THREE one two three THREE');
  }

-  do_execsql_test 1.6 {
+  do_execsql_test $tn.1.6 {
    SELECT insttoken(ft2, 0, 0), rowid FROM ft2('three') ORDER BY rank;
  } {three.THREE 3 three 1 three 2}

-  do_execsql_test 1.7 {
+  do_execsql_test $tn.1.7 {
    INSERT INTO ft2(rowid, x) VALUES(10, 'aaa bbb BBB');
    INSERT INTO ft2(rowid, x) VALUES(12, 'bbb bbb bbb');
    INSERT INTO ft2(rowid, x) VALUES(13, 'bbb bbb bbb');
@@ -92,9 +109,32 @@ foreach_detail_mode $testprefix {
    INSERT INTO ft2(rowid, x) VALUES(24, 'aaa bbb BBB');
  }

-  do_execsql_test 1.8 { SELECT rowid FROM ft2('aaa AND bbb'); } {10 24}
-  do_execsql_test 1.9 { SELECT rowid FROM ft2('bbb AND aaa'); } {10 24}
+  do_execsql_test $tn.1.8 { SELECT rowid FROM ft2('aaa AND bbb'); } {10 24}
+  do_execsql_test $tn.1.9 { SELECT rowid FROM ft2('bbb AND aaa'); } {10 24}

+  do_execsql_test $tn.2.0 {
+    CREATE VIRTUAL TABLE ft3 USING fts5(
+        x, tokenize="origintext unicode61", tokendata=1, detail=%DETAIL%,
+        prefix=2
+    );
+  }
+  do_execsql_test $tn.2.1 {
+    INSERT INTO ft3(rowid, x) VALUES(1, 'one');
+    INSERT INTO ft3(rowid, x) VALUES(2, 'ONE');
+    INSERT INTO ft3(rowid, x) VALUES(3, 'ONT');
+    INSERT INTO ft3(rowid, x) VALUES(4, 'on');
+    INSERT INTO ft3(rowid, x) VALUES(5, 'On');
+  }
+
+  do_execsql_test $tn.2.2 {
+    SELECT rowid FROM ft3('on*');
+  } {1 2 3 4 5}
+
+  do_execsql_test $tn.2.3 {
+    SELECT rowid, insttoken(ft3, 0, 0) FROM ft3('on*');
+  } {1 one 2 one.ONE 3 ont.ONT 4 on 5 on.On}
+
+  }
 }

 finish_test
--- a/ext/fts5/test/fts5origintext6.test
+++ b/ext/fts5/test/fts5origintext6.test
@@ -0,0 +1,209 @@
+# 2014 Jan 08
+#
+# The author disclaims copyright to this source code.  In place of
+# a legal notice, here is a blessing:
+#
+#    May you do good and not evil.
+#    May you find forgiveness for yourself and forgive others.
+#    May you share freely, never taking more than you give.
+#
+#***********************************************************************
+#
+# Tests for xInstToken() results from prefix queries on tokendata=1 tables.
+#
+
+source [file join [file dirname [info script]] fts5_common.tcl]
+set testprefix fts5origintext6
+
+# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
+ifcapable !fts5 {
+  finish_test
+  return
+}
+
+proc insert_data {tbl} {
+  db eval "
+  INSERT INTO $tbl (rowid, x, y) VALUES
+    (1, 'ChH BDd HhG efc BjJ BGi GBG FdD','ciJ AFf ADf fBJ fhC GFI JEH fcA'),
+    (2, 'deg AIG Fie jII cCd Hbf igF fEE','GeA Ija gJg EDc HFi DDI dCf aDd'),
+    (3, 'IJC hga deC Jfa Aeg hfh CcH dfb','ajD hgC Jaf IfH CHe jIG AjD adF'),
+    (4, 'FiH GJH IDA AiG bBc CGG Eih bIH','hHg JaH aii IHE Ggd gcH gji CGc'),
+    (5, 'ceg CAd jFI GAB BGg EeC IdH acG','bBC eIG ifH eDE Adj bjb GCj ebA'),
+    (6, 'Eac Fbh aFF Eea jeG EIj HCc JJH','hbd giE Gfe eiI dEF abE cJf cAb'),
+    (7, 'dic hAc jEC AiG FEF jHc HiD HBI','aEd ebE Gfi AJG EBA faj GiG jjE'),
+    (8, 'Fca iEe EgE jjJ gce ijf EGc EBi','gaI dhH bFg CFc HeC CjI Jfg ccH'),
+    (9, 'cfd iaa HCf iHJ HjG ffh ABb ibi','CfG bia Dai eii Ejg Jeg fCg hDb'),
+    (10, 'Jjf hJC IID HJj bGB EbJ cgg eBj','jci jhi JAF jIg Bei Bcd cAC AJd'),
+    (11, 'egG Cdi bFf fEB hfH jDH jia Efd','FAd eCg fAi aiC baC eJG acF iGE'),
+    (12, 'Ada Gde CJI ADG gJA Cbb ccF iAB','eAE ajC FBB ccd Jgh fJg ieg hGE'),
+    (13, 'gBb fDG Jdd HdD fiJ Bed Cig iGg','heC FeI iaj gdg ebB giC HaD FIe'),
+    (14, 'FiI iDd Ffe igI bgB EJf FHG hDF','cjC AeI abf Fah cbJ ffH jEb aib'),
+    (15, 'jaF hBI jIH Gdh FEc Fij hgj jFh','dGA ADH feh AAI AfJ DbC gBi hGH'),
+    (16, 'gjH BGg iGj aFE CAH edI idf HEH','hIf DDg fjB hGi cHF BCH FjG Bgd'),
+    (17, 'iaI JGH hji gcj Dda eeG jDd CBi','cHg jeh caG gIc feF ihG hgJ Abj'),
+    (18, 'jHI iDB eFf AiH EFB CDb IAj GbC','Ghe dEI gdI jai gib dAG BIa djb'),
+    (19, 'abI fHG Ccf aAc FDa fiC agF bdB','afi hde IgE bGF cfg DHD diE aca'),
+    (20, 'IFh eDJ jfh cDg dde JGJ GAf fIJ','IBa EfH faE aeI FIF baJ FGj EIH'),
+    (21, 'Dee bFC bBA dEI CEj aJI ghA dCH','hBA ddA HJh dfj egI Dij dFE bGE'),
+    (22, 'JFE BCj FgA afc Jda FGD iHJ HDh','eAI jHe BHD Gah bbD Bgj gbh eGB'),
+    (23, 'edE CJE FjG aFI edA Cea FId iFe','ABG jcA ddj EEc Dcg hAI agA biA'),
+    (24, 'AgE cfc eef cGh aFB DcH efJ hcH','eGF HaB diG fgi bdc iGJ FGJ fFB'),
+    (25, 'aCa AgI GhC DDI hGJ Hgc Gcg bbG','iID Fga jHa jIj idj DFD bAC AFJ'),
+    (26, 'gjC JGh Fge faa eCA iGG gHE Gai','bDi hFE BbI DHD Adb Fgi hCa Hij'),
+    (27, 'Eji jEI jhF DFC afH cDh AGc dHA','IDe GcA ChF DIb Bif HfH agD DGh'),
+    (28, 'gDD AEE Dfg ICf Cbi JdE jgH eEi','eEb dBG FDE jgf cAI FaJ jaA cDd'),
+    (29, 'cbe Gec hgB Egi bca dHg bAJ jBf','EFB DgD GJc fDb EeE bBA GFC Hbe'),
+    (30, 'Adc eHB afI hDc Bhh baE hcJ BBd','JAH deg bcF Dab Bgj Gbb JHi FIB'),
+    (31, 'agF dIj AJJ Hfg cCG hED Igc fHC','JEf eia dHf Ggc Agj geD bEE Gei'),
+    (32, 'DAd cCe cbJ FjG gJe gba dJA GCf','eAf hFc bGE ABI hHA IcE abF CCE'),
+    (33, 'fFh jJe DhJ cDJ EBi AfD eFI IhG','fEG GCc Bjd EFF ggg CFe EHd ciB'),
+    (34, 'Ejb BjI eAF HaD eEJ FaG Eda AHC','Iah hgD EJG fdD cIE Daj IFf eJh'),
+    (35, 'aHG eCe FjA djJ dAJ jiJ IaE GGB','Acg iEF JfB FIC Eei ggj dic Iii'),
+    (36, 'Fdb EDF GaF JjB ehH IgC hgi DCG','cag DHI Fah hAJ bbh egG Hia hgJ'),
+    (37, 'HGg icC JEC AFJ Ddh dhi hfC Ich','fEg bED Bff hCJ EiA cIf bfG cGA'),
+    (38, 'aEJ jGI BCi FaA ebA BHj cIJ GcC','dCH ADd bGB cFE AgF geD cbG jIc'),
+    (39, 'JFB bBi heA BFA hgB Ahj EIE CgI','EIJ JFG FJE GeA Hdg HeH ACh GiA'),
+    (40, 'agB DDC CED igC Dfc DhI eiC fHi','dAB dcg iJF cej Fcc cAc AfB Fdd'),
+    (41, 'BdF DHj Ege hcG DEd eFa dCf gBb','FBG ChB cej iGd Hbh fCc Ibe Abh'),
+    (42, 'Bgc DjI cbC jGD bdb hHB IJA IJH','heg cii abb IGf eDe hJc dii fcE'),
+    (43, 'fhf ECa FiA aDh Jbf CiB Jhe ajD','GFE bIF aeD gDE BIE Jea DfC BEc'),
+    (44, 'GjE dBj DbJ ICF aDh EEH Ejb jFb','dJj aEc IBg bEG Faf fjA hjf FAF'),
+    (45, 'BfA efd IIJ AHG dDF eGg dIJ Gcb','Bfj jeb Ahc dAE ACH Dfb ieb dhC'),
+    (46, 'Ibj ege geC dJh CIi hbD EAG fGA','DEb BFe Bjg FId Fhg HeF JAc BbE'),
+    (47, 'dhB afC hgG bEJ aIe Cbe iEE JCD','bdg Ajc FGA jbh Jge iAj fIA jbE'),
+    (48, 'egH iDi bfH iiI hGC jFF Hfd AHB','bjE Beb iCc haB gIH Dea bga dfd'),
+    (49, 'jgf chc jGc Baj HBb jdE hgh heI','FFB aBd iEB EIG HGf Bbj EIi JbI'),
+    (50, 'jhe EGi ajA fbH geh EHe FdC bij','jDE bBC gbH HeE dcH iBH IFE AHi'),
+    (51, 'aCb JiD cgJ Bjj iAI Hbe IAF FhH','ijf bhE Jdf FED dCH bbG HcJ ebH');
+  "
+}
+
+foreach_detail_mode $testprefix {
+foreach external {0 1 2} {
+  reset_db
+
+  proc tokens {cmd} { 
+    set ret [list]
+    for {set iTok 0} {$iTok < [$cmd xInstCount]} {incr iTok} {
+      set txt [$cmd xInstToken $iTok 0]
+      set txt [string map [list "\0" "."] $txt]
+      lappend ret $txt
+    }
+    set ret
+  }
+  sqlite3_fts5_create_function db tokens tokens
+  sqlite3_fts5_register_origintext db
+
+  set E(0) internal
+  set E(1) external
+  set E(2) contentless
+  set e $E($external)
+
+  db eval { CREATE TABLE ex(x, y) }
+  switch -- $external {
+    0 {
+      do_execsql_test 1.$e.0 {
+        CREATE VIRTUAL TABLE ft USING fts5(
+            x, y, tokenize="origintext unicode61", tokendata=1, detail=%DETAIL%
+        );
+      }
+    }
+
+    1 {
+      do_execsql_test 1.$e.0 {
+        CREATE VIRTUAL TABLE ft USING fts5(
+            x, y, tokenize="origintext unicode61", tokendata=1, detail=%DETAIL%,
+            content=ex
+        );
+      }
+    }
+
+    2 {
+      do_execsql_test 1.$e.0 {
+        CREATE VIRTUAL TABLE ft USING fts5(
+            x, y, tokenize="origintext unicode61", tokendata=1, detail=%DETAIL%,
+            content=
+        );
+      }
+    }
+  }
+  insert_data ex
+  insert_data ft
+  
+  proc prefixquery {prefix bInst bYOnly} {
+    set ret [list]
+    db eval { SELECT rowid, x, y FROM ex ORDER BY rowid } {
+      set row [list]
+      set bSeen 0
+
+      set T [concat $x $y]
+      if {$bYOnly} { set T $y }
+
+      foreach w $T {
+        if {[string match -nocase $prefix $w]} {
+          set bSeen 1
+          if {$bInst} {
+            set v [string tolower $w]
+            if {$w != $v} { append v ".$w" }
+            lappend row $v
+          }
+        }
+      }
+  
+      if {$bSeen} {
+        lappend ret $rowid
+        lappend ret $row
+      }
+    }
+  
+    set ret
+  }
+  
+  proc do_prefixquery_test {tn prefix} {
+    set bInst [expr {$::e!="contentless" || "%DETAIL%"=="full"}]
+    set expect [prefixquery $prefix $bInst 0]
+    set expect2 [prefixquery $prefix $bInst 1]
+
+    uplevel [list do_execsql_test $tn.1 "
+        SELECT rowid, tokens(ft) FROM ft('$prefix')
+    " $expect]
+    uplevel [list do_execsql_test $tn.2 "
+        SELECT rowid, tokens(ft) FROM ft(fts5_insttoken('$prefix'))
+    " $expect]
+    db eval { INSERT INTO ft(ft, rank) VALUES('insttoken', 1) }
+    uplevel [list do_execsql_test $tn.3 "
+        SELECT rowid, tokens(ft) FROM ft('$prefix')
+    " $expect]
+    db eval { INSERT INTO ft(ft, rank) VALUES('insttoken', 0) }
+
+    if {"%DETAIL%"!="none"} {
+      uplevel [list do_execsql_test $tn.4 "
+          SELECT rowid, tokens(ft) FROM ft('y: $prefix')
+      " $expect2]
+      uplevel [list do_execsql_test $tn.5 "
+          SELECT rowid, tokens(ft) FROM ft(fts5_insttoken('y: $prefix'))
+      " $expect2]
+      db eval { INSERT INTO ft(ft, rank) VALUES('insttoken', 1) }
+      uplevel [list do_execsql_test $tn.6 "
+          SELECT rowid, tokens(ft) FROM ft('y: $prefix')
+      " $expect2]
+      db eval { INSERT INTO ft(ft, rank) VALUES('insttoken', 0) }
+    }
+  }
+  
+  do_prefixquery_test 1.$e.1 a*
+  do_prefixquery_test 1.$e.2 b*
+  do_prefixquery_test 1.$e.3 c*
+  do_prefixquery_test 1.$e.4 d*
+  do_prefixquery_test 1.$e.5 e*
+  do_prefixquery_test 1.$e.6 f*
+  do_prefixquery_test 1.$e.7 g*
+  do_prefixquery_test 1.$e.8 h*
+  do_prefixquery_test 1.$e.9 i*
+  do_prefixquery_test 1.$e.10 j*
+}}
+
+
+
+finish_test
+
--- a/ext/fts5/test/fts5tokendata.test
+++ b/ext/fts5/test/fts5tokendata.test
@@ -0,0 +1,105 @@
+# 2014 Jan 08
+#
+# The author disclaims copyright to this source code.  In place of
+# a legal notice, here is a blessing:
+#
+#    May you do good and not evil.
+#    May you find forgiveness for yourself and forgive others.
+#    May you share freely, never taking more than you give.
+#
+#***********************************************************************
+#
+# Tests focused on prefix queries against tokendata=1 tables.
+#
+
+source [file join [file dirname [info script]] fts5_common.tcl]
+set testprefix fts5tokendata
+
+# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
+ifcapable !fts5 {
+  finish_test
+  return
+}
+
+foreach_detail_mode $testprefix {
+
+  sqlite3_fts5_register_origintext db
+  fts5_aux_test_functions db
+  proc b {x} { string map [list "\0" "."] $x }
+  db func b b
+
+  do_execsql_test 1.0 {
+    CREATE VIRTUAL TABLE ft USING fts5(a, b, tokendata=1,
+      tokenize="origintext unicode61", detail=%DETAIL%
+    );
+    CREATE VIRTUAL TABLE vocab USING fts5vocab(ft, instance);
+  }
+
+  do_execsql_test 1.1 {
+    INSERT INTO ft(rowid, a, b) VALUES
+      (1, 'Pedagog Pedal Pedant', 'Peculier Day Today'),
+      (2, 'Pedant pedantic pecked', 'Peck Penalize Pen');
+
+    INSERT INTO ft(rowid, a, b) VALUES
+      (3, 'Penalty Pence Penciled', 'One Two Three'),
+      (4, 'Pedant Pedal Pedant', 'Peculier Day Today');
+  }
+
+  do_execsql_test 1.2 {
+    SELECT DISTINCT b(term) FROM vocab
+  } {
+    day.Day one.One peck.Peck pecked peculier.Peculier pedagog.Pedagog
+    pedal.Pedal pedant.Pedant pedantic pen.Pen penalize.Penalize 
+    penalty.Penalty pence.Pence penciled.Penciled three.Three 
+    today.Today two.Two
+  }
+
+  do_execsql_test 1.3.1 {
+    SELECT rowid FROM ft('pe*')
+  } {
+    1 2 3 4
+  }
+
+  do_execsql_test 1.3.2 {
+    SELECT rowid FROM ft('pe*') ORDER BY rowid DESC
+  } {
+    4 3 2 1
+  }
+  
+  if {"%DETAIL%"!="none"} {
+    do_execsql_test 1.3.3 {
+      SELECT rowid FROM ft WHERE a MATCH 'pe*' ORDER BY rowid DESC
+    } {
+      4 3 2 1
+    }
+  }
+
+  do_execsql_test 1.4 {
+    SELECT rowid, b( fts5_test_insttoken(ft, 0, 0) ) FROM ft('pedant')
+  } {
+    1 pedant.Pedant
+    2 pedant.Pedant
+    4 pedant.Pedant
+  }
+
+  do_execsql_test 1.5 {
+    SELECT rowid, b( fts5_test_insttoken(ft, 0, 0) ) FROM ft('pe*')
+  } {
+    1 pedagog.Pedagog
+    2 pedant.Pedant
+    3 penalty.Penalty
+    4 pedant.Pedant
+  }
+
+  do_execsql_test 1.6 {
+    SELECT rowid, fts5_test_poslist(ft) FROM ft('pe*')
+  } {
+    1 {0.0.0 0.0.1 0.0.2 0.1.0} 
+    2 {0.0.0 0.0.1 0.0.2 0.1.0 0.1.1 0.1.2}
+    3 {0.0.0 0.0.1 0.0.2}
+    4 {0.0.0 0.0.1 0.0.2 0.1.0}
+  }
+}
+
+finish_test
+
--- a/ext/icu/README.txt
+++ b/ext/icu/README.txt
@@ -1,19 +1,18 @@
-
 This directory contains source code for the SQLite "ICU" extension, an
 integration of the "International Components for Unicode" library with
 SQLite. Documentation follows.

    1. Features
-    
+
        1.1  SQL Scalars upper() and lower()
        1.2  Unicode Aware LIKE Operator
        1.3  ICU Collation Sequences
        1.4  SQL REGEXP Operator
-    
+
    2. Compilation and Usage
-    
+
    3. Bugs, Problems and Security Issues
-    
+
        3.1  The "case_sensitive_like" Pragma
        3.2  The SQLITE_MAX_LIKE_PATTERN_LENGTH Macro
        3.3  Collation Sequence Security Issue
@@ -23,10 +22,10 @@ SQLite. Documentation follows.

  1.1  SQL Scalars upper() and lower()

-    SQLite's built-in implementations of these two functions only 
+    SQLite's built-in implementations of these two functions only
    provide case mapping for the 26 letters used in the English
    language. The ICU based functions provided by this extension
-    provide case mapping, where defined, for the full range of 
+    provide case mapping, where defined, for the full range of
    unicode characters.

    ICU provides two types of case mapping, "general" case mapping and
@@ -36,7 +35,7 @@ SQLite. Documentation follows.
       http://www.icu-project.org/userguide/caseMappings.html
       http://www.icu-project.org/userguide/posix.html#case_mappings

-    To utilise "general" case mapping, the upper() or lower() scalar 
+    To utilise "general" case mapping, the upper() or lower() scalar
    functions are invoked with one argument:

        upper('abc') -> 'ABC'
@@ -57,7 +56,7 @@ SQLite. Documentation follows.
    operator understands case equivalence for the 26 letters of the English
    language alphabet. The implementation of LIKE included in this
    extension uses the ICU function u_foldCase() to provide case
-    independent comparisons for the full range of unicode characters.  
+    independent comparisons for the full range of unicode characters.

    The U_FOLD_CASE_DEFAULT flag is passed to u_foldCase(), meaning the
    dotless 'I' character used in the Turkish language is considered
@@ -66,9 +65,9 @@ SQLite. Documentation follows.

  1.3  ICU Collation Sequences

-    A special SQL scalar function, icu_load_collation() is provided that 
+    A special SQL scalar function, icu_load_collation() is provided that
    may be used to register ICU collation sequences with SQLite. It
-    is always called with exactly two arguments, the ICU locale 
+    is always called with exactly two arguments, the ICU locale
    identifying the collation sequence to ICU, and the name of the
    SQLite collation sequence to create. For example, to create an
    SQLite collation sequence named "turkish" using Turkish language
@@ -87,7 +86,7 @@ SQLite. Documentation follows.
          australian_penpal_name TEXT COLLATE australian,
          turkish_penpal_name    TEXT COLLATE turkish
        );
-  
+
  1.4 SQL REGEXP Operator

    This extension provides an implementation of the SQL binary
@@ -116,7 +115,7 @@ SQLite. Documentation follows.
  and use it as a dynamically loadable SQLite extension. To do this
  using gcc on *nix:

-    gcc -fPIC -shared icu.c `pkg-config --libs --cflags icu-uc icu-io` \
+    gcc -fPIC -shared icu.c `pkg-config --libs --cflags icu-io` \
        -o libSqliteIcu.so

  You may need to add "-I" flags so that gcc can find sqlite3ext.h
@@ -124,6 +123,11 @@ SQLite. Documentation follows.
  loaded into sqlite in the same way as any other dynamically loadable
  extension.

+  As of version 3.48, the ICU extension can be enabled in the
+  canonical build process by passing either --with-icu-config or
+  --with-icu-ldflags to the configure script, optionally together with
+  --enable-icu-collations. See "configure --help" for more details.
+

 3 BUGS, PROBLEMS AND SECURITY ISSUES

@@ -144,13 +148,13 @@ SQLite. Documentation follows.
    SQLITE_MAX_LIKE_PATTERN_LENGTH macro as the maximum length of a
    pattern in bytes (irrespective of encoding). The default value is
    defined in internal header file "limits.h".
-    
-    The ICU extension LIKE implementation suffers from the same 
+
+    The ICU extension LIKE implementation suffers from the same
    problem and uses the same solution. However, since the ICU extension
    code does not include the SQLite file "limits.h", modifying
    the default value therein does not affect the ICU extension.
    The default value of SQLITE_MAX_LIKE_PATTERN_LENGTH used by
-    the ICU extension LIKE operator is 50000, defined in source 
+    the ICU extension LIKE operator is 50000, defined in source
    file "icu.c".

  3.3 Collation Sequence Security
--- a/ext/misc/shathree.c
+++ b/ext/misc/shathree.c
@@ -12,7 +12,7 @@
 **
 ** This SQLite extension implements functions that compute SHA3 hashes
 ** in the way described by the (U.S.) NIST FIPS 202 SHA-3 Standard.
-** Two SQL functions are implemented:
+** Three SQL functions are implemented:
 **
 **     sha3(X,SIZE)
 **     sha3_agg(Y,SIZE)
--- a/ext/misc/vfstrace.c
+++ b/ext/misc/vfstrace.c
@@ -637,7 +637,7 @@ static int vfstraceFileControl(sqlite3_file *pFile, int op, void *pArg){
            }
            if( zArg[0]=='x' && isalpha(zArg[1]) ) zArg++;
            for(n=0; isalpha(zArg[n]); n++){}
-            for(jj=0; jj<sizeof(aKw)/sizeof(aKw[0]); jj++){
+            for(jj=0; jj<(int)(sizeof(aKw)/sizeof(aKw[0])); jj++){
              if( sqlite3_strnicmp(aKw[jj].z,(const char*)zArg,n)==0 ){
                if( onOff ){
                  pInfo->mTrace |= aKw[jj].m;
@@ -796,7 +796,7 @@ static int vfstraceShmLock(sqlite3_file *pFile, int ofst, int n, int flags){
  if( flags & ~(0xf) ){
     sqlite3_snprintf(sizeof(zLck)-i, &zLck[i], "|0x%x", flags);
  }
-  if( ofst>=0 && ofst<sizeof(azLockName)/sizeof(azLockName[0]) ){
+  if( ofst>=0 && ofst<(int)(sizeof(azLockName)/sizeof(azLockName[0])) ){
    vfstrace_printf(pInfo, "%s.xShmLock(%s,ofst=%d(%s),n=%d,%s)",
                  pInfo->zVfsName, p->zFName, ofst, azLockName[ofst],
                  n, &zLck[1]);
--- a/ext/wasm/tester1.c-pp.js
+++ b/ext/wasm/tester1.c-pp.js
@@ -1248,8 +1248,6 @@ globalThis.sqlite3InitModule = sqlite3InitModule;
      let st = this.db.prepare(
        new TextEncoder('utf-8').encode("select 3 as a")
      );
-      //debug("statement =",st);
-      T.assert( !this.progressHandlerCount );
      let rc;
      try {
        T.assert(wasm.isPtr(st.pointer))