Begin changes to allow synonym support by adding multiple terms to a query (an alternative to adding multiple terms to the FTS index).

FossilOrigin-Name: ad7feaed4cd6b1d6e6376bb82d1f5664ddd083f3
2025-08-08 14:02:16 +03:00 · 2015-08-31 20:06:06 +00:00
parent 0cb01f5c2c
commit cb8b58caf7
8 changed files with 297 additions and 106 deletions
--- a/ext/fts5/fts5Int.h
+++ b/ext/fts5/fts5Int.h
@@ -236,7 +236,7 @@ struct Fts5PoslistReader {
  int i;                          /* Current offset in a[] */

  /* Output variables */
-  int bEof;                       /* Set to true at EOF */
+  u8 bEof;                        /* Set to true at EOF */
  i64 iPos;                       /* (iCol<<32) + iPos */
 };
 int sqlite3Fts5PoslistReaderInit(
--- a/ext/fts5/fts5_expr.c
+++ b/ext/fts5/fts5_expr.c
@@ -73,6 +73,7 @@ struct Fts5ExprTerm {
  int bPrefix;                    /* True for a prefix term */
  char *zTerm;                    /* nul-terminated term */
  Fts5IndexIter *pIter;           /* Iterator for this term */
+  Fts5ExprTerm *pSynonym;         /* Pointer to first in list of synonyms */
 };

 /*
@@ -181,6 +182,10 @@ static int fts5ExprGetToken(

    default: {
      const char *z2;
+      if( sqlite3Fts5IsBareword(z[0])==0 ){
+        sqlite3Fts5ParseError(pParse, "fts5: syntax error near \"%.1s\"", z);
+        return FTS5_EOF;
+      }
      tok = FTS5_STRING;
      for(z2=&z[1]; sqlite3Fts5IsBareword(*z2); z2++);
      pToken->n = (z2 - z);
@@ -350,6 +355,60 @@ static int fts5ExprColsetTest(Fts5ExprColset *pColset, int iCol){
  return 0;
 }

+/*
+** Argument pTerm must be a synonym iterator. Return the current rowid
+** that it points to.
+*/
+static i64 fts5ExprSynonymRowid(Fts5ExprTerm *pTerm, int bDesc){
+  i64 iRet;
+  int bRetValid = 0;
+  Fts5ExprTerm *p;
+
+  assert( pTerm->pSynonym );
+  assert( bDesc==0 || bDesc==1 );
+  for(p=pTerm; p; p=p->pSynonym){
+    if( 0==sqlite3Fts5IterEof(p->pIter) ){
+      i64 iRowid = sqlite3Fts5IterRowid(p->pIter);
+      if( bRetValid==0 || (bDesc!=(iRowid<iRet)) ){
+        iRet = iRowid;
+        bRetValid = 1;
+      }
+    }
+  }
+
+  assert( bRetValid );
+  return iRet;
+}
+
+/*
+** Argument pTerm must be a synonym iterator.
+*/
+static int fts5ExprSynonymPoslist(
+  Fts5ExprTerm *pTerm, 
+  i64 iRowid,
+  u8 **pa, int *pn
+){
+  Fts5PoslistWriter writer = {0};
+  Fts5PoslistReader aStatic[4];
+  Fts5PoslistReader *aIter = aStatic;
+  int nIter = 0;
+  Fts5ExprTerm *p;
+
+  assert( pTerm->pSynonym );
+  for(p=pTerm; p; p=p->pSynonym){
+    Fts5IndexIter *pIter = p->pIter;
+    if( sqlite3Fts5IterEof(pIter)==0 && sqlite3Fts5IterRowid(pIter)==iRowid ){
+      i64 dummy;
+      int rc = sqlite3Fts5IterPoslist(pIter, (const u8**)pa, pn, &dummy);
+      return rc;
+    }
+  }
+
+  assert( 0 );
+  return SQLITE_ERROR;
+}
+
+
 /*
 ** All individual term iterators in pPhrase are guaranteed to be valid and
 ** pointing to the same rowid when this function is called. This function 
@@ -362,7 +421,7 @@ static int fts5ExprColsetTest(Fts5ExprColset *pColset, int iCol){
 ** not a match.
 */
 static int fts5ExprPhraseIsMatch(
-  Fts5Expr *pExpr,                /* Expression pPhrase belongs to */
+  Fts5ExprNode *pNode,            /* Node pPhrase belongs to */
  Fts5ExprColset *pColset,        /* Restrict matches to these columns */
  Fts5ExprPhrase *pPhrase,        /* Phrase object to initialize */
  int *pbMatch                    /* OUT: Set to true if really a match */
@@ -391,10 +450,15 @@ static int fts5ExprPhraseIsMatch(

  /* Initialize a term iterator for each term in the phrase */
  for(i=0; i<pPhrase->nTerm; i++){
+    Fts5ExprTerm *pTerm = &pPhrase->aTerm[i];
    i64 dummy;
    int n;
    const u8 *a;
-    rc = sqlite3Fts5IterPoslist(pPhrase->aTerm[i].pIter, &a, &n, &dummy);
+    if( pTerm->pSynonym ){
+      rc = fts5ExprSynonymPoslist(pTerm, pNode->iRowid, (u8**)&a, &n);
+    }else{
+      rc = sqlite3Fts5IterPoslist(pTerm->pIter, &a, &n, &dummy);
+    }
    if( rc || sqlite3Fts5PoslistReaderInit(iCol, a, n, &aIter[i]) ){
      goto ismatch_out;
    }
@@ -598,17 +662,43 @@ static int fts5ExprNearAdvanceFirst(
  int bFromValid,
  i64 iFrom 
 ){
-  Fts5IndexIter *pIter = pNode->pNear->apPhrase[0]->aTerm[0].pIter;
+  Fts5ExprTerm *pTerm = &pNode->pNear->apPhrase[0]->aTerm[0];
  int rc;

-  assert( Fts5NodeIsString(pNode) );
-  if( bFromValid ){
-    rc = sqlite3Fts5IterNextFrom(pIter, iFrom);
+  if( pTerm->pSynonym ){
+    int bEof = 1;
+    Fts5ExprTerm *p;
+
+    /* Find the firstest rowid any synonym points to. */
+    i64 iRowid = fts5ExprSynonymRowid(pTerm, pExpr->bDesc);
+
+    /* Advance each iterator that currently points to iRowid */
+    for(p=pTerm; p; p=p->pSynonym){
+      if( sqlite3Fts5IterEof(p->pIter)==0 ){
+        bEof = 0;
+        if( sqlite3Fts5IterRowid(p->pIter)==iRowid ){
+          rc = sqlite3Fts5IterNext(p->pIter);
+          if( rc!=SQLITE_OK ) break;
+        }
+      }
+    }
+
+    /* Set the EOF flag if either all synonym iterators are at EOF or an
+    ** error has occurred.  */
+    pNode->bEof = (rc || bEof);
  }else{
-    rc = sqlite3Fts5IterNext(pIter);
+    Fts5IndexIter *pIter = pTerm->pIter;
+
+    assert( Fts5NodeIsString(pNode) );
+    if( bFromValid ){
+      rc = sqlite3Fts5IterNextFrom(pIter, iFrom);
+    }else{
+      rc = sqlite3Fts5IterNext(pIter);
+    }
+
+    pNode->bEof = (rc || sqlite3Fts5IterEof(pIter));
  }

-  pNode->bEof = (rc || sqlite3Fts5IterEof(pIter));
  return rc;
 }

@@ -719,7 +809,7 @@ static int fts5ExprNearTest(
    Fts5ExprPhrase *pPhrase = pNear->apPhrase[i];
    if( pPhrase->nTerm>1 || pNear->pColset ){
      int bMatch = 0;
-      rc = fts5ExprPhraseIsMatch(pExpr, pNear->pColset, pPhrase, &bMatch);
+      rc = fts5ExprPhraseIsMatch(pNode, pNear->pColset, pPhrase, &bMatch);
      if( bMatch==0 ) break;
    }else{
      rc = sqlite3Fts5IterPoslistBuffer(
@@ -755,6 +845,7 @@ static int fts5ExprTokenTest(

  assert( pNode->eType==FTS5_TERM );
  assert( pNear->nPhrase==1 && pPhrase->nTerm==1 );
+  assert( pPhrase->aTerm[0].pSynonym==0 );

  rc = sqlite3Fts5IterPoslist(pIter, &pPos, &nPos, &pNode->iRowid);

@@ -801,69 +892,107 @@ static int fts5ExprNearNextMatch(
  i64 iLast;                      /* Lastest rowid any iterator points to */
  int i, j;                       /* Phrase and token index, respectively */
  int bMatch;                     /* True if all terms are at the same rowid */
+  const int bDesc = pExpr->bDesc;

-  assert( pNear->nPhrase>1 || pNear->apPhrase[0]->nTerm>1 );
+  /* Check that this node should not be FTS5_TERM */
+  assert( pNear->nPhrase>1 
+       || pNear->apPhrase[0]->nTerm>1 
+       || pNear->apPhrase[0]->aTerm[0].pSynonym
+  );

  /* Initialize iLast, the "lastest" rowid any iterator points to. If the
  ** iterator skips through rowids in the default ascending order, this means
  ** the maximum rowid. Or, if the iterator is "ORDER BY rowid DESC", then it
  ** means the minimum rowid.  */
-  iLast = sqlite3Fts5IterRowid(pLeft->aTerm[0].pIter);
+  if( pLeft->aTerm[0].pSynonym ){
+    iLast = fts5ExprSynonymRowid(&pLeft->aTerm[0], bDesc);
+  }else{
+    iLast = sqlite3Fts5IterRowid(pLeft->aTerm[0].pIter);
+  }

  do {
    bMatch = 1;
    for(i=0; i<pNear->nPhrase; i++){
      Fts5ExprPhrase *pPhrase = pNear->apPhrase[i];
      for(j=0; j<pPhrase->nTerm; j++){
-        Fts5IndexIter *pIter = pPhrase->aTerm[j].pIter;
-        i64 iRowid = sqlite3Fts5IterRowid(pIter);
-        if( iRowid!=iLast ) bMatch = 0;
-        if( fts5ExprAdvanceto(pIter, pExpr->bDesc, &iLast,&rc,&pNode->bEof) ){
-          return rc;
+        Fts5ExprTerm *pTerm = &pPhrase->aTerm[j];
+        if( pTerm->pSynonym ){
+          Fts5ExprTerm *p;
+          int bEof = 1;
+          i64 iRowid = fts5ExprSynonymRowid(pTerm, bDesc);
+          if( iRowid==iLast ) continue;
+          for(p=pTerm; p; p=p->pSynonym){
+            Fts5IndexIter *pIter = p->pIter;
+            int dummy;
+            if( 0==sqlite3Fts5IterEof(pIter)
+             && 0==fts5ExprAdvanceto(pIter, bDesc, &iLast, &rc, &dummy)==0 
+            ){
+              bEof = 0;
+            }
+          }
+          if( bEof || rc ){
+            pNode->bEof = 1;
+            return rc;
+          }
+        }else{
+          Fts5IndexIter *pIter = pPhrase->aTerm[j].pIter;
+          i64 iRowid = sqlite3Fts5IterRowid(pIter);
+          if( iRowid==iLast ) continue;
+          bMatch = 0;
+          if( fts5ExprAdvanceto(pIter, bDesc, &iLast, &rc, &pNode->bEof) ){
+            return rc;
+          }
        }
      }
    }
  }while( bMatch==0 );

-  pNode->bNomatch = (0==fts5ExprNearTest(&rc, pExpr, pNode));
  pNode->iRowid = iLast;
+  pNode->bNomatch = (0==fts5ExprNearTest(&rc, pExpr, pNode));

  return rc;
 }

 /*
 ** Initialize all term iterators in the pNear object. If any term is found
-** to match no documents at all, set *pbEof to true and return immediately,
-** without initializing any further iterators.
+** to match no documents at all, return immediately without initializing any
+** further iterators.
 */
 static int fts5ExprNearInitAll(
  Fts5Expr *pExpr,
  Fts5ExprNode *pNode
 ){
  Fts5ExprNearset *pNear = pNode->pNear;
-  Fts5ExprTerm *pTerm;
-  Fts5ExprPhrase *pPhrase;
  int i, j;
  int rc = SQLITE_OK;

  for(i=0; rc==SQLITE_OK && i<pNear->nPhrase; i++){
-    pPhrase = pNear->apPhrase[i];
+    Fts5ExprPhrase *pPhrase = pNear->apPhrase[i];
    for(j=0; j<pPhrase->nTerm; j++){
-      pTerm = &pPhrase->aTerm[j];
-      if( pTerm->pIter ){
-        sqlite3Fts5IterClose(pTerm->pIter);
-        pTerm->pIter = 0;
+      Fts5ExprTerm *pTerm = &pPhrase->aTerm[j];
+      Fts5ExprTerm *p;
+      int bEof = 1;
+
+      for(p=pTerm; p && rc==SQLITE_OK; p=p->pSynonym){
+        if( p->pIter ){
+          sqlite3Fts5IterClose(p->pIter);
+          p->pIter = 0;
+        }
+        rc = sqlite3Fts5IndexQuery(
+            pExpr->pIndex, p->zTerm, strlen(p->zTerm),
+            (pTerm->bPrefix ? FTS5INDEX_QUERY_PREFIX : 0) |
+            (pExpr->bDesc ? FTS5INDEX_QUERY_DESC : 0),
+            &p->pIter
+        );
+        assert( rc==SQLITE_OK || p->pIter==0 );
+        if( p->pIter && 0==sqlite3Fts5IterEof(pTerm->pIter) ){
+          bEof = 0;
+        }
      }
-      rc = sqlite3Fts5IndexQuery(
-          pExpr->pIndex, pTerm->zTerm, strlen(pTerm->zTerm),
-          (pTerm->bPrefix ? FTS5INDEX_QUERY_PREFIX : 0) |
-          (pExpr->bDesc ? FTS5INDEX_QUERY_DESC : 0),
-          &pTerm->pIter
-      );
-      assert( rc==SQLITE_OK || pTerm->pIter==0 );
-      if( pTerm->pIter==0 || sqlite3Fts5IterEof(pTerm->pIter) ){
+
+      if( bEof ){
        pNode->bEof = 1;
-        break;
+        return rc;
      }
    }
  }
@@ -1266,10 +1395,16 @@ static void fts5ExprPhraseFree(Fts5ExprPhrase *pPhrase){
  if( pPhrase ){
    int i;
    for(i=0; i<pPhrase->nTerm; i++){
+      Fts5ExprTerm *pSyn;
+      Fts5ExprTerm *pNext;
      Fts5ExprTerm *pTerm = &pPhrase->aTerm[i];
      sqlite3_free(pTerm->zTerm);
-      if( pTerm->pIter ){
-        sqlite3Fts5IterClose(pTerm->pIter);
+      sqlite3Fts5IterClose(pTerm->pIter);
+
+      for(pSyn=pTerm->pSynonym; pSyn; pSyn=pNext){
+        pNext = pSyn->pSynonym;
+        sqlite3Fts5IterClose(pSyn->pIter);
+        sqlite3_free(pSyn);
      }
    }
    if( pPhrase->poslist.nSpace>0 ) fts5BufferFree(&pPhrase->poslist);
@@ -1348,27 +1483,37 @@ static int fts5ParseTokenize(
  const int SZALLOC = 8;
  TokenCtx *pCtx = (TokenCtx*)pContext;
  Fts5ExprPhrase *pPhrase = pCtx->pPhrase;
-  Fts5ExprTerm *pTerm;

-  if( tflags & FTS5_TOKEN_COLOCATED ) return rc;
+  assert( pPhrase==0 || pPhrase->nTerm>0 );
+  if( pPhrase && (tflags & FTS5_TOKEN_COLOCATED) ){
+    Fts5ExprTerm *pSyn;
+    int nByte = sizeof(Fts5ExprTerm) + nToken+1;
+    pSyn = (Fts5ExprTerm*)sqlite3_malloc(nByte);
+    if( pSyn==0 ) return SQLITE_NOMEM;
+    memset(pSyn, 0, nByte);
+    pSyn->zTerm = (char*)&pSyn[1];
+    memcpy(pSyn->zTerm, pToken, nToken);
+    pSyn->pSynonym = pPhrase->aTerm[pPhrase->nTerm-1].pSynonym;
+    pPhrase->aTerm[pPhrase->nTerm-1].pSynonym = pSyn;
+  }else{
+    Fts5ExprTerm *pTerm;
+    if( pPhrase==0 || (pPhrase->nTerm % SZALLOC)==0 ){
+      Fts5ExprPhrase *pNew;
+      int nNew = SZALLOC + (pPhrase ? pPhrase->nTerm : 0);

-  if( pPhrase==0 || (pPhrase->nTerm % SZALLOC)==0 ){
-    Fts5ExprPhrase *pNew;
-    int nNew = SZALLOC + (pPhrase ? pPhrase->nTerm : 0);
+      pNew = (Fts5ExprPhrase*)sqlite3_realloc(pPhrase, 
+          sizeof(Fts5ExprPhrase) + sizeof(Fts5ExprTerm) * nNew
+          );
+      if( pNew==0 ) return SQLITE_NOMEM;
+      if( pPhrase==0 ) memset(pNew, 0, sizeof(Fts5ExprPhrase));
+      pCtx->pPhrase = pPhrase = pNew;
+      pNew->nTerm = nNew - SZALLOC;
+    }

-    pNew = (Fts5ExprPhrase*)sqlite3_realloc(pPhrase, 
-        sizeof(Fts5ExprPhrase) + sizeof(Fts5ExprTerm) * nNew
-    );
-    if( pNew==0 ) return SQLITE_NOMEM;
-    if( pPhrase==0 ) memset(pNew, 0, sizeof(Fts5ExprPhrase));
-    pCtx->pPhrase = pPhrase = pNew;
-    pNew->nTerm = nNew - SZALLOC;
+    pTerm = &pPhrase->aTerm[pPhrase->nTerm++];
+    memset(pTerm, 0, sizeof(Fts5ExprTerm));
+    pTerm->zTerm = sqlite3Fts5Strndup(&rc, pToken, nToken);
  }
-
-  pTerm = &pPhrase->aTerm[pPhrase->nTerm++];
-  memset(pTerm, 0, sizeof(Fts5ExprTerm));
-  pTerm->zTerm = sqlite3Fts5Strndup(&rc, pToken, nToken);
-
  return rc;
 }

@@ -1636,7 +1781,10 @@ Fts5ExprNode *sqlite3Fts5ParseNode(
        for(iPhrase=0; iPhrase<pNear->nPhrase; iPhrase++){
          pNear->apPhrase[iPhrase]->pNode = pRet;
        }
-        if( pNear->nPhrase==1 && pNear->apPhrase[0]->nTerm==1 ){
+        if( pNear->nPhrase==1 
+         && pNear->apPhrase[0]->nTerm==1 
+         && pNear->apPhrase[0]->aTerm[0].pSynonym==0
+        ){
          pRet->eType = FTS5_TERM;
        }
      }else{
@@ -1656,16 +1804,28 @@ Fts5ExprNode *sqlite3Fts5ParseNode(
 }

 static char *fts5ExprTermPrint(Fts5ExprTerm *pTerm){
-  char *zQuoted = sqlite3_malloc(strlen(pTerm->zTerm) * 2 + 3 + 2);
+  int nByte = 0;
+  Fts5ExprTerm *p;
+  char *zQuoted;
+
+  /* Determine the maximum amount of space required. */
+  for(p=pTerm; p; p=p->pSynonym){
+    nByte += strlen(pTerm->zTerm) * 2 + 3 + 2;
+  }
+  zQuoted = sqlite3_malloc(nByte);
+
  if( zQuoted ){
    int i = 0;
-    char *zIn = pTerm->zTerm;
-    zQuoted[i++] = '"';
-    while( *zIn ){
-      if( *zIn=='"' ) zQuoted[i++] = '"';
-      zQuoted[i++] = *zIn++;
+    for(p=pTerm; p; p=p->pSynonym){
+      char *zIn = p->zTerm;
+      zQuoted[i++] = '"';
+      while( *zIn ){
+        if( *zIn=='"' ) zQuoted[i++] = '"';
+        zQuoted[i++] = *zIn++;
+      }
+      zQuoted[i++] = '"';
+      if( p->pSynonym ) zQuoted[i++] = '|';
    }
-    zQuoted[i++] = '"';
    if( pTerm->bPrefix ){
      zQuoted[i++] = ' ';
      zQuoted[i++] = '*';
--- a/ext/fts5/test/fts5aa.test
+++ b/ext/fts5/test/fts5aa.test
@@ -343,7 +343,7 @@ do_execsql_test 13.5 {
 } {1}

 do_execsql_test 13.6 {
-  SELECT rowid FROM t1 WHERE t1 MATCH '.';
+  SELECT rowid FROM t1 WHERE t1 MATCH '""';
 } {}

 #-------------------------------------------------------------------------
--- a/ext/fts5/test/fts5ea.test
+++ b/ext/fts5/test/fts5ea.test
@@ -87,6 +87,12 @@ do_execsql_test 4.0 {
  SELECT fts5_expr('a AND """"', 'x', 'tokenize="unicode61 tokenchars ''""''"');
 } {{"a" AND """"}}

+#-------------------------------------------------------------------------
+# Experiment with a tokenizer that considers " to be a token character.
+#
+do_catchsql_test 5.0 {
+  SELECT fts5_expr('abc | def');
+} {1 {fts5: syntax error near "|"}}



--- a/ext/fts5/test/fts5eb.test
+++ b/ext/fts5/test/fts5eb.test
@@ -30,18 +30,18 @@ proc do_syntax_test {tn expr res} {
 }

 foreach {tn expr res} {
-  1  {abc}                           {"abc"}
-  2  {abc .}                         {"abc"}
-  3  {.}                             {}
-  4  {abc OR .}                      {"abc"}
-  5  {abc NOT .}                     {"abc"}
-  6  {abc AND .}                     {"abc"}
-  7  {. OR abc}                      {"abc"}
-  8  {. NOT abc}                     {"abc"}
-  9  {. AND abc}                     {"abc"}
-  10 {abc + . + def}                 {"abc" + "def"}
-  11 {abc . def}                     {"abc" AND "def"}
-  12 {r+e OR w}                      {"r" + "e" OR "w"}
+  1  {abc}                            {"abc"}
+  2  {abc ""}                         {"abc"}
+  3  {""}                             {}
+  4  {abc OR ""}                      {"abc"}
+  5  {abc NOT ""}                     {"abc"}
+  6  {abc AND ""}                     {"abc"}
+  7  {"" OR abc}                      {"abc"}
+  8  {"" NOT abc}                     {"abc"}
+  9  {"" AND abc}                     {"abc"}
+  10 {abc + "" + def}                 {"abc" + "def"}
+  11 {abc "" def}                     {"abc" AND "def"}
+  12 {r+e OR w}                       {"r" + "e" OR "w"}
 } {
  do_execsql_test 1.$tn {SELECT fts5_expr($expr)} [list $res]
 }
--- a/ext/fts5/test/fts5synonym.test
+++ b/ext/fts5/test/fts5synonym.test
@@ -21,6 +21,24 @@ ifcapable !fts5 {
  return
 }

+foreach S {
+  {zero 0}
+  {one 1}
+  {two 2}
+  {three 3 iii}
+  {four 4}
+  {five 5}
+  {six 6}
+  {seven 7}
+  {eight 8}
+  {nine 9}
+} {
+  foreach s $S {
+    set o [list]
+    foreach x $S {if {$x!=$s} {lappend o $x}}
+    set ::syn($s) $o
+  }
+}

 proc gobble_whitespace {textvar} {
  upvar $textvar t
@@ -75,24 +93,6 @@ do_execsql_test 1.0 {
 # Test a tokenizer that supports synonyms by adding extra entries to the
 # FTS index.
 #
-foreach S {
-  {zero 0}
-  {one 1}
-  {two 2}
-  {three 3 iii}
-  {four 4}
-  {five 5}
-  {six 6}
-  {seven 7}
-  {eight 8}
-  {nine 9}
-} {
-  foreach s $S {
-    set o [list]
-    foreach x $S {if {$x!=$s} {lappend o $x}}
-    set ::syn($s) $o
-  }
-}

 proc tcl_tokenize {tflags text} {
  foreach {w iStart iEnd} [do_tokenize_split $text] {
@@ -194,5 +194,30 @@ do_execsql_test 3.2.2 {
  SELECT rowid FROM ft WHERE ft MATCH 'one + two + two + three';
 } {1}

+#-------------------------------------------------------------------------
+# Check that expressions with synonyms can be parsed.
+#
+reset_db
+sqlite3_fts5_create_tokenizer db tcl tcl_create
+proc tcl_tokenize {tflags text} {
+  foreach {w iStart iEnd} [do_tokenize_split $text] {
+    sqlite3_fts5_token $w $iStart $iEnd
+    if {$tflags=="query" && [info exists ::syn($w)]} {
+      foreach s $::syn($w) {
+        sqlite3_fts5_token -colo $s $iStart $iEnd
+      }
+    }
+  }
+}
+
+foreach {tn expr res} {
+  1  {abc}                           {"abc"}
+  2  {one}                           {"one"|"1"}
+  3  {3}                             {"3"|"iii"|"three"}
+  4  {3*}                            {"3"|"iii"|"three" *}
+} {
+  do_execsql_test 4.$tn {SELECT fts5_expr($expr, 'tokenize=tcl')} [list $res]
+}
+
 finish_test