Add the xLanguageid method to sqlite3_tokenizer_module versions 1 and greater.

FossilOrigin-Name: f8e9c445dd358c40e5a7bf3756b9f291909dbea7
2025-07-29 08:01:23 +03:00 · 2012-03-03 18:46:41 +00:00
parent 7395599031
commit 996073b3ae
12 changed files with 340 additions and 63 deletions
--- a/ext/fts3/fts3.c
+++ b/ext/fts3/fts3.c
@ -2958,8 +2958,11 @@ static int fts3FilterMethod(
      return SQLITE_NOMEM;
    }

-    rc = sqlite3Fts3ExprParse(p->pTokenizer, p->azColumn, p->bHasStat, 
-        p->nColumn, iCol, zQuery, -1, &pCsr->pExpr
+    pCsr->iLangid = 0;
+    if( nVal==2 ) pCsr->iLangid = sqlite3_value_int(apVal[1]);
+
+    rc = sqlite3Fts3ExprParse(p->pTokenizer, pCsr->iLangid,
+        p->azColumn, p->bHasStat, p->nColumn, iCol, zQuery, -1, &pCsr->pExpr
    );
    if( rc!=SQLITE_OK ){
      if( rc==SQLITE_ERROR ){
@ -2969,9 +2972,6 @@ static int fts3FilterMethod(
      return rc;
    }

-    pCsr->iLangid = 0;
-    if( nVal==2 ) pCsr->iLangid = sqlite3_value_int(apVal[1]);
-
    rc = sqlite3Fts3ReadLock(p);
    if( rc!=SQLITE_OK ) return rc;

--- a/ext/fts3/fts3Int.h
+++ b/ext/fts3/fts3Int.h
@ -498,7 +498,7 @@ void sqlite3Fts3Snippet(sqlite3_context *, Fts3Cursor *, const char *,
 void sqlite3Fts3Matchinfo(sqlite3_context *, Fts3Cursor *, const char *);

 /* fts3_expr.c */
-int sqlite3Fts3ExprParse(sqlite3_tokenizer *, 
+int sqlite3Fts3ExprParse(sqlite3_tokenizer *, int,
  char **, int, int, int, const char *, int, Fts3Expr **
 );
 void sqlite3Fts3ExprFree(Fts3Expr *);
@ -507,6 +507,10 @@ int sqlite3Fts3ExprInitTestInterface(sqlite3 *db);
 int sqlite3Fts3InitTerm(sqlite3 *db);
 #endif

+int sqlite3Fts3OpenTokenizer(sqlite3_tokenizer *, int, const char *, int,
+  sqlite3_tokenizer_cursor **
+);
+
 /* fts3_aux.c */
 int sqlite3Fts3InitAux(sqlite3 *db);

--- a/ext/fts3/fts3_expr.c
+++ b/ext/fts3/fts3_expr.c
@ -92,6 +92,7 @@ int sqlite3_fts3_enable_parentheses = 0;
 typedef struct ParseContext ParseContext;
 struct ParseContext {
  sqlite3_tokenizer *pTokenizer;      /* Tokenizer module */
+  int iLangid;                        /* Language id used with tokenizer */
  const char **azCol;                 /* Array of column names for fts3 table */
  int bFts4;                          /* True to allow FTS4-only syntax */
  int nCol;                           /* Number of entries in azCol[] */
@ -127,6 +128,33 @@ static void *fts3MallocZero(int nByte){
  return pRet;
 }

+/*
+** Open a tokenizer cursor on buffer z (length n) using tokenizer
+** pTokenizer, and configure it to use language id iLangid.
+**
+** The xOpen method of the tokenizer module is invoked first. If it
+** succeeds, the pTokenizer field of the new cursor is set. Then, if the
+** module is version 1 or greater, its xLanguageid method is invoked. If
+** xLanguageid fails the cursor is closed again.
+**
+** The return value is the error code returned by xOpen or xLanguageid.
+** *ppCsr is always written. It is set to 0 if xLanguageid fails;
+** otherwise it receives the cursor pointer as populated by xOpen.
+*/
+int sqlite3Fts3OpenTokenizer(
+  sqlite3_tokenizer *pTokenizer,
+  int iLangid,
+  const char *z,
+  int n,
+  sqlite3_tokenizer_cursor **ppCsr
+){
+  sqlite3_tokenizer_module const *pMod = pTokenizer->pModule;
+  sqlite3_tokenizer_cursor *pNew = 0;
+  int rc = pMod->xOpen(pTokenizer, z, n, &pNew);
+
+  assert( rc==SQLITE_OK || pNew==0 );
+  if( rc!=SQLITE_OK ){
+    *ppCsr = pNew;
+    return rc;
+  }
+
+  pNew->pTokenizer = pTokenizer;
+  if( pMod->iVersion>=1 ){
+    /* Only version 1 and later modules have an xLanguageid method */
+    rc = pMod->xLanguageid(pNew, iLangid);
+    if( rc!=SQLITE_OK ){
+      pMod->xClose(pNew);
+      pNew = 0;
+    }
+  }
+
+  *ppCsr = pNew;
+  return rc;
+}
+

 /*
 ** Extract the next token from buffer z (length n) using the tokenizer
@ -154,15 +182,13 @@ static int getNextToken(
  Fts3Expr *pRet = 0;
  int nConsumed = 0;

-  rc = pModule->xOpen(pTokenizer, z, n, &pCursor);
+  rc = sqlite3Fts3OpenTokenizer(pTokenizer, pParse->iLangid, z, n, &pCursor);
  if( rc==SQLITE_OK ){
    const char *zToken;
    int nToken, iStart, iEnd, iPosition;
    int nByte;                               /* total space to allocate */

-    pCursor->pTokenizer = pTokenizer;
    rc = pModule->xNext(pCursor, &zToken, &nToken, &iStart, &iEnd, &iPosition);
-
    if( rc==SQLITE_OK ){
      nByte = sizeof(Fts3Expr) + sizeof(Fts3Phrase) + nToken;
      pRet = (Fts3Expr *)fts3MallocZero(nByte);
@ -268,10 +294,10 @@ static int getNextString(
  ** appends buffer zTemp to buffer p, and fills in the Fts3Expr and Fts3Phrase
  ** structures.
  */
-  rc = pModule->xOpen(pTokenizer, zInput, nInput, &pCursor);
+  rc = sqlite3Fts3OpenTokenizer(
+      pTokenizer, pParse->iLangid, zInput, nInput, &pCursor);
  if( rc==SQLITE_OK ){
    int ii;
-    pCursor->pTokenizer = pTokenizer;
    for(ii=0; rc==SQLITE_OK; ii++){
      const char *zByte;
      int nByte, iBegin, iEnd, iPos;
@ -745,6 +771,7 @@ exprparse_out:
 */
 int sqlite3Fts3ExprParse(
  sqlite3_tokenizer *pTokenizer,      /* Tokenizer module */
+  int iLangid,                        /* Language id for tokenizer */
  char **azCol,                       /* Array of column names for fts3 table */
  int bFts4,                          /* True to allow FTS4-only syntax */
  int nCol,                           /* Number of entries in azCol[] */
@ -755,11 +782,13 @@ int sqlite3Fts3ExprParse(
  int nParsed;
  int rc;
  ParseContext sParse;
+
+  memset(&sParse, 0, sizeof(ParseContext));
  sParse.pTokenizer = pTokenizer;
+  sParse.iLangid = iLangid;
  sParse.azCol = (const char **)azCol;
  sParse.nCol = nCol;
  sParse.iDefaultCol = iDefaultCol;
-  sParse.nNest = 0;
  sParse.bFts4 = bFts4;
  if( z==0 ){
    *ppExpr = 0;
@ -950,7 +979,7 @@ static void fts3ExprTest(
  }

  rc = sqlite3Fts3ExprParse(
-      pTokenizer, azCol, 0, nCol, nCol, zExpr, nExpr, &pExpr
+      pTokenizer, 0, azCol, 0, nCol, nCol, zExpr, nExpr, &pExpr
  );
  if( rc!=SQLITE_OK && rc!=SQLITE_NOMEM ){
    sqlite3_result_error(context, "Error parsing expression", -1);
--- a/ext/fts3/fts3_snippet.c
+++ b/ext/fts3/fts3_snippet.c
@ -532,6 +532,7 @@ static int fts3StringAppend(
 */
 static int fts3SnippetShift(
  Fts3Table *pTab,                /* FTS3 table snippet comes from */
+  int iLangid,                    /* Language id to use in tokenizing */
  int nSnippet,                   /* Number of tokens desired for snippet */
  const char *zDoc,               /* Document text to extract snippet from */
  int nDoc,                       /* Size of buffer zDoc in bytes */
@ -567,11 +568,10 @@ static int fts3SnippetShift(
      /* Open a cursor on zDoc/nDoc. Check if there are (nSnippet+nDesired)
      ** or more tokens in zDoc/nDoc.
      */
-      rc = pMod->xOpen(pTab->pTokenizer, zDoc, nDoc, &pC);
+      rc = sqlite3Fts3OpenTokenizer(pTab->pTokenizer, iLangid, zDoc, nDoc, &pC);
      if( rc!=SQLITE_OK ){
        return rc;
      }
-      pC->pTokenizer = pTab->pTokenizer;
      while( rc==SQLITE_OK && iCurrent<(nSnippet+nDesired) ){
        const char *ZDUMMY; int DUMMY1, DUMMY2, DUMMY3;
        rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &DUMMY2, &DUMMY3, &iCurrent);
@ -631,11 +631,10 @@ static int fts3SnippetText(

  /* Open a token cursor on the document. */
  pMod = (sqlite3_tokenizer_module *)pTab->pTokenizer->pModule;
-  rc = pMod->xOpen(pTab->pTokenizer, zDoc, nDoc, &pC);
+  rc = sqlite3Fts3OpenTokenizer(pTab->pTokenizer, pCsr->iLangid, zDoc,nDoc,&pC);
  if( rc!=SQLITE_OK ){
    return rc;
  }
-  pC->pTokenizer = pTab->pTokenizer;

  while( rc==SQLITE_OK ){
    int iBegin;                   /* Offset in zDoc of start of token */
@ -657,7 +656,9 @@ static int fts3SnippetText(

    if( !isShiftDone ){
      int n = nDoc - iBegin;
-      rc = fts3SnippetShift(pTab, nSnippet, &zDoc[iBegin], n, &iPos, &hlmask);
+      rc = fts3SnippetShift(
+          pTab, pCsr->iLangid, nSnippet, &zDoc[iBegin], n, &iPos, &hlmask
+      );
      isShiftDone = 1;

      /* Now that the shift has been done, check if the initial "..." are
@ -1390,9 +1391,10 @@ void sqlite3Fts3Offsets(
    }

    /* Initialize a tokenizer iterator to iterate through column iCol. */
-    rc = pMod->xOpen(pTab->pTokenizer, zDoc, nDoc, &pC);
+    rc = sqlite3Fts3OpenTokenizer(pTab->pTokenizer, pCsr->iLangid,
+        zDoc, nDoc, &pC
+    );
    if( rc!=SQLITE_OK ) goto offsets_out;
-    pC->pTokenizer = pTab->pTokenizer;

    rc = pMod->xNext(pC, &ZDUMMY, &NDUMMY, &iStart, &iEnd, &iCurrent);
    while( rc==SQLITE_OK ){
--- a/ext/fts3/fts3_test.c
+++ b/ext/fts3/fts3_test.c
@ -13,6 +13,9 @@
 ** This file is not part of the production FTS code. It is only used for
 ** testing. It contains a Tcl command that can be used to test if a document
 ** matches an FTS NEAR expression.
+**
+** As of March 2012, it also contains a version 1 tokenizer used for testing
+** that the sqlite3_tokenizer_module.xLanguageid() method is invoked correctly.
 */

 #include <tcl.h>
@ -314,11 +317,207 @@ static int fts3_configure_incr_load_cmd(
  return TCL_OK;
 }

+/**************************************************************************
+** Beginning of test tokenizer code.
+**
+** For language 0, this tokenizer is similar to the default 'simple'
+** tokenizer. For other language ids, the following rules apply:
+**
+**   * Odd numbered languages are case-sensitive. Even numbered 
+**     languages are not.
+**
+**   * Language ids 100 or greater are considered an error.
+**
+** The implementation assumes that the input contains only ASCII characters
+** (i.e. those that may be encoded in UTF-8 using a single byte).
+*/
+typedef struct test_tokenizer {   /* Test tokenizer object; holds no state of its own */
+  sqlite3_tokenizer base;         /* Base object; pointers are cast to/from sqlite3_tokenizer* */
+} test_tokenizer;
+
+typedef struct test_tokenizer_cursor {   /* Cursor used to tokenize one input buffer */
+  sqlite3_tokenizer_cursor base;   /* Base object; cursor is cast to/from this type */
+  const char *aInput;          /* Input being tokenized (not owned by the cursor) */
+  int nInput;                  /* Size of the input in bytes */
+  int iInput;                  /* Current offset in aInput */
+  int iToken;                  /* Number of tokens returned so far */
+  char *aBuffer;               /* Buffer containing current token (not nul-terminated) */
+  int nBuffer;                 /* Number of bytes allocated at aBuffer */
+  int iLangid;                 /* Language id set by testTokenizerLanguage() */
+} test_tokenizer_cursor;
+
+/*
+** Constructor for the test tokenizer. Allocate a zeroed test_tokenizer
+** object and return it via *ppTokenizer. The argc/argv arguments are
+** not used.
+**
+** Return SQLITE_OK if successful, or SQLITE_NOMEM if the allocation
+** fails (in which case *ppTokenizer is left unmodified).
+*/
+static int testTokenizerCreate(
+  int argc, const char * const *argv,
+  sqlite3_tokenizer **ppTokenizer
+){
+  test_tokenizer *pTok = sqlite3_malloc(sizeof(test_tokenizer));
+
+  if( pTok==0 ){
+    return SQLITE_NOMEM;
+  }
+  memset(pTok, 0, sizeof(test_tokenizer));
+  *ppTokenizer = (sqlite3_tokenizer *)pTok;
+  return SQLITE_OK;
+}
+
+/*
+** Destructor for the test tokenizer. Free the object allocated by
+** testTokenizerCreate(). Always returns SQLITE_OK.
+*/
+static int testTokenizerDestroy(sqlite3_tokenizer *pTokenizer){
+  sqlite3_free((test_tokenizer *)pTokenizer);
+  return SQLITE_OK;
+}
+
+/*
+** xOpen method of the test tokenizer. Allocate a new cursor to tokenize
+** the nBytes byte buffer pInput. If nBytes is negative, pInput is assumed
+** to be nul-terminated and its length is measured with strlen(). A NULL
+** pInput is treated as an empty input.
+**
+** The cursor stores the pInput pointer without copying the text, so the
+** buffer must remain valid for as long as the cursor is in use. The
+** language id of the new cursor is 0 until testTokenizerLanguage() is
+** called.
+**
+** Return SQLITE_OK and set *ppCursor to the new cursor if successful, or
+** return SQLITE_NOMEM and set *ppCursor to 0 if the allocation fails.
+*/
+static int testTokenizerOpen(
+  sqlite3_tokenizer *pTokenizer,         /* The tokenizer */
+  const char *pInput, int nBytes,        /* String to be tokenized */
+  sqlite3_tokenizer_cursor **ppCursor    /* OUT: Tokenization cursor */
+){
+  int rc = SQLITE_OK;                    /* Return code */
+  test_tokenizer_cursor *pCsr;           /* New cursor object */
+
+  UNUSED_PARAMETER(pTokenizer);
+
+  pCsr = (test_tokenizer_cursor *)sqlite3_malloc(sizeof(test_tokenizer_cursor));
+  if( pCsr==0 ){
+    rc = SQLITE_NOMEM;
+  }else{
+    memset(pCsr, 0, sizeof(test_tokenizer_cursor));
+    if( pInput==0 ){
+      /* Callers may pass the result of sqlite3_column_text(), which is
+      ** NULL for an SQL NULL. Tokenize it as an empty string instead of
+      ** passing a NULL pointer to strlen(). */
+      pCsr->aInput = "";
+      pCsr->nInput = 0;
+    }else{
+      pCsr->aInput = pInput;
+      pCsr->nInput = (nBytes<0) ? (int)strlen(pInput) : nBytes;
+    }
+  }
+
+  *ppCursor = (sqlite3_tokenizer_cursor *)pCsr;
+  return rc;
+}
+
+/*
+** xClose method of the test tokenizer. Release the token buffer and
+** the cursor object itself. Always returns SQLITE_OK.
+*/
+static int testTokenizerClose(sqlite3_tokenizer_cursor *pCursor){
+  test_tokenizer_cursor *p = (test_tokenizer_cursor *)pCursor;
+  char *aTokenBuf = p->aBuffer;
+
+  sqlite3_free(aTokenBuf);
+  sqlite3_free(p);
+  return SQLITE_OK;
+}
+
+/*
+** Return 1 if character c may be part of a token (an ASCII letter, upper
+** or lower case), or 0 otherwise.
+*/
+static int testIsTokenChar(char c){
+  if( c>='a' && c<='z' ) return 1;
+  if( c>='A' && c<='Z' ) return 1;
+  return 0;
+}
+/*
+** If c is an upper case ASCII letter, return the corresponding lower case
+** letter. Otherwise return c unchanged.
+*/
+static int testTolower(char c){
+  if( c<'A' || c>'Z' ) return c;
+  return (char)(c + ('a'-'A'));
+}
+
+/*
+** xNext method of the test tokenizer. Extract the next token from the
+** input of cursor pCursor. A token is a maximal run of ASCII letters;
+** every other byte is treated as a separator.
+**
+** If a token is found, SQLITE_OK is returned and the output variables
+** are populated. *ppToken points to a buffer owned by the cursor. The
+** buffer is not nul-terminated and is overwritten by the next call.
+** SQLITE_DONE is returned once the input is exhausted, and SQLITE_NOMEM
+** if the token buffer cannot be allocated. In both of those cases the
+** output variables are left unmodified.
+**
+** If the language id of the cursor is odd the token text is returned
+** verbatim. Otherwise it is folded to lower case.
+**
+** The value written to *piPosition is 1 for the first token returned
+** by a cursor, 2 for the second, and so on.
+*/
+static int testTokenizerNext(
+  sqlite3_tokenizer_cursor *pCursor,  /* Cursor returned by testTokenizerOpen */
+  const char **ppToken,               /* OUT: *ppToken is the token text */
+  int *pnBytes,                       /* OUT: Number of bytes in token */
+  int *piStartOffset,                 /* OUT: Starting offset of token */
+  int *piEndOffset,                   /* OUT: Ending offset of token */
+  int *piPosition                     /* OUT: Position integer of token */
+){
+  test_tokenizer_cursor *pCsr = (test_tokenizer_cursor *)pCursor;
+  int rc = SQLITE_OK;
+  const char *p;
+  const char *pEnd;
+
+  p = &pCsr->aInput[pCsr->iInput];
+  pEnd = &pCsr->aInput[pCsr->nInput];
+
+  /* Skip past any separator (non-token) characters */
+  assert( p<=pEnd );
+  while( p<pEnd && testIsTokenChar(*p)==0 ) p++;
+
+  if( p==pEnd ){
+    rc = SQLITE_DONE;
+  }else{
+    /* Advance to the end of the token */
+    const char *pToken = p;
+    int nToken;
+    while( p<pEnd && testIsTokenChar(*p) ) p++;
+    nToken = (int)(p-pToken);
+
+    /* Grow the token buffer if it is too small. The new capacity is
+    ** recorded in nBuffer so that the allocation is reused for later
+    ** tokens that fit, instead of being freed and reallocated each time. */
+    if( nToken>pCsr->nBuffer ){
+      sqlite3_free(pCsr->aBuffer);
+      pCsr->aBuffer = sqlite3_malloc(nToken);
+      pCsr->nBuffer = pCsr->aBuffer ? nToken : 0;
+    }
+    if( pCsr->aBuffer==0 ){
+      rc = SQLITE_NOMEM;
+    }else{
+      int i;
+
+      /* Odd language ids are case-sensitive, even ones are not */
+      if( pCsr->iLangid & 0x00000001 ){
+        for(i=0; i<nToken; i++) pCsr->aBuffer[i] = pToken[i];
+      }else{
+        for(i=0; i<nToken; i++) pCsr->aBuffer[i] = (char)testTolower(pToken[i]);
+      }
+      pCsr->iToken++;
+      pCsr->iInput = (int)(p - pCsr->aInput);
+
+      *ppToken = pCsr->aBuffer;
+      *pnBytes = nToken;
+      *piStartOffset = (int)(pToken - pCsr->aInput);
+      *piEndOffset = (int)(p - pCsr->aInput);
+      *piPosition = pCsr->iToken;
+    }
+  }
+
+  return rc;
+}
+
+/*
+** xLanguageid method of the test tokenizer. Store iLangid in the cursor.
+** Return SQLITE_ERROR if iLangid is 100 or greater, or SQLITE_OK
+** otherwise. The value is stored in the cursor in either case.
+*/
+static int testTokenizerLanguage(
+  sqlite3_tokenizer_cursor *pCursor,
+  int iLangid
+){
+  test_tokenizer_cursor *p = (test_tokenizer_cursor *)pCursor;
+
+  p->iLangid = iLangid;
+  return (iLangid>=100) ? SQLITE_ERROR : SQLITE_OK;
+}
+
+/*
+** Tcl command:  fts3_test_tokenizer
+**
+** The command takes no arguments. The interpreter result is set to a
+** byte array containing a pointer to the sqlite3_tokenizer_module
+** structure of the test tokenizer. The byte array is
+** sizeof(sqlite3_tokenizer_module*) bytes in size.
+*/
+static int fts3_test_tokenizer_cmd(
+  ClientData clientData,
+  Tcl_Interp *interp,
+  int objc,
+  Tcl_Obj *CONST objv[]
+){
+  static const sqlite3_tokenizer_module testTokenizerModule = {
+    1,                            /* iVersion */
+    testTokenizerCreate,          /* Constructor */
+    testTokenizerDestroy,         /* Destructor */
+    testTokenizerOpen,            /* xOpen */
+    testTokenizerClose,           /* xClose */
+    testTokenizerNext,            /* xNext */
+    testTokenizerLanguage         /* xLanguageid */
+  };
+  const sqlite3_tokenizer_module *pMod = &testTokenizerModule;
+  Tcl_Obj *pBlob;
+
+  if( objc==1 ){
+    /* The blob holds the bytes of the pointer itself, not a copy of
+    ** the structure it points to. */
+    pBlob = Tcl_NewByteArrayObj((const unsigned char *)&pMod, sizeof(pMod));
+    Tcl_SetObjResult(interp, pBlob);
+    return TCL_OK;
+  }
+
+  Tcl_WrongNumArgs(interp, 1, objv, "");
+  return TCL_ERROR;
+}
+
+/* 
+** End of tokenizer code.
+**************************************************************************/ 
+
 int Sqlitetestfts3_Init(Tcl_Interp *interp){
  Tcl_CreateObjCommand(interp, "fts3_near_match", fts3_near_match_cmd, 0, 0);
  Tcl_CreateObjCommand(interp, 
      "fts3_configure_incr_load", fts3_configure_incr_load_cmd, 0, 0
  );
+  Tcl_CreateObjCommand(           /* Returns a pointer to the test tokenizer module */
+      interp, "fts3_test_tokenizer", fts3_test_tokenizer_cmd, 0, 0
+  );
  return TCL_OK;
 }
 #endif                  /* ifdef SQLITE_TEST */
--- a/ext/fts3/fts3_tokenizer.c
+++ b/ext/fts3/fts3_tokenizer.c
@ -288,11 +288,10 @@ static void testFunc(
    goto finish;
  }
  pTokenizer->pModule = p;
-  if( SQLITE_OK!=p->xOpen(pTokenizer, zInput, nInput, &pCsr) ){
+  if( sqlite3Fts3OpenTokenizer(pTokenizer, 0, zInput, nInput, &pCsr) ){
    zErr = "error in xOpen()";
    goto finish;
  }
-  pCsr->pTokenizer = pTokenizer;

  while( SQLITE_OK==p->xNext(pCsr, &zToken, &nToken, &iStart, &iEnd, &iPos) ){
    Tcl_ListObjAppendElement(0, pRet, Tcl_NewIntObj(iPos));
--- a/ext/fts3/fts3_tokenizer.h
+++ b/ext/fts3/fts3_tokenizer.h
@ -52,7 +52,7 @@ typedef struct sqlite3_tokenizer_cursor sqlite3_tokenizer_cursor;
 struct sqlite3_tokenizer_module {

  /*
-  ** Structure version. Should always be set to 0.
+  ** Structure version. Should always be set to 0 or 1.
  */
  int iVersion;

@ -133,6 +133,15 @@ struct sqlite3_tokenizer_module {
    int *piEndOffset,    /* OUT: Byte offset of end of token in input buffer */
    int *piPosition      /* OUT: Number of tokens returned before this one */
  );
+
+  /***********************************************************************
+  ** Methods below this point are only available if iVersion>=1.
+  */
+
+  /* 
+  ** Configure the language id of a tokenizer cursor.
+  */
+  int (*xLanguageid)(sqlite3_tokenizer_cursor *pCsr, int iLangid);
 };

 struct sqlite3_tokenizer {
--- a/ext/fts3/fts3_write.c
+++ b/ext/fts3/fts3_write.c
@ -657,6 +657,7 @@ static int fts3PendingTermsAddOne(
 */
 static int fts3PendingTermsAdd(
  Fts3Table *p,                   /* Table into which text will be inserted */
+  int iLangid,                    /* Language id to use */
  const char *zText,              /* Text of document to be inserted */
  int iCol,                       /* Column into which text is being inserted */
  u32 *pnWord                     /* OUT: Number of tokens inserted */
@ -686,11 +687,10 @@ static int fts3PendingTermsAdd(
    return SQLITE_OK;
  }

-  rc = pModule->xOpen(pTokenizer, zText, -1, &pCsr);
+  rc = sqlite3Fts3OpenTokenizer(pTokenizer, iLangid, zText, -1, &pCsr);
  if( rc!=SQLITE_OK ){
    return rc;
  }
-  pCsr->pTokenizer = pTokenizer;

  xNext = pModule->xNext;
  while( SQLITE_OK==rc
@ -783,11 +783,16 @@ void sqlite3Fts3PendingTermsClear(Fts3Table *p){
 ** Argument apVal is the same as the similarly named argument passed to
 ** fts3InsertData(). Parameter iDocid is the docid of the new row.
 */
-static int fts3InsertTerms(Fts3Table *p, sqlite3_value **apVal, u32 *aSz){
+static int fts3InsertTerms(
+  Fts3Table *p, 
+  int iLangid, 
+  sqlite3_value **apVal, 
+  u32 *aSz
+){
  int i;                          /* Iterator variable */
  for(i=2; i<p->nColumn+2; i++){
    const char *zText = (const char *)sqlite3_value_text(apVal[i]);
-    int rc = fts3PendingTermsAdd(p, zText, i-2, &aSz[i-2]);
+    int rc = fts3PendingTermsAdd(p, iLangid, zText, i-2, &aSz[i-2]);
    if( rc!=SQLITE_OK ){
      return rc;
    }
@ -933,13 +938,11 @@ static void fts3DeleteTerms(
  if( rc==SQLITE_OK ){
    if( SQLITE_ROW==sqlite3_step(pSelect) ){
      int i;
-      rc = fts3PendingTermsDocid(p, 
-          langidFromSelect(p, pSelect), 
-          sqlite3_column_int64(pSelect, 0)
-      );
+      int iLangid = langidFromSelect(p, pSelect);
+      rc = fts3PendingTermsDocid(p, iLangid, sqlite3_column_int64(pSelect, 0));
      for(i=1; rc==SQLITE_OK && i<=p->nColumn; i++){
        const char *zText = (const char *)sqlite3_column_text(pSelect, i);
-        rc = fts3PendingTermsAdd(p, zText, -1, &aSz[i-1]);
+        rc = fts3PendingTermsAdd(p, iLangid, zText, -1, &aSz[i-1]);
        aSz[p->nColumn] += sqlite3_column_bytes(pSelect, i);
      }
      if( rc!=SQLITE_OK ){
@ -3102,13 +3105,12 @@ static int fts3DoRebuild(Fts3Table *p){

    while( rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pStmt) ){
      int iCol;
-      rc = fts3PendingTermsDocid(p, 
-          langidFromSelect(p, pStmt), sqlite3_column_int64(pStmt, 0)
-      );
+      int iLangid = langidFromSelect(p, pStmt);
+      rc = fts3PendingTermsDocid(p, iLangid, sqlite3_column_int64(pStmt, 0));
      aSz[p->nColumn] = 0;
      for(iCol=0; rc==SQLITE_OK && iCol<p->nColumn; iCol++){
        const char *z = (const char *) sqlite3_column_text(pStmt, iCol+1);
-        rc = fts3PendingTermsAdd(p, z, iCol, &aSz[iCol]);
+        rc = fts3PendingTermsAdd(p, iLangid, z, iCol, &aSz[iCol]);
        aSz[p->nColumn] += sqlite3_column_bytes(pStmt, iCol+1);
      }
      if( p->bHasDocsize ){
@ -3227,14 +3229,13 @@ int sqlite3Fts3CacheDeferredDoclists(Fts3Cursor *pCsr){
      const char *zText = (const char *)sqlite3_column_text(pCsr->pStmt, i+1);
      sqlite3_tokenizer_cursor *pTC = 0;
  
-      rc = pModule->xOpen(pT, zText, -1, &pTC);
+      rc = sqlite3Fts3OpenTokenizer(pT, pCsr->iLangid, zText, -1, &pTC);
      while( rc==SQLITE_OK ){
        char const *zToken;       /* Buffer containing token */
        int nToken;               /* Number of bytes in token */
        int iDum1, iDum2;         /* Dummy variables */
        int iPos;                 /* Position of token in zText */
  
-        pTC->pTokenizer = pT;
        rc = pModule->xNext(pTC, &zToken, &nToken, &iDum1, &iDum2, &iPos);
        for(pDef=pCsr->pDeferred; pDef && rc==SQLITE_OK; pDef=pDef->pNext){
          Fts3PhraseToken *pPT = pDef->pToken;
@ -3467,6 +3468,7 @@ int sqlite3Fts3UpdateMethod(
  
  /* If this is an INSERT or UPDATE operation, insert the new record. */
  if( nArg>1 && rc==SQLITE_OK ){
+    int iLangid = sqlite3_value_int(apVal[2 + p->nColumn + 2]);
    if( bInsertDone==0 ){
      rc = fts3InsertData(p, apVal, pRowid);
      if( rc==SQLITE_CONSTRAINT && p->zContentTbl==0 ){
@ -3474,14 +3476,11 @@ int sqlite3Fts3UpdateMethod(
      }
    }
    if( rc==SQLITE_OK && (!isRemove || *pRowid!=p->iPrevDocid ) ){
-      rc = fts3PendingTermsDocid(p, 
-          sqlite3_value_int(apVal[2 + p->nColumn + 2]),
-          *pRowid
-      );
+      rc = fts3PendingTermsDocid(p, iLangid, *pRowid);
    }
    if( rc==SQLITE_OK ){
      assert( p->iPrevDocid==*pRowid );
-      rc = fts3InsertTerms(p, apVal, aSzIns);
+      rc = fts3InsertTerms(p, iLangid, apVal, aSzIns);
    }
    if( p->bHasDocsize ){
      fts3InsertDocsize(&rc, p, aSzIns);