diff --git a/ext/fts5/extract_api_docs.tcl b/ext/fts5/extract_api_docs.tcl index 6762a036d5..634dc70cbd 100644 --- a/ext/fts5/extract_api_docs.tcl +++ b/ext/fts5/extract_api_docs.tcl @@ -108,8 +108,11 @@ proc get_tokenizer_docs {data} { append res "
$line

\n" continue } + if {[regexp {FTS5_TOKENIZER} $line]} { + set line

+ } if {[regexp {SYNONYM SUPPORT} $line]} { - set line "

Synonym Support

" + set line "

Synonym Support

" } if {[string trim $line] == ""} { append res "

\n" diff --git a/ext/fts5/fts5.h b/ext/fts5/fts5.h index dfa075f0a9..682a8da386 100644 --- a/ext/fts5/fts5.h +++ b/ext/fts5/fts5.h @@ -429,7 +429,7 @@ struct Fts5ExtensionApi { ** argument passed to this function is a pointer to an Fts5Tokenizer object ** returned by an earlier call to xCreate(). ** -** The second argument indicates the reason that FTS5 is requesting +** The third argument indicates the reason that FTS5 is requesting ** tokenization of the supplied text. This is always one of the following ** four values: ** @@ -453,6 +453,13 @@ struct Fts5ExtensionApi { ** on a columnsize=0 database. ** ** +** The sixth and seventh arguments passed to xTokenize() - pLocale and +** nLocale - are a pointer to a buffer containing the locale to use for +** tokenization (e.g. "en_US") and its size in bytes, respectively. The +** pLocale buffer is not nul-terminated. pLocale may be passed NULL (in +** which case nLocale is always 0) to indicate that the tokenizer should +** use its default locale. +** ** For each token in the input string, the supplied callback xToken() must ** be invoked. The first argument to it should be a copy of the pointer ** passed as the second argument to xTokenize(). The third and fourth @@ -484,6 +491,21 @@ struct Fts5ExtensionApi { ** an nLocale byte buffer containing the name of the locale to use as utf-8 ** text. pLocale is not nul-terminated. ** +** FTS5_TOKENIZER +** +** There is also an fts5_tokenizer object. This is an older version of +** fts5_tokenizer_v2. It is similar except that: +** +**

+** +** fts5_tokenizer tokenizers should be registered with the xCreateTokenizer() +** function, instead of xCreateTokenizer_v2(). Tokenizer implementations +** registered using either API may be retrieved using both xFindTokenizer() +** and xFindTokenizer_v2(). +** ** SYNONYM SUPPORT ** ** Custom tokenizers may also support synonyms. Consider a case in which a diff --git a/manifest b/manifest index 72992c9562..f8108ac374 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Add\sthe\sfts5_locale()\sfunction\sand\srelated\sfunctionality\sto\sfts5. -D 2024-08-19T14:33:39.681 +C Update\ssome\scomments\sthat\sare\sprocessed\sinto\sthe\sfts5.html\swebpage. +D 2024-08-19T20:35:30.209 F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1 F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea F LICENSE.md df5091916dbb40e6e9686186587125e1b2ff51f022cc334e886c19a0e9982724 @@ -91,8 +91,8 @@ F ext/fts3/unicode/CaseFolding.txt 8c678ca52ecc95e16bc7afc2dbf6fc9ffa05db8c F ext/fts3/unicode/UnicodeData.txt cd07314edb62d49fde34debdaf92fa2aa69011e7 F ext/fts3/unicode/mkunicode.tcl 63db9624ccf70d4887836c320eda93ab552f21008f3be7ede551eac3ead62baa F ext/fts3/unicode/parseunicode.tcl a981bd6466d12dd17967515801c3ff23f74a281be1a03cf1e6f52a6959fc77eb -F ext/fts5/extract_api_docs.tcl bc3a0ca78be7d3df08e7602c00ca48021ebae40682d75eb001bfdf6e54ffb44e -F ext/fts5/fts5.h 4c6998c6186268b4dbe9baef2c0d2ab974bd90996d61d4dbe801367249be6de4 +F ext/fts5/extract_api_docs.tcl 1db7f85f4d84b7b6f33336155d5053fafc3c8debd074422d8003c8f7fa4d0fdb +F ext/fts5/fts5.h c65fc7799a4cd6774628da4fa9408955623e504d7369ab5b89c4413fdfe11eb5 F ext/fts5/fts5Int.h 26a71a09cefa4ef6b4516b204ed48da3e1380970a19b3482eea7c5d805655360 F ext/fts5/fts5_aux.c 12cd2512f869217c38b70c31de5b5f741812734fafa80f55b32ea9bbd96e2152 F ext/fts5/fts5_buffer.c 0eec58bff585f1a44ea9147eae5da2447292080ea435957f7488c70673cb6f09 @@ -2210,9 +2210,8 @@ F 
vsixtest/vsixtest.tcl 6195aba1f12a5e10efc2b8c0009532167be5e301abe5b31385638080 F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0 -P 474b7e34b83bc5e85854bc3b386f31ff39b390549d89b94921f33bbc5b658d1d 6787603bd0d8117e41e6b572c0d7cada0f6d7266a4ff2ad79f9471b9beb28a9e -R 2c3030846c904496873c37b37c82be1d -T +closed 6787603bd0d8117e41e6b572c0d7cada0f6d7266a4ff2ad79f9471b9beb28a9e +P b9632900100bdbc913f83bfb03b32585cf07a192b9a7f26b9bebc7d91e63a610 +R ea532ddb7482adbca514ac43836cd1cf U dan -Z fdb5bbd69f79c041555a00a73e5efcdd +Z 0d4360caeae428f14a1f60694bd440b2 # Remove this line to create a well-formed Fossil manifest. diff --git a/manifest.uuid b/manifest.uuid index 43438a3211..39ef92242c 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -b9632900100bdbc913f83bfb03b32585cf07a192b9a7f26b9bebc7d91e63a610 +3e06ab218bbd1ed75a24afb44e8df9ce84e9fc24701428cb8b3459760f44006d