diff --git a/ext/fts5/extract_api_docs.tcl b/ext/fts5/extract_api_docs.tcl
new file mode 100644
index 0000000000..e0f3191d61
--- /dev/null
+++ b/ext/fts5/extract_api_docs.tcl
@@ -0,0 +1,131 @@
+#
+# 2014 August 24
+#
+# The author disclaims copyright to this source code. In place of
+# a legal notice, here is a blessing:
+#
+# May you do good and not evil.
+# May you find forgiveness for yourself and forgive others.
+# May you share freely, never taking more than you give.
+#
+#--------------------------------------------------------------------------
+#
+# This script extracts the documentation for the API used by fts5 auxiliary
+# functions from header file fts5.h. It outputs html text on stdout that
+# is included in the documentation on the web.
+#
+
+set input_file [file join [file dirname [info script]] fts5.h]
+set chan [open $input_file r]     ;# fts5.h is looked up next to this script
+set data [read $chan]             ;# entire header text, parsed by the procs below
+close $chan
+
+
+# Argument $data is the entire text of the fts5.h file. This function
+# extracts the definition of the Fts5ExtensionApi structure from it and
+# returns a key/value list of structure member names and definitions. i.e.
+#
+# iVersion {int iVersion} xUserData {void *(*xUserData)(Fts5Context*)} ...
+#
+proc get_struct_members {data} {
+  # Isolate the body of the Fts5ExtensionApi declaration. The capture is
+  # non-greedy so the match ends at the first closing brace + semicolon and
+  # not at the last one in the file, which may close a later structure.
+  if {![regexp "struct Fts5ExtensionApi {(.*?)};" $data -> defn]} {
+    error "struct Fts5ExtensionApi not found in input"
+  }
+  # Strip C comments, then treat each ";"-terminated chunk as one member.
+  regsub -all {/[*].*?[*]/} $defn {} defn2
+  set res [list]
+  foreach member [split $defn2 {;}] {
+    set member [string trim $member]
+    if {$member eq ""} continue
+    # Default name: last word of the declaration (e.g. "int iVersion"). For
+    # a function pointer the identifier inside "(*...)" replaces it.
+    catch { set name [lindex $member end] }
+    regexp {.*?[(][*]([^)]*)[)]} $member -> name
+    lappend res $name $member
+  }
+  set res
+}
+
+proc get_struct_docs {data names} {
+  # Return a flat list of alternating section headers and bodies taken from
+  # the "EXTENSION API FUNCTIONS" comment in $data.
+  if {![regexp {EXTENSION API FUNCTIONS(.*?)[*]/} $data -> docs]} {
+    error "EXTENSION API FUNCTIONS comment not found in input"
+  }
+  set res [list]                 ;# stays defined when no section is found
+  set current_doc ""
+  set current_header ""
+  foreach line [split $docs "\n"] {
+    regsub {[*]*} $line {} line  ;# drop the leading "**" decoration
+    if {[regexp {^ } $line]} {
+      append current_doc "$line\n"
+    } elseif {[string trim $line]==""} {
+      if {$current_header!=""} { append current_doc "\n" }
+    } else {
+      if {$current_doc != ""} {  ;# new header: flush the previous section
+        lappend res $current_header $current_doc
+        set current_doc ""
+      }
+      set subject n/a
+      regexp {^ *([[:alpha:]]*)} $line -> subject
+      if {[lsearch $names $subject]>=0} {
+        set current_header $subject    ;# known member: keep the bare name
+      } else {
+        set current_header [string trim $line]
+      }
+    }
+  }
+  if {$current_doc != ""} {
+    lappend res $current_header $current_doc
+  }
+  set res
+}
+
+# Initialize global array M as a map from Fts5ExtensionApi member name
+# to member definition. i.e.
+#
+# iVersion -> {int iVersion}
+# xUserData -> {void *(*xUserData)(Fts5Context*)}
+# ...
+#
+array set M [get_struct_members $data]
+
+# Initialize global list D as a map from section name to documentation
+# text. Most (all?) section names are structure member names.
+#
+set D [get_struct_docs $data [array names M]]
+
+# Write each section to stdout: a heading, then the body lines grouped into
+# <p> or <code> elements that are closed again at the next blank line.
+foreach {hdr docs} $D {
+  if {[info exists M($hdr)]} {
+    set hdr $M($hdr)
+  }
+  puts "<h3><pre>$hdr</pre></h3>"
+  set mode ""
+  foreach line [split [string trim $docs] "\n"] {
+    if {[string trim $line]==""} {
+      if {$mode != ""} {puts "</$mode>"}
+      set mode ""
+    } elseif {$mode == ""} {
+      if {[regexp {^ } $line]} {
+        set mode code
+      } else {
+        set mode p
+      }
+      puts "<$mode>"
+    }
+    puts $line
+  }
+  if {$mode != ""} {puts "</$mode>"}
+}
+
+
+
+
+
+
+
diff --git a/ext/fts5/fts5.h b/ext/fts5/fts5.h
index b2865d6609..d3db15cc36 100644
--- a/ext/fts5/fts5.h
+++ b/ext/fts5/fts5.h
@@ -42,14 +42,14 @@ typedef void (*fts5_extension_function)(
);
/*
-** xUserData(pFts):
+** EXTENSION API FUNCTIONS
**
+** xUserData(pFts):
** Return a copy of the context pointer the extension function was
** registered with.
**
**
** xColumnTotalSize(pFts, iCol, pnToken):
-**
** Returns the total number of tokens in column iCol, considering all
** rows in the FTS5 table.
**
@@ -83,7 +83,6 @@ typedef void (*fts5_extension_function)(
**
**
** xQueryPhrase(pFts5, iPhrase, pUserData, xCallback):
-**
** This API function is used to query the FTS table for phrase iPhrase
** of the current query. Specifically, a query equivalent to:
**
@@ -179,5 +178,88 @@ struct Fts5ExtensionApi {
/*
** CUSTOM AUXILIARY FUNCTIONS
*************************************************************************/
+
+/*************************************************************************
+** CUSTOM TOKENIZERS
+**
+** Applications may also register custom tokenizer types. A tokenizer
+** is registered by providing fts5 with a populated instance of the
+** following structure. The structure methods are expected to function
+** as follows:
+**
+** xCreate:
+** This function is used to allocate and initialize a tokenizer instance.
+** A tokenizer instance is required to actually tokenize text.
+**
+** The first argument passed to this function is a copy of the (void*)
+** pointer provided by the application when the fts5_tokenizer object
+** was registered with SQLite. The second and third arguments are an
+** array of nul-terminated strings and the number of entries in it. The
+** array holds the tokenizer arguments, if any, specified as part of the
+** CREATE VIRTUAL TABLE statement used to create the fts5 table.
+**
+** The final argument is an output variable. If successful, (*ppOut)
+** should be set to point to the new tokenizer handle and SQLITE_OK
+** returned. If an error occurs, some value other than SQLITE_OK should
+** be returned. In this case, fts5 assumes that the final value of *ppOut
+** is undefined.
+**
+** xDelete:
+** This function is invoked to delete a tokenizer handle previously
+** allocated using xCreate(). Fts5 guarantees that this function will
+** be invoked exactly once for each successful call to xCreate().
+**
+** xTokenize:
+** This function is expected to tokenize the nText byte string indicated
+** by argument pText. pText may or may not be nul-terminated. The first
+** argument passed to this function is a pointer to an Fts5Tokenizer
+** object returned by an earlier call to xCreate().
+**
+** For each token in the input string, the supplied callback xToken() must
+** be invoked. The first argument to it should be a copy of the pointer
+** passed as the second argument to xTokenize(). The next two arguments
+** are a pointer to a buffer containing the token text, and the size of
+** the token in bytes. The 4th and 5th arguments are the byte offsets,
+** within the input, of the first byte of the text from which the token
+** is derived and of the first byte immediately following that text. The
+** final argument is the token position - the total number of tokens that
+** appear before this one in the input buffer.
+**
+** The xToken() callback must be invoked with non-decreasing values of
+** the iPos parameter.
+**
+** If an xToken() callback returns any value other than SQLITE_OK, then
+** the tokenization should be abandoned and the xTokenize() method should
+** immediately return a copy of the xToken() return value. Or, if the
+** input buffer is exhausted, xTokenize() should return SQLITE_OK. Finally,
+** if an error occurs with the xTokenize() implementation itself, it
+** may abandon the tokenization and return any error code other than
+** SQLITE_OK or SQLITE_DONE.
+**
+*/
+typedef struct fts5_tokenizer fts5_tokenizer;
+typedef struct Fts5Tokenizer Fts5Tokenizer;
+
+struct fts5_tokenizer {
+ int (*xCreate)(void*, const char **azArg, int nArg, Fts5Tokenizer **ppOut);
+ void (*xDelete)(Fts5Tokenizer*); /* Delete a handle allocated by xCreate() */
+ int (*xTokenize)(Fts5Tokenizer*,
+ void *pCtx,
+ const char *pText, int nText, /* Text to tokenize and its size in bytes */
+ int (*xToken)(
+ void *pCtx, /* Copy of 2nd argument to xTokenize() */
+ const char *pToken, /* Pointer to buffer containing token */
+ int nToken, /* Size of token in bytes */
+ int iStart, /* Byte offset of first byte of token within input text */
+ int iEnd, /* Byte offset of first byte following token in input text */
+ int iPos /* Position of token in input (first token is 0) */
+ )
+ );
+};
+
+/*
+** END OF CUSTOM TOKENIZERS
+*************************************************************************/
+
#endif /* _FTS5_H */
diff --git a/manifest b/manifest
index 0edf60c0aa..10e421f7a2 100644
--- a/manifest
+++ b/manifest
@@ -1,5 +1,5 @@
-C Add\san\s"automerge=0"\smode\sthat\sdisables\sauto-merging\sand\sfalls\sback\sto\sfts4-style\scrisis\smerges.
-D 2014-08-18T19:30:01.020
+C Add\sdocumentation\sfor\stokenizer\sapi\sto\sfts5.h.\sAlso\sadd\sa\sscript\sto\sextract\sextension\sAPI\sdocs\sand\sformat\sthem\sas\shtml.
+D 2014-08-25T19:58:54.559
F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f
F Makefile.in b03432313a3aad96c706f8164fb9f5307eaf19f5
F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23
@@ -103,8 +103,9 @@ F ext/fts3/tool/fts3view.c 6cfc5b67a5f0e09c0d698f9fd012c784bfaa9197
F ext/fts3/unicode/CaseFolding.txt 8c678ca52ecc95e16bc7afc2dbf6fc9ffa05db8c
F ext/fts3/unicode/UnicodeData.txt cd07314edb62d49fde34debdaf92fa2aa69011e7
F ext/fts3/unicode/mkunicode.tcl dc6f268eb526710e2c6e496c372471d773d0c368
+F ext/fts5/extract_api_docs.tcl c30b9aa60260b3a208b0b89df3d8dbf92c6d460c
F ext/fts5/fts5.c dd56525d45b354218b86c9accab2ed12ea4b4f4f
-F ext/fts5/fts5.h 1c501ea7c5c686b8aa7fba0382badc5df6026aa7
+F ext/fts5/fts5.h 064f9bf705e59d23abaa2191b3950604dad98b9f
F ext/fts5/fts5Int.h bc6fa374a42c6121ae8276b20f141d6cd6d8d9f9
F ext/fts5/fts5_aux.c 31e581413ecab0962ce2b37468f9f658f36f4b0e
F ext/fts5/fts5_buffer.c 248c61ac9fec001602efc72a45704f3b8d367c00
@@ -1202,7 +1203,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1
F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4
F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32
F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f
-P 05dfdad445b22f375b71abe0b1fa1bf7ca331be7
-R a4a2c1b7e4d79c30cafb117d4f31d356
+P 2397404e152b908d838e6491294b263b05943b3f
+R f1a35566903c71a22822fa6dd6758208
U dan
-Z a836ac39870a35d2f8436dd5e99c8845
+Z 8c301746cf7784949ad4603ff5681e4e
diff --git a/manifest.uuid b/manifest.uuid
index 7ae37fa218..3372bdccc8 100644
--- a/manifest.uuid
+++ b/manifest.uuid
@@ -1 +1 @@
-2397404e152b908d838e6491294b263b05943b3f
\ No newline at end of file
+e240d467e60b7755486aae5e8b0824f7c741f852
\ No newline at end of file