diff --git a/ext/fts5/extract_api_docs.tcl b/ext/fts5/extract_api_docs.tcl
new file mode 100644
index 0000000000..e0f3191d61
--- /dev/null
+++ b/ext/fts5/extract_api_docs.tcl
@@ -0,0 +1,162 @@
+#
+# 2014 August 24
+#
+# The author disclaims copyright to this source code.  In place of
+# a legal notice, here is a blessing:
+#
+#    May you do good and not evil.
+#    May you find forgiveness for yourself and forgive others.
+#    May you share freely, never taking more than you give.
+#
+#--------------------------------------------------------------------------
+#
+# This script extracts the documentation for the API used by fts5 auxiliary
+# functions from header file fts5.h. It outputs html text on stdout that
+# is included in the documentation on the web.
+#
+
+set input_file [file join [file dir [info script]] fts5.h]
+set fd [open $input_file]
+set data [read $fd]
+close $fd
+
+
+# Argument $data is the entire text of the fts5.h file. This function
+# extracts the definition of the Fts5ExtensionApi structure from it and
+# returns a key/value list of structure member names and definitions. i.e.
+#
+#   iVersion {int iVersion} xUserData {void *(*xUserData)(Fts5Context*)} ...
+#
+# An error is raised if the structure definition cannot be found.
+#
+proc get_struct_members {data} {
+
+  # Extract the structure definition from the fts5.h file. The match is
+  # non-greedy so that it ends at the first terminator following the
+  # opening of Fts5ExtensionApi, not at the terminator of some later
+  # structure declared in the same header.
+  if {![regexp "struct Fts5ExtensionApi {(.*?)};" $data -> defn]} {
+    error "struct Fts5ExtensionApi not found in fts5.h"
+  }
+
+  # Remove all comments from the structure definition
+  regsub -all {/[*].*?[*]/} $defn {} defn2
+
+  set res [list]
+  foreach member [split $defn2 {;}] {
+
+    set member [string trim $member]
+    if {$member!=""} {
+      # For a plain member the name is the last word of the declaration.
+      # For a function pointer it is the identifier inside the first
+      # "(*...)" group. Reset the name for every member so that a failed
+      # parse cannot silently reuse the name of the previous member.
+      set name ""
+      catch { set name [lindex $member end] }
+      regexp {.*?[(][*]([^)]*)[)]} $member -> name
+      if {$name!=""} {
+        lappend res $name $member
+      }
+    }
+  }
+
+  return $res
+}
+
+# Argument $data is the entire text of the fts5.h file and $names is a
+# list of structure member names. This function extracts the text that
+# follows the "EXTENSION API FUNCTIONS" marker, up to the end of the
+# comment, and returns it as a key/value list of section headers and
+# documentation text. If a header line starts with one of $names, the
+# header is just that name. Otherwise it is the trimmed header line.
+#
+# An error is raised if the marker cannot be found.
+#
+proc get_struct_docs {data names} {
+  # Extract the API documentation comment from the fts5.h file.
+  if {![regexp {EXTENSION API FUNCTIONS(.*?)[*]/} $data -> docs]} {
+    error "EXTENSION API FUNCTIONS comment not found in fts5.h"
+  }
+
+  set res [list]
+  set current_doc ""
+  set current_header ""
+
+  foreach line [split $docs "\n"] {
+    # Strip the run of comment stars at the very start of the line, if any.
+    regsub {[*]*} $line {} line
+    if {[regexp {^ } $line]} {
+      append current_doc "$line\n"
+    } elseif {[string trim $line]==""} {
+      if {$current_header!=""} { append current_doc "\n" }
+    } else {
+      if {$current_doc != ""} {
+        lappend res $current_header $current_doc
+        set current_doc ""
+      }
+      set subject n/a
+      regexp {^ *([[:alpha:]]*)} $line -> subject
+      if {[lsearch $names $subject]>=0} {
+        set current_header $subject
+      } else {
+        set current_header [string trim $line]
+      }
+    }
+  }
+
+  if {$current_doc != ""} {
+    lappend res $current_header $current_doc
+  }
+
+  return $res
+}
+
+# Initialize global array M as a map from Fts5ExtensionApi member name
+# to member definition. i.e.
+#
+#   iVersion  -> {int iVersion}
+#   xUserData -> {void *(*xUserData)(Fts5Context*)}
+#   ...
+#
+array set M [get_struct_members $data]
+
+# Initialize global list D as a map from section name to documentation
+# text. Most (all?) section names are structure member names.
+#
+set D [get_struct_docs $data [array names M]]
+
+# Write each section to stdout. A section named after a structure member
+# is headed by the full member definition. The documentation text is
+# split into blocks separated by blank lines. A block is wrapped in a
+# "code" element if its first line starts with a space, or in a "p"
+# element otherwise.
+#
+foreach {hdr docs} $D {
+  if {[info exists M($hdr)]} {
+    set hdr $M($hdr)
+  }
+  # NOTE(review): the heading is written without any enclosing markup -
+  # confirm whether heading tags are intended here.
+  puts "
+
+  $hdr
+
+"
+
+  set mode ""
+  foreach line [split [string trim $docs] "\n"] {
+    if {[string trim $line]==""} {
+      if {$mode != ""} {puts "</$mode>"}
+      set mode ""
+    } elseif {$mode == ""} {
+      if {[regexp {^ } $line]} {
+        set mode code
+      } else {
+        set mode p
+      }
+      puts "<$mode>"
+    }
+    puts $line
+  }
+  if {$mode != ""} {puts "</$mode>"}
+}
diff --git a/ext/fts5/fts5.h b/ext/fts5/fts5.h index b2865d6609..d3db15cc36 100644 --- a/ext/fts5/fts5.h +++ b/ext/fts5/fts5.h @@ -42,14 +42,14 @@ typedef void (*fts5_extension_function)( ); /* -** xUserData(pFts): +** EXTENSION API FUNCTIONS ** +** xUserData(pFts): ** Return a copy of the context pointer the extension function was ** registered with. ** ** ** xColumnTotalSize(pFts, iCol, pnToken): -** ** Returns the total number of tokens in column iCol, considering all ** rows in the FTS5 table. ** @@ -83,7 +83,6 @@ typedef void (*fts5_extension_function)( ** ** ** xQueryPhrase(pFts5, iPhrase, pUserData, xCallback): -** ** This API function is used to query the FTS table for phrase iPhrase ** of the current query. Specifically, a query equivalent to: ** @@ -179,5 +178,88 @@ struct Fts5ExtensionApi { /* ** CUSTOM AUXILIARY FUNCTIONS *************************************************************************/ + +/************************************************************************* +** CUSTOM TOKENIZERS +** +** Applications may also register custom tokenizer types. A tokenizer +** is registered by providing fts5 with a populated instance of the +** following structure. The structure methods are expected to function +** as follows: +** +** xCreate: +** This function is used to allocate and initialize a tokenizer instance. +** A tokenizer instance is required to actually tokenize text. +** +** The first argument passed to this function is a copy of the (void*) +** pointer provided by the application when the fts5_tokenizer object +** was registered with SQLite. 
The second and third arguments are an +** array of nul-terminated strings containing the tokenizer arguments, +** if any, specified as part of the CREATE VIRTUAL TABLE statement used +** to create the fts5 table. +** +** The final argument is an output variable. If successful, (*ppOut) +** should be set to point to the new tokenizer handle and SQLITE_OK +** returned. If an error occurs, some value other than SQLITE_OK should +** be returned. In this case, fts5 assumes that the final value of *ppOut +** is undefined. +** +** xDelete: +** This function is invoked to delete a tokenizer handle previously +** allocated using xCreate(). Fts5 guarantees that this function will +** be invoked exactly once for each successful call to xCreate(). +** +** xTokenize: +** This function is expected to tokenize the nText byte string indicated +** by argument pText. pText may not be nul-terminated. The first argument +** passed to this function is a pointer to an Fts5Tokenizer object returned +** by an earlier call to xCreate(). +** +** For each token in the input string, the supplied callback xToken() must +** be invoked. The first argument to it should be a copy of the pointer +** passed as the second argument to xTokenize(). The next two arguments +** are a pointer to a buffer containing the token text, and the size of +** the token in bytes. The 4th and 5th arguments are the byte offsets of +** the first byte of and first byte immediately following the text from +** which the token is derived within the input. The final argument is the +** token position - the total number of tokens that appear before this one +** in the input buffer. +** +** The xToken() callback must be invoked with non-decreasing values of +** the iPos parameter. +** +** If an xToken() callback returns any value other than SQLITE_OK, then +** the tokenization should be abandoned and the xTokenize() method should +** immediately return a copy of the xToken() return value. 
Or, if the +** input buffer is exhausted, xTokenize() should return SQLITE_OK. Finally, +** if an error occurs with the xTokenize() implementation itself, it +** may abandon the tokenization and return any error code other than +** SQLITE_OK or SQLITE_DONE. +** +*/ +typedef struct fts5_tokenizer fts5_tokenizer; +typedef struct Fts5Tokenizer Fts5Tokenizer; + +struct fts5_tokenizer { + int (*xCreate)(void*, const char **azArg, int nArg, Fts5Tokenizer **ppOut); + void (*xDelete)(Fts5Tokenizer*); + int (*xTokenize)(Fts5Tokenizer*, + void *pCtx, + const char *pText, int nText, + int (*xToken)( + void *pCtx, /* Copy of 2nd argument to xTokenize() */ + const char *pToken, /* Pointer to buffer containing token */ + int nToken, /* Size of token in bytes */ + int iStart, /* Byte offset of token within input text */ + int iEnd, /* Byte offset of end of token within input text */ + int iPos /* Position of token in input (first token is 0) */ + ) + ); +}; + +/* +** END OF CUSTOM TOKENIZERS +*************************************************************************/ + #endif /* _FTS5_H */ diff --git a/manifest b/manifest index 0edf60c0aa..10e421f7a2 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Add\san\s"automerge=0"\smode\sthat\sdisables\sauto-merging\sand\sfalls\sback\sto\sfts4-style\scrisis\smerges. -D 2014-08-18T19:30:01.020 +C Add\sdocumentation\sfor\stokenizer\sapi\sto\sfts5.h.\sAlso\sadd\sa\sscript\sto\sextract\sextension\sAPI\sdocs\sand\sformat\sthem\sas\shtml. 
+D 2014-08-25T19:58:54.559 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in b03432313a3aad96c706f8164fb9f5307eaf19f5 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -103,8 +103,9 @@ F ext/fts3/tool/fts3view.c 6cfc5b67a5f0e09c0d698f9fd012c784bfaa9197 F ext/fts3/unicode/CaseFolding.txt 8c678ca52ecc95e16bc7afc2dbf6fc9ffa05db8c F ext/fts3/unicode/UnicodeData.txt cd07314edb62d49fde34debdaf92fa2aa69011e7 F ext/fts3/unicode/mkunicode.tcl dc6f268eb526710e2c6e496c372471d773d0c368 +F ext/fts5/extract_api_docs.tcl c30b9aa60260b3a208b0b89df3d8dbf92c6d460c F ext/fts5/fts5.c dd56525d45b354218b86c9accab2ed12ea4b4f4f -F ext/fts5/fts5.h 1c501ea7c5c686b8aa7fba0382badc5df6026aa7 +F ext/fts5/fts5.h 064f9bf705e59d23abaa2191b3950604dad98b9f F ext/fts5/fts5Int.h bc6fa374a42c6121ae8276b20f141d6cd6d8d9f9 F ext/fts5/fts5_aux.c 31e581413ecab0962ce2b37468f9f658f36f4b0e F ext/fts5/fts5_buffer.c 248c61ac9fec001602efc72a45704f3b8d367c00 @@ -1202,7 +1203,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 05dfdad445b22f375b71abe0b1fa1bf7ca331be7 -R a4a2c1b7e4d79c30cafb117d4f31d356 +P 2397404e152b908d838e6491294b263b05943b3f +R f1a35566903c71a22822fa6dd6758208 U dan -Z a836ac39870a35d2f8436dd5e99c8845 +Z 8c301746cf7784949ad4603ff5681e4e diff --git a/manifest.uuid b/manifest.uuid index 7ae37fa218..3372bdccc8 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -2397404e152b908d838e6491294b263b05943b3f \ No newline at end of file +e240d467e60b7755486aae5e8b0824f7c741f852 \ No newline at end of file