1
0
mirror of https://github.com/sqlite/sqlite.git synced 2025-07-30 19:03:16 +03:00

Add documentation for tokenizer api to fts5.h. Also add a script to extract extension API docs and format them as html.

FossilOrigin-Name: e240d467e60b7755486aae5e8b0824f7c741f852
This commit is contained in:
dan
2014-08-25 19:58:54 +00:00
parent 6885bbc713
commit 4aee5dff83
4 changed files with 224 additions and 10 deletions

View File

@ -0,0 +1,131 @@
#
# 2014 August 24
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#--------------------------------------------------------------------------
#
# This script extracts the documentation for the API used by fts5 auxiliary
# functions from header file fts5.h. It outputs html text on stdout that
# is included in the documentation on the web.
#
# Locate fts5.h in the same directory as this script and slurp its whole
# contents into the global variable "data".
set input_file [file join [file dirname [info script]] fts5.h]
set fd [open $input_file r]
set data [read $fd]
close $fd
# Argument $data is the entire text of the fts5.h file. This function
# extracts the definition of the Fts5ExtensionApi structure from it and
# returns a key/value list of structure member names and definitions. i.e.
#
# iVersion {int iVersion} xUserData {void *(*xUserData)(Fts5Context*)} ...
#
proc get_struct_members {data} {
  # Extract the body of the Fts5ExtensionApi structure from the header text.
  # The match is non-greedy so that it stops at the first closing
  # brace-semicolon pair. A greedy match would run on to the end of the last
  # structure in the file and report members of unrelated structures.
  if {![regexp "struct Fts5ExtensionApi {(.*?)};" $data -> defn]} {
    error "struct Fts5ExtensionApi not found in input"
  }

  # Remove all comments from the structure definition.
  regsub -all {/[*].*?[*]/} $defn {} defn2

  set res [list]
  foreach member [split $defn2 {;}] {
    set member [string trim $member]
    if {$member eq ""} continue

    # Reset for every member so that a name found for an earlier member can
    # never be reused by accident.
    set name ""

    # Plain members ("int iVersion"): the name is the last word. The catch
    # guards against declarations that are not well-formed Tcl lists.
    catch { set name [lindex $member end] }

    # Function-pointer members ("void *(*xUserData)(Fts5Context*)"): the
    # name is the identifier inside the first "(*...)" group. When this
    # does not match, the value set above is left untouched.
    regexp {.*?[(][*]([^)]*)[)]} $member -> name

    if {$name ne ""} {
      lappend res $name $member
    }
  }

  return $res
}
# Argument $data is the entire text of the fts5.h file and $names is the
# list of known structure member names. This function extracts the comment
# text that follows the "EXTENSION API FUNCTIONS" marker and returns it as
# a flat list of section-header / documentation-text pairs. A header whose
# leading identifier appears in $names is reduced to that identifier;
# any other header is kept as its trimmed text.
#
proc get_struct_docs {data names} {
  # Extract the documentation text: everything between the marker and the
  # end of the comment that contains it.
  if {![regexp {EXTENSION API FUNCTIONS(.*?)[*]/} $data -> docs]} {
    error "EXTENSION API FUNCTIONS section not found in input"
  }

  # Start from an empty list so that a section without any documentation
  # yields an empty result instead of an unset-variable error.
  set res [list]
  set current_doc ""
  set current_header ""

  foreach line [split $docs "\n"] {
    # Strip the run of comment stars at the start of the line, if any.
    regsub {[*]*} $line {} line

    # NOTE(review): the amount of whitespace in the pattern below decides
    # what counts as indented body text - confirm it against the layout
    # of the comments in fts5.h.
    if {[regexp {^ } $line]} {
      # Indented text belongs to the body of the current section.
      append current_doc "$line\n"
    } elseif {[string trim $line]==""} {
      # Blank lines are only kept once a section header has been seen.
      if {$current_header!=""} { append current_doc "\n" }
    } else {
      # A new header line: flush the section accumulated so far.
      if {$current_doc != ""} {
        lappend res $current_header $current_doc
        set current_doc ""
      }

      set subject n/a
      regexp {^ *([[:alpha:]]*)} $line -> subject
      # -exact: compare names literally rather than as glob patterns.
      if {[lsearch -exact $names $subject]>=0} {
        set current_header $subject
      } else {
        set current_header [string trim $line]
      }
    }
  }

  # Flush the final section.
  if {$current_doc != ""} {
    lappend res $current_header $current_doc
  }

  return $res
}
# Initialize global array M as a map from Fts5ExtensionApi member name
# to member definition. i.e.
#
# iVersion -> {int iVersion}
# xUserData -> {void *(*xUserData)(Fts5Context*)}
# ...
#
array set M [get_struct_members $data]

# Global list D holds alternating section names and documentation text.
# Most (all?) section names are structure member names.
#
set D [get_struct_docs $data [array names M]]

# Emit one heading per section followed by its text. Runs of non-blank
# lines are wrapped in a "code" element when the first line of the run is
# indented and in a "p" element otherwise; a blank line closes the element
# that is currently open.
foreach {hdr docs} $D {
  # Use the full member declaration as the heading when one is known.
  if {[info exists M($hdr)]} { set hdr $M($hdr) }
  puts "<h3><pre> $hdr</pre></h3>"

  set mode ""
  set bEmpty 1
  foreach line [split [string trim $docs] "\n"] {
    if {[string length [string trim $line]] == 0} {
      # Blank line: terminate the open element, if any.
      if {$mode ne ""} { puts "</$mode>" }
      set mode ""
    } elseif {$mode eq ""} {
      # First line of a new run decides which element to open.
      set mode [expr {[regexp {^ } $line] ? "code" : "p"}]
      puts "<$mode>"
    }
    puts $line
  }

  # Close whatever element is still open at the end of the section.
  if {$mode ne ""} { puts "</$mode>" }
}

View File

@ -42,14 +42,14 @@ typedef void (*fts5_extension_function)(
);
/*
** xUserData(pFts):
** EXTENSION API FUNCTIONS
**
** xUserData(pFts):
** Return a copy of the context pointer the extension function was
** registered with.
**
**
** xColumnTotalSize(pFts, iCol, pnToken):
**
** Returns the total number of tokens in column iCol, considering all
** rows in the FTS5 table.
**
@ -83,7 +83,6 @@ typedef void (*fts5_extension_function)(
**
**
** xQueryPhrase(pFts5, iPhrase, pUserData, xCallback):
**
** This API function is used to query the FTS table for phrase iPhrase
** of the current query. Specifically, a query equivalent to:
**
@ -179,5 +178,88 @@ struct Fts5ExtensionApi {
/*
** CUSTOM AUXILIARY FUNCTIONS
*************************************************************************/
/*************************************************************************
** CUSTOM TOKENIZERS
**
** Applications may also register custom tokenizer types. A tokenizer
** is registered by providing fts5 with a populated instance of the
** following structure. The structure methods are expected to function
** as follows:
**
** xCreate:
** This function is used to allocate and initialize a tokenizer instance.
** A tokenizer instance is required to actually tokenize text.
**
** The first argument passed to this function is a copy of the (void*)
** pointer provided by the application when the fts5_tokenizer object
** was registered with SQLite. The second and third arguments are an
** array of nul-terminated strings containing the tokenizer arguments,
** if any, specified as part of the CREATE VIRTUAL TABLE statement used
** to create the fts5 table.
**
** The final argument is an output variable. If successful, (*ppOut)
** should be set to point to the new tokenizer handle and SQLITE_OK
** returned. If an error occurs, some value other than SQLITE_OK should
** be returned. In this case, fts5 assumes that the final value of *ppOut
** is undefined.
**
** xDelete:
** This function is invoked to delete a tokenizer handle previously
** allocated using xCreate(). Fts5 guarantees that this function will
** be invoked exactly once for each successful call to xCreate().
**
** xTokenize:
** This function is expected to tokenize the nText byte string indicated
** by argument pText. pText may or may not be nul-terminated. The first argument
** passed to this function is a pointer to an Fts5Tokenizer object returned
** by an earlier call to xCreate().
**
** For each token in the input string, the supplied callback xToken() must
** be invoked. The first argument to it should be a copy of the pointer
** passed as the second argument to xTokenize(). The next two arguments
** are a pointer to a buffer containing the token text, and the size of
** the token in bytes. The 4th and 5th arguments are the byte offsets of
** the first byte of and first byte immediately following the text from
** which the token is derived within the input. The final argument is the
** token position - the total number of tokens that appear before this one
** in the input buffer.
**
** The xToken() callback must be invoked with non-decreasing values of
** the iPos parameter.
**
** If an xToken() callback returns any value other than SQLITE_OK, then
** the tokenization should be abandoned and the xTokenize() method should
** immediately return a copy of the xToken() return value. Or, if the
** input buffer is exhausted, xTokenize() should return SQLITE_OK. Finally,
** if an error occurs with the xTokenize() implementation itself, it
** may abandon the tokenization and return any error code other than
** SQLITE_OK or SQLITE_DONE.
**
*/
/* Method table that an application populates to register a custom
** tokenizer type with fts5. */
typedef struct fts5_tokenizer fts5_tokenizer;
/* Tokenizer instance handle produced by xCreate() and released by
** xDelete(). Only a forward declaration is visible in this section. */
typedef struct Fts5Tokenizer Fts5Tokenizer;
struct fts5_tokenizer {
/* Allocate and initialize a tokenizer instance. Arguments: the (void*)
** pointer supplied when the tokenizer was registered, the array of
** nul-terminated tokenizer arguments from CREATE VIRTUAL TABLE and its
** size, and the output handle. Returns SQLITE_OK on success; on any other
** return value the final value of *ppOut is treated as undefined. */
int (*xCreate)(void*, const char **azArg, int nArg, Fts5Tokenizer **ppOut);
/* Delete a handle previously allocated by xCreate(). Invoked exactly once
** for each successful xCreate() call. */
void (*xDelete)(Fts5Tokenizer*);
/* Tokenize the nText byte buffer pText (which may or may not be
** nul-terminated), invoking xToken() once per token with non-decreasing
** iPos values. Returns SQLITE_OK when the input is exhausted, a copy of
** any non-SQLITE_OK value returned by xToken(), or another error code if
** the tokenizer itself fails. */
int (*xTokenize)(Fts5Tokenizer*,
void *pCtx,
const char *pText, int nText,
int (*xToken)(
void *pCtx, /* Copy of 2nd argument to xTokenize() */
const char *pToken, /* Pointer to buffer containing token */
int nToken, /* Size of token in bytes */
int iStart, /* Byte offset of token within input text */
int iEnd, /* Byte offset of end of token within input text */
int iPos /* Position of token in input (first token is 0) */
)
);
};
/*
** END OF CUSTOM TOKENIZERS
*************************************************************************/
#endif /* _FTS5_H */

View File

@ -1,5 +1,5 @@
C Add\san\s"automerge=0"\smode\sthat\sdisables\sauto-merging\sand\sfalls\sback\sto\sfts4-style\scrisis\smerges.
D 2014-08-18T19:30:01.020
C Add\sdocumentation\sfor\stokenizer\sapi\sto\sfts5.h.\sAlso\sadd\sa\sscript\sto\sextract\sextension\sAPI\sdocs\sand\sformat\sthem\sas\shtml.
D 2014-08-25T19:58:54.559
F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f
F Makefile.in b03432313a3aad96c706f8164fb9f5307eaf19f5
F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23
@ -103,8 +103,9 @@ F ext/fts3/tool/fts3view.c 6cfc5b67a5f0e09c0d698f9fd012c784bfaa9197
F ext/fts3/unicode/CaseFolding.txt 8c678ca52ecc95e16bc7afc2dbf6fc9ffa05db8c
F ext/fts3/unicode/UnicodeData.txt cd07314edb62d49fde34debdaf92fa2aa69011e7
F ext/fts3/unicode/mkunicode.tcl dc6f268eb526710e2c6e496c372471d773d0c368
F ext/fts5/extract_api_docs.tcl c30b9aa60260b3a208b0b89df3d8dbf92c6d460c
F ext/fts5/fts5.c dd56525d45b354218b86c9accab2ed12ea4b4f4f
F ext/fts5/fts5.h 1c501ea7c5c686b8aa7fba0382badc5df6026aa7
F ext/fts5/fts5.h 064f9bf705e59d23abaa2191b3950604dad98b9f
F ext/fts5/fts5Int.h bc6fa374a42c6121ae8276b20f141d6cd6d8d9f9
F ext/fts5/fts5_aux.c 31e581413ecab0962ce2b37468f9f658f36f4b0e
F ext/fts5/fts5_buffer.c 248c61ac9fec001602efc72a45704f3b8d367c00
@ -1202,7 +1203,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1
F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4
F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32
F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f
P 05dfdad445b22f375b71abe0b1fa1bf7ca331be7
R a4a2c1b7e4d79c30cafb117d4f31d356
P 2397404e152b908d838e6491294b263b05943b3f
R f1a35566903c71a22822fa6dd6758208
U dan
Z a836ac39870a35d2f8436dd5e99c8845
Z 8c301746cf7784949ad4603ff5681e4e

View File

@ -1 +1 @@
2397404e152b908d838e6491294b263b05943b3f
e240d467e60b7755486aae5e8b0824f7c741f852