mirror of
https://github.com/sqlite/sqlite.git
synced 2025-07-30 19:03:16 +03:00
Add documentation for tokenizer api to fts5.h. Also add a script to extract extension API docs and format them as html.
FossilOrigin-Name: e240d467e60b7755486aae5e8b0824f7c741f852
This commit is contained in:
131
ext/fts5/extract_api_docs.tcl
Normal file
131
ext/fts5/extract_api_docs.tcl
Normal file
@ -0,0 +1,131 @@
|
||||
#
|
||||
# 2014 August 24
|
||||
#
|
||||
# The author disclaims copyright to this source code. In place of
|
||||
# a legal notice, here is a blessing:
|
||||
#
|
||||
# May you do good and not evil.
|
||||
# May you find forgiveness for yourself and forgive others.
|
||||
# May you share freely, never taking more than you give.
|
||||
#
|
||||
#--------------------------------------------------------------------------
|
||||
#
|
||||
# This script extracts the documentation for the API used by fts5 auxiliary
|
||||
# functions from header file fts5.h. It outputs html text on stdout that
|
||||
# is included in the documentation on the web.
|
||||
#
|
||||
|
||||
# Read the whole of fts5.h into global variable $data. The header is
# looked up in the same directory as this script.
set input_file [file join [file dirname [info script]] fts5.h]
set fd [open $input_file r]
set data [read $fd]
close $fd
|
||||
|
||||
|
||||
# Argument $data is the entire text of the fts5.h file. This function
|
||||
# extracts the definition of the Fts5ExtensionApi structure from it and
|
||||
# returns a key/value list of structure member names and definitions. i.e.
|
||||
#
|
||||
# iVersion {int iVersion} xUserData {void *(*xUserData)(Fts5Context*)} ...
|
||||
#
|
||||
proc get_struct_members {data} {

  # Extract the body of the Fts5ExtensionApi structure from the text of
  # fts5.h. The match is deliberately non-greedy: the header declares
  # other structures after this one, and a greedy match would run on to
  # the last structure terminator in the file and report the members of
  # those structures as well.
  #
  # An error is raised if the structure cannot be found, instead of
  # failing later on an unset variable.
  if {![regexp "struct Fts5ExtensionApi {(.*?)};" $data -> defn]} {
    error "cannot find definition of struct Fts5ExtensionApi"
  }

  # Remove all comments from the structure definition
  regsub -all {/[*].*?[*]/} $defn {} defn2

  set res [list]
  foreach member [split $defn2 {;}] {
    set member [string trim $member]
    if {$member eq ""} continue

    # A function-pointer member is named by the identifier inside the
    # first "(*...)" group. Any other member is named by the last
    # whitespace-separated word of its declaration. The name is always
    # recomputed, so a value from an earlier member is never reused.
    if {![regexp {.*?[(][*]([^)]*)[)]} $member -> name]} {
      set name [lindex [regexp -all -inline {\S+} $member] end]
    }
    lappend res $name $member
  }

  return $res
}
|
||||
|
||||
# Extract the documentation sections from the "EXTENSION API FUNCTIONS"
# comment in fts5.h.
#
# Argument $data is the entire text of fts5.h. Argument $names is a list
# of structure member names. The return value is a flat list of
# alternating section headers and documentation text. A header is the
# bare member name when the first word of the header line appears in
# $names, or the trimmed header line otherwise.
#
# An error is raised if the comment cannot be found in $data.
#
proc get_struct_docs {data names} {

  # Extract the text between the "EXTENSION API FUNCTIONS" title and the
  # end of the comment that contains it.
  if {![regexp {EXTENSION API FUNCTIONS(.*?)[*]/} $data -> docs]} {
    error "cannot find the EXTENSION API FUNCTIONS comment"
  }

  # Start from an empty result so that a comment with no sections yields
  # an empty list rather than an unset-variable error.
  set res [list]
  set current_doc ""
  set current_header ""

  foreach line [split $docs "\n"] {
    # Strip the run of asterisks at the start of the comment line.
    regsub {[*]*} $line {} line

    # NOTE(review): as written, any line that still begins with a space
    # is treated as body text. That includes header lines written with a
    # space after the asterisks. Whitespace may have been collapsed in
    # this copy of the script - confirm the number of spaces in this
    # pattern against the upstream file.
    if {[regexp {^ } $line]} {
      append current_doc "$line\n"
    } elseif {[string trim $line] eq ""} {
      # Blank lines are kept as paragraph breaks, but only once a header
      # has been seen.
      if {$current_header ne ""} { append current_doc "\n" }
    } else {
      # A new header line: flush the section collected so far.
      if {$current_doc ne ""} {
        lappend res $current_header $current_doc
        set current_doc ""
      }
      set subject n/a
      regexp {^ *([[:alpha:]]*)} $line -> subject
      if {[lsearch -exact $names $subject] >= 0} {
        set current_header $subject
      } else {
        set current_header [string trim $line]
      }
    }
  }

  # Flush the final section.
  if {$current_doc ne ""} {
    lappend res $current_header $current_doc
  }

  return $res
}
|
||||
|
||||
# Global array M maps each Fts5ExtensionApi member name to its
# declaration, e.g.
#
#   iVersion  -> {int iVersion}
#   xUserData -> {void *(*xUserData)(Fts5Context*)}
#   ...
#
array set M [get_struct_members $data]

# Global list D holds alternating section names and documentation text.
# Most (all?) section names are structure member names.
#
set D [get_struct_docs $data [array names M]]

# Write one html section per entry in D to stdout. If the section name is
# a structure member, the heading shows the member declaration instead of
# the bare name.
foreach {hdr docs} $D {
  if {[info exists M($hdr)]} {
    set hdr $M($hdr)
  }
  puts "<h3><pre> $hdr</pre></h3>"

  # $mode is the name of the currently open html element ("p" or "code"),
  # or an empty string if no element is open.
  set mode ""
  set bEmpty 1
  foreach line [split [string trim $docs] "\n"] {
    if {[string trim $line] eq ""} {
      # A blank line closes whichever element is open.
      if {$mode ne ""} {
        puts "</$mode>"
        set mode ""
      }
    } elseif {$mode eq ""} {
      # First line of a new block. A line that begins with a space opens
      # a code block, anything else a paragraph.
      # NOTE(review): whitespace may have been collapsed in this copy of
      # the script - confirm the pattern against the upstream file.
      set mode [expr {[regexp {^ } $line] ? "code" : "p"}]
      puts "<$mode>"
    }
    puts $line
  }

  # Close the element left open by the last line, if any.
  if {$mode ne ""} {
    puts "</$mode>"
  }
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
@ -42,14 +42,14 @@ typedef void (*fts5_extension_function)(
|
||||
);
|
||||
|
||||
/*
|
||||
** xUserData(pFts):
|
||||
** EXTENSION API FUNCTIONS
|
||||
**
|
||||
** xUserData(pFts):
|
||||
** Return a copy of the context pointer the extension function was
|
||||
** registered with.
|
||||
**
|
||||
**
|
||||
** xColumnTotalSize(pFts, iCol, pnToken):
|
||||
**
|
||||
** Returns the total number of tokens in column iCol, considering all
|
||||
** rows in the FTS5 table.
|
||||
**
|
||||
@ -83,7 +83,6 @@ typedef void (*fts5_extension_function)(
|
||||
**
|
||||
**
|
||||
** xQueryPhrase(pFts5, iPhrase, pUserData, xCallback):
|
||||
**
|
||||
** This API function is used to query the FTS table for phrase iPhrase
|
||||
** of the current query. Specifically, a query equivalent to:
|
||||
**
|
||||
@ -179,5 +178,88 @@ struct Fts5ExtensionApi {
|
||||
/*
|
||||
** CUSTOM AUXILIARY FUNCTIONS
|
||||
*************************************************************************/
|
||||
|
||||
/*************************************************************************
|
||||
** CUSTOM TOKENIZERS
|
||||
**
|
||||
** Applications may also register custom tokenizer types. A tokenizer
|
||||
** is registered by providing fts5 with a populated instance of the
|
||||
** following structure. The structure methods are expected to function
|
||||
** as follows:
|
||||
**
|
||||
** xCreate:
|
||||
** This function is used to allocate and initialize a tokenizer instance.
|
||||
** A tokenizer instance is required to actually tokenize text.
|
||||
**
|
||||
** The first argument passed to this function is a copy of the (void*)
|
||||
** pointer provided by the application when the fts5_tokenizer object
|
||||
** was registered with SQLite. The second and third arguments are an
|
||||
** array of nul-terminated strings containing the tokenizer arguments,
|
||||
** if any, specified as part of the CREATE VIRTUAL TABLE statement used
|
||||
** to create the fts5 table.
|
||||
**
|
||||
** The final argument is an output variable. If successful, (*ppOut)
|
||||
** should be set to point to the new tokenizer handle and SQLITE_OK
|
||||
** returned. If an error occurs, some value other than SQLITE_OK should
|
||||
** be returned. In this case, fts5 assumes that the final value of *ppOut
|
||||
** is undefined.
|
||||
**
|
||||
** xDelete:
|
||||
** This function is invoked to delete a tokenizer handle previously
|
||||
** allocated using xCreate(). Fts5 guarantees that this function will
|
||||
** be invoked exactly once for each successful call to xCreate().
|
||||
**
|
||||
** xTokenize:
|
||||
** This function is expected to tokenize the nText byte string indicated
|
||||
** by argument pText. pText may not be nul-terminated. The first argument
|
||||
** passed to this function is a pointer to an Fts5Tokenizer object returned
|
||||
** by an earlier call to xCreate().
|
||||
**
|
||||
** For each token in the input string, the supplied callback xToken() must
|
||||
** be invoked. The first argument to it should be a copy of the pointer
|
||||
** passed as the second argument to xTokenize(). The next two arguments
|
||||
** are a pointer to a buffer containing the token text, and the size of
|
||||
** the token in bytes. The 4th and 5th arguments are the byte offsets of
|
||||
** the first byte of and first byte immediately following the text from
|
||||
** which the token is derived within the input. The final argument is the
|
||||
** token position - the total number of tokens that appear before this one
|
||||
** in the input buffer.
|
||||
**
|
||||
** The xToken() callback must be invoked with non-decreasing values of
|
||||
** the iPos parameter.
|
||||
**
|
||||
** If an xToken() callback returns any value other than SQLITE_OK, then
|
||||
** the tokenization should be abandoned and the xTokenize() method should
|
||||
** immediately return a copy of the xToken() return value. Or, if the
|
||||
** input buffer is exhausted, xTokenize() should return SQLITE_OK. Finally,
|
||||
** if an error occurs with the xTokenize() implementation itself, it
|
||||
** may abandon the tokenization and return any error code other than
|
||||
** SQLITE_OK or SQLITE_DONE.
|
||||
**
|
||||
*/
|
||||
typedef struct Fts5Tokenizer Fts5Tokenizer;
typedef struct fts5_tokenizer fts5_tokenizer;

/*
** Methods of a custom tokenizer. See the CUSTOM TOKENIZERS comment above
** for the contract of each method.
*/
struct fts5_tokenizer {
  /* Allocate and initialize a tokenizer instance, returned via *ppOut. */
  int (*xCreate)(void*, const char **azArg, int nArg, Fts5Tokenizer **ppOut);

  /* Delete a tokenizer handle previously allocated using xCreate(). */
  void (*xDelete)(Fts5Tokenizer*);

  /* Tokenize the nText byte string pText, invoking xToken() per token. */
  int (*xTokenize)(
    Fts5Tokenizer*,
    void *pCtx,
    const char *pText, int nText,
    int (*xToken)(
      void *pCtx,          /* Copy of 2nd argument to xTokenize() */
      const char *pToken,  /* Pointer to buffer containing token */
      int nToken,          /* Size of token in bytes */
      int iStart,          /* Byte offset of token within input text */
      int iEnd,            /* Byte offset of end of token within input text */
      int iPos             /* Position of token in input (first token is 0) */
    )
  );
};
|
||||
|
||||
/*
|
||||
** END OF CUSTOM TOKENIZERS
|
||||
*************************************************************************/
|
||||
|
||||
#endif /* _FTS5_H */
|
||||
|
||||
|
13
manifest
13
manifest
@ -1,5 +1,5 @@
|
||||
C Add\san\s"automerge=0"\smode\sthat\sdisables\sauto-merging\sand\sfalls\sback\sto\sfts4-style\scrisis\smerges.
|
||||
D 2014-08-18T19:30:01.020
|
||||
C Add\sdocumentation\sfor\stokenizer\sapi\sto\sfts5.h.\sAlso\sadd\sa\sscript\sto\sextract\sextension\sAPI\sdocs\sand\sformat\sthem\sas\shtml.
|
||||
D 2014-08-25T19:58:54.559
|
||||
F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f
|
||||
F Makefile.in b03432313a3aad96c706f8164fb9f5307eaf19f5
|
||||
F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23
|
||||
@ -103,8 +103,9 @@ F ext/fts3/tool/fts3view.c 6cfc5b67a5f0e09c0d698f9fd012c784bfaa9197
|
||||
F ext/fts3/unicode/CaseFolding.txt 8c678ca52ecc95e16bc7afc2dbf6fc9ffa05db8c
|
||||
F ext/fts3/unicode/UnicodeData.txt cd07314edb62d49fde34debdaf92fa2aa69011e7
|
||||
F ext/fts3/unicode/mkunicode.tcl dc6f268eb526710e2c6e496c372471d773d0c368
|
||||
F ext/fts5/extract_api_docs.tcl c30b9aa60260b3a208b0b89df3d8dbf92c6d460c
|
||||
F ext/fts5/fts5.c dd56525d45b354218b86c9accab2ed12ea4b4f4f
|
||||
F ext/fts5/fts5.h 1c501ea7c5c686b8aa7fba0382badc5df6026aa7
|
||||
F ext/fts5/fts5.h 064f9bf705e59d23abaa2191b3950604dad98b9f
|
||||
F ext/fts5/fts5Int.h bc6fa374a42c6121ae8276b20f141d6cd6d8d9f9
|
||||
F ext/fts5/fts5_aux.c 31e581413ecab0962ce2b37468f9f658f36f4b0e
|
||||
F ext/fts5/fts5_buffer.c 248c61ac9fec001602efc72a45704f3b8d367c00
|
||||
@ -1202,7 +1203,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1
|
||||
F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4
|
||||
F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32
|
||||
F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f
|
||||
P 05dfdad445b22f375b71abe0b1fa1bf7ca331be7
|
||||
R a4a2c1b7e4d79c30cafb117d4f31d356
|
||||
P 2397404e152b908d838e6491294b263b05943b3f
|
||||
R f1a35566903c71a22822fa6dd6758208
|
||||
U dan
|
||||
Z a836ac39870a35d2f8436dd5e99c8845
|
||||
Z 8c301746cf7784949ad4603ff5681e4e
|
||||
|
@ -1 +1 @@
|
||||
2397404e152b908d838e6491294b263b05943b3f
|
||||
e240d467e60b7755486aae5e8b0824f7c741f852
|
Reference in New Issue
Block a user