mirror of
				https://github.com/MariaDB/server.git
				synced 2025-11-03 14:33:32 +03:00 
			
		
		
		
	
		
			
				
	
	
		
			223 lines
		
	
	
		
			7.6 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			223 lines
		
	
	
		
			7.6 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
/* Copyright (c) 2005 MySQL AB, 2009 Sun Microsystems, Inc.
   Use is subject to license terms.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; version 2 of the License.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335  USA */
 | 
						|
 | 
						|
#ifndef _my_plugin_ftparser_h
 | 
						|
#define _my_plugin_ftparser_h
 | 
						|
#include "plugin.h"
 | 
						|
 | 
						|
#ifdef __cplusplus
 | 
						|
extern "C" {
 | 
						|
#endif
 | 
						|
 | 
						|
/*************************************************************************
  API for Full-text parser plugin. (MYSQL_FTPARSER_PLUGIN)
*/

/*
  Version of the full-text parser plugin interface declared in this
  header.  It is the kind of value a parser descriptor
  (struct st_mysql_ftparser) stores in its interface_version member.
*/
#define MYSQL_FTPARSER_INTERFACE_VERSION 0x0100
 | 
						|
 | 
						|
/*
  Parsing modes.  The server sets one of these values in
  MYSQL_FTPARSER_PARAM::mode to tell the parser which words it is
  expected to return.
*/
enum enum_ftparser_mode
{
  /*
    Fast and simple mode.  This mode is used for indexing, and natural
    language queries.

    The parser is expected to return only those words that go into the
    index.  Stopwords or too short/long words should not be returned.
    The 'boolean_info' argument of mysql_add_word() does not have to be
    set.
  */
  MYSQL_FTPARSER_SIMPLE_MODE= 0,

  /*
    Parse with stopwords mode.  This mode is used in boolean searches
    for "phrase matching."

    The parser is not allowed to ignore words in this mode.  Every word
    should be returned, including stopwords and words that are too short
    or long.  The 'boolean_info' argument of mysql_add_word() does not
    have to be set.
  */
  MYSQL_FTPARSER_WITH_STOPWORDS= 1,

  /*
    Parse in boolean mode.  This mode is used to parse a boolean query
    string.

    The parser should provide a valid MYSQL_FTPARSER_BOOLEAN_INFO
    structure in the 'boolean_info' argument to mysql_add_word().
    Usually that means that the parser should recognize boolean
    operators in the parsing stream and set appropriate fields in the
    MYSQL_FTPARSER_BOOLEAN_INFO structure accordingly.  As for
    MYSQL_FTPARSER_WITH_STOPWORDS mode, no word should be ignored.
    Instead, use FT_TOKEN_STOPWORD for the token type of such a word.
  */
  MYSQL_FTPARSER_FULL_BOOLEAN_INFO= 2
};
 | 
						|
 | 
						|
/*
  Token types for boolean mode searching (used for the type member of
  the MYSQL_FTPARSER_BOOLEAN_INFO struct).
*/
enum enum_ft_token_type
{
  FT_TOKEN_EOF= 0,          /* End of data. */
  FT_TOKEN_WORD= 1,         /* Regular word. */
  FT_TOKEN_LEFT_PAREN= 2,   /* Left parenthesis (start of group/sub-expression). */
  FT_TOKEN_RIGHT_PAREN= 3,  /* Right parenthesis (end of group/sub-expression). */
  FT_TOKEN_STOPWORD= 4      /* Stopword. */
};
 | 
						|
 | 
						|
/*
 | 
						|
  This structure is used in boolean search mode only. It conveys
 | 
						|
  boolean-mode metadata to the MySQL search engine for every word in
 | 
						|
  the search query. A valid instance of this structure must be filled
 | 
						|
  in by the plugin parser and passed as an argument in the call to
 | 
						|
  mysql_add_word (the callback function in the MYSQL_FTPARSER_PARAM
 | 
						|
  structure) when a query is parsed in boolean mode.
 | 
						|
 | 
						|
  type: The token type.  Should be one of the enum_ft_token_type values.
 | 
						|
 | 
						|
  yesno: Whether the word must be present for a match to occur:
 | 
						|
    >0 Must be present
 | 
						|
    <0 Must not be present
 | 
						|
    0  Neither; the word is optional but its presence increases the relevance
 | 
						|
  With the default settings of the ft_boolean_syntax system variable,
 | 
						|
  >0 corresponds to the '+' operator, <0 corresponds to the '-' operator,
 | 
						|
  and 0 means neither operator was used.
 | 
						|
 | 
						|
  weight_adjust: A weighting factor that determines how much a match
 | 
						|
  for the word counts.  Positive values increase, negative - decrease the
 | 
						|
  relative word's importance in the query.
 | 
						|
 | 
						|
  wasign: The sign of the word's weight in the query. If it's non-negative
 | 
						|
  the match for the word will increase document relevance, if it's
 | 
						|
  negative - decrease (the word becomes a "noise word", the less of it the
 | 
						|
  better).
 | 
						|
 | 
						|
  trunc: Corresponds to the '*' operator in the default setting of the
 | 
						|
  ft_boolean_syntax system variable.
 | 
						|
*/
 | 
						|
 | 
						|
typedef struct st_mysql_ftparser_boolean_info
 | 
						|
{
 | 
						|
  enum enum_ft_token_type type;
 | 
						|
  int yesno;
 | 
						|
  int weight_adjust;
 | 
						|
  char wasign;
 | 
						|
  char trunc;
 | 
						|
  /* These are parser state and must be removed. */
 | 
						|
  char prev;
 | 
						|
  char *quot;
 | 
						|
} MYSQL_FTPARSER_BOOLEAN_INFO;
 | 
						|
 | 
						|
/*
  The following flag means that the buffer with a string (document,
  word) may be overwritten by the caller before the end of the parsing
  (that is, before the st_mysql_ftparser::deinit() call).  Whoever needs
  the string to survive between two successive calls of the parsing
  function has to save a copy of it.

  The flag may be set by MySQL before calling
  st_mysql_ftparser::parse(), or it may be set by a plugin before
  calling st_mysql_ftparser_param::mysql_parse() or
  st_mysql_ftparser_param::mysql_add_word().
*/
#define MYSQL_FTFLAGS_NEED_COPY 1
 | 
						|
 | 
						|
/*
 | 
						|
  An argument of the full-text parser plugin. This structure is
 | 
						|
  filled in by MySQL server and passed to the parsing function of the
 | 
						|
  plugin as an in/out parameter.
 | 
						|
 | 
						|
  mysql_parse: A pointer to the built-in parser implementation of the
 | 
						|
  server. It's set by the server and can be used by the parser plugin
 | 
						|
  to invoke the MySQL default parser.  If plugin's role is to extract
 | 
						|
  textual data from .doc, .pdf or .xml content, it might extract
 | 
						|
  plaintext from the content, and then pass the text to the default
 | 
						|
  MySQL parser to be parsed.
 | 
						|
 | 
						|
  mysql_add_word: A server callback to add a new word.  When parsing
 | 
						|
  a document, the server sets this to point at a function that adds
 | 
						|
  the word to MySQL full-text index.  When parsing a search query,
 | 
						|
  this function will add the new word to the list of words to search
 | 
						|
  for.  The boolean_info argument can be NULL for all cases except
 | 
						|
  when mode is MYSQL_FTPARSER_FULL_BOOLEAN_INFO. A plugin can replace this
 | 
						|
  callback to post-process every parsed word before passing it to the original
 | 
						|
  mysql_add_word function.
 | 
						|
 | 
						|
  ftparser_state: A generic pointer. The plugin can set it to point
 | 
						|
  to information to be used internally for its own purposes.
 | 
						|
 | 
						|
  mysql_ftparam: This is set by the server.  It is used by MySQL functions
 | 
						|
  called via mysql_parse() and mysql_add_word() callback.  The plugin
 | 
						|
  should not modify it.
 | 
						|
 | 
						|
  cs: Information about the character set of the document or query string.
 | 
						|
 | 
						|
  doc: A pointer to the document or query string to be parsed.
 | 
						|
 | 
						|
  length: Length of the document or query string, in bytes.
 | 
						|
 | 
						|
  flags: See MYSQL_FTFLAGS_* constants above.
 | 
						|
 | 
						|
  mode: The parsing mode.  With boolean operators, with stopwords, or
 | 
						|
  nothing.  See  enum_ftparser_mode above.
 | 
						|
*/
 | 
						|
 | 
						|
typedef struct st_mysql_ftparser_param
 | 
						|
{
 | 
						|
  int (*mysql_parse)(struct st_mysql_ftparser_param *,
 | 
						|
                     const char *doc, int doc_len);
 | 
						|
  int (*mysql_add_word)(struct st_mysql_ftparser_param *,
 | 
						|
                        const char *word, int word_len,
 | 
						|
                        MYSQL_FTPARSER_BOOLEAN_INFO *boolean_info);
 | 
						|
  void *ftparser_state;
 | 
						|
  void *mysql_ftparam;
 | 
						|
  const struct charset_info_st *cs;
 | 
						|
  const char *doc;
 | 
						|
  int length;
 | 
						|
  unsigned int flags;
 | 
						|
  enum enum_ftparser_mode mode;
 | 
						|
} MYSQL_FTPARSER_PARAM;
 | 
						|
 | 
						|
/*
 | 
						|
  Full-text parser descriptor.
 | 
						|
 | 
						|
  interface_version is, e.g., MYSQL_FTPARSER_INTERFACE_VERSION.
 | 
						|
  The parsing, initialization, and deinitialization functions are
 | 
						|
  invoked per SQL statement for which the parser is used.
 | 
						|
*/
 | 
						|
 | 
						|
struct st_mysql_ftparser
 | 
						|
{
 | 
						|
  int interface_version;
 | 
						|
  int (*parse)(MYSQL_FTPARSER_PARAM *param);
 | 
						|
  int (*init)(MYSQL_FTPARSER_PARAM *param);
 | 
						|
  int (*deinit)(MYSQL_FTPARSER_PARAM *param);
 | 
						|
};
 | 
						|
 | 
						|
 | 
						|
#ifdef __cplusplus
 | 
						|
}
 | 
						|
#endif
 | 
						|
 | 
						|
#endif
 | 
						|
 |