mirror of
https://github.com/MariaDB/server.git
synced 2025-08-08 11:22:35 +03:00
Update Mroonga to the latest version on 2017-10-10T23:15:25+0900
This commit is contained in:
361
storage/mroonga/lib/mrn_query_parser.cpp
Normal file
361
storage/mroonga/lib/mrn_query_parser.cpp
Normal file
@@ -0,0 +1,361 @@
|
||||
/* -*- c-basic-offset: 2 -*- */
|
||||
/*
|
||||
Copyright(C) 2017 Kouhei Sutou <kou@clear-code.com>
|
||||
|
||||
This library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
This library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with this library; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include "mrn_query_parser.hpp"
|
||||
|
||||
#include <mrn_variables.hpp>
|
||||
|
||||
extern "C" {
|
||||
/* Groonga's internal functions */
|
||||
int grn_atoi(const char *nptr, const char *end, const char **rest);
|
||||
uint grn_atoui(const char *nptr, const char *end, const char **rest);
|
||||
}
|
||||
|
||||
#define MRN_CLASS_NAME "mrn::QueryParser"
|
||||
|
||||
namespace mrn {
|
||||
QueryParser::QueryParser(grn_ctx *ctx,
|
||||
THD *thd,
|
||||
grn_obj *expression,
|
||||
grn_obj *default_column,
|
||||
uint n_sections,
|
||||
grn_obj *match_columns)
|
||||
: ctx_(ctx),
|
||||
thd_(thd),
|
||||
expression_(expression),
|
||||
default_column_(default_column),
|
||||
n_sections_(n_sections),
|
||||
match_columns_(match_columns) {
|
||||
}
|
||||
|
||||
QueryParser::~QueryParser() {
|
||||
}
|
||||
|
||||
grn_rc QueryParser::parse(const char *query, size_t query_length) {
|
||||
MRN_DBUG_ENTER_METHOD();
|
||||
|
||||
const char *raw_query = NULL;
|
||||
size_t raw_query_length = 0;
|
||||
grn_operator default_operator = GRN_OP_OR;
|
||||
grn_expr_flags expression_flags = 0;
|
||||
parse_pragma(query,
|
||||
query_length,
|
||||
&raw_query,
|
||||
&raw_query_length,
|
||||
&default_operator,
|
||||
&expression_flags);
|
||||
|
||||
grn_obj *default_column = default_column_;
|
||||
if (match_columns_) {
|
||||
default_column = match_columns_;
|
||||
}
|
||||
grn_rc rc = grn_expr_parse(ctx_,
|
||||
expression_,
|
||||
raw_query,
|
||||
raw_query_length,
|
||||
default_column,
|
||||
GRN_OP_MATCH,
|
||||
default_operator,
|
||||
expression_flags);
|
||||
if (rc != GRN_SUCCESS) {
|
||||
char error_message[MRN_MESSAGE_BUFFER_SIZE];
|
||||
snprintf(error_message, MRN_MESSAGE_BUFFER_SIZE,
|
||||
"failed to parse fulltext search keyword: <%.*s>: <%s>",
|
||||
static_cast<int>(query_length),
|
||||
query,
|
||||
ctx_->errbuf);
|
||||
variables::ActionOnError action =
|
||||
variables::get_action_on_fulltext_query_error(thd_);
|
||||
switch (action) {
|
||||
case variables::ACTION_ON_ERROR_ERROR:
|
||||
my_message(ER_PARSE_ERROR, error_message, MYF(0));
|
||||
break;
|
||||
case variables::ACTION_ON_ERROR_ERROR_AND_LOG:
|
||||
my_message(ER_PARSE_ERROR, error_message, MYF(0));
|
||||
GRN_LOG(ctx_, GRN_LOG_ERROR, "%s", error_message);
|
||||
break;
|
||||
case variables::ACTION_ON_ERROR_IGNORE:
|
||||
break;
|
||||
case variables::ACTION_ON_ERROR_IGNORE_AND_LOG:
|
||||
GRN_LOG(ctx_, GRN_LOG_ERROR, "%s", error_message);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
DBUG_RETURN(rc);
|
||||
}
|
||||
|
||||
void QueryParser::parse_pragma(const char *query,
|
||||
size_t query_length,
|
||||
const char **raw_query,
|
||||
size_t *raw_query_length,
|
||||
grn_operator *default_operator,
|
||||
grn_expr_flags *flags) {
|
||||
MRN_DBUG_ENTER_METHOD();
|
||||
|
||||
const char *current_query = query;
|
||||
size_t current_query_length = query_length;
|
||||
|
||||
*default_operator = GRN_OP_OR;
|
||||
|
||||
if (current_query_length >= 4 && memcmp(current_query, "*SS ", 4) == 0) {
|
||||
*raw_query = current_query + 4;
|
||||
*raw_query_length = current_query_length - 4;
|
||||
*flags = GRN_EXPR_SYNTAX_SCRIPT;
|
||||
DBUG_VOID_RETURN;
|
||||
}
|
||||
|
||||
bool weight_specified = false;
|
||||
*raw_query = query;
|
||||
*raw_query_length = query_length;
|
||||
*flags = default_expression_flags();
|
||||
if (current_query_length >= 2 && current_query[0] == '*') {
|
||||
bool parsed = false;
|
||||
bool done = false;
|
||||
current_query++;
|
||||
current_query_length--;
|
||||
while (!done) {
|
||||
size_t consumed_query_length = 0;
|
||||
switch (current_query[0]) {
|
||||
case 'D':
|
||||
if (parse_pragma_d(current_query + 1,
|
||||
current_query_length - 1,
|
||||
default_operator,
|
||||
&consumed_query_length)) {
|
||||
parsed = true;
|
||||
consumed_query_length += 1;
|
||||
current_query += consumed_query_length;
|
||||
current_query_length -= consumed_query_length;
|
||||
} else {
|
||||
done = true;
|
||||
}
|
||||
break;
|
||||
case 'W':
|
||||
if (parse_pragma_w(current_query + 1,
|
||||
current_query_length - 1,
|
||||
&consumed_query_length)) {
|
||||
parsed = true;
|
||||
weight_specified = true;
|
||||
consumed_query_length += 1;
|
||||
current_query += consumed_query_length;
|
||||
current_query_length -= consumed_query_length;
|
||||
} else {
|
||||
done = true;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
done = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (parsed) {
|
||||
*raw_query = current_query;
|
||||
*raw_query_length = current_query_length;
|
||||
}
|
||||
}
|
||||
|
||||
// WORKAROUND: ignore the first '+' to support "+apple macintosh" pattern.
|
||||
while (*raw_query_length > 0 && (*raw_query)[0] == ' ') {
|
||||
(*raw_query)++;
|
||||
(*raw_query_length)--;
|
||||
}
|
||||
if (*raw_query_length > 0 && (*raw_query)[0] == '+') {
|
||||
(*raw_query)++;
|
||||
(*raw_query_length)--;
|
||||
}
|
||||
if (!weight_specified && match_columns_) {
|
||||
grn_expr_append_obj(ctx_, match_columns_, default_column_, GRN_OP_PUSH, 1);
|
||||
}
|
||||
|
||||
DBUG_VOID_RETURN;
|
||||
}
|
||||
|
||||
bool QueryParser::parse_pragma_w(const char *query,
|
||||
size_t query_length,
|
||||
size_t *consumed_query_length) {
|
||||
MRN_DBUG_ENTER_METHOD();
|
||||
|
||||
*consumed_query_length = 0;
|
||||
|
||||
grn_obj section_value_buffer;
|
||||
GRN_UINT32_INIT(§ion_value_buffer, 0);
|
||||
|
||||
MRN_ALLOCATE_VARIABLE_LENGTH_ARRAYS(bool, specified_sections, n_sections_);
|
||||
for (uint i = 0; i < n_sections_; ++i) {
|
||||
specified_sections[i] = false;
|
||||
}
|
||||
|
||||
uint n_weights = 0;
|
||||
while (query_length >= 1) {
|
||||
if (n_weights >= 1) {
|
||||
if (query[0] != ',') {
|
||||
break;
|
||||
}
|
||||
size_t n_used_query_length = 1;
|
||||
*consumed_query_length += n_used_query_length;
|
||||
query_length -= n_used_query_length;
|
||||
query += n_used_query_length;
|
||||
if (query_length == 0) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
uint section = 0;
|
||||
if ('1' <= query[0] && query[0] <= '9') {
|
||||
const char *section_start = query;
|
||||
const char *query_end = query + query_length;
|
||||
const char *query_rest;
|
||||
section = grn_atoui(section_start, query_end, &query_rest);
|
||||
if (section_start == query_rest) {
|
||||
break;
|
||||
}
|
||||
if (!(0 < section && section <= n_sections_)) {
|
||||
break;
|
||||
}
|
||||
section -= 1;
|
||||
specified_sections[section] = true;
|
||||
size_t n_used_query_length = query_rest - query;
|
||||
*consumed_query_length += n_used_query_length;
|
||||
query_length -= n_used_query_length;
|
||||
query += n_used_query_length;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
|
||||
int weight = 1;
|
||||
if (query_length >= 2 && query[0] == ':') {
|
||||
const char *weight_start = query + 1;
|
||||
const char *query_end = query + query_length;
|
||||
const char *query_rest;
|
||||
weight = grn_atoi(weight_start, query_end, &query_rest);
|
||||
if (weight_start == query_rest) {
|
||||
break;
|
||||
}
|
||||
size_t n_used_query_length = query_rest - query;
|
||||
*consumed_query_length += n_used_query_length;
|
||||
query_length -= n_used_query_length;
|
||||
query += n_used_query_length;
|
||||
}
|
||||
|
||||
n_weights++;
|
||||
|
||||
append_section(section,
|
||||
§ion_value_buffer,
|
||||
weight,
|
||||
n_weights);
|
||||
}
|
||||
|
||||
for (uint section = 0; section < n_sections_; ++section) {
|
||||
if (specified_sections[section]) {
|
||||
continue;
|
||||
}
|
||||
|
||||
++n_weights;
|
||||
|
||||
int default_weight = 1;
|
||||
append_section(section,
|
||||
§ion_value_buffer,
|
||||
default_weight,
|
||||
n_weights);
|
||||
}
|
||||
MRN_FREE_VARIABLE_LENGTH_ARRAYS(specified_sections);
|
||||
|
||||
GRN_OBJ_FIN(ctx_, §ion_value_buffer);
|
||||
|
||||
DBUG_RETURN(n_weights > 0);
|
||||
}
|
||||
|
||||
void QueryParser::append_section(uint section,
|
||||
grn_obj *section_value_buffer,
|
||||
int weight,
|
||||
uint n_weights) {
|
||||
MRN_DBUG_ENTER_METHOD();
|
||||
|
||||
if (!match_columns_) {
|
||||
DBUG_VOID_RETURN;
|
||||
}
|
||||
|
||||
grn_expr_append_obj(ctx_, match_columns_, default_column_, GRN_OP_PUSH, 1);
|
||||
GRN_UINT32_SET(ctx_, section_value_buffer, section);
|
||||
grn_expr_append_const(ctx_, match_columns_, section_value_buffer,
|
||||
GRN_OP_PUSH, 1);
|
||||
grn_expr_append_op(ctx_, match_columns_, GRN_OP_GET_MEMBER, 2);
|
||||
|
||||
if (weight != 1) {
|
||||
grn_expr_append_const_int(ctx_, match_columns_, weight, GRN_OP_PUSH, 1);
|
||||
grn_expr_append_op(ctx_, match_columns_, GRN_OP_STAR, 2);
|
||||
}
|
||||
|
||||
if (n_weights >= 2) {
|
||||
grn_expr_append_op(ctx_, match_columns_, GRN_OP_OR, 2);
|
||||
}
|
||||
|
||||
DBUG_VOID_RETURN;
|
||||
}
|
||||
|
||||
bool QueryParser::parse_pragma_d(const char *query,
|
||||
size_t query_length,
|
||||
grn_operator *default_operator,
|
||||
size_t *consumed_query_length) {
|
||||
MRN_DBUG_ENTER_METHOD();
|
||||
|
||||
bool succeeded = true;
|
||||
if (query_length >= 1 && query[0] == '+') {
|
||||
*default_operator = GRN_OP_AND;
|
||||
*consumed_query_length = 1;
|
||||
} else if (query_length >= 1 && query[0] == '-') {
|
||||
*default_operator = GRN_OP_AND_NOT;
|
||||
*consumed_query_length = 1;
|
||||
} else if (query_length >= 2 && memcmp(query, "OR", 2) == 0) {
|
||||
*default_operator = GRN_OP_OR;
|
||||
*consumed_query_length = 2;
|
||||
} else {
|
||||
succeeded = false;
|
||||
}
|
||||
|
||||
DBUG_RETURN(succeeded);
|
||||
}
|
||||
|
||||
grn_expr_flags QueryParser::default_expression_flags() {
|
||||
MRN_DBUG_ENTER_METHOD();
|
||||
|
||||
ulonglong syntax_flags = variables::get_boolean_mode_syntax_flags(thd_);
|
||||
grn_expr_flags expression_flags = 0;
|
||||
if (syntax_flags == variables::BOOLEAN_MODE_SYNTAX_FLAG_DEFAULT) {
|
||||
expression_flags = GRN_EXPR_SYNTAX_QUERY | GRN_EXPR_ALLOW_LEADING_NOT;
|
||||
} else {
|
||||
if (syntax_flags & variables::BOOLEAN_MODE_SYNTAX_FLAG_SYNTAX_SCRIPT) {
|
||||
expression_flags |= GRN_EXPR_SYNTAX_SCRIPT;
|
||||
} else {
|
||||
expression_flags |= GRN_EXPR_SYNTAX_QUERY;
|
||||
}
|
||||
if (syntax_flags & variables::BOOLEAN_MODE_SYNTAX_FLAG_ALLOW_COLUMN) {
|
||||
expression_flags |= GRN_EXPR_ALLOW_COLUMN;
|
||||
}
|
||||
if (syntax_flags & variables::BOOLEAN_MODE_SYNTAX_FLAG_ALLOW_UPDATE) {
|
||||
expression_flags |= GRN_EXPR_ALLOW_UPDATE;
|
||||
}
|
||||
if (syntax_flags & variables::BOOLEAN_MODE_SYNTAX_FLAG_ALLOW_LEADING_NOT) {
|
||||
expression_flags |= GRN_EXPR_ALLOW_LEADING_NOT;
|
||||
}
|
||||
}
|
||||
|
||||
DBUG_RETURN(expression_flags);
|
||||
}
|
||||
}
|
Reference in New Issue
Block a user