1
0
mirror of https://github.com/MariaDB/server.git synced 2025-08-08 11:22:35 +03:00

Fix JSON parsing: future-proof data representation in JSON, code cleanup

This commit is contained in:
Sergei Petrunia
2021-08-29 14:37:45 +03:00
parent a0b4a86822
commit 2a1cdbabec
6 changed files with 1351 additions and 1290 deletions

View File

@@ -1123,6 +1123,7 @@ public:
void get_stat_values()
{
table_field->read_stats->set_all_nulls();
// default: hist_type=NULL means there's no histogram
table_field->read_stats->histogram_type_on_disk= INVALID_HISTOGRAM;
if (table_field->read_stats->min_value)
@@ -1196,7 +1197,10 @@ public:
break;
}
case COLUMN_STAT_HISTOGRAM:
//TODO: if stat_field->length() == 0 then histogram_type_on_disk is set to INVALID_HISTOGRAM
/*
Do nothing here: we take the histogram length from the 'histogram'
column itself
*/
break;
}
}
@@ -1245,7 +1249,7 @@ public:
}
if (!hist->parse(mem_root, table_field,
table_field->read_stats->histogram_type_on_disk,
(const uchar*)val.ptr(), val.length()))
val.ptr(), val.length()))
{
table_field->read_stats->histogram_= hist;
return hist;
@@ -1255,19 +1259,19 @@ public:
}
};
/*
  Load a binary histogram by copying its stored bytes into memory taken
  from mem_root.

  NOTE(review): this span comes from a rendered diff. The pre-change and
  post-change versions of the signature and of the body appear back to
  back without +/- markers, so it does not read as one compilable function.

  The unnamed Field* parameter is not used.

  @return false  the bytes were copied into 'values'
  @return true   alloc_root() returned NULL
*/
bool Histogram_binary::parse(MEM_ROOT *mem_root, Field *,
Histogram_type type_arg,
const uchar *ptr_arg, uint size_arg)
// Post-change signature: the data is passed as (const char*, size_t).
bool Histogram_binary::parse(MEM_ROOT *mem_root, Field*,
Histogram_type type_arg, const char *hist_data,
size_t hist_data_len)
{
// Pre-change body: just copy the data
size = (uint8) size_arg;
type = type_arg;
if ((values = (uchar*)alloc_root(mem_root, size_arg)))
{
memcpy(values, ptr_arg, size_arg);
return false;
}
return true;
/* Post-change body. On-disk and in-memory formats are the same. Just copy the data. */
type= type_arg;
// NOTE(review): 'size' is narrowed to uint8 while the allocation and the
// memcpy below use the full hist_data_len - confirm that callers never pass
// more bytes than a uint8 can represent.
size= (uint8) hist_data_len; // 'size' holds the size of histogram in bytes
if (!(values= (uchar*)alloc_root(mem_root, hist_data_len)))
return true;
memcpy(values, hist_data, hist_data_len);
return false;
}
/*
@@ -1307,39 +1311,81 @@ void Histogram_json::init_for_collection(MEM_ROOT *mem_root,
*/
/*
  Parse the JSON text of a histogram and fill histogram_bounds.

  NOTE(review): this span comes from a rendered diff. Lines of the
  pre-change implementation (based on json_get_array_items()) and of the
  post-change implementation (based on json_engine_t) are interleaved
  without +/- markers.

  The post-change code expects:
   - a JSON object as the root element,
   - whose first key matches JSON_NAME,
   - whose value is a JSON array of string/number/true/false values.
  Each array element is stored into 'field' with store_text(), read back
  with get_key_image() and appended to histogram_bounds.

  @param field          Used as a converter from text to key image; its
                        value is overwritten for every array element
  @param type_arg       Asserted to be JSON_HB
  @param hist_data      The JSON text (post-change signature)
  @param hist_data_len  Its length in bytes (post-change signature)

  @return false  OK, 'size' is set to the number of collected bounds
  @return true   Error, ER_JSON_HISTOGRAM_PARSE_FAILED has been raised
*/
bool Histogram_json::parse(MEM_ROOT *mem_root, Field *field,
Histogram_type type_arg, const uchar *ptr,
uint size_arg)
Histogram_type type_arg, const char *hist_data,
size_t hist_data_len)
{
DBUG_ENTER("Histogram_json::parse");
DBUG_ASSERT(type_arg == JSON_HB);
// Pre-change: the text is treated as NUL-terminated (strlen) and split
// into array items by json_get_array_items().
size = (uint8) size_arg;
const char *json = (char *)ptr;
int vt;
std::vector<std::string> hist_buckets_text;
bool result = json_get_array_items(json, json + strlen(json), &vt, hist_buckets_text);
if (!result)
{
my_error(ER_JSON_HISTOGRAM_PARSE_FAILED, MYF(0), vt);
DBUG_RETURN(true);
}
size= hist_buckets_text.size();
// Post-change: 'err' holds the message passed to my_error() at the
// 'error' label; it is assigned right before every 'goto error'.
const char *err;
json_engine_t je;
json_string_t key_name;
/*
Convert the text based array into a data structure that allows lookups and
estimates
*/
for (auto &s : hist_buckets_text)
{
field->store_text(s.data(), s.size(), &my_charset_bin);
// Post-change: scan exactly hist_data_len bytes; no NUL terminator is needed.
json_scan_start(&je, &my_charset_utf8mb4_bin,
(const uchar*)hist_data,
(const uchar*)hist_data+hist_data_len);
// Get the value in "truncated key tuple format" here:
uchar buf[MAX_KEY_LENGTH];
uint len_to_copy= field->key_length();
uint bytes= field->get_key_image(buf, len_to_copy, Field::itRAW);
histogram_bounds.push_back(std::string((char*)buf, bytes));
if (json_read_value(&je) || je.value_type != JSON_VALUE_OBJECT)
{
err= "Root JSON element must be a JSON object";
goto error;
}
// Prepare the expected key name for comparison against the first key.
json_string_set_str(&key_name, (const uchar*)JSON_NAME,
(const uchar*)JSON_NAME + strlen(JSON_NAME));
json_string_set_cs(&key_name, system_charset_info);
if (json_scan_next(&je) || je.state != JST_KEY ||
!json_key_matches(&je, &key_name))
{
// NOTE(review): the message hard-codes the key name while the check uses
// JSON_NAME - presumably they are the same string; confirm.
err= "The first key in the object must be histogram_hb_v1";
goto error;
}
// The value must be a JSON array
if (json_read_value(&je) || (je.value_type != JSON_VALUE_ARRAY))
{
err= "A JSON array expected";
goto error;
}
// Read the array
// NOTE(review): the loop runs until json_scan_next() returns non-zero. It
// does not stop at JST_ARRAY_END, and no error state of 'je' is examined
// after the loop - confirm that truncated input or extra members after the
// array are meant to be accepted.
while (!json_scan_next(&je))
{
switch(je.state)
{
case JST_VALUE:
{
const char *val;
int val_len;
// NOTE(review): the return value of json_smart_read_value() is not
// checked; only je.value_type is inspected below.
json_smart_read_value(&je, &val, &val_len);
if (je.value_type != JSON_VALUE_STRING &&
je.value_type != JSON_VALUE_NUMBER &&
je.value_type != JSON_VALUE_TRUE &&
je.value_type != JSON_VALUE_FALSE)
{
err= "Scalar value expected";
goto error;
}
// Convert the text of the element into the field's key image and
// remember it as a bucket bound.
uchar buf[MAX_KEY_LENGTH];
uint len_to_copy= field->key_length();
field->store_text(val, val_len, &my_charset_bin);
uint bytes= field->get_key_image(buf, len_to_copy, Field::itRAW);
histogram_bounds.push_back(std::string((char*)buf, bytes));
// TODO: Should we also compare this endpoint with the previous
// to verify that the ordering is right?
break;
}
case JST_ARRAY_END:
break;
}
}
size= histogram_bounds.size();
DBUG_RETURN(false);
error:
// Report the message together with the offset of the scanner position
// from the start of hist_data.
my_error(ER_JSON_HISTOGRAM_PARSE_FAILED, MYF(0), err,
je.s.c_str - (const uchar*)hist_data);
DBUG_RETURN(true);
}
@@ -1347,7 +1393,7 @@ static
/*
  Store a key image into 'field' using field->key_length() as the length.

  The table's write_set is switched with dbug_tmp_use_all_columns() for the
  duration of the call and restored afterwards.

  NOTE(review): this span comes from a rendered diff. The two identical
  set_key_image() lines are presumably the removed and the added side of a
  whitespace-only change, not two intended calls.
*/
void store_key_image_to_rec_no_null(Field *field, uchar *ptr) {
MY_BITMAP *old_map= dbug_tmp_use_all_columns(field->table,
&field->table->write_set);
field->set_key_image(ptr, field->key_length());
field->set_key_image(ptr, field->key_length());
dbug_tmp_restore_column_map(&field->table->write_set, old_map);
}
@@ -1506,9 +1552,9 @@ double Histogram_json::point_selectivity(Field *field, key_range *endpoint, doub
/*
@param field The table field histogram is for. We don't care about the
field's current value, we only need its virtual functions to
field's current value, we only need its virtual functions to
perform various operations
@param min_endp, max_endp - this specifies the range.
*/
double Histogram_json::range_selectivity(Field *field, key_range *min_endp,
@@ -1594,7 +1640,7 @@ double Histogram_json::range_selectivity(Field *field, key_range *min_endp,
/*
  Write the histogram's JSON text into 'field' using my_charset_bin.

  NOTE(review): this span comes from a rendered diff and shows both sides of
  a one-line change. The first store() treats json_text as a NUL-terminated
  buffer (strlen); the second one uses json_text.data()/size(), so the
  length no longer depends on a terminator.
*/
void Histogram_json::serialize(Field *field)
{
field->store((char*)json_text, strlen((char*)json_text), &my_charset_bin);
field->store(json_text.data(), json_text.size(), &my_charset_bin);
}
@@ -2052,13 +2098,16 @@ public:
}
/*
  Serialize the collected bucket_bounds to JSON text and hand it to the
  Histogram_json object in 'histogram'.

  NOTE(review): this span comes from a rendered diff; pre-change and
  post-change lines are interleaved without +/- markers.
   - Pre-change: a heap-allocated Json_writer emits a bare array. No
     matching delete is visible in this span.
   - Post-change: a stack Json_writer emits an object with one member named
     Histogram_json::JSON_NAME whose value is the array.
*/
void build_json_from_histogram() {
Json_writer *writer = new Json_writer();
writer->start_array();
Json_writer writer;
writer.start_object();
writer.add_member(Histogram_json::JSON_NAME).start_array();
for(auto& value: bucket_bounds) {
// NOTE(review): c_str() hands over a NUL-terminated pointer; a bound with
// an embedded zero byte would presumably be cut short - confirm add_str().
writer->add_str(value.c_str());
writer.add_str(value.c_str());
}
writer->end_array();
Binary_string *json_string = (Binary_string *) writer->output.get_string();
writer.end_array();
writer.end_object();
Binary_string *json_string = (Binary_string *) writer.output.get_string();
Histogram_json *hist= (Histogram_json*)histogram;
// NOTE(review): json_string points into 'writer'; with the stack writer it
// is valid only until this function returns, so set_json_text() presumably
// copies the text - confirm.
hist->set_json_text(bucket_bounds.size(), (uchar *) json_string->c_ptr());
}
@@ -2080,42 +2129,6 @@ Histogram_base *create_histogram(Histogram_type hist_type)
}
/*
  Collect the elements of a top-level JSON array as text.

  @param json        Start of the JSON text
  @param json_end    One past the last byte of the JSON text
  @param value_type  OUT  je.value_type of the root element, then the result
                          of json_smart_read_value() for each element read.
                          Not written if the root element cannot be read.
  @param container   OUT  The text of every element is appended in document
                          order. Elements appended before a failure stay in
                          the container.

  @return true   The text is a well-formed JSON array holding only
                 string/number/true/false elements
  @return false  The root element is not an array, an element has another
                 type, or the scanner stopped on an error
*/
bool json_get_array_items(const char *json, const char *json_end, int *value_type, std::vector<std::string> &container) {
  json_engine_t je;
  const char *item;
  int item_len;

  json_scan_start(&je, &my_charset_utf8mb4_bin, (const uchar *)json, (const uchar *)json_end);

  if (json_read_value(&je) || (*value_type = je.value_type) != JSON_VALUE_ARRAY)
    return false;

  while (!json_scan_next(&je))
  {
    /* Only values are of interest; JST_ARRAY_END and the rest are skipped */
    if (je.state != JST_VALUE)
      continue;

    *value_type = json_smart_read_value(&je, &item, &item_len);
    if (je.value_type != JSON_VALUE_STRING &&
        je.value_type != JSON_VALUE_NUMBER &&
        je.value_type != JSON_VALUE_TRUE &&
        je.value_type != JSON_VALUE_FALSE)
      return false;

    /* Build the string in place instead of copying a temporary */
    container.emplace_back(item, static_cast<size_t>(item_len));
  }

  /*
    json_scan_next() returns non-zero both at the end of the document and
    on a syntax error (e.g. truncated text). Only the former is a success.
  */
  if (je.s.error)
    return false;

  return true;
}
C_MODE_START
int histogram_build_walk(void *elem, element_count elem_cnt, void *arg)