1
0
mirror of https://github.com/MariaDB/server.git synced 2025-08-01 03:47:19 +03:00

MDEV-26711: Values in JSON histograms are not properly quoted

Escape values when serializing to JSON. Un-escape when reading back.
This commit is contained in:
Sergei Petrunia
2021-09-29 20:11:48 +03:00
parent d03daaf8a8
commit 61cd4f4412
6 changed files with 608 additions and 80 deletions

View File

@ -223,15 +223,16 @@ test.t1 analyze status Table is already up to date
SELECT db_name, table_name, column_name, SELECT db_name, table_name, column_name,
min_value, max_value, min_value, max_value,
nulls_ratio, avg_frequency, nulls_ratio, avg_frequency,
hist_size, hist_type, HEX(histogram) hist_size, hist_type, decode_histogram(hist_type,histogram)
FROM mysql.column_stats; FROM mysql.column_stats
db_name table_name column_name min_value max_value nulls_ratio avg_frequency hist_size hist_type HEX(histogram) ORDER BY db_name, table_name, column_name;
test t1 a 0 49 0.0000 1.0000 4 SINGLE_PREC_HB 2E62A1D0 db_name table_name column_name min_value max_value nulls_ratio avg_frequency hist_size hist_type decode_histogram(hist_type,histogram)
test t1 b vvvvvvvvvvvvv zzzzzzzzzzzzzzzzzz 0.2000 6.4000 4 SINGLE_PREC_HB 003FBFFF test t1 a 0 49 0.0000 1.0000 4 SINGLE_PREC_HB 0.180,0.204,0.247,0.184,0.184
test t1 c aaaa dddddddd 0.1250 7.0000 4 SINGLE_PREC_HB 0055AAFF test t1 b vvvvvvvvvvvvv zzzzzzzzzzzzzzzzzz 0.2000 6.4000 4 SINGLE_PREC_HB 0.000,0.247,0.502,0.251,0.000
test t1 d 1989-03-12 1999-07-23 0.1500 8.5000 4 SINGLE_PREC_HB 001919FF test t1 c aaaa dddddddd 0.1250 7.0000 4 SINGLE_PREC_HB 0.000,0.333,0.333,0.333,0.000
test t1 e 0.01 0.112 0.2250 6.2000 4 SINGLE_PREC_HB 000564E1 test t1 d 1989-03-12 1999-07-23 0.1500 8.5000 4 SINGLE_PREC_HB 0.000,0.098,0.000,0.902,0.000
test t1 f 1 5 0.2000 6.4000 4 SINGLE_PREC_HB 3F7FBFBF test t1 e 0.01 0.112 0.2250 6.2000 4 SINGLE_PREC_HB 0.000,0.020,0.373,0.490,0.118
test t1 f 1 5 0.2000 6.4000 4 SINGLE_PREC_HB 0.247,0.251,0.251,0.000,0.251
DELETE FROM mysql.column_stats; DELETE FROM mysql.column_stats;
set histogram_size=8; set histogram_size=8;
set histogram_type=@DOUBLE_PREC_TYPE; set histogram_type=@DOUBLE_PREC_TYPE;
@ -242,15 +243,16 @@ test.t1 analyze status Table is already up to date
SELECT db_name, table_name, column_name, SELECT db_name, table_name, column_name,
min_value, max_value, min_value, max_value,
nulls_ratio, avg_frequency, nulls_ratio, avg_frequency,
hist_size, hist_type, HEX(histogram) hist_size, hist_type, decode_histogram(hist_type,histogram)
FROM mysql.column_stats; FROM mysql.column_stats
db_name table_name column_name min_value max_value nulls_ratio avg_frequency hist_size hist_type HEX(histogram) ORDER BY db_name, table_name, column_name;
test t1 a 0 49 0.0000 1.0000 8 DOUBLE_PREC_HB 052F4363F4A1F9D0 db_name table_name column_name min_value max_value nulls_ratio avg_frequency hist_size hist_type decode_histogram(hist_type,histogram)
test t1 b vvvvvvvvvvvvv zzzzzzzzzzzzzzzzzz 0.2000 6.4000 8 DOUBLE_PREC_HB 0000FF3FFFBFFFFF test t1 a 0 49 0.0000 1.0000 8 DOUBLE_PREC_HB 0.18367,0.20407,0.24489,0.18367,0.18369
test t1 c aaaa dddddddd 0.1250 7.0000 8 DOUBLE_PREC_HB 00005555AAAAFFFF test t1 b vvvvvvvvvvvvv zzzzzzzzzzzzzzzzzz 0.2000 6.4000 8 DOUBLE_PREC_HB 0.00000,0.24999,0.50001,0.25000,0.00000
test t1 d 1989-03-12 1999-07-23 0.1500 8.5000 8 DOUBLE_PREC_HB 0000031A031AFFFF test t1 c aaaa dddddddd 0.1250 7.0000 8 DOUBLE_PREC_HB 0.00000,0.33333,0.33333,0.33333,0.00000
test t1 e 0.01 0.112 0.2250 6.2000 8 DOUBLE_PREC_HB 000005056464E1E1 test t1 d 1989-03-12 1999-07-23 0.1500 8.5000 8 DOUBLE_PREC_HB 0.00000,0.10161,0.00000,0.89839,0.00000
test t1 f 1 5 0.2000 6.4000 8 DOUBLE_PREC_HB FF3FFF7FFFBFFFBF test t1 e 0.01 0.112 0.2250 6.2000 8 DOUBLE_PREC_HB 0.00000,0.01961,0.37255,0.49020,0.11765
test t1 f 1 5 0.2000 6.4000 8 DOUBLE_PREC_HB 0.24999,0.25000,0.25000,0.00000,0.25000
DELETE FROM mysql.column_stats; DELETE FROM mysql.column_stats;
set histogram_size= 0; set histogram_size= 0;
set histogram_type=@SINGLE_PREC_TYPE; set histogram_type=@SINGLE_PREC_TYPE;
@ -1578,10 +1580,11 @@ test.t1 analyze status OK
select db_name, table_name, column_name, select db_name, table_name, column_name,
min_value, max_value, min_value, max_value,
nulls_ratio, avg_frequency, nulls_ratio, avg_frequency,
hist_size, hist_type, HEX(histogram) hist_size, hist_type, decode_histogram(hist_type,histogram)
FROM mysql.column_stats; FROM mysql.column_stats
db_name table_name column_name min_value max_value nulls_ratio avg_frequency hist_size hist_type HEX(histogram) ORDER BY db_name, table_name, column_name;
test t1 a 1 3 0.0000 1.0000 10 SINGLE_PREC_HB 0000007F7F7F7FFFFFFF db_name table_name column_name min_value max_value nulls_ratio avg_frequency hist_size hist_type decode_histogram(hist_type,histogram)
test t1 a 1 3 0.0000 1.0000 10 SINGLE_PREC_HB 0.000,0.000,0.000,0.498,0.000,0.000,0.000,0.502,0.000,0.000,0.000
set histogram_size=default; set histogram_size=default;
drop table t1; drop table t1;
# #
@ -1603,10 +1606,11 @@ test.t1 analyze status OK
select db_name, table_name, column_name, select db_name, table_name, column_name,
min_value, max_value, min_value, max_value,
nulls_ratio, avg_frequency, nulls_ratio, avg_frequency,
hist_size, hist_type, HEX(histogram) hist_size, hist_type, decode_histogram(hist_type,histogram)
FROM mysql.column_stats; FROM mysql.column_stats
db_name table_name column_name min_value max_value nulls_ratio avg_frequency hist_size hist_type HEX(histogram) ORDER BY db_name, table_name, column_name;
test t1 a 1 5 0.0000 1.0000 10 DOUBLE_PREC_HB 0000FF3FFF7FFFBFFFFF db_name table_name column_name min_value max_value nulls_ratio avg_frequency hist_size hist_type decode_histogram(hist_type,histogram)
test t1 a 1 5 0.0000 1.0000 10 DOUBLE_PREC_HB 0.00000,0.24999,0.25000,0.25000,0.25000,0.00000
set histogram_size=0; set histogram_size=0;
set histogram_type=@SINGLE_PREC_TYPE; set histogram_type=@SINGLE_PREC_TYPE;
drop table t1; drop table t1;

View File

@ -187,12 +187,12 @@ DELETE FROM mysql.column_stats;
set histogram_size=4; set histogram_size=4;
ANALYZE TABLE t1; ANALYZE TABLE t1;
--sorted_result
SELECT db_name, table_name, column_name, SELECT db_name, table_name, column_name,
min_value, max_value, min_value, max_value,
nulls_ratio, avg_frequency, nulls_ratio, avg_frequency,
hist_size, hist_type, HEX(histogram) hist_size, hist_type, decode_histogram(hist_type,histogram)
FROM mysql.column_stats; FROM mysql.column_stats
ORDER BY db_name, table_name, column_name;
DELETE FROM mysql.column_stats; DELETE FROM mysql.column_stats;
@ -200,12 +200,12 @@ set histogram_size=8;
set histogram_type=@DOUBLE_PREC_TYPE; set histogram_type=@DOUBLE_PREC_TYPE;
ANALYZE TABLE t1; ANALYZE TABLE t1;
--sorted_result
SELECT db_name, table_name, column_name, SELECT db_name, table_name, column_name,
min_value, max_value, min_value, max_value,
nulls_ratio, avg_frequency, nulls_ratio, avg_frequency,
hist_size, hist_type, HEX(histogram) hist_size, hist_type, decode_histogram(hist_type,histogram)
FROM mysql.column_stats; FROM mysql.column_stats
ORDER BY db_name, table_name, column_name;
DELETE FROM mysql.column_stats; DELETE FROM mysql.column_stats;
@ -718,8 +718,9 @@ analyze table t1 persistent for all;
select db_name, table_name, column_name, select db_name, table_name, column_name,
min_value, max_value, min_value, max_value,
nulls_ratio, avg_frequency, nulls_ratio, avg_frequency,
hist_size, hist_type, HEX(histogram) hist_size, hist_type, decode_histogram(hist_type,histogram)
FROM mysql.column_stats; FROM mysql.column_stats
ORDER BY db_name, table_name, column_name;
set histogram_size=default; set histogram_size=default;
@ -743,8 +744,9 @@ analyze table t1 persistent for all;
select db_name, table_name, column_name, select db_name, table_name, column_name,
min_value, max_value, min_value, max_value,
nulls_ratio, avg_frequency, nulls_ratio, avg_frequency,
hist_size, hist_type, HEX(histogram) hist_size, hist_type, decode_histogram(hist_type,histogram)
FROM mysql.column_stats; FROM mysql.column_stats
ORDER BY db_name, table_name, column_name;
set histogram_size=0; set histogram_size=0;
set histogram_type=@SINGLE_PREC_TYPE; set histogram_type=@SINGLE_PREC_TYPE;

View File

@ -229,15 +229,160 @@ test.t1 analyze status Table is already up to date
SELECT db_name, table_name, column_name, SELECT db_name, table_name, column_name,
min_value, max_value, min_value, max_value,
nulls_ratio, avg_frequency, nulls_ratio, avg_frequency,
hist_size, hist_type, HEX(histogram) hist_size, hist_type, decode_histogram(hist_type,histogram)
FROM mysql.column_stats; FROM mysql.column_stats
db_name table_name column_name min_value max_value nulls_ratio avg_frequency hist_size hist_type HEX(histogram) ORDER BY db_name, table_name, column_name;
test t1 a 0 49 0.0000 1.0000 4 JSON_HB 7B0A202022686973746F6772616D5F68625F7632223A205B0A202020207B0A202020202020227374617274223A202230222C0A2020202020202273697A65223A20302E32352C0A202020202020226E6476223A2031300A202020207D2C0A202020207B0A202020202020227374617274223A20223131222C0A2020202020202273697A65223A20302E32352C0A202020202020226E6476223A2031300A202020207D2C0A202020207B0A202020202020227374617274223A20223234222C0A2020202020202273697A65223A20302E32352C0A202020202020226E6476223A2031300A202020207D2C0A202020207B0A202020202020227374617274223A20223338222C0A20202020202022656E64223A20223439222C0A2020202020202273697A65223A20302E32352C0A202020202020226E6476223A2031300A202020207D0A20205D0A7D db_name table_name column_name min_value max_value nulls_ratio avg_frequency hist_size hist_type decode_histogram(hist_type,histogram)
test t1 b vvvvvvvvvvvvv zzzzzzzzzzzzzzzzzz 0.2000 6.4000 4 JSON_HB 7B0A202022686973746F6772616D5F68625F7632223A205B0A202020207B0A202020202020227374617274223A202276767676767676767676767676222C0A2020202020202273697A65223A20302E32352C0A202020202020226E6476223A20310A202020207D2C0A202020207B0A202020202020227374617274223A202277777777777777777777777777777777777777777777777777777777222C0A2020202020202273697A65223A20302E32352C0A202020202020226E6476223A20320A202020207D2C0A202020207B0A202020202020227374617274223A20227878787878787878787878787878787878787878787878787878222C0A2020202020202273697A65223A20302E32352C0A202020202020226E6476223A20320A202020207D2C0A202020207B0A202020202020227374617274223A2022797979222C0A20202020202022656E64223A20227A7A7A7A7A7A7A7A7A7A7A7A7A7A7A7A7A7A222C0A2020202020202273697A65223A20302E32352C0A202020202020226E6476223A20320A202020207D0A20205D0A7D test t1 a 0 49 0.0000 1.0000 4 JSON_HB {
test t1 c aaaa dddddddd 0.1250 7.0000 4 JSON_HB 7B0A202022686973746F6772616D5F68625F7632223A205B0A202020207B0A202020202020227374617274223A202261616161222C0A2020202020202273697A65223A20302E3235373134323835372C0A202020202020226E6476223A20310A202020207D2C0A202020207B0A202020202020227374617274223A2022626262222C0A2020202020202273697A65223A20302E3232383537313432392C0A202020202020226E6476223A20320A202020207D2C0A202020207B0A202020202020227374617274223A2022636363636363636363222C0A2020202020202273697A65223A20302E3232383537313432392C0A202020202020226E6476223A20310A202020207D2C0A202020207B0A202020202020227374617274223A20226464646464646464222C0A20202020202022656E64223A20226464646464646464222C0A2020202020202273697A65223A20302E3238353731343238362C0A202020202020226E6476223A20310A202020207D0A20205D0A7D "histogram_hb_v2": [
test t1 d 1989-03-12 1999-07-23 0.1500 8.5000 4 JSON_HB 7B0A202022686973746F6772616D5F68625F7632223A205B0A202020207B0A202020202020227374617274223A2022313938392D30332D3132222C0A2020202020202273697A65223A20302E3332333532393431322C0A202020202020226E6476223A20310A202020207D2C0A202020207B0A202020202020227374617274223A2022313939302D30352D3135222C0A2020202020202273697A65223A20302E3431313736343730362C0A202020202020226E6476223A20310A202020207D2C0A202020207B0A202020202020227374617274223A2022313939382D30382D3238222C0A2020202020202273697A65223A20302E3233353239343131382C0A202020202020226E6476223A20320A202020207D2C0A202020207B0A202020202020227374617274223A2022313939392D30372D3233222C0A20202020202022656E64223A2022313939392D30372D3233222C0A2020202020202273697A65223A20302E3032393431313736352C0A202020202020226E6476223A20310A202020207D0A20205D0A7D {
test t1 e 0.01 0.112 0.2250 6.2000 4 JSON_HB 7B0A202022686973746F6772616D5F68625F7632223A205B0A202020207B0A202020202020227374617274223A2022302E3031222C0A2020202020202273697A65223A20302E3338373039363737342C0A202020202020226E6476223A20310A202020207D2C0A202020207B0A202020202020227374617274223A2022302E303132222C0A2020202020202273697A65223A20302E3232353830363435322C0A202020202020226E6476223A20320A202020207D2C0A202020207B0A202020202020227374617274223A2022302E31222C0A2020202020202273697A65223A20302E3235383036343531362C0A202020202020226E6476223A20310A202020207D2C0A202020207B0A202020202020227374617274223A2022302E313132222C0A20202020202022656E64223A2022302E313132222C0A2020202020202273697A65223A20302E3132393033323235382C0A202020202020226E6476223A20310A202020207D0A20205D0A7D "start": "0",
test t1 f 1 5 0.2000 6.4000 4 JSON_HB 7B0A202022686973746F6772616D5F68625F7632223A205B0A202020207B0A202020202020227374617274223A202201222C0A2020202020202273697A65223A20302E32352C0A202020202020226E6476223A20320A202020207D2C0A202020207B0A202020202020227374617274223A202202222C0A2020202020202273697A65223A20302E32352C0A202020202020226E6476223A20320A202020207D2C0A202020207B0A202020202020227374617274223A202203222C0A2020202020202273697A65223A20302E32352C0A202020202020226E6476223A20320A202020207D2C0A202020207B0A202020202020227374617274223A202204222C0A20202020202022656E64223A202205222C0A2020202020202273697A65223A20302E32352C0A202020202020226E6476223A20320A202020207D0A20205D0A7D "size": 0.25,
"ndv": 10
},
{
"start": "11",
"size": 0.25,
"ndv": 10
},
{
"start": "24",
"size": 0.25,
"ndv": 10
},
{
"start": "38",
"end": "49",
"size": 0.25,
"ndv": 10
}
]
}
test t1 b vvvvvvvvvvvvv zzzzzzzzzzzzzzzzzz 0.2000 6.4000 4 JSON_HB {
"histogram_hb_v2": [
{
"start": "vvvvvvvvvvvvv",
"size": 0.25,
"ndv": 1
},
{
"start": "wwwwwwwwwwwwwwwwwwwwwwwwwwww",
"size": 0.25,
"ndv": 2
},
{
"start": "xxxxxxxxxxxxxxxxxxxxxxxxxx",
"size": 0.25,
"ndv": 2
},
{
"start": "yyy",
"end": "zzzzzzzzzzzzzzzzzz",
"size": 0.25,
"ndv": 2
}
]
}
test t1 c aaaa dddddddd 0.1250 7.0000 4 JSON_HB {
"histogram_hb_v2": [
{
"start": "aaaa",
"size": 0.257142857,
"ndv": 1
},
{
"start": "bbb",
"size": 0.228571429,
"ndv": 2
},
{
"start": "ccccccccc",
"size": 0.228571429,
"ndv": 1
},
{
"start": "dddddddd",
"end": "dddddddd",
"size": 0.285714286,
"ndv": 1
}
]
}
test t1 d 1989-03-12 1999-07-23 0.1500 8.5000 4 JSON_HB {
"histogram_hb_v2": [
{
"start": "1989-03-12",
"size": 0.323529412,
"ndv": 1
},
{
"start": "1990-05-15",
"size": 0.411764706,
"ndv": 1
},
{
"start": "1998-08-28",
"size": 0.235294118,
"ndv": 2
},
{
"start": "1999-07-23",
"end": "1999-07-23",
"size": 0.029411765,
"ndv": 1
}
]
}
test t1 e 0.01 0.112 0.2250 6.2000 4 JSON_HB {
"histogram_hb_v2": [
{
"start": "0.01",
"size": 0.387096774,
"ndv": 1
},
{
"start": "0.012",
"size": 0.225806452,
"ndv": 2
},
{
"start": "0.1",
"size": 0.258064516,
"ndv": 1
},
{
"start": "0.112",
"end": "0.112",
"size": 0.129032258,
"ndv": 1
}
]
}
test t1 f 1 5 0.2000 6.4000 4 JSON_HB {
"histogram_hb_v2": [
{
"start": "\u0001",
"size": 0.25,
"ndv": 2
},
{
"start": "\u0002",
"size": 0.25,
"ndv": 2
},
{
"start": "\u0003",
"size": 0.25,
"ndv": 2
},
{
"start": "\u0004",
"end": "\u0005",
"size": 0.25,
"ndv": 2
}
]
}
DELETE FROM mysql.column_stats; DELETE FROM mysql.column_stats;
set histogram_size=8; set histogram_size=8;
set histogram_type=@DOUBLE_PREC_TYPE; set histogram_type=@DOUBLE_PREC_TYPE;
@ -248,15 +393,200 @@ test.t1 analyze status Table is already up to date
SELECT db_name, table_name, column_name, SELECT db_name, table_name, column_name,
min_value, max_value, min_value, max_value,
nulls_ratio, avg_frequency, nulls_ratio, avg_frequency,
hist_size, hist_type, HEX(histogram) hist_size, hist_type, decode_histogram(hist_type,histogram)
FROM mysql.column_stats; FROM mysql.column_stats
db_name table_name column_name min_value max_value nulls_ratio avg_frequency hist_size hist_type HEX(histogram) ORDER BY db_name, table_name, column_name;
test t1 a 0 49 0.0000 1.0000 8 JSON_HB 7B0A202022686973746F6772616D5F68625F7632223A205B0A202020207B0A202020202020227374617274223A202230222C0A2020202020202273697A65223A20302E3132352C0A202020202020226E6476223A20350A202020207D2C0A202020207B0A202020202020227374617274223A202235222C0A2020202020202273697A65223A20302E3132352C0A202020202020226E6476223A20350A202020207D2C0A202020207B0A202020202020227374617274223A20223131222C0A2020202020202273697A65223A20302E3132352C0A202020202020226E6476223A20350A202020207D2C0A202020207B0A202020202020227374617274223A20223138222C0A2020202020202273697A65223A20302E3132352C0A202020202020226E6476223A20350A202020207D2C0A202020207B0A202020202020227374617274223A20223234222C0A2020202020202273697A65223A20302E3132352C0A202020202020226E6476223A20350A202020207D2C0A202020207B0A202020202020227374617274223A20223332222C0A2020202020202273697A65223A20302E3132352C0A202020202020226E6476223A20350A202020207D2C0A202020207B0A202020202020227374617274223A20223338222C0A2020202020202273697A65223A20302E3132352C0A202020202020226E6476223A20350A202020207D2C0A202020207B0A202020202020227374617274223A20223433222C0A20202020202022656E64223A20223439222C0A2020202020202273697A65223A20302E3132352C0A202020202020226E6476223A20350A202020207D0A20205D0A7D db_name table_name column_name min_value max_value nulls_ratio avg_frequency hist_size hist_type decode_histogram(hist_type,histogram)
test t1 b vvvvvvvvvvvvv zzzzzzzzzzzzzzzzzz 0.2000 6.4000 5 JSON_HB 7B0A202022686973746F6772616D5F68625F7632223A205B0A202020207B0A202020202020227374617274223A202276767676767676767676767676222C0A2020202020202273697A65223A20302E32352C0A202020202020226E6476223A20310A202020207D2C0A202020207B0A202020202020227374617274223A202277777777777777777777777777777777777777777777777777777777222C0A2020202020202273697A65223A20302E32313837352C0A202020202020226E6476223A20310A202020207D2C0A202020207B0A202020202020227374617274223A20227878787878787878787878787878787878787878787878787878222C0A2020202020202273697A65223A20302E3132352C0A202020202020226E6476223A20310A202020207D2C0A202020207B0A202020202020227374617274223A2022797979222C0A2020202020202273697A65223A20302E313837352C0A202020202020226E6476223A20310A202020207D2C0A202020207B0A202020202020227374617274223A20227A7A7A7A7A7A7A7A7A7A7A7A7A7A7A7A7A7A222C0A20202020202022656E64223A20227A7A7A7A7A7A7A7A7A7A7A7A7A7A7A7A7A7A222C0A2020202020202273697A65223A20302E32313837352C0A202020202020226E6476223A20310A202020207D0A20205D0A7D test t1 a 0 49 0.0000 1.0000 8 JSON_HB {
test t1 c aaaa dddddddd 0.1250 7.0000 5 JSON_HB 7B0A202022686973746F6772616D5F68625F7632223A205B0A202020207B0A202020202020227374617274223A202261616161222C0A2020202020202273697A65223A20302E3235373134323835372C0A202020202020226E6476223A20310A202020207D2C0A202020207B0A202020202020227374617274223A2022626262222C0A2020202020202273697A65223A20302E3032383537313432392C0A202020202020226E6476223A20310A202020207D2C0A202020207B0A202020202020227374617274223A2022626262626262222C0A2020202020202273697A65223A20302E322C0A202020202020226E6476223A20310A202020207D2C0A202020207B0A202020202020227374617274223A2022636363636363636363222C0A2020202020202273697A65223A20302E3232383537313432392C0A202020202020226E6476223A20310A202020207D2C0A202020207B0A202020202020227374617274223A20226464646464646464222C0A20202020202022656E64223A20226464646464646464222C0A2020202020202273697A65223A20302E3238353731343238362C0A202020202020226E6476223A20310A202020207D0A20205D0A7D "histogram_hb_v2": [
test t1 d 1989-03-12 1999-07-23 0.1500 8.5000 4 JSON_HB 7B0A202022686973746F6772616D5F68625F7632223A205B0A202020207B0A202020202020227374617274223A2022313938392D30332D3132222C0A2020202020202273697A65223A20302E3332333532393431322C0A202020202020226E6476223A20310A202020207D2C0A202020207B0A202020202020227374617274223A2022313939302D30352D3135222C0A2020202020202273697A65223A20302E3431313736343730362C0A202020202020226E6476223A20310A202020207D2C0A202020207B0A202020202020227374617274223A2022313939382D30382D3238222C0A2020202020202273697A65223A20302E3032393431313736352C0A202020202020226E6476223A20310A202020207D2C0A202020207B0A202020202020227374617274223A2022313939392D30372D3233222C0A20202020202022656E64223A2022313939392D30372D3233222C0A2020202020202273697A65223A20302E3233353239343131382C0A202020202020226E6476223A20310A202020207D0A20205D0A7D {
test t1 e 0.01 0.112 0.2250 6.2000 5 JSON_HB 7B0A202022686973746F6772616D5F68625F7632223A205B0A202020207B0A202020202020227374617274223A2022302E3031222C0A2020202020202273697A65223A20302E3338373039363737342C0A202020202020226E6476223A20310A202020207D2C0A202020207B0A202020202020227374617274223A2022302E303132222C0A2020202020202273697A65223A20302E3036343531363132392C0A202020202020226E6476223A20310A202020207D2C0A202020207B0A202020202020227374617274223A2022302E3035222C0A2020202020202273697A65223A20302E3136313239303332332C0A202020202020226E6476223A20310A202020207D2C0A202020207B0A202020202020227374617274223A2022302E31222C0A2020202020202273697A65223A20302E3235383036343531362C0A202020202020226E6476223A20310A202020207D2C0A202020207B0A202020202020227374617274223A2022302E313132222C0A20202020202022656E64223A2022302E313132222C0A2020202020202273697A65223A20302E3132393033323235382C0A202020202020226E6476223A20310A202020207D0A20205D0A7D "start": "0",
test t1 f 1 5 0.2000 6.4000 5 JSON_HB 7B0A202022686973746F6772616D5F68625F7632223A205B0A202020207B0A202020202020227374617274223A202201222C0A2020202020202273697A65223A20302E3132352C0A202020202020226E6476223A20310A202020207D2C0A202020207B0A202020202020227374617274223A202202222C0A2020202020202273697A65223A20302E32352C0A202020202020226E6476223A20310A202020207D2C0A202020207B0A202020202020227374617274223A202203222C0A2020202020202273697A65223A20302E313837352C0A202020202020226E6476223A20310A202020207D2C0A202020207B0A202020202020227374617274223A202204222C0A2020202020202273697A65223A20302E333132352C0A202020202020226E6476223A20310A202020207D2C0A202020207B0A202020202020227374617274223A202205222C0A20202020202022656E64223A202205222C0A2020202020202273697A65223A20302E3132352C0A202020202020226E6476223A20310A202020207D0A20205D0A7D "size": 0.125,
"ndv": 5
},
{
"start": "5",
"size": 0.125,
"ndv": 5
},
{
"start": "11",
"size": 0.125,
"ndv": 5
},
{
"start": "18",
"size": 0.125,
"ndv": 5
},
{
"start": "24",
"size": 0.125,
"ndv": 5
},
{
"start": "32",
"size": 0.125,
"ndv": 5
},
{
"start": "38",
"size": 0.125,
"ndv": 5
},
{
"start": "43",
"end": "49",
"size": 0.125,
"ndv": 5
}
]
}
test t1 b vvvvvvvvvvvvv zzzzzzzzzzzzzzzzzz 0.2000 6.4000 5 JSON_HB {
"histogram_hb_v2": [
{
"start": "vvvvvvvvvvvvv",
"size": 0.25,
"ndv": 1
},
{
"start": "wwwwwwwwwwwwwwwwwwwwwwwwwwww",
"size": 0.21875,
"ndv": 1
},
{
"start": "xxxxxxxxxxxxxxxxxxxxxxxxxx",
"size": 0.125,
"ndv": 1
},
{
"start": "yyy",
"size": 0.1875,
"ndv": 1
},
{
"start": "zzzzzzzzzzzzzzzzzz",
"end": "zzzzzzzzzzzzzzzzzz",
"size": 0.21875,
"ndv": 1
}
]
}
test t1 c aaaa dddddddd 0.1250 7.0000 5 JSON_HB {
"histogram_hb_v2": [
{
"start": "aaaa",
"size": 0.257142857,
"ndv": 1
},
{
"start": "bbb",
"size": 0.028571429,
"ndv": 1
},
{
"start": "bbbbbb",
"size": 0.2,
"ndv": 1
},
{
"start": "ccccccccc",
"size": 0.228571429,
"ndv": 1
},
{
"start": "dddddddd",
"end": "dddddddd",
"size": 0.285714286,
"ndv": 1
}
]
}
test t1 d 1989-03-12 1999-07-23 0.1500 8.5000 4 JSON_HB {
"histogram_hb_v2": [
{
"start": "1989-03-12",
"size": 0.323529412,
"ndv": 1
},
{
"start": "1990-05-15",
"size": 0.411764706,
"ndv": 1
},
{
"start": "1998-08-28",
"size": 0.029411765,
"ndv": 1
},
{
"start": "1999-07-23",
"end": "1999-07-23",
"size": 0.235294118,
"ndv": 1
}
]
}
test t1 e 0.01 0.112 0.2250 6.2000 5 JSON_HB {
"histogram_hb_v2": [
{
"start": "0.01",
"size": 0.387096774,
"ndv": 1
},
{
"start": "0.012",
"size": 0.064516129,
"ndv": 1
},
{
"start": "0.05",
"size": 0.161290323,
"ndv": 1
},
{
"start": "0.1",
"size": 0.258064516,
"ndv": 1
},
{
"start": "0.112",
"end": "0.112",
"size": 0.129032258,
"ndv": 1
}
]
}
test t1 f 1 5 0.2000 6.4000 5 JSON_HB {
"histogram_hb_v2": [
{
"start": "\u0001",
"size": 0.125,
"ndv": 1
},
{
"start": "\u0002",
"size": 0.25,
"ndv": 1
},
{
"start": "\u0003",
"size": 0.1875,
"ndv": 1
},
{
"start": "\u0004",
"size": 0.3125,
"ndv": 1
},
{
"start": "\u0005",
"end": "\u0005",
"size": 0.125,
"ndv": 1
}
]
}
DELETE FROM mysql.column_stats; DELETE FROM mysql.column_stats;
set histogram_size= 0; set histogram_size= 0;
set histogram_type=@SINGLE_PREC_TYPE; set histogram_type=@SINGLE_PREC_TYPE;
@ -3332,10 +3662,30 @@ test.t1 analyze status OK
select db_name, table_name, column_name, select db_name, table_name, column_name,
min_value, max_value, min_value, max_value,
nulls_ratio, avg_frequency, nulls_ratio, avg_frequency,
hist_size, hist_type, HEX(histogram) hist_size, hist_type, decode_histogram(hist_type,histogram)
FROM mysql.column_stats; FROM mysql.column_stats
db_name table_name column_name min_value max_value nulls_ratio avg_frequency hist_size hist_type HEX(histogram) ORDER BY db_name, table_name, column_name;
test t1 a 1 3 0.0000 1.0000 3 JSON_HB 7B0A202022686973746F6772616D5F68625F7632223A205B0A202020207B0A202020202020227374617274223A202231222C0A2020202020202273697A65223A20302E3333333333333333332C0A202020202020226E6476223A20310A202020207D2C0A202020207B0A202020202020227374617274223A202232222C0A2020202020202273697A65223A20302E3333333333333333332C0A202020202020226E6476223A20310A202020207D2C0A202020207B0A202020202020227374617274223A202233222C0A20202020202022656E64223A202233222C0A2020202020202273697A65223A20302E3333333333333333332C0A202020202020226E6476223A20310A202020207D0A20205D0A7D db_name table_name column_name min_value max_value nulls_ratio avg_frequency hist_size hist_type decode_histogram(hist_type,histogram)
test t1 a 1 3 0.0000 1.0000 3 JSON_HB {
"histogram_hb_v2": [
{
"start": "1",
"size": 0.333333333,
"ndv": 1
},
{
"start": "2",
"size": 0.333333333,
"ndv": 1
},
{
"start": "3",
"end": "3",
"size": 0.333333333,
"ndv": 1
}
]
}
set histogram_size=default; set histogram_size=default;
drop table t1; drop table t1;
# #
@ -3357,10 +3707,40 @@ test.t1 analyze status OK
select db_name, table_name, column_name, select db_name, table_name, column_name,
min_value, max_value, min_value, max_value,
nulls_ratio, avg_frequency, nulls_ratio, avg_frequency,
hist_size, hist_type, HEX(histogram) hist_size, hist_type, decode_histogram(hist_type,histogram)
FROM mysql.column_stats; FROM mysql.column_stats
db_name table_name column_name min_value max_value nulls_ratio avg_frequency hist_size hist_type HEX(histogram) ORDER BY db_name, table_name, column_name;
test t1 a 1 5 0.0000 1.0000 5 JSON_HB 7B0A202022686973746F6772616D5F68625F7632223A205B0A202020207B0A202020202020227374617274223A202231222C0A2020202020202273697A65223A20302E322C0A202020202020226E6476223A20310A202020207D2C0A202020207B0A202020202020227374617274223A202232222C0A2020202020202273697A65223A20302E322C0A202020202020226E6476223A20310A202020207D2C0A202020207B0A202020202020227374617274223A202233222C0A2020202020202273697A65223A20302E322C0A202020202020226E6476223A20310A202020207D2C0A202020207B0A202020202020227374617274223A202234222C0A2020202020202273697A65223A20302E322C0A202020202020226E6476223A20310A202020207D2C0A202020207B0A202020202020227374617274223A202235222C0A20202020202022656E64223A202235222C0A2020202020202273697A65223A20302E322C0A202020202020226E6476223A20310A202020207D0A20205D0A7D db_name table_name column_name min_value max_value nulls_ratio avg_frequency hist_size hist_type decode_histogram(hist_type,histogram)
test t1 a 1 5 0.0000 1.0000 5 JSON_HB {
"histogram_hb_v2": [
{
"start": "1",
"size": 0.2,
"ndv": 1
},
{
"start": "2",
"size": 0.2,
"ndv": 1
},
{
"start": "3",
"size": 0.2,
"ndv": 1
},
{
"start": "4",
"size": 0.2,
"ndv": 1
},
{
"start": "5",
"end": "5",
"size": 0.2,
"ndv": 1
}
]
}
set histogram_size=0; set histogram_size=0;
set histogram_type=@SINGLE_PREC_TYPE; set histogram_type=@SINGLE_PREC_TYPE;
drop table t1; drop table t1;
@ -7460,3 +7840,16 @@ b a
1 foo 1 foo
2 bar 2 bar
drop table t1; drop table t1;
#
# MDEV-26711: Values in JSON histograms are not properly quoted
#
create table t1 (a varchar(32));
insert into t1 values ('this is "quoted" text');
set histogram_type= JSON_HB;
analyze table t1 persistent for all;
Table Op Msg_type Msg_text
test.t1 analyze status Engine-independent statistics collected
test.t1 analyze status OK
select * from t1 where a = 'foo';
a
drop table t1;

View File

@ -182,3 +182,14 @@ SET histogram_type= JSON_HB;
ANALYZE TABLE t1 PERSISTENT FOR ALL; ANALYZE TABLE t1 PERSISTENT FOR ALL;
SELECT * FROM t1; SELECT * FROM t1;
drop table t1; drop table t1;
--echo #
--echo # MDEV-26711: Values in JSON histograms are not properly quoted
--echo #
create table t1 (a varchar(32));
insert into t1 values ('this is "quoted" text');
set histogram_type= JSON_HB;
analyze table t1 persistent for all;
select * from t1 where a = 'foo';
drop table t1;

View File

@ -238,6 +238,8 @@ public:
Json_writer& add_member(const char *name, size_t len); Json_writer& add_member(const char *name, size_t len);
/* Add atomic values */ /* Add atomic values */
/* Note: the add_str methods do not do escapes. Should this change? */
void add_str(const char* val); void add_str(const char* val);
void add_str(const char* val, size_t num_bytes); void add_str(const char* val, size_t num_bytes);
void add_str(const String &str); void add_str(const String &str);

View File

@ -20,6 +20,73 @@
#include "sql_statistics.h" #include "sql_statistics.h"
#include "opt_histogram_json.h" #include "opt_histogram_json.h"
/*
Un-escape a JSON string and save it into *out.
*/
static bool json_unescape_to_string(const char *val, int val_len, String* out)
{
// Make sure 'out' has some memory allocated.
if (!out->alloced_length() && out->alloc(128))
return true;
while (1)
{
uchar *buf= (uchar*)out->ptr();
out->length(out->alloced_length());
int res= json_unescape(&my_charset_utf8mb4_bin,
(const uchar*)val,
(const uchar*)val + val_len,
&my_charset_utf8mb4_bin,
buf, buf + out->length());
if (res > 0)
{
out->length(res);
return false; // Ok
}
// We get here if the unescaped string didn't fit into memory.
if (out->alloc(out->alloced_length()*2))
return true;
}
}
/*
Escape a JSON string and save it into *out.
*/
static bool json_escape_to_string(const char *val, int val_len, String* out)
{
// Make sure 'out' has some memory allocated.
if (!out->alloced_length() && out->alloc(128))
return true;
while (1)
{
uchar *buf= (uchar*)out->ptr();
out->length(out->alloced_length());
int res= json_escape(&my_charset_utf8mb4_bin,
(const uchar*)val,
(const uchar*)val + val_len,
&my_charset_utf8mb4_bin,
buf, buf + out->length());
if (res > 0)
{
out->length(res);
return false; // Ok
}
// We get here if the escaped string didn't fit into memory.
if (out->alloc(out->alloced_length()*2))
return true;
}
}
class Histogram_json_builder : public Histogram_builder class Histogram_json_builder : public Histogram_builder
{ {
Histogram_json_hb *histogram; Histogram_json_hb *histogram;
@ -72,6 +139,7 @@ public:
~Histogram_json_builder() override = default; ~Histogram_json_builder() override = default;
private:
bool bucket_is_empty() { return bucket.ndv == 0; } bool bucket_is_empty() { return bucket.ndv == 0; }
/* /*
@ -92,13 +160,13 @@ public:
/* /*
Same as finalize_bucket() but also provide the bucket's end value. Same as finalize_bucket() but also provide the bucket's end value.
*/ */
void finalize_bucket_with_end_value(void *elem) bool finalize_bucket_with_end_value(void *elem)
{ {
column->store_field_value((uchar*) elem, col_length); writer.add_member("end");
StringBuffer<MAX_FIELD_WIDTH> val; if (append_column_value(elem))
String *str= column->val_str(&val); return true;
writer.add_member("end").add_str(str->c_ptr_safe());
finalize_bucket(); finalize_bucket();
return false;
} }
/* /*
@ -106,18 +174,38 @@ public:
@param elem The value we are writing @param elem The value we are writing
@param cnt The number of such values. @param cnt The number of such values.
*/ */
void start_bucket(void *elem, longlong cnt) bool start_bucket(void *elem, longlong cnt)
{ {
DBUG_ASSERT(bucket.size == 0); DBUG_ASSERT(bucket.size == 0);
column->store_field_value((uchar*) elem, col_length);
StringBuffer<MAX_FIELD_WIDTH> val;
String *str= column->val_str(&val);
writer.start_object(); writer.start_object();
writer.add_member("start").add_str(str->c_ptr_safe()); writer.add_member("start");
if (append_column_value(elem))
return true;
bucket.ndv= 1; bucket.ndv= 1;
bucket.size= cnt; bucket.size= cnt;
return false;
}
/*
Append the passed value into the JSON writer as string value
*/
bool append_column_value(void *elem)
{
StringBuffer<MAX_FIELD_WIDTH> val;
// Get the text representation of the value
column->store_field_value((uchar*) elem, col_length);
String *str= column->val_str(&val);
// Escape the value for JSON
StringBuffer<MAX_FIELD_WIDTH> escaped_val;
if (json_escape_to_string(str->ptr(), str->length(), &escaped_val))
return true;
// Note: The Json_writer does NOT do escapes (perhaps this should change?)
writer.add_str(escaped_val.c_ptr_safe());
return false;
} }
/* /*
@ -129,6 +217,7 @@ public:
bucket.size += cnt; bucket.size += cnt;
} }
public:
/* /*
@brief @brief
Add data to the histogram. Add data to the histogram.
@ -169,9 +258,14 @@ public:
finalize_bucket(); finalize_bucket();
// Start/end the separate bucket for this value group. // Start/end the separate bucket for this value group.
start_bucket(elem, elem_cnt); if (start_bucket(elem, elem_cnt))
return 1; // OOM
if (records == count) if (records == count)
finalize_bucket_with_end_value(elem); {
if (finalize_bucket_with_end_value(elem))
return 1;
}
else else
finalize_bucket(); finalize_bucket();
} }
@ -191,21 +285,28 @@ public:
*/ */
append_to_bucket(bucket_capacity - bucket.size); append_to_bucket(bucket_capacity - bucket.size);
if (records == count && !overflow) if (records == count && !overflow)
finalize_bucket_with_end_value(elem); {
if (finalize_bucket_with_end_value(elem))
return 1;
}
else else
finalize_bucket(); finalize_bucket();
if (overflow > 0) if (overflow > 0)
{ {
// Then, start the new bucket with the remaining values. // Then, start the new bucket with the remaining values.
start_bucket(elem, overflow); if (start_bucket(elem, overflow))
return 1;
} }
} }
else else
{ {
// Case #3: there's not enough values to fill the current bucket. // Case #3: there's not enough values to fill the current bucket.
if (bucket_is_empty()) if (bucket_is_empty())
start_bucket(elem, elem_cnt); {
if (start_bucket(elem, elem_cnt))
return 1;
}
else else
append_to_bucket(elem_cnt); append_to_bucket(elem_cnt);
} }
@ -273,6 +374,7 @@ bool Histogram_json_hb::parse(MEM_ROOT *mem_root, Field *field,
double cumulative_size= 0.0; double cumulative_size= 0.0;
size_t end_member_index= (size_t)-1; size_t end_member_index= (size_t)-1;
StringBuffer<128> value_buf; StringBuffer<128> value_buf;
StringBuffer<128> unescape_buf;
if (JSV_OBJECT != json_type(hist_data, hist_data + hist_data_len, if (JSV_OBJECT != json_type(hist_data, hist_data + hist_data_len,
&obj1, &obj1_len)) &obj1, &obj1_len))
@ -372,7 +474,14 @@ bool Histogram_json_hb::parse(MEM_ROOT *mem_root, Field *field,
} }
uint len_to_copy= field->key_length(); uint len_to_copy= field->key_length();
field->store_text(val, val_len, &my_charset_bin); if (json_unescape_to_string(val, val_len, &unescape_buf))
{
err_pos= ndv;
err= "Out of memory";
goto error;
}
field->store_text(unescape_buf.ptr(), unescape_buf.length(),
&my_charset_bin);
value_buf.alloc(field->pack_length()); value_buf.alloc(field->pack_length());
uint bytes= field->get_key_image((uchar*)value_buf.ptr(), len_to_copy, uint bytes= field->get_key_image((uchar*)value_buf.ptr(), len_to_copy,
Field::itRAW); Field::itRAW);
@ -392,7 +501,14 @@ bool Histogram_json_hb::parse(MEM_ROOT *mem_root, Field *field,
} }
if (ret != JSV_NOTHING) if (ret != JSV_NOTHING)
{ {
field->store_text(end_val, end_val_len, &my_charset_bin); if (json_unescape_to_string(end_val, end_val_len, &unescape_buf))
{
err_pos= bucket_info;
err= "Out of memory";
goto error;
}
field->store_text(unescape_buf.ptr(), unescape_buf.length(),
&my_charset_bin);
value_buf.alloc(field->pack_length()); value_buf.alloc(field->pack_length());
uint bytes= field->get_key_image((uchar*)value_buf.ptr(), len_to_copy, uint bytes= field->get_key_image((uchar*)value_buf.ptr(), len_to_copy,
Field::itRAW); Field::itRAW);