From 9954aecc2b9dc41baa027324af9824d6f58c2574 Mon Sep 17 00:00:00 2001 From: Michael Okoko Date: Wed, 30 Jun 2021 05:51:08 +0100 Subject: [PATCH] Store bucket bounds and extend test cases for JSON histogram This fixes the memory allocation for json histogram builder and add more column types for testing. Some challenges at the moment include: * Garbage value at the end of JSON array still persists. * Garbage value also gets appended to bucket values if the column is a primary key. * There's a memory leak resulting in a "Warning: Memory not freed" message at the end of tests. Signed-off-by: Michael Okoko --- mysql-test/main/statistics_json.result | Bin 3493 -> 24466 bytes mysql-test/main/statistics_json.test | 56 +++++++++++++++++-------- sql/sql_statistics.cc | 21 +++++++--- sql/sql_statistics.h | 6 +-- 4 files changed, 57 insertions(+), 26 deletions(-) diff --git a/mysql-test/main/statistics_json.result b/mysql-test/main/statistics_json.result index 3e7a9a43634820375c226f93045309fec268058c..a6b9458823ea055d35cedfc0ec6b5d77778b9b46 100644 GIT binary patch literal 24466 zcmeHP&2QVt6-N#QN*;<{dnhnT4oS9NgERbwkws)#b`(ppD#=NcATUPKSYnqV9e&tR z4&7}RJ@?vUi=sXC(nF8EY|z^lXzx9=i=z84^w>vo_#w;oVi!?FIpRT_H#401y^nb_ zdNbs0@iyu)KS05l2I!2T0dvasdrZ<_cp`< ziqlbYh=y(m_{4i#V!hR^b$Y1Y>K#N+cWaGWwTFE6a%1)f9eeI1$~wYFfqO*MEwNkc z<)TVBxf99YPJ%z@DMpT^EwR$7G@d>yfNj@0-FmlIYxU6XK?hYDjV-YkiB32;5t{xe zZZ%1;SAiIS`ZfbBP>0&n*G!0X5&A;S8Ceed&~k?Cgu#y!fL>_;g0}`f0=?O65U}B8 zk00y2(2pxw?v^CEped06X!cq3>>4xO<3cLEH24*M}BSsUvMvQD_Ue;D zP1E=^rwE>*5j?qE@?Q%cU4owHtNgB`Ti=#}dGNy-yD=_De_n>M6=8|%g9{!j zoYwf=oMY+RLIpO(;5ZJ$tw%hCWM^ zkvGm=RP4a7@LrL>oAq8*H*^ATiY{_t+?^C$!3N5F0VGprun{AO2RctiIFe*C0bR;% zq3sLZ%+(&G>JFJu*?uBCds2&BNbV{sohFK}L&pN2iMrrOnU9*5<9g^I4D6}pj8NUT zsWbGGn(zKMF|h4gV`ZQ=o~;VhYGrq2pdRe43{+V4#8>%;RjpHB6{z4HR|V=`b9-f= z9zI+pSeGfX3s>sz7b-@2m{e!|tj;ZPlx*0<~RPnPJt2kWhS; zf4S1F9Ig!1W@lxfK3$n%)oZp_MXHDUD`NnU>#G1&e38b7*7vmp-ZKgO_^#Luy<<9H z6^M00Bwc|3x-RJ|MA9LqI~&)NHOS%tS~Uu@^B}F3r)47w1t<9$&eIr6QG`1wfVQK? zc}tRM7AS-0(Iy0MS=ZIPOoj&Y7FCQ*J*%l>UCC>jvLxpe#zZ4|?^uI|@=_ujvY9hL z3|TGmBO6#zvu+f{G_@R`YUq-ZKdYOXWagEDXGJb@swoC8loOpOW>PM`NT#2q{)%n) zB($SVB}V`&ARv6zh<1f$g1Z=UZe?3OQTZ1K&kwo2R3Ni5ZH6)52 zr6mzPg34_bYjLBT(lCiwf#LK))(TNgtw5QYo>MUFjRK`h=8vN1*2}u(FJ*oy@*U1>pK!)f zwmUrV7L0Ci(|gFn8(GfIv-5T8JLl`vTRbY3J@u%4zD~3Dlf<60I_-x%?wjRw(vVe_ zbJ#8Pe&rx-clqWFJX@Sd6#}CpnkAx>3B_R&%|IkGaHs=ogLFQ`*pP{qQ8JOSkx?p< zO+BOFseE=MB$~R?GGf5oDW{Zd8ZgbJl%`8WirQQcljRSSn_w}_8^D<(I%3gj7Qr+( z&v{j8lOL6kHq$c_oBYrONfYuWKQ^Il^7AcbR$1TVM|NN}%L@%1Qs3ZMMJgyPwxWYc zmE+NvI&jE{I>Iw+(x?41HoIJ?cq8D+a_E&a$#M!4f3r=!J{`JCJA`l*?;N2U_-Ald z?W&r~D*Fb@g(X4c2;Bjw`0hSqHXJ;;x9pQAGqT_aU$em&b=V0Usk8<&6_!pU2x~U# zQu`DJi??({=K&-LcRlJ1*!i+@&%)sKw6EUI?!TlxkQf z2s`cs7U87=mZ>X2*rgMJ1T`mU!au&p!VXMRyxCr|t=FKKt_1KX_w!_jP7pVw-uxY4GM3fBmg>dcj=!_OgmQ zCdvF)wV4O&u;9a82JcbBN@r2meAjz$>;cyI;i7uX+MVyZ9?%muV4gLg2-YXvi1)1e z+f2-Ny&q14@Len;=eurN@b=%kSTW9beGn{0zH_>o@4Djx(;vTsAmx15-4oji<{&O0 zW4`M{+kZDfyP2-nJDa7Rp#WUv#@Cz=*yH>9ZPyFJQOMxU>$iOU?Tf}oSH8Btz{0*}2#x^{oCvf;Xl4ALua`KKck>BeX4l{5JpWmW9Iwu*Sty77q8A>y4~MoTDoX zhY+&1fx)+Fo+Z;^!=1p_T^DhcMTbq-qo)f|Vp%wBht5xF-&%kY%feyT9W7)wM5rtr z_NU48s5b literal 3493 zcmeHK&2HO95Ec>y!S2;PG&v38gUEt4EGJG82nH-m>qNCg8YG=W2?}B*u4Mw`%Hb{* zsh2)NpQ88PgWUTFeT2M4yStPnI+2rHf*?P0;Lmb)=jWUGcDac+A!I^=JfjkR4LlbT z0TiY*O=37pgq)0Xnt`03GoU<%mkdUfgBs6SsV`)n@Ueh|LpB#L(yh@XEixVnNu|J> zIL;^MAn7n=kemXWsWk=2MiUDrVPDWGiHB^~KnPQ$Dbn#8N@KB^!ux zn$CY`NMlk6MEE%2?B|5X?3}5K%B23HkV!@Y6n-fiNfuu<-8;e^614s90a*ii6CWll zjZvteL~fC$=!nfxhn>o$`qHY_+c#&E4pzA*Dk?7bb z?dZ`#-KlrH6V)}90aNRj{9d*j8m5zBb@fF<`~-)-*Ix_v1(X%Us=%!Bzshnomz9+1 z$5m!l30OtA^wU%(QiVN4)gGFnE4yxnwzsyGpvHr(U2`vAcXsa~oe=aK1Khc^*d6EH zA^h84pIH7}$c^hYBY)ql{qyRNTJ7I!<-5M>_aATFG~@dl<$YuE`StA|o@f_0YBy?c ztfza|3@O)2cnl}k)4X@$mFNUKGDVn`3%^8j7P^e940x^~Bg5|&NqQ2br zXjMF)t0^T2fA_R-S=uIL#;7S`w^~-*T$v t_K#T=D;j$Xtdgp>o$q%|+22kc8fH)WcH;eB-)rj|cYN=7`Lol+{{wRFJh=b> diff --git a/mysql-test/main/statistics_json.test b/mysql-test/main/statistics_json.test index 9ffb27b621d..acc44456d8f 100644 --- a/mysql-test/main/statistics_json.test +++ b/mysql-test/main/statistics_json.test @@ -1,34 +1,56 @@ --source include/have_stat_tables.inc +--source include/have_sequence.inc +--source include/analyze-format.inc --echo # ---echo # Test that JSON is a valid histogram type and we can store JSON strings in mysql.column_stats +--echo # Test that we can store JSON arrays in histogram field mysql.column_stats when histogram_type=JSON --echo # --disable_warnings drop table if exists t1; --enable_warnings set @save_histogram_type=@@histogram_type; +set @save_histogram_size=@@histogram_size; CREATE TABLE t1 ( - a int NOT NULL PRIMARY KEY, - b varchar(32) -) ENGINE=MYISAM; + a int, + b varchar(32), + c char(2), + d double +); + +--disable_result_log +INSERT INTO t1 SELECT seq, seq, seq, seq from seq_1_to_25; +--enable_result_log SET histogram_type='JSON'; -SELECT @@histogram_type; +# set histogram size to be < row count (25 in this case) to see how histogram behaves +set histogram_size=10; -INSERT INTO t1 VALUES - (7, 'xxxxxxxxxxxxxxxxxxxxxxxxxx'), - (17, 'vvvvvvvvvvvvv'); +ANALYZE TABLE t1 PERSISTENT FOR ALL; +SELECT * FROM mysql.column_stats WHERE table_name='t1'; +DELETE FROM mysql.column_stats; +DROP TABLE t1; -ANALYZE TABLE t1 PERSISTENT FOR COLUMNS(b) INDEXES(); -DESCRIBE mysql.column_stats; -SELECT * FROM mysql.column_stats; +create schema world; +use world; +--disable_query_log +--disable_result_log +--disable_warnings +--source include/world_schema_utf8.inc +--source include/world.inc +--enable_warnings +--enable_result_log +--enable_query_log + +set histogram_type='JSON'; +set histogram_size=25; +--disable_result_log +ANALYZE TABLE Country PERSISTENT FOR ALL; +--enable_result_log + +SELECT column_name, min_value, max_value, hist_size, hist_type, histogram FROM mysql.column_stats; set histogram_type=@save_histogram_type; +set histogram_size=@save_histogram_size; -## Remove against Milestone-2 -ANALYZE TABLE t1 PERSISTENT FOR COLUMNS(b) INDEXES(); -SELECT * FROM mysql.column_stats; -select table_name, hist_type, decode_histogram(hist_type, histogram ) from mysql.column_stats; - -DROP TABLE t1; \ No newline at end of file +DROP SCHEMA world; \ No newline at end of file diff --git a/sql/sql_statistics.cc b/sql/sql_statistics.cc index d2cad99f130..cbbd7d30b42 100644 --- a/sql/sql_statistics.cc +++ b/sql/sql_statistics.cc @@ -1073,9 +1073,16 @@ public: stat_field->store(stats->histogram.get_type() + 1); break; case COLUMN_STAT_HISTOGRAM: + if (stats->histogram.get_type() == JSON) + { + stat_field->store((char *) stats->histogram.get_values(), + strlen((char *) stats->histogram.get_values()), &my_charset_bin); + } else + { stat_field->store((char *) stats->histogram.get_values(), stats->histogram.get_size(), &my_charset_bin); - break; + } + break; } } } @@ -1588,7 +1595,7 @@ public: class Histogram_builder_json : public Histogram_builder { -std::vector bucket_bounds; +std::vector bucket_bounds; public: Histogram_builder_json(Field *col, uint col_len, ha_rows rows) @@ -1619,9 +1626,10 @@ public: return 0; if (count > bucket_capacity * (curr_bucket + 1)) { - auto *val= new StringBuffer; - column->val_str(val); - bucket_bounds.emplace_back(String(val->ptr(), val->length(), &my_charset_bin)); + column->store_field_value((uchar *) elem, col_length); + StringBuffer val; + column->val_str(&val); + bucket_bounds.emplace_back(val.ptr()); curr_bucket++; } return 0; @@ -1631,9 +1639,10 @@ public: Json_writer *writer = new Json_writer(); writer->start_array(); for(auto& value: bucket_bounds) { - writer->add_str(value); + writer->add_str(value.c_str()); } writer->end_array(); + histogram->set_size(bucket_bounds.size()); histogram->set_values((uchar *) writer->output.get_string()->ptr()); } }; diff --git a/sql/sql_statistics.h b/sql/sql_statistics.h index 555d03cfa22..a554721d50b 100644 --- a/sql/sql_statistics.h +++ b/sql/sql_statistics.h @@ -153,9 +153,9 @@ private: { switch (type) { case SINGLE_PREC_HB: + case JSON: return ((uint) (1 << 8) - 1); case DOUBLE_PREC_HB: - case JSON: return ((uint) (1 << 16) - 1); } return 1; @@ -166,9 +166,9 @@ public: { switch (type) { case SINGLE_PREC_HB: + case JSON: return size; case DOUBLE_PREC_HB: - case JSON: return size / 2; } return 0; @@ -180,9 +180,9 @@ private: DBUG_ASSERT(i < get_width()); switch (type) { case SINGLE_PREC_HB: + case JSON: return (uint) (((uint8 *) values)[i]); case DOUBLE_PREC_HB: - case JSON: return (uint) uint2korr(values + i * 2); } return 0;