From 57886afd7e133ee9a43e10aad98d1ac1449771a8 Mon Sep 17 00:00:00 2001 From: drrtuy Date: Mon, 4 Aug 2025 15:50:52 +0100 Subject: [PATCH] chore(docs): added a shared notepad for the feature --- docs/QA_parallel.md | 1136 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 1136 insertions(+) create mode 100644 docs/QA_parallel.md diff --git a/docs/QA_parallel.md b/docs/QA_parallel.md new file mode 100644 index 000000000..c8cd5ce76 --- /dev/null +++ b/docs/QA_parallel.md @@ -0,0 +1,1136 @@ + + +- addExpresssionStepsToBps, combineJobStepsByTable + - if TableInfoMap fTableOid is 0 + + jobInfo.keyInfo->tupleKeyMap +CrossEngineStep is created, replacing TBPS if fTableOid is 0 +setTupleInfo sets + +``` +mcsSetConfig CrossEngineSupport User 'cejuser' +mcsSetConfig CrossEngineSupport Password 'Vagrant1|0000001' + +CREATE USER IF NOT EXISTS'cejuser'@'localhost' IDENTIFIED BY 'Vagrant1|0000001'; +GRANT ALL PRIVILEGES ON *.* TO 'cejuser'@'localhost'; +FLUSH PRIVILEGES; +``` + +```SQL +create table i1(i bigint)engine=innodb; +insert into i1 SELECT floor(rand(seq)*50001) FROM seq_1_to_1000; + +analyze table i1 PERSISTENT FOR ALL; +``` + + +```sql +MariaDB [mysql]> select * from column_stats; ++---------+------------+-------------+-----------+-----------+-------------+------------+---------------+-----------+-----------+-----------+ +| db_name | table_name | column_name | min_value | max_value | nulls_ratio | avg_length | avg_frequency | hist_size | hist_type | histogram | ++---------+------------+-------------+-----------+-----------+-------------+------------+---------------+-----------+-----------+-----------+ +| test | i1 | i | 12 | 49977 | 0.0000 | 8.0000 | 1.0183 | 10 | JSON_HB | { + "target_histogram_size": 10, + "collected_at": "2025-05-29 14:54:20", + "collected_by": "11.4.5-3-MariaDB-debug", + "histogram_hb": [ + { + "start": "12", + "size": 0.100081811, + "ndv": 1101 + }, + { + "start": "30480", + "size": 0.100081811, + "ndv": 1101 + }, + { + "start": "31581", 
+ "size": 0.100081811, + "ndv": 1101 + }, + { + "start": "32682", + "size": 0.100081811, + "ndv": 1074 + }, + { + "start": "33756", + "size": 0.100081811, + "ndv": 1072 + }, + { + "start": "34828", + "size": 0.100081811, + "ndv": 1071 + }, + { + "start": "35899", + "size": 0.100081811, + "ndv": 1072 + }, + { + "start": "36971", + "size": 0.100081811, + "ndv": 1072 + }, + { + "start": "38043", + "size": 0.100081811, + "ndv": 1072 + }, + { + "start": "39115", + "end": "49977", + "size": 0.099263703, + "ndv": 1067 + } + ] +} | ++---------+------------+-------------+-----------+-----------+-------------+------------+---------------+-----------+-----------+-----------+ +1 row in set (0.001 sec) +``` + +Get buckets. Need to come up with recursive CTE that reduces buckets down to N, where N is a parallel factor for CES. +```sql +SELECT + JSON_UNQUOTE(JSON_EXTRACT(hb.value, '$.start')) AS start, + JSON_UNQUOTE(JSON_EXTRACT(hb.value, '$.size')) AS size, + JSON_UNQUOTE(JSON_EXTRACT(hb.value, '$.ndv')) AS ndv +FROM + column_stats AS cs, + JSON_TABLE( + cs.histogram, + '$.histogram_hb[*]' COLUMNS ( + value JSON PATH '$' + ) + ) AS hb +WHERE + cs.db_name = 'test' and + cs.table_name = 'i1' and + cs.column_name = 'i' and + cs.hist_type = 'JSON_HB'; +``` + +MDB +Histogram_json_hb +read_statistics_for_table once per runtime +get_column_range_cardinality -> every Field::read_stats::histogram -> ? Histogram_json_hb ? 
+ +```SQL +select * from cs1 union all select cs1.i from cs1, cs1 as cs11 where cs1.i=cs1.i; +select * from cs1 union all select cs2.i from cs2, cs3 where cs2.i=cs3.i; + +select * from cs1 union all select cs1.i from cs1, cs1 as cs11 where cs1.i=cs1.i; + +CREATE TABLE cs1 (i bigint) engine=columnstore; +CREATE table i1(i bigint); +select * from i1 union all select i1.i from i1, cs1 where i1.i=cs1.i; + +select i1.i from i1 union all select i1.i from i1, cs1 where i1.i=cs1.i; +Нужно менять верхний уровень + +select i1.i from (select i1.i from i1 union all select i1.i from i1) sub union all select i1.i from i1, cs1 where i1.i=cs1.i; +select * from (select i1.i from i1 union all select i1.i from i1) sub union all select i1.i from i1, cs1 where i1.i=cs1.i; + + +select i1.i from i1, cs1 where i1.i=cs1.i union all select * from (select i1.i from i1 union all select i1.i from i1) sub; + +select i1.i from i1, cs1 where i1.i=cs1.i union all select i1.i from i1; +``` + + +- Простой вариант переписывать leaf без JOIN +- Сложный переписывать leaf с JOIN + - обе MCS + - одна MCS + +Получается, что если в запросе UNION с UNIT из Foreign, main query будет содержать запрос из primary union unit -> нужен detect primary или нет и переставлять + +select * from i1; + +select i1.i from i1 rewritten plan differs in columnmap + + +adjustLastStep +JobInfo:: pjColList + +When column doesn't have table name it fails to ct +makeSubQueryStep and projectSimpleColumn +via mapping that delivers ct +oid = 100 p jobInfo + +buildSimpleColFromDerivedTable + +to add derived table: +- + +so colPosition for SC is used to build a mapping of subquery columns' types and store it in JobInfo::vtableColTypes using (subQ table oid, table.alias). 
+В JobList-е есть два неявных инварианта, с поиском которых я провозился дня три: + +- TableAliasName для sub должен иметь пустую schema и tableName, но иметь alias +- все ReturnedColumn исходного CSEP должны иметь пустые schema и table и ReturnedColumn::colPosition() должен быть явно установлен в соответствии с порядком возвращаемых колонок - иначе возвращаемый из sub RowGroup экзотически взорвёт весь JobList  - вариантов масса + + +select * from (select * from i1) i; + + + +``` +#include "simplecolumn.h" +#include "execplan/calpontsystemcatalog.h" +#include "simplefilter.h" +#include "constantcolumn.h" + +using namespace execplan; +const SOP opeq(new Operator("=")); + + + +int main() +{ + CalpontSelectExecutionPlan csep; + + CalpontSelectExecutionPlan::ReturnedColumnList returnedColumnList; + CalpontSelectExecutionPlan::ColumnMap colMap; + + string columnlength = CALPONT_SCHEMA + "." + SYSCOLUMN_TABLE + "." + COLUMNLEN_COL; + + SimpleColumn* col[1]; + col[0] = new SimpleColumn(columnlength, 22222222); + + SRCP srcp; + srcp.reset(col[0]); + colMap.insert({columnlength, srcp}); + csep.columnMapNonStatic(colMap); + srcp.reset(col[0]->clone()); + returnedColumnList.push_back(srcp); + csep.returnedCols(returnedColumnList); + + { + SCSEP csepDerived(new CalpontSelectExecutionPlan()); + CalpontSelectExecutionPlan::ReturnedColumnList returnedColumnListLocal; + CalpontSelectExecutionPlan::ColumnMap colMapLocal; + + string columnlength = CALPONT_SCHEMA + "." + SYSCOLUMN_TABLE + "." 
+ COLUMNLEN_COL; + + SimpleColumn* col[1]; + col[0] = new SimpleColumn(columnlength, 22222222); + + SRCP srcpLocal; + srcpLocal.reset(col[0]); + colMapLocal.insert({columnlength, srcpLocal}); + csepDerived->columnMapNonStatic(colMapLocal); + + srcp.reset(col[0]->clone()); + returnedColumnListLocal.push_back(srcpLocal); + csepDerived->returnedCols(returnedColumnList); + + CalpontSelectExecutionPlan::SelectList derivedTables; + derivedTables.push_back(csepDerived); + csep.derivedTableList(derivedTables); + } + + CalpontSelectExecutionPlan::TableList tableList = {execplan::CalpontSystemCatalog::TableAliasName("", "", "alias")}; + csep.tableList(tableList); + + CalpontSelectExecutionPlan::SelectList unionVec; + + for (size_t i = 0; i < 3; ++i) + { + SCSEP plan(new CalpontSelectExecutionPlan()); + CalpontSelectExecutionPlan::ReturnedColumnList returnedColumnListLocal; + CalpontSelectExecutionPlan::ColumnMap colMapLocal; + + SRCP srcpLocal; + srcpLocal.reset(col[0]); + colMapLocal.insert({columnlength, srcpLocal}); + plan->columnMapNonStatic(colMapLocal); + srcpLocal.reset(col[0]->clone()); + returnedColumnListLocal.push_back(srcpLocal); + plan->returnedCols(returnedColumnListLocal); + + plan->txnID(csep.txnID()); + plan->verID(csep.verID()); + plan->sessionID(csep.sessionID()); + plan->columnMapNonStatic(colMapLocal); + plan->returnedCols(returnedColumnListLocal); + unionVec.push_back(plan); + + // std::cout << plan->toString() << std::endl; + } + + csep.unionVec(unionVec); + std::cout << csep.toString() << std::endl; +} +``` + + +set columnstore_unstable_optimizer=on; +set @@optimizer_switch='derived_merge=off'; + +Any type of columns must produce SimpleColumn that preserves ?OID?, op type, CS, timezone and position but sets alias +sc = new SimpleColumn(); +sc->columnName(tcn.column); +sc->tableName(tcn.table); +sc->schemaName(tcn.schema); +sc->oid(oidlist[j].objnum); +sc->alias(!ifp->is_explicit_name() ? 
tcn.column : ifp->name.str); +sc->tableAlias(gwi.tbList[i].alias); +sc->viewName(viewName, lower_case_table_names); +sc->partitions(gwi.tbList[i].partitions); +sc->resultType(ct); +sc->timeZone(gwi.timeZone); + +cols = csep->returnedCols() +sc->resultType(cols[j]->resultType()); + +select i as c1, i as c2 from i1; doesn't work properly + + newConstantColumnNotNullUsingValNativeNoTz + + + +```SQL + +set columnstore_unstable_optimizer=on; +set @@optimizer_switch='derived_merge=off'; + + +select + s_name, + count(*) as numwait + from + supplier, + (select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem) l1, + orders, + nation + where + s_suppkey = l1.l_suppkey + and o_orderkey = l1.l_orderkey + and o_orderstatus = 'F' + and l1.recdate_gt_commitdate = 1 + and exists( + select + l2.l_orderkey + from + lineitem l2 + where + l2.l_orderkey = l1.l_orderkey + and l2.l_suppkey <> l1.l_suppkey + ) + and not exists ( + select + l3.l_orderkey + from + lineitem l3 + where + l3.l_orderkey = l1.l_orderkey + and l3.l_suppkey <> l1.l_suppkey + and l3.recdate_gt_commitdate = 1 + ) + and s_nationkey = n_nationkey + and n_name = 'SAUDI ARABIA' +group by + s_name +order by + numwait desc, + s_name +limit + 100; +``` + +```SQL +select + s_name, + count(*) as numwait + from + supplier, + lineitem l1, + orders, + nation + where + s_suppkey = l1.l_suppkey + and o_orderkey = l1.l_orderkey + and o_orderstatus = 'F' + and l1.recdate_gt_commitdate = 1 + and exists( + select + l_orderkey + from + lineitem l2 + where + l2.l_orderkey = l1.l_orderkey + and l2.l_suppkey <> l1.l_suppkey + ) + and not exists ( + select + l_orderkey + from + lineitem l3 + where + l3.l_orderkey = l1.l_orderkey + and l3.l_suppkey <> l1.l_suppkey + and l3.recdate_gt_commitdate = 1 + ) + and s_nationkey = n_nationkey + and n_name = 'SAUDI ARABIA' +group by + s_name +order by + numwait desc, + s_name +limit + 100; +``` + +Potentially representative query +```SQL +select + s_name, + count(*) as numwait + from + 
supplier, + lineitem l1 + where + s_suppkey = l1.l_suppkey + and exists( + select + l_orderkey + from + lineitem l2 + where + l2.l_orderkey = l1.l_orderkey + ) +group by + s_name +order by + numwait desc, + s_name +limit + 100; +``` + + +```SQL +select + s_name, + count(*) as numwait + from + supplier, + (select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey < 3000961 UNION ALL select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey >= 3000961 ) l1, + orders, + nation + where + s_suppkey = l1.l_suppkey + and o_orderkey = l1.l_orderkey + and o_orderstatus = 'F' + and l1.recdate_gt_commitdate = 1 + and exists( + select * from + ( + select + l_orderkey,l_suppkey + from + lineitem + where + l_orderkey < 3000961 + UNION ALL + select + l_orderkey,l_suppkey + from + lineitem + where + l_orderkey >= 3000961 + ) l2 + where + l2.l_orderkey = l1.l_orderkey + and l2.l_suppkey <> l1.l_suppkey + ) + and not exists ( + select * from + ( + select + l_orderkey,l_suppkey,recdate_gt_commitdate + from + lineitem l3_s + where + l3_s.l_orderkey < 3000961 + and l3_s.l_orderkey >= 0 + and l3_s.recdate_gt_commitdate = 1 + UNION ALL + select + l_orderkey,l_suppkey,recdate_gt_commitdate + from + lineitem l3_s + where + l_orderkey >= 3000961 and l3_s.l_orderkey < 18000000000 + and l3_s.recdate_gt_commitdate = 1 + ) l3 + where + l3.l_orderkey = l1.l_orderkey + and l3.l_suppkey <> l1.l_suppkey + + ) + and s_nationkey = n_nationkey + and n_name = 'SAUDI ARABIA' +group by + s_name +order by + numwait desc, + s_name +limit + 100; +``` + +```SQL +select + s_name, + count(*) as numwait + from + supplier, + (select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem) l1, + orders, + nation + where + s_suppkey = l1.l_suppkey + and o_orderkey = l1.l_orderkey + and o_orderstatus = 'F' + and l1.recdate_gt_commitdate = 1 + and exists( + select * from + ( + select + l_orderkey,l_suppkey + from + lineitem + where + l_orderkey < 3000961 + UNION ALL + 
select + l_orderkey,l_suppkey + from + lineitem + where + l_orderkey >= 3000961 + ) l2 + where + l2.l_orderkey = l1.l_orderkey + and l2.l_suppkey <> l1.l_suppkey + ) + and not exists ( + select * from + ( + select + l_orderkey,l_suppkey,recdate_gt_commitdate + from + lineitem + where + l_orderkey < 3000961 + and lineitem.recdate_gt_commitdate = 1 + UNION ALL + select + l_orderkey,l_suppkey,recdate_gt_commitdate + from + lineitem + where + l_orderkey >= 3000961 + and lineitem.recdate_gt_commitdate = 1 + ) l3 + where + l3.l_orderkey = l1.l_orderkey + and l3.l_suppkey <> l1.l_suppkey + ) + and s_nationkey = n_nationkey + and n_name = 'SAUDI ARABIA' +group by + s_name +order by + numwait desc, + s_name +limit + 100; +``` + + +```SQL +select + s_name, + count(*) as numwait + from + supplier, + lineitem l1 + where + s_suppkey = l1.l_suppkey + and l1.recdate_gt_commitdate = 1 + and not exists ( + select + l_orderkey + from + lineitem l3 + where + l3.l_orderkey = l1.l_orderkey + ) +group by + s_name +order by + numwait desc, + s_name +limit + 100; +``` + + +```bash +cs_package_manager.sh install dev stable-23.10 cron/792 -dev cspkg +``` + + +```SQL +select + s_name, + count(*) as numwait + from + supplier, + (select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey < 300096 UNION ALL + select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey BETWEEN 3000960 and 6000960 union all + select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey BETWEEN 6000960 and 9000960 union all + select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey BETWEEN 9000960 and 12000960 union all + select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey BETWEEN 12000960 and 15000960 union all + select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey BETWEEN 15000960 and 18000960 union all + select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem 
where l_orderkey BETWEEN 18000960 and 21000960 union all + select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey BETWEEN 21000960 and 24000960 union all + select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey BETWEEN 24000960 and 27000960 union all + select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey BETWEEN 27000960 and 30000960 union all + select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey BETWEEN 30000960 and 33000960 union all + select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey BETWEEN 33000960 and 36000960 union all + select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey BETWEEN 36000960 and 39000960 union all + select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey BETWEEN 39000960 and 42000960 union all + select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey BETWEEN 42000960 and 45000960 union all + select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey BETWEEN 45000960 and 48000960 union all + select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey BETWEEN 48000960 and 52000960 union all + select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey BETWEEN 52000960 and 55000960 union all + select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey BETWEEN 55000960 and 57000960 union all + select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey BETWEEN 57000960 and 60000960 + ) l1, + orders, + nation + where + s_suppkey = l1.l_suppkey + and o_orderkey = l1.l_orderkey + and o_orderstatus = 'F' + and l1.recdate_gt_commitdate = 1 + and exists( + select * from + ( + select l_orderkey,l_suppkey from lineitem where l_orderkey < 300096 UNION ALL + select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey 
BETWEEN 3000960 and 6000960 union all + select l_orderkey,l_suppkey from lineitem where l_orderkey BETWEEN 6000960 and 9000960 union all + select l_orderkey,l_suppkey from lineitem where l_orderkey BETWEEN 9000960 and 12000960 union all + select l_orderkey,l_suppkey from lineitem where l_orderkey BETWEEN 12000960 and 15000960 union all + select l_orderkey,l_suppkey from lineitem where l_orderkey BETWEEN 15000960 and 18000960 union all + select l_orderkey,l_suppkey from lineitem where l_orderkey BETWEEN 18000960 and 21000960 union all + select l_orderkey,l_suppkey from lineitem where l_orderkey BETWEEN 21000960 and 24000960 union all + select l_orderkey,l_suppkey from lineitem where l_orderkey BETWEEN 24000960 and 27000960 union all + select l_orderkey,l_suppkey from lineitem where l_orderkey BETWEEN 27000960 and 30000960 union all + select l_orderkey,l_suppkey from lineitem where l_orderkey BETWEEN 30000960 and 33000960 union all + select l_orderkey,l_suppkey from lineitem where l_orderkey BETWEEN 33000960 and 36000960 union all + select l_orderkey,l_suppkey from lineitem where l_orderkey BETWEEN 36000960 and 39000960 union all + select l_orderkey,l_suppkey from lineitem where l_orderkey BETWEEN 39000960 and 42000960 union all + select l_orderkey,l_suppkey from lineitem where l_orderkey BETWEEN 42000960 and 45000960 union all + select l_orderkey,l_suppkey from lineitem where l_orderkey BETWEEN 45000960 and 48000960 union all + select l_orderkey,l_suppkey from lineitem where l_orderkey BETWEEN 48000960 and 52000960 union all + select l_orderkey,l_suppkey from lineitem where l_orderkey BETWEEN 52000960 and 55000960 union all + select l_orderkey,l_suppkey from lineitem where l_orderkey BETWEEN 55000960 and 57000960 union all + select l_orderkey,l_suppkey from lineitem where l_orderkey BETWEEN 57000960 and 60000960 + ) l2 + where + l2.l_orderkey = l1.l_orderkey + and l2.l_suppkey <> l1.l_suppkey + ) + and not exists ( + select * from + ( + select 
l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem l3_s where l_orderkey < 300096 UNION ALL + select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem l3_s where l_orderkey BETWEEN 3000960 and 6000960 and l3_s.recdate_gt_commitdate = 1 union all + select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem l3_s where l_orderkey BETWEEN 6000960 and 9000960 and l3_s.recdate_gt_commitdate = 1 union all + select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem l3_s where l_orderkey BETWEEN 9000960 and 12000960 and l3_s.recdate_gt_commitdate = 1 union all + select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem l3_s where l_orderkey BETWEEN 12000960 and 15000960 and l3_s.recdate_gt_commitdate = 1 union all + select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey BETWEEN 15000960 and 18000960 and l3_s.recdate_gt_commitdate = 1 union all + select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey BETWEEN 18000960 and 21000960 and l3_s.recdate_gt_commitdate = 1 union all + select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey BETWEEN 21000960 and 24000960 and l3_s.recdate_gt_commitdate = 1 union all + select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey BETWEEN 24000960 and 27000960 and l3_s.recdate_gt_commitdate = 1 union all + select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey BETWEEN 27000960 and 30000960 and l3_s.recdate_gt_commitdate = 1 union all + select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey BETWEEN 30000960 and 33000960 and l3_s.recdate_gt_commitdate = 1 union all + select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey BETWEEN 33000960 and 36000960 and l3_s.recdate_gt_commitdate = 1 union all + select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey BETWEEN 36000960 and 39000960 and l3_s.recdate_gt_commitdate = 1 
union all + select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey BETWEEN 39000960 and 42000960 and l3_s.recdate_gt_commitdate = 1 union all + select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey BETWEEN 42000960 and 45000960 and l3_s.recdate_gt_commitdate = 1 union all + select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey BETWEEN 45000960 and 48000960 and l3_s.recdate_gt_commitdate = 1 union all + select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey BETWEEN 48000960 and 52000960 and l3_s.recdate_gt_commitdate = 1 union all + select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey BETWEEN 52000960 and 55000960 and l3_s.recdate_gt_commitdate = 1 union all + select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey BETWEEN 55000960 and 57000960 and l3_s.recdate_gt_commitdate = 1 union all + select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey BETWEEN 57000960 and 60000960 and l3_s.recdate_gt_commitdate = 1 + ) l3 + where + l3.l_orderkey = l1.l_orderkey + and l3.l_suppkey <> l1.l_suppkey + + ) + and s_nationkey = n_nationkey + and n_name = 'SAUDI ARABIA' +group by + s_name +order by + numwait desc, + s_name +limit + 100; +``` + + +```SQL +select + l_suppkey, + count(l_orderkey) as numwait + from ( + select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey < 300096 UNION ALL + select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey BETWEEN 3000960 and 6000960 union all + select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey BETWEEN 6000960 and 9000960 union all + select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey BETWEEN 9000960 and 12000960 union all + select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey BETWEEN 12000960 and 15000960 union all + select 
l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey BETWEEN 15000960 and 18000960 union all + select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey BETWEEN 18000960 and 21000960 union all + select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey BETWEEN 21000960 and 24000960 union all + select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey BETWEEN 24000960 and 27000960 union all + select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey BETWEEN 27000960 and 30000960 union all + select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey BETWEEN 30000960 and 33000960 union all + select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey BETWEEN 33000960 and 36000960 union all + select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey BETWEEN 36000960 and 39000960 union all + select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey BETWEEN 39000960 and 42000960 union all + select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey BETWEEN 42000960 and 45000960 union all + select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey BETWEEN 45000960 and 48000960 union all + select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey BETWEEN 48000960 and 52000960 union all + select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey BETWEEN 52000960 and 55000960 union all + select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey BETWEEN 55000960 and 57000960 union all + select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey BETWEEN 57000960 and 60000960 + ) tmp + group by + l_suppkey +order by + numwait desc, + l_suppkey +limit + 100; + +``` + +```SQL +create table lineitem ( +l_orderkey int, +l_partkey int, +l_suppkey int, 
+l_linenumber bigint, +l_quantity decimal(12,2), +l_extendedprice decimal(12,2), +l_discount decimal(12,2), +l_tax decimal(12,2), +l_returnflag char (1), +l_linestatus char (1), +l_shipdate date, +l_commitdate date, +l_receiptdate date, +l_shipinstruct char (25), +l_shipmode char (10), +l_comment varchar (44) +); + +--mariadb tpc_h_i -e "load data infile '/data/columnstore-tooling/tpc-h/dbgen/lineitem.tbl' INTO TABLE lineitem FIELDS TERMINATED BY '|'"; +mariadb test -e "load data infile '/data/regr/testData/tpch/1g/lineitem.tbl' INTO TABLE lineitem FIELDS TERMINATED BY '|'"; + + +SET SESSION alter_algorithm='INSTANT'; +alter table lineitem add column `recdate_gt_commitdate` tinyint(4) not null default 0; +set autocommit=0;LOCK TABLE lineitem WRITE; update lineitem set recdate_gt_commitdate = 1 where l_receiptdate > l_commitdate; commit; UNLOCK TABLES; + +alter table lineitem add index(`l_orderkey`, `l_suppkey`,recdate_gt_commitdate); +``` + +```SQL + +select l_suppkey, count(l_orderkey) as numwait from lineitem group by l_suppkey order by numwait desc, l_suppkey limit 100; + +set columnstore_unstable_optimizer=on; +set @@optimizer_switch='derived_merge=off'; +select l_suppkey, count(l_orderkey) as numwait from (select l_suppkey, l_orderkey from lineitem) s group by l_suppkey order by numwait desc, l_suppkey limit 100; + + +/etc/my.cnf.d/columnstore.cnf +columnstore_innodb_queries_use_mcs = on + +mcsSetConfig CrossEngineSupport User 'cejuser' +mcsSetConfig CrossEngineSupport Password 'Vagrant1|0000001' + +CREATE USER IF NOT EXISTS'cejuser'@'localhost' IDENTIFIED BY 'Vagrant1|0000001'; +GRANT ALL PRIVILEGES ON *.* TO 'cejuser'@'localhost'; +FLUSH PRIVILEGES; +create table i1(col bigint); +insert into i1 values (42),(45),(46); +analyze persistant table i1; +create index on i1 (col); + ANALYZE TABLE i1 PERSISTENT FOR ALL; +alter table i1 add index(col); +select col from i1; + + +set columnstore_unstable_optimizer=on; +set @@optimizer_switch='derived_merge=off'; +select 
calsettrace(1); + +select l_orderkey,l_suppkey from lineitem limit 10; +select l_suppkey,l_orderkey from ( + select l_suppkey,l_orderkey from lineitem where l_orderkey > min and l_orderkey < median_value union all + select l_suppkey,l_orderkey from lineitem where l_orderkey >= median and l_orderkey < last_value + ) s1 +select l_suppkey,l_orderkey from (select l_suppkey,l_orderkey from lineitem limit 10) s1 + + +select l_suppkey,l_orderkey from (select l_suppkey,l_orderkey from lineitem limit 10) s1; + + +select l_suppkey+1,l_orderkey+1 from (select l_suppkey+1,l_orderkey+1 from lineitem limit 10) s1; + +-- must ignore +select nl.l_orderkey,l.l_suppkey,nl.l_suppkey,l.l_suppkey from lineitem l,lineitem_10rows nl WHERE l.l_suppkey=nl.l_suppkey limit 10; +-- test set with join +select nl.l_orderkey,l.l_suppkey,nl.l_suppkey,l.l_orderkey from lineitem l,lineitem_10rows nl WHERE l.l_suppkey=nl.l_suppkey limit 10; + +-- join with derived +select nl.l_orderkey,l.l_suppkey,nl.l_suppkey,l.l_orderkey from (select l_orderkey,l_suppkey from lineitem) l,lineitem_10rows nl WHERE l.l_suppkey=nl.l_suppkey limit 10; +-- subquery with AC +select * from (select l_orderkey,l_suppkey+1 from lineitem limit 10) sa where sa.l_orderkey = 1999905; +-- subquery with FC + +select nl.l_orderkey,l.l_suppkey,nl.l_suppkey,l.l_orderkey from (select l_orderkey,l_suppkey from lineitem) l,(select l_orderkey,l_suppkey from lineitem_10rows) nl WHERE l.l_suppkey=nl.l_suppkey limit 10; + +select nl.l_orderkey,l.l_suppkey,nl.l_suppkey,l.l_orderkey from (select l_orderkey,l_suppkey from lineitem) l,lineitem_10rows nl WHERE l.l_suppkey=nl.l_suppkey limit 10; + +-- feat +-- Запросы без ключевой колонки +select nl.l_orderkey,l.l_suppkey+1 from lineitem l,lineitem_10rows nl WHERE l.l_suppkey=nl.l_suppkey limit 10; +``` + +Check whether the expression uses only index keys (`uses_index_fields_only`) + +Tasks: +- expand supported types, adding varchar and timestamps +- add tests to see if expressions work +- search for a 
specific interesting column available +- replacing rule filter to work on tables also + - alter rule to handle this case +- add support for correlated subquery + +``` +Jul 16 22:02:27 drrtuy-u24 mariadbd[727576]: looking for l_orderkey in ctx.gwi.columnStatisticsMap with size 1 +Jul 16 22:02:27 drrtuy-u24 mariadbd[727576]: key l_orderkey vector size 4 +Jul 16 22:02:27 drrtuy-u24 mariadbd[727576]: columnStatistics.size() 4 +Jul 16 22:02:27 drrtuy-u24 mariadbd[727576]: bucket.start_value 1 +Jul 16 22:02:27 drrtuy-u24 mariadbd[727576]: bucket.start_value 1500738 +Jul 16 22:02:27 drrtuy-u24 mariadbd[727576]: currentLowerBound 1 currentUpperBound 1875794 +Jul 16 22:02:27 drrtuy-u24 mariadbd[727576]: bucket.start_value 3000961 +Jul 16 22:02:27 drrtuy-u24 mariadbd[727576]: bucket.start_value 6000000 +Jul 16 22:02:27 drrtuy-u24 mariadbd[727576]: currentLowerBound 3000961 currentUpperBound 6374911 +Jul 16 22:02:36 drrtuy-u24 mariadbd[727576]: Adding column statistics for l_orderkey +Jul 16 22:02:36 drrtuy-u24 mariadbd[727576]: Type of histogram object: 17Histogram_json_hb +Jul 16 22:02:36 drrtuy-u24 mariadbd[727576]: gwi.columnStatisticsMap[ifp->field->field_name.str].size() 4 +``` + +Алгоритм для derived c учётом semi-join/scalar subquery фильтров: +- пройти по таблицам и найти подходящие + - для каждой подходящей + - оставить фильтр, заменив обращение к локальным колонкам на true + - создать union и добавить + - в проекцию все колонки/выражения, содержащие только колонки таблицы для целевой таблицы + - в фильтр все пригодные выражения из исходного фильтра + - в фильтре найти для каждой данной таблицы эквивалент? + - использовать исходный фильтр, заменив все предикаты с нелокальными колонками на true? + - не должно быть выражений, содержащих несколько таблиц + - что делать с join правилами? 
+ - добавить в map с ключом (schema,table,alias) -> имя derived, если derived был создан +- пройти по проекции и заменить обращения к колонке из derived на обращение к derived + + replaceRefCol + getSimpleCols + pt->walk(getSimpleCols, &fSimpleColumnList); + SC::derivedTable + +Для запроса вида + +```SQL +select nl.l_orderkey,l.l_suppkey,nl.l_suppkey,l.l_orderkey from (select l_orderkey,l_suppkey from lineitem) l,(select l_orderkey,l_suppkey from lineitem_10rows) nl WHERE l.l_suppkey=nl.l_suppkey limit 10; +``` + + +Добавить в фильтр ограничение на то, что может быть в проекции масштабируемой таблицы +Проход по SF для поиска SCs setSimpleColumnList + simpleColumnList +-DWITH_SAFEMALLOC=OFF and sql_alloc.h SqlAlloc class controls memory allocation in a thread, blowing up if I want to copy a Histogram_json_hb instance +Если несколько таблиц, то AC/FC -> можно выбрать и заменить данные в указателях на колонки. + +1 UNION UNIT возвращает нормальное значение для второй колонки, 2ой UNION UNIT в RClist имеет две l_orderkey ! - SC::setSimpleColumnList - добавлял в vector, не очищая его. 
+ +``` +set columnstore_unstable_optimizer=on; +set @@optimizer_switch='derived_merge=off'; +select calsettrace(1); + +select + l_returnflag, + l_linestatus, + sum(l_quantity) as sum_qty, + sum(l_extendedprice) as sum_base_price, + sum(l_extendedprice * (1 - l_discount)) as sum_disc_price, + sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) as sum_charge, + avg(l_quantity) as avg_qty, + avg(l_extendedprice) as avg_price, + avg(l_discount) as avg_disc, + count(*) as count_order +from + lineitem +where + l_shipdate <= date '1998-12-01' - interval '90' day +group by + l_returnflag, + l_linestatus +order by + l_returnflag, + l_linestatus; +16,5 secs + +select + l_returnflag, + l_linestatus, + sum(l_quantity) as sum_qty, + sum(l_extendedprice) as sum_base_price, + sum(l_extendedprice * (1 - l_discount)) as sum_disc_price, + sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) as sum_charge, + avg(l_quantity) as avg_qty, + avg(l_extendedprice) as avg_price, + avg(l_discount) as avg_disc, + count(*) as count_order +from ( +SELECT l_orderkey, l_suppkey, l_returnflag, l_linestatus, l_quantity, l_extendedprice, l_discount, l_tax, l_shipdate +FROM lineitem +WHERE l_orderkey >= 1 AND l_orderkey < 250001 + +UNION ALL + +SELECT l_orderkey, l_suppkey, l_returnflag, l_linestatus, l_quantity, l_extendedprice, l_discount, l_tax, l_shipdate +FROM lineitem +WHERE l_orderkey >= 250001 AND l_orderkey < 500001 + +UNION ALL + +SELECT l_orderkey, l_suppkey, l_returnflag, l_linestatus, l_quantity, l_extendedprice, l_discount, l_tax, l_shipdate +FROM lineitem +WHERE l_orderkey >= 500001 AND l_orderkey < 750001 + +UNION ALL + +SELECT l_orderkey, l_suppkey, l_returnflag, l_linestatus, l_quantity, l_extendedprice, l_discount, l_tax, l_shipdate +FROM lineitem +WHERE l_orderkey >= 750001 AND l_orderkey < 1000001 + +UNION ALL + +SELECT l_orderkey, l_suppkey, l_returnflag, l_linestatus, l_quantity, l_extendedprice, l_discount, l_tax, l_shipdate +FROM lineitem +WHERE l_orderkey >= 1000001 AND 
l_orderkey < 1250001 + +UNION ALL + +SELECT l_orderkey, l_suppkey, l_returnflag, l_linestatus, l_quantity, l_extendedprice, l_discount, l_tax, l_shipdate +FROM lineitem +WHERE l_orderkey >= 1250001 AND l_orderkey < 1500001 + +UNION ALL + +SELECT l_orderkey, l_suppkey, l_returnflag, l_linestatus, l_quantity, l_extendedprice, l_discount, l_tax, l_shipdate +FROM lineitem +WHERE l_orderkey >= 1500001 AND l_orderkey < 1750001 + +UNION ALL + +SELECT l_orderkey, l_suppkey, l_returnflag, l_linestatus, l_quantity, l_extendedprice, l_discount, l_tax, l_shipdate +FROM lineitem +WHERE l_orderkey >= 1750001 AND l_orderkey < 2000001 + +UNION ALL + +SELECT l_orderkey, l_suppkey, l_returnflag, l_linestatus, l_quantity, l_extendedprice, l_discount, l_tax, l_shipdate +FROM lineitem +WHERE l_orderkey >= 2000001 AND l_orderkey < 2250001 + +UNION ALL + +SELECT l_orderkey, l_suppkey, l_returnflag, l_linestatus, l_quantity, l_extendedprice, l_discount, l_tax, l_shipdate +FROM lineitem +WHERE l_orderkey >= 2250001 AND l_orderkey < 2500001 + +UNION ALL + +SELECT l_orderkey, l_suppkey, l_returnflag, l_linestatus, l_quantity, l_extendedprice, l_discount, l_tax, l_shipdate +FROM lineitem +WHERE l_orderkey >= 2500001 AND l_orderkey < 2750001 + +UNION ALL + +SELECT l_orderkey, l_suppkey, l_returnflag, l_linestatus, l_quantity, l_extendedprice, l_discount, l_tax, l_shipdate +FROM lineitem +WHERE l_orderkey >= 2750001 AND l_orderkey < 3000001 + +UNION ALL + +SELECT l_orderkey, l_suppkey, l_returnflag, l_linestatus, l_quantity, l_extendedprice, l_discount, l_tax, l_shipdate +FROM lineitem +WHERE l_orderkey >= 3000001 AND l_orderkey < 3250001 + +UNION ALL + +SELECT l_orderkey, l_suppkey, l_returnflag, l_linestatus, l_quantity, l_extendedprice, l_discount, l_tax, l_shipdate +FROM lineitem +WHERE l_orderkey >= 3250001 AND l_orderkey < 3500001 + +UNION ALL + +SELECT l_orderkey, l_suppkey, l_returnflag, l_linestatus, l_quantity, l_extendedprice, l_discount, l_tax, l_shipdate +FROM lineitem +WHERE 
l_orderkey >= 3500001 AND l_orderkey < 3750001 + +UNION ALL + +SELECT l_orderkey, l_suppkey, l_returnflag, l_linestatus, l_quantity, l_extendedprice, l_discount, l_tax, l_shipdate +FROM lineitem +WHERE l_orderkey >= 3750001 AND l_orderkey < 4000001 + +UNION ALL + +SELECT l_orderkey, l_suppkey, l_returnflag, l_linestatus, l_quantity, l_extendedprice, l_discount, l_tax, l_shipdate +FROM lineitem +WHERE l_orderkey >= 4000001 AND l_orderkey < 4250001 + +UNION ALL + +SELECT l_orderkey, l_suppkey, l_returnflag, l_linestatus, l_quantity, l_extendedprice, l_discount, l_tax, l_shipdate +FROM lineitem +WHERE l_orderkey >= 4250001 AND l_orderkey < 4500001 + +UNION ALL + +SELECT l_orderkey, l_suppkey, l_returnflag, l_linestatus, l_quantity, l_extendedprice, l_discount, l_tax, l_shipdate +FROM lineitem +WHERE l_orderkey >= 4500001 AND l_orderkey < 4750001 + +UNION ALL + +SELECT l_orderkey, l_suppkey, l_returnflag, l_linestatus, l_quantity, l_extendedprice, l_discount, l_tax, l_shipdate +FROM lineitem +WHERE l_orderkey >= 4750001 AND l_orderkey < 5000001 + +UNION ALL + +SELECT l_orderkey, l_suppkey, l_returnflag, l_linestatus, l_quantity, l_extendedprice, l_discount, l_tax, l_shipdate +FROM lineitem +WHERE l_orderkey >= 5000001 AND l_orderkey < 5250001 + +UNION ALL + +SELECT l_orderkey, l_suppkey, l_returnflag, l_linestatus, l_quantity, l_extendedprice, l_discount, l_tax, l_shipdate +FROM lineitem +WHERE l_orderkey >= 5250001 AND l_orderkey < 5500001 + +UNION ALL + +SELECT l_orderkey, l_suppkey, l_returnflag, l_linestatus, l_quantity, l_extendedprice, l_discount, l_tax, l_shipdate +FROM lineitem +WHERE l_orderkey >= 5500001 AND l_orderkey < 5750001 + +UNION ALL + +SELECT l_orderkey, l_suppkey, l_returnflag, l_linestatus, l_quantity, l_extendedprice, l_discount, l_tax, l_shipdate +FROM lineitem +WHERE l_orderkey >= 5750001 AND l_orderkey < 6000001 + +UNION ALL + +SELECT l_orderkey, l_suppkey, l_returnflag, l_linestatus, l_quantity, l_extendedprice, l_discount, l_tax, l_shipdate 
+FROM lineitem +WHERE l_orderkey >= 6000001 AND l_orderkey <= 6144000 + ) tmp +where + l_shipdate <= date '1998-12-01' - interval '90' day +group by + l_returnflag, + l_linestatus +order by + l_returnflag, + l_linestatus; + +9,5 secs +``` + + +```SQL +set columnstore_unstable_optimizer=on; +set @@optimizer_switch='derived_merge=off'; +select calsettrace(1); + +select + l_returnflag, + l_linestatus, + sum(l_quantity) as sum_qty, + sum(l_extendedprice) as sum_base_price, + sum(l_extendedprice * (1 - l_discount)) as sum_disc_price, + sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) as sum_charge, + avg(l_quantity) as avg_qty, + avg(l_extendedprice) as avg_price, + avg(l_discount) as avg_disc, + count(*) as count_order +from + lineitem +where + l_shipdate <= date '1998-12-01' - interval '90' day +group by + l_returnflag, + l_linestatus +order by + l_returnflag, + l_linestatus; + +(45,42) +(46,40) +(48,41) +(NULL,41) + +1 - 45*42 + 46 * 40 +2 - 48 * 41 + +? sum(c1 * c2) | range(k) = [45,46] === sum( sum(c1 * c2) | range(k) =[45,46), sum(c1 * c2) | range(k) =[46,48)) + +select count(col),col2 from i1 GROUP BY col2; + +select count(s1.col),s1.col2,min(s1.col)+s1.col2 +from +( + select col,col2 from i1 where col >= min AND col < median UNION ALL + select col,col2 from i1 where col >= median AND col <= max +) s1 +GROUP BY s1.col2; + + + +``` + +фичи: +- собрать первые колонки ключей, по которым есть статистика, и сделать из них SC, которые можно использовать - extractColumnStatistics ходит по table, а не по ifp +- запрос `select l_suppkey from lineitem limit 10;` +- Статистика собирается из таблиц, нужно добавить SC-кандидаты в статистику \ No newline at end of file