42 KiB
- addExpresssionStepsToBps, combineJobStepsByTable
- if TableInfoMap fTableOid is 0
jobInfo.keyInfo->tupleKeyMap CrossEngineStep is created, replacing TBPS if fTableOid is 0 setTupleInfo sets
mcsSetConfig CrossEngineSupport User 'cejuser'
mcsSetConfig CrossEngineSupport Password 'Vagrant1|0000001'
CREATE USER IF NOT EXISTS'cejuser'@'localhost' IDENTIFIED BY 'Vagrant1|0000001';
GRANT ALL PRIVILEGES ON *.* TO 'cejuser'@'localhost';
FLUSH PRIVILEGES;
create table i1(i bigint)engine=innodb;
insert into i1 SELECT floor(rand(seq)*50001) FROM seq_1_to_1000;
analyze table i1 PERSISTENT FOR ALL;
MariaDB [mysql]> select * from column_stats;
+---------+------------+-------------+-----------+-----------+-------------+------------+---------------+-----------+-----------+-----------+
| db_name | table_name | column_name | min_value | max_value | nulls_ratio | avg_length | avg_frequency | hist_size | hist_type | histogram |
+---------+------------+-------------+-----------+-----------+-------------+------------+---------------+-----------+-----------+-----------+
| test | i1 | i | 12 | 49977 | 0.0000 | 8.0000 | 1.0183 | 10 | JSON_HB | {
"target_histogram_size": 10,
"collected_at": "2025-05-29 14:54:20",
"collected_by": "11.4.5-3-MariaDB-debug",
"histogram_hb": [
{
"start": "12",
"size": 0.100081811,
"ndv": 1101
},
{
"start": "30480",
"size": 0.100081811,
"ndv": 1101
},
{
"start": "31581",
"size": 0.100081811,
"ndv": 1101
},
{
"start": "32682",
"size": 0.100081811,
"ndv": 1074
},
{
"start": "33756",
"size": 0.100081811,
"ndv": 1072
},
{
"start": "34828",
"size": 0.100081811,
"ndv": 1071
},
{
"start": "35899",
"size": 0.100081811,
"ndv": 1072
},
{
"start": "36971",
"size": 0.100081811,
"ndv": 1072
},
{
"start": "38043",
"size": 0.100081811,
"ndv": 1072
},
{
"start": "39115",
"end": "49977",
"size": 0.099263703,
"ndv": 1067
}
]
} |
+---------+------------+-------------+-----------+-----------+-------------+------------+---------------+-----------+-----------+-----------+
1 row in set (0.001 sec)
Get buckets. Need to come up with recursive CTE that reduces buckets down to N, where N is a parallel factor for CES.
SELECT
JSON_UNQUOTE(JSON_EXTRACT(hb.value, '$.start')) AS start,
JSON_UNQUOTE(JSON_EXTRACT(hb.value, '$.size')) AS size,
JSON_UNQUOTE(JSON_EXTRACT(hb.value, '$.ndv')) AS ndv
FROM
column_stats AS cs,
JSON_TABLE(
cs.histogram,
'$.histogram_hb[*]' COLUMNS (
value JSON PATH '$'
)
) AS hb
WHERE
cs.db_name = 'test' and
cs.table_name = 'i1' and
cs.column_name = 'i' and
cs.hist_type = 'JSON_HB';
MDB Histogram_json_hb read_statistics_for_table once per runtime get_column_range_cardinality -> every Field::read_stats::histogram -> ? Histogram_json_hb ?
select * from cs1 union all select cs1.i from cs1, cs1 as cs11 where cs1.i=cs1.i;
select * from cs1 union all select cs2.i from cs2, cs3 where cs2.i=cs3.i;
select * from cs1 union all select cs1.i from cs1, cs1 as cs11 where cs1.i=cs1.i;
CREATE TABLE cs1 (i bigint) engine=columnstore;
CREATE table i1(i bigint);
select * from i1 union all select i1.i from i1, cs1 where i1.i=cs1.i;
select i1.i from i1 union all select i1.i from i1, cs1 where i1.i=cs1.i;
Нужно менять верхний уровень
select i1.i from (select i1.i from i1 union all select i1.i from i1) sub union all select i1.i from i1, cs1 where i1.i=cs1.i;
select * from (select i1.i from i1 union all select i1.i from i1) sub union all select i1.i from i1, cs1 where i1.i=cs1.i;
select i1.i from i1, cs1 where i1.i=cs1.i union all select * from (select i1.i from i1 union all select i1.i from i1) sub;
select i1.i from i1, cs1 where i1.i=cs1.i union all select i1.i from i1;
- Простой вариант переписывать leaf без JOIN
- Сложный переписывать leaf с JOIN
- обе MCS
- одна MCS
Получается, что если в запросе UNION с UNIT из Foreign, main query будет содержать запрос из primary union unit -> нужен detect primary или нет и переставлять
select * from i1;
select i1.i from i1 rewritten plan differs in columnmap
adjustLastStep JobInfo:: pjColList
When column doesn't have table name it fails to ct makeSubQueryStep and projectSimpleColumn via mapping that delivers ct oid = 100 p jobInfo
buildSimpleColFromDerivedTable
to add derived table:
so colPosition for SC is used to build a mapping of subquery columns' types and store it in JobInfo::vtableColTypes using (subQ table oid, table.alias). В JobList-е есть два неявных инварианта, с поиском которых я провозился дня три:
- TableAliasName для sub должен иметь пустую schema и tableName, но иметь alias
- все ReturnedColumn исходного CSEP должны иметь пустые schema и table и ReturnedColumn::colPosition() должен быть явно установлен в соответствии с порядком возвращаемых колонок - иначе возвращаемый из sub RowGroup экзотически взорвёт весь JobList - вариантов масса
select * from (select * from i1) i;
#include "simplecolumn.h"
#include "execplan/calpontsystemcatalog.h"
#include "simplefilter.h"
#include "constantcolumn.h"
using namespace execplan;
const SOP opeq(new Operator("="));
int main()
{
CalpontSelectExecutionPlan csep;
CalpontSelectExecutionPlan::ReturnedColumnList returnedColumnList;
CalpontSelectExecutionPlan::ColumnMap colMap;
string columnlength = CALPONT_SCHEMA + "." + SYSCOLUMN_TABLE + "." + COLUMNLEN_COL;
SimpleColumn* col[1];
col[0] = new SimpleColumn(columnlength, 22222222);
SRCP srcp;
srcp.reset(col[0]);
colMap.insert({columnlength, srcp});
csep.columnMapNonStatic(colMap);
srcp.reset(col[0]->clone());
returnedColumnList.push_back(srcp);
csep.returnedCols(returnedColumnList);
{
SCSEP csepDerived(new CalpontSelectExecutionPlan());
CalpontSelectExecutionPlan::ReturnedColumnList returnedColumnListLocal;
CalpontSelectExecutionPlan::ColumnMap colMapLocal;
string columnlength = CALPONT_SCHEMA + "." + SYSCOLUMN_TABLE + "." + COLUMNLEN_COL;
SimpleColumn* col[1];
col[0] = new SimpleColumn(columnlength, 22222222);
SRCP srcpLocal;
srcpLocal.reset(col[0]);
colMapLocal.insert({columnlength, srcpLocal});
csepDerived->columnMapNonStatic(colMapLocal);
srcp.reset(col[0]->clone());
returnedColumnListLocal.push_back(srcpLocal);
csepDerived->returnedCols(returnedColumnList);
CalpontSelectExecutionPlan::SelectList derivedTables;
derivedTables.push_back(csepDerived);
csep.derivedTableList(derivedTables);
}
CalpontSelectExecutionPlan::TableList tableList = {execplan::CalpontSystemCatalog::TableAliasName("", "", "alias")};
csep.tableList(tableList);
CalpontSelectExecutionPlan::SelectList unionVec;
for (size_t i = 0; i < 3; ++i)
{
SCSEP plan(new CalpontSelectExecutionPlan());
CalpontSelectExecutionPlan::ReturnedColumnList returnedColumnListLocal;
CalpontSelectExecutionPlan::ColumnMap colMapLocal;
SRCP srcpLocal;
srcpLocal.reset(col[0]);
colMapLocal.insert({columnlength, srcpLocal});
plan->columnMapNonStatic(colMapLocal);
srcpLocal.reset(col[0]->clone());
returnedColumnListLocal.push_back(srcpLocal);
plan->returnedCols(returnedColumnListLocal);
plan->txnID(csep.txnID());
plan->verID(csep.verID());
plan->sessionID(csep.sessionID());
plan->columnMapNonStatic(colMapLocal);
plan->returnedCols(returnedColumnListLocal);
unionVec.push_back(plan);
// std::cout << plan->toString() << std::endl;
}
csep.unionVec(unionVec);
std::cout << csep.toString() << std::endl;
}
set columnstore_unstable_optimizer=on; set @@optimizer_switch='derived_merge=off';
Any type of columns must produce SimpleColumn that preserves ?OID?, op type, CS, timezone and position but sets alias sc = new SimpleColumn(); sc->columnName(tcn.column); sc->tableName(tcn.table); sc->schemaName(tcn.schema); sc->oid(oidlist[j].objnum); sc->alias(!ifp->is_explicit_name() ? tcn.column : ifp->name.str); sc->tableAlias(gwi.tbList[i].alias); sc->viewName(viewName, lower_case_table_names); sc->partitions(gwi.tbList[i].partitions); sc->resultType(ct); sc->timeZone(gwi.timeZone);
cols = csep->returnedCols() sc->resultType(cols[j]->resultType());
select i as c1, i as c2 from i1; doesn't work properly
newConstantColumnNotNullUsingValNativeNoTz
set columnstore_unstable_optimizer=on;
set @@optimizer_switch='derived_merge=off';
```SQL
select
s_name,
count(*) as numwait
from
supplier,
(select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem) l1,
orders,
nation
where
s_suppkey = l1.l_suppkey
and o_orderkey = l1.l_orderkey
and o_orderstatus = 'F'
and l1.recdate_gt_commitdate = 1
and exists(
select
l2.l_orderkey
from
lineitem l2
where
l2.l_orderkey = l1.l_orderkey
and l2.l_suppkey <> l1.l_suppkey
)
and not exists (
select
l3.l_orderkey
from
lineitem l3
where
l3.l_orderkey = l1.l_orderkey
and l3.l_suppkey <> l1.l_suppkey
and l3.recdate_gt_commitdate = 1
)
and s_nationkey = n_nationkey
and n_name = 'SAUDI ARABIA'
group by
s_name
order by
numwait desc,
s_name
limit
100;
select
s_name,
count(*) as numwait
from
supplier,
lineitem l1,
orders,
nation
where
s_suppkey = l1.l_suppkey
and o_orderkey = l1.l_orderkey
and o_orderstatus = 'F'
and l1.recdate_gt_commitdate = 1
and exists(
select
l_orderkey
from
lineitem l2
where
l2.l_orderkey = l1.l_orderkey
and l2.l_suppkey <> l1.l_suppkey
)
and not exists (
select
l_orderkey
from
lineitem l3
where
l3.l_orderkey = l1.l_orderkey
and l3.l_suppkey <> l1.l_suppkey
and l3.recdate_gt_commitdate = 1
)
and s_nationkey = n_nationkey
and n_name = 'SAUDI ARABIA'
group by
s_name
order by
numwait desc,
s_name
limit
100;
Potentially representative q
select
s_name,
count(*) as numwait
from
supplier,
lineitem l1
where
s_suppkey = l1.l_suppkey
and exists(
select
l_orderkey
from
lineitem l2
where
l2.l_orderkey = l1.l_orderkey
)
group by
s_name
order by
numwait desc,
s_name
limit
100;
select
s_name,
count(*) as numwait
from
supplier,
(select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey < 3000961 UNION ALL select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey >= 3000961 ) l1,
orders,
nation
where
s_suppkey = l1.l_suppkey
and o_orderkey = l1.l_orderkey
and o_orderstatus = 'F'
and l1.recdate_gt_commitdate = 1
and exists(
select * from
(
select
l_orderkey,l_suppkey
from
lineitem
where
l_orderkey < 3000961
UNION ALL
select
l_orderkey,l_suppkey
from
lineitem
where
l_orderkey >= 3000961
) l2
where
l2.l_orderkey = l1.l_orderkey
and l2.l_suppkey <> l1.l_suppkey
)
and not exists (
select * from
(
select
l_orderkey,l_suppkey,recdate_gt_commitdate
from
lineitem l3_s
where
l3_s.l_orderkey < 3000961
and l3_s.l_orderkey >= 0
and l3_s.recdate_gt_commitdate = 1
UNION ALL
select
l_orderkey,l_suppkey,recdate_gt_commitdate
from
lineitem l3_s
where
l_orderkey >= 3000961 and l3_s.l_orderkey < 18000000000
and l3_s.recdate_gt_commitdate = 1
) l3
where
l3.l_orderkey = l1.l_orderkey
and l3.l_suppkey <> l1.l_suppkey
)
and s_nationkey = n_nationkey
and n_name = 'SAUDI ARABIA'
group by
s_name
order by
numwait desc,
s_name
limit
100;
select
s_name,
count(*) as numwait
from
supplier,
(select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem) l1,
orders,
nation
where
s_suppkey = l1.l_suppkey
and o_orderkey = l1.l_orderkey
and o_orderstatus = 'F'
and l1.recdate_gt_commitdate = 1
and exists(
select * from
(
select
l_orderkey,l_suppkey
from
lineitem
where
l_orderkey < 3000961
UNION ALL
select
l_orderkey,l_suppkey
from
lineitem
where
l_orderkey >= 3000961
) l2
where
l2.l_orderkey = l1.l_orderkey
and l2.l_suppkey <> l1.l_suppkey
)
and not exists (
select * from
(
select
l_orderkey,l_suppkey,recdate_gt_commitdate
from
lineitem
where
l_orderkey < 3000961
and lineitem.recdate_gt_commitdate = 1
UNION ALL
select
l_orderkey,l_suppkey,recdate_gt_commitdate
from
lineitem
where
l_orderkey >= 3000961
and lineitem.recdate_gt_commitdate = 1
) l3
where
l3.l_orderkey = l1.l_orderkey
and l3.l_suppkey <> l1.l_suppkey
)
and s_nationkey = n_nationkey
and n_name = 'SAUDI ARABIA'
group by
s_name
order by
numwait desc,
s_name
limit
100;
select
s_name,
count(*) as numwait
from
supplier,
lineitem l1
where
s_suppkey = l1.l_suppkey
and l1.recdate_gt_commitdate = 1
and not exists (
select
l_orderkey
from
lineitem l3
where
l3.l_orderkey = l1.l_orderkey
)
group by
s_name
order by
numwait desc,
s_name
limit
100;
cs_package_manager.sh install dev stable-23.10 cron/792 -dev cspkg
select
s_name,
count(*) as numwait
from
supplier,
(select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey < 300096 UNION ALL
select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey BETWEEN 3000960 and 6000960 union all
select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey BETWEEN 6000960 and 9000960 union all
select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey BETWEEN 9000960 and 12000960 union all
select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey BETWEEN 12000960 and 15000960 union all
select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey BETWEEN 15000960 and 18000960 union all
select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey BETWEEN 18000960 and 21000960 union all
select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey BETWEEN 21000960 and 24000960 union all
select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey BETWEEN 24000960 and 27000960 union all
select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey BETWEEN 27000960 and 30000960 union all
select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey BETWEEN 30000960 and 33000960 union all
select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey BETWEEN 33000960 and 36000960 union all
select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey BETWEEN 36000960 and 39000960 union all
select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey BETWEEN 39000960 and 42000960 union all
select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey BETWEEN 42000960 and 45000960 union all
select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey BETWEEN 45000960 and 48000960 union all
select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey BETWEEN 48000960 and 52000960 union all
select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey BETWEEN 52000960 and 55000960 union all
select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey BETWEEN 55000960 and 57000960 union all
select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey BETWEEN 57000960 and 60000960
) l1,
orders,
nation
where
s_suppkey = l1.l_suppkey
and o_orderkey = l1.l_orderkey
and o_orderstatus = 'F'
and l1.recdate_gt_commitdate = 1
and exists(
select * from
(
select l_orderkey,l_suppkey from lineitem where l_orderkey < 300096 UNION ALL
select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey BETWEEN 3000960 and 6000960 union all
select l_orderkey,l_suppkey from lineitem where l_orderkey BETWEEN 6000960 and 9000960 union all
select l_orderkey,l_suppkey from lineitem where l_orderkey BETWEEN 9000960 and 12000960 union all
select l_orderkey,l_suppkey from lineitem where l_orderkey BETWEEN 12000960 and 15000960 union all
select l_orderkey,l_suppkey from lineitem where l_orderkey BETWEEN 15000960 and 18000960 union all
select l_orderkey,l_suppkey from lineitem where l_orderkey BETWEEN 18000960 and 21000960 union all
select l_orderkey,l_suppkey from lineitem where l_orderkey BETWEEN 21000960 and 24000960 union all
select l_orderkey,l_suppkey from lineitem where l_orderkey BETWEEN 24000960 and 27000960 union all
select l_orderkey,l_suppkey from lineitem where l_orderkey BETWEEN 27000960 and 30000960 union all
select l_orderkey,l_suppkey from lineitem where l_orderkey BETWEEN 30000960 and 33000960 union all
select l_orderkey,l_suppkey from lineitem where l_orderkey BETWEEN 33000960 and 36000960 union all
select l_orderkey,l_suppkey from lineitem where l_orderkey BETWEEN 36000960 and 39000960 union all
select l_orderkey,l_suppkey from lineitem where l_orderkey BETWEEN 39000960 and 42000960 union all
select l_orderkey,l_suppkey from lineitem where l_orderkey BETWEEN 42000960 and 45000960 union all
select l_orderkey,l_suppkey from lineitem where l_orderkey BETWEEN 45000960 and 48000960 union all
select l_orderkey,l_suppkey from lineitem where l_orderkey BETWEEN 48000960 and 52000960 union all
select l_orderkey,l_suppkey from lineitem where l_orderkey BETWEEN 52000960 and 55000960 union all
select l_orderkey,l_suppkey from lineitem where l_orderkey BETWEEN 55000960 and 57000960 union all
select l_orderkey,l_suppkey from lineitem where l_orderkey BETWEEN 57000960 and 60000960
) l2
where
l2.l_orderkey = l1.l_orderkey
and l2.l_suppkey <> l1.l_suppkey
)
and not exists (
select * from
(
select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem l3_s where l_orderkey < 300096 UNION ALL
select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem l3_s where l_orderkey BETWEEN 3000960 and 6000960 and l3_s.recdate_gt_commitdate = 1 union all
select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem l3_s where l_orderkey BETWEEN 6000960 and 9000960 and l3_s.recdate_gt_commitdate = 1 union all
select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem l3_s where l_orderkey BETWEEN 9000960 and 12000960 and l3_s.recdate_gt_commitdate = 1 union all
select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem l3_s where l_orderkey BETWEEN 12000960 and 15000960 and l3_s.recdate_gt_commitdate = 1 union all
select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey BETWEEN 15000960 and 18000960 and l3_s.recdate_gt_commitdate = 1 union all
select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey BETWEEN 18000960 and 21000960 and l3_s.recdate_gt_commitdate = 1 union all
select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey BETWEEN 21000960 and 24000960 and l3_s.recdate_gt_commitdate = 1 union all
select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey BETWEEN 24000960 and 27000960 and l3_s.recdate_gt_commitdate = 1 union all
select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey BETWEEN 27000960 and 30000960 and l3_s.recdate_gt_commitdate = 1 union all
select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey BETWEEN 30000960 and 33000960 and l3_s.recdate_gt_commitdate = 1 union all
select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey BETWEEN 33000960 and 36000960 and l3_s.recdate_gt_commitdate = 1 union all
select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey BETWEEN 36000960 and 39000960 and l3_s.recdate_gt_commitdate = 1 union all
select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey BETWEEN 39000960 and 42000960 and l3_s.recdate_gt_commitdate = 1 union all
select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey BETWEEN 42000960 and 45000960 and l3_s.recdate_gt_commitdate = 1 union all
select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey BETWEEN 45000960 and 48000960 and l3_s.recdate_gt_commitdate = 1 union all
select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey BETWEEN 48000960 and 52000960 and l3_s.recdate_gt_commitdate = 1 union all
select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey BETWEEN 52000960 and 55000960 and l3_s.recdate_gt_commitdate = 1 union all
select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey BETWEEN 55000960 and 57000960 and l3_s.recdate_gt_commitdate = 1 union all
select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey BETWEEN 57000960 and 60000960 and l3_s.recdate_gt_commitdate = 1
) l3
where
l3.l_orderkey = l1.l_orderkey
and l3.l_suppkey <> l1.l_suppkey
)
and s_nationkey = n_nationkey
and n_name = 'SAUDI ARABIA'
group by
s_name
order by
numwait desc,
s_name
limit
100;
select
l_suppkey,
count(l_orderkey) as numwait
from (
select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey < 300096 UNION ALL
select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey BETWEEN 3000960 and 6000960 union all
select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey BETWEEN 6000960 and 9000960 union all
select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey BETWEEN 9000960 and 12000960 union all
select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey BETWEEN 12000960 and 15000960 union all
select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey BETWEEN 15000960 and 18000960 union all
select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey BETWEEN 18000960 and 21000960 union all
select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey BETWEEN 21000960 and 24000960 union all
select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey BETWEEN 24000960 and 27000960 union all
select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey BETWEEN 27000960 and 30000960 union all
select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey BETWEEN 30000960 and 33000960 union all
select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey BETWEEN 33000960 and 36000960 union all
select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey BETWEEN 36000960 and 39000960 union all
select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey BETWEEN 39000960 and 42000960 union all
select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey BETWEEN 42000960 and 45000960 union all
select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey BETWEEN 45000960 and 48000960 union all
select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey BETWEEN 48000960 and 52000960 union all
select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey BETWEEN 52000960 and 55000960 union all
select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey BETWEEN 55000960 and 57000960 union all
select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey BETWEEN 57000960 and 60000960
) tmp
group by
l_suppkey
order by
numwait desc,
l_suppkey
limit
100;
create table lineitem (
l_orderkey int,
l_partkey int,
l_suppkey int,
l_linenumber bigint,
l_quantity decimal(12,2),
l_extendedprice decimal(12,2),
l_discount decimal(12,2),
l_tax decimal(12,2),
l_returnflag char (1),
l_linestatus char (1),
l_shipdate date,
l_commitdate date,
l_receiptdate date,
l_shipinstruct char (25),
l_shipmode char (10),
l_comment varchar (44)
);
--mariadb tpc_h_i -e "load data infile '/data/columnstore-tooling/tpc-h/dbgen/lineitem.tbl' INTO TABLE lineitem FIELDS TERMINATED BY '|'";
mariadb test -e "load data infile '/data/regr/testData/tpch/1g/lineitem.tbl' INTO TABLE lineitem FIELDS TERMINATED BY '|'";
SET SESSION alter_algorithm='INSTANT';
alter table lineitem add column `recdate_gt_commitdate` tinyint(4) not null default 0;
set autocommit=0;LOCK TABLE lineitem WRITE; update lineitem set recdate_gt_commitdate = 1 where l_receiptdate > l_commitdate; commit; UNLOCK TABLES;
alter table lineitem add index(`l_orderkey`, `l_suppkey`,recdate_gt_commitdate);
select l_suppkey, count(l_orderkey) as numwait from lineitem group by l_suppkey order by numwait desc, l_suppkey limit 100;
set columnstore_unstable_optimizer=on;
set @@optimizer_switch='derived_merge=off';
select l_suppkey, count(l_orderkey) as numwait from (select l_suppkey, l_orderkey from lineitem) s group by l_suppkey order by numwait desc, l_suppkey limit 100;
/etc/my.cnf.d/columnstore.cnf
columnstore_innodb_queries_use_mcs = on
mcsSetConfig CrossEngineSupport User 'cejuser'
mcsSetConfig CrossEngineSupport Password 'Vagrant1|0000001'
CREATE USER IF NOT EXISTS'cejuser'@'localhost' IDENTIFIED BY 'Vagrant1|0000001';
GRANT ALL PRIVILEGES ON *.* TO 'cejuser'@'localhost';
FLUSH PRIVILEGES;
create table i1(col bigint);
insert into i1 values (42),(45),(46);
analyze persistant table i1;
create index on i1 (col);
ANALYZE TABLE i1 PERSISTENT FOR ALL;
alter table i1 add index(col);
select col from i1;
set columnstore_unstable_optimizer=on;
set @@optimizer_switch='derived_merge=off';
select calsettrace(1);
select l_orderkey,l_suppkey from lineitem limit 10;
select l_suppkey,l_orderkey from (
select l_suppkey,l_orderkey from lineitem where l_orderkey > min and l_orderkey < median_value union all
select l_suppkey,l_orderkey from lineitem where l_orderkey >= median and l_orderkey < last_value
) s1
select l_suppkey,l_orderkey from (select l_suppkey,l_orderkey from lineitem limit 10) s1
select l_suppkey,l_orderkey from (select l_suppkey,l_orderkey from lineitem limit 10) s1;
select l_suppkey+1,l_orderkey+1 from (select l_suppkey+1,l_orderkey+1 from lineitem limit 10) s1;
-- must ignore
select nl.l_orderkey,l.l_suppkey,nl.l_suppkey,l.l_suppkey from lineitem l,lineitem_10rows nl WHERE l.l_suppkey=nl.l_suppkey limit 10;
-- test set with join
select nl.l_orderkey,l.l_suppkey,nl.l_suppkey,l.l_orderkey from lineitem l,lineitem_10rows nl WHERE l.l_suppkey=nl.l_suppkey limit 10;
-- join with derived
select nl.l_orderkey,l.l_suppkey,nl.l_suppkey,l.l_orderkey from (select l_orderkey,l_suppkey from lineitem) l,lineitem_10rows nl WHERE l.l_suppkey=nl.l_suppkey limit 10;
-- subquery with AC
select * from (select l_orderkey,l_suppkey+1 from lineitem limit 10) sa where sa.l_orderkey = 1999905;
-- subquery with FC
select nl.l_orderkey,l.l_suppkey,nl.l_suppkey,l.l_orderkey from (select l_orderkey,l_suppkey from lineitem) l,(select l_orderkey,l_suppkey from lineitem_10rows) nl WHERE l.l_suppkey=nl.l_suppkey limit 10;
select nl.l_orderkey,l.l_suppkey,nl.l_suppkey,l.l_orderkey from (select l_orderkey,l_suppkey from lineitem) l,lineitem_10rows nl WHERE l.l_suppkey=nl.l_suppkey limit 10;
-- feat
-- Запросы без ключевой колонки
select nl.l_orderkey,l.l_suppkey+1 from lineitem l,lineitem_10rows nl WHERE l.l_suppkey=nl.l_suppkey limit 10;
Check if the expression uses only index keys uses_index_fields_only
Tasks:
- expand support types adding varchar, timestamps
- add tests to see if expressions works
- search for a specific interesting column available
- replacing rule filter to work on tables also
- alter rule to handle this case
- add support for correlated subquery
Jul 16 22:02:27 drrtuy-u24 mariadbd[727576]: looking for l_orderkey in ctx.gwi.columnStatisticsMap with size 1
Jul 16 22:02:27 drrtuy-u24 mariadbd[727576]: key l_orderkey vector size 4
Jul 16 22:02:27 drrtuy-u24 mariadbd[727576]: columnStatistics.size() 4
Jul 16 22:02:27 drrtuy-u24 mariadbd[727576]: bucket.start_value 1
Jul 16 22:02:27 drrtuy-u24 mariadbd[727576]: bucket.start_value 1500738
Jul 16 22:02:27 drrtuy-u24 mariadbd[727576]: currentLowerBound 1 currentUpperBound 1875794
Jul 16 22:02:27 drrtuy-u24 mariadbd[727576]: bucket.start_value 3000961
Jul 16 22:02:27 drrtuy-u24 mariadbd[727576]: bucket.start_value 6000000
Jul 16 22:02:27 drrtuy-u24 mariadbd[727576]: currentLowerBound 3000961 currentUpperBound 6374911
Jul 16 22:02:36 drrtuy-u24 mariadbd[727576]: Adding column statistics for l_orderkey
Jul 16 22:02:36 drrtuy-u24 mariadbd[727576]: Type of histogram object: 17Histogram_json_hb
Jul 16 22:02:36 drrtuy-u24 mariadbd[727576]: gwi.columnStatisticsMap[ifp->field->field_name.str].size() 4
Алгоритм для derived c учётом semi-join/scalar subquery фильтров:
- пройти по таблицам и найти подходящие
- для каждой подходящей
- оставить фильтр, заменив обращение к локальным колонкам на true
- создать union и добавить
- в проекцию все колонки/выражения, содержащие только колонки таблицы для целевой таблицы
- в фильтр все пригодные выражения из исходного фильтра
- в фильтре найти для каждой данной таблицы эквивалент?
- использовать исходный фильтр, заменив все предикаты с нелокальными колонками на true?
- не должно быть выражений, содержащих несколько таблиц
- что делать с join правилами?
- в фильтре найти для каждой данной таблицы эквивалент?
- добавить в map с ключём (schema,table,alias) -> имя derived, если derived был создан
- для каждой подходящей
- пройти по проекции и заменить обращения к колонке из derived на обращение к derived
replaceRefCol getSimpleCols pt->walk(getSimpleCols, &fSimpleColumnList); SC::derivedTable
Для запроса вида
select nl.l_orderkey,l.l_suppkey,nl.l_suppkey,l.l_orderkey from (select l_orderkey,l_suppkey from lineitem) l,(select l_orderkey,l_suppkey from lineitem_10rows) nl WHERE l.l_suppkey=nl.l_suppkey limit 10;
Добавить в фильтр ограничение на то, что может быть в проекции масштабируемой таблицы Проход по SF для поиска SCs setSimpleColumnList + simpleColumnList -DWITH_SAFEMALLOC=OFF and sql_alloc.h SqlAlloc class controls memory allocation in a thread blowing up if I want to copy Histogram_json_bb instance Если несколько таблиц, то AC/FC -> можно выбрать и заменить данные в указателях на колонки.
1 UNION UNIT возвращает нормальное значение для второй колонки, 2ой UNION UNIT в RClist имеет две l_orderkey ! - SC::setSimpleColumnList - добавлял в vector, не очищая его.
set columnstore_unstable_optimizer=on;
set @@optimizer_switch='derived_merge=off';
select calsettrace(1);
select
l_returnflag,
l_linestatus,
sum(l_quantity) as sum_qty,
sum(l_extendedprice) as sum_base_price,
sum(l_extendedprice * (1 - l_discount)) as sum_disc_price,
sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) as sum_charge,
avg(l_quantity) as avg_qty,
avg(l_extendedprice) as avg_price,
avg(l_discount) as avg_disc,
count(*) as count_order
from
lineitem
where
l_shipdate <= date '1998-12-01' - interval '90' day
group by
l_returnflag,
l_linestatus
order by
l_returnflag,
l_linestatus;
16,5 secs
select
l_returnflag,
l_linestatus,
sum(l_quantity) as sum_qty,
sum(l_extendedprice) as sum_base_price,
sum(l_extendedprice * (1 - l_discount)) as sum_disc_price,
sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) as sum_charge,
avg(l_quantity) as avg_qty,
avg(l_extendedprice) as avg_price,
avg(l_discount) as avg_disc,
count(*) as count_order
from (
SELECT l_orderkey, l_suppkey, l_returnflag, l_linestatus, l_quantity, l_extendedprice, l_discount, l_tax, l_shipdate
FROM lineitem
WHERE l_orderkey >= 1 AND l_orderkey < 250001
UNION ALL
SELECT l_orderkey, l_suppkey, l_returnflag, l_linestatus, l_quantity, l_extendedprice, l_discount, l_tax, l_shipdate
FROM lineitem
WHERE l_orderkey >= 250001 AND l_orderkey < 500001
UNION ALL
SELECT l_orderkey, l_suppkey, l_returnflag, l_linestatus, l_quantity, l_extendedprice, l_discount, l_tax, l_shipdate
FROM lineitem
WHERE l_orderkey >= 500001 AND l_orderkey < 750001
UNION ALL
SELECT l_orderkey, l_suppkey, l_returnflag, l_linestatus, l_quantity, l_extendedprice, l_discount, l_tax, l_shipdate
FROM lineitem
WHERE l_orderkey >= 750001 AND l_orderkey < 1000001
UNION ALL
SELECT l_orderkey, l_suppkey, l_returnflag, l_linestatus, l_quantity, l_extendedprice, l_discount, l_tax, l_shipdate
FROM lineitem
WHERE l_orderkey >= 1000001 AND l_orderkey < 1250001
UNION ALL
SELECT l_orderkey, l_suppkey, l_returnflag, l_linestatus, l_quantity, l_extendedprice, l_discount, l_tax, l_shipdate
FROM lineitem
WHERE l_orderkey >= 1250001 AND l_orderkey < 1500001
UNION ALL
SELECT l_orderkey, l_suppkey, l_returnflag, l_linestatus, l_quantity, l_extendedprice, l_discount, l_tax, l_shipdate
FROM lineitem
WHERE l_orderkey >= 1500001 AND l_orderkey < 1750001
UNION ALL
SELECT l_orderkey, l_suppkey, l_returnflag, l_linestatus, l_quantity, l_extendedprice, l_discount, l_tax, l_shipdate
FROM lineitem
WHERE l_orderkey >= 1750001 AND l_orderkey < 2000001
UNION ALL
SELECT l_orderkey, l_suppkey, l_returnflag, l_linestatus, l_quantity, l_extendedprice, l_discount, l_tax, l_shipdate
FROM lineitem
WHERE l_orderkey >= 2000001 AND l_orderkey < 2250001
UNION ALL
SELECT l_orderkey, l_suppkey, l_returnflag, l_linestatus, l_quantity, l_extendedprice, l_discount, l_tax, l_shipdate
FROM lineitem
WHERE l_orderkey >= 2250001 AND l_orderkey < 2500001
UNION ALL
SELECT l_orderkey, l_suppkey, l_returnflag, l_linestatus, l_quantity, l_extendedprice, l_discount, l_tax, l_shipdate
FROM lineitem
WHERE l_orderkey >= 2500001 AND l_orderkey < 2750001
UNION ALL
SELECT l_orderkey, l_suppkey, l_returnflag, l_linestatus, l_quantity, l_extendedprice, l_discount, l_tax, l_shipdate
FROM lineitem
WHERE l_orderkey >= 2750001 AND l_orderkey < 3000001
UNION ALL
SELECT l_orderkey, l_suppkey, l_returnflag, l_linestatus, l_quantity, l_extendedprice, l_discount, l_tax, l_shipdate
FROM lineitem
WHERE l_orderkey >= 3000001 AND l_orderkey < 3250001
UNION ALL
SELECT l_orderkey, l_suppkey, l_returnflag, l_linestatus, l_quantity, l_extendedprice, l_discount, l_tax, l_shipdate
FROM lineitem
WHERE l_orderkey >= 3250001 AND l_orderkey < 3500001
UNION ALL
SELECT l_orderkey, l_suppkey, l_returnflag, l_linestatus, l_quantity, l_extendedprice, l_discount, l_tax, l_shipdate
FROM lineitem
WHERE l_orderkey >= 3500001 AND l_orderkey < 3750001
UNION ALL
SELECT l_orderkey, l_suppkey, l_returnflag, l_linestatus, l_quantity, l_extendedprice, l_discount, l_tax, l_shipdate
FROM lineitem
WHERE l_orderkey >= 3750001 AND l_orderkey < 4000001
UNION ALL
SELECT l_orderkey, l_suppkey, l_returnflag, l_linestatus, l_quantity, l_extendedprice, l_discount, l_tax, l_shipdate
FROM lineitem
WHERE l_orderkey >= 4000001 AND l_orderkey < 4250001
UNION ALL
SELECT l_orderkey, l_suppkey, l_returnflag, l_linestatus, l_quantity, l_extendedprice, l_discount, l_tax, l_shipdate
FROM lineitem
WHERE l_orderkey >= 4250001 AND l_orderkey < 4500001
UNION ALL
SELECT l_orderkey, l_suppkey, l_returnflag, l_linestatus, l_quantity, l_extendedprice, l_discount, l_tax, l_shipdate
FROM lineitem
WHERE l_orderkey >= 4500001 AND l_orderkey < 4750001
UNION ALL
SELECT l_orderkey, l_suppkey, l_returnflag, l_linestatus, l_quantity, l_extendedprice, l_discount, l_tax, l_shipdate
FROM lineitem
WHERE l_orderkey >= 4750001 AND l_orderkey < 5000001
UNION ALL
SELECT l_orderkey, l_suppkey, l_returnflag, l_linestatus, l_quantity, l_extendedprice, l_discount, l_tax, l_shipdate
FROM lineitem
WHERE l_orderkey >= 5000001 AND l_orderkey < 5250001
UNION ALL
SELECT l_orderkey, l_suppkey, l_returnflag, l_linestatus, l_quantity, l_extendedprice, l_discount, l_tax, l_shipdate
FROM lineitem
WHERE l_orderkey >= 5250001 AND l_orderkey < 5500001
UNION ALL
SELECT l_orderkey, l_suppkey, l_returnflag, l_linestatus, l_quantity, l_extendedprice, l_discount, l_tax, l_shipdate
FROM lineitem
WHERE l_orderkey >= 5500001 AND l_orderkey < 5750001
UNION ALL
SELECT l_orderkey, l_suppkey, l_returnflag, l_linestatus, l_quantity, l_extendedprice, l_discount, l_tax, l_shipdate
FROM lineitem
WHERE l_orderkey >= 5750001 AND l_orderkey < 6000001
UNION ALL
SELECT l_orderkey, l_suppkey, l_returnflag, l_linestatus, l_quantity, l_extendedprice, l_discount, l_tax, l_shipdate
FROM lineitem
WHERE l_orderkey >= 6000001 AND l_orderkey <= 6144000
) tmp
where
l_shipdate <= date '1998-12-01' - interval '90' day
group by
l_returnflag,
l_linestatus
order by
l_returnflag,
l_linestatus;
9,5 secs
set columnstore_unstable_optimizer=on;
set @@optimizer_switch='derived_merge=off';
select calsettrace(1);
select
l_returnflag,
l_linestatus,
sum(l_quantity) as sum_qty,
sum(l_extendedprice) as sum_base_price,
sum(l_extendedprice * (1 - l_discount)) as sum_disc_price,
sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) as sum_charge,
avg(l_quantity) as avg_qty,
avg(l_extendedprice) as avg_price,
avg(l_discount) as avg_disc,
count(*) as count_order
from
lineitem
where
l_shipdate <= date '1998-12-01' - interval '90' day
group by
l_returnflag,
l_linestatus
order by
l_returnflag,
l_linestatus;
(45,42)
(46,40)
(48,41),
(NULL,41)
1 - 45*42 + 46 * 40
2 - 48 * 41
? sum(c1 * c2) | range(k) = [45,46] === sum( sum(c1 * c2) | range(k) =[45,46), sum(c1 * c2) | range(k) =[46,48))
select count(col),col2 from i1 GROUP BY col2;
select count(s1.col),s1.col2,min(s1.col)+s1.col2
from
(
select col,col2 from i1 where col >= min AND < median UNION ALL
select col,col2 from i1 where col >= median AND <= max
) s1
GROUP BY s1.col2;
фичи:
- собрать первые колонки ключей, по которым есть статистика и сделать из них SC, которые можно использовать - extractColumnStatistics ходит по table, а не по ifp
- запрос
select l_suppkey from lineitem limit 10; - Статистика собирается из таблиц, нужно добавить SC-кандидаты в статистику
Про фильтры MCOL-6117
- Рассмотрев, что фильтр содержит выражения из:
-
колонок, прендалежищих не затронутым таблицам (гр 1)
-
выражений, содержащих колонки только незатронутых таблиц (гр 1)
-
колонок затронутых таблиц SC (гр 2)
- замапить на SC соответствующих derived
-
выражений, содержащих колонки только затронутых таблиц (гр 2)
- замапить SC затронутых таблиц на SC derived
- ОПТ замапить на SC затронутых таблиц, если нет AC в поддереве
-
выражений, содержащих колонки затронутых таблиц и колонок не затронутых таблиц (гр 2)
- замапить SC затронутых таблиц на SC derived
-
- применить правила маппинга SC в выражении фильтра
- прогнать существующее правило проброса условий вниз
- если не получится, то клонить дерево фильтра и заменять нерелевантные части на true
- добавить правило очистки от constant true в дереве
- правило проброса условий вниз работает для derived, но не работает для UNION
- оптимизация добавить правило проброса условий в UNION