1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-10-31 18:30:33 +03:00
Files
mariadb-columnstore-engine/docs/QA_parallel.md

1157 lines
42 KiB
Markdown
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

- addExpresssionStepsToBps, combineJobStepsByTable
- if TableInfoMap fTableOid is 0
jobInfo.keyInfo->tupleKeyMap
CrossEngineStep is created, replacing TBPS if fTableOid is 0
setTupleInfo sets
```
mcsSetConfig CrossEngineSupport User 'cejuser'
mcsSetConfig CrossEngineSupport Password 'Vagrant1|0000001'
CREATE USER IF NOT EXISTS'cejuser'@'localhost' IDENTIFIED BY 'Vagrant1|0000001';
GRANT ALL PRIVILEGES ON *.* TO 'cejuser'@'localhost';
FLUSH PRIVILEGES;
```
```SQL
create table i1(i bigint)engine=innodb;
insert into i1 SELECT floor(rand(seq)*50001) FROM seq_1_to_1000;
analyze table i1 PERSISTENT FOR ALL;
```
```sql
MariaDB [mysql]> select * from column_stats;
+---------+------------+-------------+-----------+-----------+-------------+------------+---------------+-----------+-----------+-----------+
| db_name | table_name | column_name | min_value | max_value | nulls_ratio | avg_length | avg_frequency | hist_size | hist_type | histogram |
+---------+------------+-------------+-----------+-----------+-------------+------------+---------------+-----------+-----------+-----------+
| test | i1 | i | 12 | 49977 | 0.0000 | 8.0000 | 1.0183 | 10 | JSON_HB | {
"target_histogram_size": 10,
"collected_at": "2025-05-29 14:54:20",
"collected_by": "11.4.5-3-MariaDB-debug",
"histogram_hb": [
{
"start": "12",
"size": 0.100081811,
"ndv": 1101
},
{
"start": "30480",
"size": 0.100081811,
"ndv": 1101
},
{
"start": "31581",
"size": 0.100081811,
"ndv": 1101
},
{
"start": "32682",
"size": 0.100081811,
"ndv": 1074
},
{
"start": "33756",
"size": 0.100081811,
"ndv": 1072
},
{
"start": "34828",
"size": 0.100081811,
"ndv": 1071
},
{
"start": "35899",
"size": 0.100081811,
"ndv": 1072
},
{
"start": "36971",
"size": 0.100081811,
"ndv": 1072
},
{
"start": "38043",
"size": 0.100081811,
"ndv": 1072
},
{
"start": "39115",
"end": "49977",
"size": 0.099263703,
"ndv": 1067
}
]
} |
+---------+------------+-------------+-----------+-----------+-------------+------------+---------------+-----------+-----------+-----------+
1 row in set (0.001 sec)
```
Get the histogram buckets. Need to come up with a recursive CTE that reduces the buckets down to N, where N is the parallelism factor for CES (CrossEngineStep).
```sql
SELECT
JSON_UNQUOTE(JSON_EXTRACT(hb.value, '$.start')) AS start,
JSON_UNQUOTE(JSON_EXTRACT(hb.value, '$.size')) AS size,
JSON_UNQUOTE(JSON_EXTRACT(hb.value, '$.ndv')) AS ndv
FROM
column_stats AS cs,
JSON_TABLE(
cs.histogram,
'$.histogram_hb[*]' COLUMNS (
value JSON PATH '$'
)
) AS hb
WHERE
cs.db_name = 'test' and
cs.table_name = 'i1' and
cs.column_name = 'i' and
cs.hist_type = 'JSON_HB';
```
MDB
Histogram_json_hb
read_statistics_for_table once per runtime
get_column_range_cardinality -> every Field::read_stats::histogram -> ? Histogram_json_hb ?
```SQL
select * from cs1 union all select cs1.i from cs1, cs1 as cs11 where cs1.i=cs1.i;
select * from cs1 union all select cs2.i from cs2, cs3 where cs2.i=cs3.i;
select * from cs1 union all select cs1.i from cs1, cs1 as cs11 where cs1.i=cs1.i;
CREATE TABLE cs1 (i bigint) engine=columnstore;
CREATE table i1(i bigint);
select * from i1 union all select i1.i from i1, cs1 where i1.i=cs1.i;
select i1.i from i1 union all select i1.i from i1, cs1 where i1.i=cs1.i;
Нужно менять верхний уровень
select i1.i from (select i1.i from i1 union all select i1.i from i1) sub union all select i1.i from i1, cs1 where i1.i=cs1.i;
select * from (select i1.i from i1 union all select i1.i from i1) sub union all select i1.i from i1, cs1 where i1.i=cs1.i;
select i1.i from i1, cs1 where i1.i=cs1.i union all select * from (select i1.i from i1 union all select i1.i from i1) sub;
select i1.i from i1, cs1 where i1.i=cs1.i union all select i1.i from i1;
```
- Простой вариант переписывать leaf без JOIN
- Сложный переписывать leaf с JOIN
- обе MCS
- одна MCS
Получается, что если в запросе UNION с UNIT из Foreign, main query будет содержать запрос из primary union unit -> нужен detect primary или нет и переставлять
select * from i1;
select i1.i from i1 rewritten plan differs in columnmap
adjustLastStep
JobInfo:: pjColList
When column doesn't have table name it fails to ct
makeSubQueryStep and projectSimpleColumn
via mapping that delivers ct
oid = 100 p jobInfo
buildSimpleColFromDerivedTable
to add derived table:
-
so colPosition for SC is used to build a mapping of subquery columns' types and store it in JobInfo::vtableColTypes using (subQ table oid, table.alias).
В JobList-е есть два неявных инварианта, с поиском которых я провозился дня три:
- TableAliasName для sub должен иметь пустую schema и tableName, но иметь alias
- все ReturnedColumn исходного CSEP должны иметь пустые schema и table и ReturnedColumn::colPosition() должен быть явно установлен в соответствии с порядком возвращаемых колонок - иначе возвращаемый из sub RowGroup экзотически взорвёт весь JobList  - вариантов масса
select * from (select * from i1) i;
```
#include "simplecolumn.h"
#include "execplan/calpontsystemcatalog.h"
#include "simplefilter.h"
#include "constantcolumn.h"
using namespace execplan;
const SOP opeq(new Operator("="));
int main()
{
CalpontSelectExecutionPlan csep;
CalpontSelectExecutionPlan::ReturnedColumnList returnedColumnList;
CalpontSelectExecutionPlan::ColumnMap colMap;
string columnlength = CALPONT_SCHEMA + "." + SYSCOLUMN_TABLE + "." + COLUMNLEN_COL;
SimpleColumn* col[1];
col[0] = new SimpleColumn(columnlength, 22222222);
SRCP srcp;
srcp.reset(col[0]);
colMap.insert({columnlength, srcp});
csep.columnMapNonStatic(colMap);
srcp.reset(col[0]->clone());
returnedColumnList.push_back(srcp);
csep.returnedCols(returnedColumnList);
{
SCSEP csepDerived(new CalpontSelectExecutionPlan());
CalpontSelectExecutionPlan::ReturnedColumnList returnedColumnListLocal;
CalpontSelectExecutionPlan::ColumnMap colMapLocal;
string columnlength = CALPONT_SCHEMA + "." + SYSCOLUMN_TABLE + "." + COLUMNLEN_COL;
SimpleColumn* col[1];
col[0] = new SimpleColumn(columnlength, 22222222);
SRCP srcpLocal;
srcpLocal.reset(col[0]);
colMapLocal.insert({columnlength, srcpLocal});
csepDerived->columnMapNonStatic(colMapLocal);
srcp.reset(col[0]->clone());
returnedColumnListLocal.push_back(srcpLocal);
csepDerived->returnedCols(returnedColumnList);
CalpontSelectExecutionPlan::SelectList derivedTables;
derivedTables.push_back(csepDerived);
csep.derivedTableList(derivedTables);
}
CalpontSelectExecutionPlan::TableList tableList = {execplan::CalpontSystemCatalog::TableAliasName("", "", "alias")};
csep.tableList(tableList);
CalpontSelectExecutionPlan::SelectList unionVec;
for (size_t i = 0; i < 3; ++i)
{
SCSEP plan(new CalpontSelectExecutionPlan());
CalpontSelectExecutionPlan::ReturnedColumnList returnedColumnListLocal;
CalpontSelectExecutionPlan::ColumnMap colMapLocal;
SRCP srcpLocal;
srcpLocal.reset(col[0]);
colMapLocal.insert({columnlength, srcpLocal});
plan->columnMapNonStatic(colMapLocal);
srcpLocal.reset(col[0]->clone());
returnedColumnListLocal.push_back(srcpLocal);
plan->returnedCols(returnedColumnListLocal);
plan->txnID(csep.txnID());
plan->verID(csep.verID());
plan->sessionID(csep.sessionID());
plan->columnMapNonStatic(colMapLocal);
plan->returnedCols(returnedColumnListLocal);
unionVec.push_back(plan);
// std::cout << plan->toString() << std::endl;
}
csep.unionVec(unionVec);
std::cout << csep.toString() << std::endl;
}
```
set columnstore_unstable_optimizer=on;
set @@optimizer_switch='derived_merge=off';
Any type of columns must produce SimpleColumn that preserves ?OID?, op type, CS, timezone and position but sets alias
sc = new SimpleColumn();
sc->columnName(tcn.column);
sc->tableName(tcn.table);
sc->schemaName(tcn.schema);
sc->oid(oidlist[j].objnum);
sc->alias(!ifp->is_explicit_name() ? tcn.column : ifp->name.str);
sc->tableAlias(gwi.tbList[i].alias);
sc->viewName(viewName, lower_case_table_names);
sc->partitions(gwi.tbList[i].partitions);
sc->resultType(ct);
sc->timeZone(gwi.timeZone);
cols = csep->returnedCols()
sc->resultType(cols[j]->resultType());
select i as c1, i as c2 from i1; doesn't work properly
newConstantColumnNotNullUsingValNativeNoTz
```SQL
set columnstore_unstable_optimizer=on;
set @@optimizer_switch='derived_merge=off';
```
```SQL
select
s_name,
count(*) as numwait
from
supplier,
(select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem) l1,
orders,
nation
where
s_suppkey = l1.l_suppkey
and o_orderkey = l1.l_orderkey
and o_orderstatus = 'F'
and l1.recdate_gt_commitdate = 1
and exists(
select
l2.l_orderkey
from
lineitem l2
where
l2.l_orderkey = l1.l_orderkey
and l2.l_suppkey <> l1.l_suppkey
)
and not exists (
select
l3.l_orderkey
from
lineitem l3
where
l3.l_orderkey = l1.l_orderkey
and l3.l_suppkey <> l1.l_suppkey
and l3.recdate_gt_commitdate = 1
)
and s_nationkey = n_nationkey
and n_name = 'SAUDI ARABIA'
group by
s_name
order by
numwait desc,
s_name
limit
100;
```
```SQL
select
s_name,
count(*) as numwait
from
supplier,
lineitem l1,
orders,
nation
where
s_suppkey = l1.l_suppkey
and o_orderkey = l1.l_orderkey
and o_orderstatus = 'F'
and l1.recdate_gt_commitdate = 1
and exists(
select
l_orderkey
from
lineitem l2
where
l2.l_orderkey = l1.l_orderkey
and l2.l_suppkey <> l1.l_suppkey
)
and not exists (
select
l_orderkey
from
lineitem l3
where
l3.l_orderkey = l1.l_orderkey
and l3.l_suppkey <> l1.l_suppkey
and l3.recdate_gt_commitdate = 1
)
and s_nationkey = n_nationkey
and n_name = 'SAUDI ARABIA'
group by
s_name
order by
numwait desc,
s_name
limit
100;
```
Potentially representative q
```SQL
select
s_name,
count(*) as numwait
from
supplier,
lineitem l1
where
s_suppkey = l1.l_suppkey
and exists(
select
l_orderkey
from
lineitem l2
where
l2.l_orderkey = l1.l_orderkey
)
group by
s_name
order by
numwait desc,
s_name
limit
100;
```
```SQL
select
s_name,
count(*) as numwait
from
supplier,
(select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey < 3000961 UNION ALL select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey >= 3000961 ) l1,
orders,
nation
where
s_suppkey = l1.l_suppkey
and o_orderkey = l1.l_orderkey
and o_orderstatus = 'F'
and l1.recdate_gt_commitdate = 1
and exists(
select * from
(
select
l_orderkey,l_suppkey
from
lineitem
where
l_orderkey < 3000961
UNION ALL
select
l_orderkey,l_suppkey
from
lineitem
where
l_orderkey >= 3000961
) l2
where
l2.l_orderkey = l1.l_orderkey
and l2.l_suppkey <> l1.l_suppkey
)
and not exists (
select * from
(
select
l_orderkey,l_suppkey,recdate_gt_commitdate
from
lineitem l3_s
where
l3_s.l_orderkey < 3000961
and l3_s.l_orderkey >= 0
and l3_s.recdate_gt_commitdate = 1
UNION ALL
select
l_orderkey,l_suppkey,recdate_gt_commitdate
from
lineitem l3_s
where
l_orderkey >= 3000961 and l3_s.l_orderkey < 18000000000
and l3_s.recdate_gt_commitdate = 1
) l3
where
l3.l_orderkey = l1.l_orderkey
and l3.l_suppkey <> l1.l_suppkey
)
and s_nationkey = n_nationkey
and n_name = 'SAUDI ARABIA'
group by
s_name
order by
numwait desc,
s_name
limit
100;
```
```SQL
select
s_name,
count(*) as numwait
from
supplier,
(select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem) l1,
orders,
nation
where
s_suppkey = l1.l_suppkey
and o_orderkey = l1.l_orderkey
and o_orderstatus = 'F'
and l1.recdate_gt_commitdate = 1
and exists(
select * from
(
select
l_orderkey,l_suppkey
from
lineitem
where
l_orderkey < 3000961
UNION ALL
select
l_orderkey,l_suppkey
from
lineitem
where
l_orderkey >= 3000961
) l2
where
l2.l_orderkey = l1.l_orderkey
and l2.l_suppkey <> l1.l_suppkey
)
and not exists (
select * from
(
select
l_orderkey,l_suppkey,recdate_gt_commitdate
from
lineitem
where
l_orderkey < 3000961
and lineitem.recdate_gt_commitdate = 1
UNION ALL
select
l_orderkey,l_suppkey,recdate_gt_commitdate
from
lineitem
where
l_orderkey >= 3000961
and lineitem.recdate_gt_commitdate = 1
) l3
where
l3.l_orderkey = l1.l_orderkey
and l3.l_suppkey <> l1.l_suppkey
)
and s_nationkey = n_nationkey
and n_name = 'SAUDI ARABIA'
group by
s_name
order by
numwait desc,
s_name
limit
100;
```
```SQL
select
s_name,
count(*) as numwait
from
supplier,
lineitem l1
where
s_suppkey = l1.l_suppkey
and l1.recdate_gt_commitdate = 1
and not exists (
select
l_orderkey
from
lineitem l3
where
l3.l_orderkey = l1.l_orderkey
)
group by
s_name
order by
numwait desc,
s_name
limit
100;
```
```bash
cs_package_manager.sh install dev stable-23.10 cron/792 -dev cspkg
```
```SQL
-- TPC-H Q21-style query with every lineitem reference (l1, l2, l3) manually
-- split into 20 UNION ALL branches over l_orderkey ranges.
-- The ranges are half-open (>= lower, < upper) so that a boundary key is read by
-- exactly one branch; only the last branch keeps its upper bound inclusive.
-- Every branch of one derived table returns the same column list, and every l3
-- branch declares the l3_s alias and applies the recdate_gt_commitdate filter.
select
    s_name,
    count(*) as numwait
from
    supplier,
    (
        select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey < 3000960 union all
        select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey >= 3000960 and l_orderkey < 6000960 union all
        select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey >= 6000960 and l_orderkey < 9000960 union all
        select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey >= 9000960 and l_orderkey < 12000960 union all
        select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey >= 12000960 and l_orderkey < 15000960 union all
        select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey >= 15000960 and l_orderkey < 18000960 union all
        select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey >= 18000960 and l_orderkey < 21000960 union all
        select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey >= 21000960 and l_orderkey < 24000960 union all
        select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey >= 24000960 and l_orderkey < 27000960 union all
        select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey >= 27000960 and l_orderkey < 30000960 union all
        select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey >= 30000960 and l_orderkey < 33000960 union all
        select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey >= 33000960 and l_orderkey < 36000960 union all
        select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey >= 36000960 and l_orderkey < 39000960 union all
        select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey >= 39000960 and l_orderkey < 42000960 union all
        select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey >= 42000960 and l_orderkey < 45000960 union all
        select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey >= 45000960 and l_orderkey < 48000960 union all
        select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey >= 48000960 and l_orderkey < 52000960 union all
        select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey >= 52000960 and l_orderkey < 55000960 union all
        select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey >= 55000960 and l_orderkey < 57000960 union all
        select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey >= 57000960 and l_orderkey <= 60000960
    ) l1,
    orders,
    nation
where
    s_suppkey = l1.l_suppkey
    and o_orderkey = l1.l_orderkey
    and o_orderstatus = 'F'
    and l1.recdate_gt_commitdate = 1
    and exists(
        select * from
        (
            select l_orderkey,l_suppkey from lineitem where l_orderkey < 3000960 union all
            select l_orderkey,l_suppkey from lineitem where l_orderkey >= 3000960 and l_orderkey < 6000960 union all
            select l_orderkey,l_suppkey from lineitem where l_orderkey >= 6000960 and l_orderkey < 9000960 union all
            select l_orderkey,l_suppkey from lineitem where l_orderkey >= 9000960 and l_orderkey < 12000960 union all
            select l_orderkey,l_suppkey from lineitem where l_orderkey >= 12000960 and l_orderkey < 15000960 union all
            select l_orderkey,l_suppkey from lineitem where l_orderkey >= 15000960 and l_orderkey < 18000960 union all
            select l_orderkey,l_suppkey from lineitem where l_orderkey >= 18000960 and l_orderkey < 21000960 union all
            select l_orderkey,l_suppkey from lineitem where l_orderkey >= 21000960 and l_orderkey < 24000960 union all
            select l_orderkey,l_suppkey from lineitem where l_orderkey >= 24000960 and l_orderkey < 27000960 union all
            select l_orderkey,l_suppkey from lineitem where l_orderkey >= 27000960 and l_orderkey < 30000960 union all
            select l_orderkey,l_suppkey from lineitem where l_orderkey >= 30000960 and l_orderkey < 33000960 union all
            select l_orderkey,l_suppkey from lineitem where l_orderkey >= 33000960 and l_orderkey < 36000960 union all
            select l_orderkey,l_suppkey from lineitem where l_orderkey >= 36000960 and l_orderkey < 39000960 union all
            select l_orderkey,l_suppkey from lineitem where l_orderkey >= 39000960 and l_orderkey < 42000960 union all
            select l_orderkey,l_suppkey from lineitem where l_orderkey >= 42000960 and l_orderkey < 45000960 union all
            select l_orderkey,l_suppkey from lineitem where l_orderkey >= 45000960 and l_orderkey < 48000960 union all
            select l_orderkey,l_suppkey from lineitem where l_orderkey >= 48000960 and l_orderkey < 52000960 union all
            select l_orderkey,l_suppkey from lineitem where l_orderkey >= 52000960 and l_orderkey < 55000960 union all
            select l_orderkey,l_suppkey from lineitem where l_orderkey >= 55000960 and l_orderkey < 57000960 union all
            select l_orderkey,l_suppkey from lineitem where l_orderkey >= 57000960 and l_orderkey <= 60000960
        ) l2
        where
            l2.l_orderkey = l1.l_orderkey
            and l2.l_suppkey <> l1.l_suppkey
    )
    and not exists (
        select * from
        (
            select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem l3_s where l_orderkey < 3000960 and l3_s.recdate_gt_commitdate = 1 union all
            select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem l3_s where l_orderkey >= 3000960 and l_orderkey < 6000960 and l3_s.recdate_gt_commitdate = 1 union all
            select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem l3_s where l_orderkey >= 6000960 and l_orderkey < 9000960 and l3_s.recdate_gt_commitdate = 1 union all
            select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem l3_s where l_orderkey >= 9000960 and l_orderkey < 12000960 and l3_s.recdate_gt_commitdate = 1 union all
            select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem l3_s where l_orderkey >= 12000960 and l_orderkey < 15000960 and l3_s.recdate_gt_commitdate = 1 union all
            select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem l3_s where l_orderkey >= 15000960 and l_orderkey < 18000960 and l3_s.recdate_gt_commitdate = 1 union all
            select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem l3_s where l_orderkey >= 18000960 and l_orderkey < 21000960 and l3_s.recdate_gt_commitdate = 1 union all
            select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem l3_s where l_orderkey >= 21000960 and l_orderkey < 24000960 and l3_s.recdate_gt_commitdate = 1 union all
            select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem l3_s where l_orderkey >= 24000960 and l_orderkey < 27000960 and l3_s.recdate_gt_commitdate = 1 union all
            select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem l3_s where l_orderkey >= 27000960 and l_orderkey < 30000960 and l3_s.recdate_gt_commitdate = 1 union all
            select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem l3_s where l_orderkey >= 30000960 and l_orderkey < 33000960 and l3_s.recdate_gt_commitdate = 1 union all
            select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem l3_s where l_orderkey >= 33000960 and l_orderkey < 36000960 and l3_s.recdate_gt_commitdate = 1 union all
            select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem l3_s where l_orderkey >= 36000960 and l_orderkey < 39000960 and l3_s.recdate_gt_commitdate = 1 union all
            select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem l3_s where l_orderkey >= 39000960 and l_orderkey < 42000960 and l3_s.recdate_gt_commitdate = 1 union all
            select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem l3_s where l_orderkey >= 42000960 and l_orderkey < 45000960 and l3_s.recdate_gt_commitdate = 1 union all
            select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem l3_s where l_orderkey >= 45000960 and l_orderkey < 48000960 and l3_s.recdate_gt_commitdate = 1 union all
            select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem l3_s where l_orderkey >= 48000960 and l_orderkey < 52000960 and l3_s.recdate_gt_commitdate = 1 union all
            select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem l3_s where l_orderkey >= 52000960 and l_orderkey < 55000960 and l3_s.recdate_gt_commitdate = 1 union all
            select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem l3_s where l_orderkey >= 55000960 and l_orderkey < 57000960 and l3_s.recdate_gt_commitdate = 1 union all
            select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem l3_s where l_orderkey >= 57000960 and l_orderkey <= 60000960 and l3_s.recdate_gt_commitdate = 1
        ) l3
        where
            l3.l_orderkey = l1.l_orderkey
            and l3.l_suppkey <> l1.l_suppkey
    )
    and s_nationkey = n_nationkey
    and n_name = 'SAUDI ARABIA'
group by
    s_name
order by
    numwait desc,
    s_name
limit
    100;
```
```SQL
-- Per-supplier row count computed over lineitem split into 20 UNION ALL branches
-- by l_orderkey. The ranges are half-open (>= lower, < upper) and start at
-- 3000960, so every key is read by exactly one branch and the counts match the
-- unpartitioned "select l_suppkey, count(l_orderkey) ... from lineitem" query.
-- Only the last branch keeps its upper bound inclusive.
select
    l_suppkey,
    count(l_orderkey) as numwait
from (
    select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey < 3000960 union all
    select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey >= 3000960 and l_orderkey < 6000960 union all
    select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey >= 6000960 and l_orderkey < 9000960 union all
    select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey >= 9000960 and l_orderkey < 12000960 union all
    select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey >= 12000960 and l_orderkey < 15000960 union all
    select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey >= 15000960 and l_orderkey < 18000960 union all
    select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey >= 18000960 and l_orderkey < 21000960 union all
    select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey >= 21000960 and l_orderkey < 24000960 union all
    select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey >= 24000960 and l_orderkey < 27000960 union all
    select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey >= 27000960 and l_orderkey < 30000960 union all
    select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey >= 30000960 and l_orderkey < 33000960 union all
    select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey >= 33000960 and l_orderkey < 36000960 union all
    select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey >= 36000960 and l_orderkey < 39000960 union all
    select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey >= 39000960 and l_orderkey < 42000960 union all
    select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey >= 42000960 and l_orderkey < 45000960 union all
    select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey >= 45000960 and l_orderkey < 48000960 union all
    select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey >= 48000960 and l_orderkey < 52000960 union all
    select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey >= 52000960 and l_orderkey < 55000960 union all
    select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey >= 55000960 and l_orderkey < 57000960 union all
    select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey >= 57000960 and l_orderkey <= 60000960
) tmp
group by
    l_suppkey
order by
    numwait desc,
    l_suppkey
limit
    100;
```
```SQL
create table lineitem (
l_orderkey int,
l_partkey int,
l_suppkey int,
l_linenumber bigint,
l_quantity decimal(12,2),
l_extendedprice decimal(12,2),
l_discount decimal(12,2),
l_tax decimal(12,2),
l_returnflag char (1),
l_linestatus char (1),
l_shipdate date,
l_commitdate date,
l_receiptdate date,
l_shipinstruct char (25),
l_shipmode char (10),
l_comment varchar (44)
);
--mariadb tpc_h_i -e "load data infile '/data/columnstore-tooling/tpc-h/dbgen/lineitem.tbl' INTO TABLE lineitem FIELDS TERMINATED BY '|'";
mariadb test -e "load data infile '/data/regr/testData/tpch/1g/lineitem.tbl' INTO TABLE lineitem FIELDS TERMINATED BY '|'";
SET SESSION alter_algorithm='INSTANT';
alter table lineitem add column `recdate_gt_commitdate` tinyint(4) not null default 0;
set autocommit=0;LOCK TABLE lineitem WRITE; update lineitem set recdate_gt_commitdate = 1 where l_receiptdate > l_commitdate; commit; UNLOCK TABLES;
alter table lineitem add index(`l_orderkey`, `l_suppkey`,recdate_gt_commitdate);
```
```SQL
select l_suppkey, count(l_orderkey) as numwait from lineitem group by l_suppkey order by numwait desc, l_suppkey limit 100;
set columnstore_unstable_optimizer=on;
set @@optimizer_switch='derived_merge=off';
select l_suppkey, count(l_orderkey) as numwait from (select l_suppkey, l_orderkey from lineitem) s group by l_suppkey order by numwait desc, l_suppkey limit 100;
/etc/my.cnf.d/columnstore.cnf
columnstore_innodb_queries_use_mcs = on
mcsSetConfig CrossEngineSupport User 'cejuser'
mcsSetConfig CrossEngineSupport Password 'Vagrant1|0000001'
CREATE USER IF NOT EXISTS'cejuser'@'localhost' IDENTIFIED BY 'Vagrant1|0000001';
GRANT ALL PRIVILEGES ON *.* TO 'cejuser'@'localhost';
FLUSH PRIVILEGES;
create table i1(col bigint);
insert into i1 values (42),(45),(46);
analyze persistant table i1;
create index on i1 (col);
ANALYZE TABLE i1 PERSISTENT FOR ALL;
alter table i1 add index(col);
select col from i1;
set columnstore_unstable_optimizer=on;
set @@optimizer_switch='derived_merge=off';
select calsettrace(1);
select l_orderkey,l_suppkey from lineitem limit 10;
select l_suppkey,l_orderkey from (
select l_suppkey,l_orderkey from lineitem where l_orderkey > min and l_orderkey < median_value union all
select l_suppkey,l_orderkey from lineitem where l_orderkey >= median and l_orderkey < last_value
) s1
select l_suppkey,l_orderkey from (select l_suppkey,l_orderkey from lineitem limit 10) s1
select l_suppkey,l_orderkey from (select l_suppkey,l_orderkey from lineitem limit 10) s1;
select l_suppkey+1,l_orderkey+1 from (select l_suppkey+1,l_orderkey+1 from lineitem limit 10) s1;
-- must ignore
select nl.l_orderkey,l.l_suppkey,nl.l_suppkey,l.l_suppkey from lineitem l,lineitem_10rows nl WHERE l.l_suppkey=nl.l_suppkey limit 10;
-- test set with join
select nl.l_orderkey,l.l_suppkey,nl.l_suppkey,l.l_orderkey from lineitem l,lineitem_10rows nl WHERE l.l_suppkey=nl.l_suppkey limit 10;
-- join with derived
select nl.l_orderkey,l.l_suppkey,nl.l_suppkey,l.l_orderkey from (select l_orderkey,l_suppkey from lineitem) l,lineitem_10rows nl WHERE l.l_suppkey=nl.l_suppkey limit 10;
-- subquery with AC
select * from (select l_orderkey,l_suppkey+1 from lineitem limit 10) sa where sa.l_orderkey = 1999905;
-- subquery with FC
select nl.l_orderkey,l.l_suppkey,nl.l_suppkey,l.l_orderkey from (select l_orderkey,l_suppkey from lineitem) l,(select l_orderkey,l_suppkey from lineitem_10rows) nl WHERE l.l_suppkey=nl.l_suppkey limit 10;
select nl.l_orderkey,l.l_suppkey,nl.l_suppkey,l.l_orderkey from (select l_orderkey,l_suppkey from lineitem) l,lineitem_10rows nl WHERE l.l_suppkey=nl.l_suppkey limit 10;
-- feat
-- Запросы без ключевой колонки
select nl.l_orderkey,l.l_suppkey+1 from lineitem l,lineitem_10rows nl WHERE l.l_suppkey=nl.l_suppkey limit 10;
```
Check if the expression uses only index keys `uses_index_fields_only`
Tasks:
- expand the supported types: add varchar and timestamps
- add tests to check that expressions work
- search for a specific interesting column available
- replacing rule filter to work on tables also
- alter rule to handle this case
- add support for correlated subquery
```
Jul 16 22:02:27 drrtuy-u24 mariadbd[727576]: looking for l_orderkey in ctx.gwi.columnStatisticsMap with size 1
Jul 16 22:02:27 drrtuy-u24 mariadbd[727576]: key l_orderkey vector size 4
Jul 16 22:02:27 drrtuy-u24 mariadbd[727576]: columnStatistics.size() 4
Jul 16 22:02:27 drrtuy-u24 mariadbd[727576]: bucket.start_value 1
Jul 16 22:02:27 drrtuy-u24 mariadbd[727576]: bucket.start_value 1500738
Jul 16 22:02:27 drrtuy-u24 mariadbd[727576]: currentLowerBound 1 currentUpperBound 1875794
Jul 16 22:02:27 drrtuy-u24 mariadbd[727576]: bucket.start_value 3000961
Jul 16 22:02:27 drrtuy-u24 mariadbd[727576]: bucket.start_value 6000000
Jul 16 22:02:27 drrtuy-u24 mariadbd[727576]: currentLowerBound 3000961 currentUpperBound 6374911
Jul 16 22:02:36 drrtuy-u24 mariadbd[727576]: Adding column statistics for l_orderkey
Jul 16 22:02:36 drrtuy-u24 mariadbd[727576]: Type of histogram object: 17Histogram_json_hb
Jul 16 22:02:36 drrtuy-u24 mariadbd[727576]: gwi.columnStatisticsMap[ifp->field->field_name.str].size() 4
```
Алгоритм для derived c учётом semi-join/scalar subquery фильтров:
- пройти по таблицам и найти подходящие
- для каждой подходящей
- оставить фильтр, заменив обращение к локальным колонкам на true
- создать union и добавить
- в проекцию все колонки/выражения, содержащие только колонки таблицы для целевой таблицы
- в фильтр все пригодные выражения из исходного фильтра
- в фильтре найти для каждой данной таблицы эквивалент?
- использовать исходный фильтр, заменив все предикаты с нелокальными колонками на true?
- не должно быть выражений, содержащих несколько таблиц
- что делать с join правилами?
- добавить в map с ключом (schema,table,alias) -> имя derived, если derived был создан
- пройти по проекции и заменить обращения к колонке из derived на обращение к derived
replaceRefCol
getSimpleCols
pt->walk(getSimpleCols, &fSimpleColumnList);
SC::derivedTable
Для запроса вида
```SQL
select nl.l_orderkey,l.l_suppkey,nl.l_suppkey,l.l_orderkey from (select l_orderkey,l_suppkey from lineitem) l,(select l_orderkey,l_suppkey from lineitem_10rows) nl WHERE l.l_suppkey=nl.l_suppkey limit 10;
```
Добавить в фильтр ограничение на то, что может быть в проекции масштабируемой таблицы
Проход по SF для поиска SCs setSimpleColumnList + simpleColumnList
-DWITH_SAFEMALLOC=OFF and sql_alloc.h: the SqlAlloc class controls memory allocation in a thread, blowing up if I want to copy a Histogram_json_hb instance
Если несколько таблиц, то AC/FC -> можно выбрать и заменить данные в указателях на колонки.
1 UNION UNIT возвращает нормальное значение для второй колонки, 2ой UNION UNIT в RClist имеет две l_orderkey ! - SC::setSimpleColumnList - добавлял в vector, не очищая его.
```
set columnstore_unstable_optimizer=on;
set @@optimizer_switch='derived_merge=off';
select calsettrace(1);
select
l_returnflag,
l_linestatus,
sum(l_quantity) as sum_qty,
sum(l_extendedprice) as sum_base_price,
sum(l_extendedprice * (1 - l_discount)) as sum_disc_price,
sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) as sum_charge,
avg(l_quantity) as avg_qty,
avg(l_extendedprice) as avg_price,
avg(l_discount) as avg_disc,
count(*) as count_order
from
lineitem
where
l_shipdate <= date '1998-12-01' - interval '90' day
group by
l_returnflag,
l_linestatus
order by
l_returnflag,
l_linestatus;
16,5 secs
select
l_returnflag,
l_linestatus,
sum(l_quantity) as sum_qty,
sum(l_extendedprice) as sum_base_price,
sum(l_extendedprice * (1 - l_discount)) as sum_disc_price,
sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) as sum_charge,
avg(l_quantity) as avg_qty,
avg(l_extendedprice) as avg_price,
avg(l_discount) as avg_disc,
count(*) as count_order
from (
SELECT l_orderkey, l_suppkey, l_returnflag, l_linestatus, l_quantity, l_extendedprice, l_discount, l_tax, l_shipdate
FROM lineitem
WHERE l_orderkey >= 1 AND l_orderkey < 250001
UNION ALL
SELECT l_orderkey, l_suppkey, l_returnflag, l_linestatus, l_quantity, l_extendedprice, l_discount, l_tax, l_shipdate
FROM lineitem
WHERE l_orderkey >= 250001 AND l_orderkey < 500001
UNION ALL
SELECT l_orderkey, l_suppkey, l_returnflag, l_linestatus, l_quantity, l_extendedprice, l_discount, l_tax, l_shipdate
FROM lineitem
WHERE l_orderkey >= 500001 AND l_orderkey < 750001
UNION ALL
SELECT l_orderkey, l_suppkey, l_returnflag, l_linestatus, l_quantity, l_extendedprice, l_discount, l_tax, l_shipdate
FROM lineitem
WHERE l_orderkey >= 750001 AND l_orderkey < 1000001
UNION ALL
SELECT l_orderkey, l_suppkey, l_returnflag, l_linestatus, l_quantity, l_extendedprice, l_discount, l_tax, l_shipdate
FROM lineitem
WHERE l_orderkey >= 1000001 AND l_orderkey < 1250001
UNION ALL
SELECT l_orderkey, l_suppkey, l_returnflag, l_linestatus, l_quantity, l_extendedprice, l_discount, l_tax, l_shipdate
FROM lineitem
WHERE l_orderkey >= 1250001 AND l_orderkey < 1500001
UNION ALL
SELECT l_orderkey, l_suppkey, l_returnflag, l_linestatus, l_quantity, l_extendedprice, l_discount, l_tax, l_shipdate
FROM lineitem
WHERE l_orderkey >= 1500001 AND l_orderkey < 1750001
UNION ALL
SELECT l_orderkey, l_suppkey, l_returnflag, l_linestatus, l_quantity, l_extendedprice, l_discount, l_tax, l_shipdate
FROM lineitem
WHERE l_orderkey >= 1750001 AND l_orderkey < 2000001
UNION ALL
SELECT l_orderkey, l_suppkey, l_returnflag, l_linestatus, l_quantity, l_extendedprice, l_discount, l_tax, l_shipdate
FROM lineitem
WHERE l_orderkey >= 2000001 AND l_orderkey < 2250001
UNION ALL
SELECT l_orderkey, l_suppkey, l_returnflag, l_linestatus, l_quantity, l_extendedprice, l_discount, l_tax, l_shipdate
FROM lineitem
WHERE l_orderkey >= 2250001 AND l_orderkey < 2500001
UNION ALL
SELECT l_orderkey, l_suppkey, l_returnflag, l_linestatus, l_quantity, l_extendedprice, l_discount, l_tax, l_shipdate
FROM lineitem
WHERE l_orderkey >= 2500001 AND l_orderkey < 2750001
UNION ALL
SELECT l_orderkey, l_suppkey, l_returnflag, l_linestatus, l_quantity, l_extendedprice, l_discount, l_tax, l_shipdate
FROM lineitem
WHERE l_orderkey >= 2750001 AND l_orderkey < 3000001
UNION ALL
SELECT l_orderkey, l_suppkey, l_returnflag, l_linestatus, l_quantity, l_extendedprice, l_discount, l_tax, l_shipdate
FROM lineitem
WHERE l_orderkey >= 3000001 AND l_orderkey < 3250001
UNION ALL
SELECT l_orderkey, l_suppkey, l_returnflag, l_linestatus, l_quantity, l_extendedprice, l_discount, l_tax, l_shipdate
FROM lineitem
WHERE l_orderkey >= 3250001 AND l_orderkey < 3500001
UNION ALL
SELECT l_orderkey, l_suppkey, l_returnflag, l_linestatus, l_quantity, l_extendedprice, l_discount, l_tax, l_shipdate
FROM lineitem
WHERE l_orderkey >= 3500001 AND l_orderkey < 3750001
UNION ALL
SELECT l_orderkey, l_suppkey, l_returnflag, l_linestatus, l_quantity, l_extendedprice, l_discount, l_tax, l_shipdate
FROM lineitem
WHERE l_orderkey >= 3750001 AND l_orderkey < 4000001
UNION ALL
SELECT l_orderkey, l_suppkey, l_returnflag, l_linestatus, l_quantity, l_extendedprice, l_discount, l_tax, l_shipdate
FROM lineitem
WHERE l_orderkey >= 4000001 AND l_orderkey < 4250001
UNION ALL
SELECT l_orderkey, l_suppkey, l_returnflag, l_linestatus, l_quantity, l_extendedprice, l_discount, l_tax, l_shipdate
FROM lineitem
WHERE l_orderkey >= 4250001 AND l_orderkey < 4500001
UNION ALL
SELECT l_orderkey, l_suppkey, l_returnflag, l_linestatus, l_quantity, l_extendedprice, l_discount, l_tax, l_shipdate
FROM lineitem
WHERE l_orderkey >= 4500001 AND l_orderkey < 4750001
UNION ALL
SELECT l_orderkey, l_suppkey, l_returnflag, l_linestatus, l_quantity, l_extendedprice, l_discount, l_tax, l_shipdate
FROM lineitem
WHERE l_orderkey >= 4750001 AND l_orderkey < 5000001
UNION ALL
SELECT l_orderkey, l_suppkey, l_returnflag, l_linestatus, l_quantity, l_extendedprice, l_discount, l_tax, l_shipdate
FROM lineitem
WHERE l_orderkey >= 5000001 AND l_orderkey < 5250001
UNION ALL
SELECT l_orderkey, l_suppkey, l_returnflag, l_linestatus, l_quantity, l_extendedprice, l_discount, l_tax, l_shipdate
FROM lineitem
WHERE l_orderkey >= 5250001 AND l_orderkey < 5500001
UNION ALL
SELECT l_orderkey, l_suppkey, l_returnflag, l_linestatus, l_quantity, l_extendedprice, l_discount, l_tax, l_shipdate
FROM lineitem
WHERE l_orderkey >= 5500001 AND l_orderkey < 5750001
UNION ALL
SELECT l_orderkey, l_suppkey, l_returnflag, l_linestatus, l_quantity, l_extendedprice, l_discount, l_tax, l_shipdate
FROM lineitem
WHERE l_orderkey >= 5750001 AND l_orderkey < 6000001
UNION ALL
SELECT l_orderkey, l_suppkey, l_returnflag, l_linestatus, l_quantity, l_extendedprice, l_discount, l_tax, l_shipdate
FROM lineitem
WHERE l_orderkey >= 6000001 AND l_orderkey <= 6144000
) tmp
where
l_shipdate <= date '1998-12-01' - interval '90' day
group by
l_returnflag,
l_linestatus
order by
l_returnflag,
l_linestatus;
9,5 secs
```
```SQL
set columnstore_unstable_optimizer=on;
set @@optimizer_switch='derived_merge=off';
select calsettrace(1);
select
l_returnflag,
l_linestatus,
sum(l_quantity) as sum_qty,
sum(l_extendedprice) as sum_base_price,
sum(l_extendedprice * (1 - l_discount)) as sum_disc_price,
sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) as sum_charge,
avg(l_quantity) as avg_qty,
avg(l_extendedprice) as avg_price,
avg(l_discount) as avg_disc,
count(*) as count_order
from
lineitem
where
l_shipdate <= date '1998-12-01' - interval '90' day
group by
l_returnflag,
l_linestatus
order by
l_returnflag,
l_linestatus;
(45,42)
(46,40)
(48,41),
(NULL,41)
1 - 45*42 + 46 * 40
2 - 48 * 41
? sum(c1 * c2) | range(k) = [45,46] === sum( sum(c1 * c2) | range(k) =[45,46), sum(c1 * c2) | range(k) =[46,48))
select count(col),col2 from i1 GROUP BY col2;
select count(s1.col),s1.col2,min(s1.col)+s1.col2
from
(
select col,col2 from i1 where col >= min AND col < median UNION ALL
select col,col2 from i1 where col >= median AND col <= max
) s1
GROUP BY s1.col2;
```
фичи:
- собрать первые колонки ключей, по которым есть статистика и сделать из них SC, которые можно использовать - extractColumnStatistics ходит по table, а не по ifp
- запрос `select l_suppkey from lineitem limit 10;`
- Статистика собирается из таблиц, нужно добавить SC-кандидаты в статистику
Про фильтры MCOL-6117
- Исходим из того, что фильтр содержит выражения из:
- колонок, принадлежащих незатронутым таблицам (гр 1)
- выражений, содержащих колонки только незатронутых таблиц (гр 1)
- колонок затронутых таблиц SC (гр 2)
- замапить на SC соответствующих derived
- выражений, содержащих колонки только затронутых таблиц (гр 2)
- замапить SC затронутых таблиц на SC derived
- ОПТ замапить на SC затронутых таблиц, если нет AC в поддереве
- выражений, содержащих колонки затронутых таблиц и колонки незатронутых таблиц (гр 2)
- замапить SC затронутых таблиц на SC derived
- применить правила маппинга SC в выражении фильтра
- прогнать существующее правило проброса условий вниз
- если не получится, то клонировать дерево фильтра и заменять нерелевантные части на true
- добавить правило очистки от constant true в дереве
- правило проброса условий вниз работает для derived, но не работает для UNION
- оптимизация: добавить правило проброса условий в UNION