1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-10-31 18:30:33 +03:00
Files
mariadb-columnstore-engine/docs/QA_parallel.md

42 KiB
Raw Blame History

  • addExpresssionStepsToBps, combineJobStepsByTable
    • if TableInfoMap fTableOid is 0

jobInfo.keyInfo->tupleKeyMap CrossEngineStep is created, replacing TBPS if fTableOid is 0 setTupleInfo sets

mcsSetConfig CrossEngineSupport User 'cejuser'
mcsSetConfig CrossEngineSupport Password 'Vagrant1|0000001'

CREATE USER IF NOT EXISTS'cejuser'@'localhost' IDENTIFIED BY 'Vagrant1|0000001';
GRANT ALL PRIVILEGES ON *.* TO 'cejuser'@'localhost';
FLUSH PRIVILEGES;
create table i1(i bigint)engine=innodb;
insert into i1 SELECT floor(rand(seq)*50001) FROM seq_1_to_1000;

analyze table i1 PERSISTENT FOR ALL;
MariaDB [mysql]> select * from column_stats;
+---------+------------+-------------+-----------+-----------+-------------+------------+---------------+-----------+-----------+-----------+
| db_name | table_name | column_name | min_value | max_value | nulls_ratio | avg_length | avg_frequency | hist_size | hist_type | histogram |
+---------+------------+-------------+-----------+-----------+-------------+------------+---------------+-----------+-----------+-----------+
| test    | i1         | i           | 12        | 49977     |      0.0000 |     8.0000 |        1.0183 |        10 | JSON_HB   | {
  "target_histogram_size": 10,
  "collected_at": "2025-05-29 14:54:20",
  "collected_by": "11.4.5-3-MariaDB-debug",
  "histogram_hb": [
    {
      "start": "12",
      "size": 0.100081811,
      "ndv": 1101
    },
    {
      "start": "30480",
      "size": 0.100081811,
      "ndv": 1101
    },
    {
      "start": "31581",
      "size": 0.100081811,
      "ndv": 1101
    },
    {
      "start": "32682",
      "size": 0.100081811,
      "ndv": 1074
    },
    {
      "start": "33756",
      "size": 0.100081811,
      "ndv": 1072
    },
    {
      "start": "34828",
      "size": 0.100081811,
      "ndv": 1071
    },
    {
      "start": "35899",
      "size": 0.100081811,
      "ndv": 1072
    },
    {
      "start": "36971",
      "size": 0.100081811,
      "ndv": 1072
    },
    {
      "start": "38043",
      "size": 0.100081811,
      "ndv": 1072
    },
    {
      "start": "39115",
      "end": "49977",
      "size": 0.099263703,
      "ndv": 1067
    }
  ]
} |
+---------+------------+-------------+-----------+-----------+-------------+------------+---------------+-----------+-----------+-----------+
1 row in set (0.001 sec)

Get buckets. Need to come up with recursive CTE that reduces buckets down to N, where N is a parallel factor for CES.

-- Unnest the JSON_HB histogram stored for column test.i1.i into one row per bucket,
-- exposing each bucket's start value, size (fraction of rows) and number of distinct values.
-- column_stats is referenced unqualified; the session above runs in the `mysql` schema.
-- The comma join is kept on purpose: JSON_TABLE reads stats.histogram from the table to its left.
SELECT
    JSON_UNQUOTE(JSON_EXTRACT(bucket.value, '$.start')) AS start,
    JSON_UNQUOTE(JSON_EXTRACT(bucket.value, '$.size'))  AS size,
    JSON_UNQUOTE(JSON_EXTRACT(bucket.value, '$.ndv'))   AS ndv
FROM column_stats AS stats,
     JSON_TABLE(
         stats.histogram,
         '$.histogram_hb[*]' COLUMNS (value JSON PATH '$')
     ) AS bucket
WHERE stats.db_name = 'test'
  AND stats.table_name = 'i1'
  AND stats.column_name = 'i'
  AND stats.hist_type = 'JSON_HB';

MDB Histogram_json_hb read_statistics_for_table once per runtime get_column_range_cardinality -> every Field::read_stats::histogram -> ? Histogram_json_hb ?

-- UNION ALL probes whose second unit is a join; the self-join must compare cs1 with its alias cs11.
select * from cs1 union all select cs1.i from cs1, cs1 as cs11 where cs1.i=cs11.i;
select * from cs1 union all select cs2.i from cs2, cs3 where cs2.i=cs3.i;

select * from cs1 union all select cs1.i from cs1, cs1 as cs11 where cs1.i=cs11.i;

CREATE TABLE cs1 (i bigint) engine=columnstore;
CREATE table i1(i bigint);
select * from i1 union all select i1.i from i1, cs1 where i1.i=cs1.i;

select i1.i from i1 union all select i1.i from i1, cs1 where i1.i=cs1.i;
Нужно менять верхний уровень 

select i1.i from (select i1.i from i1 union all select i1.i from i1) sub union all select i1.i from i1, cs1 where i1.i=cs1.i;
select * from (select i1.i from i1 union all select i1.i from i1) sub union all select i1.i from i1, cs1 where i1.i=cs1.i;


select i1.i from i1, cs1 where i1.i=cs1.i union all select * from (select i1.i from i1 union all select i1.i from i1) sub;

select i1.i from i1, cs1 where i1.i=cs1.i union all select i1.i from i1;
  • Простой вариант переписывать leaf без JOIN
  • Сложный переписывать leaf с JOIN
    • обе MCS
    • одна MCS

Получается, что если в запросе UNION с UNIT из Foreign, main query будет содержать запрос из primary union unit -> нужен detect primary или нет и переставлять

select * from i1;

select i1.i from i1 rewritten plan differs in columnmap

adjustLastStep JobInfo:: pjColList

When column doesn't have table name it fails to ct makeSubQueryStep and projectSimpleColumn via mapping that delivers ct oid = 100 p jobInfo

buildSimpleColFromDerivedTable

to add derived table:

so colPosition for SC is used to build a mapping of subquery columns' types and store it in JobInfo::vtableColTypes using (subQ table oid, table.alias). В JobList-е есть два неявных инварианта, с поиском которых я провозился дня три:

  • TableAliasName для sub должен иметь пустую schema и tableName, но иметь alias
  • все ReturnedColumn исходного CSEP должны иметь пустые schema и table и ReturnedColumn::colPosition() должен быть явно установлен в соответствии с порядком возвращаемых колонок - иначе возвращаемый из sub RowGroup экзотически взорвёт весь JobList  - вариантов масса

select * from (select * from i1) i;

#include "simplecolumn.h"
#include "execplan/calpontsystemcatalog.h"
#include "simplefilter.h"
#include "constantcolumn.h"

using namespace execplan;
const SOP opeq(new Operator("="));



int main()
{
  CalpontSelectExecutionPlan csep;

  CalpontSelectExecutionPlan::ReturnedColumnList returnedColumnList;
  CalpontSelectExecutionPlan::ColumnMap colMap;

  string columnlength = CALPONT_SCHEMA + "." + SYSCOLUMN_TABLE + "." + COLUMNLEN_COL;

  SimpleColumn* col[1];
  col[0] = new SimpleColumn(columnlength, 22222222);

  SRCP srcp;
  srcp.reset(col[0]);
  colMap.insert({columnlength, srcp});
  csep.columnMapNonStatic(colMap);
  srcp.reset(col[0]->clone());
  returnedColumnList.push_back(srcp);
  csep.returnedCols(returnedColumnList);

  {
    SCSEP csepDerived(new CalpontSelectExecutionPlan());
    CalpontSelectExecutionPlan::ReturnedColumnList returnedColumnListLocal;
    CalpontSelectExecutionPlan::ColumnMap colMapLocal;

    string columnlength = CALPONT_SCHEMA + "." + SYSCOLUMN_TABLE + "." + COLUMNLEN_COL;

    SimpleColumn* col[1];
    col[0] = new SimpleColumn(columnlength, 22222222);

    SRCP srcpLocal;
    srcpLocal.reset(col[0]);
    colMapLocal.insert({columnlength, srcpLocal});
    csepDerived->columnMapNonStatic(colMapLocal);

    srcp.reset(col[0]->clone());
    returnedColumnListLocal.push_back(srcpLocal);
    csepDerived->returnedCols(returnedColumnList);

    CalpontSelectExecutionPlan::SelectList derivedTables;
    derivedTables.push_back(csepDerived);
    csep.derivedTableList(derivedTables);
  }

  CalpontSelectExecutionPlan::TableList tableList = {execplan::CalpontSystemCatalog::TableAliasName("", "", "alias")};
  csep.tableList(tableList);

  CalpontSelectExecutionPlan::SelectList unionVec;

  for (size_t i = 0; i < 3; ++i)
  {
    SCSEP plan(new CalpontSelectExecutionPlan());
    CalpontSelectExecutionPlan::ReturnedColumnList returnedColumnListLocal;
    CalpontSelectExecutionPlan::ColumnMap colMapLocal;

    SRCP srcpLocal;
    srcpLocal.reset(col[0]);
    colMapLocal.insert({columnlength, srcpLocal});
    plan->columnMapNonStatic(colMapLocal);
    srcpLocal.reset(col[0]->clone());
    returnedColumnListLocal.push_back(srcpLocal);
    plan->returnedCols(returnedColumnListLocal);

    plan->txnID(csep.txnID());
    plan->verID(csep.verID());
    plan->sessionID(csep.sessionID());
    plan->columnMapNonStatic(colMapLocal);
    plan->returnedCols(returnedColumnListLocal);
    unionVec.push_back(plan);

    // std::cout << plan->toString() << std::endl;
  }

  csep.unionVec(unionVec);
  std::cout << csep.toString() << std::endl;
}

set columnstore_unstable_optimizer=on; set @@optimizer_switch='derived_merge=off';

Any type of columns must produce SimpleColumn that preserves ?OID?, op type, CS, timezone and position but sets alias sc = new SimpleColumn(); sc->columnName(tcn.column); sc->tableName(tcn.table); sc->schemaName(tcn.schema); sc->oid(oidlist[j].objnum); sc->alias(!ifp->is_explicit_name() ? tcn.column : ifp->name.str); sc->tableAlias(gwi.tbList[i].alias); sc->viewName(viewName, lower_case_table_names); sc->partitions(gwi.tbList[i].partitions); sc->resultType(ct); sc->timeZone(gwi.timeZone);

cols = csep->returnedCols() sc->resultType(cols[j]->resultType());

select i as c1, i as c2 from i1; doesn't work properly

newConstantColumnNotNullUsingValNativeNoTz

set columnstore_unstable_optimizer=on;
set @@optimizer_switch='derived_merge=off';

```SQL
select
  s_name,
  count(*) as numwait
    from
      supplier,
      (select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem) l1,
      orders,
      nation
    where
      s_suppkey = l1.l_suppkey
      and o_orderkey = l1.l_orderkey
      and o_orderstatus = 'F'
      and l1.recdate_gt_commitdate = 1
      and exists(
        select
          l2.l_orderkey
        from
          lineitem l2
        where
          l2.l_orderkey = l1.l_orderkey
          and l2.l_suppkey <> l1.l_suppkey
      )
      and not exists (
        select
          l3.l_orderkey
        from
          lineitem l3
        where
          l3.l_orderkey = l1.l_orderkey
          and l3.l_suppkey <> l1.l_suppkey
          and l3.recdate_gt_commitdate = 1
      )
      and s_nationkey = n_nationkey
      and n_name = 'SAUDI ARABIA'
group by
  s_name
order by
  numwait desc,
  s_name
limit
  100;
select
  s_name,
  count(*) as numwait
    from
      supplier,
      lineitem l1,
      orders,
      nation
    where
      s_suppkey = l1.l_suppkey
      and o_orderkey = l1.l_orderkey
      and o_orderstatus = 'F'
      and l1.recdate_gt_commitdate = 1
      and exists(
        select
          l_orderkey
        from
          lineitem l2
        where
          l2.l_orderkey = l1.l_orderkey
          and l2.l_suppkey <> l1.l_suppkey
      )
      and not exists (
        select
          l_orderkey
        from
          lineitem l3
        where
          l3.l_orderkey = l1.l_orderkey
          and l3.l_suppkey <> l1.l_suppkey
          and l3.recdate_gt_commitdate = 1
      )
      and s_nationkey = n_nationkey
      and n_name = 'SAUDI ARABIA'
group by
  s_name
order by
  numwait desc,
  s_name
limit
  100;

Potentially representative q

select
  s_name,
  count(*) as numwait
    from
      supplier,
      lineitem l1
    where
      s_suppkey = l1.l_suppkey
      and exists(
        select
          l_orderkey
        from
          lineitem l2
        where
          l2.l_orderkey = l1.l_orderkey
      )
group by
  s_name
order by
  numwait desc,
  s_name
limit
  100;
select
  s_name,
  count(*) as numwait
    from
      supplier,
      (select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey < 3000961 UNION ALL select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey >= 3000961 ) l1,
      orders,
      nation
    where
      s_suppkey = l1.l_suppkey
      and o_orderkey = l1.l_orderkey
      and o_orderstatus = 'F'
      and l1.recdate_gt_commitdate = 1
      and exists(
	    select * from 
	    (
		    select
	          l_orderkey,l_suppkey
	        from
	          lineitem
	        where 
		        l_orderkey < 3000961
	        UNION ALL
	        select
	          l_orderkey,l_suppkey
	        from
	          lineitem
	        where 
		        l_orderkey >= 3000961
	    ) l2
        where
          l2.l_orderkey = l1.l_orderkey
          and l2.l_suppkey <> l1.l_suppkey
      )
      and not exists (
        select * from 
	    (
		    select
	          l_orderkey,l_suppkey,recdate_gt_commitdate
	        from
	          lineitem l3_s
	        where 
		        l3_s.l_orderkey < 3000961
		        and l3_s.l_orderkey >= 0
		        and l3_s.recdate_gt_commitdate = 1
	        UNION ALL
	        select
	          l_orderkey,l_suppkey,recdate_gt_commitdate
	        from
	          lineitem l3_s
	        where 
		        l_orderkey >= 3000961 and l3_s.l_orderkey  < 18000000000
		        and l3_s.recdate_gt_commitdate = 1
	    ) l3
        where
          l3.l_orderkey = l1.l_orderkey
          and l3.l_suppkey <> l1.l_suppkey
          
      )
      and s_nationkey = n_nationkey
      and n_name = 'SAUDI ARABIA'
group by
  s_name
order by
  numwait desc,
  s_name
limit
  100;
select
  s_name,
  count(*) as numwait
    from
      supplier,
      (select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem) l1,
      orders,
      nation
    where
      s_suppkey = l1.l_suppkey
      and o_orderkey = l1.l_orderkey
      and o_orderstatus = 'F'
      and l1.recdate_gt_commitdate = 1
      and exists(
	    select * from 
	    (
		    select
	          l_orderkey,l_suppkey
	        from
	          lineitem
	        where 
		        l_orderkey < 3000961
	        UNION ALL
	        select
	          l_orderkey,l_suppkey
	        from
	          lineitem
	        where 
		        l_orderkey >= 3000961
	    ) l2
        where
          l2.l_orderkey = l1.l_orderkey
          and l2.l_suppkey <> l1.l_suppkey
      )
      and not exists (
        select * from 
	    (
		    select
	          l_orderkey,l_suppkey,recdate_gt_commitdate
	        from
	          lineitem
	        where 
		        l_orderkey < 3000961
		        and lineitem.recdate_gt_commitdate = 1
	        UNION ALL
	        select
	          l_orderkey,l_suppkey,recdate_gt_commitdate
	        from
	          lineitem
	        where 
		        l_orderkey >= 3000961
		        and lineitem.recdate_gt_commitdate = 1
	    ) l3
        where
          l3.l_orderkey = l1.l_orderkey
          and l3.l_suppkey <> l1.l_suppkey
      )
      and s_nationkey = n_nationkey
      and n_name = 'SAUDI ARABIA'
group by
  s_name
order by
  numwait desc,
  s_name
limit
  100;
select
  s_name,
  count(*) as numwait
    from
      supplier,
      lineitem l1
    where
      s_suppkey = l1.l_suppkey
      and l1.recdate_gt_commitdate = 1
      and not exists (
        select
          l_orderkey
        from
          lineitem l3
        where
          l3.l_orderkey = l1.l_orderkey
      )
group by
  s_name
order by
  numwait desc,
  s_name
limit
  100;
cs_package_manager.sh install dev stable-23.10 cron/792 -dev cspkg
select
  s_name,
  count(*) as numwait
    from
      supplier,
      -- lineitem split into 20 l_orderkey ranges. Ranges are half-open [lo, hi) so a boundary key
      -- is read by exactly one branch; only the last branch keeps its inclusive upper bound.
      (select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey < 3000960 union all
      select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey >= 3000960 and l_orderkey < 6000960 union all
      select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey >= 6000960 and l_orderkey < 9000960 union all
      select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey >= 9000960 and l_orderkey < 12000960 union all
      select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey >= 12000960 and l_orderkey < 15000960 union all
      select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey >= 15000960 and l_orderkey < 18000960 union all
      select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey >= 18000960 and l_orderkey < 21000960 union all
      select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey >= 21000960 and l_orderkey < 24000960 union all
      select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey >= 24000960 and l_orderkey < 27000960 union all
      select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey >= 27000960 and l_orderkey < 30000960 union all
      select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey >= 30000960 and l_orderkey < 33000960 union all
      select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey >= 33000960 and l_orderkey < 36000960 union all
      select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey >= 36000960 and l_orderkey < 39000960 union all
      select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey >= 39000960 and l_orderkey < 42000960 union all
      select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey >= 42000960 and l_orderkey < 45000960 union all
      select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey >= 45000960 and l_orderkey < 48000960 union all
      select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey >= 48000960 and l_orderkey < 52000960 union all
      select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey >= 52000960 and l_orderkey < 55000960 union all
      select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey >= 55000960 and l_orderkey < 57000960 union all
      select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey >= 57000960 and l_orderkey <= 60000960
	) l1,
      orders,
      nation
    where
      s_suppkey = l1.l_suppkey
      and o_orderkey = l1.l_orderkey
      and o_orderstatus = 'F'
      and l1.recdate_gt_commitdate = 1
      and exists(
	    select * from 
	    (
	    -- Every branch projects the same two columns (UNION requires equal column counts).
	    -- Ranges are half-open [lo, hi) so a boundary key is read by exactly one branch.
	    select l_orderkey,l_suppkey from lineitem where l_orderkey < 3000960 union all
      select l_orderkey,l_suppkey from lineitem where l_orderkey >= 3000960 and l_orderkey < 6000960 union all
      select l_orderkey,l_suppkey from lineitem where l_orderkey >= 6000960 and l_orderkey < 9000960 union all
      select l_orderkey,l_suppkey from lineitem where l_orderkey >= 9000960 and l_orderkey < 12000960 union all
      select l_orderkey,l_suppkey from lineitem where l_orderkey >= 12000960 and l_orderkey < 15000960 union all
      select l_orderkey,l_suppkey from lineitem where l_orderkey >= 15000960 and l_orderkey < 18000960 union all
      select l_orderkey,l_suppkey from lineitem where l_orderkey >= 18000960 and l_orderkey < 21000960 union all
      select l_orderkey,l_suppkey from lineitem where l_orderkey >= 21000960 and l_orderkey < 24000960 union all
      select l_orderkey,l_suppkey from lineitem where l_orderkey >= 24000960 and l_orderkey < 27000960 union all
      select l_orderkey,l_suppkey from lineitem where l_orderkey >= 27000960 and l_orderkey < 30000960 union all
      select l_orderkey,l_suppkey from lineitem where l_orderkey >= 30000960 and l_orderkey < 33000960 union all
      select l_orderkey,l_suppkey from lineitem where l_orderkey >= 33000960 and l_orderkey < 36000960 union all
      select l_orderkey,l_suppkey from lineitem where l_orderkey >= 36000960 and l_orderkey < 39000960 union all
      select l_orderkey,l_suppkey from lineitem where l_orderkey >= 39000960 and l_orderkey < 42000960 union all
      select l_orderkey,l_suppkey from lineitem where l_orderkey >= 42000960 and l_orderkey < 45000960 union all
      select l_orderkey,l_suppkey from lineitem where l_orderkey >= 45000960 and l_orderkey < 48000960 union all
      select l_orderkey,l_suppkey from lineitem where l_orderkey >= 48000960 and l_orderkey < 52000960 union all
      select l_orderkey,l_suppkey from lineitem where l_orderkey >= 52000960 and l_orderkey < 55000960 union all
      select l_orderkey,l_suppkey from lineitem where l_orderkey >= 55000960 and l_orderkey < 57000960 union all
      select l_orderkey,l_suppkey from lineitem where l_orderkey >= 57000960 and l_orderkey <= 60000960
	    ) l2
        where
          l2.l_orderkey = l1.l_orderkey
          and l2.l_suppkey <> l1.l_suppkey
      )
      and not exists (
        select * from 
	    (
	    -- Late-received lineitems only: every branch declares alias l3_s and applies
	    -- recdate_gt_commitdate = 1, so the union matches the unsplit NOT EXISTS subquery.
	    -- Ranges are half-open [lo, hi) so a boundary key is read by exactly one branch.
	    select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem l3_s where l_orderkey < 3000960 and l3_s.recdate_gt_commitdate = 1  union all
      select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem l3_s where l_orderkey >= 3000960 and l_orderkey < 6000960 and l3_s.recdate_gt_commitdate = 1  union all
      select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem l3_s where l_orderkey >= 6000960 and l_orderkey < 9000960 and l3_s.recdate_gt_commitdate = 1  union all
      select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem l3_s where l_orderkey >= 9000960 and l_orderkey < 12000960 and l3_s.recdate_gt_commitdate = 1  union all
      select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem l3_s where l_orderkey >= 12000960 and l_orderkey < 15000960 and l3_s.recdate_gt_commitdate = 1  union all
      select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem l3_s where l_orderkey >= 15000960 and l_orderkey < 18000960 and l3_s.recdate_gt_commitdate = 1  union all
      select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem l3_s where l_orderkey >= 18000960 and l_orderkey < 21000960 and l3_s.recdate_gt_commitdate = 1  union all
      select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem l3_s where l_orderkey >= 21000960 and l_orderkey < 24000960 and l3_s.recdate_gt_commitdate = 1  union all
      select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem l3_s where l_orderkey >= 24000960 and l_orderkey < 27000960 and l3_s.recdate_gt_commitdate = 1  union all
      select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem l3_s where l_orderkey >= 27000960 and l_orderkey < 30000960 and l3_s.recdate_gt_commitdate = 1  union all
      select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem l3_s where l_orderkey >= 30000960 and l_orderkey < 33000960 and l3_s.recdate_gt_commitdate = 1  union all
      select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem l3_s where l_orderkey >= 33000960 and l_orderkey < 36000960 and l3_s.recdate_gt_commitdate = 1  union all
      select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem l3_s where l_orderkey >= 36000960 and l_orderkey < 39000960 and l3_s.recdate_gt_commitdate = 1  union all
      select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem l3_s where l_orderkey >= 39000960 and l_orderkey < 42000960 and l3_s.recdate_gt_commitdate = 1  union all
      select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem l3_s where l_orderkey >= 42000960 and l_orderkey < 45000960 and l3_s.recdate_gt_commitdate = 1  union all
      select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem l3_s where l_orderkey >= 45000960 and l_orderkey < 48000960 and l3_s.recdate_gt_commitdate = 1  union all
      select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem l3_s where l_orderkey >= 48000960 and l_orderkey < 52000960 and l3_s.recdate_gt_commitdate = 1  union all
      select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem l3_s where l_orderkey >= 52000960 and l_orderkey < 55000960 and l3_s.recdate_gt_commitdate = 1  union all
      select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem l3_s where l_orderkey >= 55000960 and l_orderkey < 57000960 and l3_s.recdate_gt_commitdate = 1  union all
      select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem l3_s where l_orderkey >= 57000960 and l_orderkey <= 60000960 and l3_s.recdate_gt_commitdate = 1
	    ) l3
        where
          l3.l_orderkey = l1.l_orderkey
          and l3.l_suppkey <> l1.l_suppkey
          
      )
      and s_nationkey = n_nationkey
      and n_name = 'SAUDI ARABIA'
group by
  s_name
order by
  numwait desc,
  s_name
limit
  100;
-- Top 100 suppliers by number of lineitems, computed over lineitem split into 20 l_orderkey
-- ranges. Ranges are half-open [lo, hi) so a boundary key is counted once; only the last
-- branch keeps its inclusive upper bound.
select
  l_suppkey,
  count(l_orderkey) as numwait
from (
  select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey < 3000960 union all
  select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey >= 3000960 and l_orderkey < 6000960 union all
  select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey >= 6000960 and l_orderkey < 9000960 union all
  select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey >= 9000960 and l_orderkey < 12000960 union all
  select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey >= 12000960 and l_orderkey < 15000960 union all
  select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey >= 15000960 and l_orderkey < 18000960 union all
  select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey >= 18000960 and l_orderkey < 21000960 union all
  select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey >= 21000960 and l_orderkey < 24000960 union all
  select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey >= 24000960 and l_orderkey < 27000960 union all
  select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey >= 27000960 and l_orderkey < 30000960 union all
  select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey >= 30000960 and l_orderkey < 33000960 union all
  select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey >= 33000960 and l_orderkey < 36000960 union all
  select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey >= 36000960 and l_orderkey < 39000960 union all
  select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey >= 39000960 and l_orderkey < 42000960 union all
  select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey >= 42000960 and l_orderkey < 45000960 union all
  select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey >= 45000960 and l_orderkey < 48000960 union all
  select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey >= 48000960 and l_orderkey < 52000960 union all
  select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey >= 52000960 and l_orderkey < 55000960 union all
  select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey >= 55000960 and l_orderkey < 57000960 union all
  select l_orderkey,l_suppkey,recdate_gt_commitdate from lineitem where l_orderkey >= 57000960 and l_orderkey <= 60000960
) tmp
group by
  l_suppkey
order by
  numwait desc,
  l_suppkey
limit
  100;
  
create table lineitem (
l_orderkey int,
l_partkey int,
l_suppkey int,
l_linenumber bigint,
l_quantity decimal(12,2),
l_extendedprice decimal(12,2),
l_discount decimal(12,2),
l_tax decimal(12,2),
l_returnflag char (1),
l_linestatus char (1),
l_shipdate date,
l_commitdate date,
l_receiptdate date,
l_shipinstruct char (25),
l_shipmode char (10),
l_comment varchar (44)
);

--mariadb tpc_h_i -e "load data infile '/data/columnstore-tooling/tpc-h/dbgen/lineitem.tbl' INTO TABLE lineitem FIELDS TERMINATED BY '|'";
mariadb test -e "load data infile '/data/regr/testData/tpch/1g/lineitem.tbl' INTO TABLE lineitem FIELDS TERMINATED BY '|'";


SET SESSION alter_algorithm='INSTANT';
alter table lineitem add column `recdate_gt_commitdate` tinyint(4) not null default 0;
set autocommit=0;LOCK TABLE lineitem WRITE; update lineitem set recdate_gt_commitdate = 1 where l_receiptdate > l_commitdate; commit; UNLOCK TABLES;

alter table lineitem add index(`l_orderkey`, `l_suppkey`,recdate_gt_commitdate);

select l_suppkey, count(l_orderkey) as numwait from lineitem group by l_suppkey order by numwait desc, l_suppkey limit 100;

set columnstore_unstable_optimizer=on;
set @@optimizer_switch='derived_merge=off';
select l_suppkey, count(l_orderkey) as numwait from (select l_suppkey, l_orderkey from lineitem) s group by l_suppkey order by numwait desc, l_suppkey limit 100;


/etc/my.cnf.d/columnstore.cnf
columnstore_innodb_queries_use_mcs = on

mcsSetConfig CrossEngineSupport User 'cejuser'
mcsSetConfig CrossEngineSupport Password 'Vagrant1|0000001'

CREATE USER IF NOT EXISTS'cejuser'@'localhost' IDENTIFIED BY 'Vagrant1|0000001';
GRANT ALL PRIVILEGES ON *.* TO 'cejuser'@'localhost';
FLUSH PRIVILEGES;
-- Three-row fixture table with persistent (engine-independent) statistics and an index on col.
create table i1(col bigint);
insert into i1 (col) values (42),(45),(46);
-- MariaDB syntax is ANALYZE TABLE ... PERSISTENT FOR ALL, and an index added without a name
-- goes through ALTER TABLE ... ADD INDEX (CREATE INDEX requires an index name).
analyze table i1 persistent for all;
alter table i1 add index(col);
select col from i1;


set columnstore_unstable_optimizer=on;
set @@optimizer_switch='derived_merge=off';
select calsettrace(1);

select l_orderkey,l_suppkey from lineitem limit 10;
-- Template for a two-way split of lineitem on l_orderkey. min_value, median_value and max_value
-- are placeholders to be substituted from the column statistics; the two ranges together cover
-- [min_value, max_value] without dropping or repeating any key.
select l_suppkey,l_orderkey from (
	select l_suppkey,l_orderkey from lineitem where l_orderkey >= min_value and l_orderkey < median_value union all
	select l_suppkey,l_orderkey from lineitem where l_orderkey >= median_value and l_orderkey <= max_value
	 ) s1;
select l_suppkey,l_orderkey from (select l_suppkey,l_orderkey from lineitem limit 10) s1;


select l_suppkey,l_orderkey from (select l_suppkey,l_orderkey from lineitem limit 10) s1;


select l_suppkey+1,l_orderkey+1 from (select l_suppkey+1,l_orderkey+1 from lineitem limit 10) s1;

-- must ignore
select nl.l_orderkey,l.l_suppkey,nl.l_suppkey,l.l_suppkey from lineitem l,lineitem_10rows nl WHERE l.l_suppkey=nl.l_suppkey limit 10;
-- test set with join
select nl.l_orderkey,l.l_suppkey,nl.l_suppkey,l.l_orderkey from lineitem l,lineitem_10rows nl WHERE l.l_suppkey=nl.l_suppkey limit 10;

-- join with derived
select nl.l_orderkey,l.l_suppkey,nl.l_suppkey,l.l_orderkey from (select l_orderkey,l_suppkey from lineitem) l,lineitem_10rows nl WHERE l.l_suppkey=nl.l_suppkey limit 10;
-- subquery with AC
select * from (select l_orderkey,l_suppkey+1 from lineitem limit 10) sa where sa.l_orderkey = 1999905;
-- subquery with FC

select nl.l_orderkey,l.l_suppkey,nl.l_suppkey,l.l_orderkey from (select l_orderkey,l_suppkey from lineitem) l,(select l_orderkey,l_suppkey from lineitem_10rows) nl WHERE l.l_suppkey=nl.l_suppkey limit 10;

select nl.l_orderkey,l.l_suppkey,nl.l_suppkey,l.l_orderkey from (select l_orderkey,l_suppkey from lineitem) l,lineitem_10rows nl WHERE l.l_suppkey=nl.l_suppkey limit 10;

-- feat
-- Запросы без ключевой колонки
select nl.l_orderkey,l.l_suppkey+1 from lineitem l,lineitem_10rows nl WHERE l.l_suppkey=nl.l_suppkey limit 10;

Check if the expression uses only index keys uses_index_fields_only

Tasks:

  • expand the supported types: add varchar and timestamps
  • add tests to see if expressions work
  • search for a specific interesting column available
  • replacing rule filter to work on tables also
    • alter rule to handle this case
  • add support for correlated subquery
Jul 16 22:02:27 drrtuy-u24 mariadbd[727576]: looking for l_orderkey in ctx.gwi.columnStatisticsMap  with size 1
Jul 16 22:02:27 drrtuy-u24 mariadbd[727576]: key l_orderkey vector size 4
Jul 16 22:02:27 drrtuy-u24 mariadbd[727576]: columnStatistics.size() 4
Jul 16 22:02:27 drrtuy-u24 mariadbd[727576]: bucket.start_value 1
Jul 16 22:02:27 drrtuy-u24 mariadbd[727576]: bucket.start_value 1500738
Jul 16 22:02:27 drrtuy-u24 mariadbd[727576]: currentLowerBound 1 currentUpperBound 1875794
Jul 16 22:02:27 drrtuy-u24 mariadbd[727576]: bucket.start_value 3000961
Jul 16 22:02:27 drrtuy-u24 mariadbd[727576]: bucket.start_value 6000000
Jul 16 22:02:27 drrtuy-u24 mariadbd[727576]: currentLowerBound 3000961 currentUpperBound 6374911
Jul 16 22:02:36 drrtuy-u24 mariadbd[727576]: Adding column statistics for l_orderkey
Jul 16 22:02:36 drrtuy-u24 mariadbd[727576]: Type of histogram object: 17Histogram_json_hb
Jul 16 22:02:36 drrtuy-u24 mariadbd[727576]: gwi.columnStatisticsMap[ifp->field->field_name.str].size() 4

Алгоритм для derived c учётом semi-join/scalar subquery фильтров:

  • пройти по таблицам и найти подходящие
    • для каждой подходящей
      • оставить фильтр, заменив обращение к локальным колонкам на true
      • создать union и добавить
        • в проекцию все колонки/выражения, содержащие только колонки таблицы для целевой таблицы
        • в фильтр все пригодные выражения из исходного фильтра
          • в фильтре найти для каждой данной таблицы эквивалент?
            • использовать исходный фильтр, заменив все предикаты с нелокальными колонками на true?
          • не должно быть выражений, содержащих несколько таблиц
          • что делать с join правилами?
      • добавить в map с ключом (schema,table,alias) -> имя derived, если derived был создан
  • пройти по проекции и заменить обращения к колонке из derived на обращение к derived

replaceRefCol getSimpleCols pt->walk(getSimpleCols, &fSimpleColumnList); SC::derivedTable

Для запроса вида

select nl.l_orderkey,l.l_suppkey,nl.l_suppkey,l.l_orderkey from (select l_orderkey,l_suppkey from lineitem) l,(select l_orderkey,l_suppkey from lineitem_10rows) nl WHERE l.l_suppkey=nl.l_suppkey limit 10;

Добавить в фильтр ограничение на то, что может быть в проекции масштабируемой таблицы. Проход по SF для поиска SCs: setSimpleColumnList + simpleColumnList. -DWITH_SAFEMALLOC=OFF and sql_alloc.h: the SqlAlloc class controls memory allocation in a thread, blowing up if I want to copy a Histogram_json_hb instance. Если несколько таблиц, то AC/FC -> можно выбрать и заменить данные в указателях на колонки.

1 UNION UNIT возвращает нормальное значение для второй колонки, 2ой UNION UNIT в RClist имеет две l_orderkey ! - SC::setSimpleColumnList - добавлял в vector, не очищая его.

set columnstore_unstable_optimizer=on;
set @@optimizer_switch='derived_merge=off';
select calsettrace(1);

select
	l_returnflag,
	l_linestatus,
	sum(l_quantity) as sum_qty,
	sum(l_extendedprice) as sum_base_price,
	sum(l_extendedprice * (1 - l_discount)) as sum_disc_price,
	sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) as sum_charge,
	avg(l_quantity) as avg_qty,
	avg(l_extendedprice) as avg_price,
	avg(l_discount) as avg_disc,
	count(*) as count_order
from
	lineitem
where
	l_shipdate <= date '1998-12-01' - interval '90' day
group by
	l_returnflag,
	l_linestatus
order by
	l_returnflag,
	l_linestatus;
16,5 secs

-- Same aggregate as the single-table query, but lineitem is read through a
-- derived table built as a UNION ALL of 25 disjoint, contiguous l_orderkey
-- ranges (width 250000; the last branch is closed at 6144000).
-- The shipdate filter, grouping and ordering are applied on top of the union.
SELECT
    l_returnflag,
    l_linestatus,
    SUM(l_quantity) AS sum_qty,
    SUM(l_extendedprice) AS sum_base_price,
    SUM(l_extendedprice * (1 - l_discount)) AS sum_disc_price,
    SUM(l_extendedprice * (1 - l_discount) * (1 + l_tax)) AS sum_charge,
    AVG(l_quantity) AS avg_qty,
    AVG(l_extendedprice) AS avg_price,
    AVG(l_discount) AS avg_disc,
    COUNT(*) AS count_order
FROM (
    SELECT l_orderkey, l_suppkey, l_returnflag, l_linestatus, l_quantity, l_extendedprice, l_discount, l_tax, l_shipdate
    FROM lineitem
    WHERE l_orderkey >= 1 AND l_orderkey < 250001
    UNION ALL
    SELECT l_orderkey, l_suppkey, l_returnflag, l_linestatus, l_quantity, l_extendedprice, l_discount, l_tax, l_shipdate
    FROM lineitem
    WHERE l_orderkey >= 250001 AND l_orderkey < 500001
    UNION ALL
    SELECT l_orderkey, l_suppkey, l_returnflag, l_linestatus, l_quantity, l_extendedprice, l_discount, l_tax, l_shipdate
    FROM lineitem
    WHERE l_orderkey >= 500001 AND l_orderkey < 750001
    UNION ALL
    SELECT l_orderkey, l_suppkey, l_returnflag, l_linestatus, l_quantity, l_extendedprice, l_discount, l_tax, l_shipdate
    FROM lineitem
    WHERE l_orderkey >= 750001 AND l_orderkey < 1000001
    UNION ALL
    SELECT l_orderkey, l_suppkey, l_returnflag, l_linestatus, l_quantity, l_extendedprice, l_discount, l_tax, l_shipdate
    FROM lineitem
    WHERE l_orderkey >= 1000001 AND l_orderkey < 1250001
    UNION ALL
    SELECT l_orderkey, l_suppkey, l_returnflag, l_linestatus, l_quantity, l_extendedprice, l_discount, l_tax, l_shipdate
    FROM lineitem
    WHERE l_orderkey >= 1250001 AND l_orderkey < 1500001
    UNION ALL
    SELECT l_orderkey, l_suppkey, l_returnflag, l_linestatus, l_quantity, l_extendedprice, l_discount, l_tax, l_shipdate
    FROM lineitem
    WHERE l_orderkey >= 1500001 AND l_orderkey < 1750001
    UNION ALL
    SELECT l_orderkey, l_suppkey, l_returnflag, l_linestatus, l_quantity, l_extendedprice, l_discount, l_tax, l_shipdate
    FROM lineitem
    WHERE l_orderkey >= 1750001 AND l_orderkey < 2000001
    UNION ALL
    SELECT l_orderkey, l_suppkey, l_returnflag, l_linestatus, l_quantity, l_extendedprice, l_discount, l_tax, l_shipdate
    FROM lineitem
    WHERE l_orderkey >= 2000001 AND l_orderkey < 2250001
    UNION ALL
    SELECT l_orderkey, l_suppkey, l_returnflag, l_linestatus, l_quantity, l_extendedprice, l_discount, l_tax, l_shipdate
    FROM lineitem
    WHERE l_orderkey >= 2250001 AND l_orderkey < 2500001
    UNION ALL
    SELECT l_orderkey, l_suppkey, l_returnflag, l_linestatus, l_quantity, l_extendedprice, l_discount, l_tax, l_shipdate
    FROM lineitem
    WHERE l_orderkey >= 2500001 AND l_orderkey < 2750001
    UNION ALL
    SELECT l_orderkey, l_suppkey, l_returnflag, l_linestatus, l_quantity, l_extendedprice, l_discount, l_tax, l_shipdate
    FROM lineitem
    WHERE l_orderkey >= 2750001 AND l_orderkey < 3000001
    UNION ALL
    SELECT l_orderkey, l_suppkey, l_returnflag, l_linestatus, l_quantity, l_extendedprice, l_discount, l_tax, l_shipdate
    FROM lineitem
    WHERE l_orderkey >= 3000001 AND l_orderkey < 3250001
    UNION ALL
    SELECT l_orderkey, l_suppkey, l_returnflag, l_linestatus, l_quantity, l_extendedprice, l_discount, l_tax, l_shipdate
    FROM lineitem
    WHERE l_orderkey >= 3250001 AND l_orderkey < 3500001
    UNION ALL
    SELECT l_orderkey, l_suppkey, l_returnflag, l_linestatus, l_quantity, l_extendedprice, l_discount, l_tax, l_shipdate
    FROM lineitem
    WHERE l_orderkey >= 3500001 AND l_orderkey < 3750001
    UNION ALL
    SELECT l_orderkey, l_suppkey, l_returnflag, l_linestatus, l_quantity, l_extendedprice, l_discount, l_tax, l_shipdate
    FROM lineitem
    WHERE l_orderkey >= 3750001 AND l_orderkey < 4000001
    UNION ALL
    SELECT l_orderkey, l_suppkey, l_returnflag, l_linestatus, l_quantity, l_extendedprice, l_discount, l_tax, l_shipdate
    FROM lineitem
    WHERE l_orderkey >= 4000001 AND l_orderkey < 4250001
    UNION ALL
    SELECT l_orderkey, l_suppkey, l_returnflag, l_linestatus, l_quantity, l_extendedprice, l_discount, l_tax, l_shipdate
    FROM lineitem
    WHERE l_orderkey >= 4250001 AND l_orderkey < 4500001
    UNION ALL
    SELECT l_orderkey, l_suppkey, l_returnflag, l_linestatus, l_quantity, l_extendedprice, l_discount, l_tax, l_shipdate
    FROM lineitem
    WHERE l_orderkey >= 4500001 AND l_orderkey < 4750001
    UNION ALL
    SELECT l_orderkey, l_suppkey, l_returnflag, l_linestatus, l_quantity, l_extendedprice, l_discount, l_tax, l_shipdate
    FROM lineitem
    WHERE l_orderkey >= 4750001 AND l_orderkey < 5000001
    UNION ALL
    SELECT l_orderkey, l_suppkey, l_returnflag, l_linestatus, l_quantity, l_extendedprice, l_discount, l_tax, l_shipdate
    FROM lineitem
    WHERE l_orderkey >= 5000001 AND l_orderkey < 5250001
    UNION ALL
    SELECT l_orderkey, l_suppkey, l_returnflag, l_linestatus, l_quantity, l_extendedprice, l_discount, l_tax, l_shipdate
    FROM lineitem
    WHERE l_orderkey >= 5250001 AND l_orderkey < 5500001
    UNION ALL
    SELECT l_orderkey, l_suppkey, l_returnflag, l_linestatus, l_quantity, l_extendedprice, l_discount, l_tax, l_shipdate
    FROM lineitem
    WHERE l_orderkey >= 5500001 AND l_orderkey < 5750001
    UNION ALL
    SELECT l_orderkey, l_suppkey, l_returnflag, l_linestatus, l_quantity, l_extendedprice, l_discount, l_tax, l_shipdate
    FROM lineitem
    WHERE l_orderkey >= 5750001 AND l_orderkey < 6000001
    UNION ALL
    SELECT l_orderkey, l_suppkey, l_returnflag, l_linestatus, l_quantity, l_extendedprice, l_discount, l_tax, l_shipdate
    FROM lineitem
    WHERE l_orderkey >= 6000001 AND l_orderkey <= 6144000
) tmp
WHERE l_shipdate <= DATE '1998-12-01' - INTERVAL '90' DAY
GROUP BY
    l_returnflag,
    l_linestatus
ORDER BY
    l_returnflag,
    l_linestatus;

9,5 secs
-- Session setup for the query that follows.
-- NOTE(review): columnstore_unstable_optimizer presumably switches on the
-- experimental ColumnStore rewrite rules discussed in these notes — confirm.
SET columnstore_unstable_optimizer = ON;
-- Turn off the derived_merge optimizer flag for this session.
SET @@optimizer_switch = 'derived_merge=off';
-- NOTE(review): calsettrace(1) presumably enables ColumnStore query tracing — confirm.
SELECT calsettrace(1);

-- Aggregates lineitem rows with l_shipdate on or before 1998-12-01 minus 90 days,
-- grouped and ordered by (l_returnflag, l_linestatus).
SELECT
    l_returnflag,
    l_linestatus,
    SUM(l_quantity) AS sum_qty,
    SUM(l_extendedprice) AS sum_base_price,
    SUM(l_extendedprice * (1 - l_discount)) AS sum_disc_price,
    SUM(l_extendedprice * (1 - l_discount) * (1 + l_tax)) AS sum_charge,
    AVG(l_quantity) AS avg_qty,
    AVG(l_extendedprice) AS avg_price,
    AVG(l_discount) AS avg_disc,
    COUNT(*) AS count_order
FROM lineitem
WHERE l_shipdate <= DATE '1998-12-01' - INTERVAL '90' DAY
GROUP BY
    l_returnflag,
    l_linestatus
ORDER BY
    l_returnflag,
    l_linestatus;

(45,42) 
(46,40)
(48,41),
(NULL,41)

1 - 45*42 + 46 * 40
2 - 48 * 41

? sum(c1 * c2) | range(k) = [45,46] === sum( sum(c1 * c2) | range(k) =[45,46), sum(c1 * c2) | range(k) =[46,48))

-- Single-table form: number of non-NULL col values per col2 group.
SELECT
    COUNT(col),
    col2
FROM i1
GROUP BY col2;

-- Sketch: the same grouping computed over a derived table that splits i1 into
-- two ranges of col joined with UNION ALL.
-- min, median and max are placeholders for boundary values of col
-- (presumably taken from column statistics — confirm); substitute literals
-- before running.
-- NOTE(review): rows where col IS NULL match neither range, so a col2 group
-- consisting only of NULL col values would be missing here, unlike in the
-- single-table query.
SELECT
    COUNT(s1.col),
    s1.col2,
    MIN(s1.col) + s1.col2
FROM (
    -- Lower range, upper bound exclusive.
    SELECT col, col2 FROM i1 WHERE col >= min AND col < median
    UNION ALL
    -- Upper range, both bounds inclusive.
    SELECT col, col2 FROM i1 WHERE col >= median AND col <= max
) s1
GROUP BY s1.col2;



фичи:

  • собрать первые колонки ключей, по которым есть статистика, и сделать из них SC, которые можно использовать - extractColumnStatistics ходит по table, а не по ifp
  • запрос select l_suppkey from lineitem limit 10;
  • Статистика собирается из таблиц, нужно добавить SC-кандидаты в статистику

Про фильтры MCOL-6117

  • Рассмотрим случай, когда фильтр содержит выражения из:
    • колонок, принадлежащих незатронутым таблицам (гр 1)

    • выражений, содержащих колонки только незатронутых таблиц (гр 1)

    • колонок затронутых таблиц SC (гр 2)

      • замапить на SC соответствующих derived
    • выражений, содержащих колонки только затронутых таблиц (гр 2)

      • замапить SC затронутых таблиц на SC derived
      • ОПТ замапить на SC затронутых таблиц, если нет AC в поддереве
    • выражений, содержащих колонки затронутых таблиц и колонки незатронутых таблиц (гр 2)

      • замапить SC затронутых таблиц на SC derived
  • применить правила маппинга SC в выражении фильтра
  • прогнать существующее правило проброса условий вниз
    • если не получится, то клонировать дерево фильтра и заменять нерелевантные части на true
    • добавить правило очистки от constant true в дереве
    • правило проброса условий вниз работает для derived, но не работает для UNION
      • оптимизация: добавить правило проброса условий в UNION