1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-07-29 08:21:15 +03:00

MCOL-4530: common conjunction top rewrite (#2673)

Added a logical transformation of execplan::ParseTree that factors out the common conjunct in expressions of the form "(A and B) or (A and C)", i.e. rewrites them as "A and (B or C)", so that TPC-H query 19 passes.

Co-authored-by: Leonid Fedorov <leonid.fedorov@mariadb.com>
This commit is contained in:
Andrey Piskunov
2023-02-27 18:23:19 +02:00
committed by GitHub
parent 8671f55784
commit b6808c97f1
18 changed files with 6097 additions and 23 deletions

View File

@ -34,6 +34,7 @@ set(execplan_LIB_SRCS
pseudocolumn.cpp
range.cpp
returnedcolumn.cpp
rewrites.cpp
rowcolumn.cpp
selectfilter.cpp
sessionmanager.cpp

View File

@ -30,6 +30,8 @@
#include "treenode.h"
#include "operator.h"
#include "mcs_decimal.h"
#include <boost/core/demangle.hpp>
namespace rowgroup
{
@ -78,6 +80,21 @@ class ParseTree
return fLeft;
}
// Returns the address of the left-child pointer itself (not the child), so a
// caller can re-point or clear the link in place.
inline ParseTree** leftRef()
{
return &fLeft;
}
// Clears the right-child link without deleting the child it pointed to.
inline void nullRight()
{
fRight = nullptr;
}
// Clears the left-child link without deleting the child it pointed to.
inline void nullLeft()
{
fLeft = nullptr;
}
inline void right(ParseTree* expressionTree)
{
fRight = expressionTree;
@ -94,6 +111,11 @@ class ParseTree
return fRight;
}
// Returns the address of the right-child pointer itself (not the child), so a
// caller can re-point or clear the link in place.
inline ParseTree** rightRef()
{
return &fRight;
}
inline void data(TreeNode* data)
{
fData = data;
@ -498,7 +520,11 @@ inline void ParseTree::draw(const ParseTree* n, std::ostream& dotFile)
dotFile << "n" << (void*)n << " -> "
<< "n" << (void*)r << std::endl;
dotFile << "n" << (void*)n << " [label=\"" << n->data()->data() << "\"]" << std::endl;
auto& node = *(n->data());
dotFile << "n" << (void*)n << " [label=\"" <<
n->data()->data() << " (" <<
n << ") " <<
boost::core::demangle(typeid(node).name()) << "\"]" << std::endl;
}
inline void ParseTree::drawTree(std::string filename)

437
dbcon/execplan/rewrites.cpp Normal file
View File

@ -0,0 +1,437 @@
/*
Copyright (C) 2022 MariaDB Corporation
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General
Public License as published by the Free Software Foundation; version 2 of the License. This program is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty
of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with this program; if not, write to
the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
#include "rewrites.h"
#include <typeinfo>
#include "objectreader.h"
#include "installdir.h"
#include "parsetree.h"
#include "operator.h"
#include "simplefilter.h"
#include "logicoperator.h"
#include <boost/core/demangle.hpp>
#include <set>
#include <string>
#include <ostream>
namespace execplan
{
namespace details
{
// Debug helper: writes `preambule` on its own line, then printer(element) for
// every element of `container` with `delimiter` between consecutive elements
// (no trailing delimiter), and finishes with a flushed newline.
template <typename T, typename F>
void printContainer(std::ostream& os, const T& container, const std::string& delimiter, const F& printer,
                    const std::string& preambule = {})
{
  os << preambule << "\n";

  bool needDelimiter = false;
  for (const auto& element : container)
  {
    if (needDelimiter)
      os << delimiter;
    os << printer(element);
    needDelimiter = true;
  }

  os << std::endl;
}
// Accumulator for the common conjunctions found in a filter tree:
//  - first:  nodes ordered (and de-duplicated) by NodeSemanticComparator;
//  - second: the same nodes keyed by pointer identity, used to tell "this very
//            node" apart from "a different node that compares equivalent".
using CommonContainer =
    std::pair<std::set<execplan::ParseTree*, NodeSemanticComparator>, std::set<execplan::ParseTree*>>;
// Returns the payload of `node` viewed as a Filter, or nullptr when the payload
// is of another type. `node` itself must not be null.
execplan::Filter* castToFilter(execplan::ParseTree* node)
{
  auto* payload = node->data();
  return dynamic_cast<execplan::Filter*>(payload);
}
// Returns `node` viewed as a SimpleFilter, or nullptr when it is of another type.
SimpleFilter* castToSimpleFilter(execplan::TreeNode* node)
{
  auto* const simple = dynamic_cast<SimpleFilter*>(node);
  return simple;
}
// True when `node` carries a Filter and the semantic set (common.first) holds an
// entry that NodeSemanticComparator treats as equivalent to it.
bool commonContainsSemantic(const CommonContainer& common, execplan::ParseTree* node)
{
  if (castToFilter(node) == nullptr)
    return false;

  return common.first.find(node) != common.first.end();
}
// True when this exact node (pointer identity) is one of the collected common nodes.
bool commonContainsPtr(const CommonContainer& common, execplan::ParseTree* node)
{
  return common.second.count(node) != 0;
}
// Returns the operator kind stored in `node`, or OP_UNKNOWN when the payload is
// not an Operator. `node` itself must not be null.
OpType operatorType(execplan::ParseTree* node)
{
  auto* asOperator = dynamic_cast<Operator*>(node->data());
  return asOperator != nullptr ? asOperator->op() : OP_UNKNOWN;
}
// What the parent has to do with one of its children once the child has been
// visited by removeFromTreeIterative.
enum class ChildType
{
  Unchain,  // the child is itself one of the collected common nodes: detach it but keep it alive
  Delete,   // the child duplicates a common node (or lost both of its children): detach and delete it
  Leave     // the child stays where it is (also the default for a missing child)
};
// Debug trace of one node: prints `level` groups of "----", the node's data
// string, its demangled dynamic type and its address to std::cerr.
// Compiles to a no-op unless debug_rewrites is enabled.
void printTreeLevel(execplan::ParseTree* root, int level)
{
#if debug_rewrites
  auto sep = std::string(level * 4, '-');
  auto& node = *(root->data());
  std::cerr << sep << ": " << root->data()->data() << " " << boost::core::demangle(typeid(node).name()) << " "
            << root << std::endl;
#endif
}
auto normalizeNode(std::string const& left, std::string const& right, execplan::OpType op)
{
if (left < right)
return std::make_tuple(op, std::ref(left), std::ref(right));
execplan::OpType oppositeOp = oppositeOperator(op);
return std::make_tuple(oppositeOp, std::ref(right), std::ref(left));
}
// Strict ordering of two simple filters by their normalized (operator, lhs, rhs) keys.
bool simpleFiltersCmp(const SimpleFilter* left, const SimpleFilter* right)
{
  // normalizeNode() returns references to its arguments, so the operand strings
  // are bound to named const references first. That keeps them alive for the
  // comparison even if data() returns by value.
  const std::string& leftLhs = left->lhs()->data();
  const std::string& leftRhs = left->rhs()->data();
  const std::string& rightLhs = right->lhs()->data();
  const std::string& rightRhs = right->rhs()->data();

  const auto leftKey = normalizeNode(leftLhs, leftRhs, left->op()->op());
  const auto rightKey = normalizeNode(rightLhs, rightRhs, right->op()->op());
  return leftKey < rightKey;
}
// Walks the tree and collects the conjunctions that are common to every branch
// of an OR node.
//
//  root        - subtree to inspect (may be null).
//  accumulator - receives the filter leaves found; only `first` is filled here.
//  level       - depth of `root`, used only by the debug trace.
//  orMeeted    - true once an OR node has been seen on the path from the top.
//  andParent   - true when the direct parent of `root` is an AND node.
//
// A leaf is collected only if it is a Filter, hangs directly under an AND node,
// and lies below some OR node. For an OR node both sides are collected
// separately and `accumulator` is REPLACED by their semantic intersection.
void collectCommonConjuctions(execplan::ParseTree* root, CommonContainer& accumulator, int level = 0,
                              bool orMeeted = false, bool andParent = false)
{
  if (root == nullptr)
  {
    return;
  }

  printTreeLevel(root, level);

  const bool isLeaf = root->left() == nullptr && root->right() == nullptr;
  if (isLeaf && orMeeted && andParent)
  {
    // we want to collect it if it is a child of an AND node and an OR node was met before
    if (castToFilter(root))
    {
      accumulator.first.insert(root);
    }
    return;
  }

  // Both children sit exactly one level below this node. The previous code used
  // `++level` for each call, so the right subtree was traced one level too deep.
  const int childLevel = level + 1;

  // we do set intersection for all the lower levels for the OR node
  if (operatorType(root) == OP_OR)
  {
    CommonContainer leftAcc;
    CommonContainer rightAcc;
    collectCommonConjuctions(root->left(), leftAcc, childLevel, true, false);
    collectCommonConjuctions(root->right(), rightAcc, childLevel, true, false);

    CommonContainer intersection;
    std::set_intersection(leftAcc.first.begin(), leftAcc.first.end(), rightAcc.first.begin(),
                          rightAcc.first.end(), std::inserter(intersection.first, intersection.first.begin()),
                          NodeSemanticComparator{});
    accumulator = intersection;
    return;
  }

  const bool isAnd = operatorType(root) == OP_AND;
  collectCommonConjuctions(root->left(), accumulator, childLevel, orMeeted, isAnd);
  collectCommonConjuctions(root->right(), accumulator, childLevel, orMeeted, isAnd);
}
// Allocates a fresh childless tree node whose payload is an Operator with the
// data string "and". The caller owns the returned node.
execplan::ParseTree* newAndNode()
{
  auto* andOperator = new execplan::Operator();
  andOperator->data("and");

  auto* node = new execplan::ParseTree(andOperator);
  return node;
}
// Re-attaches the extracted common conjunctions on top of what is left of the tree.
//
//  tree   - remainder of the original tree after the common nodes were removed;
//           may be null when nothing is left.
//  common - ordered container of the nodes to AND on top (begin() ends up closest
//           to the new root).
//
// For common = [c1, c2, ..., cn] the result is the left-deep chain
//     AND(AND(... AND(tree, cn) ..., c2), c1)
// and, when `tree` is null,
//     AND(... AND(c(n-1), cn) ..., c1).
// With an empty `common` the tree is returned untouched. With a null tree and a
// single common node that node itself becomes the root.
//
// The chain is built bottom-up with reverse iterators. The earlier forward loop
// evaluated std::next() on the end iterator (undefined behaviour) and could
// return an AND node with a null left child for the single-node case.
template <typename Common>
execplan::ParseTree* appendToRoot(execplan::ParseTree* tree, const Common& common)
{
  if (common.empty())
    return tree;

  auto node = common.rbegin();
  const auto stop = common.rend();

  execplan::ParseTree* chain = tree;
  if (chain == nullptr)
  {
    // Nothing left of the original tree: seed the chain from the last common node(s).
    execplan::ParseTree* last = *node;
    ++node;
    if (node == stop)
      return last;

    chain = newAndNode();
    chain->left(*node);
    chain->right(last);
    ++node;
  }

  for (; node != stop; ++node)
  {
    execplan::ParseTree* andOp = newAndNode();
    andOp->left(chain);
    andOp->right(*node);
    chain = andOp;
  }

  return chain;
}
// Next step of the iterative depth-first traversal for a given stack frame.
enum class GoTo
{
  Left,   // the left child has not been visited yet
  Right,  // the left side is done, the right child is next
  Up      // both sides are done, the node itself can be processed and popped
};
// One frame of the explicit DFS stack used by removeFromTreeIterative.
struct StackFrame
{
  // Address of the pointer slot that holds this node (the root pointer or a
  // parent's left/right link), so the node can be replaced in place.
  execplan::ParseTree** node;
  // Where the traversal goes next from this node.
  GoTo direction;
  // Verdicts reported by the children once they have been processed.
  ChildType containsLeft = ChildType::Leave;
  ChildType containsRight = ChildType::Leave;

  StackFrame(execplan::ParseTree** node_, GoTo direction_) : node(node_), direction(direction_)
  {
  }
};

using DFSStack = std::vector<StackFrame>;
// Deletes exactly one tree node and nulls the slot that pointed to it.
// The node's child links are cleared before the delete; the children themselves
// are left untouched. A null slot or an empty slot is ignored.
void deleteOneNode(execplan::ParseTree** node)
{
  if (node == nullptr || *node == nullptr)
    return;

  execplan::ParseTree* doomed = *node;
  doomed->nullLeft();
  doomed->nullRight();

#if debug_rewrites
  std::cerr << "   Deleting: " << (*node)->data() << " " << boost::core::demangle(typeid(**node).name())
            << " "
            << "ptr: " << *node << std::endl;
#endif

  delete doomed;
  *node = nullptr;
}
// Advances the traversal state of the top stack frame and, when the child in the
// requested direction exists, pushes a new frame for it.
//  direction - the step the top frame is about to take (Left or Right; Up is a no-op);
//  node      - the tree node the top frame refers to.
void addStackFrame(DFSStack& stack, GoTo direction, execplan::ParseTree* node)
{
  switch (direction)
  {
    case GoTo::Left:
      // once the left side is scheduled, the right side is next
      stack.back().direction = GoTo::Right;
      if (node->left() != nullptr)
        stack.emplace_back(node->leftRef(), GoTo::Left);
      break;

    case GoTo::Right:
      // once the right side is scheduled, the node itself is next
      stack.back().direction = GoTo::Up;
      if (node->right() != nullptr)
        stack.emplace_back(node->rightRef(), GoTo::Left);
      break;

    case GoTo::Up: break;
  }
}
// Records in a parent's stack frame what has to happen to the child that was
// just processed (delete, unchain or leave). Which child is meant is derived
// from the frame's direction: Right means the left child has just been handled,
// anything else means the right child.
void replaceContainsTypeFlag(StackFrame& stackframe, ChildType containsflag)
{
  ChildType& slot = (stackframe.direction == GoTo::Right) ? stackframe.containsLeft : stackframe.containsRight;
  slot = containsflag;
}
// Performs the actual tree surgery for one inner node after both of its
// children have been visited.
//
//  node        - slot holding the node (root pointer or a parent's child link);
//                it may be re-pointed or nulled;
//  ltype/rtype - verdicts for the left and right child;
//  parentframe - stack frame of the parent, or nullptr when `node` is the root.
//
// Outcomes:
//  - both children stay:         nothing changes;
//  - exactly one child stays:    the node is replaced by that child and deleted;
//  - no child stays:             the node is reported to its parent as Delete,
//                                or deleted right away when it is the root.
// A child marked Delete is freed here; a child marked Unchain is only detached
// (deleteOneNode clears the links of the node being removed before deleting it).
void fixUpTree(execplan::ParseTree** node, ChildType ltype, ChildType rtype,
               StackFrame* parentframe = nullptr)
{
  if (ltype == ChildType::Leave)
  {
    if (rtype != ChildType::Leave)  // the right child goes away, so the left child
    {                               // takes the place of this node
      execplan::ParseTree* oldNode = *node;
      if (rtype == ChildType::Delete)        // the right child duplicates a common node:
        deleteOneNode((*node)->rightRef());  // free it
      *node = (*node)->left();
      deleteOneNode(&oldNode);
    }
  }
  else
  {
    if (ltype == ChildType::Delete)  // same as above, for the left child
      deleteOneNode((*node)->leftRef());
    if (rtype == ChildType::Leave)  // the right child takes the place of this node
    {
      execplan::ParseTree* oldNode = *node;
      *node = (*node)->right();
      deleteOneNode(&oldNode);
    }
    else
    {
      if (rtype == ChildType::Delete)
        deleteOneNode((*node)->rightRef());
      // both children are deleted/unchained:
      // if a parent exists we mark this node for deletion by the parent,
      // otherwise it is the root and we just delete it
      if (parentframe)
        replaceContainsTypeFlag(*parentframe, ChildType::Delete);
      else
        deleteOneNode(node);
    }
  }
}
// Removes every occurrence of the common conjunctions from the tree, using an
// explicit stack instead of recursion.
//
//  root   - address of the root pointer; updated when the root is replaced or removed;
//  common - the collected common nodes (semantic set + pointer set).
//
// Filter leaves below the root only report a verdict to their parent frame:
// Unchain for the collected node itself, Delete for a semantic duplicate, Leave
// otherwise. Every other node is handed to fixUpTree after both children are done.
void removeFromTreeIterative(execplan::ParseTree** root, const CommonContainer& common)
{
  if (common.first.empty())
    return;

  DFSStack stack;
  stack.emplace_back(root, GoTo::Left);

  while (!stack.empty())
  {
    // Work on a copy: addStackFrame may grow the vector and invalidate references into it.
    const StackFrame frame = stack.back();

    if (frame.direction != GoTo::Up)
    {
      addStackFrame(stack, frame.direction, *frame.node);
      continue;
    }

    const auto depth = stack.size();
    StackFrame* parent = depth > 1 ? &stack[depth - 2] : nullptr;

    if (parent != nullptr && castToFilter(*frame.node))
    {
      ChildType verdict = ChildType::Leave;
      if (commonContainsPtr(common, *frame.node))
        verdict = ChildType::Unchain;
      else if (commonContainsSemantic(common, *frame.node))
        verdict = ChildType::Delete;

      replaceContainsTypeFlag(*parent, verdict);
    }
    else
    {
      fixUpTree(frame.node, frame.containsLeft, frame.containsRight, parent);
    }

    stack.pop_back();
  }
}
} // namespace details
// Debug helper: serializes `filters` to "<dumpfolder>filters.<name>.data", draws
// it to "<dumpfolder>filters.<name>.dot" and renders the .dot file to PNG by
// invoking the external `dot` tool. When `dumpfolder` is empty the engine's
// temporary directory is used.
// Compiles to a no-op unless debug_rewrites is enabled.
void dumpTreeFiles(execplan::ParseTree* filters, const std::string& name, std::string dumpfolder = {})
{
#if debug_rewrites
  messageqcpp::ByteStream beforetree;
  ObjectReader::writeParseTree(filters, beforetree);

  if (dumpfolder.empty())
  {
    dumpfolder = startup::StartUp::tmpDir();
  }

  // File names are appended directly to the folder, so make sure it ends with a
  // separator; otherwise the files land next to the folder under a fused name.
  if (!dumpfolder.empty() && dumpfolder.back() != '/')
  {
    dumpfolder += '/';
  }

  std::ofstream before(dumpfolder + "filters." + name + ".data");
  before << beforetree;

  std::string dotname = dumpfolder + "filters." + name + ".dot";
  filters->drawTree(dotname);

  std::string dotInvoke = "dot -Tpng ";
  std::string convert = dotInvoke + dotname + " -o " + dotname + ".png";
  [[maybe_unused]] auto _ = std::system(convert.c_str());
#endif
}
// Entry point of the rewrite: finds the filter leaves shared by all branches of
// an OR, removes them from the tree and ANDs them back on top of the result.
//
//  stableSort - when true the extracted nodes are ordered by their data string
//               before being re-attached (and debug dumps go to "/tmp/");
//               otherwise the NodeSemanticComparator order of the set is used.
//  tree       - tree to rewrite; it is modified in place.
// Returns the root of the rewritten tree, which may differ from `tree`.
template <bool stableSort>
execplan::ParseTree* extractCommonLeafConjunctionsToRoot(execplan::ParseTree* tree)
{
  dumpTreeFiles(tree, ".initial", stableSort ? "/tmp/" : "");

  details::CommonContainer common;
  details::collectCommonConjuctions(tree, common);
  // remember the collected nodes by pointer identity as well
  common.second.insert(common.first.begin(), common.first.end());

#if debug_rewrites
  details::printContainer(
      std::cerr, common.first, "\n", [](auto treenode) { return treenode->data()->data(); },
      "Common Leaf Conjunctions:");
#endif

  details::removeFromTreeIterative(&tree, common);

  if constexpr (stableSort)
  {
    std::vector<execplan::ParseTree*> ordered(common.first.begin(), common.first.end());
    std::sort(ordered.begin(), ordered.end(),
              [](auto lhs, auto rhs) { return lhs->data()->data() < rhs->data()->data(); });
    tree = details::appendToRoot(tree, ordered);
  }
  else
  {
    tree = details::appendToRoot(tree, common.first);
  }

  dumpTreeFiles(tree, ".final", stableSort ? "/tmp/" : "");
  return tree;
}
// Returns the operator that keeps a comparison true after its operands are
// swapped: > <-> < and >= <-> <=. Every other operator is returned unchanged.
execplan::OpType oppositeOperator(execplan::OpType op)
{
  switch (op)
  {
    case OP_GT: return OP_LT;
    case OP_GE: return OP_LE;
    case OP_LT: return OP_GT;
    case OP_LE: return OP_GE;
    default: return op;
  }
}
template execplan::ParseTree* extractCommonLeafConjunctionsToRoot<false>(execplan::ParseTree* tree);
template execplan::ParseTree* extractCommonLeafConjunctionsToRoot<true>(execplan::ParseTree* tree);
// Strict "less than" over tree nodes. Two SimpleFilter nodes are compared by
// their normalized (operator, lhs, rhs) keys; any other pair is compared by the
// data strings of the payloads.
// NOTE(review): the two criteria are mixed within one ordering, so sets holding
// both SimpleFilter and other nodes rely on the criteria never contradicting
// each other - confirm this holds for the node kinds that can reach here.
bool NodeSemanticComparator::operator()(execplan::ParseTree* left, execplan::ParseTree* right) const
{
  auto* lhsFilter = details::castToSimpleFilter(left->data());
  auto* rhsFilter = details::castToSimpleFilter(right->data());

  if (lhsFilter == nullptr || rhsFilter == nullptr)
    return left->data()->data() < right->data()->data();

  return details::simpleFiltersCmp(lhsFilter, rhsFilter);
}
} // namespace execplan

40
dbcon/execplan/rewrites.h Normal file
View File

@ -0,0 +1,40 @@
/* Copyright (C) 2022 MariaDB Corporation
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; version 2 of
the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
MA 02110-1301, USA. */
#pragma once
#include <parsetree.h>
#include <treenode.h>
#include <simplefilter.h>
namespace execplan
{
#define debug_rewrites false
// Returns the comparison operator to use after swapping the operands
// (> <-> <, >= <-> <=); any other operator is returned unchanged.
execplan::OpType oppositeOperator(execplan::OpType op);
// Strict "less than" ordering over tree nodes, used as the comparator of the
// ordered containers that hold common conjunctions. Two nodes for which neither
// compares less than the other are treated as the same condition.
struct NodeSemanticComparator
{
  bool operator()(execplan::ParseTree* left, execplan::ParseTree* right) const;
};
// Walks the tree, finds the conjunctions common to all branches of an OR and
// moves them to the root, e.g. "(A and B) or (A and C)" -> "A and (B or C)".
// The tree is modified in place; the returned pointer is the new root.
// stableSort orders the extracted conditions by their data string (used by tests).
template<bool stableSort = false>
execplan::ParseTree* extractCommonLeafConjunctionsToRoot(execplan::ParseTree* tree);
} // namespace execplan

View File

@ -141,6 +141,12 @@ SimpleColumn::SimpleColumn() : ReturnedColumn(), fOid(0), fisColumnStore(true)
fDistinct = false;
}
// Test-only constructor selected by the ForTestPurposeWithoutOID tag: builds the
// column from its textual token with session id 0 and OID 0, then parses the
// token. NOTE(review): presumably exists so unit tests can build columns
// without a system-catalog lookup - confirm against the session-id constructor.
SimpleColumn::SimpleColumn(const std::string& token, ForTestPurposeWithoutOID)
 : ReturnedColumn(0), fOid(0), fData(token), fisColumnStore(true)
{
  parse(token);
  fDistinct = false;
}
SimpleColumn::SimpleColumn(const string& token, const uint32_t sessionID)
: ReturnedColumn(sessionID), fOid(0), fData(token), fisColumnStore(true)
{
@ -373,6 +379,7 @@ void SimpleColumn::unserialize(messageqcpp::ByteStream& b)
b >> reinterpret_cast<ByteStream::doublebyte&>(fisColumnStore);
}
bool SimpleColumn::operator==(const SimpleColumn& t) const
{
const ReturnedColumn *rc1, *rc2;

View File

@ -57,8 +57,14 @@ class SimpleColumn : public ReturnedColumn
/**
* Constructors
*/
class ForTestPurposeWithoutOID{};
SimpleColumn();
SimpleColumn(const std::string& token, ForTestPurposeWithoutOID);
SimpleColumn(const std::string& token, const uint32_t sessionID = 0);
SimpleColumn(const std::string& schema, const std::string& table, const std::string& col,
const uint32_t sessionID = 0, const int lower_case_table_names = 0);
SimpleColumn(const std::string& schema, const std::string& table, const std::string& col,
@ -203,6 +209,8 @@ class SimpleColumn : public ReturnedColumn
*/
bool operator==(const SimpleColumn& t) const;
bool operator<(const SimpleColumn& t) const;
/** @brief Do a deep, strict (as opposed to semantic) equivalence test
*
* Do a deep, strict (as opposed to semantic) equivalence test.

View File

@ -55,6 +55,12 @@ SimpleFilter::SimpleFilter(const string& sql) : Filter(sql)
parse(sql);
}
// Test-only constructor selected by the ForTestPurposesWithoutColumnsOIDS tag:
// parses `sql` in test mode, so both operands are created through the
// SimpleColumn test constructor.
// TODO: only simple-column operands are handled for now
SimpleFilter::SimpleFilter(const string& sql, ForTestPurposesWithoutColumnsOIDS) : Filter(sql)
{
  parse(sql, ForTestPurposesWithoutColumnsOIDS{});
}
SimpleFilter::SimpleFilter(const SOP& op, ReturnedColumn* lhs, ReturnedColumn* rhs, const long timeZone)
: fOp(op), fLhs(lhs), fRhs(rhs), fIndexFlag(NOINDEX), fJoinFlag(EQUA), fTimeZone(timeZone)
{
@ -244,7 +250,7 @@ const string SimpleFilter::toString() const
return output.str();
}
void SimpleFilter::parse(string sql)
void SimpleFilter::parse(string sql, std::optional<ForTestPurposesWithoutColumnsOIDS> testFlag)
{
fLhs = 0;
fRhs = 0;
@ -267,6 +273,9 @@ void SimpleFilter::parse(string sql)
if (lhs.at(lhs.length() - 1) == ' ')
lhs = lhs.substr(0, pos - 1);
if (testFlag)
fLhs = new SimpleColumn(lhs, SimpleColumn::ForTestPurposeWithoutOID{});
else
fLhs = new SimpleColumn(lhs);
pos = pos + delimiter[i].length();
@ -278,6 +287,9 @@ void SimpleFilter::parse(string sql)
if (rhs.at(rhs.length() - 1) == ' ')
rhs = rhs.substr(0, pos - 1);
if (testFlag)
fRhs = new SimpleColumn(rhs, SimpleColumn::ForTestPurposeWithoutOID{});
else
fRhs = new SimpleColumn(rhs);
break;
}

View File

@ -65,8 +65,11 @@ class SimpleFilter : public Filter
SEMI
};
struct ForTestPurposesWithoutColumnsOIDS{};
SimpleFilter();
SimpleFilter(const std::string& sql);
SimpleFilter(const std::string& sql, ForTestPurposesWithoutColumnsOIDS);
SimpleFilter(const SOP& op, ReturnedColumn* lhs, ReturnedColumn* rhs, const long timeZone = 0);
SimpleFilter(const SimpleFilter& rhs);
@ -154,6 +157,10 @@ class SimpleFilter : public Filter
* @return true if filter operation are the same and
* the sets of arguments are the same; false otherwise
*/
bool operator<(const SimpleFilter& t) const;
bool semanticEq(const SimpleFilter& t) const;
/** @brief Do a deep, strict (as opposed to semantic) equivalence test
@ -222,7 +229,7 @@ class SimpleFilter : public Filter
int fJoinFlag; /// hash join type
long fTimeZone;
void parse(std::string);
void parse(std::string, std::optional<ForTestPurposesWithoutColumnsOIDS> testFlag = std::nullopt);
/***********************************************************
* F&E framework *

View File

@ -63,27 +63,28 @@ using namespace logging;
#include "ha_tzinfo.h"
using namespace cal_impl_if;
#include "calpontselectexecutionplan.h"
#include "calpontsystemcatalog.h"
#include "simplecolumn_int.h"
#include "simplecolumn_uint.h"
#include "simplecolumn_decimal.h"
#include "aggregatecolumn.h"
#include "constantcolumn.h"
#include "simplefilter.h"
#include "constantfilter.h"
#include "functioncolumn.h"
#include "arithmeticcolumn.h"
#include "arithmeticoperator.h"
#include "calpontselectexecutionplan.h"
#include "calpontsystemcatalog.h"
#include "constantcolumn.h"
#include "constantfilter.h"
#include "existsfilter.h"
#include "functioncolumn.h"
#include "groupconcatcolumn.h"
#include "intervalcolumn.h"
#include "jsonarrayaggcolumn.h"
#include "logicoperator.h"
#include "outerjoinonfilter.h"
#include "predicateoperator.h"
#include "rewrites.h"
#include "rowcolumn.h"
#include "selectfilter.h"
#include "existsfilter.h"
#include "groupconcatcolumn.h"
#include "jsonarrayaggcolumn.h"
#include "outerjoinonfilter.h"
#include "intervalcolumn.h"
#include "simplecolumn_decimal.h"
#include "simplecolumn_int.h"
#include "simplecolumn_uint.h"
#include "simplefilter.h"
#include "udafcolumn.h"
using namespace execplan;
@ -7100,6 +7101,13 @@ int processWhere(SELECT_LEX& select_lex, gp_walk_info& gwi, SCSEP& csep, const s
outerJoinStack.push(ptp);
}
config::Config* cf = config::Config::makeConfig();
string rewriteEnabled = cf->getConfig("Rewrites", "CommonLeafConjunctionsToTop");
if (filters && rewriteEnabled != "OFF")
{
filters = extractCommonLeafConjunctionsToRoot(filters);
}
// Append outer join filters at the end of inner join filters.
// JLF_ExecPlanToJobList::walkTree processes ParseTree::left
// before ParseTree::right which is what we intend to do in the
@ -7119,9 +7127,6 @@ int processWhere(SELECT_LEX& select_lex, gp_walk_info& gwi, SCSEP& csep, const s
if (filters)
{
csep->filters(filters);
std::string aTmpDir(startup::StartUp::tmpDir());
aTmpDir = aTmpDir + "/filter1.dot";
filters->drawTree(aTmpDir);
}
return 0;
@ -7684,6 +7689,7 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i
case Item::CONST_ITEM:
{
switch (item->cmp_type())
{
case INT_RESULT:
case STRING_RESULT:

View File

@ -0,0 +1,84 @@
DROP DATABASE IF EXISTS mcol4530;
CREATE DATABASE mcol4530;
USE mcol4530;
create table part (
p_partkey int,
p_name varchar (55),
p_mfgr char (25),
p_brand char (10),
p_type varchar (25),
p_size int,
p_container char (10),
p_retailprice decimal(12,2),
p_comment varchar (23)
)engine=columnstore;
create table lineitem (
l_orderkey int,
l_partkey int,
l_suppkey int,
l_linenumber bigint,
l_quantity decimal(12,2),
l_extendedprice decimal(12,2),
l_discount decimal(12,2),
l_tax decimal(12,2),
l_returnflag char (1),
l_linestatus char (1),
l_shipdate date,
l_commitdate date,
l_receiptdate date,
l_shipinstruct char (25),
l_shipmode char (10),
l_comment varchar (44)
)engine=columnstore;
INSERT INTO part VALUES
(1, 'goldenrod lavender spring chocolate lace', 'Manufacturer#1', 'Brand#13', 'PROMO BURNISHED COPPER', 7, 'JUMBO PKG', 901.00, 'ly. slyly ironi'),
(2, 'blush thistle blue yellow saddle', 'Manufacturer#1', 'Brand#13', 'LARGE BRUSHED BRASS', 1, 'LG CASE', 902.00, 'lar accounts amo'),
(3, 'spring green yellow purple cornsilk', 'Manufacturer#4', 'Brand#42', 'STANDARD POLISHED BRASS', 21, 'WRAP CASE', 903.00, 'egular deposits hag'),
(4, 'cornflower chocolate smoke green pink', 'Manufacturer#3', 'Brand#34', 'SMALL PLATED BRASS', 14, 'MED DRUM', 904.00, 'p furiously r'),
(5, 'forest brown coral puff cream', 'Manufacturer#3', 'Brand#32', 'STANDARD POLISHED TIN', 15, 'SM PKG', 905.00, 'wake carefully');
INSERT INTO lineitem VALUES
(1, 156, 4, 1, 17, 17954.55, 0.04, 0.02, 'N', 'O', '1996-03-13', '1996-02-12', '1996-03-22', 'DELIVER IN PERSON', 'TRUCK', 'egular courts above the'),
(1, 68, 9, 2, 36, 34850.16, 0.09, 0.06, 'N', 'O', '1996-04-12', '1996-02-28', '1996-04-20', 'TAKE BACK RETURN', 'MAIL', 'ly final dependencies: slyly bold'),
(1, 64, 5, 3, 8, 7712.48, 0.10, 0.02, 'N', 'O', '1996-01-29', '1996-03-05', '1996-01-31', 'TAKE BACK RETURN', 'REG AIR', 'riously. regular, express dep'),
(1, 3, 6, 4, 28, 25284.00, 0.09, 0.06, 'N', 'O', '1996-04-21', '1996-03-30', '1996-05-16', 'NONE', 'AIR', 'lites. fluffily even de'),
(1, 25, 8, 5, 24, 22200.48, 0.10, 0.04, 'N', 'O', '1996-03-30', '1996-03-14', '1996-04-01', 'NONE', 'FOB', 'pending foxes. slyly re'),
(1, 16, 3, 6, 32, 29312.32, 0.07, 0.02, 'N', 'O', '1996-01-30', '1996-02-07', '1996-02-03', 'DELIVER IN PERSON', 'MAIL', 'arefully slyly ex'),
(2, 107, 2, 1, 38, 38269.80, 0.00, 0.05, 'N', 'O', '1997-01-28', '1997-01-14', '1997-02-02', 'TAKE BACK RETURN', 'RAIL', 'ven requests. deposits breach a');
select
sum(l_extendedprice* (1 - l_discount)) as revenue
from
lineitem,
part
where
(
p_partkey = l_partkey
and p_brand = 'Brand#23'
and p_container in ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG')
and l_quantity >= 2 and l_quantity <= 2 + 10
and p_size between 1 and 5
and l_shipmode in ('AIR', 'AIR REG')
and l_shipinstruct = 'DELIVER IN PERSON'
)
or
(
p_partkey = l_partkey
and p_brand = 'Brand#24'
and p_container in ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK')
and l_quantity >= 20 and l_quantity <= 20 + 10
and p_size between 1 and 10
and l_shipmode in ('AIR', 'AIR REG')
and l_shipinstruct = 'DELIVER IN PERSON'
)
or
(
p_partkey = l_partkey
and p_brand = 'Brand#44'
and p_container in ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG')
and l_quantity >= 26 and l_quantity <= 26 + 10
and p_size between 1 and 15
and l_shipmode in ('AIR', 'AIR REG')
and l_shipinstruct = 'DELIVER IN PERSON'
);
revenue
NULL
DROP DATABASE mcol4530;

View File

@ -0,0 +1,96 @@
--source ../include/have_columnstore.inc
--disable_warnings
DROP DATABASE IF EXISTS mcol4530;
--enable_warnings
CREATE DATABASE mcol4530;
USE mcol4530;
create table part (
p_partkey int,
p_name varchar (55),
p_mfgr char (25),
p_brand char (10),
p_type varchar (25),
p_size int,
p_container char (10),
p_retailprice decimal(12,2),
p_comment varchar (23)
)engine=columnstore;
create table lineitem (
l_orderkey int,
l_partkey int,
l_suppkey int,
l_linenumber bigint,
l_quantity decimal(12,2),
l_extendedprice decimal(12,2),
l_discount decimal(12,2),
l_tax decimal(12,2),
l_returnflag char (1),
l_linestatus char (1),
l_shipdate date,
l_commitdate date,
l_receiptdate date,
l_shipinstruct char (25),
l_shipmode char (10),
l_comment varchar (44)
)engine=columnstore;
INSERT INTO part VALUES
(1, 'goldenrod lavender spring chocolate lace', 'Manufacturer#1', 'Brand#13', 'PROMO BURNISHED COPPER', 7, 'JUMBO PKG', 901.00, 'ly. slyly ironi'),
(2, 'blush thistle blue yellow saddle', 'Manufacturer#1', 'Brand#13', 'LARGE BRUSHED BRASS', 1, 'LG CASE', 902.00, 'lar accounts amo'),
(3, 'spring green yellow purple cornsilk', 'Manufacturer#4', 'Brand#42', 'STANDARD POLISHED BRASS', 21, 'WRAP CASE', 903.00, 'egular deposits hag'),
(4, 'cornflower chocolate smoke green pink', 'Manufacturer#3', 'Brand#34', 'SMALL PLATED BRASS', 14, 'MED DRUM', 904.00, 'p furiously r'),
(5, 'forest brown coral puff cream', 'Manufacturer#3', 'Brand#32', 'STANDARD POLISHED TIN', 15, 'SM PKG', 905.00, 'wake carefully');
INSERT INTO lineitem VALUES
(1, 156, 4, 1, 17, 17954.55, 0.04, 0.02, 'N', 'O', '1996-03-13', '1996-02-12', '1996-03-22', 'DELIVER IN PERSON', 'TRUCK', 'egular courts above the'),
(1, 68, 9, 2, 36, 34850.16, 0.09, 0.06, 'N', 'O', '1996-04-12', '1996-02-28', '1996-04-20', 'TAKE BACK RETURN', 'MAIL', 'ly final dependencies: slyly bold'),
(1, 64, 5, 3, 8, 7712.48, 0.10, 0.02, 'N', 'O', '1996-01-29', '1996-03-05', '1996-01-31', 'TAKE BACK RETURN', 'REG AIR', 'riously. regular, express dep'),
(1, 3, 6, 4, 28, 25284.00, 0.09, 0.06, 'N', 'O', '1996-04-21', '1996-03-30', '1996-05-16', 'NONE', 'AIR', 'lites. fluffily even de'),
(1, 25, 8, 5, 24, 22200.48, 0.10, 0.04, 'N', 'O', '1996-03-30', '1996-03-14', '1996-04-01', 'NONE', 'FOB', 'pending foxes. slyly re'),
(1, 16, 3, 6, 32, 29312.32, 0.07, 0.02, 'N', 'O', '1996-01-30', '1996-02-07', '1996-02-03', 'DELIVER IN PERSON', 'MAIL', 'arefully slyly ex'),
(2, 107, 2, 1, 38, 38269.80, 0.00, 0.05, 'N', 'O', '1997-01-28', '1997-01-14', '1997-02-02', 'TAKE BACK RETURN', 'RAIL', 'ven requests. deposits breach a');
select
sum(l_extendedprice* (1 - l_discount)) as revenue
from
lineitem,
part
where
(
p_partkey = l_partkey
and p_brand = 'Brand#23'
and p_container in ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG')
and l_quantity >= 2 and l_quantity <= 2 + 10
and p_size between 1 and 5
and l_shipmode in ('AIR', 'AIR REG')
and l_shipinstruct = 'DELIVER IN PERSON'
)
or
(
p_partkey = l_partkey
and p_brand = 'Brand#24'
and p_container in ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK')
and l_quantity >= 20 and l_quantity <= 20 + 10
and p_size between 1 and 10
and l_shipmode in ('AIR', 'AIR REG')
and l_shipinstruct = 'DELIVER IN PERSON'
)
or
(
p_partkey = l_partkey
and p_brand = 'Brand#44'
and p_container in ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG')
and l_quantity >= 26 and l_quantity <= 26 + 10
and p_size between 1 and 15
and l_shipmode in ('AIR', 'AIR REG')
and l_shipinstruct = 'DELIVER IN PERSON'
);
DROP DATABASE mcol4530;

View File

@ -246,4 +246,7 @@
<DataRedundancyConfig>
<DBRoot1PMs/>
</DataRedundancyConfig>
<Rewrites>
<CommonLeafConjunctionsToTop>Y</CommonLeafConjunctionsToTop>
</Rewrites>
</Columnstore>

View File

@ -2,7 +2,6 @@ include_directories( ${ENGINE_COMMON_INCLUDES} ${ENGINE_BLOCKCACHE_INCLUDE} ${EN
MY_CHECK_AND_SET_COMPILER_FLAG("-U_FORTIFY_SOURCE" DEBUG RELWITHDEBINFO)
MY_CHECK_AND_SET_COMPILER_FLAG("-fsanitize=address -fsanitize-address-use-after-scope -fPIC")
if (WITH_UNITTESTS)
cmake_policy(SET CMP0054 NEW)
set(EXTERNAL_INSTALL_LOCATION ${CMAKE_BINARY_DIR}/external)
@ -31,6 +30,12 @@ if (WITH_UNITTESTS)
target_link_libraries(rowgroup_tests ${ENGINE_LDFLAGS} ${GTEST_LIBRARIES} ${ENGINE_EXEC_LIBS})
gtest_add_tests(TARGET rowgroup_tests TEST_PREFIX columnstore:)
add_executable(rewritetest rewritetest.cpp)
add_dependencies(rewritetest googletest)
target_link_libraries(rewritetest ${ENGINE_LDFLAGS} ${GTEST_LIBRARIES} ${ENGINE_EXEC_LIBS} messageqcpp execplan)
gtest_add_tests(TARGET rewritetest TEST_PREFIX columnstore:)
add_executable(mcs_decimal_tests mcs_decimal-tests.cpp)
add_dependencies(mcs_decimal_tests googletest)
target_link_libraries(mcs_decimal_tests ${ENGINE_LDFLAGS} ${GTEST_LIBRARIES} ${ENGINE_EXEC_LIBS})

1129
tests/query19_fixed.h Normal file

File diff suppressed because it is too large Load Diff

1522
tests/query19_init.h Normal file

File diff suppressed because it is too large Load Diff

616
tests/rewritetest.cpp Executable file
View File

@ -0,0 +1,616 @@
/* Copyright (C) 2022 MariaDB Corporation
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General
Public License as published by the Free Software Foundation; version 2 of the License. This program is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty
of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with this program; if not, write to
the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
#include <gtest/gtest.h>
#include <algorithm>
#include <memory>
#include "rewrites.h"
#include "bytestream.h"
#include "objectreader.h"
#include "unitqueries_before.h"
#include "unitqueries_after.h"
#include "query19_init.h"
#include "query19_fixed.h"
using TreePtr = std::unique_ptr<execplan::ParseTree>;
// Structural comparison of two filter trees for the tests.
// Two nodes match when neither orders before the other under
// NodeSemanticComparator; the children may match either straight
// (left-left, right-right) or crossed (left-right, right-left).
// `depth` is only used in the diagnostic message.
bool treeEqual(execplan::ParseTree* fst, execplan::ParseTree* snd, int depth = 0)
{
  // Two empty subtrees are equal; an empty one never equals a non-empty one.
  if (fst == nullptr || snd == nullptr)
  {
    return fst == snd;
  }

  auto comp = execplan::NodeSemanticComparator();
  // Equivalence under a strict ordering needs BOTH directions checked. The
  // previous code tested comp(fst, snd) twice, so a pair with snd < fst was
  // wrongly reported as equal.
  if (comp(fst, snd) || comp(snd, fst))
  {
    std::cerr << "Data " << fst->data()->data() << " differs from " << snd->data()->data() << " at level " << depth << '\n';
    return false;
  }

  return (treeEqual(fst->left(), snd->left(), depth + 1) && treeEqual(fst->right(), snd->right(), depth + 1)) ||
         (treeEqual(fst->left(), snd->right(), depth + 1) && treeEqual(fst->right(), snd->left(), depth + 1));
}
#define REWRITE_TREE_TEST_DEBUG false
// Debug helper for the tests: draws `tree` to "/tmp/<queryName>.<treeName>.dot"
// and renders it to PNG with the external `dot` tool.
// Compiles to a no-op unless REWRITE_TREE_TEST_DEBUG is enabled.
void printTree(const std::string& queryName, execplan::ParseTree* tree, const std::string& treeName)
{
#if REWRITE_TREE_TEST_DEBUG
  const std::string dotFile = std::string("/tmp/") + queryName + "." + treeName + ".dot";
  tree->drawTree(dotFile);

  const std::string convertCommand = std::string("dot -Tpng ") + dotFile + " -o " + dotFile + ".png";
  [[maybe_unused]] auto _ = std::system(convertCommand.c_str());
#endif
}
// One parameter set for the rewrite test. Kept as an aggregate so the cases can
// be written as brace-initialized lists.
struct ParseTreeTestParam
{
  // Name used in test output and in debug dump file names.
  std::string queryName;
  // Serialized filter tree of the query before the rewrite.
  std::vector<unsigned char>* query = nullptr;
  // Serialized expected tree after the rewrite; left null when the rewrite is
  // expected to leave the tree unchanged.
  std::vector<unsigned char>* manually_rewritten_query = nullptr;

  // Prints only the query name.
  friend std::ostream& operator<<(std::ostream& os, const ParseTreeTestParam& bar)
  {
    return os << bar.queryName;
  }
};
class ParseTreeTest : public testing::TestWithParam<::ParseTreeTestParam> {};
// Deserializes the query's filter tree, runs the rewrite on it and compares the
// outcome with a reference tree: the manually rewritten tree when one is
// supplied, otherwise a fresh copy of the original (no rewrite expected).
TEST_P(ParseTreeTest, Rewrite)
{
  const auto& param = GetParam();

  messageqcpp::ByteStream stream;
  stream.load(param.query->data(), param.query->size());
  execplan::ParseTree* initialTree = execplan::ObjectReader::createParseTree(stream);
  printTree(param.queryName, initialTree, "initial");

  // The rewrite works in place and may replace or delete the root, so
  // initialTree must not be touched afterwards; the returned root owns the tree.
  TreePtr rewrittenTree;
  rewrittenTree.reset(execplan::extractCommonLeafConjunctionsToRoot<true>(initialTree));
  printTree(param.queryName, rewrittenTree.get(), "rewritten");

  // Always compare against an independently deserialized tree. Comparing with
  // initialTree would compare the tree with itself (or with a dangling pointer
  // if the root was replaced) and could never detect an unexpected rewrite.
  const std::vector<unsigned char>* expected =
      param.manually_rewritten_query ? param.manually_rewritten_query : param.query;
  stream.load(expected->data(), expected->size());
  TreePtr referenceTree;
  referenceTree.reset(execplan::ObjectReader::createParseTree(stream));
  printTree(param.queryName, referenceTree.get(), "reference");

  bool result = treeEqual(referenceTree.get(), rewrittenTree.get());
  EXPECT_TRUE(result);
}
// Cases for ParseTreeTest. Each entry is {name, input tree bytes[, reference tree bytes]}.
// Entries without a reference are checked by the Rewrite test against the initial tree.
// The SQL above each entry presumably is the query its trees were captured from.
INSTANTIATE_TEST_SUITE_P(
    TreeRewrites, ParseTreeTest,
    testing::Values(
        /* Query_1
           select t1.posname, t2.posname from t1,t2
           where (t1.id = t2.id and t1.pos + t2.pos < 1000)
              or (t1.id = t2.id and t1.pos + t2.pos > 15000);
        */
        ParseTreeTestParam{"Query_1", &__test_query_before_1, &__test_query_after_1},

        /* Query_2
           select t1.posname, t2.posname from t1,t2
           where t1.id = t2.id and (t1.pos + t2.pos < 1000);
        */
        ParseTreeTestParam{"Query_2", &__test_query_before_2},

        /* Query_3
           select t1.posname, t2.posname from t1,t2
           where (t1.pos + t2.pos < 1000)
              or (t1.pos + t2.pos > 16000)
              or (t1.posname < dcba);
        */
        ParseTreeTestParam{"Query_3", &__test_query_before_3},

        /* Query_4
           select t1.posname, t2.posname from t1,t2
           where (t1.pos > 20)
              or (t2.posname in (select t1.posname from t1 where t1.pos > 20));
        */
        ParseTreeTestParam{"Query_4", &__test_query_before_4},

        /* Query_5
           select t1.posname, t2.posname from t1,t2
           where (t1.id = t2.id or t1.pos + t2.pos < 1000)
             and (t1.id = t2.id or t1.pos + t2.pos > 15000);
        */
        ParseTreeTestParam{"Query_5", &__test_query_before_5},

        /* Query_6
           select t1.posname, t2.posname from t1,t2
           where (t1.id = t2.rid or t1.pos + t2.pos < 1000)
             and (t1.id = t2.id or t1.pos + t2.pos > 15000);
        */
        ParseTreeTestParam{"Query_6", &__test_query_before_6},

        /* Query_7
           select t1.posname from t1
           where t1.posname in
                 (select t1.posname from t1 where posname > 'qwer' and id < 30);
        */
        ParseTreeTestParam{"Query_7", &__test_query_before_7},

        /* Query_8
           select t1.posname, t2.posname from t1,t2
           where t1.posname in
                 (select t1.posname from t1 where posname > 'qwer' and id < 30)
             and t1.id = t2.id;
        */
        ParseTreeTestParam{"Query_8", &__test_query_before_8},

        /* Query_9
           select t1.posname, t2.posname from t1,t2
           where t1.posname in
                 (select t1.posname from t1 where posname > 'qwer' and id < 30)
             and (t1.id = t2.id and t1.id = t2.rid);
        */
        ParseTreeTestParam{"Query_9", &__test_query_before_9},

        /* Query_10
           select * from t1
           where (posname > 'qwer' and id < 30)
              or (pos > 5000 and place > 'abcdefghij');
        */
        ParseTreeTestParam{"Query_10", &__test_query_before_10},

        /* Query_11
           select * from t1
           where (pos > 5000 and id < 30)
              or (pos > 5000 and id < 30);
        */
        ParseTreeTestParam{"Query_11", &__test_query_before_11, &__test_query_after_11},

        /* Query_12
           select * from t1
           where (pos > 5000 and id < 30)
             and (pos > 5000 and id < 30);
        */
        ParseTreeTestParam{"Query_12", &__test_query_before_12},

        /* Query_13
           select * from t1
           where (pos > 5000 or id < 30)
              or (pos > 5000 or id < 30);
        */
        ParseTreeTestParam{"Query_13", &__test_query_before_13},

        /* Query_14
           select * from t1
           where (id in (select id from t2 where posname > 'qwer' and rid > 10))
             and (pos > 5000 or id < 30);
        */
        ParseTreeTestParam{"Query_14", &__test_query_before_14},

        /* Query_15
           select * from t1
           where (id in (select id from t2
                         where (posname > 'qwer' and rid < 10)
                            or (posname > 'qwer' and rid > 40)))
             and (pos > 5000 or id < 30);
        */
        ParseTreeTestParam{"Query_15", &__test_query_before_15, &__test_query_after_15},

        /* Query_16
           select * from t1
           where (pos > 5000 and id < 30)
              or (pos > 5000 and id < 30)
              or (pos > 5000 and id < 30);
        */
        ParseTreeTestParam{"Query_16", &__test_query_before_16, &__test_query_after_16},

        /* Query_17
           select * from t1
           where (pos > 5000 and id < 30)
              or (pos > 5000 and id < 30)
              or (pos > 5000 and id < 30)
              or (pos > 5000 and id < 30);
        */
        ParseTreeTestParam{"Query_17", &__test_query_before_17, &__test_query_after_17},

        /* Query_18
           select * from t1
           where (pos > 5000 and id < 30)
              or (pos > 5000 and id < 30)
              or (pos > 5000 and id < 30)
              or (pos > 5000 and id < 30)
              or (pos > 5000 and id < 30);
        */
        ParseTreeTestParam{"Query_18", &__test_query_before_18, &__test_query_after_18},

        /* Query_19
           select * from t1
           where (pos > 5000 and id < 30)
              or (pos > 5000 and id < 30)
              or (pos > 5000 and id < 30)
              or (pos > 5000 and id < 30)
              or (pos > 5000 and id < 30)
              or (pos > 5000 and id < 30)
              or (pos > 5000 and id < 30)
              or (pos > 5000 and id < 30);
        */
        ParseTreeTestParam{"Query_19", &__test_query_before_19, &__test_query_after_19},

        /* Query_20
           select * from t1
           where (pos > 5000 and id < 30)
              or (posname > 'qwer' and id < 30 and place > 'abcdefghij');
        */
        ParseTreeTestParam{"Query_20", &__test_query_before_20, &__test_query_after_20},

        /* Query_21
           select * from t1
           where (pos > 5000 and id < 30)
              or (posname > 'qwer' and id < 30 and place > 'abcdefghij')
              or (id < 30 and place < 'zyxqwertyu');
        */
        ParseTreeTestParam{"Query_21", &__test_query_before_21, &__test_query_after_21},

        /* Query_22
           select * from t1
           where (pos > 5000 and id < 30)
              or (posname > 'qwer' and id < 30 and place > 'abcdefghij' and pos > 5000)
              or (id < 30 and place < 'zyxqwertyu' and pos > 5000)
              or (pos > 5000 and id < 30);
        */
        ParseTreeTestParam{"Query_22", &__test_query_before_22, &__test_query_after_22},

        /* Query_23
           select * from t1
           where (5000 < pos and id < 30)
              or (posname > 'qwer' and id < 30 and place > 'abcdefghij' and 5000 < pos)
              or (30 > id and place < 'zyxqwertyu' and pos > 5000)
              or (pos > 5000 and id < 30);
        */
        ParseTreeTestParam{"Query_23", &__test_query_before_23, &__test_query_after_23},

        /* Query_27
           select * from t1
           where (pos > 5000 and id < 30 and rid > 20)
              or (posname > 'qwer' and id < 30 and place > 'abcdefghij' and pos > 5000 and rid > 20)
              or (id < 30 and place < 'zyxqwertyu' and pos > 5000 and rid > 20)
              or (pos > 5000 and id < 30 and rid > 20)
              or (pos > 5000 and id < 30 and place < 'zyxqwertyu' and rid > 20);
        */
        ParseTreeTestParam{"Query_27", &__test_query_before_27, &__test_query_after_27},

        /* Query_28
           select * from t1
           where (pos > 5000 and id < 30 and rid > 20 and place < 'zyxqwertyu')
              or (posname > 'qwer' and id < 30 and place > 'abcdefghij' and place < 'zyxqwertyu' and pos > 5000 and rid > 20)
              or (id < 30 and place < 'zyxqwertyu' and pos > 5000 and rid > 20)
              or (pos > 5000 and id < 30 and rid > 20 and place < 'zyxqwertyu' and place < 'zyxqwertyu');
        */
        ParseTreeTestParam{"Query_28", &__test_query_before_28, &__test_query_after_28},

        ParseTreeTestParam{"TPCH_19", &__query19_tree_init, &__query19_tree_fixed}),
    // Name each generated test after its case label.
    [](const ::testing::TestParamInfo<ParseTreeTest::ParamType>& paramInfo) -> std::string
    {
      return paramInfo.param.queryName;
    });
/// One parameterised case for the comparator tests.
struct ComparatorTestParam
{
  /// Case label; this is also what operator<< prints.
  std::string queryName;
  /// Text of the filter that is looked up.
  std::string filter;
  /// Texts of the filters the lookup runs against.
  std::vector<std::string> existingFilters;
  /// Expected lookup result. Defaulted so that a default-constructed
  /// instance never carries an indeterminate value.
  bool contains = false;

  /// Streams only the case label.
  friend std::ostream& operator<<(std::ostream& os, const ComparatorTestParam& param)
  {
    return os << param.queryName;
  }
};
class ParseTreeComparatorTest : public testing::TestWithParam<ComparatorTestParam> {};
/// Comparison functor for owning ParseTree pointers: forwards the raw pointers
/// to a freshly constructed execplan::NodeSemanticComparator.
struct TestComparator
{
  bool operator()(std::unique_ptr<execplan::ParseTree> const& left,
                  std::unique_ptr<execplan::ParseTree> const& right) const
  {
    return execplan::NodeSemanticComparator()(left.get(), right.get());
  }
};
// Fills a std::set ordered by TestComparator with the case's existing filters and
// checks whether looking up the probe filter gives the expected result.
TEST_P(ParseTreeComparatorTest, CompareContains)
{
  using FilterTree = std::unique_ptr<execplan::ParseTree>;
  const auto& param = GetParam();

  // Wraps a SimpleFilter built from `text` in a single-node ParseTree.
  const auto makeFilterTree = [](const std::string& text)
  {
    return std::make_unique<execplan::ParseTree>(
        new execplan::SimpleFilter(text, execplan::SimpleFilter::ForTestPurposesWithoutColumnsOIDS{}));
  };

  std::set<FilterTree, TestComparator> container;
  for (const auto& existing : param.existingFilters)
  {
    container.insert(makeFilterTree(existing));
  }

  const FilterTree filter = makeFilterTree(param.filter);
  ASSERT_EQ(param.contains, container.contains(filter));
}
// Cases for ParseTreeComparatorTest: {name, probe filter, stored filters, expected lookup result}.
INSTANTIATE_TEST_SUITE_P(
    Comparator, ParseTreeComparatorTest,
    testing::Values(
        ComparatorTestParam{"SimpleInverse1", "a=b", {"b=a", "a=a"}, true},
        ComparatorTestParam{"SimpleInverse2", "acb=bdd", {"b>a", "a=b", "bdd=acb"}, true},
        ComparatorTestParam{"SimpleInverseOpposite", "a<b", {"b>a"}, true},
        ComparatorTestParam{"SimpleInverseOpposite2", "a<b", {"a=c", "d=e", "b>a", "a<=b"}, true},
        ComparatorTestParam{"SimpleContains", "a<b", {"a<b", "a=b", "acb=bdd"}, true},
        ComparatorTestParam{"SimpleNotContains", "a<b", {"a<b1", "a=b", "acb=bdd"}, false}),
    // Name each generated test after its case label.
    [](const ::testing::TestParamInfo<ParseTreeComparatorTest::ParamType>& paramInfo) -> std::string
    {
      return paramInfo.param.queryName;
    });

1970
tests/unitqueries_after.h Normal file

File diff suppressed because it is too large Load Diff

105
tests/unitqueries_before.h Normal file

File diff suppressed because one or more lines are too long