1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-08-08 14:22:09 +03:00

feat(joblist): better dot graphs that represents joblist steps execution tree.

This commit is contained in:
drrtuy
2024-09-16 20:38:48 +00:00
committed by Leonid Fedorov
parent 9ef5d7f3ac
commit ce86d1025a
5 changed files with 477 additions and 828 deletions

View File

@@ -1,19 +1,19 @@
/* Copyright (C) 2014 InfiniDB, Inc. /* Copyright (C) 2014 InfiniDB, Inc.
This program is free software; you can redistribute it and/or This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; version 2 of as published by the Free Software Foundation; version 2 of
the License. the License.
This program is distributed in the hope that it will be useful, This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details. GNU General Public License for more details.
You should have received a copy of the GNU General Public License You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
MA 02110-1301, USA. */ MA 02110-1301, USA. */
// $Id: jlf_graphics.cpp 9550 2013-05-17 23:58:07Z xlou $ // $Id: jlf_graphics.cpp 9550 2013-05-17 23:58:07Z xlou $
@@ -23,7 +23,12 @@
#include <iostream> #include <iostream>
using namespace std; using namespace std;
#include <cstddef>
#include <iterator>
#include <sstream>
#include "joblist.h" #include "joblist.h"
#include "jobstep.h"
#include "primitivestep.h" #include "primitivestep.h"
#include "subquerystep.h" #include "subquerystep.h"
#include "windowfunctionstep.h" #include "windowfunctionstep.h"
@@ -44,302 +49,423 @@ using namespace joblist;
namespace jlf_graphics namespace jlf_graphics
{ {
ostream& writeDotCmds(ostream& dotFile, const JobStepVector& query, const JobStepVector& project) std::string generateDotFileName(const std::string& prefix)
{
ostringstream oss;
struct timeval tvbuf;
gettimeofday(&tvbuf, 0);
struct tm tmbuf;
localtime_r(reinterpret_cast<time_t*>(&tvbuf.tv_sec), &tmbuf);
oss << prefix << setfill('0') << setw(4) << (tmbuf.tm_year + 1900) << setw(2) << (tmbuf.tm_mon + 1)
<< setw(2) << (tmbuf.tm_mday) << setw(2) << (tmbuf.tm_hour) << setw(2) << (tmbuf.tm_min) << setw(2)
<< (tmbuf.tm_sec) << setw(6) << (tvbuf.tv_usec) << ".dot";
return oss.str();
}
JobStepVector GraphGeneratorInterface::extractSubquerySteps(const SJSTEP& sqs)
{
JobStepVector res;
auto* subQueryStep = dynamic_cast<SubQueryStep*>(sqs.get());
if (subQueryStep)
{
JobStepVector subQuerySteps;
auto& stepsBeforeRecursion = subQueryStep->subJoblist()->querySteps();
for (auto& step : stepsBeforeRecursion)
{
auto steps = extractJobSteps(step);
subQuerySteps.insert(subQuerySteps.end(), steps.begin(), steps.end());
}
res.insert(res.end(), subQuerySteps.begin(), subQuerySteps.end());
}
res.push_back(sqs);
return res;
}
// This f() is recursive to handle nested subqueries
JobStepVector GraphGeneratorInterface::extractJobSteps(const SJSTEP& umbrella)
{
JobStepVector res;
if (typeid(*umbrella) == typeid(SubAdapterStep))
{
auto* subAdapterStep = dynamic_cast<SubAdapterStep*>(umbrella.get());
assert(subAdapterStep);
auto subQuerySteps = extractSubquerySteps(subAdapterStep->subStep());
res.insert(res.end(), subQuerySteps.begin(), subQuerySteps.end());
res.push_back(umbrella);
}
else if (typeid(*umbrella) == typeid(SubQueryStep))
{
auto subQuerySteps = extractSubquerySteps(umbrella);
res.insert(res.end(), subQuerySteps.begin(), subQuerySteps.end());
}
else
{
res.push_back(umbrella);
}
return res;
}
std::string getLadderRepr(const JobStepVector& steps, const std::vector<size_t>& tabsToPretify)
{
std::ostringstream oss;
assert(tabsToPretify.size() == steps.size());
// Tabs are in the reverse order of the steps
auto tabsIt = tabsToPretify.begin();
// Reverse the order of the steps to draw the graph from top to bottom
for (auto s = steps.rbegin(); s != steps.rend(); ++s, ++tabsIt)
{
oss << std::string(*tabsIt, '\t');
oss << (*s)->extendedInfo() << std::endl;
}
return oss.str();
}
std::string GraphGeneratorInterface::getGraphNode(const SJSTEP& stepPtr)
{
auto& step = *stepPtr;
uint16_t stepidIn = step.stepId();
std::ostringstream oss;
oss << stepidIn << " [label=\"st_" << stepidIn << " ";
if (typeid(step) == typeid(pColStep))
{
oss << "(" << step.tableOid() << "/" << step.oid() << ")" << "\"";
oss << " shape=ellipse";
}
else if (typeid(step) == typeid(pColScanStep))
{
oss << "(" << step.tableOid() << "/" << step.oid() << ")" << "\"";
oss << " shape=box";
}
else if (typeid(step) == typeid(TupleBPS))
{
bool isTuple = false;
BatchPrimitive* bps = dynamic_cast<BatchPrimitive*>(stepPtr.get());
if (dynamic_cast<TupleBPS*>(bps) != 0)
isTuple = true;
oss << "(" << bps->tableOid() << "/" << bps->oid() << "/" << bps->alias();
OIDVector projectOids = bps->getProjectOids();
if (projectOids.size() > 0)
{
oss << "\\l";
oss << "PC: ";
}
for (unsigned int i = 0; i < projectOids.size(); i++)
{
oss << projectOids[i] << " ";
if ((i + 1) % 3 == 0)
oss << "\\l";
}
oss << ")\"";
oss << " shape=box style=bold";
if (isTuple)
oss << " peripheries=2";
}
else if (typeid(step) == typeid(CrossEngineStep))
{
CrossEngineStep* cej = dynamic_cast<CrossEngineStep*>(stepPtr.get());
oss << "(" << cej->tableName() << "/" << cej->tableAlias() << ")\"";
oss << " shape=cylinder style=bold";
}
else if (typeid(step) == typeid(TupleHashJoinStep))
{
oss << "\"";
oss << " shape=diamond peripheries=2";
}
else if (typeid(step) == typeid(TupleUnion))
{
oss << "\"";
oss << " shape=triangle";
}
else if (typeid(step) == typeid(pDictionaryStep))
{
oss << "\"";
oss << " shape=trapezium";
}
else if (typeid(step) == typeid(FilterStep))
{
oss << "\"";
oss << " shape=invhouse";
}
else if (typeid(step) == typeid(TupleAggregateStep))
{
oss << "\"";
oss << " shape=invtriangle";
}
else if (typeid(step) == typeid(TupleAnnexStep))
{
oss << "\"";
oss << " shape=star";
}
else if (typeid(step) == typeid(WindowFunctionStep))
{
oss << "\"";
oss << " shape=invtriangle";
oss << " peripheries=2";
}
else if (typeid(step) == typeid(SubAdapterStep))
{
oss << "\"";
oss << " shape=polygon";
oss << " peripheries=2";
}
else if (typeid(step) == typeid(SubQueryStep))
{
oss << "\"";
oss << " shape=polygon";
}
else
{
oss << "\"";
}
oss << "]" << endl;
return oss.str();
}
std::pair<size_t, std::string> GraphGeneratorInterface::getTabsAndEdges(
const JobStepVector& querySteps, const JobStepVector& projectSteps, const SJSTEP& stepPtr,
const std::vector<size_t>& tabsToPretify)
{
auto& step = *stepPtr;
uint16_t stepidIn = step.stepId();
std::ostringstream oss;
size_t tab = 0;
for (unsigned int i = 0; i < step.outputAssociation().outSize(); i++)
{
ptrdiff_t dloutptr = 0;
auto* dlout = step.outputAssociation().outAt(i)->rowGroupDL();
uint32_t numConsumers = step.outputAssociation().outAt(i)->getNumConsumers();
if (dlout)
{
dloutptr = (ptrdiff_t)dlout;
}
for (auto it = querySteps.rbegin(); it != querySteps.rend(); ++it)
{
auto& otherStep = *it;
// Reverse order idx
auto otherIdx = std::distance(querySteps.rbegin(), it);
uint16_t stepidOut = otherStep.get()->stepId();
JobStepAssociation queryInputSA = otherStep.get()->inputAssociation();
for (unsigned int j = 0; j < queryInputSA.outSize(); j++)
{
ptrdiff_t dlinptr = 0;
auto* dlin = queryInputSA.outAt(j)->rowGroupDL();
if (dlin)
{
dlinptr = (ptrdiff_t)dlin;
}
if ((ptrdiff_t)dloutptr == (ptrdiff_t)dlinptr)
{
oss << stepidIn << " -> " << stepidOut;
if (dlin)
{
oss << " [label=\"[" << numConsumers << "]\"]" << endl;
tab = tabsToPretify[otherIdx] + 1;
}
}
}
}
for (auto& otherProjectStep : projectSteps)
{
uint16_t stepidOut = otherProjectStep->stepId();
JobStepAssociation projectInputSA = otherProjectStep->inputAssociation();
for (unsigned int j = 0; j < projectInputSA.outSize(); j++)
{
ptrdiff_t dlinptr = 0;
auto* dlin = projectInputSA.outAt(j)->rowGroupDL();
if (dlin)
dlinptr = (ptrdiff_t)dlin;
if (dloutptr == dlinptr)
{
oss << stepidIn << " -> " << stepidOut;
if (dlin)
{
oss << " [label=\"[" << numConsumers << "]\"]" << endl;
}
}
}
}
}
return {tab, oss.str()};
}
std::string GraphGeneratorInterface::getGraphProjectionNode(SJSTEP& step)
{
std::ostringstream oss;
uint16_t stepidIn = step->stepId();
oss << stepidIn << " [label=\"st_" << stepidIn << " ";
if (typeid(*(step)) == typeid(pColStep))
{
oss << "(" << step->tableOid() << "/" << step->oid() << ")" << "\"";
oss << " shape=ellipse";
}
else if (typeid(*(step)) == typeid(pColScanStep))
{
oss << "(" << step->tableOid() << "/" << step->oid() << ")" << "\"";
oss << " shape=box";
}
else if (typeid(*(step)) == typeid(pDictionaryStep))
{
oss << "\"";
oss << " shape=trapezium";
}
else if (typeid(*(step)) == typeid(PassThruStep))
{
oss << "(" << step->tableOid() << "/" << step->oid() << ")" << "\"";
oss << " shape=octagon";
}
else if (typeid(*(step)) == typeid(TupleBPS))
{
bool isTuple = false;
BatchPrimitive* bps = dynamic_cast<BatchPrimitive*>(step.get());
if (dynamic_cast<TupleBPS*>(bps) != 0)
isTuple = true;
oss << "(" << bps->tableOid() << ":\\l";
OIDVector projectOids = bps->getProjectOids();
for (unsigned int i = 0; i < projectOids.size(); i++)
{
oss << projectOids[i] << " ";
if ((i + 1) % 3 == 0)
oss << "\\l";
}
oss << ")\"";
oss << " shape=box style=bold";
if (isTuple)
oss << " peripheries=2";
}
else if (typeid(*(step)) == typeid(CrossEngineStep))
{
CrossEngineStep* cej = dynamic_cast<CrossEngineStep*>(step.get());
oss << "(" << cej->tableName() << "/" << cej->tableAlias() << ")\"";
oss << " shape=cylinder style=bold";
}
else
oss << "\"";
oss << "]" << endl;
return oss.str();
}
std::string GraphGeneratorInterface::getProjectionEdges(JobStepVector& steps, SJSTEP& step,
const std::size_t ctn)
{
uint16_t stepidIn = step->stepId();
std::ostringstream oss;
for (unsigned int i = 0; i < step->outputAssociation().outSize(); i++)
{
ptrdiff_t dloutptr = 0;
auto* dlout = step->outputAssociation().outAt(i)->rowGroupDL();
uint32_t numConsumers = step->outputAssociation().outAt(i)->getNumConsumers();
if (dlout)
{
dloutptr = (ptrdiff_t)dlout;
}
for (auto k = ctn + 1; k < steps.size(); k++)
{
uint16_t stepidOut = steps[k].get()->stepId();
JobStepAssociation projectInputSA = steps[k].get()->inputAssociation();
for (unsigned int j = 0; j < projectInputSA.outSize(); j++)
{
ptrdiff_t dlinptr = 0;
auto* dlin = projectInputSA.outAt(j)->rowGroupDL();
if (dlin)
dlinptr = (ptrdiff_t)dlin;
if ((ptrdiff_t)dloutptr == (ptrdiff_t)dlinptr)
{
oss << stepidIn << " -> " << stepidOut;
if (dlin)
{
oss << " [label=\"[" << numConsumers << "]\"]" << endl;
}
}
}
}
}
return oss.str();
}
std::string GraphGeneratorInterface::writeDotCmds()
{ {
// Graphic view draw // Graphic view draw
dotFile << "digraph G {" << endl; std::ostringstream oss;
JobStepVector::iterator qsi; oss << "digraph G {" << endl;
JobStepVector::iterator psi;
int ctn = 0;
// merge in the subquery steps // merge in the subquery steps
JobStepVector querySteps = query; JobStepVector querySteps;
for (auto& step : query)
{
auto steps = extractJobSteps(step);
querySteps.insert(querySteps.end(), steps.begin(), steps.end());
}
JobStepVector projectSteps = project; JobStepVector projectSteps = project;
std::vector<size_t> tabsToPretify;
// Reverse the order of the steps to draw the graph from top to bottom
for (auto it = querySteps.rbegin(); it != querySteps.rend(); ++it)
{ {
SubQueryStep* subquery = NULL; auto& step = *it;
qsi = querySteps.begin(); oss << getGraphNode(step);
auto [tab, graphEdges] = getTabsAndEdges(querySteps, projectSteps, step, tabsToPretify);
while (qsi != querySteps.end()) tabsToPretify.push_back(tab);
{ oss << graphEdges;
if ((subquery = dynamic_cast<SubQueryStep*>(qsi->get())) != NULL)
{
querySteps.erase(qsi);
JobStepVector subSteps = subquery->subJoblist()->querySteps();
querySteps.insert(querySteps.end(), subSteps.begin(), subSteps.end());
qsi = querySteps.begin();
}
else
{
qsi++;
}
}
} }
for (qsi = querySteps.begin(); qsi != querySteps.end(); ctn++, qsi++) for (auto psi = projectSteps.begin(); psi != projectSteps.end(); ++psi)
{ {
// if (dynamic_cast<OrDelimiter*>(qsi->get()) != NULL) auto& step = *psi;
// continue; oss << getGraphProjectionNode(step);
auto idx = std::distance(projectSteps.begin(), psi);
uint16_t stepidIn = qsi->get()->stepId(); oss << getProjectionEdges(projectSteps, step, idx);
dotFile << stepidIn << " [label=\"st_" << stepidIn << " ";
if (typeid(*(qsi->get())) == typeid(pColStep))
{
dotFile << "(" << qsi->get()->tableOid() << "/" << qsi->get()->oid() << ")"
<< "\"";
dotFile << " shape=ellipse";
}
else if (typeid(*(qsi->get())) == typeid(pColScanStep))
{
dotFile << "(" << qsi->get()->tableOid() << "/" << qsi->get()->oid() << ")"
<< "\"";
dotFile << " shape=box";
}
// else if (typeid(*(qsi->get())) == typeid(HashJoinStep) ||
// typeid(*(qsi->get())) == typeid(StringHashJoinStep))
// {
// dotFile << "\"";
// dotFile << " shape=diamond";
// }
else if (typeid(*(qsi->get())) == typeid(TupleHashJoinStep))
{
dotFile << "\"";
dotFile << " shape=diamond peripheries=2";
}
// else if (typeid(*(qsi->get())) == typeid(UnionStep) ||
// typeid(*(qsi->get())) == typeid(TupleUnion) )
else if (typeid(*(qsi->get())) == typeid(TupleUnion))
{
dotFile << "\"";
dotFile << " shape=triangle";
}
else if (typeid(*(qsi->get())) == typeid(pDictionaryStep))
{
dotFile << "\"";
dotFile << " shape=trapezium";
}
else if (typeid(*(qsi->get())) == typeid(FilterStep))
{
dotFile << "\"";
dotFile << " shape=house orientation=180";
}
// else if (typeid(*(qsi->get())) == typeid(ReduceStep))
// {
// dotFile << "\"";
// dotFile << " shape=triangle orientation=180";
// }
// else if (typeid(*(qsi->get())) == typeid(BatchPrimitiveStep) || typeid(*(qsi->get())) ==
//typeid(TupleBPS))
else if (typeid(*(qsi->get())) == typeid(TupleBPS))
{
bool isTuple = false;
BatchPrimitive* bps = dynamic_cast<BatchPrimitive*>(qsi->get());
if (dynamic_cast<TupleBPS*>(bps) != 0)
isTuple = true;
dotFile << "(" << bps->tableOid() << "/" << bps->oid();
OIDVector projectOids = bps->getProjectOids();
if (projectOids.size() > 0)
{
dotFile << "\\l";
dotFile << "PC: ";
}
for (unsigned int i = 0; i < projectOids.size(); i++)
{
dotFile << projectOids[i] << " ";
if ((i + 1) % 3 == 0)
dotFile << "\\l";
}
dotFile << ")\"";
dotFile << " shape=box style=bold";
if (isTuple)
dotFile << " peripheries=2";
}
else if (typeid(*(qsi->get())) == typeid(CrossEngineStep))
{
BatchPrimitive* bps = dynamic_cast<BatchPrimitive*>(qsi->get());
dotFile << "(" << bps->alias() << ")\"";
dotFile << " shape=box style=bold";
dotFile << " peripheries=2";
}
else if (typeid(*(qsi->get())) == typeid(TupleAggregateStep))
{
dotFile << "\"";
dotFile << " shape=triangle orientation=180";
}
else if (typeid(*(qsi->get())) == typeid(TupleAnnexStep))
{
dotFile << "\"";
dotFile << " shape=star";
}
else if (typeid(*(qsi->get())) == typeid(WindowFunctionStep))
{
dotFile << "\"";
dotFile << " shape=triangle orientation=180";
dotFile << " peripheries=2";
}
// else if (typeid(*(qsi->get())) == typeid(AggregateFilterStep))
// {
// dotFile << "\"";
// dotFile << " shape=hexagon peripheries=2 style=bold";
// }
// else if (typeid(*(qsi->get())) == typeid(BucketReuseStep))
// {
// dotFile << "(" << qsi->get()->tableOid() << "/" << qsi->get()->oid() << ")" << "\"";
// dotFile << " shape=box style=dashed";
// }
else
dotFile << "\"";
dotFile << "]" << endl;
for (unsigned int i = 0; i < qsi->get()->outputAssociation().outSize(); i++)
{
RowGroupDL* dlout = qsi->get()->outputAssociation().outAt(i)->rowGroupDL();
ptrdiff_t dloutptr = (ptrdiff_t)dlout;
for (unsigned int k = 0; k < querySteps.size(); k++)
{
uint16_t stepidOut = querySteps[k].get()->stepId();
JobStepAssociation queryInputSA = querySteps[k].get()->inputAssociation();
for (unsigned int j = 0; j < queryInputSA.outSize(); j++)
{
RowGroupDL* dlin = queryInputSA.outAt(j)->rowGroupDL();
ptrdiff_t dlinptr = (ptrdiff_t)dlin;;
if ((ptrdiff_t)dloutptr == (ptrdiff_t)dlinptr)
{
dotFile << stepidIn << " -> " << stepidOut;
}
}
}
for (psi = projectSteps.begin(); psi < projectSteps.end(); psi++)
{
uint16_t stepidOut = psi->get()->stepId();
JobStepAssociation projectInputSA = psi->get()->inputAssociation();
for (unsigned int j = 0; j < projectInputSA.outSize(); j++)
{
RowGroupDL* dlin = projectInputSA.outAt(j)->rowGroupDL();
ptrdiff_t dlinptr = (ptrdiff_t)dlin;;
if (dloutptr == dlinptr)
{
dotFile << stepidIn << " -> " << stepidOut;
}
}
}
}
} }
for (psi = projectSteps.begin(), ctn = 0; psi != projectSteps.end(); ctn++, psi++) oss << "}" << endl;
{
uint16_t stepidIn = psi->get()->stepId();
dotFile << stepidIn << " [label=\"st_" << stepidIn << " ";
if (typeid(*(psi->get())) == typeid(pColStep)) auto ladderRepr = getLadderRepr(querySteps, tabsToPretify);
{ cout << endl;
dotFile << "(" << psi->get()->tableOid() << "/" << psi->get()->oid() << ")" cout << ladderRepr;
<< "\"";
dotFile << " shape=ellipse";
}
else if (typeid(*(psi->get())) == typeid(pColScanStep))
{
dotFile << "(" << psi->get()->tableOid() << "/" << psi->get()->oid() << ")"
<< "\"";
dotFile << " shape=box";
}
else if (typeid(*(psi->get())) == typeid(pDictionaryStep))
{
dotFile << "\"";
dotFile << " shape=trapezium";
}
else if (typeid(*(psi->get())) == typeid(PassThruStep))
{
dotFile << "(" << psi->get()->tableOid() << "/" << psi->get()->oid() << ")"
<< "\"";
dotFile << " shape=octagon";
}
// else if (typeid(*(psi->get())) == typeid(BatchPrimitiveStep) || typeid(*(psi->get())) ==
//typeid(TupleBPS))
else if (typeid(*(psi->get())) == typeid(TupleBPS))
{
bool isTuple = false;
BatchPrimitive* bps = dynamic_cast<BatchPrimitive*>(psi->get());
if (dynamic_cast<TupleBPS*>(bps) != 0) return oss.str();
isTuple = true;
dotFile << "(" << bps->tableOid() << ":\\l";
OIDVector projectOids = bps->getProjectOids();
for (unsigned int i = 0; i < projectOids.size(); i++)
{
dotFile << projectOids[i] << " ";
if ((i + 1) % 3 == 0)
dotFile << "\\l";
}
dotFile << ")\"";
dotFile << " shape=box style=bold";
if (isTuple)
dotFile << " peripheries=2";
}
else if (typeid(*(qsi->get())) == typeid(CrossEngineStep))
{
BatchPrimitive* bps = dynamic_cast<BatchPrimitive*>(qsi->get());
dotFile << "(" << bps->alias() << ")\"";
dotFile << " shape=box style=bold";
dotFile << " peripheries=2";
}
else
dotFile << "\"";
dotFile << "]" << endl;
for (unsigned int i = 0; i < psi->get()->outputAssociation().outSize(); i++)
{
RowGroupDL* dlout = psi->get()->outputAssociation().outAt(i)->rowGroupDL();
ptrdiff_t dloutptr = (ptrdiff_t)dlout;
for (unsigned int k = ctn + 1; k < projectSteps.size(); k++)
{
uint16_t stepidOut = projectSteps[k].get()->stepId();
JobStepAssociation projectInputSA = projectSteps[k].get()->inputAssociation();
for (unsigned int j = 0; j < projectInputSA.outSize(); j++)
{
RowGroupDL* dlin = projectInputSA.outAt(j)->rowGroupDL();
ptrdiff_t dlinptr = (ptrdiff_t)dlin;
if ((ptrdiff_t)dloutptr == (ptrdiff_t)dlinptr)
{
dotFile << stepidIn << " -> " << stepidOut;
}
}
}
}
}
dotFile << "}" << endl;
return dotFile;
} }
} // end namespace jlf_graphics } // end namespace jlf_graphics
#ifdef __clang__ #ifdef __clang__
#pragma clang diagnostic pop #pragma clang diagnostic pop
#endif #endif

View File

@@ -21,8 +21,6 @@
#pragma once #pragma once
#include <iostream>
#include "joblist.h" #include "joblist.h"
namespace jlf_graphics namespace jlf_graphics
@@ -30,7 +28,53 @@ namespace jlf_graphics
/** Format a stream of dot commands /** Format a stream of dot commands
* Format a stream of dot commands * Format a stream of dot commands
*/ */
std::ostream& writeDotCmds(std::ostream& dotFile, const joblist::JobStepVector& querySteps,
const joblist::JobStepVector& projectSteps); std::string getLadderRepr(const joblist::JobStepVector& steps, const std::vector<size_t>& tabsToPretify);
std::string generateDotFileName(const std::string& prefix);
class GraphGeneratorInterface
{
public:
GraphGeneratorInterface(const joblist::JobStepVector& query, const joblist::JobStepVector& project)
: query(query), project(project){};
virtual ~GraphGeneratorInterface(){};
virtual std::string writeDotCmds();
private:
virtual joblist::JobStepVector extractSubquerySteps(const joblist::SJSTEP& sqs);
virtual joblist::JobStepVector extractJobSteps(const joblist::SJSTEP& umbrella);
virtual std::string getGraphNode(const joblist::SJSTEP& stepPtr);
virtual std::pair<size_t, std::string> getTabsAndEdges(const joblist::JobStepVector& querySteps,
const joblist::JobStepVector& projectSteps,
const joblist::SJSTEP& stepPtr,
const std::vector<size_t>& tabsToPretify);
virtual std::string getGraphProjectionNode(joblist::SJSTEP& step);
virtual std::string getProjectionEdges(joblist::JobStepVector& steps, joblist::SJSTEP& step,
const std::size_t ctn);
const joblist::JobStepVector& query;
const joblist::JobStepVector& project;
};
class GraphGeneratorNoStats : public GraphGeneratorInterface
{
public:
GraphGeneratorNoStats(const joblist::JobStepVector& query, const joblist::JobStepVector& project)
: GraphGeneratorInterface(query, project){};
~GraphGeneratorNoStats(){};
};
class GraphGeneratorWStats : public GraphGeneratorInterface
{
public:
GraphGeneratorWStats(const joblist::JobStepVector& query, const joblist::JobStepVector& project)
: GraphGeneratorInterface(query, project){};
~GraphGeneratorWStats(){};
private:
};
} // namespace jlf_graphics } // namespace jlf_graphics

View File

@@ -19,12 +19,13 @@
// $Id: joblist.cpp 9655 2013-06-25 23:08:13Z xlou $ // $Id: joblist.cpp 9655 2013-06-25 23:08:13Z xlou $
// Cross engine needs to be at the top due to MySQL includes // Cross engine needs to be at the top due to MySQL includes
#include <algorithm>
#define PREFER_MY_CONFIG_H #define PREFER_MY_CONFIG_H
#include "crossenginestep.h" #include "crossenginestep.h"
#include "errorcodes.h" #include "errorcodes.h"
#include <iterator> #include <iterator>
#include <stdexcept> #include <stdexcept>
//#define NDEBUG // #define NDEBUG
#include <cassert> #include <cassert>
using namespace std; using namespace std;
@@ -33,6 +34,7 @@ using namespace std;
using namespace execplan; using namespace execplan;
#include "errorids.h" #include "errorids.h"
#include "jlf_graphics.h"
#include "jobstep.h" #include "jobstep.h"
#include "primitivestep.h" #include "primitivestep.h"
#include "subquerystep.h" #include "subquerystep.h"
@@ -570,530 +572,15 @@ void JobList::querySummary(bool extendedStats)
void JobList::graph(uint32_t sessionID) void JobList::graph(uint32_t sessionID)
{ {
// Graphic view draw // Graphic view draw
ostringstream oss; auto jsrname = jlf_graphics::generateDotFileName("jobstep_results.");
struct timeval tvbuf;
gettimeofday(&tvbuf, 0);
struct tm tmbuf;
localtime_r(reinterpret_cast<time_t*>(&tvbuf.tv_sec), &tmbuf);
oss << "jobstep_results." << setfill('0') << setw(4) << (tmbuf.tm_year + 1900) << setw(2)
<< (tmbuf.tm_mon + 1) << setw(2) << (tmbuf.tm_mday) << setw(2) << (tmbuf.tm_hour) << setw(2)
<< (tmbuf.tm_min) << setw(2) << (tmbuf.tm_sec) << setw(6) << (tvbuf.tv_usec) << ".dot";
string jsrname(oss.str());
// it's too late to set this here. ExeMgr has already returned ei to dm...
// fExtendedInfo += "Graphs are in " + jsrname;
std::ofstream dotFile(jsrname.c_str(), std::ios::out); std::ofstream dotFile(jsrname.c_str(), std::ios::out);
dotFile << "digraph G {" << std::endl; dotFile << jlf_graphics::GraphGeneratorWStats(fQuery, fProject).writeDotCmds();
JobStepVector::iterator qsi;
JobStepVector::iterator psi;
DeliveredTableMap::iterator dsi;
boost::shared_ptr<CalpontSystemCatalog> csc = CalpontSystemCatalog::makeCalpontSystemCatalog(sessionID);
CalpontSystemCatalog::TableColName tcn;
uint64_t outSize = 0;
uint64_t msgs = 0;
uint64_t pio = 0;
int ctn = 0;
bool diskIo = false;
uint64_t saveTime = 0;
uint64_t loadTime = 0;
// merge in the subquery steps
JobStepVector querySteps = fQuery;
{
SubQueryStep* subquery = NULL;
qsi = querySteps.begin();
while (qsi != querySteps.end())
{
if ((subquery = dynamic_cast<SubQueryStep*>(qsi->get())) != NULL)
{
querySteps.erase(qsi);
JobStepVector subSteps = subquery->subJoblist()->querySteps();
querySteps.insert(querySteps.end(), subSteps.begin(), subSteps.end());
qsi = querySteps.begin();
}
else
{
qsi++;
}
}
}
for (qsi = querySteps.begin(); qsi != querySteps.end(); ctn++, qsi++)
{
// HashJoinStep* hjs = 0;
// if (dynamic_cast<OrDelimiter*>(qsi->get()) != NULL)
// continue;
// @bug 1042. clear column name first at each loop
tcn.column = "";
uint16_t stepidIn = qsi->get()->stepId();
dotFile << stepidIn << " [label=\"st_" << stepidIn << " ";
// @Bug 1033. colName was being called for dictionary steps that don't have column names.
// Added if condition below.
if (typeid(*(qsi->get())) == typeid(pColScanStep) || typeid(*(qsi->get())) == typeid(pColStep))
tcn = csc->colName(qsi->get()->oid());
dotFile << "(";
if (!tcn.column.empty())
dotFile << tcn.column << "/";
if (typeid(*(qsi->get())) == typeid(TupleBPS))
{
BatchPrimitive* bps = dynamic_cast<BatchPrimitive*>(qsi->get());
OIDVector projectOids = bps->getProjectOids();
if (projectOids.size() > 0)
{
dotFile << "\\l";
dotFile << "PC:";
dotFile << "\\l";
for (unsigned int i = 0; i < projectOids.size(); i++)
{
tcn = csc->colName(projectOids[i]);
if (!tcn.column.empty())
dotFile << tcn.column << " ";
if ((i + 1) % 3 == 0)
dotFile << "\\l";
}
}
else
{
tcn = csc->colName(qsi->get()->oid());
dotFile << tcn.column << "/";
}
}
else if (typeid(*(qsi->get())) == typeid(CrossEngineStep))
{
tcn.schema = qsi->get()->schema();
tcn.table = qsi->get()->alias();
}
dotFile << JSTimeStamp::tsdiffstr(qsi->get()->dlTimes.EndOfInputTime(),
qsi->get()->dlTimes.FirstReadTime())
<< "s";
dotFile << ")";
// oracle predict card
dotFile << "\\l#: " << (*qsi)->cardinality();
if (typeid(*(qsi->get())) == typeid(pColStep))
{
dotFile << "\""
<< " shape=ellipse";
}
else if (typeid(*(qsi->get())) == typeid(pColScanStep))
{
dotFile << "\""
<< " shape=box";
}
else if (typeid(*(qsi->get())) == typeid(TupleBPS))
{
BatchPrimitive* bps = dynamic_cast<BatchPrimitive*>(qsi->get());
// if BPS not run, a BucketReuseStep was substituted, so draw dashed
if (bps->wasStepRun())
{
dotFile << "\""
<< " shape=box style=bold";
if (typeid(*(qsi->get())) == typeid(TupleBPS))
dotFile << " peripheries=2";
}
else
dotFile << "\""
<< " shape=box style=dashed";
}
else if (typeid(*(qsi->get())) == typeid(CrossEngineStep))
{
dotFile << "\""
<< " shape=box style=dashed";
}
else if (typeid(*(qsi->get())) == typeid(TupleAggregateStep))
{
dotFile << "\""
<< " shape=triangle orientation=180";
}
else if (typeid(*(qsi->get())) == typeid(TupleAnnexStep))
{
dotFile << "\""
<< " shape=star";
}
else if (typeid(*(qsi->get())) == typeid(WindowFunctionStep))
{
dotFile << "\""
<< " shape=triangle orientation=180 peripheries=2";
}
else if (typeid(*(qsi->get())) == typeid(TupleHashJoinStep))
{
dotFile << "\"";
dotFile << " shape=diamond style=dashed peripheries=2";
}
else if (typeid(*(qsi->get())) == typeid(TupleUnion))
{
dotFile << "\""
<< " shape=triangle";
}
else if (typeid(*(qsi->get())) == typeid(pDictionaryStep))
{
dotFile << "\""
<< " shape=trapezium";
}
else if (typeid(*(qsi->get())) == typeid(FilterStep))
{
dotFile << "\""
<< " shape=house orientation=180";
}
else if (typeid(*(qsi->get())) == typeid(TupleBPS))
{
dotFile << "\""
<< " shape=box style=bold";
dotFile << " peripheries=2";
}
else if (typeid(*(qsi->get())) == typeid(CrossEngineStep))
{
dotFile << "\""
<< " shape=box style=bold";
dotFile << " peripheries=2";
}
else
dotFile << "\"";
dotFile << "]" << endl;
// msgsRecived, physicalIO, cacheIO
msgs = qsi->get()->msgsRcvdCount();
pio = qsi->get()->phyIOCount();
for (unsigned int i = 0; i < qsi->get()->outputAssociation().outSize(); i++)
{
ptrdiff_t dloutptr = 0;
RowGroupDL* dlout;
// TupleDataList* tdl;
if ((dlout = qsi->get()->outputAssociation().outAt(i)->rowGroupDL()))
{
dloutptr = (ptrdiff_t)dlout;
outSize = dlout->totalSize();
diskIo = dlout->totalDiskIoTime(saveTime, loadTime);
}
// if HashJoinStep, determine if output fifo was cached to disk
bool hjTempDiskFlag = false;
for (unsigned int k = 0; k < querySteps.size(); k++)
{
uint16_t stepidOut = querySteps[k].get()->stepId();
JobStepAssociation queryInputSA = querySteps[k].get()->inputAssociation();
for (unsigned int j = 0; j < queryInputSA.outSize(); j++)
{
ptrdiff_t dlinptr = 0;
RowGroupDL* dlin = queryInputSA.outAt(j)->rowGroupDL();
if (dlin)
dlinptr = (ptrdiff_t)dlin;
if (dloutptr == dlinptr)
{
dotFile << stepidIn << " -> " << stepidOut;
dotFile << " [label=\" r: " << outSize;
if (hjTempDiskFlag)
{
dotFile << "*";
}
dotFile << "\\l";
if (msgs != 0)
{
dotFile << " m: " << msgs << "\\l";
if (typeid(*(qsi->get())) == typeid(TupleBPS))
{
dotFile << " b: " << qsi->get()->blockTouched() << "\\l";
}
dotFile << " p: " << pio << "\\l";
}
if (diskIo == true)
{
dotFile << " wr: " << saveTime << "s\\l";
dotFile << " rd: " << loadTime << "s\\l";
}
dotFile << "\"]" << endl;
}
}
}
for (psi = fProject.begin(); psi < fProject.end(); psi++)
{
uint16_t stepidOut = psi->get()->stepId();
JobStepAssociation projectInputSA = psi->get()->inputAssociation();
for (unsigned int j = 0; j < projectInputSA.outSize(); j++)
{
RowGroupDL* dlin = projectInputSA.outAt(j)->rowGroupDL();
ptrdiff_t dlinptr = (ptrdiff_t)dlin;
if (dloutptr == dlinptr)
{
dotFile << stepidIn << " -> " << stepidOut;
dotFile << " [label=\" r: " << outSize;
if (hjTempDiskFlag)
{
dotFile << "*";
}
dotFile << "\\l";
if (msgs != 0)
{
dotFile << " m: " << msgs << "\\l";
dotFile << " p: " << pio << "\\l";
}
if (diskIo == true)
{
dotFile << " wr: " << saveTime << "s\\l";
dotFile << " rd: " << loadTime << "s\\l";
}
dotFile << "\"]" << endl;
}
}
}
}
//@Bug 921
if (typeid(*(qsi->get())) == typeid(TupleBPS))
{
BatchPrimitive* bps = dynamic_cast<BatchPrimitive*>(qsi->get());
CalpontSystemCatalog::OID tableOIDProject = bps->tableOid();
if (bps->getOutputType() == TABLE_BAND || bps->getOutputType() == ROW_GROUP)
{
outSize = bps->getRows();
for (dsi = fDeliveredTables.begin(); dsi != fDeliveredTables.end(); dsi++)
{
BatchPrimitive* bpsDelivery = dynamic_cast<BatchPrimitive*>((dsi->second).get());
TupleHashJoinStep* thjDelivery = dynamic_cast<TupleHashJoinStep*>((dsi->second).get());
if (bpsDelivery)
{
CalpontSystemCatalog::OID tableOID = bpsDelivery->tableOid();
dotFile << tableOID << " [label=" << bpsDelivery->alias() << " shape=plaintext]" << endl;
JobStepAssociation deliveryInputSA = bpsDelivery->inputAssociation();
if (tableOIDProject == tableOID)
{
dotFile << stepidIn << " -> " << tableOID;
dotFile << " [label=\" r: " << outSize << "\\l";
dotFile << " m: " << bpsDelivery->msgsRcvdCount() << "\\l";
dotFile << " b: " << bpsDelivery->blockTouched() << "\\l";
dotFile << " p: " << bpsDelivery->phyIOCount() << "\\l";
dotFile << "\"]" << endl;
}
}
else if (thjDelivery)
{
CalpontSystemCatalog::OID tableOID = thjDelivery->tableOid();
dotFile << tableOID << " [label="
<< "vtable"
<< " shape=plaintext]" << endl;
JobStepAssociation deliveryInputSA = thjDelivery->inputAssociation();
if (tableOIDProject == tableOID)
{
dotFile << stepidIn << " -> " << tableOID;
dotFile << " [label=\" r: " << outSize << "\\l";
dotFile << " m: " << thjDelivery->msgsRcvdCount() << "\\l";
dotFile << " b: " << thjDelivery->blockTouched() << "\\l";
dotFile << " p: " << thjDelivery->phyIOCount() << "\\l";
dotFile << "\"]" << endl;
}
}
}
}
}
else if (typeid(*(qsi->get())) == typeid(CrossEngineStep))
{
outSize = dynamic_cast<CrossEngineStep*>(qsi->get())->getRows();
dotFile << "0"
<< " [label=" << qsi->get()->alias() << " shape=plaintext]" << endl;
dotFile << stepidIn << " -> 0";
dotFile << " [label=\" r: " << outSize << "\\l";
dotFile << "\"]" << endl;
}
}
for (psi = fProject.begin(), ctn = 0; psi != fProject.end(); ctn++, psi++)
{
tcn.column = "";
uint16_t stepidIn = psi->get()->stepId();
dotFile << stepidIn << " [label=\"st_" << stepidIn << " ";
tcn = csc->colName(psi->get()->oid());
dotFile << "(";
BatchPrimitive* bps = 0;
if (typeid(*(psi->get())) == typeid(TupleBPS))
{
bps = dynamic_cast<BatchPrimitive*>(psi->get());
OIDVector projectOids = bps->getProjectOids();
for (unsigned int i = 0; i < projectOids.size(); i++)
{
tcn = csc->colName(projectOids[i]);
if (!tcn.column.empty())
{
dotFile << tcn.column;
if (i != (projectOids.size() - 1))
dotFile << "/ ";
}
if ((i + 1) % 3 == 0)
dotFile << "\\l";
}
}
else
{
if (!tcn.column.empty())
dotFile << tcn.column << "/";
}
dotFile << JSTimeStamp::tsdiffstr(psi->get()->dlTimes.EndOfInputTime(),
psi->get()->dlTimes.FirstReadTime())
<< "s";
dotFile << ")";
if (typeid(*(psi->get())) == typeid(pColStep))
{
dotFile << "\""
<< " shape=ellipse";
}
else if (typeid(*(psi->get())) == typeid(pColScanStep))
{
dotFile << "\""
<< " shape=box";
}
else if (typeid(*(psi->get())) == typeid(TupleBPS))
{
dotFile << "\""
<< " shape=box style=bold";
if (typeid(*(psi->get())) == typeid(TupleBPS))
dotFile << " peripheries=2";
}
else if (typeid(*(psi->get())) == typeid(pDictionaryStep))
{
dotFile << "\""
<< " shape=trapezium";
}
else if (typeid(*(psi->get())) == typeid(PassThruStep))
{
dotFile << "\""
<< " shape=octagon";
}
else if (typeid(*(psi->get())) == typeid(FilterStep))
{
dotFile << "\""
<< " shape=house orientation=180";
}
else
dotFile << "\"";
dotFile << "]" << endl;
// msgsRecived, physicalIO, cacheIO
msgs = psi->get()->msgsRcvdCount();
pio = psi->get()->phyIOCount();
CalpontSystemCatalog::OID tableOIDProject = 0;
if (bps)
tableOIDProject = bps->tableOid();
//@Bug 921
for (dsi = fDeliveredTables.begin(); dsi != fDeliveredTables.end(); dsi++)
{
BatchPrimitive* dbps = dynamic_cast<BatchPrimitive*>((dsi->second).get());
if (dbps)
{
outSize = dbps->getRows();
CalpontSystemCatalog::OID tableOID = dbps->tableOid();
dotFile << tableOID << " [label=" << dbps->alias() << " shape=plaintext]" << endl;
JobStepAssociation deliveryInputSA = dbps->inputAssociation();
if (tableOIDProject == tableOID)
{
dotFile << stepidIn << " -> " << tableOID;
dotFile << " [label=\" r: " << outSize << "\\l";
dotFile << " m: " << dbps->msgsRcvdCount() << "\\l";
dotFile << " b: " << dbps->blockTouched() << "\\l";
dotFile << " p: " << dbps->phyIOCount() << "\\l";
dotFile << "\"]" << endl;
}
}
}
}
dotFile << "}" << std::endl;
dotFile.close();
} }
void JobList::validate() const void JobList::validate() const
{ {
// uint32_t i;
// DeliveredTableMap::const_iterator it;
/* Make sure there's at least one query step and that they're the right type */ /* Make sure there's at least one query step and that they're the right type */
idbassert(fQuery.size() > 0); idbassert(fQuery.size() > 0);
// for (i = 0; i < fQuery.size(); i++)
// idbassert(dynamic_cast<BatchPrimitiveStep *>(fQuery[i].get()) ||
// dynamic_cast<HashJoinStep *>(fQuery[i].get()) ||
// dynamic_cast<UnionStep *>(fQuery[i].get()) ||
// dynamic_cast<AggregateFilterStep *>(fQuery[i].get()) ||
// dynamic_cast<BucketReuseStep *>(fQuery[i].get()) ||
// dynamic_cast<pDictionaryScan *>(fQuery[i].get()) ||
// dynamic_cast<FilterStep *>(fQuery[i].get()) ||
// dynamic_cast<OrDelimiter *>(fQuery[i].get())
// );
//
// /* Make sure there's at least one projected table and that they're the right type */
// idbassert(fDeliveredTables.size() > 0);
// for (i = 0; i < fProject.size(); i++)
// idbassert(dynamic_cast<BatchPrimitiveStep *>(fProject[i].get()));
//
// /* Check that all JobSteps use the right status pointer */
// for (i = 0; i < fQuery.size(); i++) {
// idbassert(fQuery[i]->errorInfo().get() == errorInfo().get());
// }
// for (i = 0; i < fProject.size(); i++) {
// idbassert(fProject[i]->errorInfo().get() == errorInfo().get());
// }
// for (it = fDeliveredTables.begin(); it != fDeliveredTables.end(); ++it) {
// idbassert(it->second->errorInfo().get() == errorInfo().get());
// }
} }
void TupleJobList::validate() const void TupleJobList::validate() const
@@ -1175,9 +662,9 @@ string JobList::toString() const
for (i = 0; i < fQuery.size(); i++) for (i = 0; i < fQuery.size(); i++)
ret += fQuery[i]->toString(); ret += fQuery[i]->toString();
ret += "\nProjection Steps:\n"; ret += "\nProjection Steps:\n";
for (i = 0; i < fProject.size(); i++) for (i = 0; i < fProject.size(); i++)
ret += fProject[i]->toString(); ret += fProject[i]->toString();
ret += "\n"; ret += "\n";
return ret; return ret;
} }
@@ -1209,4 +696,3 @@ void TupleJobList::abort()
#ifdef __clang__ #ifdef __clang__
#pragma clang diagnostic pop #pragma clang diagnostic pop
#endif #endif

View File

@@ -2174,18 +2174,10 @@ SJLP makeJobList_(CalpontExecutionPlan* cplan, ResourceManager* rm,
} }
oss << endl; oss << endl;
gettimeofday(&stTime, 0);
struct tm tmbuf; auto jsrname = jlf_graphics::generateDotFileName("jobstep.");
localtime_r(&stTime.tv_sec, &tmbuf);
ostringstream tms;
tms << setfill('0') << setw(4) << (tmbuf.tm_year + 1900) << setw(2) << (tmbuf.tm_mon + 1) << setw(2)
<< (tmbuf.tm_mday) << setw(2) << (tmbuf.tm_hour) << setw(2) << (tmbuf.tm_min) << setw(2)
<< (tmbuf.tm_sec) << setw(6) << (stTime.tv_usec);
string tmstr(tms.str());
string jsrname("jobstep." + tmstr + ".dot");
ofstream dotFile(jsrname.c_str()); ofstream dotFile(jsrname.c_str());
jlf_graphics::writeDotCmds(dotFile, querySteps, projectSteps); dotFile << jlf_graphics::GraphGeneratorNoStats(querySteps, projectSteps).writeDotCmds();
char timestamp[80]; char timestamp[80];
ctime_r((const time_t*)&stTime.tv_sec, timestamp); ctime_r((const time_t*)&stTime.tv_sec, timestamp);

View File

@@ -19,7 +19,7 @@
// $Id: subquerystep.cpp 6370 2010-03-18 02:58:09Z xlou $ // $Id: subquerystep.cpp 6370 2010-03-18 02:58:09Z xlou $
#include <iostream> #include <iostream>
//#define NDEBUG // #define NDEBUG
#include <cassert> #include <cassert>
using namespace std; using namespace std;
@@ -147,6 +147,7 @@ SubAdapterStep::SubAdapterStep(SJSTEP& s, const JobInfo& jobInfo)
, fOutputIterator(0) , fOutputIterator(0)
, fRunner(0) , fRunner(0)
{ {
fExtendedInfo = "SAS: ";
fAlias = s->alias(); fAlias = s->alias();
fView = s->view(); fView = s->view();
fInputJobStepAssociation = s->outputAssociation(); fInputJobStepAssociation = s->outputAssociation();