You've already forked mariadb-columnstore-engine
mirror of
https://github.com/mariadb-corporation/mariadb-columnstore-engine.git
synced 2025-08-01 06:46:55 +03:00
Reformat all code to coding standard
This commit is contained in:
@ -53,498 +53,576 @@ using namespace logging;
|
||||
abort(); \
|
||||
}
|
||||
|
||||
namespace joblist {
|
||||
namespace joblist
|
||||
{
|
||||
|
||||
DiskJoinStep::DiskJoinStep() { }
|
||||
|
||||
DiskJoinStep::DiskJoinStep(TupleHashJoinStep *t, int djsIndex, int joinIndex, bool lastOne) : JobStep(*t), thjs(t),
|
||||
mainThread(0), joinerIndex(joinIndex), closedOutput(false)
|
||||
DiskJoinStep::DiskJoinStep(TupleHashJoinStep* t, int djsIndex, int joinIndex, bool lastOne) : JobStep(*t), thjs(t),
|
||||
mainThread(0), joinerIndex(joinIndex), closedOutput(false)
|
||||
{
|
||||
/*
|
||||
grab all relevant vars from THJS
|
||||
make largeRG and outputRG
|
||||
make the RG mappings
|
||||
init a JoinPartition
|
||||
load the existing RGData into JoinPartition
|
||||
*/
|
||||
/*
|
||||
grab all relevant vars from THJS
|
||||
make largeRG and outputRG
|
||||
make the RG mappings
|
||||
init a JoinPartition
|
||||
load the existing RGData into JoinPartition
|
||||
*/
|
||||
|
||||
largeRG = thjs->largeRG + thjs->outputRG;
|
||||
if (lastOne)
|
||||
outputRG = thjs->outputRG;
|
||||
else
|
||||
outputRG = largeRG;
|
||||
smallRG = thjs->smallRGs[joinerIndex];
|
||||
largeKeyCols = thjs->largeSideKeys[joinerIndex];
|
||||
smallKeyCols = thjs->smallSideKeys[joinerIndex];
|
||||
largeRG = thjs->largeRG + thjs->outputRG;
|
||||
|
||||
/* Should not be necessary if we can use THJS's logic to do the join */
|
||||
fe = thjs->getJoinFilter(joinerIndex);
|
||||
if (fe) {
|
||||
joinFERG = thjs->joinFilterRG;
|
||||
SjoinFEMapping = makeMapping(smallRG, joinFERG);
|
||||
LjoinFEMapping = makeMapping(largeRG, joinFERG);
|
||||
}
|
||||
joiner = thjs->djsJoiners[djsIndex];
|
||||
joinType = joiner->getJoinType();
|
||||
typeless = joiner->isTypelessJoin();
|
||||
joiner->clearData();
|
||||
joiner->setInUM();
|
||||
if (lastOne)
|
||||
outputRG = thjs->outputRG;
|
||||
else
|
||||
outputRG = largeRG;
|
||||
|
||||
LOMapping = makeMapping(largeRG, outputRG);
|
||||
SOMapping = makeMapping(smallRG, outputRG);
|
||||
smallRG = thjs->smallRGs[joinerIndex];
|
||||
largeKeyCols = thjs->largeSideKeys[joinerIndex];
|
||||
smallKeyCols = thjs->smallSideKeys[joinerIndex];
|
||||
|
||||
/* Link up */
|
||||
largeDL = thjs->fifos[djsIndex];
|
||||
outputDL = thjs->fifos[djsIndex+1];
|
||||
smallDL = thjs->smallDLs[joinerIndex];
|
||||
largeIt = largeDL->getIterator();
|
||||
/* Should not be necessary if we can use THJS's logic to do the join */
|
||||
fe = thjs->getJoinFilter(joinerIndex);
|
||||
|
||||
smallUsage = thjs->djsSmallUsage;
|
||||
smallLimit = thjs->djsSmallLimit;
|
||||
largeLimit = thjs->djsLargeLimit;
|
||||
partitionSize = thjs->djsPartitionSize;
|
||||
if (fe)
|
||||
{
|
||||
joinFERG = thjs->joinFilterRG;
|
||||
SjoinFEMapping = makeMapping(smallRG, joinFERG);
|
||||
LjoinFEMapping = makeMapping(largeRG, joinFERG);
|
||||
}
|
||||
|
||||
if (smallLimit == 0)
|
||||
smallLimit = numeric_limits<int64_t>::max();
|
||||
if (largeLimit == 0)
|
||||
largeLimit = numeric_limits<int64_t>::max();
|
||||
joiner = thjs->djsJoiners[djsIndex];
|
||||
joinType = joiner->getJoinType();
|
||||
typeless = joiner->isTypelessJoin();
|
||||
joiner->clearData();
|
||||
joiner->setInUM();
|
||||
|
||||
uint64_t totalUMMemory = thjs->resourceManager->getConfiguredUMMemLimit();
|
||||
jp.reset(new JoinPartition(largeRG, smallRG, smallKeyCols, largeKeyCols, typeless,
|
||||
(joinType & ANTI) && (joinType & MATCHNULLS), (bool) fe, totalUMMemory, partitionSize));
|
||||
LOMapping = makeMapping(largeRG, outputRG);
|
||||
SOMapping = makeMapping(smallRG, outputRG);
|
||||
|
||||
if (cancelled()) {
|
||||
// drain inputs, close output
|
||||
smallReader(); // only small input is supplying input at this point
|
||||
// largeReader();
|
||||
outputDL->endOfInput();
|
||||
closedOutput = true;
|
||||
}
|
||||
/* Link up */
|
||||
largeDL = thjs->fifos[djsIndex];
|
||||
outputDL = thjs->fifos[djsIndex + 1];
|
||||
smallDL = thjs->smallDLs[joinerIndex];
|
||||
largeIt = largeDL->getIterator();
|
||||
|
||||
largeIterationCount = 0;
|
||||
lastLargeIteration = false;
|
||||
fMiniInfo.clear();
|
||||
fExtendedInfo.clear();
|
||||
smallUsage = thjs->djsSmallUsage;
|
||||
smallLimit = thjs->djsSmallLimit;
|
||||
largeLimit = thjs->djsLargeLimit;
|
||||
partitionSize = thjs->djsPartitionSize;
|
||||
|
||||
if (smallLimit == 0)
|
||||
smallLimit = numeric_limits<int64_t>::max();
|
||||
|
||||
if (largeLimit == 0)
|
||||
largeLimit = numeric_limits<int64_t>::max();
|
||||
|
||||
uint64_t totalUMMemory = thjs->resourceManager->getConfiguredUMMemLimit();
|
||||
jp.reset(new JoinPartition(largeRG, smallRG, smallKeyCols, largeKeyCols, typeless,
|
||||
(joinType & ANTI) && (joinType & MATCHNULLS), (bool) fe, totalUMMemory, partitionSize));
|
||||
|
||||
if (cancelled())
|
||||
{
|
||||
// drain inputs, close output
|
||||
smallReader(); // only small input is supplying input at this point
|
||||
// largeReader();
|
||||
outputDL->endOfInput();
|
||||
closedOutput = true;
|
||||
}
|
||||
|
||||
largeIterationCount = 0;
|
||||
lastLargeIteration = false;
|
||||
fMiniInfo.clear();
|
||||
fExtendedInfo.clear();
|
||||
}
|
||||
|
||||
DiskJoinStep::~DiskJoinStep()
|
||||
{
|
||||
abort();
|
||||
if (mainThread)
|
||||
{
|
||||
jobstepThreadPool.join(mainThread);
|
||||
mainThread = 0;
|
||||
}
|
||||
if (jp)
|
||||
atomicops::atomicSub(smallUsage.get(), jp->getSmallSideDiskUsage());
|
||||
abort();
|
||||
|
||||
if (mainThread)
|
||||
{
|
||||
jobstepThreadPool.join(mainThread);
|
||||
mainThread = 0;
|
||||
}
|
||||
|
||||
if (jp)
|
||||
atomicops::atomicSub(smallUsage.get(), jp->getSmallSideDiskUsage());
|
||||
}
|
||||
|
||||
void DiskJoinStep::loadExistingData(vector<RGData> &data)
|
||||
void DiskJoinStep::loadExistingData(vector<RGData>& data)
|
||||
{
|
||||
int64_t memUsage;
|
||||
uint32_t i;
|
||||
int64_t memUsage;
|
||||
uint32_t i;
|
||||
|
||||
for (i = 0; i < data.size() && !cancelled(); i++) {
|
||||
memUsage = jp->insertSmallSideRGData(data[i]);
|
||||
atomicops::atomicAdd(smallUsage.get(), memUsage);
|
||||
}
|
||||
for (i = 0; i < data.size() && !cancelled(); i++)
|
||||
{
|
||||
memUsage = jp->insertSmallSideRGData(data[i]);
|
||||
atomicops::atomicAdd(smallUsage.get(), memUsage);
|
||||
}
|
||||
}
|
||||
|
||||
void DiskJoinStep::run()
|
||||
{
|
||||
mainThread = jobstepThreadPool.invoke(Runner(this));
|
||||
mainThread = jobstepThreadPool.invoke(Runner(this));
|
||||
}
|
||||
|
||||
void DiskJoinStep::join()
|
||||
{
|
||||
if (mainThread)
|
||||
{
|
||||
jobstepThreadPool.join(mainThread);
|
||||
mainThread = 0;
|
||||
}
|
||||
if (jp) {
|
||||
atomicops::atomicSub(smallUsage.get(), jp->getSmallSideDiskUsage());
|
||||
//int64_t memUsage;
|
||||
//memUsage = atomicops::atomicSub(smallUsage.get(), jp->getSmallSideDiskUsage());
|
||||
//cout << "join(): small side usage was: " << jp->getSmallSideDiskUsage() << " final shared mem usage = " << memUsage << endl;
|
||||
jp.reset();
|
||||
}
|
||||
if (mainThread)
|
||||
{
|
||||
jobstepThreadPool.join(mainThread);
|
||||
mainThread = 0;
|
||||
}
|
||||
|
||||
if (jp)
|
||||
{
|
||||
atomicops::atomicSub(smallUsage.get(), jp->getSmallSideDiskUsage());
|
||||
//int64_t memUsage;
|
||||
//memUsage = atomicops::atomicSub(smallUsage.get(), jp->getSmallSideDiskUsage());
|
||||
//cout << "join(): small side usage was: " << jp->getSmallSideDiskUsage() << " final shared mem usage = " << memUsage << endl;
|
||||
jp.reset();
|
||||
}
|
||||
}
|
||||
|
||||
void DiskJoinStep::smallReader()
|
||||
{
|
||||
RGData rgData;
|
||||
bool more = true;
|
||||
int64_t memUsage = 0, combinedMemUsage = 0;
|
||||
int rowCount = 0;
|
||||
RowGroup l_smallRG = smallRG;
|
||||
RGData rgData;
|
||||
bool more = true;
|
||||
int64_t memUsage = 0, combinedMemUsage = 0;
|
||||
int rowCount = 0;
|
||||
RowGroup l_smallRG = smallRG;
|
||||
|
||||
try {
|
||||
while (more && !cancelled()) {
|
||||
more = smallDL->next(0, &rgData);
|
||||
if (more) {
|
||||
l_smallRG.setData(&rgData);
|
||||
rowCount += l_smallRG.getRowCount();
|
||||
memUsage = jp->insertSmallSideRGData(rgData);
|
||||
combinedMemUsage = atomicops::atomicAdd(smallUsage.get(), memUsage);
|
||||
//cout << "memusage = " << memUsage << " total = " << combinedMemUsage << endl;
|
||||
if (combinedMemUsage > smallLimit) {
|
||||
errorMessage(IDBErrorInfo::instance()->errorMsg(ERR_DBJ_DISK_USAGE_LIMIT));
|
||||
status(ERR_DBJ_DISK_USAGE_LIMIT);
|
||||
cout << "DJS small reader: exceeded disk space limit" << endl;
|
||||
abort();
|
||||
}
|
||||
}
|
||||
}
|
||||
try
|
||||
{
|
||||
while (more && !cancelled())
|
||||
{
|
||||
more = smallDL->next(0, &rgData);
|
||||
|
||||
//cout << "(" << joinerIndex << ") read the small side data, combined mem usage= " << combinedMemUsage << " rowcount = " << rowCount << endl;
|
||||
if (!cancelled()) {
|
||||
memUsage = jp->doneInsertingSmallData();
|
||||
combinedMemUsage = atomicops::atomicAdd(smallUsage.get(), memUsage);
|
||||
//cout << "2memusage = " << memUsage << " total = " << combinedMemUsage << endl;
|
||||
if (combinedMemUsage > smallLimit) {
|
||||
errorMessage(IDBErrorInfo::instance()->errorMsg(ERR_DBJ_DISK_USAGE_LIMIT));
|
||||
status(ERR_DBJ_DISK_USAGE_LIMIT);
|
||||
cout << "DJS small reader: exceeded disk space limit" << endl;
|
||||
abort();
|
||||
}
|
||||
}
|
||||
}
|
||||
CATCH_AND_LOG;
|
||||
if (more)
|
||||
{
|
||||
l_smallRG.setData(&rgData);
|
||||
rowCount += l_smallRG.getRowCount();
|
||||
memUsage = jp->insertSmallSideRGData(rgData);
|
||||
combinedMemUsage = atomicops::atomicAdd(smallUsage.get(), memUsage);
|
||||
|
||||
while (more)
|
||||
more = smallDL->next(0, &rgData);
|
||||
//cout << "memusage = " << memUsage << " total = " << combinedMemUsage << endl;
|
||||
if (combinedMemUsage > smallLimit)
|
||||
{
|
||||
errorMessage(IDBErrorInfo::instance()->errorMsg(ERR_DBJ_DISK_USAGE_LIMIT));
|
||||
status(ERR_DBJ_DISK_USAGE_LIMIT);
|
||||
cout << "DJS small reader: exceeded disk space limit" << endl;
|
||||
abort();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//cout << "(" << joinerIndex << ") read the small side data, combined mem usage= " << combinedMemUsage << " rowcount = " << rowCount << endl;
|
||||
if (!cancelled())
|
||||
{
|
||||
memUsage = jp->doneInsertingSmallData();
|
||||
combinedMemUsage = atomicops::atomicAdd(smallUsage.get(), memUsage);
|
||||
|
||||
//cout << "2memusage = " << memUsage << " total = " << combinedMemUsage << endl;
|
||||
if (combinedMemUsage > smallLimit)
|
||||
{
|
||||
errorMessage(IDBErrorInfo::instance()->errorMsg(ERR_DBJ_DISK_USAGE_LIMIT));
|
||||
status(ERR_DBJ_DISK_USAGE_LIMIT);
|
||||
cout << "DJS small reader: exceeded disk space limit" << endl;
|
||||
abort();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
CATCH_AND_LOG;
|
||||
|
||||
while (more)
|
||||
more = smallDL->next(0, &rgData);
|
||||
}
|
||||
|
||||
void DiskJoinStep::largeReader()
|
||||
{
|
||||
RGData rgData;
|
||||
bool more = true;
|
||||
int64_t largeSize = 0;
|
||||
int rowCount = 0;
|
||||
RowGroup l_largeRG = largeRG;
|
||||
RGData rgData;
|
||||
bool more = true;
|
||||
int64_t largeSize = 0;
|
||||
int rowCount = 0;
|
||||
RowGroup l_largeRG = largeRG;
|
||||
|
||||
largeIterationCount++;
|
||||
//cout << "iteration " << largeIterationCount << " largeLimit=" << largeLimit << endl;
|
||||
largeIterationCount++;
|
||||
//cout << "iteration " << largeIterationCount << " largeLimit=" << largeLimit << endl;
|
||||
|
||||
try {
|
||||
while (more && !cancelled() && largeSize < largeLimit) {
|
||||
more = largeDL->next(largeIt, &rgData);
|
||||
if (more) {
|
||||
l_largeRG.setData(&rgData);
|
||||
rowCount += l_largeRG.getRowCount();
|
||||
//cout << "large side raw data: " << largeRG.toString() << endl;
|
||||
try
|
||||
{
|
||||
while (more && !cancelled() && largeSize < largeLimit)
|
||||
{
|
||||
more = largeDL->next(largeIt, &rgData);
|
||||
|
||||
largeSize += jp->insertLargeSideRGData(rgData);
|
||||
}
|
||||
}
|
||||
if (more)
|
||||
{
|
||||
l_largeRG.setData(&rgData);
|
||||
rowCount += l_largeRG.getRowCount();
|
||||
//cout << "large side raw data: " << largeRG.toString() << endl;
|
||||
|
||||
jp->doneInsertingLargeData();
|
||||
largeSize += jp->insertLargeSideRGData(rgData);
|
||||
}
|
||||
}
|
||||
|
||||
//cout << "(" << joinerIndex << ") read the large side data rowcount = " << rowCount << endl;
|
||||
if (!more)
|
||||
lastLargeIteration = true;
|
||||
}
|
||||
CATCH_AND_LOG;
|
||||
jp->doneInsertingLargeData();
|
||||
|
||||
if (cancelled())
|
||||
while (more)
|
||||
more = largeDL->next(largeIt, &rgData);
|
||||
//cout << "(" << joinerIndex << ") read the large side data rowcount = " << rowCount << endl;
|
||||
if (!more)
|
||||
lastLargeIteration = true;
|
||||
}
|
||||
|
||||
CATCH_AND_LOG;
|
||||
|
||||
if (cancelled())
|
||||
while (more)
|
||||
more = largeDL->next(largeIt, &rgData);
|
||||
}
|
||||
|
||||
|
||||
void DiskJoinStep::loadFcn()
|
||||
{
|
||||
boost::shared_ptr<LoaderOutput> out;
|
||||
bool ret;
|
||||
boost::shared_ptr<LoaderOutput> out;
|
||||
bool ret;
|
||||
|
||||
try {
|
||||
do {
|
||||
out.reset(new LoaderOutput());
|
||||
ret = jp->getNextPartition(&out->smallData, &out->partitionID, &out->jp);
|
||||
if (ret) {
|
||||
//cout << "loaded partition " << out->partitionID << " smallData = " << out->smallData.size() << endl;
|
||||
loadFIFO->insert(out);
|
||||
}
|
||||
} while (ret && !cancelled());
|
||||
}
|
||||
CATCH_AND_LOG;
|
||||
try
|
||||
{
|
||||
do
|
||||
{
|
||||
out.reset(new LoaderOutput());
|
||||
ret = jp->getNextPartition(&out->smallData, &out->partitionID, &out->jp);
|
||||
|
||||
loadFIFO->endOfInput();
|
||||
if (ret)
|
||||
{
|
||||
//cout << "loaded partition " << out->partitionID << " smallData = " << out->smallData.size() << endl;
|
||||
loadFIFO->insert(out);
|
||||
}
|
||||
}
|
||||
while (ret && !cancelled());
|
||||
}
|
||||
|
||||
CATCH_AND_LOG;
|
||||
|
||||
loadFIFO->endOfInput();
|
||||
}
|
||||
|
||||
void DiskJoinStep::buildFcn()
|
||||
{
|
||||
boost::shared_ptr<LoaderOutput> in;
|
||||
boost::shared_ptr<BuilderOutput> out;
|
||||
bool more = true;
|
||||
int it = loadFIFO->getIterator();
|
||||
int i, j;
|
||||
Row smallRow;
|
||||
RowGroup l_smallRG = smallRG;
|
||||
boost::shared_ptr<LoaderOutput> in;
|
||||
boost::shared_ptr<BuilderOutput> out;
|
||||
bool more = true;
|
||||
int it = loadFIFO->getIterator();
|
||||
int i, j;
|
||||
Row smallRow;
|
||||
RowGroup l_smallRG = smallRG;
|
||||
|
||||
l_smallRG.initRow(&smallRow);
|
||||
l_smallRG.initRow(&smallRow);
|
||||
|
||||
while (1) {
|
||||
//cout << "getting a partition from the loader" << endl;
|
||||
more = loadFIFO->next(it, &in);
|
||||
if (!more || cancelled())
|
||||
goto out;
|
||||
while (1)
|
||||
{
|
||||
//cout << "getting a partition from the loader" << endl;
|
||||
more = loadFIFO->next(it, &in);
|
||||
|
||||
out.reset(new BuilderOutput());
|
||||
out->smallData = in->smallData;
|
||||
out->partitionID = in->partitionID;
|
||||
out->jp = in->jp;
|
||||
out->tupleJoiner = joiner->copyForDiskJoin();
|
||||
if (!more || cancelled())
|
||||
goto out;
|
||||
|
||||
//cout << "building a tuplejoiner" << endl;
|
||||
for (i = 0; i < (int) in->smallData.size(); i++) {
|
||||
l_smallRG.setData(&in->smallData[i]);
|
||||
l_smallRG.getRow(0, &smallRow);
|
||||
for (j = 0; j < (int) l_smallRG.getRowCount(); j++, smallRow.nextRow())
|
||||
out->tupleJoiner->insert(smallRow, (largeIterationCount == 1));
|
||||
}
|
||||
out->tupleJoiner->doneInserting();
|
||||
buildFIFO->insert(out);
|
||||
}
|
||||
out.reset(new BuilderOutput());
|
||||
out->smallData = in->smallData;
|
||||
out->partitionID = in->partitionID;
|
||||
out->jp = in->jp;
|
||||
out->tupleJoiner = joiner->copyForDiskJoin();
|
||||
|
||||
//cout << "building a tuplejoiner" << endl;
|
||||
for (i = 0; i < (int) in->smallData.size(); i++)
|
||||
{
|
||||
l_smallRG.setData(&in->smallData[i]);
|
||||
l_smallRG.getRow(0, &smallRow);
|
||||
|
||||
for (j = 0; j < (int) l_smallRG.getRowCount(); j++, smallRow.nextRow())
|
||||
out->tupleJoiner->insert(smallRow, (largeIterationCount == 1));
|
||||
}
|
||||
|
||||
out->tupleJoiner->doneInserting();
|
||||
buildFIFO->insert(out);
|
||||
}
|
||||
|
||||
out:
|
||||
while (more)
|
||||
more = loadFIFO->next(it, &in);
|
||||
buildFIFO->endOfInput();
|
||||
|
||||
while (more)
|
||||
more = loadFIFO->next(it, &in);
|
||||
|
||||
buildFIFO->endOfInput();
|
||||
}
|
||||
|
||||
void DiskJoinStep::joinFcn()
|
||||
{
|
||||
|
||||
/* This function mostly serves as an adapter between the
|
||||
input data and the joinOneRG() fcn in THJS. */
|
||||
/* This function mostly serves as an adapter between the
|
||||
input data and the joinOneRG() fcn in THJS. */
|
||||
|
||||
boost::shared_ptr<BuilderOutput> in;
|
||||
bool more = true;
|
||||
int it = buildFIFO->getIterator();
|
||||
int i, j;
|
||||
vector<RGData> joinResults;
|
||||
RowGroup l_largeRG = largeRG, l_smallRG = smallRG;
|
||||
RowGroup l_outputRG = outputRG;
|
||||
Row l_largeRow;
|
||||
Row l_joinFERow, l_outputRow, baseRow;
|
||||
vector<vector<Row::Pointer> > joinMatches;
|
||||
boost::shared_array<Row> smallRowTemplates(new Row[1]);
|
||||
vector<boost::shared_ptr<TupleJoiner> > joiners;
|
||||
boost::shared_array<boost::shared_array<int> > colMappings, fergMappings;
|
||||
boost::scoped_array<boost::scoped_array<uint8_t > > smallNullMem;
|
||||
boost::scoped_array<uint8_t> joinFEMem;
|
||||
Row smallNullRow;
|
||||
boost::shared_ptr<BuilderOutput> in;
|
||||
bool more = true;
|
||||
int it = buildFIFO->getIterator();
|
||||
int i, j;
|
||||
vector<RGData> joinResults;
|
||||
RowGroup l_largeRG = largeRG, l_smallRG = smallRG;
|
||||
RowGroup l_outputRG = outputRG;
|
||||
Row l_largeRow;
|
||||
Row l_joinFERow, l_outputRow, baseRow;
|
||||
vector<vector<Row::Pointer> > joinMatches;
|
||||
boost::shared_array<Row> smallRowTemplates(new Row[1]);
|
||||
vector<boost::shared_ptr<TupleJoiner> > joiners;
|
||||
boost::shared_array<boost::shared_array<int> > colMappings, fergMappings;
|
||||
boost::scoped_array<boost::scoped_array<uint8_t > > smallNullMem;
|
||||
boost::scoped_array<uint8_t> joinFEMem;
|
||||
Row smallNullRow;
|
||||
|
||||
boost::scoped_array<uint8_t> baseRowMem;
|
||||
boost::scoped_array<uint8_t> baseRowMem;
|
||||
|
||||
if (joiner->hasFEFilter()) {
|
||||
joinFERG.initRow(&l_joinFERow, true);
|
||||
joinFEMem.reset(new uint8_t[l_joinFERow.getSize()]);
|
||||
l_joinFERow.setData(joinFEMem.get());
|
||||
}
|
||||
outputRG.initRow(&l_outputRow);
|
||||
outputRG.initRow(&baseRow, true);
|
||||
if (joiner->hasFEFilter())
|
||||
{
|
||||
joinFERG.initRow(&l_joinFERow, true);
|
||||
joinFEMem.reset(new uint8_t[l_joinFERow.getSize()]);
|
||||
l_joinFERow.setData(joinFEMem.get());
|
||||
}
|
||||
|
||||
largeRG.initRow(&l_largeRow);
|
||||
outputRG.initRow(&l_outputRow);
|
||||
outputRG.initRow(&baseRow, true);
|
||||
|
||||
baseRowMem.reset(new uint8_t[baseRow.getSize()]);
|
||||
baseRow.setData(baseRowMem.get());
|
||||
joinMatches.push_back(vector<Row::Pointer>());
|
||||
smallRG.initRow(&smallRowTemplates[0]);
|
||||
joiners.resize(1);
|
||||
largeRG.initRow(&l_largeRow);
|
||||
|
||||
colMappings.reset(new boost::shared_array<int>[2]);
|
||||
colMappings[0] = SOMapping;
|
||||
colMappings[1] = LOMapping;
|
||||
if (fe) {
|
||||
fergMappings.reset(new boost::shared_array<int>[2]);
|
||||
fergMappings[0] = SjoinFEMapping;
|
||||
fergMappings[1] = LjoinFEMapping;
|
||||
}
|
||||
baseRowMem.reset(new uint8_t[baseRow.getSize()]);
|
||||
baseRow.setData(baseRowMem.get());
|
||||
joinMatches.push_back(vector<Row::Pointer>());
|
||||
smallRG.initRow(&smallRowTemplates[0]);
|
||||
joiners.resize(1);
|
||||
|
||||
l_smallRG.initRow(&smallNullRow, true);
|
||||
smallNullMem.reset(new boost::scoped_array<uint8_t>[1]);
|
||||
smallNullMem[0].reset(new uint8_t[smallNullRow.getSize()]);
|
||||
smallNullRow.setData(smallNullMem[0].get());
|
||||
smallNullRow.initToNull();
|
||||
colMappings.reset(new boost::shared_array<int>[2]);
|
||||
colMappings[0] = SOMapping;
|
||||
colMappings[1] = LOMapping;
|
||||
|
||||
try {
|
||||
while (1) {
|
||||
more = buildFIFO->next(it, &in);
|
||||
if (!more || cancelled())
|
||||
goto out;
|
||||
if (fe)
|
||||
{
|
||||
fergMappings.reset(new boost::shared_array<int>[2]);
|
||||
fergMappings[0] = SjoinFEMapping;
|
||||
fergMappings[1] = LjoinFEMapping;
|
||||
}
|
||||
|
||||
joiners[0] = in->tupleJoiner;
|
||||
boost::shared_ptr<RGData> largeData;
|
||||
largeData = in->jp->getNextLargeRGData();
|
||||
while (largeData) {
|
||||
l_largeRG.setData(largeData.get());
|
||||
thjs->joinOneRG(0, &joinResults, l_largeRG, l_outputRG, l_largeRow, l_joinFERow,
|
||||
l_outputRow, baseRow, joinMatches, smallRowTemplates,
|
||||
&joiners, &colMappings, &fergMappings, &smallNullMem);
|
||||
l_smallRG.initRow(&smallNullRow, true);
|
||||
smallNullMem.reset(new boost::scoped_array<uint8_t>[1]);
|
||||
smallNullMem[0].reset(new uint8_t[smallNullRow.getSize()]);
|
||||
smallNullRow.setData(smallNullMem[0].get());
|
||||
smallNullRow.initToNull();
|
||||
|
||||
for (j = 0; j < (int) joinResults.size(); j++) {
|
||||
//l_outputRG.setData(&joinResults[j]);
|
||||
//cout << "got joined output " << l_outputRG.toString() << endl;
|
||||
outputDL->insert(joinResults[j]);
|
||||
}
|
||||
joinResults.clear();
|
||||
largeData = in->jp->getNextLargeRGData();
|
||||
}
|
||||
try
|
||||
{
|
||||
while (1)
|
||||
{
|
||||
more = buildFIFO->next(it, &in);
|
||||
|
||||
if (joinType & SMALLOUTER) {
|
||||
if (!lastLargeIteration) {
|
||||
if (!more || cancelled())
|
||||
goto out;
|
||||
|
||||
/* TODO: an optimization would be to detect whether any new rows were marked and if not
|
||||
suppress the save operation */
|
||||
vector<Row::Pointer> unmatched;
|
||||
in->tupleJoiner->getUnmarkedRows(&unmatched);
|
||||
//cout << "***** saving partition " << in->partitionID << " unmarked count=" << unmatched.size() << " total count="
|
||||
// << in->tupleJoiner->size() << " vector size=" << in->smallData.size() << endl;
|
||||
in->jp->saveSmallSidePartition(in->smallData);
|
||||
}
|
||||
else {
|
||||
joiners[0] = in->tupleJoiner;
|
||||
boost::shared_ptr<RGData> largeData;
|
||||
largeData = in->jp->getNextLargeRGData();
|
||||
|
||||
//cout << "finishing small-outer output" << endl;
|
||||
vector<Row::Pointer> unmatched;
|
||||
RGData rgData(l_outputRG);
|
||||
Row outputRow;
|
||||
while (largeData)
|
||||
{
|
||||
l_largeRG.setData(largeData.get());
|
||||
thjs->joinOneRG(0, &joinResults, l_largeRG, l_outputRG, l_largeRow, l_joinFERow,
|
||||
l_outputRow, baseRow, joinMatches, smallRowTemplates,
|
||||
&joiners, &colMappings, &fergMappings, &smallNullMem);
|
||||
|
||||
l_outputRG.setData(&rgData);
|
||||
l_outputRG.resetRowGroup(0);
|
||||
l_outputRG.initRow(&outputRow);
|
||||
l_outputRG.getRow(0, &outputRow);
|
||||
for (j = 0; j < (int) joinResults.size(); j++)
|
||||
{
|
||||
//l_outputRG.setData(&joinResults[j]);
|
||||
//cout << "got joined output " << l_outputRG.toString() << endl;
|
||||
outputDL->insert(joinResults[j]);
|
||||
}
|
||||
|
||||
l_largeRG.initRow(&l_largeRow, true);
|
||||
boost::scoped_array<uint8_t> largeNullMem(new uint8_t[l_largeRow.getSize()]);
|
||||
l_largeRow.setData(largeNullMem.get());
|
||||
l_largeRow.initToNull();
|
||||
joinResults.clear();
|
||||
largeData = in->jp->getNextLargeRGData();
|
||||
}
|
||||
|
||||
in->tupleJoiner->getUnmarkedRows(&unmatched);
|
||||
//cout << " small-outer count=" << unmatched.size() << endl;
|
||||
for (i = 0; i < (int) unmatched.size(); i++) {
|
||||
smallRowTemplates[0].setData(unmatched[i]);
|
||||
applyMapping(LOMapping, l_largeRow, &outputRow);
|
||||
applyMapping(SOMapping, smallRowTemplates[0], &outputRow);
|
||||
l_outputRG.incRowCount();
|
||||
if (l_outputRG.getRowCount() == 8192) {
|
||||
outputDL->insert(rgData);
|
||||
//cout << "inserting a full RG" << endl;
|
||||
rgData.reinit(l_outputRG);
|
||||
l_outputRG.setData(&rgData);
|
||||
l_outputRG.resetRowGroup(0);
|
||||
l_outputRG.getRow(0, &outputRow);
|
||||
}
|
||||
else
|
||||
outputRow.nextRow();
|
||||
}
|
||||
if (l_outputRG.getRowCount() > 0) {
|
||||
//cout << "inserting an rg with " << l_outputRG.getRowCount() << endl;
|
||||
outputDL->insert(rgData);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} // the try stmt above; need to reformat.
|
||||
CATCH_AND_LOG;
|
||||
if (joinType & SMALLOUTER)
|
||||
{
|
||||
if (!lastLargeIteration)
|
||||
{
|
||||
|
||||
/* TODO: an optimization would be to detect whether any new rows were marked and if not
|
||||
suppress the save operation */
|
||||
vector<Row::Pointer> unmatched;
|
||||
in->tupleJoiner->getUnmarkedRows(&unmatched);
|
||||
//cout << "***** saving partition " << in->partitionID << " unmarked count=" << unmatched.size() << " total count="
|
||||
// << in->tupleJoiner->size() << " vector size=" << in->smallData.size() << endl;
|
||||
in->jp->saveSmallSidePartition(in->smallData);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
||||
//cout << "finishing small-outer output" << endl;
|
||||
vector<Row::Pointer> unmatched;
|
||||
RGData rgData(l_outputRG);
|
||||
Row outputRow;
|
||||
|
||||
l_outputRG.setData(&rgData);
|
||||
l_outputRG.resetRowGroup(0);
|
||||
l_outputRG.initRow(&outputRow);
|
||||
l_outputRG.getRow(0, &outputRow);
|
||||
|
||||
l_largeRG.initRow(&l_largeRow, true);
|
||||
boost::scoped_array<uint8_t> largeNullMem(new uint8_t[l_largeRow.getSize()]);
|
||||
l_largeRow.setData(largeNullMem.get());
|
||||
l_largeRow.initToNull();
|
||||
|
||||
in->tupleJoiner->getUnmarkedRows(&unmatched);
|
||||
|
||||
//cout << " small-outer count=" << unmatched.size() << endl;
|
||||
for (i = 0; i < (int) unmatched.size(); i++)
|
||||
{
|
||||
smallRowTemplates[0].setData(unmatched[i]);
|
||||
applyMapping(LOMapping, l_largeRow, &outputRow);
|
||||
applyMapping(SOMapping, smallRowTemplates[0], &outputRow);
|
||||
l_outputRG.incRowCount();
|
||||
|
||||
if (l_outputRG.getRowCount() == 8192)
|
||||
{
|
||||
outputDL->insert(rgData);
|
||||
//cout << "inserting a full RG" << endl;
|
||||
rgData.reinit(l_outputRG);
|
||||
l_outputRG.setData(&rgData);
|
||||
l_outputRG.resetRowGroup(0);
|
||||
l_outputRG.getRow(0, &outputRow);
|
||||
}
|
||||
else
|
||||
outputRow.nextRow();
|
||||
}
|
||||
|
||||
if (l_outputRG.getRowCount() > 0)
|
||||
{
|
||||
//cout << "inserting an rg with " << l_outputRG.getRowCount() << endl;
|
||||
outputDL->insert(rgData);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} // the try stmt above; need to reformat.
|
||||
|
||||
CATCH_AND_LOG;
|
||||
|
||||
out:
|
||||
while (more)
|
||||
more = buildFIFO->next(it, &in);
|
||||
if (lastLargeIteration || cancelled()) {
|
||||
reportStats();
|
||||
outputDL->endOfInput();
|
||||
closedOutput = true;
|
||||
}
|
||||
|
||||
while (more)
|
||||
more = buildFIFO->next(it, &in);
|
||||
|
||||
if (lastLargeIteration || cancelled())
|
||||
{
|
||||
reportStats();
|
||||
outputDL->endOfInput();
|
||||
closedOutput = true;
|
||||
}
|
||||
}
|
||||
|
||||
void DiskJoinStep::mainRunner()
|
||||
{
|
||||
/*
|
||||
Read from smallDL, insert into small side
|
||||
Read from largeDL, insert into large side
|
||||
Start the processing threads
|
||||
*/
|
||||
try {
|
||||
smallReader();
|
||||
/*
|
||||
Read from smallDL, insert into small side
|
||||
Read from largeDL, insert into large side
|
||||
Start the processing threads
|
||||
*/
|
||||
try
|
||||
{
|
||||
smallReader();
|
||||
|
||||
while (!lastLargeIteration && !cancelled()) {
|
||||
//cout << "large iteration " << largeIterationCount << endl;
|
||||
jp->initForLargeSideFeed();
|
||||
largeReader();
|
||||
//cout << "done reading iteration " << largeIterationCount-1 << endl;
|
||||
while (!lastLargeIteration && !cancelled())
|
||||
{
|
||||
//cout << "large iteration " << largeIterationCount << endl;
|
||||
jp->initForLargeSideFeed();
|
||||
largeReader();
|
||||
//cout << "done reading iteration " << largeIterationCount-1 << endl;
|
||||
|
||||
jp->initForProcessing();
|
||||
if (cancelled())
|
||||
break;
|
||||
jp->initForProcessing();
|
||||
|
||||
loadFIFO.reset(new FIFO<boost::shared_ptr<LoaderOutput> >(1, 1)); // double buffering should be good enough
|
||||
buildFIFO.reset(new FIFO<boost::shared_ptr<BuilderOutput> >(1, 1));
|
||||
if (cancelled())
|
||||
break;
|
||||
|
||||
std::vector<uint64_t> thrds;
|
||||
thrds.reserve(3);
|
||||
thrds.push_back(jobstepThreadPool.invoke(Loader(this)));
|
||||
thrds.push_back(jobstepThreadPool.invoke(Builder(this)));
|
||||
thrds.push_back(jobstepThreadPool.invoke(Joiner(this)));
|
||||
jobstepThreadPool.join(thrds);
|
||||
}
|
||||
}
|
||||
CATCH_AND_LOG;
|
||||
loadFIFO.reset(new FIFO<boost::shared_ptr<LoaderOutput> >(1, 1)); // double buffering should be good enough
|
||||
buildFIFO.reset(new FIFO<boost::shared_ptr<BuilderOutput> >(1, 1));
|
||||
|
||||
// make sure all inputs were drained & output closed
|
||||
if (cancelled()) {
|
||||
try {
|
||||
jp->initForLargeSideFeed();
|
||||
} catch (...) { } // doesn't matter if this fails to open the large-file
|
||||
std::vector<uint64_t> thrds;
|
||||
thrds.reserve(3);
|
||||
thrds.push_back(jobstepThreadPool.invoke(Loader(this)));
|
||||
thrds.push_back(jobstepThreadPool.invoke(Builder(this)));
|
||||
thrds.push_back(jobstepThreadPool.invoke(Joiner(this)));
|
||||
jobstepThreadPool.join(thrds);
|
||||
}
|
||||
}
|
||||
|
||||
largeReader(); // large reader will only drain the fifo when cancelled()
|
||||
if (!closedOutput) {
|
||||
outputDL->endOfInput();
|
||||
closedOutput = true;
|
||||
}
|
||||
}
|
||||
CATCH_AND_LOG;
|
||||
|
||||
// make sure all inputs were drained & output closed
|
||||
if (cancelled())
|
||||
{
|
||||
try
|
||||
{
|
||||
jp->initForLargeSideFeed();
|
||||
}
|
||||
catch (...) { } // doesn't matter if this fails to open the large-file
|
||||
|
||||
largeReader(); // large reader will only drain the fifo when cancelled()
|
||||
|
||||
if (!closedOutput)
|
||||
{
|
||||
outputDL->endOfInput();
|
||||
closedOutput = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const string DiskJoinStep::toString() const
|
||||
{
|
||||
return "DiskJoinStep\n";
|
||||
return "DiskJoinStep\n";
|
||||
}
|
||||
|
||||
void DiskJoinStep::reportStats()
|
||||
{
|
||||
ostringstream os1, os2;
|
||||
ostringstream os1, os2;
|
||||
|
||||
os1 << "DiskJoinStep: joined (large) " << alias() << " to (small) " << joiner->getTableName() <<". Processing stages: " << largeIterationCount <<
|
||||
", disk usage small/large: " << jp->getMaxSmallSize() << "/" << jp->getMaxLargeSize() <<
|
||||
", total bytes read/written: " << jp->getBytesRead() << "/" << jp->getBytesWritten() << endl;
|
||||
fExtendedInfo = os1.str();
|
||||
os1 << "DiskJoinStep: joined (large) " << alias() << " to (small) " << joiner->getTableName() << ". Processing stages: " << largeIterationCount <<
|
||||
", disk usage small/large: " << jp->getMaxSmallSize() << "/" << jp->getMaxLargeSize() <<
|
||||
", total bytes read/written: " << jp->getBytesRead() << "/" << jp->getBytesWritten() << endl;
|
||||
fExtendedInfo = os1.str();
|
||||
|
||||
/* TODO: Can this report anything more useful in miniInfo? */
|
||||
int64_t bytesToReport = jp->getBytesRead() + jp->getBytesWritten();
|
||||
char units;
|
||||
if (bytesToReport > (1 << 30)) {
|
||||
bytesToReport >>= 30;
|
||||
units = 'G';
|
||||
}
|
||||
else if (bytesToReport > (1 << 20)) {
|
||||
bytesToReport >>= 20;
|
||||
units = 'M';
|
||||
}
|
||||
else if (bytesToReport > (1 << 10)) {
|
||||
bytesToReport >>= 10;
|
||||
units = 'K';
|
||||
}
|
||||
else units = ' ';
|
||||
/* TODO: Can this report anything more useful in miniInfo? */
|
||||
int64_t bytesToReport = jp->getBytesRead() + jp->getBytesWritten();
|
||||
char units;
|
||||
|
||||
os2 << "DJS UM " << alias() << "-" << joiner->getTableName() << " - - " << bytesToReport <<
|
||||
units << " - - -------- -\n";
|
||||
if (bytesToReport > (1 << 30))
|
||||
{
|
||||
bytesToReport >>= 30;
|
||||
units = 'G';
|
||||
}
|
||||
else if (bytesToReport > (1 << 20))
|
||||
{
|
||||
bytesToReport >>= 20;
|
||||
units = 'M';
|
||||
}
|
||||
else if (bytesToReport > (1 << 10))
|
||||
{
|
||||
bytesToReport >>= 10;
|
||||
units = 'K';
|
||||
}
|
||||
else units = ' ';
|
||||
|
||||
fMiniInfo = os2.str();
|
||||
os2 << "DJS UM " << alias() << "-" << joiner->getTableName() << " - - " << bytesToReport <<
|
||||
units << " - - -------- -\n";
|
||||
|
||||
if (traceOn())
|
||||
logEnd(os1.str().c_str());
|
||||
fMiniInfo = os2.str();
|
||||
|
||||
if (traceOn())
|
||||
logEnd(os1.str().c_str());
|
||||
}
|
||||
|
||||
|
||||
|
Reference in New Issue
Block a user