Mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git
MCOL-4841 dev6 Handle large joins without OOM (#2155)
* MCOL-4846 dev-6 Handle large join results: use a loop to shrink the number of results reported per message to something manageable.
* MCOL-4841 Small changes requested by review.
* Add EXTRA threads to prioritythreadpool (a conceptual sketch follows this message). prioritythreadpool is configured at startup with a fixed number of threads to prevent thread thrashing. Since most BPP job steps are short-lived, and a rescheduling mechanism exists if no threads are available, this keeps CPU wastage to a minimum. However, if one or more queries consume all the threads in prioritythreadpool and then block (because the consumer is not consuming fast enough), we can run out of threads and no work gets done until some threads unblock. A new mechanism allows EXTRA threads to be created for the duration of the blocking action; these threads can act on new queries, and they are released when idle once all blocking has completed.
* MCOL-4841 dev6 Reconcile with changes in develop-6.
* MCOL-4841 Some format corrections.
* MCOL-4841 dev Clean up some things based on review.
* MCOL-4841 dev6 ExeMgr crashes after large join: this commit fixes up memory accounting issues in ExeMgr.
* MCOL-4841 Remove the LDI change; opened MCOL-4968 to address that issue.
* MCOL-4841 Add fMaxBPPSendQueue to ResourceManager. This causes the setting to be loaded at run time (a restart is required to pick up a change); BPPSendThread gets it in its ctor. Also rolled back the changes to TupleHashJoinStep::smallRunnerFcn() that used a local variable to count locally allocated memory and only added it into the global counter at the function's end. Not counting the memory globally caused conversion to a UM-only join far later than it should have, which resulted in MCOL-4971.
* MCOL-4841 Make blockedThreads and extraThreads atomic. Also restore the previous scope of the locks in bppsendthread: there is some small chance the new scope could be incorrect, and the performance boost is negligible. Better safe than sorry.
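The EXTRA-threads mechanism described in the message above is easiest to picture outside the diff. The following is a conceptual C++ sketch only: PoolSketch, runOneQueuedJob(), and the spawn threshold are invented for illustration, and the real logic lives in prioritythreadpool and bppsendthread, whose hunks are not shown on this page.

    #include <atomic>
    #include <thread>

    // Sketch: a fixed-size pool that temporarily adds "extra" threads while
    // its fixed threads are blocked on a slow consumer, so new queries still run.
    class PoolSketch
    {
    public:
        // Wrap a potentially long blocking wait (e.g. on a full send queue).
        template <typename BlockingWait>
        void waitBlocked(BlockingWait&& wait)
        {
            if (++blockedThreads == fixedThreadCount)
                spawnExtraThread(); // every fixed thread is stuck: add a helper
            wait();                 // the actual blocking operation
            --blockedThreads;
        }

    private:
        void spawnExtraThread()
        {
            ++extraThreads;
            std::thread([this] {
                while (runOneQueuedJob()) // serve newly arriving work
                    ;
                --extraThreads;           // released once the queue is idle again
            }).detach();
        }

        bool runOneQueuedJob() { return false; } // stub: pop and run one queued job

        static constexpr int fixedThreadCount = 8; // fixed at startup
        std::atomic<int> blockedThreads{0};        // atomic, per the last bullet
        std::atomic<int> extraThreads{0};
    };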
@@ -424,8 +424,8 @@ void DiskJoinStep::joinFcn()
 while (largeData)
 {
 l_largeRG.setData(largeData.get());
-thjs->joinOneRG(0, &joinResults, l_largeRG, l_outputRG, l_largeRow, l_joinFERow,
-l_outputRow, baseRow, joinMatches, smallRowTemplates,
+thjs->joinOneRG(0, joinResults, l_largeRG, l_outputRG, l_largeRow, l_joinFERow,
+l_outputRow, baseRow, joinMatches, smallRowTemplates, outputDL.get(),
 &joiners, &colMappings, &fergMappings, &smallNullMem);

 for (j = 0; j < (int) joinResults.size(); j++)
@@ -434,7 +434,7 @@ void DiskJoinStep::joinFcn()
 //cout << "got joined output " << l_outputRG.toString() << endl;
 outputDL->insert(joinResults[j]);
 }
-
+thjs->returnMemory();
 joinResults.clear();
 largeData = in->jp->getNextLargeRGData();
 }
@@ -443,7 +443,6 @@ void DiskJoinStep::joinFcn()
 {
 if (!lastLargeIteration)
 {
-
 /* TODO: an optimization would be to detect whether any new rows were marked and if not
 suppress the save operation */
 vector<Row::Pointer> unmatched;
@@ -454,7 +453,6 @@ void DiskJoinStep::joinFcn()
 }
 else
 {
-
 //cout << "finishing small-outer output" << endl;
 vector<Row::Pointer> unmatched;
 RGData rgData(l_outputRG);
@@ -484,6 +482,21 @@ void DiskJoinStep::joinFcn()
 {
 outputDL->insert(rgData);
 //cout << "inserting a full RG" << endl;
+if (thjs)
+{
+if (!thjs->getMemory(l_outputRG.getMaxDataSize()))
+{
+// calculate guess of size required for error message
+uint64_t memReqd = (unmatched.size() * outputRG.getDataSize(1)) / 1048576;
+Message::Args args;
+args.add(memReqd);
+args.add(thjs->resourceManager->getConfiguredUMMemLimit() / 1048576);
+std::cerr << logging::IDBErrorInfo::instance()->errorMsg(logging::ERR_JOIN_RESULT_TOO_BIG, args)
+<< " @" << __FILE__ << ":" << __LINE__;
+throw logging::IDBExcept(logging::ERR_JOIN_RESULT_TOO_BIG, args);
+}
+}
+
 rgData.reinit(l_outputRG);
 l_outputRG.setData(&rgData);
 l_outputRG.resetRowGroup(0);
@@ -498,6 +511,10 @@ void DiskJoinStep::joinFcn()
 //cout << "inserting an rg with " << l_outputRG.getRowCount() << endl;
 outputDL->insert(rgData);
 }
+if (thjs)
+{
+thjs->returnMemory();
+}
 }
 }
 }

@@ -307,7 +307,11 @@ void DistributedEngineComm::Setup()
 newPmCount--;

 writeToLog(__FILE__, __LINE__, "Could not connect to PMS" + std::to_string(connectionId) + ": " + ex.what(), LOG_TYPE_ERROR);
-cerr << "Could not connect to PMS" << std::to_string(connectionId) << ": " << ex.what() << endl;
+if (newPmCount == 0)
+{
+writeToLog(__FILE__, __LINE__, "No more PMs to try to connect to", LOG_TYPE_ERROR);
+break;
+}
 }
 catch (...)
 {
@@ -315,6 +319,11 @@ void DistributedEngineComm::Setup()
 newPmCount--;

 writeToLog(__FILE__, __LINE__, "Could not connect to PMS" + std::to_string(connectionId), LOG_TYPE_ERROR);
+if (newPmCount == 0)
+{
+writeToLog(__FILE__, __LINE__, "No more PMs to try to connect to", LOG_TYPE_ERROR);
+break;
+}
 }
 }

@@ -788,7 +788,6 @@ void GroupConcatOrderBy::processRow(const rowgroup::Row& row)
 fDataQueue.push(fData);

 uint64_t newSize = fRowsPerRG * fRowGroup.getRowSize();
-fMemSize += newSize;

 if (!fRm->getMemory(newSize, fSessionMemLimit))
 {
@@ -796,7 +795,8 @@ void GroupConcatOrderBy::processRow(const rowgroup::Row& row)
 << " @" << __FILE__ << ":" << __LINE__;
 throw IDBExcept(fErrorCode);
 }
+fMemSize += newSize;

 fData.reinit(fRowGroup, fRowsPerRG);
 fRowGroup.setData(&fData);
 fRowGroup.resetRowGroup(0);
@@ -979,7 +979,6 @@ void GroupConcatNoOrder::initialize(const rowgroup::SP_GroupConcat& gcc)
 fConcatColumns.push_back((*(i++)).second);

 uint64_t newSize = fRowsPerRG * fRowGroup.getRowSize();
-fMemSize += newSize;

 if (!fRm->getMemory(newSize, fSessionMemLimit))
 {
@@ -987,7 +986,8 @@ void GroupConcatNoOrder::initialize(const rowgroup::SP_GroupConcat& gcc)
 << " @" << __FILE__ << ":" << __LINE__;
 throw IDBExcept(fErrorCode);
 }
+fMemSize += newSize;

 fData.reinit(fRowGroup, fRowsPerRG);
 fRowGroup.setData(&fData);
 fRowGroup.resetRowGroup(0);
@@ -1014,15 +1014,14 @@ void GroupConcatNoOrder::processRow(const rowgroup::Row& row)
 {
 uint64_t newSize = fRowsPerRG * fRowGroup.getRowSize();

-fMemSize += newSize;

 if (!fRm->getMemory(newSize, fSessionMemLimit))
 {
 cerr << IDBErrorInfo::instance()->errorMsg(fErrorCode)
 << " @" << __FILE__ << ":" << __LINE__;
 throw IDBExcept(fErrorCode);
 }
+fMemSize += newSize;

 fDataQueue.push(fData);
 fData.reinit(fRowGroup, fRowsPerRG);
 fRowGroup.setData(&fData);

@@ -127,13 +127,13 @@ void LimitedOrderBy::processRow(const rowgroup::Row& row)
 fUncommitedMemory += memSizeInc;
 if (fUncommitedMemory >= fMaxUncommited)
 {
-fMemSize += fUncommitedMemory;
 if (!fRm->getMemory(fUncommitedMemory, fSessionMemLimit))
 {
 cerr << IDBErrorInfo::instance()->errorMsg(fErrorCode) << " @"
 << __FILE__ << ":" << __LINE__;
 throw IDBExcept(fErrorCode);
 }
+fMemSize += fUncommitedMemory;
 fUncommitedMemory = 0;
 }

@@ -148,7 +148,6 @@ void LimitedOrderBy::processRow(const rowgroup::Row& row)
 {
 fDataQueue.push(fData);
 uint64_t newSize = fRowGroup.getSizeWithStrings() - fRowGroup.getHeaderSize();
-fMemSize += newSize;

 if (!fRm->getMemory(newSize, fSessionMemLimit))
 {
@@ -156,7 +155,8 @@ void LimitedOrderBy::processRow(const rowgroup::Row& row)
 << " @" << __FILE__ << ":" << __LINE__;
 throw IDBExcept(fErrorCode);
 }
+fMemSize += newSize;

 fData.reinit(fRowGroup, fRowsPerRG);
 fRowGroup.setData(&fData);
 fRowGroup.resetRowGroup(0);
@@ -190,13 +190,13 @@ void LimitedOrderBy::finalize()
 {
 if (fUncommitedMemory > 0)
 {
-fMemSize += fUncommitedMemory;
 if (!fRm->getMemory(fUncommitedMemory, fSessionMemLimit))
 {
 cerr << IDBErrorInfo::instance()->errorMsg(fErrorCode) << " @"
 << __FILE__ << ":" << __LINE__;
 throw IDBExcept(fErrorCode);
 }
+fMemSize += fUncommitedMemory;
 fUncommitedMemory = 0;
 }

@@ -209,7 +209,6 @@ void LimitedOrderBy::finalize()
 // *DRRTUY Very memory intensive. CS needs to account active
 // memory only and release memory if needed.
 uint64_t memSizeInc = fRowGroup.getSizeWithStrings() - fRowGroup.getHeaderSize();
-fMemSize += memSizeInc;

 if (!fRm->getMemory(memSizeInc, fSessionMemLimit))
 {
@@ -217,6 +216,7 @@ void LimitedOrderBy::finalize()
 << " @" << __FILE__ << ":" << __LINE__;
 throw IDBExcept(fErrorCode);
 }
+fMemSize += memSizeInc;

 uint64_t offset = 0;
 uint64_t i = 0;
@@ -265,7 +265,6 @@ void LimitedOrderBy::finalize()
 if(offset == (uint64_t)-1)
 {
 tempRGDataList.push_front(fData);
-fMemSize += memSizeInc;

 if (!fRm->getMemory(memSizeInc, fSessionMemLimit))
 {
@@ -273,6 +272,7 @@ void LimitedOrderBy::finalize()
 << " @" << __FILE__ << ":" << __LINE__;
 throw IDBExcept(fErrorCode);
 }
+fMemSize += memSizeInc;

 fData.reinit(fRowGroup, fRowsPerRG);
 fRowGroup.setData(&fData);

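The single pattern repeated across the GroupConcat and LimitedOrderBy hunks above is moving `fMemSize += ...` from before the `fRm->getMemory(...)` call to after it. A minimal standalone model of why the order matters, with invented names (this is not code from the commit): the reworked getMemory() undoes its own subtraction when the grant fails, so a caller that tallied the amount anyway would later return bytes it never held and inflate the shared budget.

    #include <atomic>
    #include <cstdint>

    std::atomic<int64_t> totalUmMemLimit{1 << 30}; // shared UM budget, in bytes

    bool getMemory(int64_t amount)
    {
        // fetch_sub returns the previous value; prev - amount is the new value.
        if (totalUmMemLimit.fetch_sub(amount) - amount >= 0)
            return true;                    // granted: budget stays decremented
        totalUmMemLimit.fetch_add(amount);  // denied: restore the counter
        return false;
    }

    // Correct order: tally only after the grant succeeds.
    bool reserve(int64_t amount, int64_t& fMemSize)
    {
        if (!getMemory(amount))
            return false;    // tallying before this point would make a later
        fMemSize += amount;  // returnMemory(fMemSize) give back ungranted bytes
        return true;
    }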
@@ -1366,7 +1366,15 @@ public:
 {
 return true;
 }
+ResourceManager* resourceManager() const
+{
+return fRm;
+}
+bool runFEonPM() const
+{
+return bRunFEonPM;
+}

 protected:
 void sendError(uint16_t status);

@@ -1454,12 +1462,6 @@ private:
 void serializeJoiner();
 void serializeJoiner(uint32_t connectionNumber);

-void generateJoinResultSet(const std::vector<std::vector<rowgroup::Row::Pointer>>& joinerOutput,
-rowgroup::Row& baseRow, const std::vector<boost::shared_array<int>>& mappings,
-const uint32_t depth, rowgroup::RowGroup& outputRG, rowgroup::RGData& rgData,
-std::vector<rowgroup::RGData>* outputData,
-const boost::scoped_array<rowgroup::Row>& smallRows, rowgroup::Row& joinedRow);
-
 std::vector<boost::shared_ptr<joiner::TupleJoiner>> tjoiners;
 bool doJoin, hasPMJoin, hasUMJoin;
 std::vector<rowgroup::RowGroup> joinerMatchesRGs; // parses the small-side matches from joiner
@@ -1491,17 +1493,12 @@ private:
 boost::shared_ptr<funcexp::FuncExpWrapper> fe1, fe2;
 rowgroup::RowGroup fe1Input, fe2Output;
 boost::shared_array<int> fe2Mapping;
-bool runFEonPM;
+bool bRunFEonPM;

 /* for UM F & E 2 processing */
 rowgroup::RGData fe2Data;
 rowgroup::Row fe2InRow, fe2OutRow;

-void processFE2(rowgroup::RowGroup& input, rowgroup::RowGroup& output, rowgroup::Row& inRow, rowgroup::Row& outRow,
-std::vector<rowgroup::RGData>* rgData, funcexp::FuncExpWrapper* localFE2);
-void processFE2_oneRG(rowgroup::RowGroup& input, rowgroup::RowGroup& output, rowgroup::Row& inRow,
-rowgroup::Row& outRow, funcexp::FuncExpWrapper* localFE2);
-
 /* Runtime Casual Partitioning adjustments. The CP code is needlessly complicated;
 * to avoid making it worse, decided to designate 'scanFlags' as the static
 * component and this new array as the runtime component. The final CP decision
@@ -1515,8 +1512,9 @@ private:
 boost::shared_ptr<RowGroupDL> deliveryDL;
 uint32_t deliveryIt;

-struct JoinLocalData
+class JoinLocalData
 {
+public:
 JoinLocalData() = delete;
 JoinLocalData(const JoinLocalData&) = delete;
 JoinLocalData(JoinLocalData&&) = delete;
@@ -1524,12 +1522,21 @@ private:
 JoinLocalData& operator=(JoinLocalData&&) = delete;
 ~JoinLocalData() = default;

-JoinLocalData(rowgroup::RowGroup& primRowGroup, rowgroup::RowGroup& outputRowGroup,
+JoinLocalData(TupleBPS* pTupleBPS, rowgroup::RowGroup& primRowGroup, rowgroup::RowGroup& outputRowGroup,
 boost::shared_ptr<funcexp::FuncExpWrapper>& fe2, rowgroup::RowGroup& fe2Output,
 std::vector<rowgroup::RowGroup>& joinerMatchesRGs, rowgroup::RowGroup& joinFERG,
 std::vector<boost::shared_ptr<joiner::TupleJoiner>>& tjoiners, uint32_t smallSideCount,
 bool doJoin);

+friend class TupleBPS;
+
+private:
+uint64_t generateJoinResultSet(const uint32_t depth,
+std::vector<rowgroup::RGData>& outputData,
+RowGroupDL* dlp);
+void processFE2(vector<rowgroup::RGData>& rgData);
+
+TupleBPS* tbps; // Parent
 rowgroup::RowGroup local_primRG;
 rowgroup::RowGroup local_outputRG;

@@ -1565,7 +1572,7 @@ private:
 boost::scoped_array<uint8_t> largeNullMemory;
 boost::scoped_array<boost::shared_array<uint8_t>> smallNullMemory;
 uint32_t matchCount;

 rowgroup::Row postJoinRow;
 rowgroup::RowGroup local_fe2Output;
 rowgroup::RGData local_fe2Data;
@@ -1591,7 +1598,7 @@ private:
 for (uint32_t i = 0; i < numThreads; ++i)
 {
 joinLocalDataPool.push_back(std::shared_ptr<JoinLocalData>(
-new JoinLocalData(primRowGroup, outputRowGroup, fe2, fe2Output, joinerMatchesRGs, joinFERG,
+new JoinLocalData(this, primRowGroup, outputRowGroup, fe2, fe2Output, joinerMatchesRGs, joinFERG,
 tjoiners, smallSideCount, doJoin)));
 }

@@ -267,6 +267,9 @@ ResourceManager::ResourceManager(bool runningInExeMgr) :
 fAllowedDiskAggregation = getBoolVal(fRowAggregationStr,
 "AllowDiskBasedAggregation",
 defaultAllowDiskAggregation);
+
+fMaxBPPSendQueue = getUintVal(fPrimitiveServersStr, "MaxBPPSendQueue", defaultMaxBPPSendQueue);
+
 if (!load_encryption_keys())
 {
 Logger log;
@@ -347,7 +350,7 @@ void ResourceManager::emMaxPct() { }
 void ResourceManager::emPriority() { }
 void ResourceManager::emExecQueueSize() { }

 void ResourceManager::hjNumThreads() { }
 void ResourceManager::hjMaxBuckets() { }
 void ResourceManager::hjMaxElems() { }
 void ResourceManager::hjFifoSizeLargeSide() { }
@@ -418,24 +421,52 @@ bool ResourceManager::userPriorityEnabled() const
 return "Y" == val;
 }

-bool ResourceManager::getMemory(int64_t amount, boost::shared_ptr<int64_t> sessionLimit, bool patience)
+// Counts memory. This funtion doesn't actually malloc, just counts against two limits
+// totalUmMemLimit for overall UM counting and (optional) sessionLimit for a single session.
+// If both have space, return true.
+bool ResourceManager::getMemory(int64_t amount, boost::shared_ptr<int64_t>& sessionLimit, bool patience)
 {
 bool ret1 = (atomicops::atomicSub(&totalUmMemLimit, amount) >= 0);
-bool ret2 = (atomicops::atomicSub(sessionLimit.get(), amount) >= 0);
+bool ret2 = sessionLimit ? (atomicops::atomicSub(sessionLimit.get(), amount) >= 0) : ret1;

 uint32_t retryCounter = 0, maxRetries = 20; // 10s delay

 while (patience && !(ret1 && ret2) && retryCounter++ < maxRetries)
 {
 atomicops::atomicAdd(&totalUmMemLimit, amount);
-atomicops::atomicAdd(sessionLimit.get(), amount);
+sessionLimit ? atomicops::atomicAdd(sessionLimit.get(), amount) : 0;
 usleep(500000);
 ret1 = (atomicops::atomicSub(&totalUmMemLimit, amount) >= 0);
-ret2 = (atomicops::atomicSub(sessionLimit.get(), amount) >= 0);
+ret2 = sessionLimit ? (atomicops::atomicSub(sessionLimit.get(), amount) >= 0) : ret1;
+}
+if (!(ret1 && ret2))
+{
+// If we didn't get any memory, restore the counters.
+atomicops::atomicAdd(&totalUmMemLimit, amount);
+sessionLimit ? atomicops::atomicAdd(sessionLimit.get(), amount) : 0;
 }

 return (ret1 && ret2);
 }
+
+// Don't care about session memory
+bool ResourceManager::getMemory(int64_t amount, bool patience)
+{
+bool ret1 = (atomicops::atomicSub(&totalUmMemLimit, amount) >= 0);
+
+uint32_t retryCounter = 0, maxRetries = 20; // 10s delay
+
+while (patience && !ret1 && retryCounter++ < maxRetries)
+{
+atomicops::atomicAdd(&totalUmMemLimit, amount);
+usleep(500000);
+ret1 = (atomicops::atomicSub(&totalUmMemLimit, amount) >= 0);
+}
+if (!ret1)
+{
+// If we didn't get any memory, restore the counters.
+atomicops::atomicAdd(&totalUmMemLimit, amount);
+}
+return ret1;
+}

 } //namespace

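For illustration, two hypothetical call sites for the overloads reconstructed above (the variable names and byte counts are invented). The session-aware form counts against both the global UM budget and, when the pointer is non-empty, the per-session limit; the new session-less form appears to back helpers such as the thjs->getMemory()/thjs->returnMemory() calls seen in the DiskJoinStep hunks, which only track the global budget.

    boost::shared_ptr<int64_t> sessionLimit; // empty: the new code skips the session check

    if (rm->getMemory(rgBytes, sessionLimit, false)) // global + optional session limits
    {
        // ... buffer a rowgroup ...
        rm->returnMemory(rgBytes, sessionLimit);     // give both counters back
    }

    if (rm->getMemory(queueBytes)) // new overload: global budget only, patient by default
    {
        // ... queue results for the consumer ...
        rm->returnMemory(queueBytes);
    }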
@@ -82,6 +82,15 @@ const uint32_t defaultMaxOutstandingRequests = 20;
 const uint32_t defaultProcessorThreadsPerScan = 16;
 const uint32_t defaultJoinerChunkSize = 16 * 1024 * 1024;

+// I estimate that the average non-cloud columnstore node has 64GB. I've seen from 16GB to 256GB. Cloud can be as low as 4GB
+// However, ExeMgr has a targetRecvQueueSize hardcoded to 50,000,000, so some number greater than this makes sense. Seriously greater doesn't make sense,
+// so I went with 5x. If there are a number of simultaneous queries that return giant result sets, then 0.25 GB each seems reasonable.
+// This is only for the return queue. We still need room for all the processing, and if a single node system, for ExeMgr as well.
+// On small systems, I recommend we use a smaller value.
+// I believe a larger value will not improve anything since at this point, we're just filling a queue much faster than it can be emptied.
+// Even if we make this default larger, giant results will still eventually block. Just with less memory available for other processing.
+const uint64_t defaultMaxBPPSendQueue = 250000000; // ~250MB
+
 //bucketreuse
 const std::string defaultTempDiskPath = "/tmp";
 const std::string defaultWorkingDir = "."; //"/tmp";
@@ -380,6 +389,11 @@ public:
 return getUintVal(fJobListStr, "DECThrottleThreshold", defaultDECThrottleThreshold);
 }

+uint64_t getMaxBPPSendQueue() const
+{
+return fMaxBPPSendQueue;
+}
+
 EXPORT void emServerThreads();
 EXPORT void emServerQueueSize();
 EXPORT void emSecondsBetweenMemChecks();
@@ -397,11 +411,16 @@ public:
 /* sessionLimit is a pointer to the var holding the session-scope limit, should be JobInfo.umMemLimit
 for the query. */
 /* Temporary parameter 'patience', will wait for up to 10s to get the memory. */
-EXPORT bool getMemory(int64_t amount, boost::shared_ptr<int64_t> sessionLimit, bool patience = true);
-inline void returnMemory(int64_t amount, boost::shared_ptr<int64_t> sessionLimit)
+EXPORT bool getMemory(int64_t amount, boost::shared_ptr<int64_t>& sessionLimit, bool patience = true);
+EXPORT bool getMemory(int64_t amount, bool patience = true);
+inline void returnMemory(int64_t amount)
 {
 atomicops::atomicAdd(&totalUmMemLimit, amount);
-atomicops::atomicAdd(sessionLimit.get(), amount);
+}
+inline void returnMemory(int64_t amount, boost::shared_ptr<int64_t>& sessionLimit)
+{
+atomicops::atomicAdd(&totalUmMemLimit, amount);
+sessionLimit ? atomicops::atomicAdd(sessionLimit.get(), amount): 0;
 }
 inline int64_t availableMemory() const
 {
@@ -602,7 +621,7 @@ private:

 /* new HJ/Union/Aggregation support */
 volatile int64_t totalUmMemLimit; // mem limit for join, union, and aggregation on the UM
-uint64_t configuredUmMemLimit;
+int64_t configuredUmMemLimit;
 uint64_t pmJoinMemLimit; // mem limit on individual PM joins

 /* multi-thread aggregate */
@@ -618,6 +637,7 @@ private:
 bool fUseHdfs;
 bool fAllowedDiskAggregation{false};
 uint64_t fDECConnectionsPerQuery;
+uint64_t fMaxBPPSendQueue = 250000000;
 };


@@ -662,3 +682,4 @@ inline bool ResourceManager::getBoolVal(const std::string& section, const std::s
 #undef EXPORT

 #endif
+

@@ -153,15 +153,15 @@ struct TupleBPSAggregators
 }
 };

-TupleBPS::JoinLocalData::JoinLocalData(RowGroup& primRowGroup, RowGroup& outputRowGroup,
+TupleBPS::JoinLocalData::JoinLocalData(TupleBPS* pTupleBPS, RowGroup& primRowGroup, RowGroup& outputRowGroup,
 boost::shared_ptr<funcexp::FuncExpWrapper>& fe2,
 rowgroup::RowGroup& fe2Output,
 std::vector<rowgroup::RowGroup>& joinerMatchesRGs,
 rowgroup::RowGroup& joinFERG,
 std::vector<boost::shared_ptr<joiner::TupleJoiner>>& tjoiners,
 uint32_t smallSideCount, bool doJoin)
-: local_primRG(primRowGroup), local_outputRG(outputRowGroup), fe2(fe2), fe2Output(fe2Output),
-joinerMatchesRGs(joinerMatchesRGs), joinFERG(joinFERG), tjoiners(tjoiners),
+: tbps(pTupleBPS), local_primRG(primRowGroup), local_outputRG(outputRowGroup), fe2(fe2),
+fe2Output(fe2Output), joinerMatchesRGs(joinerMatchesRGs), joinFERG(joinFERG), tjoiners(tjoiners),
 smallSideCount(smallSideCount), doJoin(doJoin)
 {
 if (doJoin || fe2)
@@ -231,6 +231,132 @@ TupleBPS::JoinLocalData::JoinLocalData(RowGroup& primRowGroup, RowGroup& outputR
 }
 }

+uint64_t TupleBPS::JoinLocalData::generateJoinResultSet(const uint32_t depth,
+std::vector<rowgroup::RGData>& outputData,
+RowGroupDL* dlp)
+{
+uint32_t i;
+Row& smallRow = smallSideRows[depth];
+uint64_t memSizeForOutputRG = 0;
+
+if (depth < smallSideCount - 1)
+{
+for (i = 0; i < joinerOutput[depth].size() && !tbps->cancelled(); i++)
+{
+smallRow.setPointer(joinerOutput[depth][i]);
+applyMapping(smallMappings[depth], smallRow, &joinedBaseRow);
+memSizeForOutputRG += generateJoinResultSet(depth + 1, outputData, dlp);
+}
+}
+else
+{
+local_outputRG.getRow(local_outputRG.getRowCount(), &postJoinRow);
+
+for (i = 0; i < joinerOutput[depth].size() && !tbps->cancelled(); i++, postJoinRow.nextRow(),
+local_outputRG.incRowCount())
+{
+smallRow.setPointer(joinerOutput[depth][i]);
+
+if (UNLIKELY(local_outputRG.getRowCount() == 8192))
+{
+uint32_t dbRoot = local_outputRG.getDBRoot();
+uint64_t baseRid = local_outputRG.getBaseRid();
+outputData.push_back(joinedData);
+// Don't let the join results buffer get out of control.
+if (tbps->resourceManager()->getMemory(local_outputRG.getMaxDataSize(), false))
+{
+memSizeForOutputRG += local_outputRG.getMaxDataSize();
+}
+else
+{
+// Don't wait for memory, just send the data on to DL.
+RowGroup out(local_outputRG);
+if (fe2 && tbps->runFEonPM())
+{
+processFE2(outputData);
+tbps->rgDataVecToDl(outputData, local_fe2Output, dlp);
+}
+else
+{
+tbps->rgDataVecToDl(outputData, out, dlp);
+}
+tbps->resourceManager()->returnMemory(memSizeForOutputRG);
+memSizeForOutputRG = 0;
+}
+joinedData.reinit(local_outputRG);
+local_outputRG.setData(&joinedData);
+local_outputRG.resetRowGroup(baseRid);
+local_outputRG.setDBRoot(dbRoot);
+local_outputRG.getRow(0, &postJoinRow);
+}
+
+applyMapping(smallMappings[depth], smallRow, &joinedBaseRow);
+copyRow(joinedBaseRow, &postJoinRow);
+}
+}
+return memSizeForOutputRG;
+}
+
+void TupleBPS::JoinLocalData::processFE2(vector<rowgroup::RGData>& rgData)
+{
+vector<RGData> results;
+RGData result;
+uint32_t i, j;
+bool ret;
+
+result = RGData(local_fe2Output);
+local_fe2Output.setData(&result);
+local_fe2Output.resetRowGroup(-1);
+local_fe2Output.getRow(0, &local_fe2OutRow);
+
+for (i = 0; i < rgData.size(); i++)
+{
+local_outputRG.setData(&(rgData)[i]);
+
+if (local_fe2Output.getRowCount() == 0)
+{
+local_fe2Output.resetRowGroup(local_outputRG.getBaseRid());
+local_fe2Output.setDBRoot(local_outputRG.getDBRoot());
+}
+
+local_outputRG.getRow(0, &postJoinRow);
+
+for (j = 0; j < local_outputRG.getRowCount(); j++, postJoinRow.nextRow())
+{
+ret = local_fe2.evaluate(&postJoinRow);
+
+if (ret)
+{
+applyMapping(tbps->fe2Mapping, postJoinRow, &local_fe2OutRow);
+local_fe2OutRow.setRid(postJoinRow.getRelRid());
+local_fe2Output.incRowCount();
+local_fe2OutRow.nextRow();
+
+if (local_fe2Output.getRowCount() == 8192 ||
+local_fe2Output.getDBRoot() != local_outputRG.getDBRoot() ||
+local_fe2Output.getBaseRid() != local_outputRG.getBaseRid()
+)
+{
+results.push_back(result);
+result = RGData(local_fe2Output);
+local_fe2Output.setData(&result);
+local_fe2Output.resetRowGroup(local_outputRG.getBaseRid());
+local_fe2Output.setDBRoot(local_outputRG.getDBRoot());
+local_fe2Output.getRow(0, &local_fe2OutRow);
+}
+}
+}
+}
+
+if (local_fe2Output.getRowCount() > 0)
+{
+results.push_back(result);
+}
+
+rgData.swap(results);
+}
+
+
 struct ByteStreamProcessor
 {
 ByteStreamProcessor(TupleBPS* tbps, vector<boost::shared_ptr<messageqcpp::ByteStream>>& bsv,
@@ -379,7 +505,6 @@ TupleBPS::TupleBPS(const pColStep& rhs, const JobInfo& jobInfo) :
 fExtendedInfo = "TBPS: ";
 fQtc.stepParms().stepType = StepTeleStats::T_BPS;
-

 hasPCFilter = hasPMFilter = hasRIDFilter = hasSegmentFilter = hasDBRootFilter = hasSegmentDirFilter =
 hasPartitionFilter = hasMaxFilter = hasMinFilter = hasLBIDFilter = hasExtentIDFilter = false;
 }
@@ -1294,7 +1419,7 @@ void TupleBPS::run()
 if (fe1)
 fBPP->setFEGroup1(fe1, fe1Input);

-if (fe2 && runFEonPM)
+if (fe2 && bRunFEonPM)
 fBPP->setFEGroup2(fe2, fe2Output);

 if (fe2)
@@ -2006,6 +2131,7 @@ void TupleBPS::processByteStreamVector(vector<boost::shared_ptr<messageqcpp::Byt
 uint32_t cachedIO;
 uint32_t physIO;
 uint32_t touchedBlocks;
+int32_t memAmount = 0;

 for (uint32_t i = begin; i < end; ++i)
 {
@@ -2172,26 +2298,7 @@ void TupleBPS::processByteStreamVector(vector<boost::shared_ptr<messageqcpp::Byt
 {
 applyMapping(data->largeMapping, data->largeSideRow, &data->joinedBaseRow);
 data->joinedBaseRow.setRid(data->largeSideRow.getRelRid());
-generateJoinResultSet(data->joinerOutput, data->joinedBaseRow, data->smallMappings, 0,
-data->local_outputRG, data->joinedData, &rgDatav,
-data->smallSideRows, data->postJoinRow);
-
-// Bug 3510: Don't let the join results buffer get out of control. Need
-// to refactor this. All post-join processing needs to go here AND below
-// for now.
-if (rgDatav.size() * data->local_outputRG.getMaxDataSize() > 50000000)
-{
-RowGroup out(data->local_outputRG);
-
-if (fe2 && !runFEonPM)
-{
-processFE2(out, data->local_fe2Output, data->postJoinRow, data->local_fe2OutRow,
-&rgDatav, &data->local_fe2);
-rgDataVecToDl(rgDatav, data->local_fe2Output, dlp);
-}
-else
-rgDataVecToDl(rgDatav, out, dlp);
-}
+memAmount += data->generateJoinResultSet( 0, rgDatav, dlp);
 }
 } // end of the for-loop in the join code

@@ -2204,12 +2311,16 @@ void TupleBPS::processByteStreamVector(vector<boost::shared_ptr<messageqcpp::Byt
 {
 rgDatav.push_back(rgData);
 }
-
-// Execute UM F & E group 2 on rgDatav
-if (fe2 && !runFEonPM && rgDatav.size() > 0 && !cancelled())
+if (memAmount)
 {
-processFE2(data->local_outputRG, data->local_fe2Output, data->postJoinRow,
-data->local_fe2OutRow, &rgDatav, &data->local_fe2);
+resourceManager()->returnMemory(memAmount);
+memAmount = 0;
+}
+
+// Execute UM F & E group 2 on rgDatav
+if (fe2 && !bRunFEonPM && rgDatav.size() > 0 && !cancelled())
+{
+data->processFE2(rgDatav);
 rgDataVecToDl(rgDatav, data->local_fe2Output, dlp);
 }

@@ -2233,7 +2344,7 @@ void TupleBPS::processByteStreamVector(vector<boost::shared_ptr<messageqcpp::Byt
 // insert the resulting rowgroup data from a single bytestream into dlp
 if (rgDatav.size() > 0)
 {
-if (fe2 && runFEonPM)
+if (fe2 && bRunFEonPM)
 rgDataVecToDl(rgDatav, data->local_fe2Output, dlp);
 else
 rgDataVecToDl(rgDatav, data->local_outputRG, dlp);
@@ -2382,10 +2493,7 @@ void TupleBPS::receiveMultiPrimitiveMessages()
 start = end;
 }

-// Join threads.
-for (uint32_t i = 0, e = fProcessorThreads.size(); i < e; ++i)
-jobstepThreadPool.join(fProcessorThreads[i]);
-
+jobstepThreadPool.join(fProcessorThreads);
 // Clear all.
 fProcessorThreads.clear();
 bsv.clear();
@@ -2444,7 +2552,7 @@ void TupleBPS::receiveMultiPrimitiveMessages()
 abort_nolock();
 }

-// We have on thread here and do not need to notify any waiting producer threads, because we are done of
+// We have one thread here and do not need to notify any waiting producer threads, because we are done with
 // consuming messages from queue.
 tplLock.unlock();

@@ -2492,9 +2600,8 @@ void TupleBPS::receiveMultiPrimitiveMessages()
 if (fe2)
 {
 rgDatav.push_back(data->joinedData);
-processFE2(data->local_outputRG, data->local_fe2Output, data->postJoinRow,
-data->local_fe2OutRow, &rgDatav, &data->local_fe2);
+data->processFE2(rgDatav);

 if (rgDatav.size() > 0)
 rgDataToDl(rgDatav[0], data->local_fe2Output, dlp);

@@ -2515,9 +2622,8 @@ void TupleBPS::receiveMultiPrimitiveMessages()
 if (fe2)
 {
 rgDatav.push_back(data->joinedData);
-processFE2(data->local_outputRG, data->local_fe2Output, data->postJoinRow,
-data->local_fe2OutRow, &rgDatav, &data->local_fe2);
+data->processFE2(rgDatav);

 if (rgDatav.size() > 0)
 rgDataToDl(rgDatav[0], data->local_fe2Output, dlp);

@@ -2856,52 +2962,6 @@ void TupleBPS::setJoinedResultRG(const rowgroup::RowGroup& rg)
 fe2Mapping = makeMapping(outputRowGroup, fe2Output);
 }

-/* probably worthwhile to make some of these class vars */
-void TupleBPS::generateJoinResultSet(const vector<vector<Row::Pointer> >& joinerOutput,
-Row& baseRow, const vector<shared_array<int> >& mappings, const uint32_t depth,
-RowGroup& outputRG, RGData& rgData, vector<RGData>* outputData, const scoped_array<Row>& smallRows,
-Row& joinedRow)
-{
-uint32_t i;
-Row& smallRow = smallRows[depth];
-
-if (depth < smallSideCount - 1)
-{
-for (i = 0; i < joinerOutput[depth].size(); i++)
-{
-smallRow.setPointer(joinerOutput[depth][i]);
-applyMapping(mappings[depth], smallRow, &baseRow);
-generateJoinResultSet(joinerOutput, baseRow, mappings, depth + 1,
-outputRG, rgData, outputData, smallRows, joinedRow);
-}
-}
-else
-{
-outputRG.getRow(outputRG.getRowCount(), &joinedRow);
-
-for (i = 0; i < joinerOutput[depth].size(); i++, joinedRow.nextRow(),
-outputRG.incRowCount())
-{
-smallRow.setPointer(joinerOutput[depth][i]);
-
-if (UNLIKELY(outputRG.getRowCount() == 8192))
-{
-uint32_t dbRoot = outputRG.getDBRoot();
-uint64_t baseRid = outputRG.getBaseRid();
-outputData->push_back(rgData);
-rgData = RGData(outputRG);
-outputRG.setData(&rgData);
-outputRG.resetRowGroup(baseRid);
-outputRG.setDBRoot(dbRoot);
-outputRG.getRow(0, &joinedRow);
-}
-
-applyMapping(mappings[depth], smallRow, &baseRow);
-copyRow(baseRow, &joinedRow);
-}
-}
-}
-
 const rowgroup::RowGroup& TupleBPS::getOutputRowGroup() const
 {
 return outputRowGroup;
@@ -3015,9 +3075,9 @@ void TupleBPS::setFcnExpGroup2(const boost::shared_ptr<funcexp::FuncExpWrapper>&
 fe2Output = rg;
 checkDupOutputColumns(rg);
 fe2Mapping = makeMapping(outputRowGroup, fe2Output);
-runFEonPM = runFE2onPM;
+bRunFEonPM = runFE2onPM;

-if (runFEonPM)
+if (bRunFEonPM)
 fBPP->setFEGroup2(fe2, fe2Output);
 }

@@ -3030,7 +3090,7 @@ void TupleBPS::setFcnExpGroup3(const vector<execplan::SRCP>& fe)
 fe2->addReturnedColumn(fe[i]);

 // if this is called, there's no join, so it can always run on the PM
-runFEonPM = true;
+bRunFEonPM = true;
 fBPP->setFEGroup2(fe2, fe2Output);
 }

@@ -3040,95 +3100,10 @@ void TupleBPS::setFE23Output(const rowgroup::RowGroup& feOutput)
 checkDupOutputColumns(feOutput);
 fe2Mapping = makeMapping(outputRowGroup, fe2Output);

-if (fe2 && runFEonPM)
+if (fe2 && bRunFEonPM)
 fBPP->setFEGroup2(fe2, fe2Output);
 }

-void TupleBPS::processFE2_oneRG(RowGroup& input, RowGroup& output, Row& inRow,
-Row& outRow, funcexp::FuncExpWrapper* local_fe)
-{
-bool ret;
-uint32_t i;
-
-output.resetRowGroup(input.getBaseRid());
-output.setDBRoot(input.getDBRoot());
-output.getRow(0, &outRow);
-input.getRow(0, &inRow);
-
-for (i = 0; i < input.getRowCount(); i++, inRow.nextRow())
-{
-ret = local_fe->evaluate(&inRow);
-
-if (ret)
-{
-applyMapping(fe2Mapping, inRow, &outRow);
-outRow.setRid(inRow.getRelRid());
-output.incRowCount();
-outRow.nextRow();
-}
-}
-}
-
-void TupleBPS::processFE2(RowGroup& input, RowGroup& output, Row& inRow, Row& outRow,
-vector<RGData>* rgData, funcexp::FuncExpWrapper* local_fe)
-{
-vector<RGData> results;
-RGData result;
-uint32_t i, j;
-bool ret;
-
-result = RGData(output);
-output.setData(&result);
-output.resetRowGroup(-1);
-output.getRow(0, &outRow);
-
-for (i = 0; i < rgData->size(); i++)
-{
-input.setData(&(*rgData)[i]);
-
-if (output.getRowCount() == 0)
-{
-output.resetRowGroup(input.getBaseRid());
-output.setDBRoot(input.getDBRoot());
-}
-
-input.getRow(0, &inRow);
-
-for (j = 0; j < input.getRowCount(); j++, inRow.nextRow())
-{
-ret = local_fe->evaluate(&inRow);
-
-if (ret)
-{
-applyMapping(fe2Mapping, inRow, &outRow);
-outRow.setRid(inRow.getRelRid());
-output.incRowCount();
-outRow.nextRow();
-
-if (output.getRowCount() == 8192 ||
-output.getDBRoot() != input.getDBRoot() ||
-output.getBaseRid() != input.getBaseRid()
-)
-{
-results.push_back(result);
-result = RGData(output);
-output.setData(&result);
-output.resetRowGroup(input.getBaseRid());
-output.setDBRoot(input.getDBRoot());
-output.getRow(0, &outRow);
-}
-}
-}
-}
-
-if (output.getRowCount() > 0)
-{
-results.push_back(result);
-}
-
-rgData->swap(results);
-}
-
 const rowgroup::RowGroup& TupleBPS::getDeliveredRowGroup() const
 {
 if (fe2)

@@ -5565,13 +5565,10 @@ void TupleAggregateStep::threadedAggregateRowGroups(uint32_t threadID)
 if (more)
 {
 fRowGroupIns[threadID].setData(&rgData);
-fMemUsage[threadID] +=
-fRowGroupIns[threadID].getSizeWithStrings();
-
 bool diskAggAllowed = fRm->getAllowDiskAggregation();
-if (!fRm->getMemory(
-fRowGroupIns[threadID].getSizeWithStrings(),
-fSessionMemLimit, !diskAggAllowed))
+int64_t memSize = fRowGroupIns[threadID].getSizeWithStrings();
+if (!fRm->getMemory(memSize, fSessionMemLimit, !diskAggAllowed))
 {
 if (!diskAggAllowed)
 {
@@ -5592,6 +5589,7 @@ void TupleAggregateStep::threadedAggregateRowGroups(uint32_t threadID)
 }
 break;
 }
+fMemUsage[threadID] += memSize;
 rgDatas.push_back(rgData);
 }
 else

@@ -40,7 +40,6 @@ using namespace std;
|
|||||||
#include "tuplehashjoin.h"
|
#include "tuplehashjoin.h"
|
||||||
#include "calpontsystemcatalog.h"
|
#include "calpontsystemcatalog.h"
|
||||||
#include "elementcompression.h"
|
#include "elementcompression.h"
|
||||||
#include "resourcemanager.h"
|
|
||||||
#include "tupleaggregatestep.h"
|
#include "tupleaggregatestep.h"
|
||||||
#include "errorids.h"
|
#include "errorids.h"
|
||||||
#include "diskjoinstep.h"
|
#include "diskjoinstep.h"
|
||||||
@@ -76,6 +75,7 @@ TupleHashJoinStep::TupleHashJoinStep(const JobInfo& jobInfo) :
|
|||||||
fTupleId2(-1),
|
fTupleId2(-1),
|
||||||
fCorrelatedSide(0),
|
fCorrelatedSide(0),
|
||||||
resourceManager(jobInfo.rm),
|
resourceManager(jobInfo.rm),
|
||||||
|
fMemSizeForOutputRG(0),
|
||||||
runRan(false),
|
runRan(false),
|
||||||
joinRan(false),
|
joinRan(false),
|
||||||
largeSideIndex(1),
|
largeSideIndex(1),
|
||||||
@@ -137,10 +137,12 @@ TupleHashJoinStep::~TupleHashJoinStep()
|
|||||||
if (memUsedByEachJoin)
|
if (memUsedByEachJoin)
|
||||||
{
|
{
|
||||||
for (uint i = 0 ; i < smallDLs.size(); i++)
|
for (uint i = 0 ; i < smallDLs.size(); i++)
|
||||||
resourceManager->returnMemory(memUsedByEachJoin[i], sessionMemLimit);
|
{
|
||||||
|
if (memUsedByEachJoin[i])
|
||||||
|
resourceManager->returnMemory(memUsedByEachJoin[i], sessionMemLimit);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
returnMemory();
|
||||||
|
|
||||||
//cout << "deallocated THJS, UM memory available: " << resourceManager.availableMemory() << endl;
|
//cout << "deallocated THJS, UM memory available: " << resourceManager.availableMemory() << endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -223,11 +225,13 @@ void TupleHashJoinStep::trackMem(uint index)
|
|||||||
memAfter = joiner->getMemUsage();
|
memAfter = joiner->getMemUsage();
|
||||||
if (memAfter != memBefore)
|
if (memAfter != memBefore)
|
||||||
{
|
{
|
||||||
gotMem = resourceManager->getMemory(memAfter - memBefore, sessionMemLimit, false);
|
gotMem = resourceManager->getMemory(memAfter - memBefore, sessionMemLimit, true);
|
||||||
atomicops::atomicAdd(&memUsedByEachJoin[index], memAfter - memBefore);
|
if (gotMem)
|
||||||
memBefore = memAfter;
|
atomicops::atomicAdd(&memUsedByEachJoin[index], memAfter - memBefore);
|
||||||
if (!gotMem)
|
else
|
||||||
return;
|
return;
|
||||||
|
|
||||||
|
memBefore = memAfter;
|
||||||
}
|
}
|
||||||
memTrackDone.timed_wait(scoped, boost::posix_time::seconds(1));
|
memTrackDone.timed_wait(scoped, boost::posix_time::seconds(1));
|
||||||
}
|
}
|
||||||
@@ -237,16 +241,21 @@ void TupleHashJoinStep::trackMem(uint index)
|
|||||||
memAfter = joiner->getMemUsage();
|
memAfter = joiner->getMemUsage();
|
||||||
if (memAfter == memBefore)
|
if (memAfter == memBefore)
|
||||||
return;
|
return;
|
||||||
gotMem = resourceManager->getMemory(memAfter - memBefore, sessionMemLimit, false);
|
gotMem = resourceManager->getMemory(memAfter - memBefore, sessionMemLimit, true);
|
||||||
atomicops::atomicAdd(&memUsedByEachJoin[index], memAfter - memBefore);
|
if (gotMem)
|
||||||
if (!gotMem)
|
{
|
||||||
|
atomicops::atomicAdd(&memUsedByEachJoin[index], memAfter - memBefore);
|
||||||
|
}
|
||||||
|
else
|
||||||
{
|
{
|
||||||
if (!joinIsTooBig && (isDML || !allowDJS || (fSessionId & 0x80000000) ||
|
if (!joinIsTooBig && (isDML || !allowDJS || (fSessionId & 0x80000000) ||
|
||||||
(tableOid() < 3000 && tableOid() >= 1000)))
|
(tableOid() < 3000 && tableOid() >= 1000)))
|
||||||
{
|
{
|
||||||
joinIsTooBig = true;
|
joinIsTooBig = true;
|
||||||
fLogger->logMessage(logging::LOG_TYPE_INFO, logging::ERR_JOIN_TOO_BIG);
|
ostringstream oss;
|
||||||
errorMessage(logging::IDBErrorInfo::instance()->errorMsg(logging::ERR_JOIN_TOO_BIG));
|
oss << "(" << __LINE__ << ") " << logging::IDBErrorInfo::instance()->errorMsg(logging::ERR_JOIN_TOO_BIG);
|
||||||
|
fLogger->logMessage(logging::LOG_TYPE_INFO, oss.str());
|
||||||
|
errorMessage(oss.str());
|
||||||
status(logging::ERR_JOIN_TOO_BIG);
|
status(logging::ERR_JOIN_TOO_BIG);
|
||||||
cout << "Join is too big, raise the UM join limit for now (monitor thread)" << endl;
|
cout << "Join is too big, raise the UM join limit for now (monitor thread)" << endl;
|
||||||
abort();
|
abort();
|
||||||
@@ -389,7 +398,6 @@ void TupleHashJoinStep::smallRunnerFcn(uint32_t index, uint threadID, uint64_t *
|
|||||||
smallRG = smallRGs[index];
|
smallRG = smallRGs[index];
|
||||||
|
|
||||||
smallRG.initRow(&r);
|
smallRG.initRow(&r);
|
||||||
|
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
ssize_t rgSize;
|
ssize_t rgSize;
|
||||||
@@ -407,9 +415,12 @@ void TupleHashJoinStep::smallRunnerFcn(uint32_t index, uint threadID, uint64_t *
|
|||||||
utils::releaseSpinlock(rgdLock);
|
utils::releaseSpinlock(rgdLock);
|
||||||
|
|
||||||
rgSize = smallRG.getSizeWithStrings();
|
rgSize = smallRG.getSizeWithStrings();
|
||||||
atomicops::atomicAdd(&memUsedByEachJoin[index], rgSize);
|
gotMem = resourceManager->getMemory(rgSize, sessionMemLimit, true);
|
||||||
gotMem = resourceManager->getMemory(rgSize, sessionMemLimit, false);
|
if (gotMem)
|
||||||
if (!gotMem)
|
{
|
||||||
|
atomicops::atomicAdd(&memUsedByEachJoin[index], rgSize);
|
||||||
|
}
|
||||||
|
else
|
||||||
{
|
{
|
||||||
/* Mem went over the limit.
|
/* Mem went over the limit.
|
||||||
If DML or a syscat query, abort.
|
If DML or a syscat query, abort.
|
||||||
@@ -423,19 +434,20 @@ void TupleHashJoinStep::smallRunnerFcn(uint32_t index, uint threadID, uint64_t *
                     (tableOid() < 3000 && tableOid() >= 1000))
                 {
                     joinIsTooBig = true;
-                    fLogger->logMessage(logging::LOG_TYPE_INFO, logging::ERR_JOIN_TOO_BIG);
-                    errorMessage(logging::IDBErrorInfo::instance()->errorMsg(logging::ERR_JOIN_TOO_BIG));
+                    ostringstream oss;
+                    oss << "(" << __LINE__ << ") " << logging::IDBErrorInfo::instance()->errorMsg(logging::ERR_JOIN_TOO_BIG);
+                    fLogger->logMessage(logging::LOG_TYPE_INFO, oss.str());
+                    errorMessage(oss.str());
                     status(logging::ERR_JOIN_TOO_BIG);
                     cout << "Join is too big, raise the UM join limit for now (small runner)" << endl;
                     abort();
                 }
                 else if (allowDJS)
                     joiner->setConvertToDiskJoin();

                 return;
             }

             joiner->insertRGData(smallRG, threadID);

             if (!joiner->inUM() && (memUsedByEachJoin[index] > pmMemLimit))
             {
                 joiner->setInUM(rgData[index]);
@@ -457,7 +469,6 @@ next:
                                  "TupleHashJoinStep::smallRunnerFcn()");
         status(logging::ERR_EXEMGR_MALFUNCTION);
     }
-
     if (!joiner->inUM())
         joiner->setInPM();
 }
@@ -649,7 +660,7 @@ void TupleHashJoinStep::hjRunner()
     memUsedByEachJoin.reset(new ssize_t[smallDLs.size()]);

     for (i = 0; i < smallDLs.size(); i++)
-        memUsedByEachJoin[i] = 0;
+        atomicops::atomicZero(&memUsedByEachJoin[i]);

     try
     {
@@ -747,7 +758,7 @@ void TupleHashJoinStep::hjRunner()
     {
         vector<RGData> empty;
         resourceManager->returnMemory(memUsedByEachJoin[djsJoinerMap[i]], sessionMemLimit);
-        memUsedByEachJoin[djsJoinerMap[i]] = 0;
+        atomicops::atomicZero(&memUsedByEachJoin[i]);
         djs[i].loadExistingData(rgData[djsJoinerMap[i]]);
         rgData[djsJoinerMap[i]].swap(empty);
     }
@@ -833,8 +844,10 @@ void TupleHashJoinStep::hjRunner()
 {
     if (joinIsTooBig && !status())
     {
-        fLogger->logMessage(logging::LOG_TYPE_INFO, logging::ERR_JOIN_TOO_BIG);
-        errorMessage(logging::IDBErrorInfo::instance()->errorMsg(logging::ERR_JOIN_TOO_BIG));
+        ostringstream oss;
+        oss << "(" << __LINE__ << ") " << logging::IDBErrorInfo::instance()->errorMsg(logging::ERR_JOIN_TOO_BIG);
+        fLogger->logMessage(logging::LOG_TYPE_INFO, oss.str());
+        errorMessage(oss.str());
         status(logging::ERR_JOIN_TOO_BIG);
         cout << "Join is too big, raise the UM join limit for now" << endl;

@@ -847,7 +860,7 @@ void TupleHashJoinStep::hjRunner()
         for (uint i = 0; i < smallDLs.size(); i++)
         {
             resourceManager->returnMemory(memUsedByEachJoin[i], sessionMemLimit);
-            memUsedByEachJoin[i] = 0;
+            atomicops::atomicZero(&memUsedByEachJoin[i]);
         }
     }
 }
@@ -1027,7 +1040,7 @@ uint32_t TupleHashJoinStep::nextBand(messageqcpp::ByteStream& bs)
         for (uint i = 0; i < smallDLs.size(); i++)
         {
             resourceManager->returnMemory(memUsedByEachJoin[i], sessionMemLimit);
-            memUsedByEachJoin[i] = 0;
+            atomicops::atomicZero(&memUsedByEachJoin[i]);
         }
         return 0;
     }
@@ -1051,7 +1064,7 @@ uint32_t TupleHashJoinStep::nextBand(messageqcpp::ByteStream& bs)
         for (uint i = 0; i < smallDLs.size(); i++)
         {
             resourceManager->returnMemory(memUsedByEachJoin[i], sessionMemLimit);
-            memUsedByEachJoin[i] = 0;
+            atomicops::atomicZero(&memUsedByEachJoin[i]);
         }
         return 0;
     }
@@ -1549,8 +1562,8 @@ void TupleHashJoinStep::joinRunnerFcn(uint32_t threadID)
         if (local_inputRG.getRowCount() == 0)
             continue;

-        joinOneRG(threadID, &joinedRowData, local_inputRG, local_outputRG, largeRow,
-                  joinFERow, joinedRow, baseRow, joinMatches, smallRowTemplates);
+        joinOneRG(threadID, joinedRowData, local_inputRG, local_outputRG, largeRow,
+                  joinFERow, joinedRow, baseRow, joinMatches, smallRowTemplates, outputDL);
     }

     if (fe2)
@@ -1558,6 +1571,7 @@ void TupleHashJoinStep::joinRunnerFcn(uint32_t threadID)

         processDupList(threadID, (fe2 ? local_fe2RG : local_outputRG), &joinedRowData);
         sendResult(joinedRowData);
+        returnMemory();
         joinedRowData.clear();
         grabSomeWork(&inputData);
     }
@@ -1689,10 +1703,11 @@ void TupleHashJoinStep::grabSomeWork(vector<RGData>* work)

 /* This function is a port of the main join loop in TupleBPS::receiveMultiPrimitiveMessages(). Any
  * changes made here should also be made there and vice versa. */
-void TupleHashJoinStep::joinOneRG(uint32_t threadID, vector<RGData>* out,
+void TupleHashJoinStep::joinOneRG(uint32_t threadID, vector<RGData>& out,
                                   RowGroup& inputRG, RowGroup& joinOutput, Row& largeSideRow, Row& joinFERow,
                                   Row& joinedRow, Row& baseRow, vector<vector<Row::Pointer> >& joinMatches,
                                   shared_array<Row>& smallRowTemplates,
+                                  RowGroupDL* outputDL,
                                   // disk-join support vars. This param list is insane; refactor attempt would be nice at some point.
                                   vector<boost::shared_ptr<joiner::TupleJoiner> >* tjoiners,
                                   boost::shared_array<boost::shared_array<int> >* rgMappings,
@@ -1700,7 +1715,6 @@ void TupleHashJoinStep::joinOneRG(uint32_t threadID, vector<RGData>* out,
                                   boost::scoped_array<boost::scoped_array<uint8_t> >* smallNullMem
                                  )
 {
-
     /* Disk-join support.
        These dissociate the fcn from THJS's members & allow this fcn to be called from DiskJoinStep
     */
@@ -1821,19 +1835,19 @@ void TupleHashJoinStep::joinOneRG(uint32_t threadID, vector<RGData>* out,
             applyMapping((*rgMappings)[smallSideCount], largeSideRow, &baseRow);
             baseRow.setRid(largeSideRow.getRelRid());
             generateJoinResultSet(joinMatches, baseRow, *rgMappings,
-                                  0, joinOutput, joinedData, out, smallRowTemplates, joinedRow);
+                                  0, joinOutput, joinedData, out, smallRowTemplates, joinedRow, outputDL);
         }
     }

     if (joinOutput.getRowCount() > 0)
-        out->push_back(joinedData);
+        out.push_back(joinedData);
 }

 void TupleHashJoinStep::generateJoinResultSet(const vector<vector<Row::Pointer> >& joinerOutput,
         Row& baseRow, const shared_array<shared_array<int> >& mappings,
         const uint32_t depth, RowGroup& l_outputRG, RGData& rgData,
-        vector<RGData>* outputData, const shared_array<Row>& smallRows,
-        Row& joinedRow)
+        vector<RGData>& outputData, const shared_array<Row>& smallRows,
+        Row& joinedRow, RowGroupDL* dlp)
 {
     uint32_t i;
     Row& smallRow = smallRows[depth];
@@ -1845,9 +1859,8 @@ void TupleHashJoinStep::generateJoinResultSet(const vector<vector<Row::Pointer>
         {
             smallRow.setPointer(joinerOutput[depth][i]);
             applyMapping(mappings[depth], smallRow, &baseRow);
-            // cout << "depth " << depth << ", size " << joinerOutput[depth].size() << ", row " << i << ": " << smallRow.toString() << endl;
             generateJoinResultSet(joinerOutput, baseRow, mappings, depth + 1,
-                                  l_outputRG, rgData, outputData, smallRows, joinedRow);
+                                  l_outputRG, rgData, outputData, smallRows, joinedRow, dlp);
         }
     }
     else
@@ -1863,7 +1876,15 @@ void TupleHashJoinStep::generateJoinResultSet(const vector<vector<Row::Pointer>
         {
             uint32_t dbRoot = l_outputRG.getDBRoot();
             uint64_t baseRid = l_outputRG.getBaseRid();
-            outputData->push_back(rgData);
+            outputData.push_back(rgData);
+            // Count the memory
+            if (UNLIKELY(!getMemory(l_outputRG.getMaxDataSize())))
+            {
+                // Don't let the join results buffer get out of control.
+                sendResult(outputData);
+                outputData.clear();
+                returnMemory();
+            }
             rgData.reinit(l_outputRG);
             l_outputRG.setData(&rgData);
             l_outputRG.resetRowGroup(baseRid);
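This hunk is the heart of the MCOL-4846 behavior: instead of letting the joined-output buffer grow without bound, generateJoinResultSet() ships what it has whenever a memory reservation fails, then releases the reservation. A condensed sketch of that flush-on-failure accumulation (the template parameters and the ship callback are stand-ins, not the real RGData/sendResult types):

    #include <utility>
    #include <vector>

    // Stand-in types; in the real code these are RGData vectors, the
    // getMemory()/returnMemory() pair, and sendResult().
    template <typename RG, typename Reserver, typename Shipper>
    void accumulate(std::vector<RG>& out, RG rg, size_t rgBytes,
                    Reserver& reserve, Shipper ship)
    {
        out.push_back(std::move(rg));
        if (!reserve.grow(rgBytes))   // over budget: stop buffering
        {
            ship(out);                // push buffered results downstream now
            out.clear();
            reserve.release();        // the matching returnMemory()
        }
    }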
@@ -1871,11 +1892,8 @@ void TupleHashJoinStep::generateJoinResultSet(const vector<vector<Row::Pointer>
             l_outputRG.getRow(0, &joinedRow);
         }

-        // cout << "depth " << depth << ", size " << joinerOutput[depth].size() << ", row " << i << ": " << smallRow.toString() << endl;
         applyMapping(mappings[depth], smallRow, &baseRow);
         copyRow(baseRow, &joinedRow);
-        //memcpy(joinedRow.getData(), baseRow.getData(), joinedRow.getSize());
-        //cout << "(step " << stepID << ") fully joined row is: " << joinedRow.toString() << endl;
     }
     }
 }
@@ -32,11 +32,12 @@
 #include <string>
 #include <vector>
 #include <utility>
+#include "resourcemanager.h"
+#include "exceptclasses.h"

 namespace joblist
 {
 class BatchPrimitive;
-class ResourceManager;
 class TupleBPS;
 struct FunctionJoinInfo;
 class DiskJoinStep;
@@ -371,6 +372,21 @@ public:
     }

     void abort();
+    void returnMemory()
+    {
+        if (fMemSizeForOutputRG > 0)
+        {
+            resourceManager->returnMemory(fMemSizeForOutputRG);
+            fMemSizeForOutputRG = 0;
+        }
+    }
+    bool getMemory(uint64_t memSize)
+    {
+        bool gotMem = resourceManager->getMemory(memSize);
+        if (gotMem)
+            fMemSizeForOutputRG += memSize;
+        return gotMem;
+    }
 private:
     TupleHashJoinStep();
     TupleHashJoinStep(const TupleHashJoinStep&);
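These two inline helpers implement a simple owned-reservation counter: getMemory() records only what the ResourceManager actually granted, and returnMemory() hands the whole reservation back at once. A hedged sketch of the same idea wrapped in RAII (MemoryReservation is a made-up name; ColumnStore itself uses the explicit call pair above):

    #include <cstdint>

    // Hypothetical RAII wrapper over a ResourceManager-like interface.
    template <typename RM>
    class MemoryReservation
    {
    public:
        explicit MemoryReservation(RM& rm) : rm_(rm), held_(0) {}
        ~MemoryReservation() { release(); }

        bool grow(uint64_t bytes)
        {
            if (!rm_.getMemory(bytes))
                return false;      // nothing is recorded on failure
            held_ += bytes;
            return true;
        }
        void release()
        {
            if (held_ > 0)
            {
                rm_.returnMemory(held_);
                held_ = 0;
            }
        }

    private:
        RM& rm_;
        uint64_t held_;
    };

The destructor guarantees the reservation is returned even on an exception path, which is the one thing the explicit pair cannot promise.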
@@ -427,7 +443,8 @@ private:
     std::vector<std::vector<uint32_t> > smallSideKeys;

     ResourceManager* resourceManager;
+    uint64_t fMemSizeForOutputRG;

     struct JoinerSorter
     {
         inline bool operator()(const boost::shared_ptr<joiner::TupleJoiner>& j1,
@@ -519,18 +536,20 @@ private:
     void generateJoinResultSet(const std::vector<std::vector<rowgroup::Row::Pointer> >& joinerOutput,
                                rowgroup::Row& baseRow, const boost::shared_array<boost::shared_array<int> >& mappings,
                                const uint32_t depth, rowgroup::RowGroup& outputRG, rowgroup::RGData& rgData,
-                               std::vector<rowgroup::RGData>* outputData,
-                               const boost::shared_array<rowgroup::Row>& smallRows, rowgroup::Row& joinedRow);
+                               std::vector<rowgroup::RGData>& outputData,
+                               const boost::shared_array<rowgroup::Row>& smallRows, rowgroup::Row& joinedRow,
+                               RowGroupDL* outputDL);
     void grabSomeWork(std::vector<rowgroup::RGData>* work);
     void sendResult(const std::vector<rowgroup::RGData>& res);
     void processFE2(rowgroup::RowGroup& input, rowgroup::RowGroup& output, rowgroup::Row& inRow,
                     rowgroup::Row& outRow, std::vector<rowgroup::RGData>* rgData,
                     funcexp::FuncExpWrapper* local_fe);
-    void joinOneRG(uint32_t threadID, std::vector<rowgroup::RGData>* out,
+    void joinOneRG(uint32_t threadID, std::vector<rowgroup::RGData>& out,
                    rowgroup::RowGroup& inputRG, rowgroup::RowGroup& joinOutput, rowgroup::Row& largeSideRow,
                    rowgroup::Row& joinFERow, rowgroup::Row& joinedRow, rowgroup::Row& baseRow,
                    std::vector<std::vector<rowgroup::Row::Pointer> >& joinMatches,
                    boost::shared_array<rowgroup::Row>& smallRowTemplates,
+                   RowGroupDL* outputDL,
                    std::vector<boost::shared_ptr<joiner::TupleJoiner> >* joiners = NULL,
                    boost::shared_array<boost::shared_array<int> >* rgMappings = NULL,
                    boost::shared_array<boost::shared_array<int> >* feMappings = NULL,
@@ -575,6 +594,7 @@ private:
         DJSReader(TupleHashJoinStep* hj, uint32_t i) : HJ(hj), index(i) { }
         void operator()()
         {
+            utils::setThreadName("DJSReader");
             HJ->djsReaderFcn(index);
         }
         TupleHashJoinStep* HJ;
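Several hunks in this commit add utils::setThreadName() calls at thread entry points (DJSReader here, and FEMsgHandler, SessionThread, thr_popper, BPPSendThread below), which makes ColumnStore's many worker threads identifiable in ps, top, and gdb. On Linux such a helper is typically a thin wrapper over pthread_setname_np; a minimal sketch of what threadnaming.h plausibly does (this is an assumption, not the actual ColumnStore implementation):

    // Requires _GNU_SOURCE with glibc.
    #include <pthread.h>
    #include <cstring>

    // Linux truncates thread names to 15 characters plus the NUL.
    inline void setThreadName(const char* name)
    {
        char buf[16];
        std::strncpy(buf, name, sizeof(buf) - 1);
        buf[sizeof(buf) - 1] = '\0';
        pthread_setname_np(pthread_self(), buf);
    }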
@@ -245,10 +245,13 @@ void TupleUnion::readInput(uint32_t which)

             memUsageAfter = allocator.getMemUsage();
             memDiff += (memUsageAfter - memUsageBefore);
-            memUsage += memDiff;
         }

-        if (!rm->getMemory(memDiff, sessionMemLimit))
+        if (rm->getMemory(memDiff, sessionMemLimit))
+        {
+            memUsage += memDiff;
+        }
+        else
         {
             fLogger->logMessage(logging::LOG_TYPE_INFO, logging::ERR_UNION_TOO_BIG);

@@ -919,11 +919,12 @@ void WindowFunctionStep::execute()
     {
         fInRowGroupData.push_back(rgData);
         uint64_t memAdd = fRowGroupIn.getSizeWithStrings() + rowCnt * sizeof(RowPosition);
-        fMemUsage += memAdd;

         if (fRm->getMemory(memAdd, fSessionMemLimit) == false)
             throw IDBExcept(ERR_WF_DATA_SET_TOO_BIG);

+        fMemUsage += memAdd;
+
         for (uint64_t j = 0; j < rowCnt; ++j)
         {
             if (i > 0x0000FFFFFFFFFFFFULL || j > 0x000000000000FFFFULL)
@@ -1054,11 +1055,12 @@ void WindowFunctionStep::doFunction()
     while (((i = nextFunctionIndex()) < fFunctionCount) && !cancelled())
     {
         uint64_t memAdd = fRows.size() * sizeof(RowPosition);
-        fMemUsage += memAdd;

         if (fRm->getMemory(memAdd, fSessionMemLimit) == false)
             throw IDBExcept(ERR_WF_DATA_SET_TOO_BIG);

+        fMemUsage += memAdd;
+
         fFunctions[i]->setCallback(this, i);
         (*fFunctions[i].get())();
     }
@@ -755,7 +755,7 @@ int ha_mcs_impl_write_batch_row_(const uchar* buf, TABLE* table, cal_impl_if::ca
         }
     }

-    rc = fprintf(ci.filePtr, "\n"); //@bug 6077 check whether thhe pipe is still open
+    rc = fprintf(ci.filePtr, "\n"); //@bug 6077 check whether the pipe is still open

     if ( rc < 0)
         rc = -1;
@@ -19,6 +19,7 @@
 #include "iosocket.h"

 #include "femsghandler.h"
+#include "threadnaming.h"

 using namespace std;
 using namespace joblist;
@@ -35,6 +36,7 @@ public:
     Runner(FEMsgHandler* f) : target(f) { }
     void operator()()
     {
+        utils::setThreadName("FEMsgHandler");
         target->threadFcn();
     }
     FEMsgHandler* target;
@@ -79,6 +79,7 @@

 #include "mariadb_my_sys.h"
 #include "statistics.h"
+#include "threadnaming.h"

 class Opt
 {
@@ -640,8 +641,6 @@ private:

         if (jl->status() == 0)
         {
-            std::string emsg;
-
             if (jl->putEngineComm(fEc) != 0)
                 throw std::runtime_error(jl->errMsg());
         }
@@ -738,6 +737,7 @@ private:

     void operator()()
     {
+        utils::setThreadName("SessionThread");
         messageqcpp::ByteStream bs, inbs;
         execplan::CalpontSelectExecutionPlan csep;
         csep.sessionID(0);
@@ -104,6 +104,7 @@ using namespace compress;
 using namespace idbdatafile;

 #include "mcsconfig.h"
+#include "threadnaming.h"

 typedef tr1::unordered_set<BRM::OID_t> USOID;

@@ -394,6 +395,7 @@ static int updateptrs(char* ptr, FdCacheType_t::iterator fdit)

 void* thr_popper(ioManager* arg)
 {
+    utils::setThreadName("thr_popper");
     ioManager* iom = arg;
     FileBufferMgr* fbm;
     int totalRqst = 0;
@@ -1151,17 +1151,24 @@ void BatchPrimitiveProcessor::initProcessor()
 }

 /* This version does a join on projected rows */
-void BatchPrimitiveProcessor::executeTupleJoin()
+// In order to prevent super-size result sets in the case of near-cartesian joins on three or more joins,
+// the startRid (starts at 0) is used to begin the rid loop, and if we cut off processing early because of
+// the size of the result set, we return the next rid to start with. If we finish ridCount rids, return 0.
+uint32_t BatchPrimitiveProcessor::executeTupleJoin(uint32_t startRid)
 {
     uint32_t newRowCount = 0, i, j;
     vector<uint32_t> matches;
     uint64_t largeKey;
+    uint64_t resultCount = 0;
+    uint32_t newStartRid = startRid;
     outputRG.getRow(0, &oldRow);
     outputRG.getRow(0, &newRow);

     //cout << "before join, RG has " << outputRG.getRowCount() << " BPP ridcount= " << ridCount << endl;
-    for (i = 0; i < ridCount && !sendThread->aborted(); i++, oldRow.nextRow())
+    // ridCount gets modified based on the number of Rids actually processed during this call.
+    // origRidCount is the number of rids for this thread after filter, which are the total
+    // number of rids to be processed from all calls to this function during this thread.
+    for (i = startRid; i < origRidCount && !sendThread->aborted(); i++, oldRow.nextRow())
     {
         /* Decide whether this large-side row belongs in the output. The breaks
          * in the loop mean that it doesn't.
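The signature change turns executeTupleJoin() into a resumable batch: it stops early once the result set grows too large and returns the rid to resume from, with 0 meaning "done". A self-contained sketch of the same resume-token pattern (all names here are illustrative, not the BPP code):

    #include <cstdint>
    #include <vector>

    // Process items from 'start'; stop early once the output grows past
    // 'budget' and return the index to resume from, or 0 when finished.
    uint32_t joinBatch(const std::vector<int>& in, std::vector<int>& out,
                       uint32_t start, size_t budget)
    {
        for (uint32_t i = start; i < in.size(); ++i)
        {
            out.push_back(in[i]);        // stand-in for emitting join matches
            if (out.size() >= budget)
                return i + 1;            // resume point for the next call
        }
        return 0;                        // 0 means the batch is finished
    }

    void drive(const std::vector<int>& in, size_t budget)
    {
        std::vector<int> out;
        uint32_t next = 0;
        do
        {
            next = joinBatch(in, out, next, budget);
            out.clear();                 // ship & release results between slices
        } while (next != 0);
    }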
@@ -1270,10 +1277,9 @@ void BatchPrimitiveProcessor::executeTupleJoin()

         if (j == joinerCount)
         {
+            uint32_t matchCount;
             for (j = 0; j < joinerCount; j++)
             {
-                uint32_t matchCount;
-
                 /* The result is already known if...
                  * -- anti-join with no fcnexp
                  * -- semi-join with no fcnexp and not scalar
@@ -1361,6 +1367,8 @@ void BatchPrimitiveProcessor::executeTupleJoin()
                     tSmallSideMatches[j][newRowCount].push_back(-1);
                     matchCount = 1;
                 }
+
+                resultCount += matchCount;
             }

             /* Finally, copy the row into the output */
@@ -1384,8 +1392,18 @@ void BatchPrimitiveProcessor::executeTupleJoin()
             //else
             //    cout << "j != joinerCount\n";
         }
+
+        // If we've accumulated more than maxResultCount (1048576, 2^20) results, cut off processing.
+        // The caller will restart to continue where we left off.
+        if (resultCount >= maxResultCount)
+        {
+            newStartRid += newRowCount;
+            break;
+        }
     }

+    if (resultCount < maxResultCount)
+        newStartRid = 0;
+
     ridCount = newRowCount;
     outputRG.setRowCount(ridCount);

@@ -1404,6 +1422,7 @@ void BatchPrimitiveProcessor::executeTupleJoin()
         }
     }
     */
+    return newStartRid;
 }

 #ifdef PRIMPROC_STOPWATCH
@@ -1412,6 +1431,9 @@ void BatchPrimitiveProcessor::execute(StopWatch* stopwatch)
 void BatchPrimitiveProcessor::execute()
 #endif
 {
+    uint8_t sendCount = 0;
+    // bool smoreRGs = false;
+    // uint32_t sStartRid = 0;
     uint32_t i, j;

     try
@@ -1616,9 +1638,9 @@ void BatchPrimitiveProcessor::execute()
     }

     /* 7/7/09 PL: I Changed the projection alg to reduce block touches when there's
        a join. The key columns get projected first, the join is executed to further
        reduce the ridlist, then the rest of the columns get projected */

     if (!doJoin)
     {
         for (j = 0; j < projectCount; ++j)
@@ -1638,15 +1660,92 @@ void BatchPrimitiveProcessor::execute()
             //else
             //    cout << "  no target found for OID " << projectSteps[j]->getOID() << endl;
         }
+
+        if (fe2)
+        {
+            /* functionize this -> processFE2() */
+            fe2Output.resetRowGroup(baseRid);
+            fe2Output.getRow(0, &fe2Out);
+            fe2Input->getRow(0, &fe2In);
+
+            //cerr << "input row: " << fe2In.toString() << endl;
+            for (j = 0; j < outputRG.getRowCount(); j++, fe2In.nextRow())
+            {
+                if (fe2->evaluate(&fe2In))
+                {
+                    applyMapping(fe2Mapping, fe2In, &fe2Out);
+                    //cerr << "  passed. output row: " << fe2Out.toString() << endl;
+                    fe2Out.setRid(fe2In.getRelRid());
+                    fe2Output.incRowCount();
+                    fe2Out.nextRow();
+                }
+            }
+
+            if (!fAggregator)
+            {
+                *serialized << (uint8_t) 1;  // the "count this msg" var
+                fe2Output.setDBRoot(dbRoot);
+                fe2Output.serializeRGData(*serialized);
+                //*serialized << fe2Output.getDataSize();
+                //serialized->append(fe2Output.getData(), fe2Output.getDataSize());
+            }
+        }
+
+        if (fAggregator)
+        {
+            *serialized << (uint8_t) 1;  // the "count this msg" var
+
+            RowGroup& toAggregate = (fe2 ? fe2Output : outputRG);
+            //toAggregate.convertToInlineDataInPlace();
+
+            if (fe2)
+                fe2Output.setDBRoot(dbRoot);
+            else
+                outputRG.setDBRoot(dbRoot);
+
+            fAggregator->addRowGroup(&toAggregate);
+
+            if ((currentBlockOffset + 1) == count)                 // @bug4507, 8k
+            {
+                fAggregator->loadResult(*serialized);              // @bug4507, 8k
+            }                                                      // @bug4507, 8k
+            else if (utils::MonitorProcMem::isMemAvailable())      // @bug4507, 8k
+            {
+                fAggregator->loadEmptySet(*serialized);            // @bug4507, 8k
+            }                                                      // @bug4507, 8k
+            else                                                   // @bug4507, 8k
+            {
+                fAggregator->loadResult(*serialized);              // @bug4507, 8k
+                fAggregator->aggReset();                           // @bug4507, 8k
+            }                                                      // @bug4507, 8k
+        }
+
+        if (!fAggregator && !fe2)
+        {
+            *serialized << (uint8_t) 1;  // the "count this msg" var
+            outputRG.setDBRoot(dbRoot);
+            //cerr << "serializing " << outputRG.toString() << endl;
+            outputRG.serializeRGData(*serialized);
+
+            //*serialized << outputRG.getDataSize();
+            //serialized->append(outputRG.getData(), outputRG.getDataSize());
+        }
+
+#ifdef PRIMPROC_STOPWATCH
+        stopwatch->stop("- if(ot != ROW_GROUP) else");
+#endif
     }
-    else
+    else  // Is doJoin
     {
+        uint32_t startRid = 0;
+        ByteStream preamble = *serialized;
+        origRidCount = ridCount;  // ridCount can get modified by executeTupleJoin(). We need to keep track of the original val.
         /* project the key columns. If there's the filter IN the join, project everything.
            Also need to project 'long' strings b/c executeTupleJoin may copy entire rows
            using copyRow(), which will try to interpret the uninit'd string ptr.
            Valgrind will legitimately complain about copying uninit'd values for the
            other types but that is technically safe. */
         for (j = 0; j < projectCount; j++)
+        {
             if (keyColumnProj[j] || (projectionMap[j] != -1 && (hasJoinFEFilters ||
                                      oldRow.isLongString(projectionMap[j]))))
             {
@@ -1656,218 +1755,174 @@ void BatchPrimitiveProcessor::execute()
                     stopwatch->stop("-- projectIntoRowGroup");
 #else
                     projectSteps[j]->projectIntoRowGroup(outputRG, projectionMap[j]);
-#endif
-                }
-
-#ifdef PRIMPROC_STOPWATCH
-            stopwatch->start("-- executeTupleJoin()");
-            executeTupleJoin();
-            stopwatch->stop("-- executeTupleJoin()");
-#else
-            executeTupleJoin();
-#endif
-
-            /* project the non-key columns */
-            for (j = 0; j < projectCount; ++j)
-            {
-                if (projectionMap[j] != -1 && !keyColumnProj[j] && !hasJoinFEFilters &&
-                        !oldRow.isLongString(projectionMap[j]))
-                {
-#ifdef PRIMPROC_STOPWATCH
-                    stopwatch->start("-- projectIntoRowGroup");
-                    projectSteps[j]->projectIntoRowGroup(outputRG, projectionMap[j]);
-                    stopwatch->stop("-- projectIntoRowGroup");
-#else
-                    projectSteps[j]->projectIntoRowGroup(outputRG, projectionMap[j]);
 #endif
                 }
             }
-        }
-
-        /* The RowGroup is fully joined at this point.
-        Add additional RowGroup processing here.
-        TODO: Try to clean up all of the switching */
-
-        if (doJoin && (fe2 || fAggregator))
-        {
-            bool moreRGs = true;
-            ByteStream preamble = *serialized;
-            initGJRG();
-
-            while (moreRGs && !sendThread->aborted())
-            {
-                /*
-                generate 1 rowgroup (8192 rows max) of joined rows
-                if there's an FE2, run it
-                -pack results into a new rowgroup
-                -if there are < 8192 rows in the new RG, continue
-                if there's an agg, run it
-                send the result
-                */
-                resetGJRG();
-                moreRGs = generateJoinedRowGroup(baseJRow);
-                *serialized << (uint8_t) !moreRGs;
-
-                if (fe2)
-                {
-                    /* functionize this -> processFE2()*/
-                    fe2Output.resetRowGroup(baseRid);
-                    fe2Output.setDBRoot(dbRoot);
-                    fe2Output.getRow(0, &fe2Out);
-                    fe2Input->getRow(0, &fe2In);
-
-                    for (j = 0; j < joinedRG.getRowCount(); j++, fe2In.nextRow())
-                        if (fe2->evaluate(&fe2In))
-                        {
-                            applyMapping(fe2Mapping, fe2In, &fe2Out);
-                            fe2Out.setRid(fe2In.getRelRid());
-                            fe2Output.incRowCount();
-                            fe2Out.nextRow();
-                        }
-                }
-
-                RowGroup& nextRG = (fe2 ? fe2Output : joinedRG);
-                nextRG.setDBRoot(dbRoot);
-
-                if (fAggregator)
-                {
-                    fAggregator->addRowGroup(&nextRG);
-
-                    if ((currentBlockOffset + 1) == count && moreRGs == false)  // @bug4507, 8k
-                    {
-                        fAggregator->loadResult(*serialized);          // @bug4507, 8k
-                    }                                                  // @bug4507, 8k
-                    else if (utils::MonitorProcMem::isMemAvailable())  // @bug4507, 8k
-                    {
-                        fAggregator->loadEmptySet(*serialized);        // @bug4507, 8k
-                    }                                                  // @bug4507, 8k
-                    else                                               // @bug4507, 8k
-                    {
-                        fAggregator->loadResult(*serialized);          // @bug4507, 8k
-                        fAggregator->aggReset();                       // @bug4507, 8k
-                    }                                                  // @bug4507, 8k
-                }
-                else
-                {
-                    //cerr <<" * serialzing " << nextRG.toString() << endl;
-                    nextRG.serializeRGData(*serialized);
-                }
-
-                /* send the msg & reinit the BS */
-                if (moreRGs)
-                {
-                    sendResponse();
-                    serialized.reset(new ByteStream());
-                    *serialized = preamble;
-                }
-            }
-
-            if (hasSmallOuterJoin)
-            {
-                *serialized << ridCount;
-
-                for (i = 0; i < joinerCount; i++)
-                    for (j = 0; j < ridCount; ++j)
-                        serializeInlineVector<uint32_t>(*serialized,
-                                                        tSmallSideMatches[i][j]);
-            }
-        }
-
-        if (!doJoin && fe2)
-        {
-            /* functionize this -> processFE2() */
-            fe2Output.resetRowGroup(baseRid);
-            fe2Output.getRow(0, &fe2Out);
-            fe2Input->getRow(0, &fe2In);
-
-            //cerr << "input row: " << fe2In.toString() << endl;
-            for (j = 0; j < outputRG.getRowCount(); j++, fe2In.nextRow())
-            {
-                if (fe2->evaluate(&fe2In))
-                {
-                    applyMapping(fe2Mapping, fe2In, &fe2Out);
-                    //cerr << "  passed. output row: " << fe2Out.toString() << endl;
-                    fe2Out.setRid(fe2In.getRelRid());
-                    fe2Output.incRowCount();
-                    fe2Out.nextRow();
-                }
-            }
-
-            if (!fAggregator)
-            {
-                *serialized << (uint8_t) 1;  // the "count this msg" var
-                fe2Output.setDBRoot(dbRoot);
-                fe2Output.serializeRGData(*serialized);
-                //*serialized << fe2Output.getDataSize();
-                //serialized->append(fe2Output.getData(), fe2Output.getDataSize());
-            }
-        }
-
-        if (!doJoin && fAggregator)
-        {
-            *serialized << (uint8_t) 1;  // the "count this msg" var
-
-            RowGroup& toAggregate = (fe2 ? fe2Output : outputRG);
-            //toAggregate.convertToInlineDataInPlace();
-
-            if (fe2)
-                fe2Output.setDBRoot(dbRoot);
-            else
-                outputRG.setDBRoot(dbRoot);
-
-            fAggregator->addRowGroup(&toAggregate);
-
-            if ((currentBlockOffset + 1) == count)             // @bug4507, 8k
-            {
-                fAggregator->loadResult(*serialized);          // @bug4507, 8k
-            }                                                  // @bug4507, 8k
-            else if (utils::MonitorProcMem::isMemAvailable())  // @bug4507, 8k
-            {
-                fAggregator->loadEmptySet(*serialized);        // @bug4507, 8k
-            }                                                  // @bug4507, 8k
-            else                                               // @bug4507, 8k
-            {
-                fAggregator->loadResult(*serialized);          // @bug4507, 8k
-                fAggregator->aggReset();                       // @bug4507, 8k
-            }                                                  // @bug4507, 8k
-        }
-
-        if (!fAggregator && !fe2)
-        {
-            *serialized << (uint8_t) 1;  // the "count this msg" var
-            outputRG.setDBRoot(dbRoot);
-            //cerr << "serializing " << outputRG.toString() << endl;
-            outputRG.serializeRGData(*serialized);
-
-            //*serialized << outputRG.getDataSize();
-            //serialized->append(outputRG.getData(), outputRG.getDataSize());
-            if (doJoin)
-            {
-                for (i = 0; i < joinerCount; i++)
-                {
-                    for (j = 0; j < ridCount; ++j)
-                    {
-                        serializeInlineVector<uint32_t>(*serialized,
-                                                        tSmallSideMatches[i][j]);
-                    }
-                }
-            }
-        }
-
-        // clear small side match vector
-        if (doJoin)
-        {
-            for (i = 0; i < joinerCount; i++)
-                for (j = 0; j < ridCount; ++j)
-                    tSmallSideMatches[i][j].clear();
-        }
+            do //while (startRid > 0)
+            {
+#ifdef PRIMPROC_STOPWATCH
+                stopwatch->start("-- executeTupleJoin()");
+                startRid = executeTupleJoin(startRid);
+                stopwatch->stop("-- executeTupleJoin()");
+#else
+                startRid = executeTupleJoin(startRid);
+                // sStartRid = startRid;
+#endif
+                /* project the non-key columns */
+                for (j = 0; j < projectCount; ++j)
+                {
+                    if (projectionMap[j] != -1 && !keyColumnProj[j] && !hasJoinFEFilters &&
+                            !oldRow.isLongString(projectionMap[j]))
+                    {
+#ifdef PRIMPROC_STOPWATCH
+                        stopwatch->start("-- projectIntoRowGroup");
+                        projectSteps[j]->projectIntoRowGroup(outputRG, projectionMap[j]);
+                        stopwatch->stop("-- projectIntoRowGroup");
+#else
+                        projectSteps[j]->projectIntoRowGroup(outputRG, projectionMap[j]);
+#endif
+                    }
+                }
+
+                /* The RowGroup is fully joined at this point.
+                 * Add additional RowGroup processing here.
+                 * TODO: Try to clean up all of the switching */
+                if (fe2 || fAggregator)
+                {
+                    bool moreRGs = true;
+                    initGJRG();
+
+                    while (moreRGs && !sendThread->aborted())
+                    {
+                        /*
+                         * generate 1 rowgroup (8192 rows max) of joined rows
+                         * if there's an FE2, run it
+                         *   -pack results into a new rowgroup
+                         *   -if there are < 8192 rows in the new RG, continue
+                         * if there's an agg, run it
+                         * send the result
+                         */
+                        resetGJRG();
+                        moreRGs = generateJoinedRowGroup(baseJRow);
+                        // smoreRGs = moreRGs;
+                        sendCount = (uint8_t)(!moreRGs && !startRid);
+                        // *serialized << (uint8_t)(!moreRGs && !startRid);  // the "count this msg" var
+                        *serialized << sendCount;
+
+                        if (fe2)
+                        {
+                            /* functionize this -> processFE2()*/
+                            fe2Output.resetRowGroup(baseRid);
+                            fe2Output.setDBRoot(dbRoot);
+                            fe2Output.getRow(0, &fe2Out);
+                            fe2Input->getRow(0, &fe2In);
+
+                            for (j = 0; j < joinedRG.getRowCount(); j++, fe2In.nextRow())
+                                if (fe2->evaluate(&fe2In))
+                                {
+                                    applyMapping(fe2Mapping, fe2In, &fe2Out);
+                                    fe2Out.setRid(fe2In.getRelRid());
+                                    fe2Output.incRowCount();
+                                    fe2Out.nextRow();
+                                }
+                        }
+
+                        RowGroup& nextRG = (fe2 ? fe2Output : joinedRG);
+                        nextRG.setDBRoot(dbRoot);
+
+                        if (fAggregator)
+                        {
+                            fAggregator->addRowGroup(&nextRG);
+
+                            if ((currentBlockOffset + 1) == count && moreRGs == false && startRid == 0)  // @bug4507, 8k
+                            {
+                                fAggregator->loadResult(*serialized);          // @bug4507, 8k
+                            }                                                  // @bug4507, 8k
+                            else if (utils::MonitorProcMem::isMemAvailable())  // @bug4507, 8k
+                            {
+                                fAggregator->loadEmptySet(*serialized);        // @bug4507, 8k
+                            }                                                  // @bug4507, 8k
+                            else                                               // @bug4507, 8k
+                            {
+                                fAggregator->loadResult(*serialized);          // @bug4507, 8k
+                                fAggregator->aggReset();                       // @bug4507, 8k
+                            }                                                  // @bug4507, 8k
+                        }
+                        else
+                        {
+                            //cerr <<" * serialzing " << nextRG.toString() << endl;
+                            nextRG.serializeRGData(*serialized);
+                        }
+
+                        /* send the msg & reinit the BS */
+                        if (moreRGs)
+                        {
+                            sendResponse();
+                            serialized.reset(new ByteStream());
+                            *serialized = preamble;
+                        }
+                    }
+
+                    if (hasSmallOuterJoin)
+                    {
+                        // Should we happen to finish sending data rows right on the boundary of when moreRGs
+                        // flips off, then we need to start a new buffer. I.e., it needs the "count this msg" byte pushed.
+                        if (serialized->length() == preamble.length())
+                            *serialized << (uint8_t)(startRid > 0 ? 0 : 1);  // the "count this msg" var
+
+                        *serialized << ridCount;
+
+                        for (i = 0; i < joinerCount; i++)
+                        {
+                            for (j = 0; j < ridCount; ++j)
+                            {
+                                serializeInlineVector<uint32_t>(*serialized,
+                                                                tSmallSideMatches[i][j]);
+                                tSmallSideMatches[i][j].clear();
+                            }
+                        }
+                    }
+                    else
+                    {
+                        // We have no more use for this allocation
+                        for (i = 0; i < joinerCount; i++)
+                            for (j = 0; j < ridCount; ++j)
+                                tSmallSideMatches[i][j].clear();
+                    }
+                }
+                else
+                {
+                    *serialized << (uint8_t)(startRid > 0 ? 0 : 1);  // the "count this msg" var
+                    outputRG.setDBRoot(dbRoot);
+                    //cerr << "serializing " << outputRG.toString() << endl;
+                    outputRG.serializeRGData(*serialized);
+
+                    //*serialized << outputRG.getDataSize();
+                    //serialized->append(outputRG.getData(), outputRG.getDataSize());
+                    for (i = 0; i < joinerCount; i++)
+                    {
+                        for (j = 0; j < ridCount; ++j)
+                        {
+                            serializeInlineVector<uint32_t>(*serialized,
+                                                            tSmallSideMatches[i][j]);
+                            tSmallSideMatches[i][j].clear();
+                        }
+                    }
+                }
+
+                if (startRid > 0)
+                {
+                    sendResponse();
+                    serialized.reset(new ByteStream());
+                    *serialized = preamble;
+                }
+            } while (startRid > 0);
+        }

 #ifdef PRIMPROC_STOPWATCH
         stopwatch->stop("- if(ot != ROW_GROUP) else");
 #endif
     }
+    ridCount = origRidCount;  // May not be needed, but just to be safe.
+    // std::cout << "end of send. startRid=" << sStartRid << " moreRG=" << smoreRGs << " sendCount=" << sendCount << std::endl;

     if (projectCount > 0 || ot == ROW_GROUP)
     {
         *serialized << cachedIO;
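Note the framing convention this loop relies on: every response slice begins with a one-byte "count this msg" flag, and only the final slice of a block (no more RowGroups and startRid back at 0) sets it, so the UM's flow-control credit is decremented exactly once per logical block no matter how many slices the maxResultCount cutoff produced. A hedged sketch of just that flag computation (the Slice struct is invented for illustration; the flag expression mirrors the diff's sendCount line):

    #include <cstdint>
    #include <utility>
    #include <vector>

    struct Slice
    {
        uint8_t countThisMsg;           // 1 only on the final slice
        std::vector<uint8_t> payload;
    };

    Slice makeSlice(bool moreRGs, uint32_t startRid, std::vector<uint8_t> payload)
    {
        Slice s;
        s.countThisMsg = static_cast<uint8_t>(!moreRGs && startRid == 0);
        s.payload = std::move(payload);
        return s;
    }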
@@ -2215,8 +2270,9 @@ int BatchPrimitiveProcessor::operator()()
         if (sendThread->aborted())
             break;

-        if (!sendThread->okToProceed())
+        if (sendThread->sizeTooBig())
         {
+            // The send buffer is full of messages yet to be sent, so this thread would block anyway.
             freeLargeBuffers();
             return -1;  // the reschedule error code
         }
@@ -220,6 +220,7 @@ private:
     int128_t wide128Values[LOGICAL_BLOCK_RIDS];
     boost::scoped_array<uint64_t> absRids;
     boost::scoped_array<std::string> strValues;
+    uint16_t origRidCount;
     uint16_t ridCount;
     bool needStrValues;
     uint16_t wideColumnsWidths;
@@ -328,7 +329,7 @@ private:
     boost::shared_array<boost::shared_array<boost::shared_ptr<TJoiner> > > tJoiners;
     typedef std::vector<uint32_t> MatchedData[LOGICAL_BLOCK_RIDS];
     boost::shared_array<MatchedData> tSmallSideMatches;
-    void executeTupleJoin();
+    uint32_t executeTupleJoin(uint32_t startRid);
     bool getTupleJoinRowGroupData;
     std::vector<rowgroup::RowGroup> smallSideRGs;
     rowgroup::RowGroup largeSideRG;
@@ -425,6 +426,8 @@ private:
     uint processorThreads;
     uint ptMask;
     bool firstInstance;

+    static const uint64_t maxResultCount = 1048576;  // 2^20
+
     friend class Command;
     friend class ColumnCommand;
@@ -23,59 +23,56 @@

 #include <unistd.h>
 #include <stdexcept>
+#include <mutex>
 #include "bppsendthread.h"

-using namespace std;
-using namespace boost;
-
-#include "atomicops.h"
-
 namespace primitiveprocessor
 {

 extern uint32_t connectionsPerUM;
+extern uint32_t BPPCount;

 BPPSendThread::BPPSendThread() : die(false), gotException(false), mainThreadWaiting(false),
     sizeThreshold(100), msgsLeft(-1), waiting(false), sawAllConnections(false),
-    fcEnabled(false), currentByteSize(0), maxByteSize(25000000)
+    fcEnabled(false), currentByteSize(0)
 {
+    maxByteSize = joblist::ResourceManager::instance()->getMaxBPPSendQueue();
     runner = boost::thread(Runner_t(this));
 }

 BPPSendThread::BPPSendThread(uint32_t initMsgsLeft) : die(false), gotException(false),
     mainThreadWaiting(false), sizeThreshold(100), msgsLeft(initMsgsLeft), waiting(false),
-    sawAllConnections(false), fcEnabled(false), currentByteSize(0), maxByteSize(25000000)
+    sawAllConnections(false), fcEnabled(false), currentByteSize(0)
 {
+    maxByteSize = joblist::ResourceManager::instance()->getMaxBPPSendQueue();
     runner = boost::thread(Runner_t(this));
 }

 BPPSendThread::~BPPSendThread()
 {
-    boost::mutex::scoped_lock sl(msgQueueLock);
-    boost::mutex::scoped_lock sl2(ackLock);
-    die = true;
-    queueNotEmpty.notify_one();
-    okToSend.notify_one();
-    sl.unlock();
-    sl2.unlock();
+    abort();
     runner.join();
 }

-bool BPPSendThread::okToProceed()
-{
-    // keep the queue size below the 100 msg threshold & below the 25MB mark,
-    // but at least 2 msgs so there is always 1 ready to be sent.
-    return ((msgQueue.size() < sizeThreshold && currentByteSize < maxByteSize)
-            || msgQueue.size() < 3) && !die;
-}
-
 void BPPSendThread::sendResult(const Msg_t& msg, bool newConnection)
 {
+    // Wait for the queue to empty out a bit if it's stuffed full
+    if (sizeTooBig())
+    {
+        std::unique_lock<std::mutex> sl1(respondLock);
+        while (currentByteSize >= maxByteSize && msgQueue.size() > 3 && !die)
+        {
+            respondWait = true;
+            fProcessorPool->incBlockedThreads();
+            okToRespond.wait(sl1);
+            fProcessorPool->decBlockedThreads();
+            respondWait = false;
+        }
+    }
     if (die)
         return;

-    boost::mutex::scoped_lock sl(msgQueueLock);
+    std::unique_lock<std::mutex> sl(msgQueueLock);

     if (gotException)
         throw runtime_error(exceptionString);
@@ -105,11 +102,24 @@ void BPPSendThread::sendResult(const Msg_t& msg, bool newConnection)

 void BPPSendThread::sendResults(const vector<Msg_t>& msgs, bool newConnection)
 {
+    // Wait for the queue to empty out a bit if it's stuffed full
+    if (sizeTooBig())
+    {
+        std::unique_lock<std::mutex> sl1(respondLock);
+        while (currentByteSize >= maxByteSize && msgQueue.size() > 3 && !die)
+        {
+            respondWait = true;
+            fProcessorPool->incBlockedThreads();
+            okToRespond.wait(sl1);
+            fProcessorPool->decBlockedThreads();
+            respondWait = false;
+        }
+    }
     if (die)
         return;

-    boost::mutex::scoped_lock sl(msgQueueLock);
+    std::unique_lock<std::mutex> sl(msgQueueLock);

     if (gotException)
         throw runtime_error(exceptionString);
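This is the EXTRA-threads mechanism from the commit message in miniature: a producer that would overfill the send queue parks on okToRespond and bumps a blocked-thread counter, so the PriorityThreadPool knows it may temporarily spin up extra workers for other queries. A self-contained sketch of that handshake (PoolStats and waitForRoom are assumed names standing in for the pool and the wait loop above):

    #include <atomic>
    #include <condition_variable>
    #include <functional>
    #include <mutex>

    struct PoolStats { std::atomic<int> blocked{0}; };

    void waitForRoom(PoolStats& pool, std::mutex& lk, std::condition_variable& cv,
                     const std::function<bool()>& tooBig)
    {
        std::unique_lock<std::mutex> guard(lk);
        while (tooBig())
        {
            pool.blocked++;   // the pool can now justify an EXTRA thread
            cv.wait(guard);   // woken once the consumer drains below the cap
            pool.blocked--;
        }
    }

When all blocking ends, the extra threads drain naturally; the counter only tells the pool how many producers are currently parked.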
@@ -143,7 +153,7 @@ void BPPSendThread::sendResults(const vector<Msg_t>& msgs, bool newConnection)

 void BPPSendThread::sendMore(int num)
 {
-    boost::mutex::scoped_lock sl(ackLock);
+    std::unique_lock<std::mutex> sl(ackLock);

     // cout << "got an ACK for " << num << " msgsLeft=" << msgsLeft << endl;
     if (num == -1)
@@ -156,6 +166,7 @@ void BPPSendThread::sendMore(int num)
     else
         (void)atomicops::atomicAdd(&msgsLeft, num);

+    sl.unlock();
     if (waiting)
         okToSend.notify_one();
 }
@@ -178,7 +189,7 @@ void BPPSendThread::mainLoop()

     while (!die)
     {
-        boost::mutex::scoped_lock sl(msgQueueLock);
+        std::unique_lock<std::mutex> sl(msgQueueLock);

         if (msgQueue.empty() && !die)
         {
@@ -209,8 +220,7 @@ void BPPSendThread::mainLoop()

         if (msgsLeft <= 0 && fcEnabled && !die)
         {
-            boost::mutex::scoped_lock sl2(ackLock);
-
+            std::unique_lock<std::mutex> sl2(ackLock);
             while (msgsLeft <= 0 && fcEnabled && !die)
             {
                 waiting = true;
@@ -254,19 +264,26 @@ void BPPSendThread::mainLoop()
                 (void)atomicops::atomicSub(&currentByteSize, bsSize);
                 msg[msgsSent].msg.reset();
             }
+
+            if (respondWait && currentByteSize < maxByteSize)
+            {
+                okToRespond.notify_one();
+            }
         }
     }
 }

 void BPPSendThread::abort()
 {
-    boost::mutex::scoped_lock sl(msgQueueLock);
-    boost::mutex::scoped_lock sl2(ackLock);
-    die = true;
-    queueNotEmpty.notify_one();
-    okToSend.notify_one();
-    sl.unlock();
-    sl2.unlock();
+    std::lock_guard<std::mutex> sl(msgQueueLock);
+    std::lock_guard<std::mutex> sl2(ackLock);
+    std::lock_guard<std::mutex> sl3(respondLock);
+    {
+        die = true;
+    }
+    queueNotEmpty.notify_all();
+    okToSend.notify_all();
+    okToRespond.notify_all();
 }

 }
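The new abort() follows the standard safe-shutdown recipe: flip the flag while holding every mutex any waiter uses, then notify_all on every condition variable, so no waiter can re-check its predicate between the store and the notify and sleep forever. A generic sketch of the same pattern (names invented; C++17 for std::scoped_lock):

    #include <condition_variable>
    #include <mutex>

    struct Shutdown
    {
        std::mutex a, b;
        std::condition_variable cvA, cvB;
        bool die = false;

        void abort()
        {
            // scoped_lock acquires both mutexes deadlock-free (C++17)
            std::scoped_lock guard(a, b);
            die = true;
            cvA.notify_all();   // every parked thread rechecks 'die' and exits
            cvB.notify_all();
        }
    };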
@@ -20,15 +20,16 @@
 *
 *
 ***********************************************************************/
-/** @file */

 #include "batchprimitiveprocessor.h"
 #include "umsocketselector.h"
+#include <mutex>
+#include <condition_variable>
 #include <queue>
 #include <set>
 #include <boost/thread/thread.hpp>
-#include <boost/thread/condition.hpp>
+#include "threadnaming.h"
+#include "prioritythreadpool.h"
 #ifndef BPPSENDTHREAD_H
 #define BPPSENDTHREAD_H

@@ -63,7 +64,15 @@ public:
         msg(m), sock(so), sockLock(sl), sockIndex(si) { }
     };

-    bool okToProceed();
+    bool sizeTooBig()
+    {
+        // keep the queue size below the 100 msg threshold & below the 250MB mark,
+        // but at least 3 msgs so there is always 1 ready to be sent.
+        return ((msgQueue.size() > sizeThreshold) ||
+                (currentByteSize >= maxByteSize && msgQueue.size() > 3)) && !die;
+    }
+
     void sendMore(int num);
     void sendResults(const std::vector<Msg_t>& msgs, bool newConnection);
     void sendResult(const Msg_t& msg, bool newConnection);
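sizeTooBig() replaces okToProceed() with the sense inverted: it reports the congested state rather than the healthy one. Callers in this commit react in one of two ways: BatchPrimitiveProcessor::operator()() returns the reschedule code instead of blocking, while sendResult()/sendResults() block on okToRespond. A tiny sketch of the reschedule side (runOnce and the -1 contract with the scheduler are assumptions for illustration, mirroring the operator() hunk above):

    // Hypothetical worker entry point: returning -1 asks the thread pool to
    // re-queue the job rather than letting the thread sleep on a full queue.
    int runOnce(bool sendQueueTooBig)
    {
        if (sendQueueTooBig)
            return -1;   // reschedule; an EXTRA thread can serve other queries
        // ... build results and hand them to the send thread ...
        return 0;
    }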
@@ -74,6 +83,10 @@ public:
     {
         return die;
     }
+    void setProcessorPool(threadpool::PriorityThreadPool* processorPool)
+    {
+        fProcessorPool = processorPool;
+    }

 private:
     BPPSendThread(const BPPSendThread&);
@@ -85,22 +98,27 @@ private:
         Runner_t(BPPSendThread* b) : bppst(b) { }
         void operator()()
         {
+            utils::setThreadName("BPPSendThread");
            bppst->mainLoop();
         }
     };

     boost::thread runner;
     std::queue<Msg_t> msgQueue;
-    boost::mutex msgQueueLock;
-    boost::condition queueNotEmpty;
+    std::mutex msgQueueLock;
+    std::condition_variable queueNotEmpty;
     volatile bool die, gotException, mainThreadWaiting;
     std::string exceptionString;
     uint32_t sizeThreshold;
     volatile int32_t msgsLeft;
     bool waiting;
-    boost::mutex ackLock;
-    boost::condition okToSend;
+    std::mutex ackLock;
+    std::condition_variable okToSend;
+    // Condition to prevent a runaway queue
+    bool respondWait;
+    std::mutex respondLock;
+    std::condition_variable okToRespond;

     /* Load balancing structures */
     struct Connection_t
     {
@@ -125,6 +143,9 @@ private:
|
|||||||
/* secondary queue size restriction based on byte size */
|
/* secondary queue size restriction based on byte size */
|
||||||
volatile uint64_t currentByteSize;
|
volatile uint64_t currentByteSize;
|
||||||
uint64_t maxByteSize;
|
uint64_t maxByteSize;
|
||||||
|
// Used to tell the PriorityThreadPool It should consider additional threads because a
|
||||||
|
// queue full event has happened and a thread has been blocked.
|
||||||
|
threadpool::PriorityThreadPool* fProcessorPool;
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
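
The respondWait/respondLock/okToRespond trio added here is the other half of the flow control: a producer that finds the queue over budget parks on okToRespond, and the drain path at the top of this diff wakes it once currentByteSize drops below maxByteSize. A simplified two-sided sketch; waitForRoom() and recordBytesSent() are hypothetical helpers standing in for the real send and drain paths:

#include <condition_variable>
#include <cstdint>
#include <iostream>
#include <mutex>
#include <thread>

// Illustrative stand-ins for BPPSendThread's respondLock/okToRespond/respondWait.
std::mutex respondLock;
std::condition_variable okToRespond;
bool respondWait = false;
std::uint64_t currentByteSize = 300ULL << 20;   // start over budget
const std::uint64_t maxByteSize = 250ULL << 20;

// Producer side: block until the consumer has drained below the budget.
void waitForRoom()
{
    std::unique_lock<std::mutex> lk(respondLock);
    respondWait = true;
    okToRespond.wait(lk, [] { return currentByteSize < maxByteSize; });
    respondWait = false;
}

// Consumer side: after sending bytes, wake a blocked producer if room appeared.
void recordBytesSent(std::uint64_t bytes)
{
    std::lock_guard<std::mutex> lk(respondLock);
    currentByteSize -= bytes;
    if (respondWait && currentByteSize < maxByteSize)
        okToRespond.notify_one();
}

int main()
{
    std::thread producer(waitForRoom);
    recordBytesSent(100ULL << 20); // drain 100 MB, now under budget
    producer.join();
    std::cout << "producer resumed\n";
}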
@@ -1453,7 +1453,7 @@ struct BPPHandler
 SBPPV bppv;

 // make the new BPP object
-bppv.reset(new BPPV());
+bppv.reset(new BPPV(fPrimitiveServerPtr));
 bpp.reset(new BatchPrimitiveProcessor(bs, fPrimitiveServerPtr->prefetchThreshold(),
 bppv->getSendThread(), fPrimitiveServerPtr->ProcessorThreads()));

@@ -1913,7 +1913,7 @@ struct ReadThread
 /* Message format:
 * ISMPacketHeader
 * Partition count - 32 bits
-* Partition set - sizeof(LogicalPartition) * count
+* Partition set - sizeof(LogicalPartition) boost::shared_ptr* count
 * OID count - 32 bits
 * OID array - 32 bits * count
 */
@@ -2004,8 +2004,7 @@ struct ReadThread
 void operator()()
 {
 utils::setThreadName("PPReadThread");
-boost::shared_ptr<threadpool::PriorityThreadPool> procPoolPtr =
-fPrimitiveServerPtr->getProcessorThreadPool();
+threadpool::PriorityThreadPool* procPoolPtr = fPrimitiveServerPtr->getProcessorThreadPool();
 SBS bs;
 UmSocketSelector* pUmSocketSelector = UmSocketSelector::instance();

@@ -2475,8 +2474,8 @@ PrimitiveServer::PrimitiveServer(int serverThreads,
 fServerpool.setQueueSize(fServerQueueSize);
 fServerpool.setName("PrimitiveServer");

-fProcessorPool.reset(new threadpool::PriorityThreadPool(fProcessorWeight, highPriorityThreads,
-medPriorityThreads, lowPriorityThreads, 0));
+fProcessorPool = new threadpool::PriorityThreadPool(fProcessorWeight, highPriorityThreads,
+medPriorityThreads, lowPriorityThreads, 0);

 // We're not using either the priority or the job-clustering features, just need a threadpool
 // that can reschedule jobs, and an unlimited non-blocking queue
@@ -2526,9 +2525,10 @@ void PrimitiveServer::start(Service *service)
 cerr << "PrimitiveServer::start() exiting!" << endl;
 }

-BPPV::BPPV()
+BPPV::BPPV(PrimitiveServer* ps)
 {
 sendThread.reset(new BPPSendThread());
+sendThread->setProcessorPool(ps->getProcessorThreadPool());
 v.reserve(BPPCount);
 pos = 0;
 joinDataReceived = false;
@@ -2570,7 +2570,7 @@ const vector<boost::shared_ptr<BatchPrimitiveProcessor> >& BPPV::get()
 boost::shared_ptr<BatchPrimitiveProcessor> BPPV::next()
 {
 uint32_t size = v.size();
-uint32_t i;
+uint32_t i = 0;

 #if 0

@@ -55,10 +55,12 @@ extern BRM::DBRM* brm;
 extern boost::mutex bppLock;
 extern uint32_t highPriorityThreads, medPriorityThreads, lowPriorityThreads;

+class PrimitiveServer;
+
 class BPPV
 {
 public:
-BPPV();
+BPPV(PrimitiveServer* ps);
 ~BPPV();
 boost::shared_ptr<BatchPrimitiveProcessor> next();
 void add(boost::shared_ptr<BatchPrimitiveProcessor> a);
@@ -133,7 +135,7 @@ public:

 /** @brief get a pointer the shared processor thread pool
 */
-inline boost::shared_ptr<threadpool::PriorityThreadPool> getProcessorThreadPool() const
+inline threadpool::PriorityThreadPool* getProcessorThreadPool() const
 {
 return fProcessorPool;
 }
@@ -170,7 +172,7 @@ private:
 /** @brief the thread pool used to process
 * primitive commands
 */
-boost::shared_ptr<threadpool::PriorityThreadPool> fProcessorPool;
+threadpool::PriorityThreadPool* fProcessorPool;

 int fServerThreads;
 int fServerQueueSize;
@@ -76,7 +76,7 @@ using namespace idbdatafile;
 #include "mariadb_my_sys.h"

 #include "service.h"
+#include "threadnaming.h"

 class Opt
 {
@@ -259,6 +259,7 @@ public:

 void operator()()
 {
+utils::setThreadName("QszMonThd");
 for (;;)
 {
 uint32_t qd = fPsp->getProcessorThreadPool()->getWaiting();
@@ -304,6 +305,7 @@ private:
 #ifdef DUMP_CACHE_CONTENTS
 void* waitForSIGUSR1(void* p)
 {
+utils::setThreadName("waitForSIGUSR1");
 #if defined(__LP64__) || defined(_MSC_VER)
 ptrdiff_t tmp = reinterpret_cast<ptrdiff_t>(p);
 int cacheCount = static_cast<int>(tmp);
@@ -42,6 +42,7 @@ using namespace std;
 using namespace logging;

 #include "MonitorProcMem.h"
+#include "threadnaming.h"

 namespace utils
 {
@@ -57,6 +58,7 @@ int MonitorProcMem::fMemPctCheck = 0;
 //------------------------------------------------------------------------------
 void MonitorProcMem::operator()() const
 {
+utils::setThreadName("MonitorProcMem");
 while (1)
 {
 if (fMaxPct > 0)
@@ -165,6 +165,28 @@ inline bool atomicCAS(volatile T* mem, T comp, T swap)
 #endif
 }

+// implements a zero out of a variable
+template <typename T>
+inline void atomicZero(volatile T* mem)
+{
+#ifdef _MSC_VER
+
+switch (sizeof(T))
+{
+case 4:
+default:
+InterlockedXor(reinterpret_cast<volatile LONG*>(mem),(static_cast<LONG>(*mem)));
+break;
+
+case 8:
+InterlockedXor64(reinterpret_cast<volatile LONG*>(mem),(static_cast<LONG>(*mem)));
+break;
+}
+#else
+__sync_xor_and_fetch(mem, *mem);
+#endif
+}
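
atomicZero() clears a word by XOR-ing it with its own current value (x ^ x == 0), via InterlockedXor on Windows and __sync_xor_and_fetch elsewhere. The read of *mem that feeds the XOR is not itself synchronized, so the idiom only behaves as a clear when no concurrent writer races it. With std::atomic the trick is unnecessary, as this small comparison shows:

#include <atomic>
#include <cstdint>
#include <iostream>

std::atomic<std::uint64_t> counter{42};

int main()
{
    // Mirrors the diff's idiom: two separate atomic operations, so a writer
    // could slip in between the load and the xor.
    counter.fetch_xor(counter.load());
    std::cout << counter.load() << '\n'; // 0

    counter = 42;
    counter.store(0); // a plain atomic store is simpler and race-free
    std::cout << counter.load() << '\n'; // 0
}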

 //Implements a scheduler yield
 inline void atomicYield()
 {
@@ -104,9 +104,6 @@ JoinPartition::JoinPartition(const RowGroup& lRG,

 buckets.reserve(bucketCount);

-for (int i = 0; i < (int) bucketCount; i++)
-buckets.push_back(boost::shared_ptr<JoinPartition>(new JoinPartition(*this, false)));
-
 string compressionType;
 try
 {
@@ -122,6 +119,11 @@ JoinPartition::JoinPartition(const RowGroup& lRG,
 {
 compressor.reset(new compress::CompressInterfaceSnappy());
 }

+for (uint32_t i = 0; i < bucketCount; i++)
+buckets.push_back(boost::shared_ptr<JoinPartition>(new JoinPartition(*this, false)));
+
+
 }

 /* Ctor used by JoinPartition on expansion, creates JP's in filemode */
@@ -103,6 +103,7 @@
 2054 ERR_DISKAGG_ERROR Unknown error while aggregation.
 2055 ERR_DISKAGG_TOO_BIG Not enough memory to make disk-based aggregation. Raise TotalUmMemory if possible.
 2056 ERR_DISKAGG_FILEIO_ERROR There was an IO error during a disk-based aggregation: %1%
+2057 ERR_JOIN_RESULT_TOO_BIG Not enough memory to consolidate join results. Estimated %1% MB needed. TotalUmMemory is %2% MB.

 # Sub-query errors
 3001 ERR_NON_SUPPORT_SUB_QUERY_TYPE This subquery type is not supported yet.
@@ -4918,12 +4918,12 @@ RowAggregationMultiDistinct::RowAggregationMultiDistinct(const RowAggregationMul
 for (uint32_t i = 0; i < rhs.fSubAggregators.size(); i++)
 {
 #if 0
-fTotalMemUsage += fSubRowGroups[i].getDataSize(AGG_ROWGROUP_SIZE);

 if (!fRm->getMemory(fSubRowGroups[i].getDataSize(AGG_ROWGROUP_SIZE, fSessionMemLimit)))
 throw logging::IDBExcept(logging::IDBErrorInfo::instance()->
 errorMsg(logging::ERR_AGGREGATION_TOO_BIG), logging::ERR_AGGREGATION_TOO_BIG);

+fTotalMemUsage += fSubRowGroups[i].getDataSize(AGG_ROWGROUP_SIZE);

 #endif
 data.reset(new RGData(fSubRowGroups[i], RowAggStorage::getMaxRows(fRm ? fRm->getAllowDiskAggregation() : false)));
 fSubRowData.push_back(data);
@@ -4964,12 +4964,11 @@ void RowAggregationMultiDistinct::addSubAggregator(const boost::shared_ptr<RowAg
 {
 boost::shared_ptr<RGData> data;
 #if 0
-fTotalMemUsage += rg.getDataSize(AGG_ROWGROUP_SIZE);

 if (!fRm->getMemory(rg.getDataSize(AGG_ROWGROUP_SIZE), fSessionMemLimit))
 throw logging::IDBExcept(logging::IDBErrorInfo::instance()->
 errorMsg(logging::ERR_AGGREGATION_TOO_BIG), logging::ERR_AGGREGATION_TOO_BIG);

+fTotalMemUsage += rg.getDataSize(AGG_ROWGROUP_SIZE);
 #endif
 data.reset(new RGData(rg, RowAggStorage::getMaxRows(fRm ? fRm->getAllowDiskAggregation() : false)));
 fSubRowData.push_back(data);
@@ -325,25 +325,31 @@ public:
 protected:
 bool acquireImpl(size_t amount) final
 {
-MemManager::acquireImpl(amount);
-if (!fRm->getMemory(amount, fSessLimit, fWait) && fStrict)
+if (amount)
 {
-return false;
+if (!fRm->getMemory(amount, fSessLimit, fWait) && fStrict)
+{
+return false;
+}
+MemManager::acquireImpl(amount);
 }
 return true;
 }

-void releaseImpl(size_t amount) override {
-MemManager::releaseImpl(amount);
-fRm->returnMemory(amount, fSessLimit);
+void releaseImpl(size_t amount) override
+{
+if (amount)
+{
+MemManager::releaseImpl(amount);
+fRm->returnMemory(amount, fSessLimit);
+}
 }
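
The reordered acquireImpl() asks the ResourceManager for budget before recording the allocation locally, so a refused strict request no longer leaves the local tally inflated; both paths also now skip zero-byte calls entirely. A toy model of the check-then-account pattern (ToyRm and ToyMm are invented stand-ins, not the real classes, and the real getMemory() is thread-safe where this one is not):

#include <cstddef>
#include <iostream>

// Toy stand-ins for ResourceManager and MemManager accounting.
struct ToyRm
{
    std::size_t budget;
    bool getMemory(std::size_t amount) // grant or refuse the request
    {
        if (amount > budget) return false;
        budget -= amount;
        return true;
    }
    void returnMemory(std::size_t amount) { budget += amount; }
};

struct ToyMm
{
    ToyRm* rm;
    std::size_t held = 0;
    bool strict = true;

    bool acquire(std::size_t amount)
    {
        if (amount)
        {
            // Ask first; only account locally once the grant succeeds.
            if (!rm->getMemory(amount) && strict)
                return false;
            held += amount;
        }
        return true;
    }
    void release(std::size_t amount)
    {
        if (amount)
        {
            held -= amount;
            rm->returnMemory(amount);
        }
    }
};

int main()
{
    ToyRm rm{100};
    ToyMm mm{&rm};
    std::cout << mm.acquire(80) << ' ' << mm.held << '\n'; // 1 80
    std::cout << mm.acquire(50) << ' ' << mm.held << '\n'; // 0 80: refused, tally intact
    mm.release(80);
    std::cout << rm.budget << '\n';                        // 100
}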

 private:
 joblist::ResourceManager* fRm = nullptr;
 boost::shared_ptr<int64_t> fSessLimit;
 const bool fWait;
 const bool fStrict;
 };

 class Dumper {
@@ -353,7 +359,8 @@ public:
 , fMM(mm->clone())
 {}

-int write(const std::string &fname, const char *buf, size_t sz) {
+int write(const std::string &fname, const char *buf, size_t sz)
+{
 if (sz == 0)
 return 0;

@@ -368,7 +375,9 @@ public:
 fCompressor->compress(buf, sz, fTmpBuf.data(), &len);
 tmpbuf = fTmpBuf.data();
 sz = len;
-} else {
+}
+else
+{
 tmpbuf = buf;
 }

@@ -28,6 +28,7 @@ using namespace std;

 #include "messageobj.h"
 #include "messagelog.h"
+#include "threadnaming.h"
 using namespace logging;

 #include "prioritythreadpool.h"
@@ -40,7 +41,8 @@ namespace threadpool

 PriorityThreadPool::PriorityThreadPool(uint targetWeightPerRun, uint highThreads,
 uint midThreads, uint lowThreads, uint ID) :
-_stop(false), weightPerRun(targetWeightPerRun), id(ID)
+_stop(false), weightPerRun(targetWeightPerRun), id(ID),
+blockedThreads(0), extraThreads(0), stopExtra(true)
 {
 boost::thread* newThread;
 for (uint32_t i = 0; i < highThreads; i++)
@@ -99,7 +101,22 @@ void PriorityThreadPool::addJob(const Job& job, bool useLock)
 newThread->detach();
 threadCounts[LOW]++;
 }

+// If some threads have blocked (because the output queue is full),
+// temporarily add some extra worker threads to make up for the blocked threads.
+if (blockedThreads > extraThreads)
+{
+stopExtra = false;
+newThread = threads.create_thread(ThreadHelper(this, EXTRA));
+newThread->detach();
+extraThreads++;
+}
+else if (blockedThreads == 0)
+{
+// Release the temporary threads -- some threads have become unblocked.
+stopExtra = true;
+}
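
The block added to addJob() is the heart of the EXTRA-thread mechanism: while more pool threads are blocked than extras exist, each submission spawns one detached EXTRA worker, and once nothing is blocked, stopExtra tells the extras to retire the next time they find no work. A condensed runnable sketch of that bookkeeping; the worker body is reduced to just the retirement path, and stopExtra is made atomic here for the sketch's own correctness:

#include <atomic>
#include <cstdint>
#include <iostream>
#include <thread>

std::atomic<std::uint32_t> blockedThreads{0};
std::atomic<std::uint32_t> extraThreads{0};
std::atomic<bool> stopExtra{true};

// Stand-in for threads.create_thread(ThreadHelper(this, EXTRA)).
void spawnExtraThread()
{
    std::thread([]
    {
        // A real EXTRA worker pulls jobs; this one just retires as soon as
        // stopExtra is raised, like an idle EXTRA thread in the pool.
        while (!stopExtra)
            std::this_thread::yield();
        --extraThreads;
    }).detach();
}

// Mirrors the logic added to addJob(), called on every submission.
void maybeAdjustExtras()
{
    if (blockedThreads > extraThreads)
    {
        stopExtra = false;   // keep existing extras alive
        spawnExtraThread();  // cover one more blocked thread
        ++extraThreads;
    }
    else if (blockedThreads == 0)
    {
        stopExtra = true;    // extras exit when they next go idle
    }
}

int main()
{
    blockedThreads = 2;  // pretend two workers hit a full output queue
    maybeAdjustExtras(); // spawns the first extra
    maybeAdjustExtras(); // spawns a second
    blockedThreads = 0;
    maybeAdjustExtras(); // raises stopExtra; the extras drain away
    while (extraThreads != 0)
        std::this_thread::yield();
    std::cout << "extras retired\n";
}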

 if (job.priority > 66)
 jobQueues[HIGH].push_back(job);
 else if (job.priority > 33)
@@ -127,7 +144,7 @@ void PriorityThreadPool::removeJobs(uint32_t id)

 PriorityThreadPool::Priority PriorityThreadPool::pickAQueue(Priority preference)
 {
-if (!jobQueues[preference].empty())
+if (preference != EXTRA && !jobQueues[preference].empty())
 return preference;
 else if (!jobQueues[HIGH].empty())
 return HIGH;
@@ -139,6 +156,10 @@ PriorityThreadPool::Priority PriorityThreadPool::pickAQueue(Priority preference)

 void PriorityThreadPool::threadFcn(const Priority preferredQueue) throw()
 {
+if (preferredQueue == EXTRA)
+utils::setThreadName("Extra");
+else
+utils::setThreadName("Idle");
 Priority queue = LOW;
 uint32_t weight, i = 0;
 vector<Job> runList;
@@ -158,6 +179,14 @@ void PriorityThreadPool::threadFcn(const Priority preferredQueue) throw()

 if (jobQueues[queue].empty())
 {
+// If this is an EXTRA thread due to other threads blocking, and all blockers are unblocked,
+// we don't want this one any more.
+if (preferredQueue == EXTRA && stopExtra)
+{
+extraThreads--;
+return;
+}
+
 newJob.wait(lk);
 continue;
 }
@@ -194,7 +223,11 @@ void PriorityThreadPool::threadFcn(const Priority preferredQueue) throw()
 if (reschedule[i])
 rescheduleCount++;
 }
+if (preferredQueue == EXTRA)
+utils::setThreadName("Extra (used)");
+else
+utils::setThreadName("Idle");

 // no real work was done, prevent intensive busy waiting
 if (rescheduleCount == runList.size())
 usleep(1000);
@@ -216,6 +249,7 @@ void PriorityThreadPool::threadFcn(const Priority preferredQueue) throw()
 }

 runList.clear();

 }
 }
 catch (std::exception& ex)
@@ -35,8 +35,10 @@
 #include <boost/thread/condition.hpp>
 #include <boost/shared_ptr.hpp>
 #include <boost/function.hpp>
+#include <atomic>
 #include "../winport/winport.h"
 #include "primitives/primproc/umsocketselector.h"
+#include "atomicops.h"

 namespace threadpool
 {
@@ -73,7 +75,8 @@ public:
 LOW,
 MEDIUM,
 HIGH,
-_COUNT
+_COUNT,
+EXTRA // After _COUNT because _COUNT is for jobQueue size and EXTRA isn't a jobQueue. But we need EXTRA in places where Priority is used.
 };
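
Placing EXTRA after _COUNT keeps the jobQueues array sized by _COUNT while still giving the extra workers a distinct Priority value, which is why pickAQueue() must now screen EXTRA out before indexing. The idiom in isolation; the fallback order past HIGH is approximated, since the hunk cuts off before showing it:

#include <array>
#include <deque>
#include <iostream>

enum Priority
{
    LOW,
    MEDIUM,
    HIGH,
    _COUNT, // number of real job queues
    EXTRA   // a thread role, not a queue: must never index jobQueues
};

std::array<std::deque<int>, _COUNT> jobQueues; // sized by _COUNT only

Priority pickAQueue(Priority preference)
{
    // EXTRA has no queue of its own, so skip the preference check for it.
    if (preference != EXTRA && !jobQueues[preference].empty())
        return preference;
    if (!jobQueues[HIGH].empty()) return HIGH;
    if (!jobQueues[MEDIUM].empty()) return MEDIUM;
    return LOW;
}

int main()
{
    jobQueues[MEDIUM].push_back(1);
    std::cout << (pickAQueue(EXTRA) == MEDIUM) << '\n'; // 1
}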

 /*********************************************
@@ -95,7 +98,21 @@ public:
 /** @brief for use in debugging
 */
 void dump();

+// If a job is blocked, we want to temporarily increase the number of threads managed by the pool.
+// A problem can occur if all threads are running long or blocked for a single query. Other
+// queries won't get serviced, even though there are cpu cycles available.
+// These calls are currently protected by respondLock in sendThread(). If you call from other
+// places, you need to consider atomicity.
+void incBlockedThreads()
+{
+blockedThreads++;
+}
+void decBlockedThreads()
+{
+blockedThreads--;
+}
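
incBlockedThreads() and decBlockedThreads() bracket the send thread's blocking wait, protected by respondLock per the comment above. One way to keep the pair balanced on every exit path is a small RAII guard; BlockedThreadGuard is a hypothetical convenience wrapper, not part of the patch:

#include <atomic>
#include <cstdint>

std::atomic<std::uint32_t> blockedThreads{0};

// Hypothetical RAII wrapper around inc/decBlockedThreads().
class BlockedThreadGuard
{
public:
    BlockedThreadGuard()  { ++blockedThreads; }
    ~BlockedThreadGuard() { --blockedThreads; }
    BlockedThreadGuard(const BlockedThreadGuard&) = delete;
    BlockedThreadGuard& operator=(const BlockedThreadGuard&) = delete;
};

void sendWithBackpressure(bool queueFull)
{
    if (queueFull)
    {
        BlockedThreadGuard guard; // pool may now spawn an EXTRA thread
        // ... wait on okToRespond until the queue drains ...
    }                             // guard drops the blocked count here,
                                  // even on early return or exception
    // ... enqueue the result ...
}

int main()
{
    sendWithBackpressure(true);
    return blockedThreads.load(); // 0: inc/dec always balanced
}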

 protected:

 private:
@@ -127,6 +144,10 @@ private:
 bool _stop;
 uint32_t weightPerRun;
 volatile uint id; // prevent it from being optimized out

+std::atomic<uint32_t> blockedThreads;
+std::atomic<uint32_t> extraThreads;
+bool stopExtra;
 };

 } // namespace threadpool
@@ -88,6 +88,7 @@ void ThreadPool::setQueueSize(size_t queueSize)

 void ThreadPool::pruneThread()
 {
+utils::setThreadName("pruneThread");
 boost::unique_lock<boost::mutex> lock2(fPruneMutex);

 while(true)
@@ -771,15 +771,13 @@ void IdbOrderBy::initialize(const RowGroup& rg)
 IdbCompare::initialize(rg);

 uint64_t newSize = rg.getSizeWithStrings(fRowsPerRG);
-fMemSize += newSize;

-if (!fRm->getMemory(newSize, fSessionMemLimit))
+if (fRm && !fRm->getMemory(newSize, fSessionMemLimit))
 {
 cerr << IDBErrorInfo::instance()->errorMsg(fErrorCode)
 << " @" << __FILE__ << ":" << __LINE__;
 throw IDBExcept(fErrorCode);
 }
+fMemSize += newSize;
 fData.reinit(fRowGroup, fRowsPerRG);
 fRowGroup.setData(&fData);
 fRowGroup.resetRowGroup(0);