mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git

MCOL-1829 Subquery with limited ORDER BY could potentially return an unordered set.

There were two code mistakes: Eq::operator() always returned true for any pair, and Hasher::operator() always returned 0 as a key.
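
The practical effect of the two broken functors is easy to reproduce with a toy container. The sketch below is illustrative only (plain ints instead of Row::Pointer, not repository code): a hasher that always returns 0 plus an equality that always returns true makes every element look like a duplicate, so an unordered set silently collapses to a single entry.

#include <cstddef>
#include <initializer_list>
#include <iostream>
#include <unordered_set>

// Deliberately broken functors mirroring the bugs described above.
struct BrokenHasher { std::size_t operator()(int) const { return 0; } };   // every key hashes to 0
struct BrokenEq     { bool operator()(int, int) const { return true; } };  // every pair compares "equal"

int main()
{
    std::unordered_set<int, BrokenHasher, BrokenEq> broken;
    std::unordered_set<int> correct;

    for (int row : {1, 2, 3, 4})
    {
        broken.insert(row);    // only the first insert actually adds an element
        correct.insert(row);
    }

    std::cout << "broken set size:  " << broken.size()  << '\n';   // prints 1
    std::cout << "correct set size: " << correct.size() << '\n';   // prints 4
    return 0;
}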
Roman Nozdrin
2018-12-25 12:50:18 +03:00
parent 0584b114da
commit 35a17a87c4
4 changed files with 16 additions and 17 deletions

View File

@@ -113,7 +113,6 @@ void LimitedOrderBy::processRow(const rowgroup::Row& row)
     if (fOrderByQueue.size() < fStart + fCount)
     {
         copyRow(row, &fRow0);
-        //memcpy(fRow0.getData(), row.getData(), row.getSize());
         OrderByRow newRow(fRow0, fRule);
         fOrderByQueue.push(newRow);
@@ -121,8 +120,6 @@ void LimitedOrderBy::processRow(const rowgroup::Row& row)
         if (fDistinct)
             fDistinctMap->insert(fRow0.getPointer());
-        //fDistinctMap->insert(make_pair((fRow0.getData()+2), fRow0.getData()));
         fRowGroup.incRowCount();
         fRow0.nextRow();
@@ -150,23 +147,16 @@ void LimitedOrderBy::processRow(const rowgroup::Row& row)
     {
         OrderByRow swapRow = fOrderByQueue.top();
         row1.setData(swapRow.fData);
+        fOrderByQueue.pop();
+        copyRow(row, &row1);
 
-        if (!fDistinct)
+        if (fDistinct)
         {
-            copyRow(row, &row1);
-            //memcpy(swapRow.fData, row.getData(), row.getSize());
-        }
-        else
-        {
-            fDistinctMap->erase(row.getPointer());
-            copyRow(row, &row1);
+            fDistinctMap->erase(fOrderByQueue.top().fData);
             fDistinctMap->insert(row1.getPointer());
-            //fDistinctMap->erase(fDistinctMap->find(row.getData() + 2));
-            //memcpy(swapRow.fData, row.getData(), row.getSize());
-            //fDistinctMap->insert(make_pair((swapRow.fData+2), swapRow.fData));
         }
 
-        fOrderByQueue.pop();
         fOrderByQueue.push(swapRow);
     }
 }
@@ -228,6 +218,7 @@ void LimitedOrderBy::finalize()
         const OrderByRow& topRow = fOrderByQueue.top();
         row1.setData(topRow.fData);
         copyRow(row1, &fRow0);
+        //cerr << "LimitedOrderBy::finalize fRow0 " << fRow0.toString() << endl;
         fRowGroup.incRowCount();
         offset--;
         fRow0.prevRow(rSize);
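
For context on the block patched above: LimitedOrderBy keeps at most fStart + fCount rows in a priority queue and, once the queue is full, swaps its top element for any incoming row that sorts ahead of it. The following is a simplified sketch of that bounded-queue pattern with plain ints and an illustrative name (keepSmallest); it is not the RowGroup-based implementation.

#include <cstddef>
#include <iostream>
#include <queue>
#include <vector>

// Keep the `limit` smallest values seen so far, analogous to a LIMITed ORDER BY.
std::vector<int> keepSmallest(const std::vector<int>& input, std::size_t limit)
{
    std::priority_queue<int> queue;        // max-heap: top() is the worst value currently kept

    for (int value : input)
    {
        if (queue.size() < limit)
        {
            queue.push(value);             // still room, keep unconditionally
        }
        else if (value < queue.top())
        {
            queue.pop();                   // drop the current worst...
            queue.push(value);             // ...and keep the better candidate
        }
    }

    std::vector<int> result;
    while (!queue.empty())
    {
        result.push_back(queue.top());
        queue.pop();
    }
    return result;                         // the `limit` smallest values, largest first
}

int main()
{
    for (int v : keepSmallest({7, 3, 9, 1, 5, 8}, 3))
        std::cout << v << ' ';             // prints: 5 3 1
    std::cout << '\n';
    return 0;
}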

View File

@@ -5554,6 +5554,8 @@ void TupleAggregateStep::threadedAggregateRowGroups(uint32_t threadID)
                     rowIn.copyField(distRow[j], k, multiDist->subAggregators()[j]->getGroupByCols()[k].get()->fInputColumnIndex);
                 }
+                // TBD This approach could potentially
+                // put all values in one bucket.
                 bucketID = distRow[j].hash(hashLens[j] - 1) % fNumOfBuckets;
                 rowBucketVecs[bucketID][j].push_back(rowIn.getPointer());
                 rowIn.nextRow();
@@ -5572,6 +5574,8 @@ void TupleAggregateStep::threadedAggregateRowGroups(uint32_t threadID)
                 for (uint64_t i = 0; i < fRowGroupIns[threadID].getRowCount(); ++i)
                 {
                     // The key is the groupby columns, which are the leading columns.
+                    // TBD This approach could potentially
+                    // put all values in one bucket.
                     int bucketID = rowIn.hash(hashLens[0] - 1) % fNumOfBuckets;
                     rowBucketVecs[bucketID][0].push_back(rowIn.getPointer());
                     rowIn.nextRow();
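
The TBD comments added above flag a real hazard in this fan-out scheme: rows are assigned to worker buckets with hash % fNumOfBuckets, so a hash computed over the wrong column range (or any otherwise degenerate hash) sends every row to the same bucket and serializes the aggregation. A small illustration of that failure mode, using std::hash on strings rather than ColumnStore's Row::hash, is sketched below.

#include <cstddef>
#include <functional>
#include <iostream>
#include <string>
#include <vector>

int main()
{
    const std::size_t numBuckets = 4;
    std::vector<std::size_t> spread(numBuckets, 0);      // healthy hash
    std::vector<std::size_t> degenerate(numBuckets, 0);  // constant hash

    const std::vector<std::string> groupByKeys = {"red", "green", "blue", "cyan", "black", "white"};

    for (const std::string& key : groupByKeys)
    {
        spread[std::hash<std::string>{}(key) % numBuckets] += 1;  // fan out by hash % numBuckets
        degenerate[0] += 1;                                       // a constant hash always picks bucket 0
    }

    for (std::size_t b = 0; b < numBuckets; ++b)
        std::cout << "bucket " << b << ": spread=" << spread[b]
                  << " degenerate=" << degenerate[b] << '\n';
    return 0;
}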

View File

@@ -1183,6 +1183,7 @@ inline bool Row::equals(const Row& r2, const std::vector<uint32_t>& keyCols) const
 inline bool Row::equals(const Row& r2, uint32_t lastCol) const
 {
+    // This check fires with empty r2 only.
     if (lastCol >= columnCount)
         return true;
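
The comment added above documents a subtle contract: the second argument of Row::equals is the index of the last column to compare, not a column count, and an out-of-range value short-circuits to true. A hypothetical SimpleRow (not the real rowgroup::Row) sketches why calling equals with colCount instead of colCount - 1 made every pair of rows compare equal.

#include <cstdint>
#include <iostream>
#include <vector>

struct SimpleRow
{
    std::vector<int64_t> cols;

    // Compare columns [0, lastCol]; lastCol is an index, not a count.
    bool equals(const SimpleRow& other, uint32_t lastCol) const
    {
        if (lastCol >= cols.size())
            return true;                   // out of range: degenerates to "always equal"

        for (uint32_t i = 0; i <= lastCol; ++i)
            if (cols[i] != other.cols[i])
                return false;
        return true;
    }
};

int main()
{
    SimpleRow a{{1, 2, 3}};
    SimpleRow b{{9, 9, 9}};

    std::cout << a.equals(b, 3) << '\n';   // colCount     -> prints 1 (bogus "equal")
    std::cout << a.equals(b, 2) << '\n';   // colCount - 1 -> prints 0 (real comparison)
    return 0;
}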

View File

@@ -461,7 +461,8 @@ uint64_t IdbOrderBy::Hasher::operator()(const Row::Pointer& p) const
 {
     Row& row = ts->row1;
     row.setPointer(p);
-    uint64_t ret = row.hash(colCount);
+    // MCOL-1829 Row::hash uses colcount as an array idx down a callstack.
+    uint64_t ret = row.hash();//(colCount - 1);
     //cout << "hash(): returning " << ret << " for row: " << row.toString() << endl;
     return ret;
 }
@@ -471,7 +472,9 @@ bool IdbOrderBy::Eq::operator()(const Row::Pointer& d1, const Row::Pointer& d2)
     Row& r1 = ts->row1, &r2 = ts->row2;
     r1.setPointer(d1);
     r2.setPointer(d2);
-    bool ret = r1.equals(r2, colCount);
+    // MCOL-1829 Row::equals uses 2nd argument as container size boundary
+    // so it must be column count - 1.
+    bool ret = r1.equals(r2, colCount - 1);
     //cout << "equals(): returning " << (int) ret << " for r1: " << r1.toString() << " r2: " << r2.toString()
     //     << endl;
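
Taken together, the two fixes above mean that both functors now walk real column data over the range [0, colCount - 1]. The sketch below is a hedged illustration of that corrected hasher/equality pairing, with a hypothetical FakeRow (a plain vector of int64_t) standing in for rowgroup::Row and Row::Pointer; it is not the repository's implementation.

#include <cstddef>
#include <cstdint>
#include <functional>
#include <iostream>
#include <unordered_set>
#include <vector>

using FakeRow = std::vector<int64_t>;

// Hash columns 0 .. colCount-1 of the pointed-to row.
struct RowHasher
{
    uint32_t colCount;
    std::size_t operator()(const FakeRow* r) const
    {
        std::size_t h = 0;
        for (uint32_t i = 0; i < colCount; ++i)
            h = h * 31 + std::hash<int64_t>{}((*r)[i]);
        return h;
    }
};

// Compare columns 0 .. colCount-1; a genuine comparison, not "always true".
struct RowEq
{
    uint32_t colCount;
    bool operator()(const FakeRow* a, const FakeRow* b) const
    {
        for (uint32_t i = 0; i < colCount; ++i)
            if ((*a)[i] != (*b)[i])
                return false;
        return true;
    }
};

int main()
{
    FakeRow r1{1, 10}, r2{2, 20}, r3{1, 10};
    std::unordered_set<const FakeRow*, RowHasher, RowEq> distinct(8, RowHasher{2}, RowEq{2});

    distinct.insert(&r1);
    distinct.insert(&r2);
    distinct.insert(&r3);                  // same column values as r1, rejected as a duplicate

    std::cout << distinct.size() << '\n';  // prints 2
    return 0;
}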