1
0
mirror of https://github.com/postgres/postgres.git synced 2025-08-30 06:01:21 +03:00

Support hashed aggregation with grouping sets.

This extends the Aggregate node with two new features: HashAggregate
can now run multiple hashtables concurrently, and a new strategy
MixedAggregate populates hashtables while doing sorted grouping.

The planner will now attempt to save as many sorts as possible when
planning grouping sets queries, while not exceeding work_mem for the
estimated combined sizes of all hashtables used.  No SQL-level changes
are required.  There should be no user-visible impact other than the
new EXPLAIN output and possible changes to result ordering when ORDER
BY was not used (which affected a few regression tests).  The
enable_hashagg option is respected.

Author: Andrew Gierth
Reviewers: Mark Dilger, Andres Freund
Discussion: https://postgr.es/m/87vatszyhj.fsf@news-spur.riddles.org.uk
This commit is contained in:
Andrew Gierth
2017-03-27 04:20:54 +01:00
parent f0a6046bcb
commit b5635948ab
22 changed files with 2552 additions and 602 deletions

View File

@@ -21,6 +21,7 @@
#include "postgres.h"
#include "access/hash.h"
#include "nodes/pg_list.h"
#define WORDNUM(x) ((x) / BITS_PER_BITMAPWORD)
@@ -457,6 +458,35 @@ bms_overlap(const Bitmapset *a, const Bitmapset *b)
return false;
}
/*
 * bms_overlap_list - does a set overlap an integer list?
 *
 * Walks integer list "b" and returns true as soon as one of its entries is
 * found to be set in bitmapset "a".  Returns false when either input is
 * empty or when no entry matches.  A negative list entry reached before any
 * match is reported with elog(ERROR).
 */
bool
bms_overlap_list(const Bitmapset *a, const List *b)
{
	ListCell   *cell;

	/* An empty set or an empty list cannot overlap anything. */
	if (a == NULL || b == NIL)
		return false;

	foreach(cell, b)
	{
		int			member = lfirst_int(cell);
		int			word_idx;

		if (member < 0)
			elog(ERROR, "negative bitmapset member not allowed");

		/*
		 * Entries that fall past the last word held by "a" cannot be
		 * members; the range test comes first so that words[] is never
		 * indexed out of bounds.
		 */
		word_idx = WORDNUM(member);
		if (word_idx < a->nwords &&
			(a->words[word_idx] & ((bitmapword) 1 << BITNUM(member))) != 0)
			return true;
	}

	return false;
}
/*
* bms_nonempty_difference - do sets have a nonempty difference?
*/

View File

@@ -1941,6 +1941,28 @@ _outAggPath(StringInfo str, const AggPath *node)
WRITE_NODE_FIELD(qual);
}
/*
 * _outRollupData - write out a RollupData node
 *
 * Emits the node label "ROLLUP" followed by the fields listed below, in this
 * order, through the file's WRITE_* macros.  Those macros are defined outside
 * this excerpt; presumably they append to "str" and read the fields from
 * "node" -- confirm against their definitions.
 */
static void
_outRollupData(StringInfo str, const RollupData *node)
{
WRITE_NODE_TYPE("ROLLUP");
/* fields written with the node-field macro */
WRITE_NODE_FIELD(groupClause);
WRITE_NODE_FIELD(gsets);
WRITE_NODE_FIELD(gsets_data);
/* numGroups is written with the "%.0f" format, i.e. no fractional digits */
WRITE_FLOAT_FIELD(numGroups, "%.0f");
/* boolean flags */
WRITE_BOOL_FIELD(hashable);
WRITE_BOOL_FIELD(is_hashed);
}
/*
 * _outGroupingSetData - write out a GroupingSetData node
 *
 * Emits the node label "GSDATA" followed by the "set" and "numGroups"
 * fields, in that order, through the file's WRITE_* macros.  Those macros
 * are defined outside this excerpt; presumably they append to "str" and read
 * the fields from "node" -- confirm against their definitions.
 */
static void
_outGroupingSetData(StringInfo str, const GroupingSetData *node)
{
WRITE_NODE_TYPE("GSDATA");
WRITE_NODE_FIELD(set);
/* numGroups is written with the "%.0f" format, i.e. no fractional digits */
WRITE_FLOAT_FIELD(numGroups, "%.0f");
}
static void
_outGroupingSetsPath(StringInfo str, const GroupingSetsPath *node)
{
@@ -1949,8 +1971,8 @@ _outGroupingSetsPath(StringInfo str, const GroupingSetsPath *node)
_outPathInfo(str, (const Path *) node);
WRITE_NODE_FIELD(subpath);
WRITE_NODE_FIELD(rollup_groupclauses);
WRITE_NODE_FIELD(rollup_lists);
WRITE_ENUM_FIELD(aggstrategy, AggStrategy);
WRITE_NODE_FIELD(rollups);
WRITE_NODE_FIELD(qual);
}
@@ -3961,14 +3983,18 @@ outNode(StringInfo str, const void *obj)
case T_PlannerParamItem:
_outPlannerParamItem(str, obj);
break;
case T_RollupData:
_outRollupData(str, obj);
break;
case T_GroupingSetData:
_outGroupingSetData(str, obj);
break;
case T_StatisticExtInfo:
_outStatisticExtInfo(str, obj);
break;
case T_ExtensibleNode:
_outExtensibleNode(str, obj);
break;
case T_CreateStmt:
_outCreateStmt(str, obj);
break;