mirror of
https://github.com/postgres/postgres.git
synced 2025-07-02 09:02:37 +03:00
Support hashed aggregation with grouping sets.
This extends the Aggregate node with two new features: HashAggregate can now run multiple hashtables concurrently, and a new strategy MixedAggregate populates hashtables while doing sorted grouping. The planner will now attempt to save as many sorts as possible when planning grouping sets queries, while not exceeding work_mem for the estimated combined sizes of all hashtables used. No SQL-level changes are required. There should be no user-visible impact other than the new EXPLAIN output and possible changes to result ordering when ORDER BY was not used (which affected a few regression tests). The enable_hashagg option is respected. Author: Andrew Gierth Reviewers: Mark Dilger, Andres Freund Discussion: https://postgr.es/m/87vatszyhj.fsf@news-spur.riddles.org.uk
This commit is contained in:
@ -12,7 +12,7 @@ subdir = src/backend/lib
|
||||
top_builddir = ../../..
|
||||
include $(top_builddir)/src/Makefile.global
|
||||
|
||||
OBJS = binaryheap.o bipartite_match.o hyperloglog.o ilist.o pairingheap.o \
|
||||
rbtree.o stringinfo.o
|
||||
OBJS = binaryheap.o bipartite_match.o hyperloglog.o ilist.o knapsack.o \
|
||||
pairingheap.o rbtree.o stringinfo.o
|
||||
|
||||
include $(top_srcdir)/src/backend/common.mk
|
||||
|
114
src/backend/lib/knapsack.c
Normal file
114
src/backend/lib/knapsack.c
Normal file
@ -0,0 +1,114 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* knapsack.c
|
||||
* Knapsack problem solver
|
||||
*
|
||||
* Given input vectors of integral item weights (must be >= 0) and values
|
||||
* (double >= 0), compute the set of items which produces the greatest total
|
||||
* value without exceeding a specified total weight; each item is included at
|
||||
* most once (this is the 0/1 knapsack problem). Weight 0 items will always be
|
||||
* included.
|
||||
*
|
||||
* The performance of this algorithm is pseudo-polynomial, O(nW) where W is the
|
||||
* weight limit. To use with non-integral weights or approximate solutions,
|
||||
* the caller should pre-scale the input weights to a suitable range. This
|
||||
* allows approximate solutions in polynomial time (the general case of the
|
||||
* exact problem is NP-hard).
|
||||
*
|
||||
* Copyright (c) 2017, PostgreSQL Global Development Group
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* src/backend/lib/knapsack.c
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
#include "postgres.h"
|
||||
|
||||
#include <math.h>
|
||||
#include <limits.h>
|
||||
|
||||
#include "lib/knapsack.h"
|
||||
#include "miscadmin.h"
|
||||
#include "nodes/bitmapset.h"
|
||||
#include "utils/builtins.h"
|
||||
#include "utils/memutils.h"
|
||||
#include "utils/palloc.h"
|
||||
|
||||
/*
|
||||
* DiscreteKnapsack
|
||||
*
|
||||
* The item_values input is optional; if omitted, all the items are assumed to
|
||||
* have value 1.
|
||||
*
|
||||
* Returns a Bitmapset of the 0..(n-1) indexes of the items chosen for
|
||||
* inclusion in the solution.
|
||||
*
|
||||
* This uses the usual dynamic-programming algorithm, adapted to reuse the
|
||||
* memory on each pass (by working from larger weights to smaller). At the
|
||||
* start of pass number i, the values[w] array contains the largest value
|
||||
* computed with total weight <= w, using only items with indices < i; and
|
||||
* sets[w] contains the bitmap of items actually used for that value. (The
|
||||
* bitmapsets are all pre-initialized with an unused high bit so that memory
|
||||
* allocation is done only once.)
|
||||
*/
|
||||
Bitmapset *
|
||||
DiscreteKnapsack(int max_weight, int num_items,
|
||||
int *item_weights, double *item_values)
|
||||
{
|
||||
MemoryContext local_ctx = AllocSetContextCreate(CurrentMemoryContext,
|
||||
"Knapsack",
|
||||
ALLOCSET_SMALL_MINSIZE,
|
||||
ALLOCSET_SMALL_INITSIZE,
|
||||
ALLOCSET_SMALL_MAXSIZE);
|
||||
MemoryContext oldctx = MemoryContextSwitchTo(local_ctx);
|
||||
double *values;
|
||||
Bitmapset **sets;
|
||||
Bitmapset *result;
|
||||
int i,
|
||||
j;
|
||||
|
||||
Assert(max_weight >= 0);
|
||||
Assert(num_items > 0 && item_weights);
|
||||
|
||||
values = palloc((1 + max_weight) * sizeof(double));
|
||||
sets = palloc((1 + max_weight) * sizeof(Bitmapset *));
|
||||
|
||||
for (i = 0; i <= max_weight; ++i)
|
||||
{
|
||||
values[i] = 0;
|
||||
sets[i] = bms_make_singleton(num_items);
|
||||
}
|
||||
|
||||
for (i = 0; i < num_items; ++i)
|
||||
{
|
||||
int iw = item_weights[i];
|
||||
double iv = item_values ? item_values[i] : 1;
|
||||
|
||||
for (j = max_weight; j >= iw; --j)
|
||||
{
|
||||
int ow = j - iw;
|
||||
|
||||
if (values[j] <= values[ow] + iv)
|
||||
{
|
||||
/* copy sets[ow] to sets[j] without realloc */
|
||||
if (j != ow)
|
||||
{
|
||||
sets[j] = bms_del_members(sets[j], sets[j]);
|
||||
sets[j] = bms_add_members(sets[j], sets[ow]);
|
||||
}
|
||||
|
||||
sets[j] = bms_add_member(sets[j], i);
|
||||
|
||||
values[j] = values[ow] + iv;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
MemoryContextSwitchTo(oldctx);
|
||||
|
||||
result = bms_del_member(bms_copy(sets[max_weight]), num_items);
|
||||
|
||||
MemoryContextDelete(local_ctx);
|
||||
|
||||
return result;
|
||||
}
|
Reference in New Issue
Block a user