1
0
mirror of https://github.com/postgres/postgres.git synced 2025-04-29 13:56:47 +03:00

Make nodeSort.c use Datum sorts for single column sorts

Datum sorts can be significantly faster than tuple sorts, especially when
the data type being sorted is a pass-by-value type.  Something in the
region of 50-70% performance improvements appear to be possible.

Just in case there's any confusion; the Datum sort is only used when the
targetlist of the Sort node contains a single column, not when there's a
single column in the sort key and multiple items in the target list.

Author: Ronan Dunklau
Reviewed-by: James Coleman, David Rowley, Ranier Vilela, Hou Zhijie
Tested-by: John Naylor
Discussion: https://postgr.es/m/3177670.itZtoPt7T5@aivenronan
This commit is contained in:
David Rowley 2021-07-22 14:03:19 +12:00
parent 7fa1e1ef74
commit 91e9e89dcc
2 changed files with 81 additions and 24 deletions

View File

@ -29,6 +29,16 @@
* which saves the results in a temporary file or memory. After the
* initial call, returns a tuple from the file with each call.
*
* There are two distinct ways that this sort can be performed:
*
* 1) When the result is a single column we perform a Datum sort.
*
* 2) When the result contains multiple columns we perform a tuple sort.
*
* We could do this by always performing a tuple sort, however sorting
* Datums only can be significantly faster than sorting tuples,
* especially when the Datums are of a pass-by-value type.
*
* Conditions:
* -- none.
*
@ -86,31 +96,56 @@ ExecSort(PlanState *pstate)
outerNode = outerPlanState(node);
tupDesc = ExecGetResultType(outerNode);
tuplesortstate = tuplesort_begin_heap(tupDesc,
plannode->numCols,
plannode->sortColIdx,
plannode->sortOperators,
plannode->collations,
plannode->nullsFirst,
work_mem,
NULL,
node->randomAccess);
if (node->datumSort)
tuplesortstate = tuplesort_begin_datum(TupleDescAttr(tupDesc, 0)->atttypid,
plannode->sortOperators[0],
plannode->collations[0],
plannode->nullsFirst[0],
work_mem,
NULL,
node->randomAccess);
else
tuplesortstate = tuplesort_begin_heap(tupDesc,
plannode->numCols,
plannode->sortColIdx,
plannode->sortOperators,
plannode->collations,
plannode->nullsFirst,
work_mem,
NULL,
node->randomAccess);
if (node->bounded)
tuplesort_set_bound(tuplesortstate, node->bound);
node->tuplesortstate = (void *) tuplesortstate;
/*
* Scan the subplan and feed all the tuples to tuplesort.
* Scan the subplan and feed all the tuples to tuplesort using the
* appropriate method based on the type of sort we're doing.
*/
for (;;)
if (node->datumSort)
{
slot = ExecProcNode(outerNode);
for (;;)
{
slot = ExecProcNode(outerNode);
if (TupIsNull(slot))
break;
if (TupIsNull(slot))
break;
slot_getsomeattrs(slot, 1);
tuplesort_putdatum(tuplesortstate,
slot->tts_values[0],
slot->tts_isnull[0]);
}
}
else
{
for (;;)
{
slot = ExecProcNode(outerNode);
tuplesort_puttupleslot(tuplesortstate, slot);
if (TupIsNull(slot))
break;
tuplesort_puttupleslot(tuplesortstate, slot);
}
}
/*
@ -144,15 +179,27 @@ ExecSort(PlanState *pstate)
SO1_printf("ExecSort: %s\n",
"retrieving tuple from tuplesort");
/*
* Get the first or next tuple from tuplesort. Returns NULL if no more
* tuples. Note that we only rely on slot tuple remaining valid until the
* next fetch from the tuplesort.
*/
slot = node->ss.ps.ps_ResultTupleSlot;
(void) tuplesort_gettupleslot(tuplesortstate,
ScanDirectionIsForward(dir),
false, slot, NULL);
/*
* Fetch the next sorted item from the appropriate tuplesort function. For
* datum sorts we must manage the slot ourselves and leave it clear when
* tuplesort_getdatum returns false to indicate there are no more datums.
* For tuple sorts, tuplesort_gettupleslot manages the slot for us and
* empties the slot when it runs out of tuples.
*/
if (node->datumSort)
{
ExecClearTuple(slot);
if (tuplesort_getdatum(tuplesortstate, ScanDirectionIsForward(dir),
&(slot->tts_values[0]), &(slot->tts_isnull[0]), NULL))
ExecStoreVirtualTuple(slot);
}
else
(void) tuplesort_gettupleslot(tuplesortstate,
ScanDirectionIsForward(dir),
false, slot, NULL);
return slot;
}
@ -221,6 +268,15 @@ ExecInitSort(Sort *node, EState *estate, int eflags)
ExecInitResultTupleSlotTL(&sortstate->ss.ps, &TTSOpsMinimalTuple);
sortstate->ss.ps.ps_ProjInfo = NULL;
/*
* We perform a Datum sort when we're sorting just a single column,
* otherwise we perform a tuple sort.
*/
if (ExecGetResultType(outerPlanState(sortstate))->natts == 1)
sortstate->datumSort = true;
else
sortstate->datumSort = false;
SO1_printf("ExecInitSort: %s\n",
"sort node initialized");

View File

@ -2151,6 +2151,7 @@ typedef struct SortState
int64 bound_Done; /* value of bound we did the sort with */
void *tuplesortstate; /* private state of tuplesort.c */
bool am_worker; /* are we a worker? */
bool datumSort; /* Datum sort instead of tuple sort? */
SharedSortInfo *shared_info; /* one entry per worker */
} SortState;