1
0
mirror of https://github.com/postgres/postgres.git synced 2025-04-29 13:56:47 +03:00

Make nodeSort.c use Datum sorts for single column sorts

Datum sorts can be significantly faster than tuple sorts, especially when
the data type being sorted is a pass-by-value type.  Performance
improvements in the region of 50-70% appear to be possible.

Just in case there's any confusion: the Datum sort is only used when the
target list of the Sort node contains a single column, not when there's a
single column in the sort key and multiple items in the target list.

Author: Ronan Dunklau
Reviewed-by: James Coleman, David Rowley, Ranier Vilela, Hou Zhijie
Tested-by: John Naylor
Discussion: https://postgr.es/m/3177670.itZtoPt7T5@aivenronan
This commit is contained in:
David Rowley 2021-07-22 14:03:19 +12:00
parent 7fa1e1ef74
commit 91e9e89dcc
2 changed files with 81 additions and 24 deletions

View File

@ -29,6 +29,16 @@
* which saves the results in a temporary file or memory. After the * which saves the results in a temporary file or memory. After the
* initial call, returns a tuple from the file with each call. * initial call, returns a tuple from the file with each call.
* *
* There are two distinct ways that this sort can be performed:
*
* 1) When the result is a single column we perform a Datum sort.
*
* 2) When the result contains multiple columns we perform a tuple sort.
*
* We could do this by always performing a tuple sort; however, sorting
* only Datums can be significantly faster than sorting tuples,
* especially when the Datums are of a pass-by-value type.
*
* Conditions: * Conditions:
* -- none. * -- none.
* *
@ -86,6 +96,15 @@ ExecSort(PlanState *pstate)
outerNode = outerPlanState(node); outerNode = outerPlanState(node);
tupDesc = ExecGetResultType(outerNode); tupDesc = ExecGetResultType(outerNode);
if (node->datumSort)
tuplesortstate = tuplesort_begin_datum(TupleDescAttr(tupDesc, 0)->atttypid,
plannode->sortOperators[0],
plannode->collations[0],
plannode->nullsFirst[0],
work_mem,
NULL,
node->randomAccess);
else
tuplesortstate = tuplesort_begin_heap(tupDesc, tuplesortstate = tuplesort_begin_heap(tupDesc,
plannode->numCols, plannode->numCols,
plannode->sortColIdx, plannode->sortColIdx,
@ -100,18 +119,34 @@ ExecSort(PlanState *pstate)
node->tuplesortstate = (void *) tuplesortstate; node->tuplesortstate = (void *) tuplesortstate;
/* /*
* Scan the subplan and feed all the tuples to tuplesort. * Scan the subplan and feed all the tuples to tuplesort using the
* appropriate method based on the type of sort we're doing.
*/ */
if (node->datumSort)
{
for (;;) for (;;)
{ {
slot = ExecProcNode(outerNode); slot = ExecProcNode(outerNode);
if (TupIsNull(slot)) if (TupIsNull(slot))
break; break;
slot_getsomeattrs(slot, 1);
tuplesort_putdatum(tuplesortstate,
slot->tts_values[0],
slot->tts_isnull[0]);
}
}
else
{
for (;;)
{
slot = ExecProcNode(outerNode);
if (TupIsNull(slot))
break;
tuplesort_puttupleslot(tuplesortstate, slot); tuplesort_puttupleslot(tuplesortstate, slot);
} }
}
/* /*
* Complete the sort. * Complete the sort.
@ -144,15 +179,27 @@ ExecSort(PlanState *pstate)
SO1_printf("ExecSort: %s\n", SO1_printf("ExecSort: %s\n",
"retrieving tuple from tuplesort"); "retrieving tuple from tuplesort");
/*
* Get the first or next tuple from tuplesort. Returns NULL if no more
* tuples. Note that we only rely on slot tuple remaining valid until the
* next fetch from the tuplesort.
*/
slot = node->ss.ps.ps_ResultTupleSlot; slot = node->ss.ps.ps_ResultTupleSlot;
/*
* Fetch the next sorted item from the appropriate tuplesort function. For
* datum sorts we must manage the slot ourselves and leave it clear when
* tuplesort_getdatum returns false to indicate there are no more datums.
* For tuple sorts, tuplesort_gettupleslot manages the slot for us and
* empties the slot when it runs out of tuples.
*/
if (node->datumSort)
{
ExecClearTuple(slot);
if (tuplesort_getdatum(tuplesortstate, ScanDirectionIsForward(dir),
&(slot->tts_values[0]), &(slot->tts_isnull[0]), NULL))
ExecStoreVirtualTuple(slot);
}
else
(void) tuplesort_gettupleslot(tuplesortstate, (void) tuplesort_gettupleslot(tuplesortstate,
ScanDirectionIsForward(dir), ScanDirectionIsForward(dir),
false, slot, NULL); false, slot, NULL);
return slot; return slot;
} }
@ -221,6 +268,15 @@ ExecInitSort(Sort *node, EState *estate, int eflags)
ExecInitResultTupleSlotTL(&sortstate->ss.ps, &TTSOpsMinimalTuple); ExecInitResultTupleSlotTL(&sortstate->ss.ps, &TTSOpsMinimalTuple);
sortstate->ss.ps.ps_ProjInfo = NULL; sortstate->ss.ps.ps_ProjInfo = NULL;
/*
* We perform a Datum sort when we're sorting just a single column,
* otherwise we perform a tuple sort.
*/
if (ExecGetResultType(outerPlanState(sortstate))->natts == 1)
sortstate->datumSort = true;
else
sortstate->datumSort = false;
SO1_printf("ExecInitSort: %s\n", SO1_printf("ExecInitSort: %s\n",
"sort node initialized"); "sort node initialized");

View File

@ -2151,6 +2151,7 @@ typedef struct SortState
int64 bound_Done; /* value of bound we did the sort with */ int64 bound_Done; /* value of bound we did the sort with */
void *tuplesortstate; /* private state of tuplesort.c */ void *tuplesortstate; /* private state of tuplesort.c */
bool am_worker; /* are we a worker? */ bool am_worker; /* are we a worker? */
bool datumSort; /* Datum sort instead of tuple sort? */
SharedSortInfo *shared_info; /* one entry per worker */ SharedSortInfo *shared_info; /* one entry per worker */
} SortState; } SortState;