1
0
mirror of https://github.com/postgres/postgres.git synced 2025-09-09 13:09:39 +03:00

Redesign get_attstatsslot()/free_attstatsslot() for more safety and speed.

The mess cleaned up in commit da0759600 is clear evidence that it's a
bug hazard to expect the caller of get_attstatsslot()/free_attstatsslot()
to provide the correct type OID for the array elements in the slot.
Moreover, we weren't even getting any performance benefit from that,
since get_attstatsslot() was extracting the real type OID from the array
anyway.  So we ought to get rid of that requirement; indeed, it would
make more sense for get_attstatsslot() to pass back the type OID it found,
in case the caller isn't sure what to expect, which is likely in binary-
compatible-operator cases.

Another problem with the current implementation is that if the stats array
element type is pass-by-reference, we incur a palloc/memcpy/pfree cycle
for each element.  That seemed acceptable when the code was written because
we were targeting O(10) array sizes --- but these days, stats arrays are
almost always bigger than that, sometimes much bigger.  We can save a
significant number of cycles by doing one palloc/memcpy/pfree of the whole
array.  Indeed, in the now-probably-common case where the array is toasted,
that happens anyway so this method is basically free.  (Note: although the
catcache code will inline any out-of-line toasted values, it doesn't
decompress them.  At the other end of the size range, it doesn't expand
short-header datums either.  In either case, DatumGetArrayTypeP would have
to make a copy.  We do end up using an extra array copy step if the element
type is pass-by-value and the array length is neither small enough for a
short header nor large enough to have suffered compression.  But that
seems like a very acceptable price for winning in pass-by-ref cases.)

Hence, redesign to take these insights into account.  While at it,
convert to an API in which we fill a struct rather than passing a bunch
of pointers to individual output arguments.  That will make it less
painful if we ever want further expansion of what get_attstatsslot can
pass back.

It's certainly arguable that this is new development and not something to
push post-feature-freeze.  However, I view it as primarily bug-proofing
and therefore something that's better to have sooner rather than later.  Since
we aren't quite at beta phase yet, let's put it in.

Discussion: https://postgr.es/m/16364.1494520862@sss.pgh.pa.us
This commit is contained in:
Tom Lane
2017-05-13 15:14:39 -04:00
parent 1848b73d45
commit 9aab83fc50
10 changed files with 434 additions and 599 deletions

View File

@@ -137,35 +137,22 @@ scalararraysel_containment(PlannerInfo *root,
statistic_proc_security_check(&vardata, cmpfunc->fn_oid))
{
Form_pg_statistic stats;
Datum *values;
int nvalues;
float4 *numbers;
int nnumbers;
float4 *hist;
int nhist;
AttStatsSlot sslot;
AttStatsSlot hslot;
stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
/* MCELEM will be an array of same type as element */
if (get_attstatsslot(vardata.statsTuple,
elemtype, vardata.atttypmod,
if (get_attstatsslot(&sslot, vardata.statsTuple,
STATISTIC_KIND_MCELEM, InvalidOid,
NULL,
&values, &nvalues,
&numbers, &nnumbers))
ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS))
{
/* For ALL case, also get histogram of distinct-element counts */
if (useOr ||
!get_attstatsslot(vardata.statsTuple,
elemtype, vardata.atttypmod,
!get_attstatsslot(&hslot, vardata.statsTuple,
STATISTIC_KIND_DECHIST, InvalidOid,
NULL,
NULL, NULL,
&hist, &nhist))
{
hist = NULL;
nhist = 0;
}
ATTSTATSSLOT_NUMBERS))
memset(&hslot, 0, sizeof(hslot));
/*
* For = ANY, estimate as var @> ARRAY[const].
@@ -173,22 +160,26 @@ scalararraysel_containment(PlannerInfo *root,
* For = ALL, estimate as var <@ ARRAY[const].
*/
if (useOr)
selec = mcelem_array_contain_overlap_selec(values, nvalues,
numbers, nnumbers,
selec = mcelem_array_contain_overlap_selec(sslot.values,
sslot.nvalues,
sslot.numbers,
sslot.nnumbers,
&constval, 1,
OID_ARRAY_CONTAINS_OP,
cmpfunc);
else
selec = mcelem_array_contained_selec(values, nvalues,
numbers, nnumbers,
selec = mcelem_array_contained_selec(sslot.values,
sslot.nvalues,
sslot.numbers,
sslot.nnumbers,
&constval, 1,
hist, nhist,
hslot.numbers,
hslot.nnumbers,
OID_ARRAY_CONTAINED_OP,
cmpfunc);
if (hist)
free_attstatsslot(elemtype, NULL, 0, hist, nhist);
free_attstatsslot(elemtype, values, nvalues, numbers, nnumbers);
free_attstatsslot(&hslot);
free_attstatsslot(&sslot);
}
else
{
@@ -369,49 +360,35 @@ calc_arraycontsel(VariableStatData *vardata, Datum constval,
statistic_proc_security_check(vardata, cmpfunc->fn_oid))
{
Form_pg_statistic stats;
Datum *values;
int nvalues;
float4 *numbers;
int nnumbers;
float4 *hist;
int nhist;
AttStatsSlot sslot;
AttStatsSlot hslot;
stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
/* MCELEM will be an array of same type as column */
if (get_attstatsslot(vardata->statsTuple,
elemtype, vardata->atttypmod,
if (get_attstatsslot(&sslot, vardata->statsTuple,
STATISTIC_KIND_MCELEM, InvalidOid,
NULL,
&values, &nvalues,
&numbers, &nnumbers))
ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS))
{
/*
* For "array <@ const" case we also need histogram of distinct
* element counts.
*/
if (operator != OID_ARRAY_CONTAINED_OP ||
!get_attstatsslot(vardata->statsTuple,
elemtype, vardata->atttypmod,
!get_attstatsslot(&hslot, vardata->statsTuple,
STATISTIC_KIND_DECHIST, InvalidOid,
NULL,
NULL, NULL,
&hist, &nhist))
{
hist = NULL;
nhist = 0;
}
ATTSTATSSLOT_NUMBERS))
memset(&hslot, 0, sizeof(hslot));
/* Use the most-common-elements slot for the array Var. */
selec = mcelem_array_selec(array, typentry,
values, nvalues,
numbers, nnumbers,
hist, nhist,
sslot.values, sslot.nvalues,
sslot.numbers, sslot.nnumbers,
hslot.numbers, hslot.nnumbers,
operator, cmpfunc);
if (hist)
free_attstatsslot(elemtype, NULL, 0, hist, nhist);
free_attstatsslot(elemtype, values, nvalues, numbers, nnumbers);
free_attstatsslot(&hslot);
free_attstatsslot(&sslot);
}
else
{