1
0
mirror of https://github.com/postgres/postgres.git synced 2025-07-28 23:42:10 +03:00

Redesign get_attstatsslot()/free_attstatsslot() for more safety and speed.

The mess cleaned up in commit da0759600 is clear evidence that it's a
bug hazard to expect the caller of get_attstatsslot()/free_attstatsslot()
to provide the correct type OID for the array elements in the slot.
Moreover, we weren't even getting any performance benefit from that,
since get_attstatsslot() was extracting the real type OID from the array
anyway.  So we ought to get rid of that requirement; indeed, it would
make more sense for get_attstatsslot() to pass back the type OID it found,
in case the caller isn't sure what to expect, which is likely in binary-
compatible-operator cases.

Another problem with the current implementation is that if the stats array
element type is pass-by-reference, we incur a palloc/memcpy/pfree cycle
for each element.  That seemed acceptable when the code was written because
we were targeting O(10) array sizes --- but these days, stats arrays are
almost always bigger than that, sometimes much bigger.  We can save a
significant number of cycles by doing one palloc/memcpy/pfree of the whole
array.  Indeed, in the now-probably-common case where the array is toasted,
that happens anyway so this method is basically free.  (Note: although the
catcache code will inline any out-of-line toasted values, it doesn't
decompress them.  At the other end of the size range, it doesn't expand
short-header datums either.  In either case, DatumGetArrayTypeP would have
to make a copy.  We do end up using an extra array copy step if the element
type is pass-by-value and the array length is neither small enough for a
short header nor large enough to have suffered compression.  But that
seems like a very acceptable price for winning in pass-by-ref cases.)

Hence, redesign to take these insights into account.  While at it,
convert to an API in which we fill a struct rather than passing a bunch
of pointers to individual output arguments.  That will make it less
painful if we ever want further expansion of what get_attstatsslot can
pass back.

It's certainly arguable that this is new development and not something to
push post-feature-freeze.  However, I view it as primarily bug-proofing
and therefore something that's better to have sooner not later.  Since
we aren't quite at beta phase yet, let's put it in.

Discussion: https://postgr.es/m/16364.1494520862@sss.pgh.pa.us
This commit is contained in:
Tom Lane
2017-05-13 15:14:39 -04:00
parent 1848b73d45
commit 9aab83fc50
10 changed files with 434 additions and 599 deletions

View File

@ -32,7 +32,6 @@
#include "catalog/pg_proc.h"
#include "catalog/pg_range.h"
#include "catalog/pg_statistic.h"
#include "catalog/pg_statistic_ext.h"
#include "catalog/pg_transform.h"
#include "catalog/pg_type.h"
#include "miscadmin.h"
@ -2865,35 +2864,39 @@ get_attavgwidth(Oid relid, AttrNumber attnum)
* that have been provided by a stats hook and didn't really come from
* pg_statistic.
*
* sslot: pointer to output area (typically, a local variable in the caller).
* statstuple: pg_statistic tuple to be examined.
* atttype: type OID of slot's stavalues (can be InvalidOid if values == NULL).
* atttypmod: typmod of slot's stavalues (can be 0 if values == NULL).
* reqkind: STAKIND code for desired statistics slot kind.
* reqop: STAOP value wanted, or InvalidOid if don't care.
* actualop: if not NULL, *actualop receives the actual STAOP value.
* values, nvalues: if not NULL, the slot's stavalues are extracted.
* numbers, nnumbers: if not NULL, the slot's stanumbers are extracted.
* flags: bitmask of ATTSTATSSLOT_VALUES and/or ATTSTATSSLOT_NUMBERS.
*
* If assigned, values and numbers are set to point to palloc'd arrays.
* If the stavalues datatype is pass-by-reference, the values referenced by
* the values array are themselves palloc'd. The palloc'd stuff can be
* freed by calling free_attstatsslot.
* If a matching slot is found, TRUE is returned, and *sslot is filled thus:
* staop: receives the actual STAOP value.
* valuetype: receives actual datatype of the elements of stavalues.
* values: receives pointer to an array of the slot's stavalues.
* nvalues: receives number of stavalues.
* numbers: receives pointer to an array of the slot's stanumbers (as float4).
* nnumbers: receives number of stanumbers.
*
* Note: at present, atttype/atttypmod aren't actually used here at all.
* But the caller must have the correct (or at least binary-compatible)
* type ID to pass to free_attstatsslot later.
* valuetype/values/nvalues are InvalidOid/NULL/0 if ATTSTATSSLOT_VALUES
* wasn't specified. Likewise, numbers/nnumbers are NULL/0 if
* ATTSTATSSLOT_NUMBERS wasn't specified.
*
* If no matching slot is found, FALSE is returned, and *sslot is zeroed.
*
* The data referred to by the fields of sslot is locally palloc'd and
* is independent of the original pg_statistic tuple. When the caller
* is done with it, call free_attstatsslot to release the palloc'd data.
*
* If it's desirable to call free_attstatsslot when get_attstatsslot might
* not have been called, memset'ing sslot to zeroes will allow that.
*/
bool
get_attstatsslot(HeapTuple statstuple,
Oid atttype, int32 atttypmod,
int reqkind, Oid reqop,
Oid *actualop,
Datum **values, int *nvalues,
float4 **numbers, int *nnumbers)
get_attstatsslot(AttStatsSlot *sslot, HeapTuple statstuple,
int reqkind, Oid reqop, int flags)
{
Form_pg_statistic stats = (Form_pg_statistic) GETSTRUCT(statstuple);
int i,
j;
int i;
Datum val;
bool isnull;
ArrayType *statarray;
@ -2902,6 +2905,9 @@ get_attstatsslot(HeapTuple statstuple,
HeapTuple typeTuple;
Form_pg_type typeForm;
/* initialize *sslot properly */
memset(sslot, 0, sizeof(AttStatsSlot));
for (i = 0; i < STATISTIC_NUM_SLOTS; i++)
{
if ((&stats->stakind1)[i] == reqkind &&
@ -2911,26 +2917,29 @@ get_attstatsslot(HeapTuple statstuple,
if (i >= STATISTIC_NUM_SLOTS)
return false; /* not there */
if (actualop)
*actualop = (&stats->staop1)[i];
sslot->staop = (&stats->staop1)[i];
if (values)
if (flags & ATTSTATSSLOT_VALUES)
{
val = SysCacheGetAttr(STATRELATTINH, statstuple,
Anum_pg_statistic_stavalues1 + i,
&isnull);
if (isnull)
elog(ERROR, "stavalues is null");
statarray = DatumGetArrayTypeP(val);
/*
* Need to get info about the array element type. We look at the
* actual element type embedded in the array, which might be only
* binary-compatible with the passed-in atttype. The info we extract
* here should be the same either way, but deconstruct_array is picky
* about having an exact type OID match.
* Detoast the array if needed, and in any case make a copy that's
* under control of this AttStatsSlot.
*/
arrayelemtype = ARR_ELEMTYPE(statarray);
statarray = DatumGetArrayTypePCopy(val);
/*
* Extract the actual array element type, and pass it back in case the
* caller needs it.
*/
sslot->valuetype = arrayelemtype = ARR_ELEMTYPE(statarray);
/* Need info about element type */
typeTuple = SearchSysCache1(TYPEOID, ObjectIdGetDatum(arrayelemtype));
if (!HeapTupleIsValid(typeTuple))
elog(ERROR, "cache lookup failed for type %u", arrayelemtype);
@ -2942,40 +2951,35 @@ get_attstatsslot(HeapTuple statstuple,
typeForm->typlen,
typeForm->typbyval,
typeForm->typalign,
values, NULL, nvalues);
&sslot->values, NULL, &sslot->nvalues);
/*
* If the element type is pass-by-reference, we now have a bunch of
* Datums that are pointers into the syscache value. Copy them to
* avoid problems if syscache decides to drop the entry.
* Datums that are pointers into the statarray, so we need to keep
* that until free_attstatsslot. Otherwise, all the useful info is in
* sslot->values[], so we can free the array object immediately.
*/
if (!typeForm->typbyval)
{
for (j = 0; j < *nvalues; j++)
{
(*values)[j] = datumCopy((*values)[j],
typeForm->typbyval,
typeForm->typlen);
}
}
sslot->values_arr = statarray;
else
pfree(statarray);
ReleaseSysCache(typeTuple);
/*
* Free statarray if it's a detoasted copy.
*/
if ((Pointer) statarray != DatumGetPointer(val))
pfree(statarray);
}
if (numbers)
if (flags & ATTSTATSSLOT_NUMBERS)
{
val = SysCacheGetAttr(STATRELATTINH, statstuple,
Anum_pg_statistic_stanumbers1 + i,
&isnull);
if (isnull)
elog(ERROR, "stanumbers is null");
statarray = DatumGetArrayTypeP(val);
/*
* Detoast the array if needed, and in any case make a copy that's
* under control of this AttStatsSlot.
*/
statarray = DatumGetArrayTypePCopy(val);
/*
* We expect the array to be a 1-D float4 array; verify that. We don't
@ -2987,15 +2991,13 @@ get_attstatsslot(HeapTuple statstuple,
ARR_HASNULL(statarray) ||
ARR_ELEMTYPE(statarray) != FLOAT4OID)
elog(ERROR, "stanumbers is not a 1-D float4 array");
*numbers = (float4 *) palloc(narrayelem * sizeof(float4));
memcpy(*numbers, ARR_DATA_PTR(statarray), narrayelem * sizeof(float4));
*nnumbers = narrayelem;
/*
* Free statarray if it's a detoasted copy.
*/
if ((Pointer) statarray != DatumGetPointer(val))
pfree(statarray);
/* Give caller a pointer directly into the statarray */
sslot->numbers = (float4 *) ARR_DATA_PTR(statarray);
sslot->nnumbers = narrayelem;
/* We'll free the statarray in free_attstatsslot */
sslot->numbers_arr = statarray;
}
return true;
@ -3004,28 +3006,19 @@ get_attstatsslot(HeapTuple statstuple,
/*
* free_attstatsslot
* Free data allocated by get_attstatsslot
*
* atttype is the type of the individual values in values[].
* It need be valid only if values != NULL.
*/
void
free_attstatsslot(Oid atttype,
Datum *values, int nvalues,
float4 *numbers, int nnumbers)
free_attstatsslot(AttStatsSlot *sslot)
{
if (values)
{
if (!get_typbyval(atttype))
{
int i;
for (i = 0; i < nvalues; i++)
pfree(DatumGetPointer(values[i]));
}
pfree(values);
}
if (numbers)
pfree(numbers);
/* The values[] array was separately palloc'd by deconstruct_array */
if (sslot->values)
pfree(sslot->values);
/* The numbers[] array points into numbers_arr, do not pfree it */
/* Free the detoasted array objects, if any */
if (sslot->values_arr)
pfree(sslot->values_arr);
if (sslot->numbers_arr)
pfree(sslot->numbers_arr);
}
/* ---------- PG_NAMESPACE CACHE ---------- */