mirror of
https://github.com/postgres/postgres.git
synced 2025-07-05 07:21:24 +03:00
Make EXPLAIN sampling smarter, to avoid excessive sampling delay.
Martijn van Oosterhout
This commit is contained in:
@ -7,16 +7,78 @@
|
||||
* Copyright (c) 2001-2006, PostgreSQL Global Development Group
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/executor/instrument.c,v 1.14 2006/03/05 15:58:26 momjian Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/executor/instrument.c,v 1.15 2006/05/30 14:01:58 momjian Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
#include "postgres.h"
|
||||
|
||||
#include <unistd.h>
|
||||
#include <math.h>
|
||||
|
||||
#include "executor/instrument.h"
|
||||
|
||||
/* This is the function that is used to determine the sampling intervals. In
|
||||
* general, if the function is f(x), then for N tuples we will take on the
|
||||
* order of integral(1/f(x), x=0..N) samples. Some examples follow, with the
|
||||
* number of samples that would be collected over 1,000,000 tuples.
|
||||
|
||||
f(x) = x => log2(N) 20
|
||||
f(x) = x^(1/2) => 2 * N^(1/2) 2000
|
||||
f(x) = x^(1/3) => 1.5 * N^(2/3) 15000
|
||||
|
||||
* I've chosen the last one as it seems to provide a good compromise between
|
||||
* low overhead and still getting a meaningful number of samples. However,
|
||||
* not all machines have the cbrt() function so on those we substitute
|
||||
* sqrt(). The difference is not very significant in the tests I made.
|
||||
*/
|
||||
#ifdef HAVE_CBRT
|
||||
#define SampleFunc cbrt
|
||||
#else
|
||||
#define SampleFunc sqrt
|
||||
#endif
|
||||
|
||||
#define SAMPLE_THRESHOLD 50
|
||||
|
||||
static double SampleOverhead;
|
||||
static bool SampleOverheadCalculated;
|
||||
|
||||
static void
|
||||
CalculateSampleOverhead()
|
||||
{
|
||||
Instrumentation instr;
|
||||
int i;
|
||||
|
||||
/* We want to determine the sampling overhead, to correct
|
||||
* calculations later. This only needs to be done once per backend.
|
||||
* Is this the place? A wrong value here (due to a mistimed
|
||||
* task-switch) will cause bad calculations later.
|
||||
*
|
||||
* To minimize the risk we do it a few times and take the lowest.
|
||||
*/
|
||||
|
||||
SampleOverhead = 1.0e6;
|
||||
|
||||
for( i = 0; i<5; i++ )
|
||||
{
|
||||
int j;
|
||||
double overhead;
|
||||
|
||||
memset( &instr, 0, sizeof(instr) );
|
||||
|
||||
/* Loop SAMPLE_THRESHOLD times or 100 microseconds, whichever is faster */
|
||||
for( j=0; j<SAMPLE_THRESHOLD && INSTR_TIME_GET_DOUBLE(instr.counter) < 100e-6; i++ )
|
||||
{
|
||||
InstrStartNode( &instr );
|
||||
InstrStopNode( &instr, 1 );
|
||||
}
|
||||
overhead = INSTR_TIME_GET_DOUBLE(instr.counter) / instr.samplecount;
|
||||
if( overhead < SampleOverhead )
|
||||
SampleOverhead = overhead;
|
||||
}
|
||||
|
||||
SampleOverheadCalculated = true;
|
||||
}
|
||||
|
||||
/* Allocate new instrumentation structure(s) */
|
||||
Instrumentation *
|
||||
@ -25,7 +87,10 @@ InstrAlloc(int n)
|
||||
Instrumentation *instr = palloc0(n * sizeof(Instrumentation));
|
||||
|
||||
/* we don't need to do any initialization except zero 'em */
|
||||
|
||||
|
||||
/* Calculate overhead, if not done yet */
|
||||
if( !SampleOverheadCalculated )
|
||||
CalculateSampleOverhead();
|
||||
return instr;
|
||||
}
|
||||
|
||||
@ -34,49 +99,69 @@ void
|
||||
InstrStartNode(Instrumentation *instr)
|
||||
{
|
||||
if (INSTR_TIME_IS_ZERO(instr->starttime))
|
||||
INSTR_TIME_SET_CURRENT(instr->starttime);
|
||||
{
|
||||
/* We always sample the first SAMPLE_THRESHOLD tuples, so small nodes are always accurate */
|
||||
if (instr->tuplecount < SAMPLE_THRESHOLD)
|
||||
instr->sampling = true;
|
||||
else
|
||||
{
|
||||
/* Otherwise we go to sampling, see the comments on SampleFunc at the top of the file */
|
||||
if( instr->tuplecount > instr->nextsample )
|
||||
{
|
||||
instr->sampling = true;
|
||||
/* The doubling is so the random will average 1 over time */
|
||||
instr->nextsample += 2.0 * SampleFunc(instr->tuplecount) * (double)rand() / (double)RAND_MAX;
|
||||
}
|
||||
}
|
||||
if (instr->sampling)
|
||||
INSTR_TIME_SET_CURRENT(instr->starttime);
|
||||
}
|
||||
else
|
||||
elog(DEBUG2, "InstrStartNode called twice in a row");
|
||||
}
|
||||
|
||||
/* Exit from a plan node */
|
||||
void
|
||||
InstrStopNode(Instrumentation *instr, bool returnedTuple)
|
||||
InstrStopNode(Instrumentation *instr, double nTuples)
|
||||
{
|
||||
instr_time endtime;
|
||||
|
||||
/* count the returned tuples */
|
||||
if (returnedTuple)
|
||||
instr->tuplecount += 1;
|
||||
instr->tuplecount += nTuples;
|
||||
|
||||
if (INSTR_TIME_IS_ZERO(instr->starttime))
|
||||
if (instr->sampling)
|
||||
{
|
||||
elog(DEBUG2, "InstrStopNode called without start");
|
||||
return;
|
||||
}
|
||||
if (INSTR_TIME_IS_ZERO(instr->starttime))
|
||||
{
|
||||
elog(DEBUG2, "InstrStopNode called without start");
|
||||
return;
|
||||
}
|
||||
|
||||
INSTR_TIME_SET_CURRENT(endtime);
|
||||
INSTR_TIME_SET_CURRENT(endtime);
|
||||
|
||||
#ifndef WIN32
|
||||
instr->counter.tv_sec += endtime.tv_sec - instr->starttime.tv_sec;
|
||||
instr->counter.tv_usec += endtime.tv_usec - instr->starttime.tv_usec;
|
||||
instr->counter.tv_sec += endtime.tv_sec - instr->starttime.tv_sec;
|
||||
instr->counter.tv_usec += endtime.tv_usec - instr->starttime.tv_usec;
|
||||
|
||||
/* Normalize after each add to avoid overflow/underflow of tv_usec */
|
||||
while (instr->counter.tv_usec < 0)
|
||||
{
|
||||
instr->counter.tv_usec += 1000000;
|
||||
instr->counter.tv_sec--;
|
||||
}
|
||||
while (instr->counter.tv_usec >= 1000000)
|
||||
{
|
||||
instr->counter.tv_usec -= 1000000;
|
||||
instr->counter.tv_sec++;
|
||||
}
|
||||
/* Normalize after each add to avoid overflow/underflow of tv_usec */
|
||||
while (instr->counter.tv_usec < 0)
|
||||
{
|
||||
instr->counter.tv_usec += 1000000;
|
||||
instr->counter.tv_sec--;
|
||||
}
|
||||
while (instr->counter.tv_usec >= 1000000)
|
||||
{
|
||||
instr->counter.tv_usec -= 1000000;
|
||||
instr->counter.tv_sec++;
|
||||
}
|
||||
#else /* WIN32 */
|
||||
instr->counter.QuadPart += (endtime.QuadPart - instr->starttime.QuadPart);
|
||||
instr->counter.QuadPart += (endtime.QuadPart - instr->starttime.QuadPart);
|
||||
#endif
|
||||
|
||||
INSTR_TIME_SET_ZERO(instr->starttime);
|
||||
INSTR_TIME_SET_ZERO(instr->starttime);
|
||||
instr->samplecount += nTuples;
|
||||
instr->sampling = false;
|
||||
}
|
||||
|
||||
/* Is this the first tuple of this cycle? */
|
||||
if (!instr->running)
|
||||
@ -86,17 +171,6 @@ InstrStopNode(Instrumentation *instr, bool returnedTuple)
|
||||
}
|
||||
}
|
||||
|
||||
/* As above, but count multiple tuples returned at once */
|
||||
void
|
||||
InstrStopNodeMulti(Instrumentation *instr, double nTuples)
|
||||
{
|
||||
/* count the returned tuples */
|
||||
instr->tuplecount += nTuples;
|
||||
|
||||
/* delegate the rest */
|
||||
InstrStopNode(instr, false);
|
||||
}
|
||||
|
||||
/* Finish a run cycle for a plan node */
|
||||
void
|
||||
InstrEndLoop(Instrumentation *instr)
|
||||
@ -114,8 +188,24 @@ InstrEndLoop(Instrumentation *instr)
|
||||
totaltime = INSTR_TIME_GET_DOUBLE(instr->counter);
|
||||
|
||||
instr->startup += instr->firsttuple;
|
||||
instr->total += totaltime;
|
||||
|
||||
/* Here we take into account sampling effects. Doing it naively ends
|
||||
* up assuming the sampling overhead applies to all tuples, even the
|
||||
* ones we didn't measure. We've calculated an overhead, so we
|
||||
* subtract that for all tuples we didn't sample. The first tuple
|
||||
* is also special cased, because it usually takes longer. */
|
||||
|
||||
if( instr->samplecount < instr->tuplecount )
|
||||
{
|
||||
double pertuple = (totaltime - instr->firsttuple) / (instr->samplecount - 1);
|
||||
instr->total += instr->firsttuple + (pertuple * (instr->samplecount - 1))
|
||||
+ ((pertuple - SampleOverhead) * (instr->tuplecount - instr->samplecount));
|
||||
}
|
||||
else
|
||||
instr->total += totaltime;
|
||||
|
||||
instr->ntuples += instr->tuplecount;
|
||||
instr->nsamples += instr->samplecount;
|
||||
instr->nloops += 1;
|
||||
|
||||
/* Reset for next cycle (if any) */
|
||||
@ -123,5 +213,6 @@ InstrEndLoop(Instrumentation *instr)
|
||||
INSTR_TIME_SET_ZERO(instr->starttime);
|
||||
INSTR_TIME_SET_ZERO(instr->counter);
|
||||
instr->firsttuple = 0;
|
||||
instr->samplecount = 0;
|
||||
instr->tuplecount = 0;
|
||||
}
|
||||
|
Reference in New Issue
Block a user