mirror of
https://github.com/postgres/postgres.git
synced 2025-04-22 23:02:54 +03:00
Revert sampling patch for EXPLAIN ANALYZE; it turns out to be too unreliable
because node timing is much less predictable than the patch expects. I kept the API change for InstrStopNode, however.
This commit is contained in:
parent
9143144935
commit
5de0cbdf0c
@ -12,7 +12,7 @@
|
|||||||
*
|
*
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $PostgreSQL: pgsql/src/backend/executor/execProcnode.c,v 1.55 2006/05/30 14:01:57 momjian Exp $
|
* $PostgreSQL: pgsql/src/backend/executor/execProcnode.c,v 1.56 2006/06/09 19:30:56 tgl Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -423,7 +423,7 @@ ExecProcNode(PlanState *node)
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (node->instrument)
|
if (node->instrument)
|
||||||
InstrStopNode(node->instrument, TupIsNull(result) ? 0 : 1 );
|
InstrStopNode(node->instrument, TupIsNull(result) ? 0.0 : 1.0);
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
@ -7,112 +7,22 @@
|
|||||||
* Copyright (c) 2001-2006, PostgreSQL Global Development Group
|
* Copyright (c) 2001-2006, PostgreSQL Global Development Group
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $PostgreSQL: pgsql/src/backend/executor/instrument.c,v 1.17 2006/06/07 18:49:03 tgl Exp $
|
* $PostgreSQL: pgsql/src/backend/executor/instrument.c,v 1.18 2006/06/09 19:30:56 tgl Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
#include "postgres.h"
|
#include "postgres.h"
|
||||||
|
|
||||||
#include <math.h>
|
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
|
|
||||||
#include "executor/instrument.h"
|
#include "executor/instrument.h"
|
||||||
|
|
||||||
|
|
||||||
/*
|
|
||||||
* As of PostgreSQL 8.2, we try to reduce the overhead of EXPLAIN ANALYZE
|
|
||||||
* by not calling INSTR_TIME_SET_CURRENT() for every single node execution.
|
|
||||||
* (Because that requires a kernel call on most systems, it's expensive.)
|
|
||||||
*
|
|
||||||
* This macro determines the sampling interval: that is, after how many more
|
|
||||||
* iterations will we take the next time sample, given that niters iterations
|
|
||||||
* have occurred already. In general, if the function is f(x), then for N
|
|
||||||
* iterations we will take on the order of integral(1/f(x), x=0..N)
|
|
||||||
* samples. Some examples follow, with the number of samples that would be
|
|
||||||
* collected over 1,000,000 iterations:
|
|
||||||
*
|
|
||||||
* f(x) = x => log2(N) 20
|
|
||||||
* f(x) = x^(1/2) => 2 * N^(1/2) 2000
|
|
||||||
* f(x) = x^(1/3) => 1.5 * N^(2/3) 15000
|
|
||||||
*
|
|
||||||
* I've chosen the last one as it seems to provide a good compromise between
|
|
||||||
* low overhead but still getting a meaningful number of samples. However,
|
|
||||||
* not all machines have the cbrt() function so on those we substitute
|
|
||||||
* sqrt(). The difference is not very significant in the tests I made.
|
|
||||||
*
|
|
||||||
* The actual sampling interval is randomized with the SampleFunc() value
|
|
||||||
* as the mean; this hopefully will reduce any measurement bias due to
|
|
||||||
* variation in the node execution time.
|
|
||||||
*/
|
|
||||||
#ifdef HAVE_CBRT
|
|
||||||
#define SampleFunc(niters) cbrt(niters)
|
|
||||||
#else
|
|
||||||
#define SampleFunc(niters) sqrt(niters)
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#define SampleInterval(niters) \
|
|
||||||
(SampleFunc(niters) * (double) random() / (double) (MAX_RANDOM_VALUE/2))
|
|
||||||
|
|
||||||
/*
|
|
||||||
* We sample at every node iteration until we've reached this threshold,
|
|
||||||
* so that nodes not called a large number of times are completely accurate.
|
|
||||||
* (Perhaps this should be a GUC variable?)
|
|
||||||
*/
|
|
||||||
#define SAMPLE_THRESHOLD 50
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Determine the sampling overhead. This only needs to be done once per
|
|
||||||
* backend (actually, probably once per postmaster would do ...)
|
|
||||||
*/
|
|
||||||
static double SampleOverhead;
|
|
||||||
static bool SampleOverheadCalculated = false;
|
|
||||||
|
|
||||||
static void
|
|
||||||
CalculateSampleOverhead(void)
|
|
||||||
{
|
|
||||||
int i;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* We could get a wrong result due to being interrupted by task switch.
|
|
||||||
* To minimize the risk we do it a few times and take the lowest.
|
|
||||||
*/
|
|
||||||
SampleOverhead = 1.0e6;
|
|
||||||
|
|
||||||
for (i = 0; i < 5; i++)
|
|
||||||
{
|
|
||||||
Instrumentation timer;
|
|
||||||
instr_time tmptime;
|
|
||||||
int j;
|
|
||||||
double overhead;
|
|
||||||
|
|
||||||
memset(&timer, 0, sizeof(timer));
|
|
||||||
InstrStartNode(&timer);
|
|
||||||
#define TEST_COUNT 100
|
|
||||||
for (j = 0; j < TEST_COUNT; j++)
|
|
||||||
{
|
|
||||||
INSTR_TIME_SET_CURRENT(tmptime);
|
|
||||||
}
|
|
||||||
InstrStopNode(&timer, 1);
|
|
||||||
overhead = INSTR_TIME_GET_DOUBLE(timer.counter) / TEST_COUNT;
|
|
||||||
if (overhead < SampleOverhead)
|
|
||||||
SampleOverhead = overhead;
|
|
||||||
}
|
|
||||||
|
|
||||||
SampleOverheadCalculated = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/* Allocate new instrumentation structure(s) */
|
/* Allocate new instrumentation structure(s) */
|
||||||
Instrumentation *
|
Instrumentation *
|
||||||
InstrAlloc(int n)
|
InstrAlloc(int n)
|
||||||
{
|
{
|
||||||
Instrumentation *instr;
|
Instrumentation *instr = palloc0(n * sizeof(Instrumentation));
|
||||||
|
|
||||||
/* Calculate sampling overhead, if not done yet in this backend */
|
|
||||||
if (!SampleOverheadCalculated)
|
|
||||||
CalculateSampleOverhead();
|
|
||||||
|
|
||||||
instr = palloc0(n * sizeof(Instrumentation));
|
|
||||||
|
|
||||||
/* we don't need to do any initialization except zero 'em */
|
/* we don't need to do any initialization except zero 'em */
|
||||||
|
|
||||||
@ -124,22 +34,7 @@ void
|
|||||||
InstrStartNode(Instrumentation *instr)
|
InstrStartNode(Instrumentation *instr)
|
||||||
{
|
{
|
||||||
if (INSTR_TIME_IS_ZERO(instr->starttime))
|
if (INSTR_TIME_IS_ZERO(instr->starttime))
|
||||||
{
|
|
||||||
/*
|
|
||||||
* Always sample if not yet up to threshold, else check whether
|
|
||||||
* next threshold has been reached
|
|
||||||
*/
|
|
||||||
if (instr->itercount < SAMPLE_THRESHOLD)
|
|
||||||
instr->sampling = true;
|
|
||||||
else if (instr->itercount >= instr->nextsample)
|
|
||||||
{
|
|
||||||
instr->sampling = true;
|
|
||||||
instr->nextsample =
|
|
||||||
instr->itercount + SampleInterval(instr->itercount);
|
|
||||||
}
|
|
||||||
if (instr->sampling)
|
|
||||||
INSTR_TIME_SET_CURRENT(instr->starttime);
|
INSTR_TIME_SET_CURRENT(instr->starttime);
|
||||||
}
|
|
||||||
else
|
else
|
||||||
elog(DEBUG2, "InstrStartNode called twice in a row");
|
elog(DEBUG2, "InstrStartNode called twice in a row");
|
||||||
}
|
}
|
||||||
@ -147,23 +42,11 @@ InstrStartNode(Instrumentation *instr)
|
|||||||
/* Exit from a plan node */
|
/* Exit from a plan node */
|
||||||
void
|
void
|
||||||
InstrStopNode(Instrumentation *instr, double nTuples)
|
InstrStopNode(Instrumentation *instr, double nTuples)
|
||||||
{
|
|
||||||
/* count the returned tuples and iterations */
|
|
||||||
instr->tuplecount += nTuples;
|
|
||||||
instr->itercount += 1;
|
|
||||||
|
|
||||||
/* measure runtime if appropriate */
|
|
||||||
if (instr->sampling)
|
|
||||||
{
|
{
|
||||||
instr_time endtime;
|
instr_time endtime;
|
||||||
|
|
||||||
/*
|
/* count the returned tuples */
|
||||||
* To be sure that SampleOverhead accurately reflects the extra
|
instr->tuplecount += nTuples;
|
||||||
* overhead, we must do INSTR_TIME_SET_CURRENT() as the *first*
|
|
||||||
* action that is different between the sampling and non-sampling
|
|
||||||
* code paths.
|
|
||||||
*/
|
|
||||||
INSTR_TIME_SET_CURRENT(endtime);
|
|
||||||
|
|
||||||
if (INSTR_TIME_IS_ZERO(instr->starttime))
|
if (INSTR_TIME_IS_ZERO(instr->starttime))
|
||||||
{
|
{
|
||||||
@ -171,6 +54,8 @@ InstrStopNode(Instrumentation *instr, double nTuples)
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
INSTR_TIME_SET_CURRENT(endtime);
|
||||||
|
|
||||||
#ifndef WIN32
|
#ifndef WIN32
|
||||||
instr->counter.tv_sec += endtime.tv_sec - instr->starttime.tv_sec;
|
instr->counter.tv_sec += endtime.tv_sec - instr->starttime.tv_sec;
|
||||||
instr->counter.tv_usec += endtime.tv_usec - instr->starttime.tv_usec;
|
instr->counter.tv_usec += endtime.tv_usec - instr->starttime.tv_usec;
|
||||||
@ -192,10 +77,6 @@ InstrStopNode(Instrumentation *instr, double nTuples)
|
|||||||
|
|
||||||
INSTR_TIME_SET_ZERO(instr->starttime);
|
INSTR_TIME_SET_ZERO(instr->starttime);
|
||||||
|
|
||||||
instr->samplecount += 1;
|
|
||||||
instr->sampling = false;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Is this the first tuple of this cycle? */
|
/* Is this the first tuple of this cycle? */
|
||||||
if (!instr->running)
|
if (!instr->running)
|
||||||
{
|
{
|
||||||
@ -217,31 +98,9 @@ InstrEndLoop(Instrumentation *instr)
|
|||||||
if (!INSTR_TIME_IS_ZERO(instr->starttime))
|
if (!INSTR_TIME_IS_ZERO(instr->starttime))
|
||||||
elog(DEBUG2, "InstrEndLoop called on running node");
|
elog(DEBUG2, "InstrEndLoop called on running node");
|
||||||
|
|
||||||
/* Compute time spent in node */
|
/* Accumulate per-cycle statistics into totals */
|
||||||
totaltime = INSTR_TIME_GET_DOUBLE(instr->counter);
|
totaltime = INSTR_TIME_GET_DOUBLE(instr->counter);
|
||||||
|
|
||||||
/*
|
|
||||||
* If we didn't measure runtime on every iteration, then we must increase
|
|
||||||
* the measured total to account for the other iterations. Naively
|
|
||||||
* multiplying totaltime by itercount/samplecount would be wrong because
|
|
||||||
* it effectively assumes the sampling overhead applies to all iterations,
|
|
||||||
* even the ones we didn't measure. (Note that what we are trying to
|
|
||||||
* estimate here is the actual time spent in the node, including the
|
|
||||||
* actual measurement overhead; not the time exclusive of measurement
|
|
||||||
* overhead.) We exclude the first iteration from the correction basis,
|
|
||||||
* because it often takes longer than others.
|
|
||||||
*/
|
|
||||||
if (instr->itercount > instr->samplecount)
|
|
||||||
{
|
|
||||||
double per_iter;
|
|
||||||
|
|
||||||
per_iter = (totaltime - instr->firsttuple) / (instr->samplecount - 1)
|
|
||||||
- SampleOverhead;
|
|
||||||
if (per_iter > 0) /* sanity check */
|
|
||||||
totaltime += per_iter * (instr->itercount - instr->samplecount);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Accumulate per-cycle statistics into totals */
|
|
||||||
instr->startup += instr->firsttuple;
|
instr->startup += instr->firsttuple;
|
||||||
instr->total += totaltime;
|
instr->total += totaltime;
|
||||||
instr->ntuples += instr->tuplecount;
|
instr->ntuples += instr->tuplecount;
|
||||||
@ -249,12 +108,8 @@ InstrEndLoop(Instrumentation *instr)
|
|||||||
|
|
||||||
/* Reset for next cycle (if any) */
|
/* Reset for next cycle (if any) */
|
||||||
instr->running = false;
|
instr->running = false;
|
||||||
instr->sampling = false;
|
|
||||||
INSTR_TIME_SET_ZERO(instr->starttime);
|
INSTR_TIME_SET_ZERO(instr->starttime);
|
||||||
INSTR_TIME_SET_ZERO(instr->counter);
|
INSTR_TIME_SET_ZERO(instr->counter);
|
||||||
instr->firsttuple = 0;
|
instr->firsttuple = 0;
|
||||||
instr->tuplecount = 0;
|
instr->tuplecount = 0;
|
||||||
instr->itercount = 0;
|
|
||||||
instr->samplecount = 0;
|
|
||||||
instr->nextsample = 0;
|
|
||||||
}
|
}
|
||||||
|
@ -6,7 +6,7 @@
|
|||||||
*
|
*
|
||||||
* Copyright (c) 2001-2006, PostgreSQL Global Development Group
|
* Copyright (c) 2001-2006, PostgreSQL Global Development Group
|
||||||
*
|
*
|
||||||
* $PostgreSQL: pgsql/src/include/executor/instrument.h,v 1.15 2006/05/30 19:24:25 tgl Exp $
|
* $PostgreSQL: pgsql/src/include/executor/instrument.h,v 1.16 2006/06/09 19:30:56 tgl Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -57,14 +57,10 @@ typedef struct Instrumentation
|
|||||||
{
|
{
|
||||||
/* Info about current plan cycle: */
|
/* Info about current plan cycle: */
|
||||||
bool running; /* TRUE if we've completed first tuple */
|
bool running; /* TRUE if we've completed first tuple */
|
||||||
bool sampling; /* Are we sampling in current iteration? */
|
|
||||||
instr_time starttime; /* Start time of current iteration of node */
|
instr_time starttime; /* Start time of current iteration of node */
|
||||||
instr_time counter; /* Accumulated runtime for this node */
|
instr_time counter; /* Accumulated runtime for this node */
|
||||||
double firsttuple; /* Time for first tuple of this cycle */
|
double firsttuple; /* Time for first tuple of this cycle */
|
||||||
double tuplecount; /* Tuples emitted so far this cycle */
|
double tuplecount; /* Tuples emitted so far this cycle */
|
||||||
double itercount; /* Plan node iterations this cycle */
|
|
||||||
double samplecount; /* Iterations in which we sampled runtime */
|
|
||||||
double nextsample; /* Next itercount to sample at */
|
|
||||||
/* Accumulated statistics across all completed cycles: */
|
/* Accumulated statistics across all completed cycles: */
|
||||||
double startup; /* Total startup time (in seconds) */
|
double startup; /* Total startup time (in seconds) */
|
||||||
double total; /* Total total time (in seconds) */
|
double total; /* Total total time (in seconds) */
|
||||||
|
Loading…
x
Reference in New Issue
Block a user