mirror of
				https://github.com/postgres/postgres.git
				synced 2025-11-03 09:13:20 +03:00 
			
		
		
		
	Fix contrib/auto_explain to not cause problems in parallel workers.
A parallel worker process should not be making any decisions of its own about whether to auto-explain. If the parent session process passed down flags asking for instrumentation data, do that, otherwise not. Trying to enable instrumentation anyway leads to bugs like the "could not find key N in shm TOC" failure reported in bug #15821 from Christian Hofstaedtler. We can implement this cheaply by piggybacking on the existing logic for not doing anything when we've chosen not to sample a statement. While at it, clean up some tin-eared coding related to the sampling feature, including an off-by-one error that meant that asking for 1.0 sampling rate didn't actually result in sampling every statement. Although the specific case reported here only manifested in >= v11, I believe that related misbehaviors can be demonstrated in any version that has parallel query; and the off-by-one error is certainly there back to 9.6 where that feature was added. So back-patch to 9.6. Discussion: https://postgr.es/m/15821-5eb422e980594075@postgresql.org
This commit is contained in:
		@@ -14,6 +14,7 @@
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
#include <limits.h>
 | 
					#include <limits.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include "access/parallel.h"
 | 
				
			||||||
#include "commands/explain.h"
 | 
					#include "commands/explain.h"
 | 
				
			||||||
#include "executor/instrument.h"
 | 
					#include "executor/instrument.h"
 | 
				
			||||||
#include "jit/jit.h"
 | 
					#include "jit/jit.h"
 | 
				
			||||||
@@ -59,19 +60,20 @@ static const struct config_enum_entry loglevel_options[] = {
 | 
				
			|||||||
/* Current nesting depth of ExecutorRun calls */
 | 
					/* Current nesting depth of ExecutorRun calls */
 | 
				
			||||||
static int	nesting_level = 0;
 | 
					static int	nesting_level = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/* Is the current top-level query to be sampled? */
 | 
				
			||||||
 | 
					static bool current_query_sampled = false;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#define auto_explain_enabled() \
 | 
				
			||||||
 | 
						(auto_explain_log_min_duration >= 0 && \
 | 
				
			||||||
 | 
						 (nesting_level == 0 || auto_explain_log_nested_statements) && \
 | 
				
			||||||
 | 
						 current_query_sampled)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/* Saved hook values in case of unload */
 | 
					/* Saved hook values in case of unload */
 | 
				
			||||||
static ExecutorStart_hook_type prev_ExecutorStart = NULL;
 | 
					static ExecutorStart_hook_type prev_ExecutorStart = NULL;
 | 
				
			||||||
static ExecutorRun_hook_type prev_ExecutorRun = NULL;
 | 
					static ExecutorRun_hook_type prev_ExecutorRun = NULL;
 | 
				
			||||||
static ExecutorFinish_hook_type prev_ExecutorFinish = NULL;
 | 
					static ExecutorFinish_hook_type prev_ExecutorFinish = NULL;
 | 
				
			||||||
static ExecutorEnd_hook_type prev_ExecutorEnd = NULL;
 | 
					static ExecutorEnd_hook_type prev_ExecutorEnd = NULL;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/* Is the current query sampled, per backend */
 | 
					 | 
				
			||||||
static bool current_query_sampled = true;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#define auto_explain_enabled() \
 | 
					 | 
				
			||||||
	(auto_explain_log_min_duration >= 0 && \
 | 
					 | 
				
			||||||
	 (nesting_level == 0 || auto_explain_log_nested_statements))
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void		_PG_init(void);
 | 
					void		_PG_init(void);
 | 
				
			||||||
void		_PG_fini(void);
 | 
					void		_PG_fini(void);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -249,14 +251,25 @@ static void
 | 
				
			|||||||
explain_ExecutorStart(QueryDesc *queryDesc, int eflags)
 | 
					explain_ExecutorStart(QueryDesc *queryDesc, int eflags)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	/*
 | 
						/*
 | 
				
			||||||
	 * For rate sampling, randomly choose top-level statement. Either all
 | 
						 * At the beginning of each top-level statement, decide whether we'll
 | 
				
			||||||
	 * nested statements will be explained or none will.
 | 
						 * sample this statement.  If nested-statement explaining is enabled,
 | 
				
			||||||
 | 
						 * either all nested statements will be explained or none will.
 | 
				
			||||||
 | 
						 *
 | 
				
			||||||
 | 
						 * When in a parallel worker, we should do nothing, which we can implement
 | 
				
			||||||
 | 
						 * cheaply by pretending we decided not to sample the current statement.
 | 
				
			||||||
 | 
						 * If EXPLAIN is active in the parent session, data will be collected and
 | 
				
			||||||
 | 
						 * reported back to the parent, and it's no business of ours to interfere.
 | 
				
			||||||
	 */
 | 
						 */
 | 
				
			||||||
	if (auto_explain_log_min_duration >= 0 && nesting_level == 0)
 | 
						if (nesting_level == 0)
 | 
				
			||||||
 | 
						{
 | 
				
			||||||
 | 
							if (auto_explain_log_min_duration >= 0 && !IsParallelWorker())
 | 
				
			||||||
			current_query_sampled = (random() < auto_explain_sample_rate *
 | 
								current_query_sampled = (random() < auto_explain_sample_rate *
 | 
				
			||||||
								 MAX_RANDOM_VALUE);
 | 
														 ((double) MAX_RANDOM_VALUE + 1));
 | 
				
			||||||
 | 
							else
 | 
				
			||||||
 | 
								current_query_sampled = false;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (auto_explain_enabled() && current_query_sampled)
 | 
						if (auto_explain_enabled())
 | 
				
			||||||
	{
 | 
						{
 | 
				
			||||||
		/* Enable per-node instrumentation iff log_analyze is required. */
 | 
							/* Enable per-node instrumentation iff log_analyze is required. */
 | 
				
			||||||
		if (auto_explain_log_analyze && (eflags & EXEC_FLAG_EXPLAIN_ONLY) == 0)
 | 
							if (auto_explain_log_analyze && (eflags & EXEC_FLAG_EXPLAIN_ONLY) == 0)
 | 
				
			||||||
@@ -275,7 +288,7 @@ explain_ExecutorStart(QueryDesc *queryDesc, int eflags)
 | 
				
			|||||||
	else
 | 
						else
 | 
				
			||||||
		standard_ExecutorStart(queryDesc, eflags);
 | 
							standard_ExecutorStart(queryDesc, eflags);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (auto_explain_enabled() && current_query_sampled)
 | 
						if (auto_explain_enabled())
 | 
				
			||||||
	{
 | 
						{
 | 
				
			||||||
		/*
 | 
							/*
 | 
				
			||||||
		 * Set up to track total elapsed time in ExecutorRun.  Make sure the
 | 
							 * Set up to track total elapsed time in ExecutorRun.  Make sure the
 | 
				
			||||||
@@ -346,7 +359,7 @@ explain_ExecutorFinish(QueryDesc *queryDesc)
 | 
				
			|||||||
static void
 | 
					static void
 | 
				
			||||||
explain_ExecutorEnd(QueryDesc *queryDesc)
 | 
					explain_ExecutorEnd(QueryDesc *queryDesc)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	if (queryDesc->totaltime && auto_explain_enabled() && current_query_sampled)
 | 
						if (queryDesc->totaltime && auto_explain_enabled())
 | 
				
			||||||
	{
 | 
						{
 | 
				
			||||||
		double		msec;
 | 
							double		msec;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user