diff --git a/contrib/pg_stat_statements/expected/extended.out b/contrib/pg_stat_statements/expected/extended.out index 7da308ba84f..1bfd0c1ca24 100644 --- a/contrib/pg_stat_statements/expected/extended.out +++ b/contrib/pg_stat_statements/expected/extended.out @@ -69,13 +69,13 @@ SELECT calls, rows, query FROM pg_stat_statements ORDER BY query COLLATE "C"; (4 rows) -- Various parameter numbering patterns +-- Unique query IDs with parameter numbers switched. SELECT pg_stat_statements_reset() IS NOT NULL AS t; t --- t (1 row) --- Unique query IDs with parameter numbers switched. SELECT WHERE ($1::int, 7) IN ((8, $2::int), ($3::int, 9)) \bind '1' '2' '3' \g -- (0 rows) @@ -96,7 +96,24 @@ SELECT WHERE $3::int IN ($1::int, $2::int) \bind '1' '2' '3' \g -- (0 rows) +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +--------------------------------------------------------------+------- + SELECT WHERE $1::int IN ($2 /*, ... */) | 1 + SELECT WHERE $1::int IN ($2 /*, ... */) | 1 + SELECT WHERE $1::int IN ($2 /*, ... */) | 1 + SELECT WHERE ($1::int, $4) IN (($5, $2::int), ($3::int, $6)) | 1 + SELECT WHERE ($2::int, $4) IN (($5, $3::int), ($1::int, $6)) | 1 + SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 +(6 rows) + -- Two groups of two queries with the same query ID. 
+SELECT pg_stat_statements_reset() IS NOT NULL AS t; + t +--- + t +(1 row) + SELECT WHERE '1'::int IN ($1::int, '2'::int) \bind '1' \g -- (1 row) @@ -114,15 +131,34 @@ SELECT WHERE $2::int IN ($1::int, '2'::int) \bind '3' '4' \g (0 rows) SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; - query | calls ---------------------------------------------------------------+------- - SELECT WHERE $1::int IN ($2::int, $3::int) | 1 - SELECT WHERE $2::int IN ($1::int, $3::int) | 2 - SELECT WHERE $2::int IN ($1::int, $3::int) | 2 - SELECT WHERE $2::int IN ($3::int, $1::int) | 1 - SELECT WHERE $3::int IN ($1::int, $2::int) | 1 - SELECT WHERE ($1::int, $4) IN (($5, $2::int), ($3::int, $6)) | 1 - SELECT WHERE ($2::int, $4) IN (($5, $3::int), ($1::int, $6)) | 1 - SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 -(8 rows) + query | calls +----------------------------------------------------+------- + SELECT WHERE $1::int IN ($2 /*, ... */) | 2 + SELECT WHERE $1::int IN ($2 /*, ... */) | 2 + SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 +(3 rows) + +SELECT pg_stat_statements_reset() IS NOT NULL AS t; + t +--- + t +(1 row) + +-- no squashable list, the parameters id's are kept as-is +SELECT WHERE $3 = $1 AND $2 = $4 \bind 1 2 1 2 \g +-- +(1 row) + +-- squashable list, so the parameter IDs will be re-assigned +SELECT WHERE 1 IN (1, 2, 3) AND $3 = $1 AND $2 = $4 \bind 1 2 1 2 \g +-- +(1 row) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +------------------------------------------------------------+------- + SELECT WHERE $1 IN ($2 /*, ... 
*/) AND $3 = $4 AND $5 = $6 | 1 + SELECT WHERE $3 = $1 AND $2 = $4 | 1 + SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 +(3 rows) diff --git a/contrib/pg_stat_statements/expected/squashing.out b/contrib/pg_stat_statements/expected/squashing.out index e978564ad72..f952f47ef7b 100644 --- a/contrib/pg_stat_statements/expected/squashing.out +++ b/contrib/pg_stat_statements/expected/squashing.out @@ -103,7 +103,7 @@ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 (2 rows) --- external parameters will not be squashed +-- external parameters will be squashed SELECT pg_stat_statements_reset() IS NOT NULL AS t; t --- @@ -123,14 +123,14 @@ SELECT * FROM test_squash WHERE id::text = ANY(ARRAY[$1, $2, $3, $4, $5]) \bind (0 rows) SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; - query | calls ----------------------------------------------------------------------------+------- - SELECT * FROM test_squash WHERE id IN ($1, $2, $3, $4, $5) | 1 - SELECT * FROM test_squash WHERE id::text = ANY(ARRAY[$1, $2, $3, $4, $5]) | 1 - SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 + query | calls +----------------------------------------------------------------------+------- + SELECT * FROM test_squash WHERE id IN ($1 /*, ... */) | 1 + SELECT * FROM test_squash WHERE id::text = ANY(ARRAY[$1 /*, ... 
*/]) | 1 + SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 (3 rows) --- neither are prepared statements +-- prepared statements will also be squashed -- the IN and ARRAY forms of this statement will have the same queryId SELECT pg_stat_statements_reset() IS NOT NULL AS t; t @@ -155,12 +155,12 @@ EXECUTE p1(1, 2, 3, 4, 5); DEALLOCATE p1; SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; - query | calls -------------------------------------------------------------+------- - DEALLOCATE $1 | 2 - PREPARE p1(int, int, int, int, int) AS +| 2 - SELECT * FROM test_squash WHERE id IN ($1, $2, $3, $4, $5) | - SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 + query | calls +-------------------------------------------------------+------- + DEALLOCATE $1 | 2 + PREPARE p1(int, int, int, int, int) AS +| 2 + SELECT * FROM test_squash WHERE id IN ($1 /*, ... */) | + SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 (3 rows) -- More conditions in the query diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c index ecc7f2fb266..5597fcaaa05 100644 --- a/contrib/pg_stat_statements/pg_stat_statements.c +++ b/contrib/pg_stat_statements/pg_stat_statements.c @@ -2841,6 +2841,16 @@ generate_normalized_query(JumbleState *jstate, const char *query, int off, /* Offset from start for cur tok */ tok_len; /* Length (in bytes) of that tok */ + /* + * If we have an external param at this location, but no lists are + * being squashed across the query, then we skip here; this will make + * us print the characters found in the original query that + * represent the parameter in the next iteration (or after the loop is + * done), which is a bit odd but seems to work okay in most cases. 
+ */ + if (jstate->clocations[i].extern_param && !jstate->has_squashed_lists) + continue; + off = jstate->clocations[i].location; /* Adjust recorded location if we're dealing with partial string */ diff --git a/contrib/pg_stat_statements/sql/extended.sql b/contrib/pg_stat_statements/sql/extended.sql index a366658a53a..9a6518e2f04 100644 --- a/contrib/pg_stat_statements/sql/extended.sql +++ b/contrib/pg_stat_statements/sql/extended.sql @@ -21,17 +21,26 @@ SELECT $1 \bind 'unnamed_val1' \g SELECT calls, rows, query FROM pg_stat_statements ORDER BY query COLLATE "C"; -- Various parameter numbering patterns -SELECT pg_stat_statements_reset() IS NOT NULL AS t; -- Unique query IDs with parameter numbers switched. +SELECT pg_stat_statements_reset() IS NOT NULL AS t; SELECT WHERE ($1::int, 7) IN ((8, $2::int), ($3::int, 9)) \bind '1' '2' '3' \g SELECT WHERE ($2::int, 10) IN ((11, $3::int), ($1::int, 12)) \bind '1' '2' '3' \g SELECT WHERE $1::int IN ($2::int, $3::int) \bind '1' '2' '3' \g SELECT WHERE $2::int IN ($3::int, $1::int) \bind '1' '2' '3' \g SELECT WHERE $3::int IN ($1::int, $2::int) \bind '1' '2' '3' \g +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; -- Two groups of two queries with the same query ID. 
+SELECT pg_stat_statements_reset() IS NOT NULL AS t; SELECT WHERE '1'::int IN ($1::int, '2'::int) \bind '1' \g SELECT WHERE '4'::int IN ($1::int, '5'::int) \bind '2' \g SELECT WHERE $2::int IN ($1::int, '1'::int) \bind '1' '2' \g SELECT WHERE $2::int IN ($1::int, '2'::int) \bind '3' '4' \g +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; +SELECT pg_stat_statements_reset() IS NOT NULL AS t; + +-- no squashable list, the parameters id's are kept as-is +SELECT WHERE $3 = $1 AND $2 = $4 \bind 1 2 1 2 \g +-- squashable list, so the parameter IDs will be re-assigned +SELECT WHERE 1 IN (1, 2, 3) AND $3 = $1 AND $2 = $4 \bind 1 2 1 2 \g SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; diff --git a/contrib/pg_stat_statements/sql/squashing.sql b/contrib/pg_stat_statements/sql/squashing.sql index 946e149831c..53138d125a9 100644 --- a/contrib/pg_stat_statements/sql/squashing.sql +++ b/contrib/pg_stat_statements/sql/squashing.sql @@ -32,7 +32,7 @@ SELECT WHERE 1 IN (1, int4(1), int4(2), 2); SELECT WHERE 1 = ANY (ARRAY[1, int4(1), int4(2), 2]); SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; --- external parameters will not be squashed +-- external parameters will be squashed SELECT pg_stat_statements_reset() IS NOT NULL AS t; SELECT * FROM test_squash WHERE id IN ($1, $2, $3, $4, $5) \bind 1 2 3 4 5 ; @@ -40,7 +40,7 @@ SELECT * FROM test_squash WHERE id::text = ANY(ARRAY[$1, $2, $3, $4, $5]) \bind ; SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; --- neither are prepared statements +-- prepared statements will also be squashed -- the IN and ARRAY forms of this statement will have the same queryId SELECT pg_stat_statements_reset() IS NOT NULL AS t; PREPARE p1(int, int, int, int, int) AS diff --git a/src/backend/nodes/queryjumblefuncs.c b/src/backend/nodes/queryjumblefuncs.c index 62e3a677cd1..31f97151977 100644 --- a/src/backend/nodes/queryjumblefuncs.c +++ 
b/src/backend/nodes/queryjumblefuncs.c @@ -21,6 +21,11 @@ * tree(s) generated from the query. The executor can then use this value * to blame query costs on the proper queryId. * + * Arrays of two or more constants and PARAM_EXTERN parameters are "squashed" + * and contribute only once to the jumble. This has the effect that queries + * that differ only on the length of such lists have the same queryId. + * + * * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * @@ -61,11 +66,13 @@ static void AppendJumble(JumbleState *jstate, const unsigned char *value, Size size); static void FlushPendingNulls(JumbleState *jstate); static void RecordConstLocation(JumbleState *jstate, + bool extern_param, int location, int len); static void _jumbleNode(JumbleState *jstate, Node *node); -static void _jumbleElements(JumbleState *jstate, List *elements, Node *node); -static void _jumbleA_Const(JumbleState *jstate, Node *node); static void _jumbleList(JumbleState *jstate, Node *node); +static void _jumbleElements(JumbleState *jstate, List *elements, Node *node); +static void _jumbleParam(JumbleState *jstate, Node *node); +static void _jumbleA_Const(JumbleState *jstate, Node *node); static void _jumbleVariableSetStmt(JumbleState *jstate, Node *node); static void _jumbleRangeTblEntry_eref(JumbleState *jstate, RangeTblEntry *rte, @@ -185,6 +192,7 @@ InitJumble(void) jstate->clocations_count = 0; jstate->highest_extern_param_id = 0; jstate->pending_nulls = 0; + jstate->has_squashed_lists = false; #ifdef USE_ASSERT_CHECKING jstate->total_jumble_len = 0; #endif @@ -207,6 +215,10 @@ DoJumble(JumbleState *jstate, Node *node) if (jstate->pending_nulls > 0) FlushPendingNulls(jstate); + /* Squashed list found, reset highest_extern_param_id */ + if (jstate->has_squashed_lists) + jstate->highest_extern_param_id = 0; + /* Process the jumble buffer and produce the hash value */ return 
DatumGetInt64(hash_any_extended(jstate->jumble, jstate->jumble_len, @@ -376,14 +388,14 @@ FlushPendingNulls(JumbleState *jstate) * Record the location of some kind of constant within a query string. * These are not only bare constants but also expressions that ultimately * constitute a constant, such as those inside casts and simple function - * calls. + * calls; if extern_param, then it corresponds to a PARAM_EXTERN Param. * * If length is -1, it indicates a single such constant element. If * it's a positive integer, it indicates the length of a squashable * list of them. */ static void -RecordConstLocation(JumbleState *jstate, int location, int len) +RecordConstLocation(JumbleState *jstate, bool extern_param, int location, int len) { /* -1 indicates unknown or undefined location */ if (location >= 0) @@ -406,6 +418,7 @@ RecordConstLocation(JumbleState *jstate, int location, int len) Assert(len > -1 || len == -1); jstate->clocations[jstate->clocations_count].length = len; jstate->clocations[jstate->clocations_count].squashed = (len > -1); + jstate->clocations[jstate->clocations_count].extern_param = extern_param; jstate->clocations_count++; } } @@ -417,7 +430,8 @@ RecordConstLocation(JumbleState *jstate, int location, int len) * - See through any wrapping RelabelType and CoerceViaIO layers. * - If it's a FuncExpr, check that the function is a builtin * cast and its arguments are Const. - * - Otherwise test if the expression is a simple Const. + * - Otherwise test if the expression is a simple Const or a + * PARAM_EXTERN param. */ static bool IsSquashableConstant(Node *element) @@ -438,6 +452,9 @@ restart: case T_Const: return true; + case T_Param: + return castNode(Param, element)->paramkind == PARAM_EXTERN; + case T_FuncExpr: { FuncExpr *func = (FuncExpr *) element; @@ -487,8 +504,8 @@ restart: * Return value indicates if squashing is possible. 
* * Note that this function searches only for explicit Const nodes with - * possibly very simple decorations on top, and does not try to simplify - * expressions. + * possibly very simple decorations on top and PARAM_EXTERN parameters, + * and does not try to simplify expressions. */ static bool IsSquashableConstantList(List *elements) @@ -513,7 +530,7 @@ IsSquashableConstantList(List *elements) #define JUMBLE_ELEMENTS(list, node) \ _jumbleElements(jstate, (List *) expr->list, node) #define JUMBLE_LOCATION(location) \ - RecordConstLocation(jstate, expr->location, -1) + RecordConstLocation(jstate, false, expr->location, -1) #define JUMBLE_FIELD(item) \ do { \ if (sizeof(expr->item) == 8) \ @@ -540,42 +557,6 @@ do { \ #include "queryjumblefuncs.funcs.c" -/* - * We try to jumble lists of expressions as one individual item regardless - * of how many elements are in the list. This is know as squashing, which - * results in different queries jumbling to the same query_id, if the only - * difference is the number of elements in the list. - * - * We allow constants to be squashed. To normalize such queries, we use - * the start and end locations of the list of elements in a list. 
- */ -static void -_jumbleElements(JumbleState *jstate, List *elements, Node *node) -{ - bool normalize_list = false; - - if (IsSquashableConstantList(elements)) - { - if (IsA(node, ArrayExpr)) - { - ArrayExpr *aexpr = (ArrayExpr *) node; - - if (aexpr->list_start > 0 && aexpr->list_end > 0) - { - RecordConstLocation(jstate, - aexpr->list_start + 1, - (aexpr->list_end - aexpr->list_start) - 1); - normalize_list = true; - } - } - } - - if (!normalize_list) - { - _jumbleNode(jstate, (Node *) elements); - } -} - static void _jumbleNode(JumbleState *jstate, Node *node) { @@ -617,26 +598,6 @@ _jumbleNode(JumbleState *jstate, Node *node) break; } - /* Special cases to handle outside the automated code */ - switch (nodeTag(expr)) - { - case T_Param: - { - Param *p = (Param *) node; - - /* - * Update the highest Param id seen, in order to start - * normalization correctly. - */ - if (p->paramkind == PARAM_EXTERN && - p->paramid > jstate->highest_extern_param_id) - jstate->highest_extern_param_id = p->paramid; - } - break; - default: - break; - } - /* Ensure we added something to the jumble buffer */ Assert(jstate->total_jumble_len > prev_jumble_len); } @@ -672,6 +633,79 @@ _jumbleList(JumbleState *jstate, Node *node) } } +/* + * We try to jumble lists of expressions as one individual item regardless + * of how many elements are in the list. This is known as squashing, which + * results in different queries jumbling to the same query_id, if the only + * difference is the number of elements in the list. + * + * We allow constants and PARAM_EXTERN parameters to be squashed. To normalize + * such queries, we use the start and end locations of the list of elements in + * a list. 
+ */ +static void +_jumbleElements(JumbleState *jstate, List *elements, Node *node) +{ + bool normalize_list = false; + + if (IsSquashableConstantList(elements)) + { + if (IsA(node, ArrayExpr)) + { + ArrayExpr *aexpr = (ArrayExpr *) node; + + if (aexpr->list_start > 0 && aexpr->list_end > 0) + { + RecordConstLocation(jstate, + false, + aexpr->list_start + 1, + (aexpr->list_end - aexpr->list_start) - 1); + normalize_list = true; + jstate->has_squashed_lists = true; + } + } + } + + if (!normalize_list) + { + _jumbleNode(jstate, (Node *) elements); + } +} + +/* + * We store the highest param ID of extern params. This can later be used + * to start the numbering of the placeholder for squashed lists. + */ +static void +_jumbleParam(JumbleState *jstate, Node *node) +{ + Param *expr = (Param *) node; + + JUMBLE_FIELD(paramkind); + JUMBLE_FIELD(paramid); + JUMBLE_FIELD(paramtype); + /* paramtypmod and paramcollid are ignored */ + + if (expr->paramkind == PARAM_EXTERN) + { + /* + * At this point, only external parameter locations outside of + * squashable lists will be recorded. + */ + RecordConstLocation(jstate, true, expr->location, -1); + + /* + * Update the highest Param id seen, in order to start normalization + * correctly. + * + * Note: This value is reset at the end of jumbling if there exists a + * squashable list. See the comment in the definition of JumbleState. + */ + if (expr->paramid > jstate->highest_extern_param_id) + jstate->highest_extern_param_id = expr->paramid; + } +} + static void _jumbleA_Const(JumbleState *jstate, Node *node) { diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h index 01510b01b64..6dfca3cb35b 100644 --- a/src/include/nodes/primnodes.h +++ b/src/include/nodes/primnodes.h @@ -389,14 +389,16 @@ typedef enum ParamKind typedef struct Param { + pg_node_attr(custom_query_jumble) + Expr xpr; ParamKind paramkind; /* kind of parameter. 
See above */ int paramid; /* numeric ID for parameter */ Oid paramtype; /* pg_type OID of parameter's datatype */ /* typmod value, if known */ - int32 paramtypmod pg_node_attr(query_jumble_ignore); + int32 paramtypmod; /* OID of collation, or InvalidOid if none */ - Oid paramcollid pg_node_attr(query_jumble_ignore); + Oid paramcollid; /* token location, or -1 if unknown */ ParseLoc location; } Param; diff --git a/src/include/nodes/queryjumble.h b/src/include/nodes/queryjumble.h index da7c7abed2e..dcb36dcb44f 100644 --- a/src/include/nodes/queryjumble.h +++ b/src/include/nodes/queryjumble.h @@ -24,11 +24,11 @@ typedef struct LocationLen int location; /* start offset in query text */ int length; /* length in bytes, or -1 to ignore */ - /* - * Indicates that this location represents the beginning or end of a run - * of squashed constants. - */ + /* Does this location represent a squashed list? */ bool squashed; + + /* Is this location a PARAM_EXTERN parameter? */ + bool extern_param; } LocationLen; /* @@ -52,9 +52,18 @@ typedef struct JumbleState /* Current number of valid entries in clocations array */ int clocations_count; - /* highest Param id we've seen, in order to start normalization correctly */ + /* + * ID of the highest PARAM_EXTERN parameter we've seen in the query; used + * to start normalization correctly. However, if there are any squashed + * lists in the query, we disregard query-supplied parameter numbers and + * renumber everything. This is to avoid possible gaps caused by + * squashing in case any params are in squashed lists. + */ int highest_extern_param_id; + /* Whether squashable lists are present */ + bool has_squashed_lists; + /* * Count of the number of NULL nodes seen since last appending a value. * These are flushed out to the jumble buffer before subsequent appends