1
0
mirror of https://github.com/postgres/postgres.git synced 2025-07-28 23:42:10 +03:00

Fix squashing algorithm for query texts

The algorithm to squash lists of constants added by commit 62d712ecfd
was a bit too simplistic; we wanted to avoid adding unnecessary
complexity, but cases like direct function calls of typecasting
functions (and others) were missed, and bogus SQL syntax was being shown
in pg_stat_statements normalized query text field.  To fix normalization
for those cases, we need the parser to transmit information about were
each list of constant values starts and ends, so add that to a couple of
nodes.  Also add a few more test cases to make sure we're doing the
right thing.

The patch initially submitted by Sami added a new private struct in
gram.y to carry the start/end information for A_Expr, but I (Álvaro)
decided that a better fix was to remove the parser indirection via the
in_expr production, and instead create separate components in the a_expr
rule.  I'm surprised that this works and doesn't require more changes,
but I assume (without checking) that the grammar used to be more complex
and got simplified at some point.

Bump catversion.

Author: Sami Imseih <samimseih@gmail.com>
Author: Dmitry Dolgov <9erthalion6@gmail.com>
Reviewed-by: Michael Paquier <michael@paquier.xyz>
Discussion: https://postgr.es/m/CAA5RZ0tRXoPG2y6bMgBCWNDt0Tn=unRerbzYM=oW0syi1=C1OA@mail.gmail.com
This commit is contained in:
Álvaro Herrera
2025-06-12 14:21:21 +02:00
parent f7b11414e9
commit 0f65f3eec4
14 changed files with 840 additions and 299 deletions

View File

@ -3,101 +3,160 @@
--
CREATE EXTENSION pg_stat_statements;
--
-- Simple Lists
--
CREATE TABLE test_squash (id int, data int);
-- IN queries
-- Normal scenario, too many simple constants for an IN query
-- single element will not be squashed
SELECT pg_stat_statements_reset() IS NOT NULL AS t;
SELECT * FROM test_squash WHERE id IN (1);
SELECT * FROM test_squash WHERE id IN (1, 2, 3);
SELECT ARRAY[1];
SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
SELECT * FROM test_squash WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
SELECT * FROM test_squash WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
SELECT * FROM test_squash WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
-- more than 1 element in a list will be squashed
SELECT pg_stat_statements_reset() IS NOT NULL AS t;
SELECT * FROM test_squash WHERE id IN (1, 2, 3);
SELECT * FROM test_squash WHERE id IN (1, 2, 3, 4);
SELECT * FROM test_squash WHERE id IN (1, 2, 3, 4, 5);
SELECT ARRAY[1, 2, 3];
SELECT ARRAY[1, 2, 3, 4];
SELECT ARRAY[1, 2, 3, 4, 5];
SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
-- built-in functions will be squashed
-- the IN and ARRAY forms of this statement will have the same queryId
SELECT pg_stat_statements_reset() IS NOT NULL AS t;
SELECT WHERE 1 IN (1, int4(1), int4(2), 2);
SELECT WHERE 1 = ANY (ARRAY[1, int4(1), int4(2), 2]);
SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
-- external parameters will not be squashed
SELECT pg_stat_statements_reset() IS NOT NULL AS t;
SELECT * FROM test_squash WHERE id IN ($1, $2, $3, $4, $5) \bind 1 2 3 4 5
;
SELECT * FROM test_squash WHERE id::text = ANY(ARRAY[$1, $2, $3, $4, $5]) \bind 1 2 3 4 5
;
SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
-- neither are prepared statements
-- the IN and ARRAY forms of this statement will have the same queryId
SELECT pg_stat_statements_reset() IS NOT NULL AS t;
PREPARE p1(int, int, int, int, int) AS
SELECT * FROM test_squash WHERE id IN ($1, $2, $3, $4, $5);
EXECUTE p1(1, 2, 3, 4, 5);
DEALLOCATE p1;
PREPARE p1(int, int, int, int, int) AS
SELECT * FROM test_squash WHERE id = ANY(ARRAY[$1, $2, $3, $4, $5]);
EXECUTE p1(1, 2, 3, 4, 5);
DEALLOCATE p1;
SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
-- More conditions in the query
SELECT pg_stat_statements_reset() IS NOT NULL AS t;
SELECT * FROM test_squash WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9) AND data = 2;
SELECT * FROM test_squash WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10) AND data = 2;
SELECT * FROM test_squash WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11) AND data = 2;
SELECT * FROM test_squash WHERE id = ANY (ARRAY[1, 2, 3, 4, 5, 6, 7, 8, 9]) AND data = 2;
SELECT * FROM test_squash WHERE id = ANY (ARRAY[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]) AND data = 2;
SELECT * FROM test_squash WHERE id = ANY (ARRAY[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]) AND data = 2;
SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
-- Multiple squashed intervals
SELECT pg_stat_statements_reset() IS NOT NULL AS t;
SELECT * FROM test_squash WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9)
AND data IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
SELECT * FROM test_squash WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10)
AND data IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
SELECT * FROM test_squash WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11)
AND data IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
SELECT * FROM test_squash WHERE id = ANY (ARRAY[1, 2, 3, 4, 5, 6, 7, 8, 9])
AND data = ANY (ARRAY[1, 2, 3, 4, 5, 6, 7, 8, 9]);
SELECT * FROM test_squash WHERE id = ANY (ARRAY[1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
AND data = ANY (ARRAY[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]);
SELECT * FROM test_squash WHERE id = ANY (ARRAY[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11])
AND data = ANY (ARRAY[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]);
SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
-- No constants simplification for OpExpr
SELECT pg_stat_statements_reset() IS NOT NULL AS t;
-- In the following two queries the operator expressions (+) and (@) have
-- different oppno, and will be given different query_id if squashed, even though
-- the normalized query will be the same
-- No constants squashing for OpExpr
-- The IN and ARRAY forms of this statement will have the same queryId
SELECT pg_stat_statements_reset() IS NOT NULL AS t;
SELECT * FROM test_squash WHERE id IN
(1 + 1, 2 + 2, 3 + 3, 4 + 4, 5 + 5, 6 + 6, 7 + 7, 8 + 8, 9 + 9);
SELECT * FROM test_squash WHERE id IN
(@ '-1', @ '-2', @ '-3', @ '-4', @ '-5', @ '-6', @ '-7', @ '-8', @ '-9');
SELECT * FROM test_squash WHERE id = ANY(ARRAY
[1 + 1, 2 + 2, 3 + 3, 4 + 4, 5 + 5, 6 + 6, 7 + 7, 8 + 8, 9 + 9]);
SELECT * FROM test_squash WHERE id = ANY(ARRAY
[@ '-1', @ '-2', @ '-3', @ '-4', @ '-5', @ '-6', @ '-7', @ '-8', @ '-9']);
SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
--
-- FuncExpr
--
-- Verify multiple type representation end up with the same query_id
CREATE TABLE test_float (data float);
-- The casted ARRAY expressions will have the same queryId as the IN clause
-- form of the query
SELECT pg_stat_statements_reset() IS NOT NULL AS t;
SELECT data FROM test_float WHERE data IN (1, 2);
SELECT data FROM test_float WHERE data IN (1, '2');
SELECT data FROM test_float WHERE data IN ('1', 2);
SELECT data FROM test_float WHERE data IN ('1', '2');
SELECT data FROM test_float WHERE data IN (1.0, 1.0);
SELECT data FROM test_float WHERE data = ANY(ARRAY['1'::double precision, '2'::double precision]);
SELECT data FROM test_float WHERE data = ANY(ARRAY[1.0::double precision, 1.0::double precision]);
SELECT data FROM test_float WHERE data = ANY(ARRAY[1, 2]);
SELECT data FROM test_float WHERE data = ANY(ARRAY[1, '2']);
SELECT data FROM test_float WHERE data = ANY(ARRAY['1', 2]);
SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
-- Numeric type, implicit cast is squashed
CREATE TABLE test_squash_numeric (id int, data numeric(5, 2));
SELECT pg_stat_statements_reset() IS NOT NULL AS t;
SELECT * FROM test_squash_numeric WHERE data IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
SELECT * FROM test_squash_numeric WHERE data = ANY(ARRAY[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]);
SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
-- Bigint, implicit cast is squashed
CREATE TABLE test_squash_bigint (id int, data bigint);
SELECT pg_stat_statements_reset() IS NOT NULL AS t;
SELECT * FROM test_squash_bigint WHERE data IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
SELECT * FROM test_squash_bigint WHERE data = ANY(ARRAY[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]);
SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
-- Bigint, explicit cast is not squashed
-- Bigint, explicit cast is squashed
SELECT pg_stat_statements_reset() IS NOT NULL AS t;
SELECT * FROM test_squash_bigint WHERE data IN
(1::bigint, 2::bigint, 3::bigint, 4::bigint, 5::bigint, 6::bigint,
7::bigint, 8::bigint, 9::bigint, 10::bigint, 11::bigint);
SELECT * FROM test_squash_bigint WHERE data = ANY(ARRAY[
1::bigint, 2::bigint, 3::bigint, 4::bigint, 5::bigint, 6::bigint,
7::bigint, 8::bigint, 9::bigint, 10::bigint, 11::bigint]);
SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
-- Bigint, long tokens with parenthesis
-- Bigint, long tokens with parenthesis, will not squash
SELECT pg_stat_statements_reset() IS NOT NULL AS t;
SELECT * FROM test_squash_bigint WHERE id IN
(abs(100), abs(200), abs(300), abs(400), abs(500), abs(600), abs(700),
abs(800), abs(900), abs(1000), ((abs(1100))));
SELECT * FROM test_squash_bigint WHERE id = ANY(ARRAY[
abs(100), abs(200), abs(300), abs(400), abs(500), abs(600), abs(700),
abs(800), abs(900), abs(1000), ((abs(1100)))]);
SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
-- CoerceViaIO, SubLink instead of a Const
CREATE TABLE test_squash_jsonb (id int, data jsonb);
-- Multiple FuncExpr's. Will not squash
SELECT pg_stat_statements_reset() IS NOT NULL AS t;
SELECT * FROM test_squash_jsonb WHERE data IN
((SELECT '"1"')::jsonb, (SELECT '"2"')::jsonb, (SELECT '"3"')::jsonb,
(SELECT '"4"')::jsonb, (SELECT '"5"')::jsonb, (SELECT '"6"')::jsonb,
(SELECT '"7"')::jsonb, (SELECT '"8"')::jsonb, (SELECT '"9"')::jsonb,
(SELECT '"10"')::jsonb);
SELECT WHERE 1 IN (1::int::bigint::int, 2::int::bigint::int);
SELECT WHERE 1 = ANY(ARRAY[1::int::bigint::int, 2::int::bigint::int]);
SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
--
-- CoerceViaIO
--
-- Create some dummy type to force CoerceViaIO
CREATE TYPE casttesttype;
@ -141,19 +200,73 @@ SELECT * FROM test_squash_cast WHERE data IN
4::int4::casttesttype, 5::int4::casttesttype, 6::int4::casttesttype,
7::int4::casttesttype, 8::int4::casttesttype, 9::int4::casttesttype,
10::int4::casttesttype, 11::int4::casttesttype);
SELECT * FROM test_squash_cast WHERE data = ANY (ARRAY
[1::int4::casttesttype, 2::int4::casttesttype, 3::int4::casttesttype,
4::int4::casttesttype, 5::int4::casttesttype, 6::int4::casttesttype,
7::int4::casttesttype, 8::int4::casttesttype, 9::int4::casttesttype,
10::int4::casttesttype, 11::int4::casttesttype]);
SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
-- Some casting expression are simplified to Const
CREATE TABLE test_squash_jsonb (id int, data jsonb);
SELECT pg_stat_statements_reset() IS NOT NULL AS t;
SELECT * FROM test_squash_jsonb WHERE data IN
(('"1"')::jsonb, ('"2"')::jsonb, ('"3"')::jsonb, ('"4"')::jsonb,
( '"5"')::jsonb, ( '"6"')::jsonb, ( '"7"')::jsonb, ( '"8"')::jsonb,
( '"9"')::jsonb, ( '"10"')::jsonb);
('"5"')::jsonb, ('"6"')::jsonb, ('"7"')::jsonb, ('"8"')::jsonb,
('"9"')::jsonb, ('"10"')::jsonb);
SELECT * FROM test_squash_jsonb WHERE data = ANY (ARRAY
[('"1"')::jsonb, ('"2"')::jsonb, ('"3"')::jsonb, ('"4"')::jsonb,
('"5"')::jsonb, ('"6"')::jsonb, ('"7"')::jsonb, ('"8"')::jsonb,
('"9"')::jsonb, ('"10"')::jsonb]);
SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
-- RelabelType
-- CoerceViaIO, SubLink instead of a Const. Will not squash
SELECT pg_stat_statements_reset() IS NOT NULL AS t;
SELECT * FROM test_squash WHERE id IN (1::oid, 2::oid, 3::oid, 4::oid, 5::oid, 6::oid, 7::oid, 8::oid, 9::oid);
SELECT * FROM test_squash_jsonb WHERE data IN
((SELECT '"1"')::jsonb, (SELECT '"2"')::jsonb, (SELECT '"3"')::jsonb,
(SELECT '"4"')::jsonb, (SELECT '"5"')::jsonb, (SELECT '"6"')::jsonb,
(SELECT '"7"')::jsonb, (SELECT '"8"')::jsonb, (SELECT '"9"')::jsonb,
(SELECT '"10"')::jsonb);
SELECT * FROM test_squash_jsonb WHERE data = ANY(ARRAY
[(SELECT '"1"')::jsonb, (SELECT '"2"')::jsonb, (SELECT '"3"')::jsonb,
(SELECT '"4"')::jsonb, (SELECT '"5"')::jsonb, (SELECT '"6"')::jsonb,
(SELECT '"7"')::jsonb, (SELECT '"8"')::jsonb, (SELECT '"9"')::jsonb,
(SELECT '"10"')::jsonb]);
SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
-- Multiple CoerceViaIO wrapping a constant. Will not squash
SELECT pg_stat_statements_reset() IS NOT NULL AS t;
SELECT WHERE 1 IN (1::text::int::text::int, 1::text::int::text::int);
SELECT WHERE 1 = ANY(ARRAY[1::text::int::text::int, 1::text::int::text::int]);
SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
--
-- RelabelType
--
SELECT pg_stat_statements_reset() IS NOT NULL AS t;
-- if there is only one level of RelabelType, the list will be squashable
SELECT * FROM test_squash WHERE id IN
(1::oid, 2::oid, 3::oid, 4::oid, 5::oid, 6::oid, 7::oid, 8::oid, 9::oid);
SELECT ARRAY[1::oid, 2::oid, 3::oid, 4::oid, 5::oid, 6::oid, 7::oid, 8::oid, 9::oid];
-- if there is at least one element with multiple levels of RelabelType,
-- the list will not be squashable
SELECT * FROM test_squash WHERE id IN (1::oid, 2::oid::int::oid);
SELECT * FROM test_squash WHERE id = ANY(ARRAY[1::oid, 2::oid::int::oid]);
SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
--
-- edge cases
--
SELECT pg_stat_statements_reset() IS NOT NULL AS t;
-- for nested arrays, only constants are squashed
SELECT ARRAY[
ARRAY[1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
ARRAY[1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
ARRAY[1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
ARRAY[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
];
SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
-- Test constants evaluation in a CTE, which was causing issues in the past
@ -163,7 +276,26 @@ WITH cte AS (
SELECT ARRAY['a', 'b', 'c', const::varchar] AS result
FROM cte;
-- Simple array would be squashed as well
SELECT pg_stat_statements_reset() IS NOT NULL AS t;
SELECT ARRAY[1, 2, 3, 4, 5, 6, 7, 8, 9, 10];
-- Rewritten as an OpExpr, so it will not be squashed
select where '1' IN ('1'::int, '2'::int::text);
-- Rewritten as an ArrayExpr, so it will be squashed
select where '1' IN ('1'::int, '2'::int);
SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
SELECT pg_stat_statements_reset() IS NOT NULL AS t;
-- Both of these queries will be rewritten as an ArrayExpr, so they
-- will be squashed, and have a similar queryId
select where '1' IN ('1'::int::text, '2'::int::text);
select where '1' = ANY (array['1'::int::text, '2'::int::text]);
SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
--
-- cleanup
--
DROP TABLE test_squash;
DROP TABLE test_float;
DROP TABLE test_squash_numeric;
DROP TABLE test_squash_bigint;
DROP TABLE test_squash_cast CASCADE;
DROP TABLE test_squash_jsonb;