1
0
mirror of https://github.com/postgres/postgres.git synced 2025-07-30 11:03:19 +03:00

Fix squashing algorithm for query texts

The algorithm to squash lists of constants added by commit 62d712ecfd
was a bit too simplistic; we wanted to avoid adding unnecessary
complexity, but cases like direct function calls of typecasting
functions (and others) were missed, and bogus SQL syntax was being shown
in pg_stat_statements normalized query text field.  To fix normalization
for those cases, we need the parser to transmit information about were
each list of constant values starts and ends, so add that to a couple of
nodes.  Also add a few more test cases to make sure we're doing the
right thing.

The patch initially submitted by Sami added a new private struct in
gram.y to carry the start/end information for A_Expr, but I (Álvaro)
decided that a better fix was to remove the parser indirection via the
in_expr production, and instead create separate components in the a_expr
rule.  I'm surprised that this works and doesn't require more changes,
but I assume (without checking) that the grammar used to be more complex
and got simplified at some point.

Bump catversion.

Author: Sami Imseih <samimseih@gmail.com>
Author: Dmitry Dolgov <9erthalion6@gmail.com>
Reviewed-by: Michael Paquier <michael@paquier.xyz>
Discussion: https://postgr.es/m/CAA5RZ0tRXoPG2y6bMgBCWNDt0Tn=unRerbzYM=oW0syi1=C1OA@mail.gmail.com
This commit is contained in:
Álvaro Herrera
2025-06-12 14:21:21 +02:00
parent f7b11414e9
commit 0f65f3eec4
14 changed files with 840 additions and 299 deletions

View File

@ -2810,14 +2810,12 @@ generate_normalized_query(JumbleState *jstate, const char *query,
{
char *norm_query;
int query_len = *query_len_p;
int i,
norm_query_buflen, /* Space allowed for norm_query */
int norm_query_buflen, /* Space allowed for norm_query */
len_to_wrt, /* Length (in bytes) to write */
quer_loc = 0, /* Source query byte location */
n_quer_loc = 0, /* Normalized query byte location */
last_off = 0, /* Offset from start for previous tok */
last_tok_len = 0; /* Length (in bytes) of that tok */
bool in_squashed = false; /* in a run of squashed consts? */
int num_constants_replaced = 0;
/*
@ -2832,16 +2830,13 @@ generate_normalized_query(JumbleState *jstate, const char *query,
* certainly isn't more than 11 bytes, even if n reaches INT_MAX. We
* could refine that limit based on the max value of n for the current
* query, but it hardly seems worth any extra effort to do so.
*
* Note this also gives enough room for the commented-out ", ..." list
* syntax used by constant squashing.
*/
norm_query_buflen = query_len + jstate->clocations_count * 10;
/* Allocate result buffer */
norm_query = palloc(norm_query_buflen + 1);
for (i = 0; i < jstate->clocations_count; i++)
for (int i = 0; i < jstate->clocations_count; i++)
{
int off, /* Offset from start for cur tok */
tok_len; /* Length (in bytes) of that tok */
@ -2856,65 +2851,24 @@ generate_normalized_query(JumbleState *jstate, const char *query,
if (tok_len < 0)
continue; /* ignore any duplicates */
/* Copy next chunk (what precedes the next constant) */
len_to_wrt = off - last_off;
len_to_wrt -= last_tok_len;
Assert(len_to_wrt >= 0);
memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
n_quer_loc += len_to_wrt;
/*
* What to do next depends on whether we're squashing constant lists,
* and whether we're already in a run of such constants.
* And insert a param symbol in place of the constant token; and, if
* we have a squashable list, insert a placeholder comment starting
* from the list's second value.
*/
if (!jstate->clocations[i].squashed)
{
/*
* This location corresponds to a constant not to be squashed.
* Print what comes before the constant ...
*/
len_to_wrt = off - last_off;
len_to_wrt -= last_tok_len;
n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d%s",
num_constants_replaced + 1 + jstate->highest_extern_param_id,
jstate->clocations[i].squashed ? " /*, ... */" : "");
num_constants_replaced++;
Assert(len_to_wrt >= 0);
memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
n_quer_loc += len_to_wrt;
/* ... and then a param symbol replacing the constant itself */
n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d",
num_constants_replaced++ + 1 + jstate->highest_extern_param_id);
/* In case previous constants were merged away, stop doing that */
in_squashed = false;
}
else if (!in_squashed)
{
/*
* This location is the start position of a run of constants to be
* squashed, so we need to print the representation of starting a
* group of stashed constants.
*
* Print what comes before the constant ...
*/
len_to_wrt = off - last_off;
len_to_wrt -= last_tok_len;
Assert(len_to_wrt >= 0);
Assert(i + 1 < jstate->clocations_count);
Assert(jstate->clocations[i + 1].squashed);
memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
n_quer_loc += len_to_wrt;
/* ... and then start a run of squashed constants */
n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d /*, ... */",
num_constants_replaced++ + 1 + jstate->highest_extern_param_id);
/* The next location will match the block below, to end the run */
in_squashed = true;
}
else
{
/*
* The second location of a run of squashable elements; this
* indicates its end.
*/
in_squashed = false;
}
/* Otherwise the constant is squashed away -- move forward */
/* move forward */
quer_loc = off + tok_len;
last_off = off;
last_tok_len = tok_len;
@ -3005,6 +2959,9 @@ fill_in_constant_lengths(JumbleState *jstate, const char *query,
Assert(loc >= 0);
if (locs[i].squashed)
continue; /* squashable list, ignore */
if (loc <= last_loc)
continue; /* Duplicate constant, ignore */