1
0
mirror of https://github.com/postgres/postgres.git synced 2025-07-30 11:03:19 +03:00

Fix squashing algorithm for query texts

The algorithm to squash lists of constants added by commit 62d712ecfd
was a bit too simplistic; we wanted to avoid adding unnecessary
complexity, but cases like direct function calls of typecasting
functions (and others) were missed, and bogus SQL syntax was being shown
in pg_stat_statements normalized query text field.  To fix normalization
for those cases, we need the parser to transmit information about were
each list of constant values starts and ends, so add that to a couple of
nodes.  Also add a few more test cases to make sure we're doing the
right thing.

The patch initially submitted by Sami added a new private struct in
gram.y to carry the start/end information for A_Expr, but I (Álvaro)
decided that a better fix was to remove the parser indirection via the
in_expr production, and instead create separate components in the a_expr
rule.  I'm surprised that this works and doesn't require more changes,
but I assume (without checking) that the grammar used to be more complex
and got simplified at some point.

Bump catversion.

Author: Sami Imseih <samimseih@gmail.com>
Author: Dmitry Dolgov <9erthalion6@gmail.com>
Reviewed-by: Michael Paquier <michael@paquier.xyz>
Discussion: https://postgr.es/m/CAA5RZ0tRXoPG2y6bMgBCWNDt0Tn=unRerbzYM=oW0syi1=C1OA@mail.gmail.com
This commit is contained in:
Álvaro Herrera
2025-06-12 14:21:21 +02:00
parent f7b11414e9
commit 0f65f3eec4
14 changed files with 840 additions and 299 deletions

View File

@ -1329,7 +1329,7 @@ _jumble${n}(JumbleState *jstate, Node *node)
# Node type. Squash constants if requested.
if ($query_jumble_squash)
{
print $jff "\tJUMBLE_ELEMENTS($f);\n"
print $jff "\tJUMBLE_ELEMENTS($f, node);\n"
unless $query_jumble_ignore;
}
else

View File

@ -653,6 +653,8 @@ _outA_Expr(StringInfo str, const A_Expr *node)
WRITE_NODE_FIELD(lexpr);
WRITE_NODE_FIELD(rexpr);
WRITE_LOCATION_FIELD(rexpr_list_start);
WRITE_LOCATION_FIELD(rexpr_list_end);
WRITE_LOCATION_FIELD(location);
}

View File

@ -61,9 +61,9 @@ static void AppendJumble(JumbleState *jstate,
const unsigned char *value, Size size);
static void FlushPendingNulls(JumbleState *jstate);
static void RecordConstLocation(JumbleState *jstate,
int location, bool squashed);
int location, int len);
static void _jumbleNode(JumbleState *jstate, Node *node);
static void _jumbleElements(JumbleState *jstate, List *elements);
static void _jumbleElements(JumbleState *jstate, List *elements, Node *node);
static void _jumbleA_Const(JumbleState *jstate, Node *node);
static void _jumbleList(JumbleState *jstate, Node *node);
static void _jumbleVariableSetStmt(JumbleState *jstate, Node *node);
@ -373,15 +373,17 @@ FlushPendingNulls(JumbleState *jstate)
/*
* Record location of constant within query string of query tree that is
* currently being walked.
* Record the location of some kind of constant within a query string.
* These are not only bare constants but also expressions that ultimately
* constitute a constant, such as those inside casts and simple function
* calls.
*
* 'squashed' signals that the constant represents the first or the last
* element in a series of merged constants, and everything but the first/last
* element contributes nothing to the jumble hash.
* If length is -1, it indicates a single such constant element. If
* it's a positive integer, it indicates the length of a squashable
* list of them.
*/
static void
RecordConstLocation(JumbleState *jstate, int location, bool squashed)
RecordConstLocation(JumbleState *jstate, int location, int len)
{
/* -1 indicates unknown or undefined location */
if (location >= 0)
@ -396,9 +398,14 @@ RecordConstLocation(JumbleState *jstate, int location, bool squashed)
sizeof(LocationLen));
}
jstate->clocations[jstate->clocations_count].location = location;
/* initialize lengths to -1 to simplify third-party module usage */
jstate->clocations[jstate->clocations_count].squashed = squashed;
jstate->clocations[jstate->clocations_count].length = -1;
/*
* Lengths are either positive integers (indicating a squashable
* list), or -1.
*/
Assert(len > -1 || len == -1);
jstate->clocations[jstate->clocations_count].length = len;
jstate->clocations[jstate->clocations_count].squashed = (len > -1);
jstate->clocations_count++;
}
}
@ -408,12 +415,12 @@ RecordConstLocation(JumbleState *jstate, int location, bool squashed)
* deduce that the expression is a constant:
*
* - Ignore a possible wrapping RelabelType and CoerceViaIO.
* - If it's a FuncExpr, check that the function is an implicit
* - If it's a FuncExpr, check that the function is a builtin
* cast and its arguments are Const.
* - Otherwise test if the expression is a simple Const.
*/
static bool
IsSquashableConst(Node *element)
IsSquashableConstant(Node *element)
{
if (IsA(element, RelabelType))
element = (Node *) ((RelabelType *) element)->arg;
@ -421,32 +428,50 @@ IsSquashableConst(Node *element)
if (IsA(element, CoerceViaIO))
element = (Node *) ((CoerceViaIO *) element)->arg;
if (IsA(element, FuncExpr))
switch (nodeTag(element))
{
FuncExpr *func = (FuncExpr *) element;
ListCell *temp;
case T_FuncExpr:
{
FuncExpr *func = (FuncExpr *) element;
ListCell *temp;
if (func->funcformat != COERCE_IMPLICIT_CAST &&
func->funcformat != COERCE_EXPLICIT_CAST)
return false;
if (func->funcformat != COERCE_IMPLICIT_CAST &&
func->funcformat != COERCE_EXPLICIT_CAST)
return false;
if (func->funcid > FirstGenbkiObjectId)
return false;
if (func->funcid > FirstGenbkiObjectId)
return false;
foreach(temp, func->args)
{
Node *arg = lfirst(temp);
/*
* We can check function arguments recursively, being careful
* about recursing too deep. At each recursion level it's
* enough to test the stack on the first element. (Note that
* I wasn't able to hit this without bloating the stack
* artificially in this function: the parser errors out before
* stack size becomes a problem here.)
*/
foreach(temp, func->args)
{
Node *arg = lfirst(temp);
if (!IsA(arg, Const)) /* XXX we could recurse here instead */
if (!IsA(arg, Const))
{
if (foreach_current_index(temp) == 0 &&
stack_is_too_deep())
return false;
else if (!IsSquashableConstant(arg))
return false;
}
}
return true;
}
default:
if (!IsA(element, Const))
return false;
}
return true;
}
if (!IsA(element, Const))
return false;
return true;
}
@ -461,35 +486,29 @@ IsSquashableConst(Node *element)
* expressions.
*/
static bool
IsSquashableConstList(List *elements, Node **firstExpr, Node **lastExpr)
IsSquashableConstantList(List *elements)
{
ListCell *temp;
/*
* If squashing is disabled, or the list is too short, we don't try to
* squash it.
*/
/* If the list is too short, we don't try to squash it. */
if (list_length(elements) < 2)
return false;
foreach(temp, elements)
{
if (!IsSquashableConst(lfirst(temp)))
if (!IsSquashableConstant(lfirst(temp)))
return false;
}
*firstExpr = linitial(elements);
*lastExpr = llast(elements);
return true;
}
#define JUMBLE_NODE(item) \
_jumbleNode(jstate, (Node *) expr->item)
#define JUMBLE_ELEMENTS(list) \
_jumbleElements(jstate, (List *) expr->list)
#define JUMBLE_ELEMENTS(list, node) \
_jumbleElements(jstate, (List *) expr->list, node)
#define JUMBLE_LOCATION(location) \
RecordConstLocation(jstate, expr->location, false)
RecordConstLocation(jstate, expr->location, -1)
#define JUMBLE_FIELD(item) \
do { \
if (sizeof(expr->item) == 8) \
@ -517,36 +536,36 @@ do { \
#include "queryjumblefuncs.funcs.c"
/*
* We jumble lists of constant elements as one individual item regardless
* of how many elements are in the list. This means different queries
* jumble to the same query_id, if the only difference is the number of
* elements in the list.
* We try to jumble lists of expressions as one individual item regardless
* of how many elements are in the list. This is know as squashing, which
* results in different queries jumbling to the same query_id, if the only
* difference is the number of elements in the list.
*
* We allow constants to be squashed. To normalize such queries, we use
* the start and end locations of the list of elements in a list.
*/
static void
_jumbleElements(JumbleState *jstate, List *elements)
_jumbleElements(JumbleState *jstate, List *elements, Node *node)
{
Node *first,
*last;
bool normalize_list = false;
if (IsSquashableConstList(elements, &first, &last))
if (IsSquashableConstantList(elements))
{
/*
* If this list of elements is squashable, keep track of the location
* of its first and last elements. When reading back the locations
* array, we'll see two consecutive locations with ->squashed set to
* true, indicating the location of initial and final elements of this
* list.
*
* For the limited set of cases we support now (implicit coerce via
* FuncExpr, Const) it's fine to use exprLocation of the 'last'
* expression, but if more complex composite expressions are to be
* supported (e.g., OpExpr or FuncExpr as an explicit call), more
* sophisticated tracking will be needed.
*/
RecordConstLocation(jstate, exprLocation(first), true);
RecordConstLocation(jstate, exprLocation(last), true);
if (IsA(node, ArrayExpr))
{
ArrayExpr *aexpr = (ArrayExpr *) node;
if (aexpr->list_start > 0 && aexpr->list_end > 0)
{
RecordConstLocation(jstate,
aexpr->list_start + 1,
(aexpr->list_end - aexpr->list_start) - 1);
normalize_list = true;
}
}
}
else
if (!normalize_list)
{
_jumbleNode(jstate, (Node *) elements);
}

View File

@ -526,6 +526,8 @@ _readA_Expr(void)
READ_NODE_FIELD(lexpr);
READ_NODE_FIELD(rexpr);
READ_LOCATION_FIELD(rexpr_list_start);
READ_LOCATION_FIELD(rexpr_list_end);
READ_LOCATION_FIELD(location);
READ_DONE();

View File

@ -183,7 +183,7 @@ static void doNegateFloat(Float *v);
static Node *makeAndExpr(Node *lexpr, Node *rexpr, int location);
static Node *makeOrExpr(Node *lexpr, Node *rexpr, int location);
static Node *makeNotExpr(Node *expr, int location);
static Node *makeAArrayExpr(List *elements, int location);
static Node *makeAArrayExpr(List *elements, int location, int end_location);
static Node *makeSQLValueFunction(SQLValueFunctionOp op, int32 typmod,
int location);
static Node *makeXmlExpr(XmlExprOp op, char *name, List *named_args,
@ -522,7 +522,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
%type <defelt> def_elem reloption_elem old_aggr_elem operator_def_elem
%type <node> def_arg columnElem where_clause where_or_current_clause
a_expr b_expr c_expr AexprConst indirection_el opt_slice_bound
columnref in_expr having_clause func_table xmltable array_expr
columnref having_clause func_table xmltable array_expr
OptWhereClause operator_def_arg
%type <list> opt_column_and_period_list
%type <list> rowsfrom_item rowsfrom_list opt_col_def_list
@ -15264,49 +15264,50 @@ a_expr: c_expr { $$ = $1; }
(Node *) list_make2($5, $7),
@2);
}
| a_expr IN_P in_expr
| a_expr IN_P select_with_parens
{
/* in_expr returns a SubLink or a list of a_exprs */
if (IsA($3, SubLink))
{
/* generate foo = ANY (subquery) */
SubLink *n = (SubLink *) $3;
/* generate foo = ANY (subquery) */
SubLink *n = makeNode(SubLink);
n->subLinkType = ANY_SUBLINK;
n->subLinkId = 0;
n->testexpr = $1;
n->operName = NIL; /* show it's IN not = ANY */
n->location = @2;
$$ = (Node *) n;
}
else
{
/* generate scalar IN expression */
$$ = (Node *) makeSimpleA_Expr(AEXPR_IN, "=", $1, $3, @2);
}
n->subselect = $3;
n->subLinkType = ANY_SUBLINK;
n->subLinkId = 0;
n->testexpr = $1;
n->operName = NIL; /* show it's IN not = ANY */
n->location = @2;
$$ = (Node *) n;
}
| a_expr NOT_LA IN_P in_expr %prec NOT_LA
| a_expr IN_P '(' expr_list ')'
{
/* in_expr returns a SubLink or a list of a_exprs */
if (IsA($4, SubLink))
{
/* generate NOT (foo = ANY (subquery)) */
/* Make an = ANY node */
SubLink *n = (SubLink *) $4;
/* generate scalar IN expression */
A_Expr *n = makeSimpleA_Expr(AEXPR_IN, "=", $1, (Node *) $4, @2);
n->subLinkType = ANY_SUBLINK;
n->subLinkId = 0;
n->testexpr = $1;
n->operName = NIL; /* show it's IN not = ANY */
n->location = @2;
/* Stick a NOT on top; must have same parse location */
$$ = makeNotExpr((Node *) n, @2);
}
else
{
/* generate scalar NOT IN expression */
$$ = (Node *) makeSimpleA_Expr(AEXPR_IN, "<>", $1, $4, @2);
}
n->rexpr_list_start = @3;
n->rexpr_list_end = @5;
$$ = (Node *) n;
}
| a_expr NOT_LA IN_P select_with_parens %prec NOT_LA
{
/* generate NOT (foo = ANY (subquery)) */
SubLink *n = makeNode(SubLink);
n->subselect = $4;
n->subLinkType = ANY_SUBLINK;
n->subLinkId = 0;
n->testexpr = $1;
n->operName = NIL; /* show it's IN not = ANY */
n->location = @2;
/* Stick a NOT on top; must have same parse location */
$$ = makeNotExpr((Node *) n, @2);
}
| a_expr NOT_LA IN_P '(' expr_list ')'
{
/* generate scalar NOT IN expression */
A_Expr *n = makeSimpleA_Expr(AEXPR_IN, "<>", $1, (Node *) $5, @2);
n->rexpr_list_start = @4;
n->rexpr_list_end = @6;
$$ = (Node *) n;
}
| a_expr subquery_Op sub_type select_with_parens %prec Op
{
@ -16741,15 +16742,15 @@ type_list: Typename { $$ = list_make1($1); }
array_expr: '[' expr_list ']'
{
$$ = makeAArrayExpr($2, @1);
$$ = makeAArrayExpr($2, @1, @3);
}
| '[' array_expr_list ']'
{
$$ = makeAArrayExpr($2, @1);
$$ = makeAArrayExpr($2, @1, @3);
}
| '[' ']'
{
$$ = makeAArrayExpr(NIL, @1);
$$ = makeAArrayExpr(NIL, @1, @2);
}
;
@ -16871,17 +16872,6 @@ trim_list: a_expr FROM expr_list { $$ = lappend($3, $1); }
| expr_list { $$ = $1; }
;
in_expr: select_with_parens
{
SubLink *n = makeNode(SubLink);
n->subselect = $1;
/* other fields will be filled later */
$$ = (Node *) n;
}
| '(' expr_list ')' { $$ = (Node *) $2; }
;
/*
* Define SQL-style CASE clause.
* - Full specification
@ -19232,12 +19222,14 @@ makeNotExpr(Node *expr, int location)
}
static Node *
makeAArrayExpr(List *elements, int location)
makeAArrayExpr(List *elements, int location, int location_end)
{
A_ArrayExpr *n = makeNode(A_ArrayExpr);
n->elements = elements;
n->location = location;
n->list_start = location;
n->list_end = location_end;
return (Node *) n;
}

View File

@ -1223,6 +1223,8 @@ transformAExprIn(ParseState *pstate, A_Expr *a)
newa->element_typeid = scalar_type;
newa->elements = aexprs;
newa->multidims = false;
newa->list_start = a->rexpr_list_start;
newa->list_end = a->rexpr_list_end;
newa->location = -1;
result = (Node *) make_scalar_array_op(pstate,
@ -2165,6 +2167,8 @@ transformArrayExpr(ParseState *pstate, A_ArrayExpr *a,
/* array_collid will be set by parse_collate.c */
newa->element_typeid = element_type;
newa->elements = newcoercedelems;
newa->list_start = a->list_start;
newa->list_end = a->list_end;
newa->location = a->location;
return (Node *) newa;

View File

@ -57,6 +57,6 @@
*/
/* yyyymmddN */
#define CATALOG_VERSION_NO 202506021
#define CATALOG_VERSION_NO 202506121
#endif

View File

@ -351,6 +351,14 @@ typedef struct A_Expr
List *name; /* possibly-qualified name of operator */
Node *lexpr; /* left argument, or NULL if none */
Node *rexpr; /* right argument, or NULL if none */
/*
* If rexpr is a list of some kind, we separately track its starting and
* ending location; it's not the same as the starting and ending location
* of the token itself.
*/
ParseLoc rexpr_list_start;
ParseLoc rexpr_list_end;
ParseLoc location; /* token location, or -1 if unknown */
} A_Expr;
@ -506,6 +514,8 @@ typedef struct A_ArrayExpr
{
NodeTag type;
List *elements; /* array element expressions */
ParseLoc list_start; /* start of the element list */
ParseLoc list_end; /* end of the elements list */
ParseLoc location; /* token location, or -1 if unknown */
} A_ArrayExpr;

View File

@ -1397,6 +1397,10 @@ typedef struct ArrayExpr
List *elements pg_node_attr(query_jumble_squash);
/* true if elements are sub-arrays */
bool multidims pg_node_attr(query_jumble_ignore);
/* location of the start of the elements list */
ParseLoc list_start;
/* location of the end of the elements list */
ParseLoc list_end;
/* token location, or -1 if unknown */
ParseLoc location;
} ArrayExpr;