1
0
mirror of https://github.com/postgres/postgres.git synced 2025-07-12 21:01:52 +03:00

Fix representation of hash keys in Hash/HashJoin nodes.

In 5f32b29c18 I changed the creation of HashState.hashkeys to
actually use HashState as the parent (instead of HashJoinState, which
was incorrect, as they were executed below HashState), to fix the
problem of hashkeys expressions otherwise relying on slot types
appropriate for HashJoinState, rather than HashState as would be
correct. That reliance was only introduced in 12, which is why it
previously worked to use HashJoinState as the parent (although I'd be
unsurprised if there were problematic cases).

Unfortunately that's not a sufficient solution, because before this
commit, the to-be-hashed expressions referenced inner/outer as
appropriate for the HashJoin, not Hash. That didn't have obvious bad
consequences, because the slots containing the tuples were put into
ecxt_innertuple when hashing a tuple for HashState (even though Hash
doesn't have an inner plan).

There are less common cases where this can cause visible problems
however (rather than just confusion when inspecting such executor
trees). E.g. "ERROR: bogus varno: 65000", when explaining queries
containing a HashJoin where the subsidiary Hash node's hash keys
reference a subplan. While normally hashkeys aren't displayed by
EXPLAIN, if one of those expressions references a subplan, that
subplan may be printed as part of the Hash node - which then failed
because an inner plan was referenced, and Hash doesn't have that.

It seems quite possible that there's other broken cases, too.

Fix the problem by properly splitting the expression for the HashJoin
and Hash nodes at plan time, and have them reference the proper
subsidiary node. While other workarounds are possible, fixing this
correctly seems easy enough. It was a pretty ugly hack to have
ExecInitHashJoin put the expression into the already initialized
HashState, in the first place.

I decided to not just split inner/outer hashkeys inside
make_hashjoin(), but also to separate out hashoperators and
hashcollations at plan time. Otherwise we would have ended up having
two very similar loops, one at plan time and the other during executor
startup. The work seems to more appropriately belong to plan time,
anyway.

Reported-By: Nikita Glukhov, Alexander Korotkov
Author: Andres Freund
Reviewed-By: Tom Lane, in an earlier version
Discussion: https://postgr.es/m/CAPpHfdvGVegF_TKKRiBrSmatJL2dR9uwFCuR+teQ_8tEXU8mxg@mail.gmail.com
Backpatch: 12-
This commit is contained in:
Andres Freund
2019-08-02 00:02:49 -07:00
parent 20f5cb1958
commit a668bc7599
11 changed files with 330 additions and 47 deletions

View File

@ -879,3 +879,137 @@ $$);
rollback to settings;
rollback;
-- Verify that hash key expressions reference the correct
-- nodes. Hashjoin's hashkeys need to reference its outer plan, Hash's
-- need to reference Hash's outer plan (which is below HashJoin's
-- inner plan). It's not trivial to verify that the references are
-- correct (we don't display the hashkeys themselves), but if the
-- hashkeys contain subplan references, those will be displayed. Force
-- subplans to appear just about everywhere.
--
-- Bug report:
-- https://www.postgresql.org/message-id/CAPpHfdvGVegF_TKKRiBrSmatJL2dR9uwFCuR%2BteQ_8tEXU8mxg%40mail.gmail.com
--
BEGIN;
SET LOCAL enable_sort = OFF; -- avoid mergejoins
SET LOCAL from_collapse_limit = 1; -- allows easy changing of join order
CREATE TABLE hjtest_1 (a text, b int, id int, c bool);
CREATE TABLE hjtest_2 (a bool, id int, b text, c int);
INSERT INTO hjtest_1(a, b, id, c) VALUES ('text', 2, 1, false); -- matches
INSERT INTO hjtest_1(a, b, id, c) VALUES ('text', 1, 2, false); -- fails id join condition
INSERT INTO hjtest_1(a, b, id, c) VALUES ('text', 20, 1, false); -- fails < 50
INSERT INTO hjtest_1(a, b, id, c) VALUES ('text', 1, 1, false); -- fails (SELECT hjtest_1.b * 5) = (SELECT hjtest_2.c*5)
INSERT INTO hjtest_2(a, id, b, c) VALUES (true, 1, 'another', 2); -- matches
INSERT INTO hjtest_2(a, id, b, c) VALUES (true, 3, 'another', 7); -- fails id join condition
INSERT INTO hjtest_2(a, id, b, c) VALUES (true, 1, 'another', 90); -- fails < 55
INSERT INTO hjtest_2(a, id, b, c) VALUES (true, 1, 'another', 3); -- fails (SELECT hjtest_1.b * 5) = (SELECT hjtest_2.c*5)
INSERT INTO hjtest_2(a, id, b, c) VALUES (true, 1, 'text', 1); -- fails hjtest_1.a <> hjtest_2.b;
EXPLAIN (COSTS OFF, VERBOSE)
SELECT hjtest_1.a a1, hjtest_2.a a2,hjtest_1.tableoid::regclass t1, hjtest_2.tableoid::regclass t2
FROM hjtest_1, hjtest_2
WHERE
hjtest_1.id = (SELECT 1 WHERE hjtest_2.id = 1)
AND (SELECT hjtest_1.b * 5) = (SELECT hjtest_2.c*5)
AND (SELECT hjtest_1.b * 5) < 50
AND (SELECT hjtest_2.c * 5) < 55
AND hjtest_1.a <> hjtest_2.b;
QUERY PLAN
------------------------------------------------------------------------------------------------
Hash Join
Output: hjtest_1.a, hjtest_2.a, (hjtest_1.tableoid)::regclass, (hjtest_2.tableoid)::regclass
Hash Cond: ((hjtest_1.id = (SubPlan 1)) AND ((SubPlan 2) = (SubPlan 3)))
Join Filter: (hjtest_1.a <> hjtest_2.b)
-> Seq Scan on public.hjtest_1
Output: hjtest_1.a, hjtest_1.tableoid, hjtest_1.id, hjtest_1.b
Filter: ((SubPlan 4) < 50)
SubPlan 4
-> Result
Output: (hjtest_1.b * 5)
-> Hash
Output: hjtest_2.a, hjtest_2.tableoid, hjtest_2.id, hjtest_2.c, hjtest_2.b
-> Seq Scan on public.hjtest_2
Output: hjtest_2.a, hjtest_2.tableoid, hjtest_2.id, hjtest_2.c, hjtest_2.b
Filter: ((SubPlan 5) < 55)
SubPlan 5
-> Result
Output: (hjtest_2.c * 5)
SubPlan 1
-> Result
Output: 1
One-Time Filter: (hjtest_2.id = 1)
SubPlan 3
-> Result
Output: (hjtest_2.c * 5)
SubPlan 2
-> Result
Output: (hjtest_1.b * 5)
(28 rows)
SELECT hjtest_1.a a1, hjtest_2.a a2,hjtest_1.tableoid::regclass t1, hjtest_2.tableoid::regclass t2
FROM hjtest_1, hjtest_2
WHERE
hjtest_1.id = (SELECT 1 WHERE hjtest_2.id = 1)
AND (SELECT hjtest_1.b * 5) = (SELECT hjtest_2.c*5)
AND (SELECT hjtest_1.b * 5) < 50
AND (SELECT hjtest_2.c * 5) < 55
AND hjtest_1.a <> hjtest_2.b;
a1 | a2 | t1 | t2
------+----+----------+----------
text | t | hjtest_1 | hjtest_2
(1 row)
EXPLAIN (COSTS OFF, VERBOSE)
SELECT hjtest_1.a a1, hjtest_2.a a2,hjtest_1.tableoid::regclass t1, hjtest_2.tableoid::regclass t2
FROM hjtest_2, hjtest_1
WHERE
hjtest_1.id = (SELECT 1 WHERE hjtest_2.id = 1)
AND (SELECT hjtest_1.b * 5) = (SELECT hjtest_2.c*5)
AND (SELECT hjtest_1.b * 5) < 50
AND (SELECT hjtest_2.c * 5) < 55
AND hjtest_1.a <> hjtest_2.b;
QUERY PLAN
------------------------------------------------------------------------------------------------
Hash Join
Output: hjtest_1.a, hjtest_2.a, (hjtest_1.tableoid)::regclass, (hjtest_2.tableoid)::regclass
Hash Cond: (((SubPlan 1) = hjtest_1.id) AND ((SubPlan 3) = (SubPlan 2)))
Join Filter: (hjtest_1.a <> hjtest_2.b)
-> Seq Scan on public.hjtest_2
Output: hjtest_2.a, hjtest_2.tableoid, hjtest_2.id, hjtest_2.c, hjtest_2.b
Filter: ((SubPlan 5) < 55)
SubPlan 5
-> Result
Output: (hjtest_2.c * 5)
-> Hash
Output: hjtest_1.a, hjtest_1.tableoid, hjtest_1.id, hjtest_1.b
-> Seq Scan on public.hjtest_1
Output: hjtest_1.a, hjtest_1.tableoid, hjtest_1.id, hjtest_1.b
Filter: ((SubPlan 4) < 50)
SubPlan 4
-> Result
Output: (hjtest_1.b * 5)
SubPlan 2
-> Result
Output: (hjtest_1.b * 5)
SubPlan 1
-> Result
Output: 1
One-Time Filter: (hjtest_2.id = 1)
SubPlan 3
-> Result
Output: (hjtest_2.c * 5)
(28 rows)
SELECT hjtest_1.a a1, hjtest_2.a a2,hjtest_1.tableoid::regclass t1, hjtest_2.tableoid::regclass t2
FROM hjtest_2, hjtest_1
WHERE
hjtest_1.id = (SELECT 1 WHERE hjtest_2.id = 1)
AND (SELECT hjtest_1.b * 5) = (SELECT hjtest_2.c*5)
AND (SELECT hjtest_1.b * 5) < 50
AND (SELECT hjtest_2.c * 5) < 55
AND hjtest_1.a <> hjtest_2.b;
a1 | a2 | t1 | t2
------+----+----------+----------
text | t | hjtest_1 | hjtest_2
(1 row)
ROLLBACK;

View File

@ -468,3 +468,73 @@ $$);
rollback to settings;
rollback;
-- Verify that hash key expressions reference the correct
-- nodes. Hashjoin's hashkeys need to reference its outer plan, Hash's
-- need to reference Hash's outer plan (which is below HashJoin's
-- inner plan). It's not trivial to verify that the references are
-- correct (we don't display the hashkeys themselves), but if the
-- hashkeys contain subplan references, those will be displayed. Force
-- subplans to appear just about everywhere.
--
-- Bug report:
-- https://www.postgresql.org/message-id/CAPpHfdvGVegF_TKKRiBrSmatJL2dR9uwFCuR%2BteQ_8tEXU8mxg%40mail.gmail.com
--
BEGIN;
SET LOCAL enable_sort = OFF; -- avoid mergejoins
SET LOCAL from_collapse_limit = 1; -- allows easy changing of join order
CREATE TABLE hjtest_1 (a text, b int, id int, c bool);
CREATE TABLE hjtest_2 (a bool, id int, b text, c int);
INSERT INTO hjtest_1(a, b, id, c) VALUES ('text', 2, 1, false); -- matches
INSERT INTO hjtest_1(a, b, id, c) VALUES ('text', 1, 2, false); -- fails id join condition
INSERT INTO hjtest_1(a, b, id, c) VALUES ('text', 20, 1, false); -- fails < 50
INSERT INTO hjtest_1(a, b, id, c) VALUES ('text', 1, 1, false); -- fails (SELECT hjtest_1.b * 5) = (SELECT hjtest_2.c*5)
INSERT INTO hjtest_2(a, id, b, c) VALUES (true, 1, 'another', 2); -- matches
INSERT INTO hjtest_2(a, id, b, c) VALUES (true, 3, 'another', 7); -- fails id join condition
INSERT INTO hjtest_2(a, id, b, c) VALUES (true, 1, 'another', 90); -- fails < 55
INSERT INTO hjtest_2(a, id, b, c) VALUES (true, 1, 'another', 3); -- fails (SELECT hjtest_1.b * 5) = (SELECT hjtest_2.c*5)
INSERT INTO hjtest_2(a, id, b, c) VALUES (true, 1, 'text', 1); -- fails hjtest_1.a <> hjtest_2.b;
EXPLAIN (COSTS OFF, VERBOSE)
SELECT hjtest_1.a a1, hjtest_2.a a2,hjtest_1.tableoid::regclass t1, hjtest_2.tableoid::regclass t2
FROM hjtest_1, hjtest_2
WHERE
hjtest_1.id = (SELECT 1 WHERE hjtest_2.id = 1)
AND (SELECT hjtest_1.b * 5) = (SELECT hjtest_2.c*5)
AND (SELECT hjtest_1.b * 5) < 50
AND (SELECT hjtest_2.c * 5) < 55
AND hjtest_1.a <> hjtest_2.b;
SELECT hjtest_1.a a1, hjtest_2.a a2,hjtest_1.tableoid::regclass t1, hjtest_2.tableoid::regclass t2
FROM hjtest_1, hjtest_2
WHERE
hjtest_1.id = (SELECT 1 WHERE hjtest_2.id = 1)
AND (SELECT hjtest_1.b * 5) = (SELECT hjtest_2.c*5)
AND (SELECT hjtest_1.b * 5) < 50
AND (SELECT hjtest_2.c * 5) < 55
AND hjtest_1.a <> hjtest_2.b;
EXPLAIN (COSTS OFF, VERBOSE)
SELECT hjtest_1.a a1, hjtest_2.a a2,hjtest_1.tableoid::regclass t1, hjtest_2.tableoid::regclass t2
FROM hjtest_2, hjtest_1
WHERE
hjtest_1.id = (SELECT 1 WHERE hjtest_2.id = 1)
AND (SELECT hjtest_1.b * 5) = (SELECT hjtest_2.c*5)
AND (SELECT hjtest_1.b * 5) < 50
AND (SELECT hjtest_2.c * 5) < 55
AND hjtest_1.a <> hjtest_2.b;
SELECT hjtest_1.a a1, hjtest_2.a a2,hjtest_1.tableoid::regclass t1, hjtest_2.tableoid::regclass t2
FROM hjtest_2, hjtest_1
WHERE
hjtest_1.id = (SELECT 1 WHERE hjtest_2.id = 1)
AND (SELECT hjtest_1.b * 5) = (SELECT hjtest_2.c*5)
AND (SELECT hjtest_1.b * 5) < 50
AND (SELECT hjtest_2.c * 5) < 55
AND hjtest_1.a <> hjtest_2.b;
ROLLBACK;