diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out
index f47449b1c44..f02ef3f9787 100644
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -5785,3 +5785,460 @@ where exists (select 1 from j3
 (13 rows)
 
 drop table j3;
+--
+-- exercises for the hash join code
+--
+begin;
+set local min_parallel_table_scan_size = 0;
+set local parallel_setup_cost = 0;
+-- Extract bucket and batch counts from an explain analyze plan. In
+-- general we can't make assertions about how many batches (or
+-- buckets) will be required because it can vary, but we can in some
+-- special cases and we can check for growth.
+create or replace function find_hash(node json)
+returns json language plpgsql
+as
+$$
+declare
+  x json;
+  child json;
+begin
+  if node->>'Node Type' = 'Hash' then
+    return node;
+  else
+    for child in select json_array_elements(node->'Plans')
+    loop
+      x := find_hash(child);
+      if x is not null then
+        return x;
+      end if;
+    end loop;
+    return null;
+  end if;
+end;
+$$;
+create or replace function hash_join_batches(query text)
+returns table (original int, final int) language plpgsql
+as
+$$
+declare
+  whole_plan json;
+  hash_node json;
+begin
+  for whole_plan in
+    execute 'explain (analyze, format ''json'') ' || query
+  loop
+    hash_node := find_hash(json_extract_path(whole_plan, '0', 'Plan'));
+    original := hash_node->>'Original Hash Batches';
+    final := hash_node->>'Hash Batches';
+    return next;
+  end loop;
+end;
+$$;
+-- Make a simple relation with well distributed keys and correctly
+-- estimated size.
+create table simple as
+  select generate_series(1, 20000) AS id, 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa';
+alter table simple set (parallel_workers = 2);
+analyze simple;
+-- Make a relation whose size we will under-estimate. We want stats
+-- to say 1000 rows, but actually there are 20,000 rows.
+create table bigger_than_it_looks as
+  select generate_series(1, 20000) as id, 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa';
+alter table bigger_than_it_looks set (autovacuum_enabled = 'false');
+alter table bigger_than_it_looks set (parallel_workers = 2);
+analyze bigger_than_it_looks;
+update pg_class set reltuples = 1000 where relname = 'bigger_than_it_looks';
+-- Make a relation whose size we underestimate and that also has a
+-- kind of skew that breaks our batching scheme. We want stats to say
+-- 2 rows, but actually there are 20,000 rows with the same key.
+create table extremely_skewed (id int, t text);
+alter table extremely_skewed set (autovacuum_enabled = 'false');
+alter table extremely_skewed set (parallel_workers = 2);
+analyze extremely_skewed;
+insert into extremely_skewed
+  select 42 as id, 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'
+  from generate_series(1, 20000);
+update pg_class
+  set reltuples = 2, relpages = pg_relation_size('extremely_skewed') / 8192
+  where relname = 'extremely_skewed';
+-- The "optimal" case: the hash table fits in memory; we plan for 1
+-- batch, we stick to that number, and peak memory usage stays within
+-- our work_mem budget
+-- non-parallel
+savepoint settings;
+set local max_parallel_workers_per_gather = 0;
+set local work_mem = '4MB';
+explain (costs off)
+  select count(*) from simple r join simple s using (id);
+               QUERY PLAN               
+----------------------------------------
+ Aggregate
+   ->  Hash Join
+         Hash Cond: (r.id = s.id)
+         ->  Seq Scan on simple r
+         ->  Hash
+               ->  Seq Scan on simple s
+(6 rows)
+
+select count(*) from simple r join simple s using (id);
+ count 
+-------
+ 20000
+(1 row)
+
+select original > 1 as initially_multibatch, final > original as increased_batches
+  from hash_join_batches(
+$$
+  select count(*) from simple r join simple s using (id);
+$$);
+ initially_multibatch | increased_batches 
+----------------------+-------------------
+ f                    | f
+(1 row)
+
+rollback to settings;
+-- parallel with parallel-oblivious hash join
+savepoint settings;
+set local max_parallel_workers_per_gather = 2;
+set local work_mem = '4MB';
+explain (costs off)
+  select count(*) from simple r join simple s using (id);
+                      QUERY PLAN                       
+-------------------------------------------------------
+ Finalize Aggregate
+   ->  Gather
+         Workers Planned: 2
+         ->  Partial Aggregate
+               ->  Hash Join
+                     Hash Cond: (r.id = s.id)
+                     ->  Parallel Seq Scan on simple r
+                     ->  Hash
+                           ->  Seq Scan on simple s
+(9 rows)
+
+select count(*) from simple r join simple s using (id);
+ count 
+-------
+ 20000
+(1 row)
+
+select original > 1 as initially_multibatch, final > original as increased_batches
+  from hash_join_batches(
+$$
+  select count(*) from simple r join simple s using (id);
+$$);
+ initially_multibatch | increased_batches 
+----------------------+-------------------
+ f                    | f
+(1 row)
+
+rollback to settings;
+-- The "good" case: batches are required, but we plan the right
+-- number; we stick to that number during execution, and peak memory
+-- usage stays within our work_mem budget
+-- non-parallel
+savepoint settings;
+set local max_parallel_workers_per_gather = 0;
+set local work_mem = '128kB';
+explain (costs off)
+  select count(*) from simple r join simple s using (id);
+               QUERY PLAN               
+----------------------------------------
+ Aggregate
+   ->  Hash Join
+         Hash Cond: (r.id = s.id)
+         ->  Seq Scan on simple r
+         ->  Hash
+               ->  Seq Scan on simple s
+(6 rows)
+
+select count(*) from simple r join simple s using (id);
+ count 
+-------
+ 20000
+(1 row)
+
+select original > 1 as initially_multibatch, final > original as increased_batches
+  from hash_join_batches(
+$$
+  select count(*) from simple r join simple s using (id);
+$$);
+ initially_multibatch | increased_batches 
+----------------------+-------------------
+ t                    | f
+(1 row)
+
+rollback to settings;
+-- parallel with parallel-oblivious hash join
+savepoint settings;
+set local max_parallel_workers_per_gather = 2;
+set local work_mem = '128kB';
+explain (costs off)
+  select count(*) from simple r join simple s using (id);
+                      QUERY PLAN                       
+-------------------------------------------------------
+ Finalize Aggregate
+   ->  Gather
+         Workers Planned: 2
+         ->  Partial Aggregate
+               ->  Hash Join
+                     Hash Cond: (r.id = s.id)
+                     ->  Parallel Seq Scan on simple r
+                     ->  Hash
+                           ->  Seq Scan on simple s
+(9 rows)
+
+select count(*) from simple r join simple s using (id);
+ count 
+-------
+ 20000
+(1 row)
+
+select original > 1 as initially_multibatch, final > original as increased_batches
+  from hash_join_batches(
+$$
+  select count(*) from simple r join simple s using (id);
+$$);
+ initially_multibatch | increased_batches 
+----------------------+-------------------
+ t                    | f
+(1 row)
+
+rollback to settings;
+-- The "bad" case: during execution we need to increase the number
+-- of batches; in this case we plan for 1 batch, increase it at least
+-- a couple of times, and peak memory usage stays within our work_mem
+-- budget
+-- non-parallel
+savepoint settings;
+set local max_parallel_workers_per_gather = 0;
+set local work_mem = '128kB';
+explain (costs off)
+  select count(*) from simple r join bigger_than_it_looks s using (id);
+                      QUERY PLAN                      
+------------------------------------------------------
+ Aggregate
+   ->  Hash Join
+         Hash Cond: (r.id = s.id)
+         ->  Seq Scan on simple r
+         ->  Hash
+               ->  Seq Scan on bigger_than_it_looks s
+(6 rows)
+
+select count(*) from simple r join bigger_than_it_looks s using (id);
+ count 
+-------
+ 20000
+(1 row)
+
+select original > 1 as initially_multibatch, final > original as increased_batches
+  from hash_join_batches(
+$$
+  select count(*) from simple r join bigger_than_it_looks s using (id);
+$$);
+ initially_multibatch | increased_batches 
+----------------------+-------------------
+ f                    | t
+(1 row)
+
+rollback to settings;
+-- parallel with parallel-oblivious hash join
+savepoint settings;
+set local max_parallel_workers_per_gather = 2;
+set local work_mem = '128kB';
+explain (costs off)
+  select count(*) from simple r join bigger_than_it_looks s using (id);
+                            QUERY PLAN                            
+------------------------------------------------------------------
+ Finalize Aggregate
+   ->  Gather
+         Workers Planned: 2
+         ->  Partial Aggregate
+               ->  Hash Join
+                     Hash Cond: (r.id = s.id)
+                     ->  Parallel Seq Scan on simple r
+                     ->  Hash
+                           ->  Seq Scan on bigger_than_it_looks s
+(9 rows)
+
+select count(*) from simple r join bigger_than_it_looks s using (id);
+ count 
+-------
+ 20000
+(1 row)
+
+select original > 1 as initially_multibatch, final > original as increased_batches
+  from hash_join_batches(
+$$
+  select count(*) from simple r join bigger_than_it_looks s using (id);
+$$);
+ initially_multibatch | increased_batches 
+----------------------+-------------------
+ f                    | t
+(1 row)
+
+rollback to settings;
+-- The "ugly" case: increasing the number of batches during execution
+-- doesn't help, so we stop trying to fit in work_mem; in this case we
+-- plan for 1 batch, increase it just once, and then stop increasing
+-- because that didn't help at all, so we blow right through the
+-- work_mem budget and hope for the best...
+-- non-parallel
+savepoint settings;
+set local max_parallel_workers_per_gather = 0;
+set local work_mem = '128kB';
+explain (costs off)
+  select count(*) from simple r join extremely_skewed s using (id);
+                    QUERY PLAN                    
+--------------------------------------------------
+ Aggregate
+   ->  Hash Join
+         Hash Cond: (r.id = s.id)
+         ->  Seq Scan on simple r
+         ->  Hash
+               ->  Seq Scan on extremely_skewed s
+(6 rows)
+
+select count(*) from simple r join extremely_skewed s using (id);
+ count 
+-------
+ 20000
+(1 row)
+
+select * from hash_join_batches(
+$$
+  select count(*) from simple r join extremely_skewed s using (id);
+$$);
+ original | final 
+----------+-------
+        1 |     2
+(1 row)
+
+rollback to settings;
+-- parallel with parallel-oblivious hash join
+savepoint settings;
+set local max_parallel_workers_per_gather = 2;
+set local work_mem = '128kB';
+explain (costs off)
+  select count(*) from simple r join extremely_skewed s using (id);
+                       QUERY PLAN                       
+--------------------------------------------------------
+ Aggregate
+   ->  Gather
+         Workers Planned: 2
+         ->  Hash Join
+               Hash Cond: (r.id = s.id)
+               ->  Parallel Seq Scan on simple r
+               ->  Hash
+                     ->  Seq Scan on extremely_skewed s
+(8 rows)
+
+select count(*) from simple r join extremely_skewed s using (id);
+ count 
+-------
+ 20000
+(1 row)
+
+select * from hash_join_batches(
+$$
+  select count(*) from simple r join extremely_skewed s using (id);
+$$);
+ original | final 
+----------+-------
+        1 |     2
+(1 row)
+
+rollback to settings;
+-- A couple of other hash join tests unrelated to work_mem management.
+-- A full outer join where every record is matched.
+-- non-parallel
+savepoint settings;
+set local max_parallel_workers_per_gather = 0;
+explain (costs off)
+  select count(*) from simple r full outer join simple s using (id);
+               QUERY PLAN               
+----------------------------------------
+ Aggregate
+   ->  Hash Full Join
+         Hash Cond: (r.id = s.id)
+         ->  Seq Scan on simple r
+         ->  Hash
+               ->  Seq Scan on simple s
+(6 rows)
+
+select count(*) from simple r full outer join simple s using (id);
+ count 
+-------
+ 20000
+(1 row)
+
+rollback to settings;
+-- parallelism not possible with parallel-oblivious outer hash join
+savepoint settings;
+set local max_parallel_workers_per_gather = 2;
+explain (costs off)
+  select count(*) from simple r full outer join simple s using (id);
+               QUERY PLAN               
+----------------------------------------
+ Aggregate
+   ->  Hash Full Join
+         Hash Cond: (r.id = s.id)
+         ->  Seq Scan on simple r
+         ->  Hash
+               ->  Seq Scan on simple s
+(6 rows)
+
+select count(*) from simple r full outer join simple s using (id);
+ count 
+-------
+ 20000
+(1 row)
+
+rollback to settings;
+-- A full outer join where no record is matched.
+-- non-parallel
+savepoint settings;
+set local max_parallel_workers_per_gather = 0;
+explain (costs off)
+  select count(*) from simple r full outer join simple s on (r.id = 0 - s.id);
+               QUERY PLAN               
+----------------------------------------
+ Aggregate
+   ->  Hash Full Join
+         Hash Cond: ((0 - s.id) = r.id)
+         ->  Seq Scan on simple s
+         ->  Hash
+               ->  Seq Scan on simple r
+(6 rows)
+
+select count(*) from simple r full outer join simple s on (r.id = 0 - s.id);
+ count 
+-------
+ 40000
+(1 row)
+
+rollback to settings;
+-- parallelism not possible with parallel-oblivious outer hash join
+savepoint settings;
+set local max_parallel_workers_per_gather = 2;
+explain (costs off)
+  select count(*) from simple r full outer join simple s on (r.id = 0 - s.id);
+               QUERY PLAN               
+----------------------------------------
+ Aggregate
+   ->  Hash Full Join
+         Hash Cond: ((0 - s.id) = r.id)
+         ->  Seq Scan on simple s
+         ->  Hash
+               ->  Seq Scan on simple r
+(6 rows)
+
+select count(*) from simple r full outer join simple s on (r.id = 0 - s.id);
+ count 
+-------
+ 40000
+(1 row)
+
+rollback to settings;
+rollback;
diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql
index d847d536533..cbb71c5b1be 100644
--- a/src/test/regress/sql/join.sql
+++ b/src/test/regress/sql/join.sql
@@ -1934,3 +1934,256 @@ where exists (select 1 from j3
   and t1.unique1 < 1;
 
 drop table j3;
+
+--
+-- exercises for the hash join code
+--
+
+begin;
+
+set local min_parallel_table_scan_size = 0;
+set local parallel_setup_cost = 0;
+
+-- Extract bucket and batch counts from an explain analyze plan. In
+-- general we can't make assertions about how many batches (or
+-- buckets) will be required because it can vary, but we can in some
+-- special cases and we can check for growth.
+create or replace function find_hash(node json)
+returns json language plpgsql
+as
+$$
+declare
+  x json;
+  child json;
+begin
+  if node->>'Node Type' = 'Hash' then
+    return node;
+  else
+    for child in select json_array_elements(node->'Plans')
+    loop
+      x := find_hash(child);
+      if x is not null then
+        return x;
+      end if;
+    end loop;
+    return null;
+  end if;
+end;
+$$;
+create or replace function hash_join_batches(query text)
+returns table (original int, final int) language plpgsql
+as
+$$
+declare
+  whole_plan json;
+  hash_node json;
+begin
+  for whole_plan in
+    execute 'explain (analyze, format ''json'') ' || query
+  loop
+    hash_node := find_hash(json_extract_path(whole_plan, '0', 'Plan'));
+    original := hash_node->>'Original Hash Batches';
+    final := hash_node->>'Hash Batches';
+    return next;
+  end loop;
+end;
+$$;
+
+-- Make a simple relation with well distributed keys and correctly
+-- estimated size.
+create table simple as
+  select generate_series(1, 20000) AS id, 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa';
+alter table simple set (parallel_workers = 2);
+analyze simple;
+
+-- Make a relation whose size we will under-estimate. We want stats
+-- to say 1000 rows, but actually there are 20,000 rows.
+create table bigger_than_it_looks as
+  select generate_series(1, 20000) as id, 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa';
+alter table bigger_than_it_looks set (autovacuum_enabled = 'false');
+alter table bigger_than_it_looks set (parallel_workers = 2);
+analyze bigger_than_it_looks;
+update pg_class set reltuples = 1000 where relname = 'bigger_than_it_looks';
+
+-- Make a relation whose size we underestimate and that also has a
+-- kind of skew that breaks our batching scheme. We want stats to say
+-- 2 rows, but actually there are 20,000 rows with the same key.
+create table extremely_skewed (id int, t text);
+alter table extremely_skewed set (autovacuum_enabled = 'false');
+alter table extremely_skewed set (parallel_workers = 2);
+analyze extremely_skewed;
+insert into extremely_skewed
+  select 42 as id, 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'
+  from generate_series(1, 20000);
+update pg_class
+  set reltuples = 2, relpages = pg_relation_size('extremely_skewed') / 8192
+  where relname = 'extremely_skewed';
+
+-- The "optimal" case: the hash table fits in memory; we plan for 1
+-- batch, we stick to that number, and peak memory usage stays within
+-- our work_mem budget
+
+-- non-parallel
+savepoint settings;
+set local max_parallel_workers_per_gather = 0;
+set local work_mem = '4MB';
+explain (costs off)
+  select count(*) from simple r join simple s using (id);
+select count(*) from simple r join simple s using (id);
+select original > 1 as initially_multibatch, final > original as increased_batches
+  from hash_join_batches(
+$$
+  select count(*) from simple r join simple s using (id);
+$$);
+rollback to settings;
+
+-- parallel with parallel-oblivious hash join
+savepoint settings;
+set local max_parallel_workers_per_gather = 2;
+set local work_mem = '4MB';
+explain (costs off)
+  select count(*) from simple r join simple s using (id);
+select count(*) from simple r join simple s using (id);
+select original > 1 as initially_multibatch, final > original as increased_batches
+  from hash_join_batches(
+$$
+  select count(*) from simple r join simple s using (id);
+$$);
+rollback to settings;
+
+-- The "good" case: batches are required, but we plan the right
+-- number; we stick to that number during execution, and peak memory
+-- usage stays within our work_mem budget
+
+-- non-parallel
+savepoint settings;
+set local max_parallel_workers_per_gather = 0;
+set local work_mem = '128kB';
+explain (costs off)
+  select count(*) from simple r join simple s using (id);
+select count(*) from simple r join simple s using (id);
+select original > 1 as initially_multibatch, final > original as increased_batches
+  from hash_join_batches(
+$$
+  select count(*) from simple r join simple s using (id);
+$$);
+rollback to settings;
+
+-- parallel with parallel-oblivious hash join
+savepoint settings;
+set local max_parallel_workers_per_gather = 2;
+set local work_mem = '128kB';
+explain (costs off)
+  select count(*) from simple r join simple s using (id);
+select count(*) from simple r join simple s using (id);
+select original > 1 as initially_multibatch, final > original as increased_batches
+  from hash_join_batches(
+$$
+  select count(*) from simple r join simple s using (id);
+$$);
+rollback to settings;
+
+-- The "bad" case: during execution we need to increase the number
+-- of batches; in this case we plan for 1 batch, increase it at least
+-- a couple of times, and peak memory usage stays within our work_mem
+-- budget
+
+-- non-parallel
+savepoint settings;
+set local max_parallel_workers_per_gather = 0;
+set local work_mem = '128kB';
+explain (costs off)
+  select count(*) from simple r join bigger_than_it_looks s using (id);
+select count(*) from simple r join bigger_than_it_looks s using (id);
+select original > 1 as initially_multibatch, final > original as increased_batches
+  from hash_join_batches(
+$$
+  select count(*) from simple r join bigger_than_it_looks s using (id);
+$$);
+rollback to settings;
+
+-- parallel with parallel-oblivious hash join
+savepoint settings;
+set local max_parallel_workers_per_gather = 2;
+set local work_mem = '128kB';
+explain (costs off)
+  select count(*) from simple r join bigger_than_it_looks s using (id);
+select count(*) from simple r join bigger_than_it_looks s using (id);
+select original > 1 as initially_multibatch, final > original as increased_batches
+  from hash_join_batches(
+$$
+  select count(*) from simple r join bigger_than_it_looks s using (id);
+$$);
+rollback to settings;
+
+-- The "ugly" case: increasing the number of batches during execution
+-- doesn't help, so we stop trying to fit in work_mem; in this case we
+-- plan for 1 batch, increase it just once, and then stop increasing
+-- because that didn't help at all, so we blow right through the
+-- work_mem budget and hope for the best...
+
+-- non-parallel
+savepoint settings;
+set local max_parallel_workers_per_gather = 0;
+set local work_mem = '128kB';
+explain (costs off)
+  select count(*) from simple r join extremely_skewed s using (id);
+select count(*) from simple r join extremely_skewed s using (id);
+select * from hash_join_batches(
+$$
+  select count(*) from simple r join extremely_skewed s using (id);
+$$);
+rollback to settings;
+
+-- parallel with parallel-oblivious hash join
+savepoint settings;
+set local max_parallel_workers_per_gather = 2;
+set local work_mem = '128kB';
+explain (costs off)
+  select count(*) from simple r join extremely_skewed s using (id);
+select count(*) from simple r join extremely_skewed s using (id);
+select * from hash_join_batches(
+$$
+  select count(*) from simple r join extremely_skewed s using (id);
+$$);
+rollback to settings;
+
+-- A couple of other hash join tests unrelated to work_mem management.
+
+-- A full outer join where every record is matched.
+
+-- non-parallel
+savepoint settings;
+set local max_parallel_workers_per_gather = 0;
+explain (costs off)
+  select count(*) from simple r full outer join simple s using (id);
+select count(*) from simple r full outer join simple s using (id);
+rollback to settings;
+
+-- parallelism not possible with parallel-oblivious outer hash join
+savepoint settings;
+set local max_parallel_workers_per_gather = 2;
+explain (costs off)
+  select count(*) from simple r full outer join simple s using (id);
+select count(*) from simple r full outer join simple s using (id);
+rollback to settings;
+
+-- A full outer join where no record is matched.
+
+-- non-parallel
+savepoint settings;
+set local max_parallel_workers_per_gather = 0;
+explain (costs off)
+  select count(*) from simple r full outer join simple s on (r.id = 0 - s.id);
+select count(*) from simple r full outer join simple s on (r.id = 0 - s.id);
+rollback to settings;
+
+-- parallelism not possible with parallel-oblivious outer hash join
+savepoint settings;
+set local max_parallel_workers_per_gather = 2;
+explain (costs off)
+  select count(*) from simple r full outer join simple s on (r.id = 0 - s.id);
+select count(*) from simple r full outer join simple s on (r.id = 0 - s.id);
+rollback to settings;
+
+rollback;
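
For illustration, here is a minimal sketch of how the hash_join_batches() helper added by this patch can be exercised by hand, against tables like the ones the test creates (note the test rolls its tables back at the end, so they would have to be recreated first); the absolute batch counts depend on work_mem and the build, so only the relationship between the two columns is meaningful:

    -- assumes simple and bigger_than_it_looks exist as created above
    set work_mem = '128kB';
    select original > 1 as initially_multibatch,
           final > original as increased_batches
      from hash_join_batches($$
        select count(*) from simple r join bigger_than_it_looks s using (id);
      $$);
    -- "original" comes from the Original Hash Batches field of explain
    -- (analyze, format json) output (the batch count when the hash table
    -- was first built) and "final" from Hash Batches (the count when the
    -- join finishes), so increased_batches reports whether the executor
    -- had to double the number of batches at run time.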