mirror of
https://github.com/postgres/postgres.git
synced 2025-05-06 19:59:18 +03:00
Add a hash opclass for type "tid".
Up to now we've not worried much about joins where the join key is a relation's CTID column, reasoning that storing a table's CTIDs in some other table would be pretty useless. However, there are use-cases for this sort of query involving self-joins, so that argument doesn't really hold water. With larger relations, a merge or hash join is desirable. We had a btree opclass for type "tid", allowing merge joins on CTID, but no hash opclass so that hash joins weren't possible. Add the missing infrastructure. This also potentially enables hash aggregation on "tid", though the use-cases for that aren't too clear. Discussion: https://postgr.es/m/1853.1545453106@sss.pgh.pa.us
This commit is contained in:
parent
b5415e3c21
commit
0a6ea4001a
@ -20,6 +20,7 @@
|
||||
#include <math.h>
|
||||
#include <limits.h>
|
||||
|
||||
#include "access/hash.h"
|
||||
#include "access/heapam.h"
|
||||
#include "access/sysattr.h"
|
||||
#include "catalog/namespace.h"
|
||||
@ -239,6 +240,33 @@ tidsmaller(PG_FUNCTION_ARGS)
|
||||
PG_RETURN_ITEMPOINTER(ItemPointerCompare(arg1, arg2) <= 0 ? arg1 : arg2);
|
||||
}
|
||||
|
||||
Datum
|
||||
hashtid(PG_FUNCTION_ARGS)
|
||||
{
|
||||
ItemPointer key = PG_GETARG_ITEMPOINTER(0);
|
||||
|
||||
/*
|
||||
* While you'll probably have a lot of trouble with a compiler that
|
||||
* insists on appending pad space to struct ItemPointerData, we can at
|
||||
* least make this code work, by not using sizeof(ItemPointerData).
|
||||
* Instead rely on knowing the sizes of the component fields.
|
||||
*/
|
||||
return hash_any((unsigned char *) key,
|
||||
sizeof(BlockIdData) + sizeof(OffsetNumber));
|
||||
}
|
||||
|
||||
Datum
|
||||
hashtidextended(PG_FUNCTION_ARGS)
|
||||
{
|
||||
ItemPointer key = PG_GETARG_ITEMPOINTER(0);
|
||||
uint64 seed = PG_GETARG_INT64(1);
|
||||
|
||||
/* As above */
|
||||
return hash_any_extended((unsigned char *) key,
|
||||
sizeof(BlockIdData) + sizeof(OffsetNumber),
|
||||
seed);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Functions to get latest tid of a specified tuple.
|
||||
|
@ -53,6 +53,6 @@
|
||||
*/
|
||||
|
||||
/* yyyymmddN */
|
||||
#define CATALOG_VERSION_NO 201812202
|
||||
#define CATALOG_VERSION_NO 201812301
|
||||
|
||||
#endif
|
||||
|
@ -1013,6 +1013,10 @@
|
||||
{ amopfamily => 'hash/cid_ops', amoplefttype => 'cid', amoprighttype => 'cid',
|
||||
amopstrategy => '1', amopopr => '=(cid,cid)', amopmethod => 'hash' },
|
||||
|
||||
# tid_ops
|
||||
{ amopfamily => 'hash/tid_ops', amoplefttype => 'tid', amoprighttype => 'tid',
|
||||
amopstrategy => '1', amopopr => '=(tid,tid)', amopmethod => 'hash' },
|
||||
|
||||
# text_pattern_ops
|
||||
{ amopfamily => 'hash/text_pattern_ops', amoplefttype => 'text',
|
||||
amoprighttype => 'text', amopstrategy => '1', amopopr => '=(text,text)',
|
||||
|
@ -340,6 +340,10 @@
|
||||
amprocrighttype => 'cid', amprocnum => '1', amproc => 'hashint4' },
|
||||
{ amprocfamily => 'hash/cid_ops', amproclefttype => 'cid',
|
||||
amprocrighttype => 'cid', amprocnum => '2', amproc => 'hashint4extended' },
|
||||
{ amprocfamily => 'hash/tid_ops', amproclefttype => 'tid',
|
||||
amprocrighttype => 'tid', amprocnum => '1', amproc => 'hashtid' },
|
||||
{ amprocfamily => 'hash/tid_ops', amproclefttype => 'tid',
|
||||
amprocrighttype => 'tid', amprocnum => '2', amproc => 'hashtidextended' },
|
||||
{ amprocfamily => 'hash/text_pattern_ops', amproclefttype => 'text',
|
||||
amprocrighttype => 'text', amprocnum => '1', amproc => 'hashtext' },
|
||||
{ amprocfamily => 'hash/text_pattern_ops', amproclefttype => 'text',
|
||||
|
@ -167,6 +167,8 @@
|
||||
opcintype => 'xid' },
|
||||
{ opcmethod => 'hash', opcname => 'cid_ops', opcfamily => 'hash/cid_ops',
|
||||
opcintype => 'cid' },
|
||||
{ opcmethod => 'hash', opcname => 'tid_ops', opcfamily => 'hash/tid_ops',
|
||||
opcintype => 'tid' },
|
||||
{ opcmethod => 'hash', opcname => 'text_pattern_ops',
|
||||
opcfamily => 'hash/text_pattern_ops', opcintype => 'text',
|
||||
opcdefault => 'f' },
|
||||
|
@ -204,9 +204,10 @@
|
||||
oprrest => 'eqsel', oprjoin => 'eqjoinsel' },
|
||||
|
||||
{ oid => '387', oid_symbol => 'TIDEqualOperator', descr => 'equal',
|
||||
oprname => '=', oprcanmerge => 't', oprleft => 'tid', oprright => 'tid',
|
||||
oprresult => 'bool', oprcom => '=(tid,tid)', oprnegate => '<>(tid,tid)',
|
||||
oprcode => 'tideq', oprrest => 'eqsel', oprjoin => 'eqjoinsel' },
|
||||
oprname => '=', oprcanmerge => 't', oprcanhash => 't', oprleft => 'tid',
|
||||
oprright => 'tid', oprresult => 'bool', oprcom => '=(tid,tid)',
|
||||
oprnegate => '<>(tid,tid)', oprcode => 'tideq', oprrest => 'eqsel',
|
||||
oprjoin => 'eqjoinsel' },
|
||||
{ oid => '402', descr => 'not equal',
|
||||
oprname => '<>', oprleft => 'tid', oprright => 'tid', oprresult => 'bool',
|
||||
oprcom => '<>(tid,tid)', oprnegate => '=(tid,tid)', oprcode => 'tidne',
|
||||
|
@ -112,6 +112,8 @@
|
||||
opfmethod => 'hash', opfname => 'xid_ops' },
|
||||
{ oid => '2226',
|
||||
opfmethod => 'hash', opfname => 'cid_ops' },
|
||||
{ oid => '2227',
|
||||
opfmethod => 'hash', opfname => 'tid_ops' },
|
||||
{ oid => '2229',
|
||||
opfmethod => 'hash', opfname => 'text_pattern_ops' },
|
||||
{ oid => '2231',
|
||||
|
@ -2484,6 +2484,12 @@
|
||||
{ oid => '2796', descr => 'smaller of two',
|
||||
proname => 'tidsmaller', prorettype => 'tid', proargtypes => 'tid tid',
|
||||
prosrc => 'tidsmaller' },
|
||||
{ oid => '2233', descr => 'hash',
|
||||
proname => 'hashtid', prorettype => 'int4', proargtypes => 'tid',
|
||||
prosrc => 'hashtid' },
|
||||
{ oid => '2234', descr => 'hash',
|
||||
proname => 'hashtidextended', prorettype => 'int8', proargtypes => 'tid int8',
|
||||
prosrc => 'hashtidextended' },
|
||||
|
||||
{ oid => '1296',
|
||||
proname => 'timedate_pl', prolang => '14', prorettype => 'timestamp',
|
||||
|
@ -109,6 +109,7 @@ WHERE (id = 3 AND ctid IN ('(0,2)', '(0,3)')) OR (ctid = '(0,1)' AND id = 1);
|
||||
(2 rows)
|
||||
|
||||
-- nestloop-with-inner-tidscan joins on tid
|
||||
SET enable_hashjoin TO off; -- otherwise hash join might win
|
||||
EXPLAIN (COSTS OFF)
|
||||
SELECT t1.ctid, t1.*, t2.ctid, t2.*
|
||||
FROM tidscan t1 JOIN tidscan t2 ON t1.ctid = t2.ctid WHERE t1.id = 1;
|
||||
@ -147,6 +148,7 @@ FROM tidscan t1 LEFT JOIN tidscan t2 ON t1.ctid = t2.ctid WHERE t1.id = 1;
|
||||
(0,1) | 1 | (0,1) | 1
|
||||
(1 row)
|
||||
|
||||
RESET enable_hashjoin;
|
||||
-- exercise backward scan and rewind
|
||||
BEGIN;
|
||||
DECLARE c CURSOR FOR
|
||||
@ -231,4 +233,48 @@ EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF)
|
||||
UPDATE tidscan SET id = -id WHERE CURRENT OF c RETURNING *;
|
||||
ERROR: cursor "c" is not positioned on a row
|
||||
ROLLBACK;
|
||||
-- bulk joins on CTID
|
||||
-- (these plans don't use TID scans, but this still seems like an
|
||||
-- appropriate place for these tests)
|
||||
EXPLAIN (COSTS OFF)
|
||||
SELECT count(*) FROM tenk1 t1 JOIN tenk1 t2 ON t1.ctid = t2.ctid;
|
||||
QUERY PLAN
|
||||
----------------------------------------
|
||||
Aggregate
|
||||
-> Hash Join
|
||||
Hash Cond: (t1.ctid = t2.ctid)
|
||||
-> Seq Scan on tenk1 t1
|
||||
-> Hash
|
||||
-> Seq Scan on tenk1 t2
|
||||
(6 rows)
|
||||
|
||||
SELECT count(*) FROM tenk1 t1 JOIN tenk1 t2 ON t1.ctid = t2.ctid;
|
||||
count
|
||||
-------
|
||||
10000
|
||||
(1 row)
|
||||
|
||||
SET enable_hashjoin TO off;
|
||||
EXPLAIN (COSTS OFF)
|
||||
SELECT count(*) FROM tenk1 t1 JOIN tenk1 t2 ON t1.ctid = t2.ctid;
|
||||
QUERY PLAN
|
||||
-----------------------------------------
|
||||
Aggregate
|
||||
-> Merge Join
|
||||
Merge Cond: (t1.ctid = t2.ctid)
|
||||
-> Sort
|
||||
Sort Key: t1.ctid
|
||||
-> Seq Scan on tenk1 t1
|
||||
-> Sort
|
||||
Sort Key: t2.ctid
|
||||
-> Seq Scan on tenk1 t2
|
||||
(9 rows)
|
||||
|
||||
SELECT count(*) FROM tenk1 t1 JOIN tenk1 t2 ON t1.ctid = t2.ctid;
|
||||
count
|
||||
-------
|
||||
10000
|
||||
(1 row)
|
||||
|
||||
RESET enable_hashjoin;
|
||||
DROP TABLE tidscan;
|
||||
|
@ -40,6 +40,7 @@ SELECT ctid, * FROM tidscan
|
||||
WHERE (id = 3 AND ctid IN ('(0,2)', '(0,3)')) OR (ctid = '(0,1)' AND id = 1);
|
||||
|
||||
-- nestloop-with-inner-tidscan joins on tid
|
||||
SET enable_hashjoin TO off; -- otherwise hash join might win
|
||||
EXPLAIN (COSTS OFF)
|
||||
SELECT t1.ctid, t1.*, t2.ctid, t2.*
|
||||
FROM tidscan t1 JOIN tidscan t2 ON t1.ctid = t2.ctid WHERE t1.id = 1;
|
||||
@ -50,6 +51,7 @@ SELECT t1.ctid, t1.*, t2.ctid, t2.*
|
||||
FROM tidscan t1 LEFT JOIN tidscan t2 ON t1.ctid = t2.ctid WHERE t1.id = 1;
|
||||
SELECT t1.ctid, t1.*, t2.ctid, t2.*
|
||||
FROM tidscan t1 LEFT JOIN tidscan t2 ON t1.ctid = t2.ctid WHERE t1.id = 1;
|
||||
RESET enable_hashjoin;
|
||||
|
||||
-- exercise backward scan and rewind
|
||||
BEGIN;
|
||||
@ -80,4 +82,16 @@ EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF)
|
||||
UPDATE tidscan SET id = -id WHERE CURRENT OF c RETURNING *;
|
||||
ROLLBACK;
|
||||
|
||||
-- bulk joins on CTID
|
||||
-- (these plans don't use TID scans, but this still seems like an
|
||||
-- appropriate place for these tests)
|
||||
EXPLAIN (COSTS OFF)
|
||||
SELECT count(*) FROM tenk1 t1 JOIN tenk1 t2 ON t1.ctid = t2.ctid;
|
||||
SELECT count(*) FROM tenk1 t1 JOIN tenk1 t2 ON t1.ctid = t2.ctid;
|
||||
SET enable_hashjoin TO off;
|
||||
EXPLAIN (COSTS OFF)
|
||||
SELECT count(*) FROM tenk1 t1 JOIN tenk1 t2 ON t1.ctid = t2.ctid;
|
||||
SELECT count(*) FROM tenk1 t1 JOIN tenk1 t2 ON t1.ctid = t2.ctid;
|
||||
RESET enable_hashjoin;
|
||||
|
||||
DROP TABLE tidscan;
|
||||
|
Loading…
x
Reference in New Issue
Block a user