diff --git a/src/backend/utils/adt/tid.c b/src/backend/utils/adt/tid.c index 41d540b46ec..7b25947682e 100644 --- a/src/backend/utils/adt/tid.c +++ b/src/backend/utils/adt/tid.c @@ -20,6 +20,7 @@ #include #include +#include "access/hash.h" #include "access/heapam.h" #include "access/sysattr.h" #include "catalog/namespace.h" @@ -239,6 +240,33 @@ tidsmaller(PG_FUNCTION_ARGS) PG_RETURN_ITEMPOINTER(ItemPointerCompare(arg1, arg2) <= 0 ? arg1 : arg2); } +Datum +hashtid(PG_FUNCTION_ARGS) +{ + ItemPointer key = PG_GETARG_ITEMPOINTER(0); + + /* + * While you'll probably have a lot of trouble with a compiler that + * insists on appending pad space to struct ItemPointerData, we can at + * least make this code work, by not using sizeof(ItemPointerData). + * Instead rely on knowing the sizes of the component fields. + */ + return hash_any((unsigned char *) key, + sizeof(BlockIdData) + sizeof(OffsetNumber)); +} + +Datum +hashtidextended(PG_FUNCTION_ARGS) +{ + ItemPointer key = PG_GETARG_ITEMPOINTER(0); + uint64 seed = PG_GETARG_INT64(1); + + /* As above */ + return hash_any_extended((unsigned char *) key, + sizeof(BlockIdData) + sizeof(OffsetNumber), + seed); +} + /* * Functions to get latest tid of a specified tuple. diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h index 0e89b303650..588a110093a 100644 --- a/src/include/catalog/catversion.h +++ b/src/include/catalog/catversion.h @@ -53,6 +53,6 @@ */ /* yyyymmddN */ -#define CATALOG_VERSION_NO 201812202 +#define CATALOG_VERSION_NO 201812301 #endif diff --git a/src/include/catalog/pg_amop.dat b/src/include/catalog/pg_amop.dat index e689c9b160a..436f1bd0762 100644 --- a/src/include/catalog/pg_amop.dat +++ b/src/include/catalog/pg_amop.dat @@ -1013,6 +1013,10 @@ { amopfamily => 'hash/cid_ops', amoplefttype => 'cid', amoprighttype => 'cid', amopstrategy => '1', amopopr => '=(cid,cid)', amopmethod => 'hash' }, +# tid_ops +{ amopfamily => 'hash/tid_ops', amoplefttype => 'tid', amoprighttype => 'tid', + amopstrategy => '1', amopopr => '=(tid,tid)', amopmethod => 'hash' }, + # text_pattern_ops { amopfamily => 'hash/text_pattern_ops', amoplefttype => 'text', amoprighttype => 'text', amopstrategy => '1', amopopr => '=(text,text)', diff --git a/src/include/catalog/pg_amproc.dat b/src/include/catalog/pg_amproc.dat index bbcee26fa8b..8ddb6991121 100644 --- a/src/include/catalog/pg_amproc.dat +++ b/src/include/catalog/pg_amproc.dat @@ -340,6 +340,10 @@ amprocrighttype => 'cid', amprocnum => '1', amproc => 'hashint4' }, { amprocfamily => 'hash/cid_ops', amproclefttype => 'cid', amprocrighttype => 'cid', amprocnum => '2', amproc => 'hashint4extended' }, +{ amprocfamily => 'hash/tid_ops', amproclefttype => 'tid', + amprocrighttype => 'tid', amprocnum => '1', amproc => 'hashtid' }, +{ amprocfamily => 'hash/tid_ops', amproclefttype => 'tid', + amprocrighttype => 'tid', amprocnum => '2', amproc => 'hashtidextended' }, { amprocfamily => 'hash/text_pattern_ops', amproclefttype => 'text', amprocrighttype => 'text', amprocnum => '1', amproc => 'hashtext' }, { amprocfamily => 'hash/text_pattern_ops', amproclefttype => 'text', diff --git a/src/include/catalog/pg_opclass.dat b/src/include/catalog/pg_opclass.dat index 5178d04337f..c451d365a6f 100644 --- a/src/include/catalog/pg_opclass.dat +++ b/src/include/catalog/pg_opclass.dat @@ -167,6 +167,8 @@ opcintype => 'xid' }, { opcmethod => 'hash', opcname => 'cid_ops', opcfamily => 'hash/cid_ops', opcintype => 'cid' }, +{ opcmethod => 'hash', opcname => 'tid_ops', opcfamily => 'hash/tid_ops', + opcintype => 'tid' }, { opcmethod => 'hash', opcname => 'text_pattern_ops', opcfamily => 'hash/text_pattern_ops', opcintype => 'text', opcdefault => 'f' }, diff --git a/src/include/catalog/pg_operator.dat b/src/include/catalog/pg_operator.dat index 2abd5311c3e..e8452e10d21 100644 --- a/src/include/catalog/pg_operator.dat +++ b/src/include/catalog/pg_operator.dat @@ -204,9 +204,10 @@ oprrest => 'eqsel', oprjoin => 'eqjoinsel' }, { oid => '387', oid_symbol => 'TIDEqualOperator', descr => 'equal', - oprname => '=', oprcanmerge => 't', oprleft => 'tid', oprright => 'tid', - oprresult => 'bool', oprcom => '=(tid,tid)', oprnegate => '<>(tid,tid)', - oprcode => 'tideq', oprrest => 'eqsel', oprjoin => 'eqjoinsel' }, + oprname => '=', oprcanmerge => 't', oprcanhash => 't', oprleft => 'tid', + oprright => 'tid', oprresult => 'bool', oprcom => '=(tid,tid)', + oprnegate => '<>(tid,tid)', oprcode => 'tideq', oprrest => 'eqsel', + oprjoin => 'eqjoinsel' }, { oid => '402', descr => 'not equal', oprname => '<>', oprleft => 'tid', oprright => 'tid', oprresult => 'bool', oprcom => '<>(tid,tid)', oprnegate => '=(tid,tid)', oprcode => 'tidne', diff --git a/src/include/catalog/pg_opfamily.dat b/src/include/catalog/pg_opfamily.dat index fe8a32485f2..c5ea37b5cd7 100644 --- a/src/include/catalog/pg_opfamily.dat +++ b/src/include/catalog/pg_opfamily.dat @@ -112,6 +112,8 @@ opfmethod => 'hash', opfname => 'xid_ops' }, { oid => '2226', opfmethod => 'hash', opfname => 'cid_ops' }, +{ oid => '2227', + opfmethod => 'hash', opfname => 'tid_ops' }, { oid => '2229', opfmethod => 'hash', opfname => 'text_pattern_ops' }, { oid => '2231', diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index acb0154048a..6e1e1dfad71 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -2484,6 +2484,12 @@ { oid => '2796', descr => 'smaller of two', proname => 'tidsmaller', prorettype => 'tid', proargtypes => 'tid tid', prosrc => 'tidsmaller' }, +{ oid => '2233', descr => 'hash', + proname => 'hashtid', prorettype => 'int4', proargtypes => 'tid', + prosrc => 'hashtid' }, +{ oid => '2234', descr => 'hash', + proname => 'hashtidextended', prorettype => 'int8', proargtypes => 'tid int8', + prosrc => 'hashtidextended' }, { oid => '1296', proname => 'timedate_pl', prolang => '14', prorettype => 'timestamp', diff --git a/src/test/regress/expected/tidscan.out b/src/test/regress/expected/tidscan.out index 8f15c04dad1..9b5eb04bfd9 100644 --- a/src/test/regress/expected/tidscan.out +++ b/src/test/regress/expected/tidscan.out @@ -109,6 +109,7 @@ WHERE (id = 3 AND ctid IN ('(0,2)', '(0,3)')) OR (ctid = '(0,1)' AND id = 1); (2 rows) -- nestloop-with-inner-tidscan joins on tid +SET enable_hashjoin TO off; -- otherwise hash join might win EXPLAIN (COSTS OFF) SELECT t1.ctid, t1.*, t2.ctid, t2.* FROM tidscan t1 JOIN tidscan t2 ON t1.ctid = t2.ctid WHERE t1.id = 1; @@ -147,6 +148,7 @@ FROM tidscan t1 LEFT JOIN tidscan t2 ON t1.ctid = t2.ctid WHERE t1.id = 1; (0,1) | 1 | (0,1) | 1 (1 row) +RESET enable_hashjoin; -- exercise backward scan and rewind BEGIN; DECLARE c CURSOR FOR @@ -231,4 +233,48 @@ EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) UPDATE tidscan SET id = -id WHERE CURRENT OF c RETURNING *; ERROR: cursor "c" is not positioned on a row ROLLBACK; +-- bulk joins on CTID +-- (these plans don't use TID scans, but this still seems like an +-- appropriate place for these tests) +EXPLAIN (COSTS OFF) +SELECT count(*) FROM tenk1 t1 JOIN tenk1 t2 ON t1.ctid = t2.ctid; + QUERY PLAN +---------------------------------------- + Aggregate + -> Hash Join + Hash Cond: (t1.ctid = t2.ctid) + -> Seq Scan on tenk1 t1 + -> Hash + -> Seq Scan on tenk1 t2 +(6 rows) + +SELECT count(*) FROM tenk1 t1 JOIN tenk1 t2 ON t1.ctid = t2.ctid; + count +------- + 10000 +(1 row) + +SET enable_hashjoin TO off; +EXPLAIN (COSTS OFF) +SELECT count(*) FROM tenk1 t1 JOIN tenk1 t2 ON t1.ctid = t2.ctid; + QUERY PLAN +----------------------------------------- + Aggregate + -> Merge Join + Merge Cond: (t1.ctid = t2.ctid) + -> Sort + Sort Key: t1.ctid + -> Seq Scan on tenk1 t1 + -> Sort + Sort Key: t2.ctid + -> Seq Scan on tenk1 t2 +(9 rows) + +SELECT count(*) FROM tenk1 t1 JOIN tenk1 t2 ON t1.ctid = t2.ctid; + count +------- + 10000 +(1 row) + +RESET enable_hashjoin; DROP TABLE tidscan; diff --git a/src/test/regress/sql/tidscan.sql b/src/test/regress/sql/tidscan.sql index 2d63aa067fe..ef05c098420 100644 --- a/src/test/regress/sql/tidscan.sql +++ b/src/test/regress/sql/tidscan.sql @@ -40,6 +40,7 @@ SELECT ctid, * FROM tidscan WHERE (id = 3 AND ctid IN ('(0,2)', '(0,3)')) OR (ctid = '(0,1)' AND id = 1); -- nestloop-with-inner-tidscan joins on tid +SET enable_hashjoin TO off; -- otherwise hash join might win EXPLAIN (COSTS OFF) SELECT t1.ctid, t1.*, t2.ctid, t2.* FROM tidscan t1 JOIN tidscan t2 ON t1.ctid = t2.ctid WHERE t1.id = 1; @@ -50,6 +51,7 @@ SELECT t1.ctid, t1.*, t2.ctid, t2.* FROM tidscan t1 LEFT JOIN tidscan t2 ON t1.ctid = t2.ctid WHERE t1.id = 1; SELECT t1.ctid, t1.*, t2.ctid, t2.* FROM tidscan t1 LEFT JOIN tidscan t2 ON t1.ctid = t2.ctid WHERE t1.id = 1; +RESET enable_hashjoin; -- exercise backward scan and rewind BEGIN; @@ -80,4 +82,16 @@ EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) UPDATE tidscan SET id = -id WHERE CURRENT OF c RETURNING *; ROLLBACK; +-- bulk joins on CTID +-- (these plans don't use TID scans, but this still seems like an +-- appropriate place for these tests) +EXPLAIN (COSTS OFF) +SELECT count(*) FROM tenk1 t1 JOIN tenk1 t2 ON t1.ctid = t2.ctid; +SELECT count(*) FROM tenk1 t1 JOIN tenk1 t2 ON t1.ctid = t2.ctid; +SET enable_hashjoin TO off; +EXPLAIN (COSTS OFF) +SELECT count(*) FROM tenk1 t1 JOIN tenk1 t2 ON t1.ctid = t2.ctid; +SELECT count(*) FROM tenk1 t1 JOIN tenk1 t2 ON t1.ctid = t2.ctid; +RESET enable_hashjoin; + DROP TABLE tidscan;