mirror of
https://github.com/postgres/postgres.git
synced 2025-06-23 14:01:44 +03:00
Increment xactCompletionCount during subtransaction abort.
Snapshot caching, introduced in 623a9ba79b
, did not increment
xactCompletionCount during subtransaction abort. That could lead to an older
snapshot being reused. That is, at least as far as I can see, not a
correctness issue (for MVCC snapshots there's no difference between "in
progress" and "aborted"). The only difference between the old and new
snapshots would be a newer ->xmax.
While HeapTupleSatisfiesMVCC makes the same visibility determination, reusing
the old snapshot leads HeapTupleSatisfiesMVCC to not set
HEAP_XMIN_INVALID. Which subsequently causes the kill_prior_tuple optimization
to not kick in (via HeapTupleIsSurelyDead() returning false). The performance
effects of doing the same index-lookups over and over again is how the issue
was discovered...
Fix the issue by incrementing xactCompletionCount in
XidCacheRemoveRunningXids. It already acquires ProcArrayLock exclusively,
making that an easy proposition.
Add a test to ensure that kill_prior_tuple prevents index growth when it
involves aborted subtransaction of the current transaction.
Author: Andres Freund
Discussion: https://postgr.es/m/20210406043521.lopeo7bbigad3n6t@alap3.anarazel.de
Discussion: https://postgr.es/m/20210317055718.v6qs3ltzrformqoa%40alap3.anarazel.de
This commit is contained in:
@ -1210,6 +1210,11 @@ ProcArrayApplyRecoveryInfo(RunningTransactions running)
|
|||||||
*/
|
*/
|
||||||
MaintainLatestCompletedXidRecovery(running->latestCompletedXid);
|
MaintainLatestCompletedXidRecovery(running->latestCompletedXid);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* NB: No need to increment ShmemVariableCache->xactCompletionCount here,
|
||||||
|
* nobody can see it yet.
|
||||||
|
*/
|
||||||
|
|
||||||
LWLockRelease(ProcArrayLock);
|
LWLockRelease(ProcArrayLock);
|
||||||
|
|
||||||
/* ShmemVariableCache->nextXid must be beyond any observed xid. */
|
/* ShmemVariableCache->nextXid must be beyond any observed xid. */
|
||||||
@ -3915,6 +3920,9 @@ XidCacheRemoveRunningXids(TransactionId xid,
|
|||||||
/* Also advance global latestCompletedXid while holding the lock */
|
/* Also advance global latestCompletedXid while holding the lock */
|
||||||
MaintainLatestCompletedXid(latestXid);
|
MaintainLatestCompletedXid(latestXid);
|
||||||
|
|
||||||
|
/* ... and xactCompletionCount */
|
||||||
|
ShmemVariableCache->xactCompletionCount++;
|
||||||
|
|
||||||
LWLockRelease(ProcArrayLock);
|
LWLockRelease(ProcArrayLock);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
42
src/test/regress/expected/mvcc.out
Normal file
42
src/test/regress/expected/mvcc.out
Normal file
@ -0,0 +1,42 @@
|
|||||||
|
--
|
||||||
|
-- Verify that index scans encountering dead rows produced by an
|
||||||
|
-- aborted subtransaction of the current transaction can utilize the
|
||||||
|
-- kill_prio_tuple optimization
|
||||||
|
--
|
||||||
|
-- NB: The table size is currently *not* expected to stay the same, we
|
||||||
|
-- don't have logic to trigger opportunistic pruning in cases like
|
||||||
|
-- this.
|
||||||
|
BEGIN;
|
||||||
|
SET LOCAL enable_seqscan = false;
|
||||||
|
SET LOCAL enable_indexonlyscan = false;
|
||||||
|
SET LOCAL enable_bitmapscan = false;
|
||||||
|
-- Can't easily use a unique index, since dead tuples can be found
|
||||||
|
-- independent of the kill_prior_tuples optimization.
|
||||||
|
CREATE TABLE clean_aborted_self(key int, data text);
|
||||||
|
CREATE INDEX clean_aborted_self_key ON clean_aborted_self(key);
|
||||||
|
INSERT INTO clean_aborted_self (key, data) VALUES (-1, 'just to allocate metapage');
|
||||||
|
-- save index size from before the changes, for comparison
|
||||||
|
SELECT pg_relation_size('clean_aborted_self_key') AS clean_aborted_self_key_before \gset
|
||||||
|
DO $$
|
||||||
|
BEGIN
|
||||||
|
-- iterate often enough to see index growth even on larger-than-default page sizes
|
||||||
|
FOR i IN 1..100 LOOP
|
||||||
|
BEGIN
|
||||||
|
-- perform index scan over all the inserted keys to get them to be seen as dead
|
||||||
|
IF EXISTS(SELECT * FROM clean_aborted_self WHERE key > 0 AND key < 100) THEN
|
||||||
|
RAISE data_corrupted USING MESSAGE = 'these rows should not exist';
|
||||||
|
END IF;
|
||||||
|
INSERT INTO clean_aborted_self SELECT g.i, 'rolling back in a sec' FROM generate_series(1, 100) g(i);
|
||||||
|
-- just some error that's not normally thrown
|
||||||
|
RAISE reading_sql_data_not_permitted USING MESSAGE = 'round and round again';
|
||||||
|
EXCEPTION WHEN reading_sql_data_not_permitted THEN END;
|
||||||
|
END LOOP;
|
||||||
|
END;$$;
|
||||||
|
-- show sizes only if they differ
|
||||||
|
SELECT :clean_aborted_self_key_before AS size_before, pg_relation_size('clean_aborted_self_key') size_after
|
||||||
|
WHERE :clean_aborted_self_key_before != pg_relation_size('clean_aborted_self_key');
|
||||||
|
size_before | size_after
|
||||||
|
-------------+------------
|
||||||
|
(0 rows)
|
||||||
|
|
||||||
|
ROLLBACK;
|
@ -29,7 +29,7 @@ test: strings numerology point lseg line box path polygon circle date time timet
|
|||||||
# geometry depends on point, lseg, box, path, polygon and circle
|
# geometry depends on point, lseg, box, path, polygon and circle
|
||||||
# horology depends on interval, timetz, timestamp, timestamptz
|
# horology depends on interval, timetz, timestamp, timestamptz
|
||||||
# ----------
|
# ----------
|
||||||
test: geometry horology regex type_sanity opr_sanity misc_sanity comments expressions unicode xid
|
test: geometry horology regex type_sanity opr_sanity misc_sanity comments expressions unicode xid mvcc
|
||||||
|
|
||||||
# ----------
|
# ----------
|
||||||
# These four each depend on the previous one
|
# These four each depend on the previous one
|
||||||
|
@ -11,6 +11,7 @@ test: int4
|
|||||||
test: int8
|
test: int8
|
||||||
test: oid
|
test: oid
|
||||||
test: xid
|
test: xid
|
||||||
|
test: mvcc
|
||||||
test: float4
|
test: float4
|
||||||
test: float8
|
test: float8
|
||||||
test: bit
|
test: bit
|
||||||
|
44
src/test/regress/sql/mvcc.sql
Normal file
44
src/test/regress/sql/mvcc.sql
Normal file
@ -0,0 +1,44 @@
|
|||||||
|
--
|
||||||
|
-- Verify that index scans encountering dead rows produced by an
|
||||||
|
-- aborted subtransaction of the current transaction can utilize the
|
||||||
|
-- kill_prio_tuple optimization
|
||||||
|
--
|
||||||
|
-- NB: The table size is currently *not* expected to stay the same, we
|
||||||
|
-- don't have logic to trigger opportunistic pruning in cases like
|
||||||
|
-- this.
|
||||||
|
BEGIN;
|
||||||
|
|
||||||
|
SET LOCAL enable_seqscan = false;
|
||||||
|
SET LOCAL enable_indexonlyscan = false;
|
||||||
|
SET LOCAL enable_bitmapscan = false;
|
||||||
|
|
||||||
|
-- Can't easily use a unique index, since dead tuples can be found
|
||||||
|
-- independent of the kill_prior_tuples optimization.
|
||||||
|
CREATE TABLE clean_aborted_self(key int, data text);
|
||||||
|
CREATE INDEX clean_aborted_self_key ON clean_aborted_self(key);
|
||||||
|
INSERT INTO clean_aborted_self (key, data) VALUES (-1, 'just to allocate metapage');
|
||||||
|
|
||||||
|
-- save index size from before the changes, for comparison
|
||||||
|
SELECT pg_relation_size('clean_aborted_self_key') AS clean_aborted_self_key_before \gset
|
||||||
|
|
||||||
|
DO $$
|
||||||
|
BEGIN
|
||||||
|
-- iterate often enough to see index growth even on larger-than-default page sizes
|
||||||
|
FOR i IN 1..100 LOOP
|
||||||
|
BEGIN
|
||||||
|
-- perform index scan over all the inserted keys to get them to be seen as dead
|
||||||
|
IF EXISTS(SELECT * FROM clean_aborted_self WHERE key > 0 AND key < 100) THEN
|
||||||
|
RAISE data_corrupted USING MESSAGE = 'these rows should not exist';
|
||||||
|
END IF;
|
||||||
|
INSERT INTO clean_aborted_self SELECT g.i, 'rolling back in a sec' FROM generate_series(1, 100) g(i);
|
||||||
|
-- just some error that's not normally thrown
|
||||||
|
RAISE reading_sql_data_not_permitted USING MESSAGE = 'round and round again';
|
||||||
|
EXCEPTION WHEN reading_sql_data_not_permitted THEN END;
|
||||||
|
END LOOP;
|
||||||
|
END;$$;
|
||||||
|
|
||||||
|
-- show sizes only if they differ
|
||||||
|
SELECT :clean_aborted_self_key_before AS size_before, pg_relation_size('clean_aborted_self_key') size_after
|
||||||
|
WHERE :clean_aborted_self_key_before != pg_relation_size('clean_aborted_self_key');
|
||||||
|
|
||||||
|
ROLLBACK;
|
Reference in New Issue
Block a user