mirror of
https://github.com/postgres/postgres.git
synced 2025-05-05 09:19:17 +03:00
Increment xactCompletionCount during subtransaction abort.
Snapshot caching, introduced in 623a9ba79b, did not increment xactCompletionCount during subtransaction abort. That could lead to an older snapshot being reused. That is, at least as far as I can see, not a correctness issue (for MVCC snapshots there's no difference between "in progress" and "aborted"). The only difference between the old and new snapshots would be a newer ->xmax. While HeapTupleSatisfiesMVCC makes the same visibility determination, reusing the old snapshot leads HeapTupleSatisfiesMVCC to not set HEAP_XMIN_INVALID. Which subsequently causes the kill_prior_tuple optimization to not kick in (via HeapTupleIsSurelyDead() returning false). The performance effects of doing the same index-lookups over and over again is how the issue was discovered... Fix the issue by incrementing xactCompletionCount in XidCacheRemoveRunningXids. It already acquires ProcArrayLock exclusively, making that an easy proposition. Add a test to ensure that kill_prior_tuple prevents index growth when it involves aborted subtransaction of the current transaction. Author: Andres Freund Discussion: https://postgr.es/m/20210406043521.lopeo7bbigad3n6t@alap3.anarazel.de Discussion: https://postgr.es/m/20210317055718.v6qs3ltzrformqoa%40alap3.anarazel.de
This commit is contained in:
parent
8523492d4e
commit
90c885cdab
@ -1210,6 +1210,11 @@ ProcArrayApplyRecoveryInfo(RunningTransactions running)
|
|||||||
*/
|
*/
|
||||||
MaintainLatestCompletedXidRecovery(running->latestCompletedXid);
|
MaintainLatestCompletedXidRecovery(running->latestCompletedXid);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* NB: No need to increment ShmemVariableCache->xactCompletionCount here,
|
||||||
|
* nobody can see it yet.
|
||||||
|
*/
|
||||||
|
|
||||||
LWLockRelease(ProcArrayLock);
|
LWLockRelease(ProcArrayLock);
|
||||||
|
|
||||||
/* ShmemVariableCache->nextXid must be beyond any observed xid. */
|
/* ShmemVariableCache->nextXid must be beyond any observed xid. */
|
||||||
@ -3915,6 +3920,9 @@ XidCacheRemoveRunningXids(TransactionId xid,
|
|||||||
/* Also advance global latestCompletedXid while holding the lock */
|
/* Also advance global latestCompletedXid while holding the lock */
|
||||||
MaintainLatestCompletedXid(latestXid);
|
MaintainLatestCompletedXid(latestXid);
|
||||||
|
|
||||||
|
/* ... and xactCompletionCount */
|
||||||
|
ShmemVariableCache->xactCompletionCount++;
|
||||||
|
|
||||||
LWLockRelease(ProcArrayLock);
|
LWLockRelease(ProcArrayLock);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
42
src/test/regress/expected/mvcc.out
Normal file
42
src/test/regress/expected/mvcc.out
Normal file
@ -0,0 +1,42 @@
|
|||||||
|
--
|
||||||
|
-- Verify that index scans encountering dead rows produced by an
|
||||||
|
-- aborted subtransaction of the current transaction can utilize the
|
||||||
|
-- kill_prio_tuple optimization
|
||||||
|
--
|
||||||
|
-- NB: The table size is currently *not* expected to stay the same, we
|
||||||
|
-- don't have logic to trigger opportunistic pruning in cases like
|
||||||
|
-- this.
|
||||||
|
BEGIN;
|
||||||
|
SET LOCAL enable_seqscan = false;
|
||||||
|
SET LOCAL enable_indexonlyscan = false;
|
||||||
|
SET LOCAL enable_bitmapscan = false;
|
||||||
|
-- Can't easily use a unique index, since dead tuples can be found
|
||||||
|
-- independent of the kill_prior_tuples optimization.
|
||||||
|
CREATE TABLE clean_aborted_self(key int, data text);
|
||||||
|
CREATE INDEX clean_aborted_self_key ON clean_aborted_self(key);
|
||||||
|
INSERT INTO clean_aborted_self (key, data) VALUES (-1, 'just to allocate metapage');
|
||||||
|
-- save index size from before the changes, for comparison
|
||||||
|
SELECT pg_relation_size('clean_aborted_self_key') AS clean_aborted_self_key_before \gset
|
||||||
|
DO $$
|
||||||
|
BEGIN
|
||||||
|
-- iterate often enough to see index growth even on larger-than-default page sizes
|
||||||
|
FOR i IN 1..100 LOOP
|
||||||
|
BEGIN
|
||||||
|
-- perform index scan over all the inserted keys to get them to be seen as dead
|
||||||
|
IF EXISTS(SELECT * FROM clean_aborted_self WHERE key > 0 AND key < 100) THEN
|
||||||
|
RAISE data_corrupted USING MESSAGE = 'these rows should not exist';
|
||||||
|
END IF;
|
||||||
|
INSERT INTO clean_aborted_self SELECT g.i, 'rolling back in a sec' FROM generate_series(1, 100) g(i);
|
||||||
|
-- just some error that's not normally thrown
|
||||||
|
RAISE reading_sql_data_not_permitted USING MESSAGE = 'round and round again';
|
||||||
|
EXCEPTION WHEN reading_sql_data_not_permitted THEN END;
|
||||||
|
END LOOP;
|
||||||
|
END;$$;
|
||||||
|
-- show sizes only if they differ
|
||||||
|
SELECT :clean_aborted_self_key_before AS size_before, pg_relation_size('clean_aborted_self_key') size_after
|
||||||
|
WHERE :clean_aborted_self_key_before != pg_relation_size('clean_aborted_self_key');
|
||||||
|
size_before | size_after
|
||||||
|
-------------+------------
|
||||||
|
(0 rows)
|
||||||
|
|
||||||
|
ROLLBACK;
|
@ -29,7 +29,7 @@ test: strings numerology point lseg line box path polygon circle date time timet
|
|||||||
# geometry depends on point, lseg, box, path, polygon and circle
|
# geometry depends on point, lseg, box, path, polygon and circle
|
||||||
# horology depends on interval, timetz, timestamp, timestamptz
|
# horology depends on interval, timetz, timestamp, timestamptz
|
||||||
# ----------
|
# ----------
|
||||||
test: geometry horology regex type_sanity opr_sanity misc_sanity comments expressions unicode xid
|
test: geometry horology regex type_sanity opr_sanity misc_sanity comments expressions unicode xid mvcc
|
||||||
|
|
||||||
# ----------
|
# ----------
|
||||||
# These four each depend on the previous one
|
# These four each depend on the previous one
|
||||||
|
@ -11,6 +11,7 @@ test: int4
|
|||||||
test: int8
|
test: int8
|
||||||
test: oid
|
test: oid
|
||||||
test: xid
|
test: xid
|
||||||
|
test: mvcc
|
||||||
test: float4
|
test: float4
|
||||||
test: float8
|
test: float8
|
||||||
test: bit
|
test: bit
|
||||||
|
44
src/test/regress/sql/mvcc.sql
Normal file
44
src/test/regress/sql/mvcc.sql
Normal file
@ -0,0 +1,44 @@
|
|||||||
|
--
|
||||||
|
-- Verify that index scans encountering dead rows produced by an
|
||||||
|
-- aborted subtransaction of the current transaction can utilize the
|
||||||
|
-- kill_prio_tuple optimization
|
||||||
|
--
|
||||||
|
-- NB: The table size is currently *not* expected to stay the same, we
|
||||||
|
-- don't have logic to trigger opportunistic pruning in cases like
|
||||||
|
-- this.
|
||||||
|
BEGIN;
|
||||||
|
|
||||||
|
SET LOCAL enable_seqscan = false;
|
||||||
|
SET LOCAL enable_indexonlyscan = false;
|
||||||
|
SET LOCAL enable_bitmapscan = false;
|
||||||
|
|
||||||
|
-- Can't easily use a unique index, since dead tuples can be found
|
||||||
|
-- independent of the kill_prior_tuples optimization.
|
||||||
|
CREATE TABLE clean_aborted_self(key int, data text);
|
||||||
|
CREATE INDEX clean_aborted_self_key ON clean_aborted_self(key);
|
||||||
|
INSERT INTO clean_aborted_self (key, data) VALUES (-1, 'just to allocate metapage');
|
||||||
|
|
||||||
|
-- save index size from before the changes, for comparison
|
||||||
|
SELECT pg_relation_size('clean_aborted_self_key') AS clean_aborted_self_key_before \gset
|
||||||
|
|
||||||
|
DO $$
|
||||||
|
BEGIN
|
||||||
|
-- iterate often enough to see index growth even on larger-than-default page sizes
|
||||||
|
FOR i IN 1..100 LOOP
|
||||||
|
BEGIN
|
||||||
|
-- perform index scan over all the inserted keys to get them to be seen as dead
|
||||||
|
IF EXISTS(SELECT * FROM clean_aborted_self WHERE key > 0 AND key < 100) THEN
|
||||||
|
RAISE data_corrupted USING MESSAGE = 'these rows should not exist';
|
||||||
|
END IF;
|
||||||
|
INSERT INTO clean_aborted_self SELECT g.i, 'rolling back in a sec' FROM generate_series(1, 100) g(i);
|
||||||
|
-- just some error that's not normally thrown
|
||||||
|
RAISE reading_sql_data_not_permitted USING MESSAGE = 'round and round again';
|
||||||
|
EXCEPTION WHEN reading_sql_data_not_permitted THEN END;
|
||||||
|
END LOOP;
|
||||||
|
END;$$;
|
||||||
|
|
||||||
|
-- show sizes only if they differ
|
||||||
|
SELECT :clean_aborted_self_key_before AS size_before, pg_relation_size('clean_aborted_self_key') size_after
|
||||||
|
WHERE :clean_aborted_self_key_before != pg_relation_size('clean_aborted_self_key');
|
||||||
|
|
||||||
|
ROLLBACK;
|
Loading…
x
Reference in New Issue
Block a user