1
0
mirror of https://github.com/postgres/postgres.git synced 2025-07-18 17:42:25 +03:00

Re-think predicate locking on GIN indexes.

The principle behind the locking was not very well thought-out, and not
documented. Add a section in the README to explain how it's supposed to
work, and change the code so that it actually works that way.

This fixes two bugs:

1. If fast update was turned on concurrently, subsequent inserts to the
   pending list would not conflict with predicate locks that were acquired
   earlier, on entry pages. The included 'predicate-gin-fastupdate' test
   demonstrates that. To fix, make all scans acquire a predicate lock on
   the metapage. That lock represents a scan of the pending list, whether
   or not there is a pending list at the moment. Forget about the
   optimization to skip locking/checking for locks, when fastupdate=off.
2. If a scan finds no match, it still needs to lock the entry page. The
   point of predicate locks is to lock the gaps between values, whether
   or not there is a match. The included 'predicate-gin-nomatch' test
   tests that case.

In addition to those two bug fixes, this removes some unnecessary locking,
following the principle laid out in the README. Because all items in
a posting tree have the same key value, a lock on the posting tree root is
enough to cover all the items. (With a very large posting tree, it would
possibly be better to lock the posting tree leaf pages instead, so that a
"skip scan" with a query like "A & B" could avoid an unnecessary conflict
if a new tuple is inserted with A but !B. But let's keep this simple.)

Also, some spelling fixes.

Author: Heikki Linnakangas with some editorization by me
Review: Andrey Borodin, Alexander Korotkov
Discussion: https://www.postgresql.org/message-id/0b3ad2c2-2692-62a9-3a04-5724f2af9114@iki.fi
This commit is contained in:
Teodor Sigaev
2018-05-04 11:27:50 +03:00
parent 7d8679975f
commit 0bef1c0678
18 changed files with 251 additions and 117 deletions

View File

@ -0,0 +1,30 @@
Parsed test spec with 3 sessions
starting permutation: r1 r2 w1 c1 w2 c2
step r1: SELECT count(*) FROM gin_tbl WHERE p @> array[1000];
count
2
step r2: SELECT * FROM other_tbl;
id
step w1: INSERT INTO other_tbl VALUES (42);
step c1: COMMIT;
step w2: INSERT INTO gin_tbl SELECT array[1000,19001];
ERROR: could not serialize access due to read/write dependencies among transactions
step c2: COMMIT;
starting permutation: r1 r2 w1 c1 fastupdate_on w2 c2
step r1: SELECT count(*) FROM gin_tbl WHERE p @> array[1000];
count
2
step r2: SELECT * FROM other_tbl;
id
step w1: INSERT INTO other_tbl VALUES (42);
step c1: COMMIT;
step fastupdate_on: ALTER INDEX ginidx SET (fastupdate = on);
step w2: INSERT INTO gin_tbl SELECT array[1000,19001];
ERROR: could not serialize access due to read/write dependencies among transactions
step c2: COMMIT;

View File

@ -0,0 +1,15 @@
Parsed test spec with 2 sessions
starting permutation: r1 r2 w1 c1 w2 c2
step r1: SELECT count(*) FROM gin_tbl WHERE p @> array[-1];
count
0
step r2: SELECT * FROM other_tbl;
id
step w1: INSERT INTO other_tbl VALUES (42);
step c1: COMMIT;
step w2: INSERT INTO gin_tbl SELECT array[-1];
ERROR: could not serialize access due to read/write dependencies among transactions
step c2: COMMIT;

View File

@ -737,8 +737,8 @@ step c2: commit;
starting permutation: fu1 rxy1 rxy2fu wx1 c1 wy2fu c2
step fu1: alter index ginidx set (fastupdate = on);
commit;
begin isolation level serializable;
set enable_seqscan=off;
begin isolation level serializable;
set enable_seqscan=off;
step rxy1: select count(*) from gin_tbl where p @> array[4,5];
count

View File

@ -69,6 +69,8 @@ test: vacuum-concurrent-drop
test: predicate-hash
test: predicate-gist
test: predicate-gin
test: predicate-gin-fastupdate
test: predicate-gin-nomatch
test: partition-key-update-1
test: partition-key-update-2
test: partition-key-update-3

View File

@ -0,0 +1,49 @@
#
# Test that predicate locking on a GIN index works correctly, even if
# fastupdate is turned on concurrently.
#
# 0. fastupdate is off
# 1. Session 's1' acquires predicate lock on page X
# 2. fastupdate is turned on
# 3. Session 's2' inserts a new tuple to the pending list
#
# This test tests that if the lock acquired in step 1 would conflict with
# the insert in step 3, we detect that conflict correctly, even if fastupdate
# was turned on in-between.
#
setup
{
create table gin_tbl(p int4[]);
insert into gin_tbl select array[g, g*2,g*3] from generate_series(1, 10000) g;
insert into gin_tbl select array[4,5,6] from generate_series(10001, 20000) g;
create index ginidx on gin_tbl using gin(p) with (fastupdate = off);
create table other_tbl (id int4);
}
teardown
{
drop table gin_tbl;
drop table other_tbl;
}
session "s1"
setup { BEGIN ISOLATION LEVEL SERIALIZABLE; SET enable_seqscan=off; }
step "r1" { SELECT count(*) FROM gin_tbl WHERE p @> array[1000]; }
step "w1" { INSERT INTO other_tbl VALUES (42); }
step "c1" { COMMIT; }
session "s2"
setup { BEGIN ISOLATION LEVEL SERIALIZABLE; SET enable_seqscan=off; }
step "r2" { SELECT * FROM other_tbl; }
step "w2" { INSERT INTO gin_tbl SELECT array[1000,19001]; }
step "c2" { COMMIT; }
session "s3"
step "fastupdate_on" { ALTER INDEX ginidx SET (fastupdate = on); }
# This correctly throws serialization failure.
permutation "r1" "r2" "w1" "c1" "w2" "c2"
# Even if fastupdate is turned on in the middle, the conflict must still be detected.
permutation "r1" "r2" "w1" "c1" "fastupdate_on" "w2" "c2"

View File

@ -0,0 +1,35 @@
#
# Check that GIN index grabs an appropriate lock, even if there is no match.
#
setup
{
create table gin_tbl(p int4[]);
insert into gin_tbl select array[g, g*2,g*3] from generate_series(1, 10000) g;
insert into gin_tbl select array[4,5,6] from generate_series(10001, 20000) g;
create index ginidx on gin_tbl using gin(p) with (fastupdate = off);
create table other_tbl (id int4);
}
teardown
{
drop table gin_tbl;
drop table other_tbl;
}
session "s1"
setup { BEGIN ISOLATION LEVEL SERIALIZABLE; SET enable_seqscan=off; }
# Scan with no match.
step "r1" { SELECT count(*) FROM gin_tbl WHERE p @> array[-1]; }
step "w1" { INSERT INTO other_tbl VALUES (42); }
step "c1" { COMMIT; }
session "s2"
setup { BEGIN ISOLATION LEVEL SERIALIZABLE; SET enable_seqscan=off; }
step "r2" { SELECT * FROM other_tbl; }
# Insert row that would've matched in step "r1"
step "w2" { INSERT INTO gin_tbl SELECT array[-1]; }
step "c2" { COMMIT; }
# This should throw serialization failure.
permutation "r1" "r2" "w1" "c1" "w2" "c2"

View File

@ -32,8 +32,8 @@ setup
# enable pending list for a small subset of tests
step "fu1" { alter index ginidx set (fastupdate = on);
commit;
begin isolation level serializable;
set enable_seqscan=off; }
begin isolation level serializable;
set enable_seqscan=off; }
step "rxy1" { select count(*) from gin_tbl where p @> array[4,5]; }
step "wx1" { insert into gin_tbl select g, array[5,6] from generate_series