1
0
mirror of https://github.com/postgres/postgres.git synced 2025-07-28 23:42:10 +03:00

Allow pgbench to retry in some cases.

When serialization or deadlock errors are reported by the backend, allow
the transaction to be retried so that the benchmark can continue. For this
purpose the new options "--max-tries", "--failures-detailed" and
"--verbose-errors" are added.

Transactions with serialization errors or deadlock errors will be
repeated after rollbacks until they complete successfully or reach the
maximum number of tries (specified by the --max-tries option), or the
maximum time of tries (specified by the --latency-limit option).
These options can be specified at the same time. It is not possible to
use an unlimited number of tries (--max-tries=0) without the
--latency-limit option or the --time option. By default the option
--max-tries is set to 1, which means transactions with
serialization/deadlock errors are not retried. If the last try fails,
this transaction will be reported as failed, and the client variables
will be set as they were before the first run of this transaction.

Statistics on retries and failures are printed in the progress,
transaction / aggregation logs and in the end with other results (all
and for each script). Also retries and failures are printed
per-command with average latency by using option
(--report-per-command, -r).

The --failures-detailed option reports failures grouped by their basic
types (serialization failures / deadlock failures).

The --verbose-errors option prints separate reports on errors and failures
(errors without retrying) by type, with detailed information such as which
retry limit was violated and by how much it was exceeded for the
serialization/deadlock failures.

The patch was originally written by Marina Polyakova; Yugo Nagata then
inherited the discussion and heavily modified the patch to make it
committable.

Authors: Yugo Nagata, Marina Polyakova
Reviewed-by: Fabien Coelho, Tatsuo Ishii, Alvaro Herrera, Kevin Grittner, Andres Freund, Arthur Zakirov, Alexander Korotkov, Teodor Sigaev, Ildus Kurbangaliev
Discussion: https://postgr.es/m/flat/72a0d590d6ba06f242d75c2e641820ec%40postgrespro.ru
This commit is contained in:
Tatsuo Ishii
2022-03-23 18:52:37 +09:00
parent 383f222119
commit 4a39f87acd
6 changed files with 1599 additions and 213 deletions

File diff suppressed because it is too large Load Diff

View File

@ -11,7 +11,9 @@ use Config;
# start a pgbench specific server
my $node = PostgreSQL::Test::Cluster->new('main');
$node->init;
# Set to untranslated messages, to be able to compare program output with
# expected strings.
$node->init(extra => [ '--locale', 'C' ]);
$node->start;
# tablespace for testing, because partitioned tables cannot use pg_default
@ -109,7 +111,8 @@ $node->pgbench(
qr{builtin: TPC-B},
qr{clients: 2\b},
qr{processed: 10/10},
qr{mode: simple}
qr{mode: simple},
qr{maximum number of tries: 1}
],
[qr{^$}],
'pgbench tpcb-like');
@ -1198,6 +1201,214 @@ $node->pgbench(
check_pgbench_logs($bdir, '001_pgbench_log_3', 1, 10, 10,
qr{^0 \d{1,2} \d+ \d \d+ \d+$});
# Abort of the client if the script contains an incomplete transaction block.
# The script opens a transaction but never closes it, so pgbench must abort
# client 0 at end of script; only 1 of the requested 10 transactions is
# reported as processed.
$node->pgbench(
'--no-vacuum', 2, [ qr{processed: 1/10} ],
[ qr{client 0 aborted: end of script reached without completing the last transaction} ],
'incomplete transaction block',
{ '001_pgbench_incomplete_transaction_block' => q{BEGIN;SELECT 1;} });
# Test the concurrent update in the table row and deadlocks.
# Set up shared state for the retry tests below:
# - first_client_table lets the scripts detect which client arrived first
#   (scripts check whether it is empty);
# - xy holds the single row (x = 1) both clients will update concurrently.
# UNLOGGED is sufficient here; the data is scratch state for this test only.
$node->safe_psql('postgres',
'CREATE UNLOGGED TABLE first_client_table (value integer); '
. 'CREATE UNLOGGED TABLE xy (x integer, y integer); '
. 'INSERT INTO xy VALUES (1, 2);');
# Serialization error and retry
# PGOPTIONS passes backend options through libpq, so every session started by
# pgbench below runs at REPEATABLE READ — required to provoke a serialization
# failure on the concurrent update of the same row.
local $ENV{PGOPTIONS} = "-c default_transaction_isolation=repeatable\\ read";
# Check that we have a serialization error and the same random value of the
# delta variable in the next try.
# The pattern captures which client (0 or 1) failed in group 1; the trailing
# \g1 backreference requires the same captured text to appear again later in
# the (/s, multi-line) error output, i.e. the retry belongs to the same client.
my $err_pattern =
"client (0|1) got an error in command 3 \\(SQL\\) of script 0; "
. "ERROR: could not serialize access due to concurrent update\\b.*"
. "\\g1";
# Two clients (-c 2), one transaction each (-t 1), with at most one retry
# (--max-tries 2). The embedded script forces the second client to hit a
# serialization error on the concurrent UPDATE of xy; the expected stdout
# confirms both transactions eventually completed (processed: 2/2) with
# exactly one transaction retried once. The stderr pattern ($err_pattern)
# checks the --verbose-errors report. NB: the q{...} script below is a
# string literal handed to pgbench; its "--" lines are SQL comments.
$node->pgbench(
"-n -c 2 -t 1 -d --verbose-errors --max-tries 2",
0,
[ qr{processed: 2/2\b}, qr{number of transactions retried: 1\b},
qr{total number of retries: 1\b} ],
[ qr/$err_pattern/s ],
'concurrent update with retrying',
{
'001_pgbench_serialization' => q{
-- What's happening:
-- The first client starts the transaction with the isolation level Repeatable
-- Read:
--
-- BEGIN;
-- UPDATE xy SET y = ... WHERE x = 1;
--
-- The second client starts a similar transaction with the same isolation level:
--
-- BEGIN;
-- UPDATE xy SET y = ... WHERE x = 1;
-- <waiting for the first client>
--
-- The first client commits its transaction, and the second client gets a
-- serialization error.
\set delta random(-5000, 5000)
-- The second client will stop here
SELECT pg_advisory_lock(0);
-- Start transaction with concurrent update
BEGIN;
UPDATE xy SET y = y + :delta WHERE x = 1 AND pg_advisory_lock(1) IS NOT NULL;
-- Wait for the second client
DO $$
DECLARE
exists boolean;
waiters integer;
BEGIN
-- The second client always comes in second, and the number of rows in the
-- table first_client_table reflect this. Here the first client inserts a row,
-- so the second client will see a non-empty table when repeating the
-- transaction after the serialization error.
SELECT EXISTS (SELECT * FROM first_client_table) INTO STRICT exists;
IF NOT exists THEN
-- Let the second client begin
PERFORM pg_advisory_unlock(0);
-- And wait until the second client tries to get the same lock
LOOP
SELECT COUNT(*) INTO STRICT waiters FROM pg_locks WHERE
locktype = 'advisory' AND objsubid = 1 AND
((classid::bigint << 32) | objid::bigint = 1::bigint) AND NOT granted;
IF waiters = 1 THEN
INSERT INTO first_client_table VALUES (1);
-- Exit loop
EXIT;
END IF;
END LOOP;
END IF;
END$$;
COMMIT;
SELECT pg_advisory_unlock_all();
}
});
# Clean up state left by the serialization test so the deadlock scripts see
# an initially-empty first_client_table again.
$node->safe_psql('postgres', 'DELETE FROM first_client_table;');
# Back to the default isolation level; deadlocks do not need REPEATABLE READ.
local $ENV{PGOPTIONS} = "-c default_transaction_isolation=read\\ committed";
# Deadlock error and retry
# Check that we have a deadlock error.
# Command index is (3|5) because either of two advisory-lock commands in the
# script below can be the one that deadlocks.
$err_pattern =
"client (0|1) got an error in command (3|5) \\(SQL\\) of script 0; "
. "ERROR: deadlock detected\\b";
# Deadlock retry test: two clients take advisory locks 2 and 3 in opposite
# orders so one of them is killed by the deadlock detector; --max-tries 2
# lets the victim retry once. Expected stdout: both transactions processed
# with exactly one retry; stderr must match the deadlock $err_pattern.
# NB: the q{...} script below is a string literal handed to pgbench; its
# "--" lines are SQL comments.
$node->pgbench(
"-n -c 2 -t 1 --max-tries 2 --verbose-errors",
0,
[ qr{processed: 2/2\b}, qr{number of transactions retried: 1\b},
qr{total number of retries: 1\b} ],
[ qr{$err_pattern} ],
'deadlock with retrying',
{
'001_pgbench_deadlock' => q{
-- What's happening:
-- The first client gets the lock 2.
-- The second client gets the lock 3 and tries to get the lock 2.
-- The first client tries to get the lock 3 and one of them gets a deadlock
-- error.
--
-- A client that does not get a deadlock error must hold a lock at the
-- transaction start. Thus in the end it releases all of its locks before the
-- client with the deadlock error starts a retry (we do not want any errors
-- again).
-- Since the client with the deadlock error has not released the blocking locks,
-- let's do this here.
SELECT pg_advisory_unlock_all();
-- The second client and the client with the deadlock error stop here
SELECT pg_advisory_lock(0);
SELECT pg_advisory_lock(1);
-- The second client and the client with the deadlock error always come after
-- the first and the number of rows in the table first_client_table reflects
-- this. Here the first client inserts a row, so in the future the table is
-- always non-empty.
DO $$
DECLARE
exists boolean;
BEGIN
SELECT EXISTS (SELECT * FROM first_client_table) INTO STRICT exists;
IF exists THEN
-- We are the second client or the client with the deadlock error
-- The first client will take care by itself of this lock (see below)
PERFORM pg_advisory_unlock(0);
PERFORM pg_advisory_lock(3);
-- The second client can get a deadlock here
PERFORM pg_advisory_lock(2);
ELSE
-- We are the first client
-- This code should not be used in a new transaction after an error
INSERT INTO first_client_table VALUES (1);
PERFORM pg_advisory_lock(2);
END IF;
END$$;
DO $$
DECLARE
num_rows integer;
waiters integer;
BEGIN
-- Check if we are the first client
SELECT COUNT(*) FROM first_client_table INTO STRICT num_rows;
IF num_rows = 1 THEN
-- This code should not be used in a new transaction after an error
INSERT INTO first_client_table VALUES (2);
-- Let the second client begin
PERFORM pg_advisory_unlock(0);
PERFORM pg_advisory_unlock(1);
-- Make sure the second client is ready for deadlock
LOOP
SELECT COUNT(*) INTO STRICT waiters FROM pg_locks WHERE
locktype = 'advisory' AND
objsubid = 1 AND
((classid::bigint << 32) | objid::bigint = 2::bigint) AND
NOT granted;
IF waiters = 1 THEN
-- Exit loop
EXIT;
END IF;
END LOOP;
PERFORM pg_advisory_lock(0);
-- And the second client took care by itself of the lock 1
END IF;
END$$;
-- The first client can get a deadlock here
SELECT pg_advisory_lock(3);
SELECT pg_advisory_unlock_all();
}
});
# Clean up: drop the scratch tables used by the retry tests above.
$node->safe_psql('postgres', 'DROP TABLE first_client_table, xy;');
# done
# Drop the tablespace created earlier for partitioned-table testing, then
# shut the node down.
$node->safe_psql('postgres', 'DROP TABLESPACE regress_pgbench_tap_1_ts');
$node->stop;

View File

@ -188,6 +188,16 @@ my @options = (
'-i --partition-method=hash',
[qr{partition-method requires greater than zero --partitions}]
],
[
'bad maximum number of tries',
'--max-tries -10',
[qr{invalid number of maximum tries: "-10"}]
],
[
'an infinite number of tries',
'--max-tries 0',
[qr{an unlimited number of transaction tries can only be used with --latency-limit or a duration}]
],
# logging sub-options
[

View File

@ -23,14 +23,26 @@ conditional_stack_create(void)
return cstack;
}
/*
 * conditional_stack_reset
 *
 * Pop every remaining element off the stack, leaving it empty.  The stack
 * object itself is not freed and may be reused afterwards.  A NULL stack
 * is accepted and treated as a no-op.
 */
void
conditional_stack_reset(ConditionalStack cstack)
{
	if (cstack == NULL)
		return;					/* nothing to do here */

	/* keep popping until the stack reports it is empty */
	while (conditional_stack_pop(cstack))
		;
}
/*
 * Destroy the stack: discard any remaining elements via
 * conditional_stack_reset(), then free the stack object itself.
 * Safe for a NULL stack (reset checks for NULL, and free(NULL) is a no-op).
 */
void
conditional_stack_destroy(ConditionalStack cstack)
{
conditional_stack_reset(cstack);
free(cstack);
}

View File

@ -73,6 +73,8 @@ typedef struct ConditionalStackData *ConditionalStack;
extern ConditionalStack conditional_stack_create(void);
extern void conditional_stack_reset(ConditionalStack cstack);
extern void conditional_stack_destroy(ConditionalStack cstack);
extern int conditional_stack_depth(ConditionalStack cstack);