mirror of
https://github.com/postgres/postgres.git
synced 2025-07-28 23:42:10 +03:00
Allow pgbench to retry in some cases.
When serialization or deadlock errors are reported by backend, allow to retry and continue the benchmarking. For this purpose new options "--max-tries", "--failures-detailed" and "--verbose-errors" are added. Transactions with serialization errors or deadlock errors will be repeated after rollbacks until they complete successfully or reach the maximum number of tries (specified by the --max-tries option), or the maximum time of tries (specified by the --latency-limit option). These options can be specified at the same time. It is not possible to use an unlimited number of tries (--max-tries=0) without the --latency-limit option or the --time option. By default the option --max-tries is set to 1, which means transactions with serialization/deadlock errors are not retried. If the last try fails, this transaction will be reported as failed, and the client variables will be set as they were before the first run of this transaction. Statistics on retries and failures are printed in the progress, transaction / aggregation logs and in the end with other results (all and for each script). Also retries and failures are printed per-command with average latency by using option (--report-per-command, -r). Option --failures-detailed prints group failures by basic types (serialization failures / deadlock failures). Option --verbose-errors prints distinct reports on errors and failures (errors without retrying) by type with detailed information like which limit for retries was violated and how far it was exceeded for the serialization/deadlock failures. Patch originally written by Marina Polyakova then Yugo Nagata inherited the discussion and heavily modified the patch to make it commitable. Authors: Yugo Nagata, Marina Polyakova Reviewed-by: Fabien Coelho, Tatsuo Ishii, Alvaro Herrera, Kevin Grittner, Andres Freund, Arthur Zakirov, Alexander Korotkov, Teodor Sigaev, Ildus Kurbangaliev Discussion: https://postgr.es/m/flat/72a0d590d6ba06f242d75c2e641820ec%40postgrespro.ru
This commit is contained in:
File diff suppressed because it is too large
Load Diff
@ -11,7 +11,9 @@ use Config;
|
||||
|
||||
# start a pgbench specific server
|
||||
my $node = PostgreSQL::Test::Cluster->new('main');
|
||||
$node->init;
|
||||
# Set to untranslated messages, to be able to compare program output with
|
||||
# expected strings.
|
||||
$node->init(extra => [ '--locale', 'C' ]);
|
||||
$node->start;
|
||||
|
||||
# tablespace for testing, because partitioned tables cannot use pg_default
|
||||
@ -109,7 +111,8 @@ $node->pgbench(
|
||||
qr{builtin: TPC-B},
|
||||
qr{clients: 2\b},
|
||||
qr{processed: 10/10},
|
||||
qr{mode: simple}
|
||||
qr{mode: simple},
|
||||
qr{maximum number of tries: 1}
|
||||
],
|
||||
[qr{^$}],
|
||||
'pgbench tpcb-like');
|
||||
@ -1198,6 +1201,214 @@ $node->pgbench(
|
||||
check_pgbench_logs($bdir, '001_pgbench_log_3', 1, 10, 10,
|
||||
qr{^0 \d{1,2} \d+ \d \d+ \d+$});
|
||||
|
||||
# abortion of the client if the script contains an incomplete transaction block
|
||||
$node->pgbench(
|
||||
'--no-vacuum', 2, [ qr{processed: 1/10} ],
|
||||
[ qr{client 0 aborted: end of script reached without completing the last transaction} ],
|
||||
'incomplete transaction block',
|
||||
{ '001_pgbench_incomplete_transaction_block' => q{BEGIN;SELECT 1;} });
|
||||
|
||||
# Test the concurrent update in the table row and deadlocks.
|
||||
|
||||
$node->safe_psql('postgres',
|
||||
'CREATE UNLOGGED TABLE first_client_table (value integer); '
|
||||
. 'CREATE UNLOGGED TABLE xy (x integer, y integer); '
|
||||
. 'INSERT INTO xy VALUES (1, 2);');
|
||||
|
||||
# Serialization error and retry
|
||||
|
||||
local $ENV{PGOPTIONS} = "-c default_transaction_isolation=repeatable\\ read";
|
||||
|
||||
# Check that we have a serialization error and the same random value of the
|
||||
# delta variable in the next try
|
||||
my $err_pattern =
|
||||
"client (0|1) got an error in command 3 \\(SQL\\) of script 0; "
|
||||
. "ERROR: could not serialize access due to concurrent update\\b.*"
|
||||
. "\\g1";
|
||||
|
||||
$node->pgbench(
|
||||
"-n -c 2 -t 1 -d --verbose-errors --max-tries 2",
|
||||
0,
|
||||
[ qr{processed: 2/2\b}, qr{number of transactions retried: 1\b},
|
||||
qr{total number of retries: 1\b} ],
|
||||
[ qr/$err_pattern/s ],
|
||||
'concurrent update with retrying',
|
||||
{
|
||||
'001_pgbench_serialization' => q{
|
||||
-- What's happening:
|
||||
-- The first client starts the transaction with the isolation level Repeatable
|
||||
-- Read:
|
||||
--
|
||||
-- BEGIN;
|
||||
-- UPDATE xy SET y = ... WHERE x = 1;
|
||||
--
|
||||
-- The second client starts a similar transaction with the same isolation level:
|
||||
--
|
||||
-- BEGIN;
|
||||
-- UPDATE xy SET y = ... WHERE x = 1;
|
||||
-- <waiting for the first client>
|
||||
--
|
||||
-- The first client commits its transaction, and the second client gets a
|
||||
-- serialization error.
|
||||
|
||||
\set delta random(-5000, 5000)
|
||||
|
||||
-- The second client will stop here
|
||||
SELECT pg_advisory_lock(0);
|
||||
|
||||
-- Start transaction with concurrent update
|
||||
BEGIN;
|
||||
UPDATE xy SET y = y + :delta WHERE x = 1 AND pg_advisory_lock(1) IS NOT NULL;
|
||||
|
||||
-- Wait for the second client
|
||||
DO $$
|
||||
DECLARE
|
||||
exists boolean;
|
||||
waiters integer;
|
||||
BEGIN
|
||||
-- The second client always comes in second, and the number of rows in the
|
||||
-- table first_client_table reflect this. Here the first client inserts a row,
|
||||
-- so the second client will see a non-empty table when repeating the
|
||||
-- transaction after the serialization error.
|
||||
SELECT EXISTS (SELECT * FROM first_client_table) INTO STRICT exists;
|
||||
IF NOT exists THEN
|
||||
-- Let the second client begin
|
||||
PERFORM pg_advisory_unlock(0);
|
||||
-- And wait until the second client tries to get the same lock
|
||||
LOOP
|
||||
SELECT COUNT(*) INTO STRICT waiters FROM pg_locks WHERE
|
||||
locktype = 'advisory' AND objsubid = 1 AND
|
||||
((classid::bigint << 32) | objid::bigint = 1::bigint) AND NOT granted;
|
||||
IF waiters = 1 THEN
|
||||
INSERT INTO first_client_table VALUES (1);
|
||||
|
||||
-- Exit loop
|
||||
EXIT;
|
||||
END IF;
|
||||
END LOOP;
|
||||
END IF;
|
||||
END$$;
|
||||
|
||||
COMMIT;
|
||||
SELECT pg_advisory_unlock_all();
|
||||
}
|
||||
});
|
||||
|
||||
# Clean up
|
||||
|
||||
$node->safe_psql('postgres', 'DELETE FROM first_client_table;');
|
||||
|
||||
local $ENV{PGOPTIONS} = "-c default_transaction_isolation=read\\ committed";
|
||||
|
||||
# Deadlock error and retry
|
||||
|
||||
# Check that we have a deadlock error
|
||||
$err_pattern =
|
||||
"client (0|1) got an error in command (3|5) \\(SQL\\) of script 0; "
|
||||
. "ERROR: deadlock detected\\b";
|
||||
|
||||
$node->pgbench(
|
||||
"-n -c 2 -t 1 --max-tries 2 --verbose-errors",
|
||||
0,
|
||||
[ qr{processed: 2/2\b}, qr{number of transactions retried: 1\b},
|
||||
qr{total number of retries: 1\b} ],
|
||||
[ qr{$err_pattern} ],
|
||||
'deadlock with retrying',
|
||||
{
|
||||
'001_pgbench_deadlock' => q{
|
||||
-- What's happening:
|
||||
-- The first client gets the lock 2.
|
||||
-- The second client gets the lock 3 and tries to get the lock 2.
|
||||
-- The first client tries to get the lock 3 and one of them gets a deadlock
|
||||
-- error.
|
||||
--
|
||||
-- A client that does not get a deadlock error must hold a lock at the
|
||||
-- transaction start. Thus in the end it releases all of its locks before the
|
||||
-- client with the deadlock error starts a retry (we do not want any errors
|
||||
-- again).
|
||||
|
||||
-- Since the client with the deadlock error has not released the blocking locks,
|
||||
-- let's do this here.
|
||||
SELECT pg_advisory_unlock_all();
|
||||
|
||||
-- The second client and the client with the deadlock error stop here
|
||||
SELECT pg_advisory_lock(0);
|
||||
SELECT pg_advisory_lock(1);
|
||||
|
||||
-- The second client and the client with the deadlock error always come after
|
||||
-- the first and the number of rows in the table first_client_table reflects
|
||||
-- this. Here the first client inserts a row, so in the future the table is
|
||||
-- always non-empty.
|
||||
DO $$
|
||||
DECLARE
|
||||
exists boolean;
|
||||
BEGIN
|
||||
SELECT EXISTS (SELECT * FROM first_client_table) INTO STRICT exists;
|
||||
IF exists THEN
|
||||
-- We are the second client or the client with the deadlock error
|
||||
|
||||
-- The first client will take care by itself of this lock (see below)
|
||||
PERFORM pg_advisory_unlock(0);
|
||||
|
||||
PERFORM pg_advisory_lock(3);
|
||||
|
||||
-- The second client can get a deadlock here
|
||||
PERFORM pg_advisory_lock(2);
|
||||
ELSE
|
||||
-- We are the first client
|
||||
|
||||
-- This code should not be used in a new transaction after an error
|
||||
INSERT INTO first_client_table VALUES (1);
|
||||
|
||||
PERFORM pg_advisory_lock(2);
|
||||
END IF;
|
||||
END$$;
|
||||
|
||||
DO $$
|
||||
DECLARE
|
||||
num_rows integer;
|
||||
waiters integer;
|
||||
BEGIN
|
||||
-- Check if we are the first client
|
||||
SELECT COUNT(*) FROM first_client_table INTO STRICT num_rows;
|
||||
IF num_rows = 1 THEN
|
||||
-- This code should not be used in a new transaction after an error
|
||||
INSERT INTO first_client_table VALUES (2);
|
||||
|
||||
-- Let the second client begin
|
||||
PERFORM pg_advisory_unlock(0);
|
||||
PERFORM pg_advisory_unlock(1);
|
||||
|
||||
-- Make sure the second client is ready for deadlock
|
||||
LOOP
|
||||
SELECT COUNT(*) INTO STRICT waiters FROM pg_locks WHERE
|
||||
locktype = 'advisory' AND
|
||||
objsubid = 1 AND
|
||||
((classid::bigint << 32) | objid::bigint = 2::bigint) AND
|
||||
NOT granted;
|
||||
|
||||
IF waiters = 1 THEN
|
||||
-- Exit loop
|
||||
EXIT;
|
||||
END IF;
|
||||
END LOOP;
|
||||
|
||||
PERFORM pg_advisory_lock(0);
|
||||
-- And the second client took care by itself of the lock 1
|
||||
END IF;
|
||||
END$$;
|
||||
|
||||
-- The first client can get a deadlock here
|
||||
SELECT pg_advisory_lock(3);
|
||||
|
||||
SELECT pg_advisory_unlock_all();
|
||||
}
|
||||
});
|
||||
|
||||
# Clean up
|
||||
$node->safe_psql('postgres', 'DROP TABLE first_client_table, xy;');
|
||||
|
||||
|
||||
# done
|
||||
$node->safe_psql('postgres', 'DROP TABLESPACE regress_pgbench_tap_1_ts');
|
||||
$node->stop;
|
||||
|
@ -188,6 +188,16 @@ my @options = (
|
||||
'-i --partition-method=hash',
|
||||
[qr{partition-method requires greater than zero --partitions}]
|
||||
],
|
||||
[
|
||||
'bad maximum number of tries',
|
||||
'--max-tries -10',
|
||||
[qr{invalid number of maximum tries: "-10"}]
|
||||
],
|
||||
[
|
||||
'an infinite number of tries',
|
||||
'--max-tries 0',
|
||||
[qr{an unlimited number of transaction tries can only be used with --latency-limit or a duration}]
|
||||
],
|
||||
|
||||
# logging sub-options
|
||||
[
|
||||
|
@ -23,14 +23,26 @@ conditional_stack_create(void)
|
||||
return cstack;
|
||||
}
|
||||
|
||||
/*
|
||||
* Destroy all the elements from the stack. The stack itself is not freed.
|
||||
*/
|
||||
void
|
||||
conditional_stack_reset(ConditionalStack cstack)
|
||||
{
|
||||
if (!cstack)
|
||||
return; /* nothing to do here */
|
||||
|
||||
while (conditional_stack_pop(cstack))
|
||||
continue;
|
||||
}
|
||||
|
||||
/*
|
||||
* destroy stack
|
||||
*/
|
||||
void
|
||||
conditional_stack_destroy(ConditionalStack cstack)
|
||||
{
|
||||
while (conditional_stack_pop(cstack))
|
||||
continue;
|
||||
conditional_stack_reset(cstack);
|
||||
free(cstack);
|
||||
}
|
||||
|
||||
|
@ -73,6 +73,8 @@ typedef struct ConditionalStackData *ConditionalStack;
|
||||
|
||||
extern ConditionalStack conditional_stack_create(void);
|
||||
|
||||
extern void conditional_stack_reset(ConditionalStack cstack);
|
||||
|
||||
extern void conditional_stack_destroy(ConditionalStack cstack);
|
||||
|
||||
extern int conditional_stack_depth(ConditionalStack cstack);
|
||||
|
Reference in New Issue
Block a user