From b3f6b14cf48ffa81084072f1fdeee2fe9df20746 Mon Sep 17 00:00:00 2001 From: Amit Kapila Date: Thu, 29 Feb 2024 09:45:20 +0530 Subject: [PATCH] Fixups for commit 93db6cbda0. Ensure to set always-secure search path for both local and remote connections during slot synchronization, so that malicious users can't redirect user code (e.g. operators). In the passing, improve the name of define, remove spurious return statement, and a minor change in one of the comments. Author: Bertrand Drouvot and Shveta Malik Reviewed-by: Amit Kapila, Peter Smith Discussion: https://postgr.es/m/514f6f2f-6833-4539-39f1-96cd1e011f23@enterprisedb.com Discussion: https://postgr.es/m/ZdcejBDCr+wlVGnO@ip-10-97-1-34.eu-west-3.compute.internal Discussion: https://postgr.es/m/CAJpy0uBNP=nrkNJkJSfF=jSocEh8vU2Owa8Rtpi=63fG=SvfVQ@mail.gmail.com --- src/backend/access/transam/xlogrecovery.c | 15 +++--- .../libpqwalreceiver/libpqwalreceiver.c | 6 ++- src/backend/replication/logical/slotsync.c | 23 +++++--- .../t/040_standby_failover_slots_sync.pl | 54 +++++++++++++++++++ 4 files changed, 82 insertions(+), 16 deletions(-) diff --git a/src/backend/access/transam/xlogrecovery.c b/src/backend/access/transam/xlogrecovery.c index d73a49b3e81..9d907bf0e45 100644 --- a/src/backend/access/transam/xlogrecovery.c +++ b/src/backend/access/transam/xlogrecovery.c @@ -1472,13 +1472,14 @@ FinishWalRecovery(void) * Shutdown the slot sync worker to drop any temporary slots acquired by * it and to prevent it from keep trying to fetch the failover slots. * - * We do not update the 'synced' column from true to false here, as any - * failed update could leave 'synced' column false for some slots. This - * could cause issues during slot sync after restarting the server as a - * standby. While updating the 'synced' column after switching to the new - * timeline is an option, it does not simplify the handling for the - * 'synced' column. Therefore, we retain the 'synced' column as true after - * promotion as it may provide useful information about the slot origin. + * We do not update the 'synced' column in 'pg_replication_slots' system + * view from true to false here, as any failed update could leave 'synced' + * column false for some slots. This could cause issues during slot sync + * after restarting the server as a standby. While updating the 'synced' + * column after switching to the new timeline is an option, it does not + * simplify the handling for the 'synced' column. Therefore, we retain the + * 'synced' column as true after promotion as it may provide useful + * information about the slot origin. */ ShutDownSlotSync(); diff --git a/src/backend/replication/libpqwalreceiver/libpqwalreceiver.c b/src/backend/replication/libpqwalreceiver/libpqwalreceiver.c index 04271ee7032..1519b27adca 100644 --- a/src/backend/replication/libpqwalreceiver/libpqwalreceiver.c +++ b/src/backend/replication/libpqwalreceiver/libpqwalreceiver.c @@ -271,7 +271,11 @@ libpqrcv_connect(const char *conninfo, bool replication, bool logical, errhint("Target server's authentication method must be changed, or set password_required=false in the subscription parameters."))); } - if (logical) + /* + * Set always-secure search path for the cases where the connection is + * used to run SQL queries, so malicious users can't get control. + */ + if (!replication || logical) { PGresult *res; diff --git a/src/backend/replication/logical/slotsync.c b/src/backend/replication/logical/slotsync.c index 36773cfe73f..8ecb85b86a3 100644 --- a/src/backend/replication/logical/slotsync.c +++ b/src/backend/replication/logical/slotsync.c @@ -105,10 +105,10 @@ bool sync_replication_slots = false; * (within a MIN/MAX range) according to slot activity. See * wait_for_slot_activity() for details. */ -#define MIN_WORKER_NAPTIME_MS 200 -#define MAX_WORKER_NAPTIME_MS 30000 /* 30s */ +#define MIN_SLOTSYNC_WORKER_NAPTIME_MS 200 +#define MAX_SLOTSYNC_WORKER_NAPTIME_MS 30000 /* 30s */ -static long sleep_ms = MIN_WORKER_NAPTIME_MS; +static long sleep_ms = MIN_SLOTSYNC_WORKER_NAPTIME_MS; /* The restart interval for slot sync work used by postmaster */ #define SLOTSYNC_RESTART_INTERVAL_SEC 10 @@ -924,12 +924,9 @@ ValidateSlotSyncParams(int elevel) * in this case regardless of elevel provided by caller. */ if (wal_level < WAL_LEVEL_LOGICAL) - { ereport(ERROR, errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("slot synchronization requires wal_level >= \"logical\"")); - return false; - } /* * A physical replication slot(primary_slot_name) is required on the @@ -1082,7 +1079,7 @@ wait_for_slot_activity(bool some_slot_updated) * No slots were updated, so double the sleep time, but not beyond the * maximum allowable value. */ - sleep_ms = Min(sleep_ms * 2, MAX_WORKER_NAPTIME_MS); + sleep_ms = Min(sleep_ms * 2, MAX_SLOTSYNC_WORKER_NAPTIME_MS); } else { @@ -1090,7 +1087,7 @@ wait_for_slot_activity(bool some_slot_updated) * Some slots were updated since the last sleep, so reset the sleep * time. */ - sleep_ms = MIN_WORKER_NAPTIME_MS; + sleep_ms = MIN_SLOTSYNC_WORKER_NAPTIME_MS; } rc = WaitLatch(MyLatch, @@ -1215,6 +1212,16 @@ ReplSlotSyncWorkerMain(int argc, char *argv[]) */ sigprocmask(SIG_SETMASK, &UnBlockSig, NULL); + /* + * Set always-secure search path, so malicious users can't redirect user + * code (e.g. operators). + * + * It's not strictly necessary since we won't be scanning or writing to + * any user table locally, but it's good to retain it here for added + * precaution. + */ + SetConfigOption("search_path", "", PGC_SUSET, PGC_S_OVERRIDE); + dbname = CheckAndGetDbnameFromConninfo(); /* diff --git a/src/test/recovery/t/040_standby_failover_slots_sync.pl b/src/test/recovery/t/040_standby_failover_slots_sync.pl index 968aa7b05bf..825c26da6f2 100644 --- a/src/test/recovery/t/040_standby_failover_slots_sync.pl +++ b/src/test/recovery/t/040_standby_failover_slots_sync.pl @@ -361,6 +361,60 @@ ok( $stderr =~ $cascading_standby->stop; +################################################## +# Test to confirm that the slot synchronization is protected from malicious +# users. +################################################## + +$primary->psql('postgres', "CREATE DATABASE slotsync_test_db"); +$primary->wait_for_replay_catchup($standby1); + +$standby1->stop; + +# On the primary server, create '=' operator in another schema mapped to +# inequality function and redirect the queries to use new operator by setting +# search_path. The new '=' operator is created with leftarg as 'bigint' and +# right arg as 'int' to redirect 'count(*) = 1' in slot sync's query to use +# new '=' operator. +$primary->safe_psql( + 'slotsync_test_db', q{ + +CREATE ROLE repl_role REPLICATION LOGIN; +CREATE SCHEMA myschema; + +CREATE FUNCTION myschema.myintne(bigint, int) RETURNS bool as $$ + BEGIN + RETURN $1 <> $2; + END; + $$ LANGUAGE plpgsql immutable; + +CREATE OPERATOR myschema.= ( + leftarg = bigint, + rightarg = int, + procedure = myschema.myintne); + +ALTER DATABASE slotsync_test_db SET SEARCH_PATH TO myschema,pg_catalog; +GRANT USAGE on SCHEMA myschema TO repl_role; +}); + +# Start the standby with changed primary_conninfo. +$standby1->append_conf('postgresql.conf', "primary_conninfo = '$connstr_1 dbname=slotsync_test_db user=repl_role'"); +$standby1->start; + +# Run the synchronization function. If the sync flow was not prepared +# to handle such attacks, it would have failed during the validation +# of the primary_slot_name itself resulting in +# ERROR: slot synchronization requires valid primary_slot_name +$standby1->safe_psql('slotsync_test_db', "SELECT pg_sync_replication_slots();"); + +# Reset the dbname and user in primary_conninfo to the earlier values. +$standby1->append_conf('postgresql.conf', "primary_conninfo = '$connstr_1 dbname=postgres'"); +$standby1->reload; + +# Drop the newly created database. +$primary->psql('postgres', + q{DROP DATABASE slotsync_test_db;}); + ################################################## # Test to confirm that the slot sync worker exits on invalid GUC(s) and # get started again on valid GUC(s).