mirror of
https://github.com/postgres/postgres.git
synced 2025-11-09 06:21:09 +03:00
Introduce replication slots.
Replication slots are a crash-safe data structure which can be created on either a master or a standby to prevent premature removal of write-ahead log segments needed by a standby, as well as (with hot_standby_feedback=on) pruning of tuples whose removal would cause replication conflicts. Slots have some advantages over existing techniques, as explained in the documentation. In a few places, we refer to the type of replication slots introduced by this patch as "physical" slots, because forthcoming patches for logical decoding will also have slots, but with somewhat different properties. Andres Freund and Robert Haas
This commit is contained in:
@@ -15,7 +15,7 @@ include $(top_builddir)/src/Makefile.global
|
||||
override CPPFLAGS := -I$(srcdir) $(CPPFLAGS)
|
||||
|
||||
OBJS = walsender.o walreceiverfuncs.o walreceiver.o basebackup.o \
|
||||
repl_gram.o syncrep.o
|
||||
repl_gram.o slot.o slotfuncs.o syncrep.o
|
||||
|
||||
include $(top_srcdir)/src/backend/common.mk
|
||||
|
||||
|
||||
@@ -47,8 +47,9 @@ to fetch more WAL (if streaming replication is configured).
|
||||
|
||||
Walreceiver is a postmaster subprocess, so the startup process can't fork it
|
||||
directly. Instead, it sends a signal to postmaster, asking postmaster to launch
|
||||
it. Before that, however, startup process fills in WalRcvData->conninfo,
|
||||
and initializes the starting point in WalRcvData->receiveStart.
|
||||
it. Before that, however, startup process fills in WalRcvData->conninfo
|
||||
and WalRcvData->slotname, and initializes the starting point in
|
||||
WalRcvData->receiveStart.
|
||||
|
||||
As walreceiver receives WAL from the master server, and writes and flushes
|
||||
it to disk (in pg_xlog), it updates WalRcvData->receivedUpto and signals
|
||||
|
||||
@@ -847,6 +847,10 @@ sendDir(char *path, int basepathlen, bool sizeonly, List *tablespaces)
|
||||
if (strcmp(de->d_name, BACKUP_LABEL_FILE) == 0)
|
||||
continue;
|
||||
|
||||
/* Skip pg_replslot, not useful to copy */
|
||||
if (strcmp(de->d_name, "pg_replslot") == 0)
|
||||
continue;
|
||||
|
||||
/*
|
||||
* Check if the postmaster has signaled us to exit, and abort with an
|
||||
* error in that case. The error handler further up will call
|
||||
|
||||
@@ -49,7 +49,8 @@ static char *recvBuf = NULL;
|
||||
static void libpqrcv_connect(char *conninfo);
|
||||
static void libpqrcv_identify_system(TimeLineID *primary_tli);
|
||||
static void libpqrcv_readtimelinehistoryfile(TimeLineID tli, char **filename, char **content, int *len);
|
||||
static bool libpqrcv_startstreaming(TimeLineID tli, XLogRecPtr startpoint);
|
||||
static bool libpqrcv_startstreaming(TimeLineID tli, XLogRecPtr startpoint,
|
||||
char *slotname);
|
||||
static void libpqrcv_endstreaming(TimeLineID *next_tli);
|
||||
static int libpqrcv_receive(int timeout, char **buffer);
|
||||
static void libpqrcv_send(const char *buffer, int nbytes);
|
||||
@@ -171,15 +172,20 @@ libpqrcv_identify_system(TimeLineID *primary_tli)
|
||||
* throws an ERROR.
|
||||
*/
|
||||
static bool
|
||||
libpqrcv_startstreaming(TimeLineID tli, XLogRecPtr startpoint)
|
||||
libpqrcv_startstreaming(TimeLineID tli, XLogRecPtr startpoint, char *slotname)
|
||||
{
|
||||
char cmd[64];
|
||||
PGresult *res;
|
||||
|
||||
/* Start streaming from the point requested by startup process */
|
||||
snprintf(cmd, sizeof(cmd), "START_REPLICATION %X/%X TIMELINE %u",
|
||||
(uint32) (startpoint >> 32), (uint32) startpoint,
|
||||
tli);
|
||||
if (slotname != NULL)
|
||||
snprintf(cmd, sizeof(cmd),
|
||||
"START_REPLICATION SLOT \"%s\" %X/%X TIMELINE %u", slotname,
|
||||
(uint32) (startpoint >> 32), (uint32) startpoint, tli);
|
||||
else
|
||||
snprintf(cmd, sizeof(cmd),
|
||||
"START_REPLICATION %X/%X TIMELINE %u",
|
||||
(uint32) (startpoint >> 32), (uint32) startpoint, tli);
|
||||
res = libpqrcv_PQexec(cmd);
|
||||
|
||||
if (PQresultStatus(res) == PGRES_COMMAND_OK)
|
||||
|
||||
@@ -65,7 +65,7 @@ Node *replication_parse_result;
|
||||
}
|
||||
|
||||
/* Non-keyword tokens */
|
||||
%token <str> SCONST
|
||||
%token <str> SCONST IDENT
|
||||
%token <uintval> UCONST
|
||||
%token <recptr> RECPTR
|
||||
|
||||
@@ -73,6 +73,8 @@ Node *replication_parse_result;
|
||||
%token K_BASE_BACKUP
|
||||
%token K_IDENTIFY_SYSTEM
|
||||
%token K_START_REPLICATION
|
||||
%token K_CREATE_REPLICATION_SLOT
|
||||
%token K_DROP_REPLICATION_SLOT
|
||||
%token K_TIMELINE_HISTORY
|
||||
%token K_LABEL
|
||||
%token K_PROGRESS
|
||||
@@ -80,12 +82,15 @@ Node *replication_parse_result;
|
||||
%token K_NOWAIT
|
||||
%token K_WAL
|
||||
%token K_TIMELINE
|
||||
%token K_PHYSICAL
|
||||
%token K_SLOT
|
||||
|
||||
%type <node> command
|
||||
%type <node> base_backup start_replication identify_system timeline_history
|
||||
%type <node> base_backup start_replication create_replication_slot drop_replication_slot identify_system timeline_history
|
||||
%type <list> base_backup_opt_list
|
||||
%type <defelt> base_backup_opt
|
||||
%type <uintval> opt_timeline
|
||||
%type <str> opt_slot
|
||||
%%
|
||||
|
||||
firstcmd: command opt_semicolon
|
||||
@@ -102,6 +107,8 @@ command:
|
||||
identify_system
|
||||
| base_backup
|
||||
| start_replication
|
||||
| create_replication_slot
|
||||
| drop_replication_slot
|
||||
| timeline_history
|
||||
;
|
||||
|
||||
@@ -158,18 +165,42 @@ base_backup_opt:
|
||||
}
|
||||
;
|
||||
|
||||
/* CREATE_REPLICATION_SLOT slot PHYSICAL */
|
||||
create_replication_slot:
|
||||
K_CREATE_REPLICATION_SLOT IDENT K_PHYSICAL
|
||||
{
|
||||
CreateReplicationSlotCmd *cmd;
|
||||
cmd = makeNode(CreateReplicationSlotCmd);
|
||||
cmd->kind = REPLICATION_KIND_PHYSICAL;
|
||||
cmd->slotname = $2;
|
||||
$$ = (Node *) cmd;
|
||||
}
|
||||
;
|
||||
|
||||
/* DROP_REPLICATION_SLOT slot */
|
||||
drop_replication_slot:
|
||||
K_DROP_REPLICATION_SLOT IDENT
|
||||
{
|
||||
DropReplicationSlotCmd *cmd;
|
||||
cmd = makeNode(DropReplicationSlotCmd);
|
||||
cmd->slotname = $2;
|
||||
$$ = (Node *) cmd;
|
||||
}
|
||||
;
|
||||
|
||||
/*
|
||||
* START_REPLICATION %X/%X [TIMELINE %d]
|
||||
* START_REPLICATION [SLOT slot] [PHYSICAL] %X/%X [TIMELINE %d]
|
||||
*/
|
||||
start_replication:
|
||||
K_START_REPLICATION RECPTR opt_timeline
|
||||
K_START_REPLICATION opt_slot opt_physical RECPTR opt_timeline
|
||||
{
|
||||
StartReplicationCmd *cmd;
|
||||
|
||||
cmd = makeNode(StartReplicationCmd);
|
||||
cmd->startpoint = $2;
|
||||
cmd->timeline = $3;
|
||||
|
||||
cmd->kind = REPLICATION_KIND_PHYSICAL;
|
||||
cmd->slotname = $2;
|
||||
cmd->startpoint = $4;
|
||||
cmd->timeline = $5;
|
||||
$$ = (Node *) cmd;
|
||||
}
|
||||
;
|
||||
@@ -205,6 +236,15 @@ timeline_history:
|
||||
$$ = (Node *) cmd;
|
||||
}
|
||||
;
|
||||
|
||||
/* Optional PHYSICAL keyword; carries no semantic value. */
opt_physical:
			K_PHYSICAL
			| /* EMPTY */
			;

/*
 * Optional "SLOT name" clause.  Yields the slot name, or NULL when the
 * clause is absent.
 */
opt_slot:
			K_SLOT IDENT
				{
					$$ = $2;
				}
			| /* EMPTY */
				{
					$$ = NULL;
				}
			;
|
||||
%%
|
||||
|
||||
#include "repl_scanner.c"
|
||||
|
||||
@@ -16,6 +16,7 @@
|
||||
#include "postgres.h"
|
||||
|
||||
#include "utils/builtins.h"
|
||||
#include "parser/scansup.h"
|
||||
|
||||
/* Avoid exit() on fatal scanner errors (a bit ugly -- see yy_fatal_error) */
|
||||
#undef fprintf
|
||||
@@ -48,7 +49,7 @@ static void addlitchar(unsigned char ychar);
|
||||
%option warn
|
||||
%option prefix="replication_yy"
|
||||
|
||||
%x xq
|
||||
%x xq xd
|
||||
|
||||
/* Extended quote
|
||||
* xqdouble implements embedded quote, ''''
|
||||
@@ -57,12 +58,26 @@ xqstart {quote}
|
||||
xqdouble {quote}{quote}
|
||||
xqinside [^']+
|
||||
|
||||
/* Double quote
|
||||
* Allows embedded spaces and other special characters into identifiers.
|
||||
*/
|
||||
dquote \"
|
||||
xdstart {dquote}
|
||||
xdstop {dquote}
|
||||
xddouble {dquote}{dquote}
|
||||
xdinside [^"]+
|
||||
|
||||
digit [0-9]+
|
||||
hexdigit [0-9A-Za-z]+
|
||||
|
||||
quote '
|
||||
quotestop {quote}
|
||||
|
||||
ident_start [A-Za-z\200-\377_]
|
||||
ident_cont [A-Za-z\200-\377_0-9\$]
|
||||
|
||||
identifier {ident_start}{ident_cont}*
|
||||
|
||||
%%
|
||||
|
||||
BASE_BACKUP { return K_BASE_BACKUP; }
|
||||
@@ -74,9 +89,16 @@ PROGRESS { return K_PROGRESS; }
|
||||
WAL { return K_WAL; }
|
||||
TIMELINE { return K_TIMELINE; }
|
||||
START_REPLICATION { return K_START_REPLICATION; }
|
||||
CREATE_REPLICATION_SLOT { return K_CREATE_REPLICATION_SLOT; }
|
||||
DROP_REPLICATION_SLOT { return K_DROP_REPLICATION_SLOT; }
|
||||
TIMELINE_HISTORY { return K_TIMELINE_HISTORY; }
|
||||
PHYSICAL { return K_PHYSICAL; }
|
||||
SLOT { return K_SLOT; }
|
||||
|
||||
"," { return ','; }
|
||||
";" { return ';'; }
|
||||
"(" { return '('; }
|
||||
")" { return ')'; }
|
||||
|
||||
[\n] ;
|
||||
[\t] ;
|
||||
@@ -100,20 +122,49 @@ TIMELINE_HISTORY { return K_TIMELINE_HISTORY; }
|
||||
BEGIN(xq);
|
||||
startlit();
|
||||
}
|
||||
|
||||
<xq>{quotestop} {
|
||||
yyless(1);
|
||||
BEGIN(INITIAL);
|
||||
yylval.str = litbufdup();
|
||||
return SCONST;
|
||||
}
|
||||
<xq>{xqdouble} {
|
||||
|
||||
<xq>{xqdouble} {
|
||||
addlitchar('\'');
|
||||
}
|
||||
|
||||
<xq>{xqinside} {
|
||||
addlit(yytext, yyleng);
|
||||
}
|
||||
|
||||
<xq><<EOF>> { yyerror("unterminated quoted string"); }
|
||||
{xdstart} {
|
||||
BEGIN(xd);
|
||||
startlit();
|
||||
}
|
||||
|
||||
<xd>{xdstop} {
|
||||
int len;
|
||||
yyless(1);
|
||||
BEGIN(INITIAL);
|
||||
yylval.str = litbufdup();
|
||||
len = strlen(yylval.str);
|
||||
truncate_identifier(yylval.str, len, true);
|
||||
return IDENT;
|
||||
}
|
||||
|
||||
<xd>{xdinside} {
|
||||
addlit(yytext, yyleng);
|
||||
}
|
||||
|
||||
{identifier} {
|
||||
int len = strlen(yytext);
|
||||
|
||||
yylval.str = downcase_truncate_identifier(yytext, len, true);
|
||||
return IDENT;
|
||||
}
|
||||
|
||||
<xq,xd><<EOF>> { yyerror("unterminated quoted string"); }
|
||||
|
||||
|
||||
<<EOF>> {
|
||||
|
||||
1066
src/backend/replication/slot.c
Normal file
1066
src/backend/replication/slot.c
Normal file
File diff suppressed because it is too large
Load Diff
193
src/backend/replication/slotfuncs.c
Normal file
193
src/backend/replication/slotfuncs.c
Normal file
@@ -0,0 +1,193 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* slotfuncs.c
|
||||
* Support functions for replication slots
|
||||
*
|
||||
* Copyright (c) 2012-2014, PostgreSQL Global Development Group
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* src/backend/replication/slotfuncs.c
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#include "postgres.h"
|
||||
|
||||
#include "funcapi.h"
|
||||
#include "miscadmin.h"
|
||||
#include "access/htup_details.h"
|
||||
#include "utils/builtins.h"
|
||||
#include "replication/slot.h"
|
||||
|
||||
Datum pg_create_physical_replication_slot(PG_FUNCTION_ARGS);
|
||||
Datum pg_drop_replication_slot(PG_FUNCTION_ARGS);
|
||||
|
||||
static void
|
||||
check_permissions(void)
|
||||
{
|
||||
if (!superuser() && !has_rolreplication(GetUserId()))
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
|
||||
(errmsg("must be superuser or replication role to use replication slots"))));
|
||||
}
|
||||
|
||||
/*
|
||||
* SQL function for creating a new physical (streaming replication)
|
||||
* replication slot.
|
||||
*/
|
||||
Datum
|
||||
pg_create_physical_replication_slot(PG_FUNCTION_ARGS)
|
||||
{
|
||||
Name name = PG_GETARG_NAME(0);
|
||||
Datum values[2];
|
||||
bool nulls[2];
|
||||
TupleDesc tupdesc;
|
||||
HeapTuple tuple;
|
||||
Datum result;
|
||||
|
||||
check_permissions();
|
||||
|
||||
CheckSlotRequirements();
|
||||
|
||||
if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
|
||||
elog(ERROR, "return type must be a row type");
|
||||
|
||||
/* acquire replication slot, this will check for conflicting names*/
|
||||
ReplicationSlotCreate(NameStr(*name), false);
|
||||
|
||||
values[0] = CStringGetTextDatum(NameStr(MyReplicationSlot->data.name));
|
||||
|
||||
nulls[0] = false;
|
||||
nulls[1] = true;
|
||||
|
||||
tuple = heap_form_tuple(tupdesc, values, nulls);
|
||||
result = HeapTupleGetDatum(tuple);
|
||||
|
||||
ReplicationSlotRelease();
|
||||
|
||||
PG_RETURN_DATUM(result);
|
||||
}
|
||||
|
||||
/*
|
||||
* SQL function for dropping a replication slot.
|
||||
*/
|
||||
Datum
|
||||
pg_drop_replication_slot(PG_FUNCTION_ARGS)
|
||||
{
|
||||
Name name = PG_GETARG_NAME(0);
|
||||
|
||||
check_permissions();
|
||||
|
||||
CheckSlotRequirements();
|
||||
|
||||
ReplicationSlotDrop(NameStr(*name));
|
||||
|
||||
PG_RETURN_VOID();
|
||||
}
|
||||
|
||||
/*
|
||||
* pg_get_replication_slots - SQL SRF showing active replication slots.
|
||||
*/
|
||||
Datum
|
||||
pg_get_replication_slots(PG_FUNCTION_ARGS)
|
||||
{
|
||||
#define PG_STAT_GET_REPLICATION_SLOTS_COLS 6
|
||||
ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
|
||||
TupleDesc tupdesc;
|
||||
Tuplestorestate *tupstore;
|
||||
MemoryContext per_query_ctx;
|
||||
MemoryContext oldcontext;
|
||||
int slotno;
|
||||
|
||||
/* check to see if caller supports us returning a tuplestore */
|
||||
if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo))
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
||||
errmsg("set-valued function called in context that cannot accept a set")));
|
||||
if (!(rsinfo->allowedModes & SFRM_Materialize))
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
||||
errmsg("materialize mode required, but it is not " \
|
||||
"allowed in this context")));
|
||||
|
||||
/* Build a tuple descriptor for our result type */
|
||||
if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
|
||||
elog(ERROR, "return type must be a row type");
|
||||
|
||||
/*
|
||||
* We don't require any special permission to see this function's data
|
||||
* because nothing should be sensitive. The most critical being the slot
|
||||
* name, which shouldn't contain anything particularly sensitive.
|
||||
*/
|
||||
|
||||
per_query_ctx = rsinfo->econtext->ecxt_per_query_memory;
|
||||
oldcontext = MemoryContextSwitchTo(per_query_ctx);
|
||||
|
||||
tupstore = tuplestore_begin_heap(true, false, work_mem);
|
||||
rsinfo->returnMode = SFRM_Materialize;
|
||||
rsinfo->setResult = tupstore;
|
||||
rsinfo->setDesc = tupdesc;
|
||||
|
||||
MemoryContextSwitchTo(oldcontext);
|
||||
|
||||
for (slotno = 0; slotno < max_replication_slots; slotno++)
|
||||
{
|
||||
ReplicationSlot *slot = &ReplicationSlotCtl->replication_slots[slotno];
|
||||
Datum values[PG_STAT_GET_REPLICATION_SLOTS_COLS];
|
||||
bool nulls[PG_STAT_GET_REPLICATION_SLOTS_COLS];
|
||||
|
||||
TransactionId xmin;
|
||||
XLogRecPtr restart_lsn;
|
||||
bool active;
|
||||
Oid database;
|
||||
const char *slot_name;
|
||||
|
||||
char restart_lsn_s[MAXFNAMELEN];
|
||||
int i;
|
||||
|
||||
SpinLockAcquire(&slot->mutex);
|
||||
if (!slot->in_use)
|
||||
{
|
||||
SpinLockRelease(&slot->mutex);
|
||||
continue;
|
||||
}
|
||||
else
|
||||
{
|
||||
xmin = slot->data.xmin;
|
||||
database = slot->data.database;
|
||||
restart_lsn = slot->data.restart_lsn;
|
||||
slot_name = pstrdup(NameStr(slot->data.name));
|
||||
|
||||
active = slot->active;
|
||||
}
|
||||
SpinLockRelease(&slot->mutex);
|
||||
|
||||
memset(nulls, 0, sizeof(nulls));
|
||||
|
||||
snprintf(restart_lsn_s, sizeof(restart_lsn_s), "%X/%X",
|
||||
(uint32) (restart_lsn >> 32), (uint32) restart_lsn);
|
||||
|
||||
i = 0;
|
||||
values[i++] = CStringGetTextDatum(slot_name);
|
||||
if (database == InvalidOid)
|
||||
values[i++] = CStringGetTextDatum("physical");
|
||||
else
|
||||
values[i++] = CStringGetTextDatum("logical");
|
||||
values[i++] = database;
|
||||
values[i++] = BoolGetDatum(active);
|
||||
if (xmin != InvalidTransactionId)
|
||||
values[i++] = TransactionIdGetDatum(xmin);
|
||||
else
|
||||
nulls[i++] = true;
|
||||
if (restart_lsn != InvalidTransactionId)
|
||||
values[i++] = CStringGetTextDatum(restart_lsn_s);
|
||||
else
|
||||
nulls[i++] = true;
|
||||
|
||||
tuplestore_putvalues(tupstore, tupdesc, values, nulls);
|
||||
}
|
||||
|
||||
tuplestore_donestoring(tupstore);
|
||||
|
||||
return (Datum) 0;
|
||||
}
|
||||
@@ -187,6 +187,7 @@ void
|
||||
WalReceiverMain(void)
|
||||
{
|
||||
char conninfo[MAXCONNINFO];
|
||||
char slotname[NAMEDATALEN];
|
||||
XLogRecPtr startpoint;
|
||||
TimeLineID startpointTLI;
|
||||
TimeLineID primaryTLI;
|
||||
@@ -241,6 +242,7 @@ WalReceiverMain(void)
|
||||
|
||||
/* Fetch information required to start streaming */
|
||||
strlcpy(conninfo, (char *) walrcv->conninfo, MAXCONNINFO);
|
||||
strlcpy(slotname, (char *) walrcv->slotname, NAMEDATALEN);
|
||||
startpoint = walrcv->receiveStart;
|
||||
startpointTLI = walrcv->receiveStartTLI;
|
||||
|
||||
@@ -355,7 +357,8 @@ WalReceiverMain(void)
|
||||
* on the new timeline.
|
||||
*/
|
||||
ThisTimeLineID = startpointTLI;
|
||||
if (walrcv_startstreaming(startpointTLI, startpoint))
|
||||
if (walrcv_startstreaming(startpointTLI, startpoint,
|
||||
slotname[0] != '\0' ? slotname : NULL))
|
||||
{
|
||||
bool endofwal = false;
|
||||
|
||||
|
||||
@@ -219,11 +219,13 @@ ShutdownWalRcv(void)
|
||||
/*
|
||||
* Request postmaster to start walreceiver.
|
||||
*
|
||||
* recptr indicates the position where streaming should begin, and conninfo
|
||||
* is a libpq connection string to use.
|
||||
* recptr indicates the position where streaming should begin, conninfo
|
||||
* is a libpq connection string to use, and slotname is, optionally, the name
|
||||
* of a replication slot to acquire.
|
||||
*/
|
||||
void
|
||||
RequestXLogStreaming(TimeLineID tli, XLogRecPtr recptr, const char *conninfo)
|
||||
RequestXLogStreaming(TimeLineID tli, XLogRecPtr recptr, const char *conninfo,
|
||||
const char *slotname)
|
||||
{
|
||||
/* use volatile pointer to prevent code rearrangement */
|
||||
volatile WalRcvData *walrcv = WalRcv;
|
||||
@@ -250,6 +252,11 @@ RequestXLogStreaming(TimeLineID tli, XLogRecPtr recptr, const char *conninfo)
|
||||
else
|
||||
walrcv->conninfo[0] = '\0';
|
||||
|
||||
if (slotname != NULL)
|
||||
strlcpy((char *) walrcv->slotname, slotname, NAMEDATALEN);
|
||||
else
|
||||
walrcv->slotname[0] = '\0';
|
||||
|
||||
if (walrcv->walRcvState == WALRCV_STOPPED)
|
||||
{
|
||||
launch = true;
|
||||
|
||||
@@ -53,6 +53,7 @@
|
||||
#include "miscadmin.h"
|
||||
#include "nodes/replnodes.h"
|
||||
#include "replication/basebackup.h"
|
||||
#include "replication/slot.h"
|
||||
#include "replication/syncrep.h"
|
||||
#include "replication/walreceiver.h"
|
||||
#include "replication/walsender.h"
|
||||
@@ -218,12 +219,17 @@ InitWalSender(void)
|
||||
void
|
||||
WalSndErrorCleanup()
|
||||
{
|
||||
LWLockReleaseAll();
|
||||
|
||||
if (sendFile >= 0)
|
||||
{
|
||||
close(sendFile);
|
||||
sendFile = -1;
|
||||
}
|
||||
|
||||
if (MyReplicationSlot != NULL)
|
||||
ReplicationSlotRelease();
|
||||
|
||||
replication_active = false;
|
||||
if (walsender_ready_to_stop)
|
||||
proc_exit(0);
|
||||
@@ -421,6 +427,15 @@ StartReplication(StartReplicationCmd *cmd)
|
||||
* written at wal_level='minimal'.
|
||||
*/
|
||||
|
||||
if (cmd->slotname)
|
||||
{
|
||||
ReplicationSlotAcquire(cmd->slotname);
|
||||
if (MyReplicationSlot->data.database != InvalidOid)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
|
||||
(errmsg("cannot use a replication slot created for changeset extraction for streaming replication"))));
|
||||
}
|
||||
|
||||
/*
|
||||
* Select the timeline. If it was given explicitly by the client, use
|
||||
* that. Otherwise use the timeline of the last replayed record, which is
|
||||
@@ -565,6 +580,9 @@ StartReplication(StartReplicationCmd *cmd)
|
||||
Assert(streamingDoneSending && streamingDoneReceiving);
|
||||
}
|
||||
|
||||
if (cmd->slotname)
|
||||
ReplicationSlotRelease();
|
||||
|
||||
/*
|
||||
* Copy is finished now. Send a single-row result set indicating the next
|
||||
* timeline.
|
||||
@@ -622,6 +640,75 @@ StartReplication(StartReplicationCmd *cmd)
|
||||
pq_puttextmessage('C', "START_STREAMING");
|
||||
}
|
||||
|
||||
/*
|
||||
* Create a new replication slot.
|
||||
*/
|
||||
static void
|
||||
CreateReplicationSlot(CreateReplicationSlotCmd *cmd)
|
||||
{
|
||||
const char *slot_name;
|
||||
StringInfoData buf;
|
||||
|
||||
Assert(!MyReplicationSlot);
|
||||
|
||||
/* setup state for XLogReadPage */
|
||||
sendTimeLineIsHistoric = false;
|
||||
sendTimeLine = ThisTimeLineID;
|
||||
|
||||
ReplicationSlotCreate(cmd->slotname, cmd->kind == REPLICATION_KIND_LOGICAL);
|
||||
|
||||
initStringInfo(&output_message);
|
||||
|
||||
slot_name = NameStr(MyReplicationSlot->data.name);
|
||||
|
||||
/*
|
||||
* It may seem somewhat pointless to send back the same slot name the
|
||||
* client just requested and nothing else, but logical replication
|
||||
* will add more fields here. (We could consider removing the slot
|
||||
* name from what's sent back, though, since the client has specified
|
||||
* that.)
|
||||
*/
|
||||
|
||||
pq_beginmessage(&buf, 'T');
|
||||
pq_sendint(&buf, 1, 2); /* 1 field */
|
||||
|
||||
/* first field: slot name */
|
||||
pq_sendstring(&buf, "slot_name"); /* col name */
|
||||
pq_sendint(&buf, 0, 4); /* table oid */
|
||||
pq_sendint(&buf, 0, 2); /* attnum */
|
||||
pq_sendint(&buf, TEXTOID, 4); /* type oid */
|
||||
pq_sendint(&buf, -1, 2); /* typlen */
|
||||
pq_sendint(&buf, 0, 4); /* typmod */
|
||||
pq_sendint(&buf, 0, 2); /* format code */
|
||||
|
||||
pq_endmessage(&buf);
|
||||
|
||||
/* Send a DataRow message */
|
||||
pq_beginmessage(&buf, 'D');
|
||||
pq_sendint(&buf, 1, 2); /* # of columns */
|
||||
|
||||
/* slot_name */
|
||||
pq_sendint(&buf, strlen(slot_name), 4); /* col1 len */
|
||||
pq_sendbytes(&buf, slot_name, strlen(slot_name));
|
||||
|
||||
pq_endmessage(&buf);
|
||||
|
||||
/*
|
||||
* release active status again, START_REPLICATION will reacquire it
|
||||
*/
|
||||
ReplicationSlotRelease();
|
||||
}
|
||||
|
||||
/*
|
||||
* Get rid of a replication slot that is no longer wanted.
|
||||
*/
|
||||
static void
|
||||
DropReplicationSlot(DropReplicationSlotCmd *cmd)
|
||||
{
|
||||
ReplicationSlotDrop(cmd->slotname);
|
||||
EndCommand("DROP_REPLICATION_SLOT", DestRemote);
|
||||
}
|
||||
|
||||
/*
|
||||
* Execute an incoming replication command.
|
||||
*/
|
||||
@@ -660,14 +747,28 @@ exec_replication_command(const char *cmd_string)
|
||||
IdentifySystem();
|
||||
break;
|
||||
|
||||
case T_StartReplicationCmd:
|
||||
StartReplication((StartReplicationCmd *) cmd_node);
|
||||
break;
|
||||
|
||||
case T_BaseBackupCmd:
|
||||
SendBaseBackup((BaseBackupCmd *) cmd_node);
|
||||
break;
|
||||
|
||||
case T_CreateReplicationSlotCmd:
|
||||
CreateReplicationSlot((CreateReplicationSlotCmd *) cmd_node);
|
||||
break;
|
||||
|
||||
case T_DropReplicationSlotCmd:
|
||||
DropReplicationSlot((DropReplicationSlotCmd *) cmd_node);
|
||||
break;
|
||||
|
||||
case T_StartReplicationCmd:
|
||||
{
|
||||
StartReplicationCmd *cmd = (StartReplicationCmd *) cmd_node;
|
||||
if (cmd->kind == REPLICATION_KIND_PHYSICAL)
|
||||
StartReplication(cmd);
|
||||
else
|
||||
elog(ERROR, "cannot handle changeset extraction yet");
|
||||
break;
|
||||
}
|
||||
|
||||
case T_TimeLineHistoryCmd:
|
||||
SendTimeLineHistory((TimeLineHistoryCmd *) cmd_node);
|
||||
break;
|
||||
@@ -830,6 +931,39 @@ ProcessStandbyMessage(void)
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Remember that a walreceiver just confirmed receipt of lsn `lsn`.
|
||||
*/
|
||||
static void
|
||||
PhysicalConfirmReceivedLocation(XLogRecPtr lsn)
|
||||
{
|
||||
bool changed = false;
|
||||
/* use volatile pointer to prevent code rearrangement */
|
||||
volatile ReplicationSlot *slot = MyReplicationSlot;
|
||||
|
||||
Assert(lsn != InvalidXLogRecPtr);
|
||||
SpinLockAcquire(&slot->mutex);
|
||||
if (slot->data.restart_lsn != lsn)
|
||||
{
|
||||
changed = true;
|
||||
slot->data.restart_lsn = lsn;
|
||||
}
|
||||
SpinLockRelease(&slot->mutex);
|
||||
|
||||
if (changed)
|
||||
{
|
||||
ReplicationSlotMarkDirty();
|
||||
ReplicationSlotsComputeRequiredLSN();
|
||||
}
|
||||
|
||||
/*
|
||||
* One could argue that the slot should saved to disk now, but that'd be
|
||||
* energy wasted - the worst lost information can do here is give us wrong
|
||||
* information in a statistics view - we'll just potentially be more
|
||||
* conservative in removing files.
|
||||
*/
|
||||
}
|
||||
|
||||
/*
|
||||
* Regular reply from standby advising of WAL positions on standby server.
|
||||
*/
|
||||
@@ -875,6 +1009,48 @@ ProcessStandbyReplyMessage(void)
|
||||
|
||||
if (!am_cascading_walsender)
|
||||
SyncRepReleaseWaiters();
|
||||
|
||||
/*
|
||||
* Advance our local xmin horizon when the client confirmed a flush.
|
||||
*/
|
||||
if (MyReplicationSlot && flushPtr != InvalidXLogRecPtr)
|
||||
{
|
||||
if (MyReplicationSlot->data.database != InvalidOid)
|
||||
elog(ERROR, "cannot handle changeset extraction yet");
|
||||
else
|
||||
PhysicalConfirmReceivedLocation(flushPtr);
|
||||
}
|
||||
}
|
||||
|
||||
/* compute new replication slot xmin horizon if needed */
|
||||
static void
|
||||
PhysicalReplicationSlotNewXmin(TransactionId feedbackXmin)
|
||||
{
|
||||
bool changed = false;
|
||||
volatile ReplicationSlot *slot = MyReplicationSlot;
|
||||
|
||||
SpinLockAcquire(&slot->mutex);
|
||||
MyPgXact->xmin = InvalidTransactionId;
|
||||
/*
|
||||
* For physical replication we don't need the the interlock provided
|
||||
* by xmin and effective_xmin since the consequences of a missed increase
|
||||
* are limited to query cancellations, so set both at once.
|
||||
*/
|
||||
if (!TransactionIdIsNormal(slot->data.xmin) ||
|
||||
!TransactionIdIsNormal(feedbackXmin) ||
|
||||
TransactionIdPrecedes(slot->data.xmin, feedbackXmin))
|
||||
{
|
||||
changed = true;
|
||||
slot->data.xmin = feedbackXmin;
|
||||
slot->effective_xmin = feedbackXmin;
|
||||
}
|
||||
SpinLockRelease(&slot->mutex);
|
||||
|
||||
if (changed)
|
||||
{
|
||||
ReplicationSlotMarkDirty();
|
||||
ReplicationSlotsComputeRequiredXmin();
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -904,6 +1080,8 @@ ProcessStandbyHSFeedbackMessage(void)
|
||||
if (!TransactionIdIsNormal(feedbackXmin))
|
||||
{
|
||||
MyPgXact->xmin = InvalidTransactionId;
|
||||
if (MyReplicationSlot != NULL)
|
||||
PhysicalReplicationSlotNewXmin(feedbackXmin);
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -951,8 +1129,17 @@ ProcessStandbyHSFeedbackMessage(void)
|
||||
* GetOldestXmin. (If we're moving our xmin forward, this is obviously
|
||||
* safe, and if we're moving it backwards, well, the data is at risk
|
||||
* already since a VACUUM could have just finished calling GetOldestXmin.)
|
||||
*
|
||||
* If we're using a replication slot we reserve the xmin via that,
|
||||
* otherwise via the walsender's PGXACT entry.
|
||||
|
||||
* XXX: It might make sense to introduce ephemeral slots and always use
|
||||
* the slot mechanism.
|
||||
*/
|
||||
MyPgXact->xmin = feedbackXmin;
|
||||
if (MyReplicationSlot != NULL) /* XXX: persistency configurable? */
|
||||
PhysicalReplicationSlotNewXmin(feedbackXmin);
|
||||
else
|
||||
MyPgXact->xmin = feedbackXmin;
|
||||
}
|
||||
|
||||
/* Main loop of walsender process that streams the WAL over Copy messages. */
|
||||
|
||||
Reference in New Issue
Block a user