1
0
mirror of https://github.com/postgres/postgres.git synced 2025-11-10 17:42:29 +03:00

Logical replication

- Add PUBLICATION catalogs and DDL
- Add SUBSCRIPTION catalog and DDL
- Define logical replication protocol and output plugin
- Add logical replication workers

From: Petr Jelinek <petr@2ndquadrant.com>
Reviewed-by: Steve Singer <steve@ssinger.info>
Reviewed-by: Andres Freund <andres@anarazel.de>
Reviewed-by: Erik Rijkers <er@xs4all.nl>
Reviewed-by: Peter Eisentraut <peter.eisentraut@2ndquadrant.com>
This commit is contained in:
Peter Eisentraut
2017-01-19 12:00:00 -05:00
parent ba61a04bc7
commit 665d1fad99
119 changed files with 13354 additions and 95 deletions

View File

@@ -14,7 +14,7 @@ include $(top_builddir)/src/Makefile.global
override CPPFLAGS := -I$(srcdir) $(CPPFLAGS)
OBJS = decode.o logical.o logicalfuncs.o message.o origin.o reorderbuffer.o \
snapbuild.o
OBJS = decode.o launcher.o logical.o logicalfuncs.o message.o origin.o \
proto.o relation.o reorderbuffer.o snapbuild.o worker.o
include $(top_srcdir)/src/backend/common.mk

View File

@@ -0,0 +1,759 @@
/*-------------------------------------------------------------------------
* launcher.c
* PostgreSQL logical replication worker launcher process
*
* Copyright (c) 2012-2016, PostgreSQL Global Development Group
*
* IDENTIFICATION
* src/backend/replication/logical/launcher.c
*
* NOTES
* This module contains the logical replication worker launcher which
* uses the background worker infrastructure to start the logical
* replication workers for every enabled subscription.
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "funcapi.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "access/heapam.h"
#include "access/htup.h"
#include "access/htup_details.h"
#include "access/xact.h"
#include "catalog/pg_subscription.h"
#include "libpq/pqsignal.h"
#include "postmaster/bgworker.h"
#include "postmaster/fork_process.h"
#include "postmaster/postmaster.h"
#include "replication/logicallauncher.h"
#include "replication/logicalworker.h"
#include "replication/slot.h"
#include "replication/worker_internal.h"
#include "storage/ipc.h"
#include "storage/proc.h"
#include "storage/procarray.h"
#include "storage/procsignal.h"
#include "tcop/tcopprot.h"
#include "utils/memutils.h"
#include "utils/pg_lsn.h"
#include "utils/ps_status.h"
#include "utils/timeout.h"
#include "utils/snapmgr.h"
/* max sleep time between cycles (3min) */
#define DEFAULT_NAPTIME_PER_CYCLE 180000L
int max_logical_replication_workers = 4;
LogicalRepWorker *MyLogicalRepWorker = NULL;
typedef struct LogicalRepCtxStruct
{
/* Supervisor process. */
pid_t launcher_pid;
/* Background workers. */
LogicalRepWorker workers[FLEXIBLE_ARRAY_MEMBER];
} LogicalRepCtxStruct;
LogicalRepCtxStruct *LogicalRepCtx;
static void logicalrep_worker_onexit(int code, Datum arg);
static void logicalrep_worker_detach(void);
bool got_SIGTERM = false;
static bool on_commit_laucher_wakeup = false;
Datum pg_stat_get_subscription(PG_FUNCTION_ARGS);
/*
* Load the list of subscriptions.
*
* Only the fields interesting for worker start/stop functions are filled for
* each subscription.
*/
static List *
get_subscription_list(void)
{
List *res = NIL;
Relation rel;
HeapScanDesc scan;
HeapTuple tup;
MemoryContext resultcxt;
/* This is the context that we will allocate our output data in */
resultcxt = CurrentMemoryContext;
/*
* Start a transaction so we can access pg_database, and get a snapshot.
* We don't have a use for the snapshot itself, but we're interested in
* the secondary effect that it sets RecentGlobalXmin. (This is critical
* for anything that reads heap pages, because HOT may decide to prune
* them even if the process doesn't attempt to modify any tuples.)
*/
StartTransactionCommand();
(void) GetTransactionSnapshot();
rel = heap_open(SubscriptionRelationId, AccessShareLock);
scan = heap_beginscan_catalog(rel, 0, NULL);
while (HeapTupleIsValid(tup = heap_getnext(scan, ForwardScanDirection)))
{
Form_pg_subscription subform = (Form_pg_subscription) GETSTRUCT(tup);
Subscription *sub;
MemoryContext oldcxt;
/*
* Allocate our results in the caller's context, not the
* transaction's. We do this inside the loop, and restore the original
* context at the end, so that leaky things like heap_getnext() are
* not called in a potentially long-lived context.
*/
oldcxt = MemoryContextSwitchTo(resultcxt);
sub = (Subscription *) palloc(sizeof(Subscription));
sub->oid = HeapTupleGetOid(tup);
sub->dbid = subform->subdbid;
sub->owner = subform->subowner;
sub->enabled = subform->subenabled;
sub->name = pstrdup(NameStr(subform->subname));
/* We don't fill fields we are not interested in. */
sub->conninfo = NULL;
sub->slotname = NULL;
sub->publications = NIL;
res = lappend(res, sub);
MemoryContextSwitchTo(oldcxt);
}
heap_endscan(scan);
heap_close(rel, AccessShareLock);
CommitTransactionCommand();
return res;
}
/*
* Wait for a background worker to start up and attach to the shmem context.
*
* This is like WaitForBackgroundWorkerStartup(), except that we wait for
* attaching, not just start and we also just exit if postmaster died.
*/
static bool
WaitForReplicationWorkerAttach(LogicalRepWorker *worker,
BackgroundWorkerHandle *handle)
{
BgwHandleStatus status;
int rc;
for (;;)
{
pid_t pid;
CHECK_FOR_INTERRUPTS();
status = GetBackgroundWorkerPid(handle, &pid);
/*
* Worker started and attached to our shmem. This check is safe
* because only laucher ever starts the workers, so nobody can steal
* the worker slot.
*/
if (status == BGWH_STARTED && worker->proc)
return true;
/* Worker didn't start or died before attaching to our shmem. */
if (status == BGWH_STOPPED)
return false;
/*
* We need timeout because we generaly don't get notified via latch
* about the worker attach.
*/
rc = WaitLatch(MyLatch,
WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
1000L, WAIT_EVENT_BGWORKER_STARTUP);
if (rc & WL_POSTMASTER_DEATH)
proc_exit(1);
ResetLatch(MyLatch);
}
return false;
}
/*
* Walks the workers array and searches for one that matches given
* subscription id.
*/
LogicalRepWorker *
logicalrep_worker_find(Oid subid)
{
int i;
LogicalRepWorker *res = NULL;
Assert(LWLockHeldByMe(LogicalRepWorkerLock));
/* Search for attached worker for a given subscription id. */
for (i = 0; i < max_logical_replication_workers; i++)
{
LogicalRepWorker *w = &LogicalRepCtx->workers[i];
if (w->subid == subid && w->proc && IsBackendPid(w->proc->pid))
{
res = w;
break;
}
}
return res;
}
/*
* Start new apply background worker.
*/
void
logicalrep_worker_launch(Oid dbid, Oid subid, const char *subname, Oid userid)
{
BackgroundWorker bgw;
BackgroundWorkerHandle *bgw_handle;
int slot;
LogicalRepWorker *worker = NULL;
ereport(LOG,
(errmsg("starting logical replication worker for subscription \"%s\"",
subname)));
/* Report this after the initial starting message for consistency. */
if (max_replication_slots == 0)
ereport(ERROR,
(errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED),
errmsg("cannot start logical replication workers when max_replication_slots = 0")));
/*
* We need to do the modification of the shared memory under lock so that
* we have consistent view.
*/
LWLockAcquire(LogicalRepWorkerLock, LW_EXCLUSIVE);
/* Find unused worker slot. */
for (slot = 0; slot < max_logical_replication_workers; slot++)
{
if (!LogicalRepCtx->workers[slot].proc)
{
worker = &LogicalRepCtx->workers[slot];
break;
}
}
/* Bail if not found */
if (worker == NULL)
{
ereport(WARNING,
(errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED),
errmsg("out of logical replication workers slots"),
errhint("You might need to increase max_logical_replication_workers.")));
return;
}
/* Prepare the worker info. */
memset(worker, 0, sizeof(LogicalRepWorker));
worker->dbid = dbid;
worker->userid = userid;
worker->subid = subid;
LWLockRelease(LogicalRepWorkerLock);
/* Register the new dynamic worker. */
bgw.bgw_flags = BGWORKER_SHMEM_ACCESS |
BGWORKER_BACKEND_DATABASE_CONNECTION;
bgw.bgw_start_time = BgWorkerStart_RecoveryFinished;
bgw.bgw_main = ApplyWorkerMain;
snprintf(bgw.bgw_name, BGW_MAXLEN,
"logical replication worker for subscription %u", subid);
bgw.bgw_restart_time = BGW_NEVER_RESTART;
bgw.bgw_notify_pid = MyProcPid;
bgw.bgw_main_arg = slot;
if (!RegisterDynamicBackgroundWorker(&bgw, &bgw_handle))
{
ereport(WARNING,
(errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED),
errmsg("out of background workers slots"),
errhint("You might need to increase max_worker_processes.")));
return;
}
/* Now wait until it attaches. */
WaitForReplicationWorkerAttach(worker, bgw_handle);
}
/*
* Stop the logical replication worker and wait until it detaches from the
* slot.
*
* The caller must hold LogicalRepLauncherLock to ensure that new workers are
* not being started during this function call.
*/
void
logicalrep_worker_stop(Oid subid)
{
LogicalRepWorker *worker;
Assert(LWLockHeldByMe(LogicalRepLauncherLock));
LWLockAcquire(LogicalRepWorkerLock, LW_SHARED);
worker = logicalrep_worker_find(subid);
/* No worker, nothing to do. */
if (!worker)
{
LWLockRelease(LogicalRepWorkerLock);
return;
}
/*
* If we found worker but it does not have proc set it is starting up,
* wait for it to finish and then kill it.
*/
while (worker && !worker->proc)
{
int rc;
LWLockRelease(LogicalRepWorkerLock);
CHECK_FOR_INTERRUPTS();
/* Wait for signal. */
rc = WaitLatch(&MyProc->procLatch,
WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
1000L, WAIT_EVENT_BGWORKER_STARTUP);
/* emergency bailout if postmaster has died */
if (rc & WL_POSTMASTER_DEATH)
proc_exit(1);
ResetLatch(&MyProc->procLatch);
/* Check if the worker has started. */
LWLockAcquire(LogicalRepWorkerLock, LW_SHARED);
worker = logicalrep_worker_find(subid);
if (!worker || worker->proc)
break;
}
/* Now terminate the worker ... */
kill(worker->proc->pid, SIGTERM);
LWLockRelease(LogicalRepWorkerLock);
/* ... and wait for it to die. */
for (;;)
{
int rc;
LWLockAcquire(LogicalRepWorkerLock, LW_SHARED);
if (!worker->proc)
{
LWLockRelease(LogicalRepWorkerLock);
break;
}
LWLockRelease(LogicalRepWorkerLock);
CHECK_FOR_INTERRUPTS();
/* Wait for more work. */
rc = WaitLatch(&MyProc->procLatch,
WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
1000L, WAIT_EVENT_BGWORKER_SHUTDOWN);
/* emergency bailout if postmaster has died */
if (rc & WL_POSTMASTER_DEATH)
proc_exit(1);
ResetLatch(&MyProc->procLatch);
}
}
/*
* Attach to a slot.
*/
void
logicalrep_worker_attach(int slot)
{
/* Block concurrent access. */
LWLockAcquire(LogicalRepWorkerLock, LW_EXCLUSIVE);
Assert(slot >= 0 && slot < max_logical_replication_workers);
MyLogicalRepWorker = &LogicalRepCtx->workers[slot];
if (MyLogicalRepWorker->proc)
ereport(ERROR,
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
errmsg("logical replication worker slot %d already used by "
"another worker", slot)));
MyLogicalRepWorker->proc = MyProc;
before_shmem_exit(logicalrep_worker_onexit, (Datum) 0);
LWLockRelease(LogicalRepWorkerLock);
}
/*
* Detach the worker (cleans up the worker info).
*/
static void
logicalrep_worker_detach(void)
{
/* Block concurrent access. */
LWLockAcquire(LogicalRepWorkerLock, LW_EXCLUSIVE);
MyLogicalRepWorker->dbid = InvalidOid;
MyLogicalRepWorker->userid = InvalidOid;
MyLogicalRepWorker->subid = InvalidOid;
MyLogicalRepWorker->proc = NULL;
LWLockRelease(LogicalRepWorkerLock);
}
/*
* Cleanup function.
*
* Called on logical replication worker exit.
*/
static void
logicalrep_worker_onexit(int code, Datum arg)
{
logicalrep_worker_detach();
}
/* SIGTERM: set flag to exit at next convenient time */
void
logicalrep_worker_sigterm(SIGNAL_ARGS)
{
got_SIGTERM = true;
/* Waken anything waiting on the process latch */
SetLatch(MyLatch);
}
/*
* ApplyLauncherShmemSize
* Compute space needed for replication launcher shared memory
*/
Size
ApplyLauncherShmemSize(void)
{
Size size;
/*
* Need the fixed struct and the array of LogicalRepWorker.
*/
size = sizeof(LogicalRepCtxStruct);
size = MAXALIGN(size);
size = add_size(size, mul_size(max_logical_replication_workers,
sizeof(LogicalRepWorker)));
return size;
}
void
ApplyLauncherRegister(void)
{
BackgroundWorker bgw;
if (max_logical_replication_workers == 0)
return;
bgw.bgw_flags = BGWORKER_SHMEM_ACCESS |
BGWORKER_BACKEND_DATABASE_CONNECTION;
bgw.bgw_start_time = BgWorkerStart_RecoveryFinished;
bgw.bgw_main = ApplyLauncherMain;
snprintf(bgw.bgw_name, BGW_MAXLEN,
"logical replication launcher");
bgw.bgw_restart_time = 5;
bgw.bgw_notify_pid = 0;
bgw.bgw_main_arg = (Datum) 0;
RegisterBackgroundWorker(&bgw);
}
/*
* ApplyLauncherShmemInit
* Allocate and initialize replication launcher shared memory
*/
void
ApplyLauncherShmemInit(void)
{
bool found;
LogicalRepCtx = (LogicalRepCtxStruct *)
ShmemInitStruct("Logical Replication Launcher Data",
ApplyLauncherShmemSize(),
&found);
if (!found)
memset(LogicalRepCtx, 0, ApplyLauncherShmemSize());
}
/*
* Wakeup the launcher on commit if requested.
*/
void
AtCommit_ApplyLauncher(void)
{
if (on_commit_laucher_wakeup)
ApplyLauncherWakeup();
}
/*
* Request wakeup of the launcher on commit of the transaction.
*
* This is used to send launcher signal to stop sleeping and proccess the
* subscriptions when current transaction commits. Should be used when new
* tuple was added to the pg_subscription catalog.
*/
void
ApplyLauncherWakeupAtCommit(void)
{
if (!on_commit_laucher_wakeup)
on_commit_laucher_wakeup = true;
}
void
ApplyLauncherWakeup(void)
{
if (IsBackendPid(LogicalRepCtx->launcher_pid))
kill(LogicalRepCtx->launcher_pid, SIGUSR1);
}
/*
* Main loop for the apply launcher process.
*/
void
ApplyLauncherMain(Datum main_arg)
{
ereport(LOG,
(errmsg("logical replication launcher started")));
/* Establish signal handlers. */
pqsignal(SIGTERM, logicalrep_worker_sigterm);
BackgroundWorkerUnblockSignals();
/* Make it easy to identify our processes. */
SetConfigOption("application_name", MyBgworkerEntry->bgw_name,
PGC_USERSET, PGC_S_SESSION);
LogicalRepCtx->launcher_pid = MyProcPid;
/*
* Establish connection to nailed catalogs (we only ever access
* pg_subscription).
*/
BackgroundWorkerInitializeConnection(NULL, NULL);
/* Enter main loop */
while (!got_SIGTERM)
{
int rc;
List *sublist;
ListCell *lc;
MemoryContext subctx;
MemoryContext oldctx;
TimestampTz now;
TimestampTz last_start_time = 0;
long wait_time = DEFAULT_NAPTIME_PER_CYCLE;
now = GetCurrentTimestamp();
/* Limit the start retry to once a wal_retrieve_retry_interval */
if (TimestampDifferenceExceeds(last_start_time, now,
wal_retrieve_retry_interval))
{
/* Use temporary context for the database list and worker info. */
subctx = AllocSetContextCreate(TopMemoryContext,
"Logical Replication Launcher sublist",
ALLOCSET_DEFAULT_MINSIZE,
ALLOCSET_DEFAULT_INITSIZE,
ALLOCSET_DEFAULT_MAXSIZE);
oldctx = MemoryContextSwitchTo(subctx);
/* Block any concurrent DROP SUBSCRIPTION. */
LWLockAcquire(LogicalRepLauncherLock, LW_EXCLUSIVE);
/* search for subscriptions to start or stop. */
sublist = get_subscription_list();
/* Start the missing workers for enabled subscriptions. */
foreach(lc, sublist)
{
Subscription *sub = (Subscription *) lfirst(lc);
LogicalRepWorker *w;
LWLockAcquire(LogicalRepWorkerLock, LW_SHARED);
w = logicalrep_worker_find(sub->oid);
LWLockRelease(LogicalRepWorkerLock);
if (sub->enabled && w == NULL)
{
logicalrep_worker_launch(sub->dbid, sub->oid, sub->name, sub->owner);
last_start_time = now;
wait_time = wal_retrieve_retry_interval;
/* Limit to one worker per mainloop cycle. */
break;
}
}
LWLockRelease(LogicalRepLauncherLock);
/* Switch back to original memory context. */
MemoryContextSwitchTo(oldctx);
/* Clean the temporary memory. */
MemoryContextDelete(subctx);
}
else
{
/*
* The wait in previous cycle was interruped in less than
* wal_retrieve_retry_interval since last worker was started,
* this usually means crash of the worker, so we should retry
* in wal_retrieve_retry_interval again.
*/
wait_time = wal_retrieve_retry_interval;
}
/* Wait for more work. */
rc = WaitLatch(&MyProc->procLatch,
WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
wait_time,
WAIT_EVENT_LOGICAL_LAUNCHER_MAIN);
/* emergency bailout if postmaster has died */
if (rc & WL_POSTMASTER_DEATH)
proc_exit(1);
ResetLatch(&MyProc->procLatch);
}
LogicalRepCtx->launcher_pid = 0;
/* ... and if it returns, we're done */
ereport(LOG,
(errmsg("logical replication launcher shutting down")));
proc_exit(0);
}
/*
* Returns state of the subscriptions.
*/
Datum
pg_stat_get_subscription(PG_FUNCTION_ARGS)
{
#define PG_STAT_GET_SUBSCRIPTION_COLS 7
Oid subid = PG_ARGISNULL(0) ? InvalidOid : PG_GETARG_OID(0);
int i;
ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
TupleDesc tupdesc;
Tuplestorestate *tupstore;
MemoryContext per_query_ctx;
MemoryContext oldcontext;
/* check to see if caller supports us returning a tuplestore */
if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo))
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("set-valued function called in context that cannot accept a set")));
if (!(rsinfo->allowedModes & SFRM_Materialize))
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("materialize mode required, but it is not " \
"allowed in this context")));
/* Build a tuple descriptor for our result type */
if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
elog(ERROR, "return type must be a row type");
per_query_ctx = rsinfo->econtext->ecxt_per_query_memory;
oldcontext = MemoryContextSwitchTo(per_query_ctx);
tupstore = tuplestore_begin_heap(true, false, work_mem);
rsinfo->returnMode = SFRM_Materialize;
rsinfo->setResult = tupstore;
rsinfo->setDesc = tupdesc;
MemoryContextSwitchTo(oldcontext);
/* Make sure we get consistent view of the workers. */
LWLockAcquire(LogicalRepWorkerLock, LW_SHARED);
for (i = 0; i <= max_logical_replication_workers; i++)
{
/* for each row */
Datum values[PG_STAT_GET_SUBSCRIPTION_COLS];
bool nulls[PG_STAT_GET_SUBSCRIPTION_COLS];
int worker_pid;
LogicalRepWorker worker;
memcpy(&worker, &LogicalRepCtx->workers[i],
sizeof(LogicalRepWorker));
if (!worker.proc || !IsBackendPid(worker.proc->pid))
continue;
if (OidIsValid(subid) && worker.subid != subid)
continue;
worker_pid = worker.proc->pid;
MemSet(values, 0, sizeof(values));
MemSet(nulls, 0, sizeof(nulls));
values[0] = ObjectIdGetDatum(worker.subid);
values[1] = Int32GetDatum(worker_pid);
if (XLogRecPtrIsInvalid(worker.last_lsn))
nulls[2] = true;
else
values[2] = LSNGetDatum(worker.last_lsn);
if (worker.last_send_time == 0)
nulls[3] = true;
else
values[3] = TimestampTzGetDatum(worker.last_send_time);
if (worker.last_recv_time == 0)
nulls[4] = true;
else
values[4] = TimestampTzGetDatum(worker.last_recv_time);
if (XLogRecPtrIsInvalid(worker.reply_lsn))
nulls[5] = true;
else
values[5] = LSNGetDatum(worker.reply_lsn);
if (worker.reply_time == 0)
nulls[6] = true;
else
values[6] = TimestampTzGetDatum(worker.reply_time);
tuplestore_putvalues(tupstore, tupdesc, values, nulls);
/* If only a single subscription was requested, and we found it, break. */
if (OidIsValid(subid))
break;
}
LWLockRelease(LogicalRepWorkerLock);
/* clean up and return the tuplestore */
tuplestore_donestoring(tupstore);
return (Datum) 0;
}

View File

@@ -0,0 +1,637 @@
/*-------------------------------------------------------------------------
*
* proto.c
* logical replication protocol functions
*
* Copyright (c) 2015, PostgreSQL Global Development Group
*
* IDENTIFICATION
* src/backend/replication/logical/proto.c
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "access/sysattr.h"
#include "catalog/pg_namespace.h"
#include "catalog/pg_type.h"
#include "libpq/pqformat.h"
#include "replication/logicalproto.h"
#include "utils/builtins.h"
#include "utils/lsyscache.h"
#include "utils/syscache.h"
/*
* Protocol message flags.
*/
#define LOGICALREP_IS_REPLICA_IDENTITY 1
static void logicalrep_write_attrs(StringInfo out, Relation rel);
static void logicalrep_write_tuple(StringInfo out, Relation rel,
HeapTuple tuple);
static void logicalrep_read_attrs(StringInfo in, LogicalRepRelation *rel);
static void logicalrep_read_tuple(StringInfo in, LogicalRepTupleData *tuple);
static void logicalrep_write_namespace(StringInfo out, Oid nspid);
static const char *logicalrep_read_namespace(StringInfo in);
/*
* Write BEGIN to the output stream.
*/
void
logicalrep_write_begin(StringInfo out, ReorderBufferTXN *txn)
{
pq_sendbyte(out, 'B'); /* BEGIN */
/* fixed fields */
pq_sendint64(out, txn->final_lsn);
pq_sendint64(out, txn->commit_time);
pq_sendint(out, txn->xid, 4);
}
/*
* Read transaction BEGIN from the stream.
*/
void
logicalrep_read_begin(StringInfo in, LogicalRepBeginData *begin_data)
{
/* read fields */
begin_data->final_lsn = pq_getmsgint64(in);
if (begin_data->final_lsn == InvalidXLogRecPtr)
elog(ERROR, "final_lsn not set in begin message");
begin_data->committime = pq_getmsgint64(in);
begin_data->xid = pq_getmsgint(in, 4);
}
/*
* Write COMMIT to the output stream.
*/
void
logicalrep_write_commit(StringInfo out, ReorderBufferTXN *txn,
XLogRecPtr commit_lsn)
{
uint8 flags = 0;
pq_sendbyte(out, 'C'); /* sending COMMIT */
/* send the flags field (unused for now) */
pq_sendbyte(out, flags);
/* send fields */
pq_sendint64(out, commit_lsn);
pq_sendint64(out, txn->end_lsn);
pq_sendint64(out, txn->commit_time);
}
/*
* Read transaction COMMIT from the stream.
*/
void
logicalrep_read_commit(StringInfo in, LogicalRepCommitData *commit_data)
{
/* read flags (unused for now) */
uint8 flags = pq_getmsgbyte(in);
if (flags != 0)
elog(ERROR, "unknown flags %u in commit message", flags);
/* read fields */
commit_data->commit_lsn = pq_getmsgint64(in);
commit_data->end_lsn = pq_getmsgint64(in);
commit_data->committime = pq_getmsgint64(in);
}
/*
* Write ORIGIN to the output stream.
*/
void
logicalrep_write_origin(StringInfo out, const char *origin,
XLogRecPtr origin_lsn)
{
pq_sendbyte(out, 'O'); /* ORIGIN */
/* fixed fields */
pq_sendint64(out, origin_lsn);
/* origin string */
pq_sendstring(out, origin);
}
/*
* Read ORIGIN from the output stream.
*/
char *
logicalrep_read_origin(StringInfo in, XLogRecPtr *origin_lsn)
{
/* fixed fields */
*origin_lsn = pq_getmsgint64(in);
/* return origin */
return pstrdup(pq_getmsgstring(in));
}
/*
* Write INSERT to the output stream.
*/
void
logicalrep_write_insert(StringInfo out, Relation rel, HeapTuple newtuple)
{
pq_sendbyte(out, 'I'); /* action INSERT */
Assert(rel->rd_rel->relreplident == REPLICA_IDENTITY_DEFAULT ||
rel->rd_rel->relreplident == REPLICA_IDENTITY_FULL ||
rel->rd_rel->relreplident == REPLICA_IDENTITY_INDEX);
/* use Oid as relation identifier */
pq_sendint(out, RelationGetRelid(rel), 4);
pq_sendbyte(out, 'N'); /* new tuple follows */
logicalrep_write_tuple(out, rel, newtuple);
}
/*
* Read INSERT from stream.
*
* Fills the new tuple.
*/
LogicalRepRelId
logicalrep_read_insert(StringInfo in, LogicalRepTupleData *newtup)
{
char action;
LogicalRepRelId relid;
/* read the relation id */
relid = pq_getmsgint(in, 4);
action = pq_getmsgbyte(in);
if (action != 'N')
elog(ERROR, "expected new tuple but got %d",
action);
logicalrep_read_tuple(in, newtup);
return relid;
}
/*
* Write UPDATE to the output stream.
*/
void
logicalrep_write_update(StringInfo out, Relation rel, HeapTuple oldtuple,
HeapTuple newtuple)
{
pq_sendbyte(out, 'U'); /* action UPDATE */
Assert(rel->rd_rel->relreplident == REPLICA_IDENTITY_DEFAULT ||
rel->rd_rel->relreplident == REPLICA_IDENTITY_FULL ||
rel->rd_rel->relreplident == REPLICA_IDENTITY_INDEX);
/* use Oid as relation identifier */
pq_sendint(out, RelationGetRelid(rel), 4);
if (oldtuple != NULL)
{
if (rel->rd_rel->relreplident == REPLICA_IDENTITY_FULL)
pq_sendbyte(out, 'O'); /* old tuple follows */
else
pq_sendbyte(out, 'K'); /* old key follows */
logicalrep_write_tuple(out, rel, oldtuple);
}
pq_sendbyte(out, 'N'); /* new tuple follows */
logicalrep_write_tuple(out, rel, newtuple);
}
/*
* Read UPDATE from stream.
*/
LogicalRepRelId
logicalrep_read_update(StringInfo in, bool *has_oldtuple,
LogicalRepTupleData *oldtup,
LogicalRepTupleData *newtup)
{
char action;
LogicalRepRelId relid;
/* read the relation id */
relid = pq_getmsgint(in, 4);
/* read and verify action */
action = pq_getmsgbyte(in);
if (action != 'K' && action != 'O' && action != 'N')
elog(ERROR, "expected action 'N', 'O' or 'K', got %c",
action);
/* check for old tuple */
if (action == 'K' || action == 'O')
{
logicalrep_read_tuple(in, oldtup);
*has_oldtuple = true;
action = pq_getmsgbyte(in);
}
else
*has_oldtuple = false;
/* check for new tuple */
if (action != 'N')
elog(ERROR, "expected action 'N', got %c",
action);
logicalrep_read_tuple(in, newtup);
return relid;
}
/*
* Write DELETE to the output stream.
*/
void
logicalrep_write_delete(StringInfo out, Relation rel, HeapTuple oldtuple)
{
Assert(rel->rd_rel->relreplident == REPLICA_IDENTITY_DEFAULT ||
rel->rd_rel->relreplident == REPLICA_IDENTITY_FULL ||
rel->rd_rel->relreplident == REPLICA_IDENTITY_INDEX);
pq_sendbyte(out, 'D'); /* action DELETE */
/* use Oid as relation identifier */
pq_sendint(out, RelationGetRelid(rel), 4);
if (rel->rd_rel->relreplident == REPLICA_IDENTITY_FULL)
pq_sendbyte(out, 'O'); /* old tuple follows */
else
pq_sendbyte(out, 'K'); /* old key follows */
logicalrep_write_tuple(out, rel, oldtuple);
}
/*
* Read DELETE from stream.
*
* Fills the old tuple.
*/
LogicalRepRelId
logicalrep_read_delete(StringInfo in, LogicalRepTupleData *oldtup)
{
char action;
LogicalRepRelId relid;
/* read the relation id */
relid = pq_getmsgint(in, 4);
/* read and verify action */
action = pq_getmsgbyte(in);
if (action != 'K' && action != 'O')
elog(ERROR, "expected action 'O' or 'K', got %c", action);
logicalrep_read_tuple(in, oldtup);
return relid;
}
/*
* Write relation description to the output stream.
*/
void
logicalrep_write_rel(StringInfo out, Relation rel)
{
char *relname;
pq_sendbyte(out, 'R'); /* sending RELATION */
/* use Oid as relation identifier */
pq_sendint(out, RelationGetRelid(rel), 4);
/* send qualified relation name */
logicalrep_write_namespace(out, RelationGetNamespace(rel));
relname = RelationGetRelationName(rel);
pq_sendstring(out, relname);
/* send replica identity */
pq_sendbyte(out, rel->rd_rel->relreplident);
/* send the attribute info */
logicalrep_write_attrs(out, rel);
}
/*
* Read the relation info from stream and return as LogicalRepRelation.
*/
LogicalRepRelation *
logicalrep_read_rel(StringInfo in)
{
LogicalRepRelation *rel = palloc(sizeof(LogicalRepRelation));
rel->remoteid = pq_getmsgint(in, 4);
/* Read relation name from stream */
rel->nspname = pstrdup(logicalrep_read_namespace(in));
rel->relname = pstrdup(pq_getmsgstring(in));
/* Read the replica identity. */
rel->replident = pq_getmsgbyte(in);
/* Get attribute description */
logicalrep_read_attrs(in, rel);
return rel;
}
/*
* Write type info to the output stream.
*
* This function will always write base type info.
*/
void
logicalrep_write_typ(StringInfo out, Oid typoid)
{
Oid basetypoid = getBaseType(typoid);
HeapTuple tup;
Form_pg_type typtup;
pq_sendbyte(out, 'Y'); /* sending TYPE */
tup = SearchSysCache1(TYPEOID, ObjectIdGetDatum(basetypoid));
if (!HeapTupleIsValid(tup))
elog(ERROR, "cache lookup failed for type %u", basetypoid);
typtup = (Form_pg_type) GETSTRUCT(tup);
/* use Oid as relation identifier */
pq_sendint(out, typoid, 4);
/* send qualified type name */
logicalrep_write_namespace(out, typtup->typnamespace);
pq_sendstring(out, NameStr(typtup->typname));
ReleaseSysCache(tup);
}
/*
* Read type info from the output stream.
*/
void
logicalrep_read_typ(StringInfo in, LogicalRepTyp *ltyp)
{
ltyp->remoteid = pq_getmsgint(in, 4);
/* Read tupe name from stream */
ltyp->nspname = pstrdup(logicalrep_read_namespace(in));
ltyp->typname = pstrdup(pq_getmsgstring(in));
}
/*
* Write a tuple to the outputstream, in the most efficient format possible.
*/
static void
logicalrep_write_tuple(StringInfo out, Relation rel, HeapTuple tuple)
{
TupleDesc desc;
Datum values[MaxTupleAttributeNumber];
bool isnull[MaxTupleAttributeNumber];
int i;
uint16 nliveatts = 0;
desc = RelationGetDescr(rel);
for (i = 0; i < desc->natts; i++)
{
if (desc->attrs[i]->attisdropped)
continue;
nliveatts++;
}
pq_sendint(out, nliveatts, 2);
/* try to allocate enough memory from the get-go */
enlargeStringInfo(out, tuple->t_len +
nliveatts * (1 + 4));
heap_deform_tuple(tuple, desc, values, isnull);
/* Write the values */
for (i = 0; i < desc->natts; i++)
{
HeapTuple typtup;
Form_pg_type typclass;
Form_pg_attribute att = desc->attrs[i];
char *outputstr;
int len;
/* skip dropped columns */
if (att->attisdropped)
continue;
if (isnull[i])
{
pq_sendbyte(out, 'n'); /* null column */
continue;
}
else if (att->attlen == -1 && VARATT_IS_EXTERNAL_ONDISK(values[i]))
{
pq_sendbyte(out, 'u'); /* unchanged toast column */
continue;
}
typtup = SearchSysCache1(TYPEOID, ObjectIdGetDatum(att->atttypid));
if (!HeapTupleIsValid(typtup))
elog(ERROR, "cache lookup failed for type %u", att->atttypid);
typclass = (Form_pg_type) GETSTRUCT(typtup);
pq_sendbyte(out, 't'); /* 'text' data follows */
outputstr = OidOutputFunctionCall(typclass->typoutput, values[i]);
len = strlen(outputstr) + 1; /* null terminated */
pq_sendint(out, len, 4); /* length */
appendBinaryStringInfo(out, outputstr, len); /* data */
pfree(outputstr);
ReleaseSysCache(typtup);
}
}
/*
* Read tuple in remote format from stream.
*
* The returned tuple points into the input stringinfo.
*/
static void
logicalrep_read_tuple(StringInfo in, LogicalRepTupleData *tuple)
{
int i;
int natts;
/* Get of attributes. */
natts = pq_getmsgint(in, 2);
memset(tuple->changed, 0, sizeof(tuple->changed));
/* Read the data */
for (i = 0; i < natts; i++)
{
char kind;
int len;
kind = pq_getmsgbyte(in);
switch (kind)
{
case 'n': /* null */
tuple->values[i] = NULL;
tuple->changed[i] = true;
break;
case 'u': /* unchanged column */
tuple->values[i] = (char *) 0xdeadbeef; /* make bad usage more obvious */
break;
case 't': /* text formatted value */
{
tuple->changed[i] = true;
len = pq_getmsgint(in, 4); /* read length */
/* and data */
tuple->values[i] = (char *) pq_getmsgbytes(in, len);
}
break;
default:
elog(ERROR, "unknown data representation type '%c'", kind);
}
}
}
/*
* Write relation attributes to the stream.
*/
static void
logicalrep_write_attrs(StringInfo out, Relation rel)
{
TupleDesc desc;
int i;
uint16 nliveatts = 0;
Bitmapset *idattrs = NULL;
bool replidentfull;
desc = RelationGetDescr(rel);
/* send number of live attributes */
for (i = 0; i < desc->natts; i++)
{
if (desc->attrs[i]->attisdropped)
continue;
nliveatts++;
}
pq_sendint(out, nliveatts, 2);
/* fetch bitmap of REPLICATION IDENTITY attributes */
replidentfull = (rel->rd_rel->relreplident == REPLICA_IDENTITY_FULL);
if (!replidentfull)
idattrs = RelationGetIndexAttrBitmap(rel,
INDEX_ATTR_BITMAP_IDENTITY_KEY);
/* send the attributes */
for (i = 0; i < desc->natts; i++)
{
Form_pg_attribute att = desc->attrs[i];
uint8 flags = 0;
if (att->attisdropped)
continue;
/* REPLICA IDENTITY FULL means all colums are sent as part of key. */
if (replidentfull ||
bms_is_member(att->attnum - FirstLowInvalidHeapAttributeNumber,
idattrs))
flags |= LOGICALREP_IS_REPLICA_IDENTITY;
pq_sendbyte(out, flags);
/* attribute name */
pq_sendstring(out, NameStr(att->attname));
/* attribute type id */
pq_sendint(out, (int) att->atttypid, sizeof(att->atttypid));
/* attribute mode */
pq_sendint(out, att->atttypmod, sizeof(att->atttypmod));
}
bms_free(idattrs);
}
/*
* Read relation attribute names from the stream.
*/
static void
logicalrep_read_attrs(StringInfo in, LogicalRepRelation *rel)
{
int i;
int natts;
char **attnames;
Oid *atttyps;
Bitmapset *attkeys = NULL;
natts = pq_getmsgint(in, 2);
attnames = palloc(natts * sizeof(char *));
atttyps = palloc(natts * sizeof(Oid));
/* read the attributes */
for (i = 0; i < natts; i++)
{
uint8 flags;
/* Check for replica identity column */
flags = pq_getmsgbyte(in);
if (flags & LOGICALREP_IS_REPLICA_IDENTITY)
attkeys = bms_add_member(attkeys, i);
/* attribute name */
attnames[i] = pstrdup(pq_getmsgstring(in));
/* attribute type id */
atttyps[i] = (Oid) pq_getmsgint(in, 4);
/* we ignore attribute mode for now */
(void) pq_getmsgint(in, 4);
}
rel->attnames = attnames;
rel->atttyps = atttyps;
rel->attkeys = attkeys;
rel->natts = natts;
}
/*
* Write the namespace name or empty string for pg_catalog (to save space).
*/
static void
logicalrep_write_namespace(StringInfo out, Oid nspid)
{
if (nspid == PG_CATALOG_NAMESPACE)
pq_sendbyte(out, '\0');
else
{
char *nspname = get_namespace_name(nspid);
if (nspname == NULL)
elog(ERROR, "cache lookup failed for namespace %u",
nspid);
pq_sendstring(out, nspname);
}
}
/*
* Read the namespace name while treating empty string as pg_catalog.
*/
static const char *
logicalrep_read_namespace(StringInfo in)
{
const char *nspname = pq_getmsgstring(in);
if (nspname[0] == '\0')
nspname = "pg_catalog";
return nspname;
}

View File

@@ -0,0 +1,489 @@
/*-------------------------------------------------------------------------
* relation.c
* PostgreSQL logical replication
*
* Copyright (c) 2012-2016, PostgreSQL Global Development Group
*
* IDENTIFICATION
* src/backend/replication/logical/relation.c
*
* NOTES
* This file contains helper functions for logical replication relation
* mapping cache.
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "access/heapam.h"
#include "access/sysattr.h"
#include "catalog/namespace.h"
#include "nodes/makefuncs.h"
#include "replication/logicalrelation.h"
#include "replication/worker_internal.h"
#include "utils/builtins.h"
#include "utils/inval.h"
#include "utils/lsyscache.h"
#include "utils/memutils.h"
#include "utils/syscache.h"
static MemoryContext LogicalRepRelMapContext = NULL;
static HTAB *LogicalRepRelMap = NULL;
static HTAB *LogicalRepTypMap = NULL;
static void logicalrep_typmap_invalidate_cb(Datum arg, int cacheid,
uint32 hashvalue);
/*
* Relcache invalidation callback for our relation map cache.
*/
static void
logicalrep_relmap_invalidate_cb(Datum arg, Oid reloid)
{
LogicalRepRelMapEntry *entry;
/* Just to be sure. */
if (LogicalRepRelMap == NULL)
return;
if (reloid != InvalidOid)
{
HASH_SEQ_STATUS status;
hash_seq_init(&status, LogicalRepRelMap);
/* TODO, use inverse lookup hashtable? */
while ((entry = (LogicalRepRelMapEntry *) hash_seq_search(&status)) != NULL)
{
if (entry->localreloid == reloid)
{
entry->localreloid = InvalidOid;
hash_seq_term(&status);
break;
}
}
}
else
{
/* invalidate all cache entries */
HASH_SEQ_STATUS status;
hash_seq_init(&status, LogicalRepRelMap);
while ((entry = (LogicalRepRelMapEntry *) hash_seq_search(&status)) != NULL)
entry->localreloid = InvalidOid;
}
}
/*
* Initialize the relation map cache.
*/
static void
logicalrep_relmap_init()
{
HASHCTL ctl;
if (!LogicalRepRelMapContext)
LogicalRepRelMapContext =
AllocSetContextCreate(CacheMemoryContext,
"LogicalRepRelMapContext",
ALLOCSET_DEFAULT_SIZES);
/* Initialize the relation hash table. */
MemSet(&ctl, 0, sizeof(ctl));
ctl.keysize = sizeof(LogicalRepRelId);
ctl.entrysize = sizeof(LogicalRepRelMapEntry);
ctl.hcxt = LogicalRepRelMapContext;
LogicalRepRelMap = hash_create("logicalrep relation map cache", 128, &ctl,
HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
/* Initialize the type hash table. */
MemSet(&ctl, 0, sizeof(ctl));
ctl.keysize = sizeof(Oid);
ctl.entrysize = sizeof(LogicalRepTyp);
ctl.hcxt = LogicalRepRelMapContext;
/* This will usually be small. */
LogicalRepTypMap = hash_create("logicalrep type map cache", 2, &ctl,
HASH_ELEM | HASH_BLOBS |HASH_CONTEXT);
/* Watch for invalidation events. */
CacheRegisterRelcacheCallback(logicalrep_relmap_invalidate_cb,
(Datum) 0);
CacheRegisterSyscacheCallback(TYPEOID, logicalrep_typmap_invalidate_cb,
(Datum) 0);
}
/*
* Free the entry of a relation map cache.
*/
static void
logicalrep_relmap_free_entry(LogicalRepRelMapEntry *entry)
{
LogicalRepRelation *remoterel;
remoterel = &entry->remoterel;
pfree(remoterel->nspname);
pfree(remoterel->relname);
if (remoterel->natts > 0)
{
int i;
for (i = 0; i < remoterel->natts; i++)
pfree(remoterel->attnames[i]);
pfree(remoterel->attnames);
pfree(remoterel->atttyps);
}
remoterel->attnames = NULL;
remoterel->atttyps = NULL;
bms_free(remoterel->attkeys);
remoterel->attkeys = NULL;
if (entry->attrmap)
pfree(entry->attrmap);
entry->attrmap = NULL;
remoterel->natts = 0;
entry->localreloid = InvalidOid;
entry->localrel = NULL;
}
/*
* Add new entry or update existing entry in the relation map cache.
*
* Called when new relation mapping is sent by the publisher to update
* our expected view of incoming data from said publisher.
*/
void
logicalrep_relmap_update(LogicalRepRelation *remoterel)
{
MemoryContext oldctx;
LogicalRepRelMapEntry *entry;
bool found;
int i;
if (LogicalRepRelMap == NULL)
logicalrep_relmap_init();
/*
* HASH_ENTER returns the existing entry if present or creates a new one.
*/
entry = hash_search(LogicalRepRelMap, (void *) &remoterel->remoteid,
HASH_ENTER, &found);
if (found)
logicalrep_relmap_free_entry(entry);
/* Make cached copy of the data */
oldctx = MemoryContextSwitchTo(LogicalRepRelMapContext);
entry->remoterel.remoteid = remoterel->remoteid;
entry->remoterel.nspname = pstrdup(remoterel->nspname);
entry->remoterel.relname = pstrdup(remoterel->relname);
entry->remoterel.natts = remoterel->natts;
entry->remoterel.attnames = palloc(remoterel->natts * sizeof(char *));
entry->remoterel.atttyps = palloc(remoterel->natts * sizeof(Oid));
for (i = 0; i < remoterel->natts; i++)
{
entry->remoterel.attnames[i] = pstrdup(remoterel->attnames[i]);
entry->remoterel.atttyps[i] = remoterel->atttyps[i];
}
entry->remoterel.replident = remoterel->replident;
entry->remoterel.attkeys = bms_copy(remoterel->attkeys);
entry->attrmap = NULL;
entry->localreloid = InvalidOid;
MemoryContextSwitchTo(oldctx);
}
/*
* Find attribute index in TupleDesc struct by attribute name.
*
* Returns -1 if not found.
*/
static int
logicalrep_rel_att_by_name(LogicalRepRelation *remoterel, const char *attname)
{
int i;
for (i = 0; i < remoterel->natts; i++)
{
if (strcmp(remoterel->attnames[i], attname) == 0)
return i;
}
return -1;
}
/*
* Open the local relation associated with the remote one.
*
* Optionally rebuilds the Relcache mapping if it was invalidated
* by local DDL.
*/
LogicalRepRelMapEntry *
logicalrep_rel_open(LogicalRepRelId remoteid, LOCKMODE lockmode)
{
LogicalRepRelMapEntry *entry;
bool found;
if (LogicalRepRelMap == NULL)
logicalrep_relmap_init();
/* Search for existing entry. */
entry = hash_search(LogicalRepRelMap, (void *) &remoteid,
HASH_FIND, &found);
if (!found)
elog(ERROR, "no relation map entry for remote relation ID %u",
remoteid);
/* Need to update the local cache? */
if (!OidIsValid(entry->localreloid))
{
Oid relid;
int i;
int found;
Bitmapset *idkey;
TupleDesc desc;
LogicalRepRelation *remoterel;
MemoryContext oldctx;
remoterel = &entry->remoterel;
/* Try to find and lock the relation by name. */
relid = RangeVarGetRelid(makeRangeVar(remoterel->nspname,
remoterel->relname, -1),
lockmode, true);
if (!OidIsValid(relid))
ereport(ERROR,
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
errmsg("logical replication target relation \"%s.%s\" does not exist",
remoterel->nspname, remoterel->relname)));
entry->localrel = heap_open(relid, NoLock);
/*
* We currently only support writing to regular and partitioned
* tables.
*/
if (entry->localrel->rd_rel->relkind != RELKIND_RELATION)
ereport(ERROR,
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
errmsg("logical replication target relation \"%s.%s\" is not a table",
remoterel->nspname, remoterel->relname)));
/*
* Build the mapping of local attribute numbers to remote attribute
* numbers and validate that we don't miss any replicated columns
* as that would result in potentially unwanted data loss.
*/
desc = RelationGetDescr(entry->localrel);
oldctx = MemoryContextSwitchTo(LogicalRepRelMapContext);
entry->attrmap = palloc(desc->natts * sizeof(int));
MemoryContextSwitchTo(oldctx);
found = 0;
for (i = 0; i < desc->natts; i++)
{
int attnum = logicalrep_rel_att_by_name(remoterel,
NameStr(desc->attrs[i]->attname));
entry->attrmap[i] = attnum;
if (attnum >= 0)
found++;
}
/* TODO, detail message with names of missing columns */
if (found < remoterel->natts)
ereport(ERROR,
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
errmsg("logical replication target relation \"%s.%s\" is missing "
"some replicated columns",
remoterel->nspname, remoterel->relname)));
/*
* Check that replica identity matches. We allow for stricter replica
* identity (fewer columns) on subscriber as that will not stop us
* from finding unique tuple. IE, if publisher has identity
* (id,timestamp) and subscriber just (id) this will not be a problem,
* but in the opposite scenario it will.
*
* Don't throw any error here just mark the relation entry as not
* updatable, as replica identity is only for updates and deletes
* but inserts can be replicated even without it.
*/
entry->updatable = true;
idkey = RelationGetIndexAttrBitmap(entry->localrel,
INDEX_ATTR_BITMAP_IDENTITY_KEY);
/* fallback to PK if no replica identity */
if (idkey == NULL)
{
idkey = RelationGetIndexAttrBitmap(entry->localrel,
INDEX_ATTR_BITMAP_PRIMARY_KEY);
/*
* If no replica identity index and no PK, the published table
* must have replica identity FULL.
*/
if (idkey == NULL && remoterel->replident != REPLICA_IDENTITY_FULL)
entry->updatable = false;
}
i = -1;
while ((i = bms_next_member(idkey, i)) >= 0)
{
int attnum = i + FirstLowInvalidHeapAttributeNumber;
if (!AttrNumberIsForUserDefinedAttr(attnum))
ereport(ERROR,
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
errmsg("logical replication target relation \"%s.%s\" uses "
"system columns in REPLICA IDENTITY index",
remoterel->nspname, remoterel->relname)));
attnum = AttrNumberGetAttrOffset(attnum);
if (!bms_is_member(entry->attrmap[attnum], remoterel->attkeys))
{
entry->updatable = false;
break;
}
}
entry->localreloid = relid;
}
else
entry->localrel = heap_open(entry->localreloid, lockmode);
return entry;
}
/*
* Close the previously opened logical relation.
*/
void
logicalrep_rel_close(LogicalRepRelMapEntry *rel, LOCKMODE lockmode)
{
heap_close(rel->localrel, lockmode);
rel->localrel = NULL;
}
/*
* Type cache invalidation callback for our type map cache.
*/
static void
logicalrep_typmap_invalidate_cb(Datum arg, int cacheid, uint32 hashvalue)
{
HASH_SEQ_STATUS status;
LogicalRepTyp *entry;
/* Just to be sure. */
if (LogicalRepTypMap == NULL)
return;
/* invalidate all cache entries */
hash_seq_init(&status, LogicalRepTypMap);
while ((entry = (LogicalRepTyp *) hash_seq_search(&status)) != NULL)
entry->typoid = InvalidOid;
}
/*
* Free the type map cache entry data.
*/
static void
logicalrep_typmap_free_entry(LogicalRepTyp *entry)
{
pfree(entry->nspname);
pfree(entry->typname);
entry->typoid = InvalidOid;
}
/*
* Add new entry or update existing entry in the type map cache.
*/
void
logicalrep_typmap_update(LogicalRepTyp *remotetyp)
{
MemoryContext oldctx;
LogicalRepTyp *entry;
bool found;
if (LogicalRepTypMap == NULL)
logicalrep_relmap_init();
/*
* HASH_ENTER returns the existing entry if present or creates a new one.
*/
entry = hash_search(LogicalRepTypMap, (void *) &remotetyp->remoteid,
HASH_ENTER, &found);
if (found)
logicalrep_typmap_free_entry(entry);
/* Make cached copy of the data */
entry->remoteid = remotetyp->remoteid;
oldctx = MemoryContextSwitchTo(LogicalRepRelMapContext);
entry->nspname = pstrdup(remotetyp->nspname);
entry->typname = pstrdup(remotetyp->typname);
MemoryContextSwitchTo(oldctx);
entry->typoid = InvalidOid;
}
/*
* Fetch type info from the cache.
*/
Oid
logicalrep_typmap_getid(Oid remoteid)
{
LogicalRepTyp *entry;
bool found;
Oid nspoid;
/* Internal types are mapped directly. */
if (remoteid < FirstNormalObjectId)
{
if (!get_typisdefined(remoteid))
ereport(ERROR,
(errmsg("builtin type %u not found", remoteid),
errhint("This can be caused by having publisher with "
"higher major version than subscriber")));
return remoteid;
}
if (LogicalRepTypMap == NULL)
logicalrep_relmap_init();
/* Try finding the mapping. */
entry = hash_search(LogicalRepTypMap, (void *) &remoteid,
HASH_FIND, &found);
if (!found)
elog(ERROR, "no type map entry for remote type %u",
remoteid);
/* Found and mapped, return the oid. */
if (OidIsValid(entry->typoid))
return entry->typoid;
/* Otherwise, try to map to local type. */
nspoid = LookupExplicitNamespace(entry->nspname, true);
if (OidIsValid(nspoid))
entry->typoid = GetSysCacheOid2(TYPENAMENSP,
PointerGetDatum(entry->typname),
ObjectIdGetDatum(nspoid));
else
entry->typoid = InvalidOid;
if (!OidIsValid(entry->typoid))
ereport(ERROR,
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
errmsg("data type \"%s.%s\" required for logical replication does not exist",
entry->nspname, entry->typname)));
return entry->typoid;
}

File diff suppressed because it is too large Load Diff