mirror of
https://github.com/postgres/postgres.git
synced 2025-04-27 22:56:53 +03:00
Currently, for large transactions, the publisher sends the data in multiple streams (changes divided into chunks depending upon logical_decoding_work_mem), and then on the subscriber-side, the apply worker writes the changes into temporary files and once it receives the commit, it reads from those files and applies the entire transaction. To improve the performance of such transactions, we can instead allow them to be applied via parallel workers. In this approach, we assign a new parallel apply worker (if available) as soon as the xact's first stream is received and the leader apply worker will send changes to this new worker via shared memory. The parallel apply worker will directly apply the change instead of writing it to temporary files. However, if the leader apply worker times out while attempting to send a message to the parallel apply worker, it will switch to "partial serialize" mode - in this mode, the leader serializes all remaining changes to a file and notifies the parallel apply workers to read and apply them at the end of the transaction. We use a non-blocking way to send the messages from the leader apply worker to the parallel apply to avoid deadlocks. We keep this parallel apply assigned till the transaction commit is received and also wait for the worker to finish at commit. This preserves commit ordering and avoid writing to and reading from files in most cases. We still need to spill if there is no worker available. This patch also extends the SUBSCRIPTION 'streaming' parameter so that the user can control whether to apply the streaming transaction in a parallel apply worker or spill the change to disk. The user can set the streaming parameter to 'on/off', or 'parallel'. The parameter value 'parallel' means the streaming will be applied via a parallel apply worker, if available. The parameter value 'on' means the streaming transaction will be spilled to disk. The default value is 'off' (same as current behaviour). In addition, the patch extends the logical replication STREAM_ABORT message so that abort_lsn and abort_time can also be sent which can be used to update the replication origin in parallel apply worker when the streaming transaction is aborted. Because this message extension is needed to support parallel streaming, parallel streaming is not supported for publications on servers < PG16. Author: Hou Zhijie, Wang wei, Amit Kapila with design inputs from Sawada Masahiko Reviewed-by: Sawada Masahiko, Peter Smith, Dilip Kumar, Shi yu, Kuroda Hayato, Shveta Mallik Discussion: https://postgr.es/m/CAA4eK1+wyN6zpaHUkCLorEWNx75MG0xhMwcFhvjqm2KURZEAGw@mail.gmail.com
1255 lines
37 KiB
C
1255 lines
37 KiB
C
/* -------------------------------------------------------------------------
|
|
*
|
|
* decode.c
|
|
* This module decodes WAL records read using xlogreader.h's APIs for the
|
|
* purpose of logical decoding by passing information to the
|
|
* reorderbuffer module (containing the actual changes) and to the
|
|
* snapbuild module to build a fitting catalog snapshot (to be able to
|
|
* properly decode the changes in the reorderbuffer).
|
|
*
|
|
* NOTE:
|
|
* This basically tries to handle all low level xlog stuff for
|
|
* reorderbuffer.c and snapbuild.c. There's some minor leakage where a
|
|
* specific record's struct is used to pass data along, but those just
|
|
* happen to contain the right amount of data in a convenient
|
|
* format. There isn't and shouldn't be much intelligence about the
|
|
* contents of records in here except turning them into a more usable
|
|
* format.
|
|
*
|
|
* Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
|
|
* Portions Copyright (c) 1994, Regents of the University of California
|
|
*
|
|
* IDENTIFICATION
|
|
* src/backend/replication/logical/decode.c
|
|
*
|
|
* -------------------------------------------------------------------------
|
|
*/
|
|
#include "postgres.h"
|
|
|
|
#include "access/heapam.h"
|
|
#include "access/heapam_xlog.h"
|
|
#include "access/transam.h"
|
|
#include "access/xact.h"
|
|
#include "access/xlog_internal.h"
|
|
#include "access/xlogreader.h"
|
|
#include "access/xlogrecord.h"
|
|
#include "access/xlogutils.h"
|
|
#include "catalog/pg_control.h"
|
|
#include "replication/decode.h"
|
|
#include "replication/logical.h"
|
|
#include "replication/message.h"
|
|
#include "replication/origin.h"
|
|
#include "replication/reorderbuffer.h"
|
|
#include "replication/snapbuild.h"
|
|
#include "storage/standby.h"
|
|
|
|
/* individual record(group)'s handlers */
|
|
static void DecodeInsert(LogicalDecodingContext *ctx, XLogRecordBuffer *buf);
|
|
static void DecodeUpdate(LogicalDecodingContext *ctx, XLogRecordBuffer *buf);
|
|
static void DecodeDelete(LogicalDecodingContext *ctx, XLogRecordBuffer *buf);
|
|
static void DecodeTruncate(LogicalDecodingContext *ctx, XLogRecordBuffer *buf);
|
|
static void DecodeMultiInsert(LogicalDecodingContext *ctx, XLogRecordBuffer *buf);
|
|
static void DecodeSpecConfirm(LogicalDecodingContext *ctx, XLogRecordBuffer *buf);
|
|
|
|
static void DecodeCommit(LogicalDecodingContext *ctx, XLogRecordBuffer *buf,
|
|
xl_xact_parsed_commit *parsed, TransactionId xid,
|
|
bool two_phase);
|
|
static void DecodeAbort(LogicalDecodingContext *ctx, XLogRecordBuffer *buf,
|
|
xl_xact_parsed_abort *parsed, TransactionId xid,
|
|
bool two_phase);
|
|
static void DecodePrepare(LogicalDecodingContext *ctx, XLogRecordBuffer *buf,
|
|
xl_xact_parsed_prepare *parsed);
|
|
|
|
|
|
/* common function to decode tuples */
|
|
static void DecodeXLogTuple(char *data, Size len, ReorderBufferTupleBuf *tuple);
|
|
|
|
/* helper functions for decoding transactions */
|
|
static inline bool FilterPrepare(LogicalDecodingContext *ctx,
|
|
TransactionId xid, const char *gid);
|
|
static bool DecodeTXNNeedSkip(LogicalDecodingContext *ctx,
|
|
XLogRecordBuffer *buf, Oid txn_dbid,
|
|
RepOriginId origin_id);
|
|
|
|
/*
|
|
* Take every XLogReadRecord()ed record and perform the actions required to
|
|
* decode it using the output plugin already setup in the logical decoding
|
|
* context.
|
|
*
|
|
* NB: Note that every record's xid needs to be processed by reorderbuffer
|
|
* (xids contained in the content of records are not relevant for this rule).
|
|
* That means that for records which'd otherwise not go through the
|
|
* reorderbuffer ReorderBufferProcessXid() has to be called. We don't want to
|
|
* call ReorderBufferProcessXid for each record type by default, because
|
|
* e.g. empty xacts can be handled more efficiently if there's no previous
|
|
* state for them.
|
|
*
|
|
* We also support the ability to fast forward thru records, skipping some
|
|
* record types completely - see individual record types for details.
|
|
*/
|
|
void
|
|
LogicalDecodingProcessRecord(LogicalDecodingContext *ctx, XLogReaderState *record)
|
|
{
|
|
XLogRecordBuffer buf;
|
|
TransactionId txid;
|
|
RmgrData rmgr;
|
|
|
|
buf.origptr = ctx->reader->ReadRecPtr;
|
|
buf.endptr = ctx->reader->EndRecPtr;
|
|
buf.record = record;
|
|
|
|
txid = XLogRecGetTopXid(record);
|
|
|
|
/*
|
|
* If the top-level xid is valid, we need to assign the subxact to the
|
|
* top-level xact. We need to do this for all records, hence we do it
|
|
* before the switch.
|
|
*/
|
|
if (TransactionIdIsValid(txid))
|
|
{
|
|
ReorderBufferAssignChild(ctx->reorder,
|
|
txid,
|
|
XLogRecGetXid(record),
|
|
buf.origptr);
|
|
}
|
|
|
|
rmgr = GetRmgr(XLogRecGetRmid(record));
|
|
|
|
if (rmgr.rm_decode != NULL)
|
|
rmgr.rm_decode(ctx, &buf);
|
|
else
|
|
{
|
|
/* just deal with xid, and done */
|
|
ReorderBufferProcessXid(ctx->reorder, XLogRecGetXid(record),
|
|
buf.origptr);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Handle rmgr XLOG_ID records for LogicalDecodingProcessRecord().
|
|
*/
|
|
void
|
|
xlog_decode(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
|
|
{
|
|
SnapBuild *builder = ctx->snapshot_builder;
|
|
uint8 info = XLogRecGetInfo(buf->record) & ~XLR_INFO_MASK;
|
|
|
|
ReorderBufferProcessXid(ctx->reorder, XLogRecGetXid(buf->record),
|
|
buf->origptr);
|
|
|
|
switch (info)
|
|
{
|
|
/* this is also used in END_OF_RECOVERY checkpoints */
|
|
case XLOG_CHECKPOINT_SHUTDOWN:
|
|
case XLOG_END_OF_RECOVERY:
|
|
SnapBuildSerializationPoint(builder, buf->origptr);
|
|
|
|
break;
|
|
case XLOG_CHECKPOINT_ONLINE:
|
|
|
|
/*
|
|
* a RUNNING_XACTS record will have been logged near to this, we
|
|
* can restart from there.
|
|
*/
|
|
break;
|
|
case XLOG_NOOP:
|
|
case XLOG_NEXTOID:
|
|
case XLOG_SWITCH:
|
|
case XLOG_BACKUP_END:
|
|
case XLOG_PARAMETER_CHANGE:
|
|
case XLOG_RESTORE_POINT:
|
|
case XLOG_FPW_CHANGE:
|
|
case XLOG_FPI_FOR_HINT:
|
|
case XLOG_FPI:
|
|
case XLOG_OVERWRITE_CONTRECORD:
|
|
break;
|
|
default:
|
|
elog(ERROR, "unexpected RM_XLOG_ID record type: %u", info);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Handle rmgr XACT_ID records for LogicalDecodingProcessRecord().
|
|
*/
|
|
void
|
|
xact_decode(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
|
|
{
|
|
SnapBuild *builder = ctx->snapshot_builder;
|
|
ReorderBuffer *reorder = ctx->reorder;
|
|
XLogReaderState *r = buf->record;
|
|
uint8 info = XLogRecGetInfo(r) & XLOG_XACT_OPMASK;
|
|
|
|
/*
|
|
* If the snapshot isn't yet fully built, we cannot decode anything, so
|
|
* bail out.
|
|
*/
|
|
if (SnapBuildCurrentState(builder) < SNAPBUILD_FULL_SNAPSHOT)
|
|
return;
|
|
|
|
switch (info)
|
|
{
|
|
case XLOG_XACT_COMMIT:
|
|
case XLOG_XACT_COMMIT_PREPARED:
|
|
{
|
|
xl_xact_commit *xlrec;
|
|
xl_xact_parsed_commit parsed;
|
|
TransactionId xid;
|
|
bool two_phase = false;
|
|
|
|
xlrec = (xl_xact_commit *) XLogRecGetData(r);
|
|
ParseCommitRecord(XLogRecGetInfo(buf->record), xlrec, &parsed);
|
|
|
|
if (!TransactionIdIsValid(parsed.twophase_xid))
|
|
xid = XLogRecGetXid(r);
|
|
else
|
|
xid = parsed.twophase_xid;
|
|
|
|
/*
|
|
* We would like to process the transaction in a two-phase
|
|
* manner iff output plugin supports two-phase commits and
|
|
* doesn't filter the transaction at prepare time.
|
|
*/
|
|
if (info == XLOG_XACT_COMMIT_PREPARED)
|
|
two_phase = !(FilterPrepare(ctx, xid,
|
|
parsed.twophase_gid));
|
|
|
|
DecodeCommit(ctx, buf, &parsed, xid, two_phase);
|
|
break;
|
|
}
|
|
case XLOG_XACT_ABORT:
|
|
case XLOG_XACT_ABORT_PREPARED:
|
|
{
|
|
xl_xact_abort *xlrec;
|
|
xl_xact_parsed_abort parsed;
|
|
TransactionId xid;
|
|
bool two_phase = false;
|
|
|
|
xlrec = (xl_xact_abort *) XLogRecGetData(r);
|
|
ParseAbortRecord(XLogRecGetInfo(buf->record), xlrec, &parsed);
|
|
|
|
if (!TransactionIdIsValid(parsed.twophase_xid))
|
|
xid = XLogRecGetXid(r);
|
|
else
|
|
xid = parsed.twophase_xid;
|
|
|
|
/*
|
|
* We would like to process the transaction in a two-phase
|
|
* manner iff output plugin supports two-phase commits and
|
|
* doesn't filter the transaction at prepare time.
|
|
*/
|
|
if (info == XLOG_XACT_ABORT_PREPARED)
|
|
two_phase = !(FilterPrepare(ctx, xid,
|
|
parsed.twophase_gid));
|
|
|
|
DecodeAbort(ctx, buf, &parsed, xid, two_phase);
|
|
break;
|
|
}
|
|
case XLOG_XACT_ASSIGNMENT:
|
|
|
|
/*
|
|
* We assign subxact to the toplevel xact while processing each
|
|
* record if required. So, we don't need to do anything here. See
|
|
* LogicalDecodingProcessRecord.
|
|
*/
|
|
break;
|
|
case XLOG_XACT_INVALIDATIONS:
|
|
{
|
|
TransactionId xid;
|
|
xl_xact_invals *invals;
|
|
|
|
xid = XLogRecGetXid(r);
|
|
invals = (xl_xact_invals *) XLogRecGetData(r);
|
|
|
|
/*
|
|
* Execute the invalidations for xid-less transactions,
|
|
* otherwise, accumulate them so that they can be processed at
|
|
* the commit time.
|
|
*/
|
|
if (TransactionIdIsValid(xid))
|
|
{
|
|
if (!ctx->fast_forward)
|
|
ReorderBufferAddInvalidations(reorder, xid,
|
|
buf->origptr,
|
|
invals->nmsgs,
|
|
invals->msgs);
|
|
ReorderBufferXidSetCatalogChanges(ctx->reorder, xid,
|
|
buf->origptr);
|
|
}
|
|
else if ((!ctx->fast_forward))
|
|
ReorderBufferImmediateInvalidation(ctx->reorder,
|
|
invals->nmsgs,
|
|
invals->msgs);
|
|
}
|
|
break;
|
|
case XLOG_XACT_PREPARE:
|
|
{
|
|
xl_xact_parsed_prepare parsed;
|
|
xl_xact_prepare *xlrec;
|
|
|
|
/* ok, parse it */
|
|
xlrec = (xl_xact_prepare *) XLogRecGetData(r);
|
|
ParsePrepareRecord(XLogRecGetInfo(buf->record),
|
|
xlrec, &parsed);
|
|
|
|
/*
|
|
* We would like to process the transaction in a two-phase
|
|
* manner iff output plugin supports two-phase commits and
|
|
* doesn't filter the transaction at prepare time.
|
|
*/
|
|
if (FilterPrepare(ctx, parsed.twophase_xid,
|
|
parsed.twophase_gid))
|
|
{
|
|
ReorderBufferProcessXid(reorder, parsed.twophase_xid,
|
|
buf->origptr);
|
|
break;
|
|
}
|
|
|
|
/*
|
|
* Note that if the prepared transaction has locked [user]
|
|
* catalog tables exclusively then decoding prepare can block
|
|
* till the main transaction is committed because it needs to
|
|
* lock the catalog tables.
|
|
*
|
|
* XXX Now, this can even lead to a deadlock if the prepare
|
|
* transaction is waiting to get it logically replicated for
|
|
* distributed 2PC. This can be avoided by disallowing
|
|
* preparing transactions that have locked [user] catalog
|
|
* tables exclusively but as of now, we ask users not to do
|
|
* such an operation.
|
|
*/
|
|
DecodePrepare(ctx, buf, &parsed);
|
|
break;
|
|
}
|
|
default:
|
|
elog(ERROR, "unexpected RM_XACT_ID record type: %u", info);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Handle rmgr STANDBY_ID records for LogicalDecodingProcessRecord().
|
|
*/
|
|
void
|
|
standby_decode(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
|
|
{
|
|
SnapBuild *builder = ctx->snapshot_builder;
|
|
XLogReaderState *r = buf->record;
|
|
uint8 info = XLogRecGetInfo(r) & ~XLR_INFO_MASK;
|
|
|
|
ReorderBufferProcessXid(ctx->reorder, XLogRecGetXid(r), buf->origptr);
|
|
|
|
switch (info)
|
|
{
|
|
case XLOG_RUNNING_XACTS:
|
|
{
|
|
xl_running_xacts *running = (xl_running_xacts *) XLogRecGetData(r);
|
|
|
|
SnapBuildProcessRunningXacts(builder, buf->origptr, running);
|
|
|
|
/*
|
|
* Abort all transactions that we keep track of, that are
|
|
* older than the record's oldestRunningXid. This is the most
|
|
* convenient spot for doing so since, in contrast to shutdown
|
|
* or end-of-recovery checkpoints, we have information about
|
|
* all running transactions which includes prepared ones,
|
|
* while shutdown checkpoints just know that no non-prepared
|
|
* transactions are in progress.
|
|
*/
|
|
ReorderBufferAbortOld(ctx->reorder, running->oldestRunningXid);
|
|
}
|
|
break;
|
|
case XLOG_STANDBY_LOCK:
|
|
break;
|
|
case XLOG_INVALIDATIONS:
|
|
|
|
/*
|
|
* We are processing the invalidations at the command level via
|
|
* XLOG_XACT_INVALIDATIONS. So we don't need to do anything here.
|
|
*/
|
|
break;
|
|
default:
|
|
elog(ERROR, "unexpected RM_STANDBY_ID record type: %u", info);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Handle rmgr HEAP2_ID records for LogicalDecodingProcessRecord().
|
|
*/
|
|
void
|
|
heap2_decode(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
|
|
{
|
|
uint8 info = XLogRecGetInfo(buf->record) & XLOG_HEAP_OPMASK;
|
|
TransactionId xid = XLogRecGetXid(buf->record);
|
|
SnapBuild *builder = ctx->snapshot_builder;
|
|
|
|
ReorderBufferProcessXid(ctx->reorder, xid, buf->origptr);
|
|
|
|
/*
|
|
* If we don't have snapshot or we are just fast-forwarding, there is no
|
|
* point in decoding changes.
|
|
*/
|
|
if (SnapBuildCurrentState(builder) < SNAPBUILD_FULL_SNAPSHOT ||
|
|
ctx->fast_forward)
|
|
return;
|
|
|
|
switch (info)
|
|
{
|
|
case XLOG_HEAP2_MULTI_INSERT:
|
|
if (!ctx->fast_forward &&
|
|
SnapBuildProcessChange(builder, xid, buf->origptr))
|
|
DecodeMultiInsert(ctx, buf);
|
|
break;
|
|
case XLOG_HEAP2_NEW_CID:
|
|
{
|
|
xl_heap_new_cid *xlrec;
|
|
|
|
xlrec = (xl_heap_new_cid *) XLogRecGetData(buf->record);
|
|
SnapBuildProcessNewCid(builder, xid, buf->origptr, xlrec);
|
|
|
|
break;
|
|
}
|
|
case XLOG_HEAP2_REWRITE:
|
|
|
|
/*
|
|
* Although these records only exist to serve the needs of logical
|
|
* decoding, all the work happens as part of crash or archive
|
|
* recovery, so we don't need to do anything here.
|
|
*/
|
|
break;
|
|
|
|
/*
|
|
* Everything else here is just low level physical stuff we're not
|
|
* interested in.
|
|
*/
|
|
case XLOG_HEAP2_FREEZE_PAGE:
|
|
case XLOG_HEAP2_PRUNE:
|
|
case XLOG_HEAP2_VACUUM:
|
|
case XLOG_HEAP2_VISIBLE:
|
|
case XLOG_HEAP2_LOCK_UPDATED:
|
|
break;
|
|
default:
|
|
elog(ERROR, "unexpected RM_HEAP2_ID record type: %u", info);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Handle rmgr HEAP_ID records for LogicalDecodingProcessRecord().
|
|
*/
|
|
void
|
|
heap_decode(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
|
|
{
|
|
uint8 info = XLogRecGetInfo(buf->record) & XLOG_HEAP_OPMASK;
|
|
TransactionId xid = XLogRecGetXid(buf->record);
|
|
SnapBuild *builder = ctx->snapshot_builder;
|
|
|
|
ReorderBufferProcessXid(ctx->reorder, xid, buf->origptr);
|
|
|
|
/*
|
|
* If we don't have snapshot or we are just fast-forwarding, there is no
|
|
* point in decoding data changes.
|
|
*/
|
|
if (SnapBuildCurrentState(builder) < SNAPBUILD_FULL_SNAPSHOT ||
|
|
ctx->fast_forward)
|
|
return;
|
|
|
|
switch (info)
|
|
{
|
|
case XLOG_HEAP_INSERT:
|
|
if (SnapBuildProcessChange(builder, xid, buf->origptr))
|
|
DecodeInsert(ctx, buf);
|
|
break;
|
|
|
|
/*
|
|
* Treat HOT update as normal updates. There is no useful
|
|
* information in the fact that we could make it a HOT update
|
|
* locally and the WAL layout is compatible.
|
|
*/
|
|
case XLOG_HEAP_HOT_UPDATE:
|
|
case XLOG_HEAP_UPDATE:
|
|
if (SnapBuildProcessChange(builder, xid, buf->origptr))
|
|
DecodeUpdate(ctx, buf);
|
|
break;
|
|
|
|
case XLOG_HEAP_DELETE:
|
|
if (SnapBuildProcessChange(builder, xid, buf->origptr))
|
|
DecodeDelete(ctx, buf);
|
|
break;
|
|
|
|
case XLOG_HEAP_TRUNCATE:
|
|
if (SnapBuildProcessChange(builder, xid, buf->origptr))
|
|
DecodeTruncate(ctx, buf);
|
|
break;
|
|
|
|
case XLOG_HEAP_INPLACE:
|
|
|
|
/*
|
|
* Inplace updates are only ever performed on catalog tuples and
|
|
* can, per definition, not change tuple visibility. Since we
|
|
* don't decode catalog tuples, we're not interested in the
|
|
* record's contents.
|
|
*
|
|
* In-place updates can be used either by XID-bearing transactions
|
|
* (e.g. in CREATE INDEX CONCURRENTLY) or by XID-less
|
|
* transactions (e.g. VACUUM). In the former case, the commit
|
|
* record will include cache invalidations, so we mark the
|
|
* transaction as catalog modifying here. Currently that's
|
|
* redundant because the commit will do that as well, but once we
|
|
* support decoding in-progress relations, this will be important.
|
|
*/
|
|
if (!TransactionIdIsValid(xid))
|
|
break;
|
|
|
|
(void) SnapBuildProcessChange(builder, xid, buf->origptr);
|
|
ReorderBufferXidSetCatalogChanges(ctx->reorder, xid, buf->origptr);
|
|
break;
|
|
|
|
case XLOG_HEAP_CONFIRM:
|
|
if (SnapBuildProcessChange(builder, xid, buf->origptr))
|
|
DecodeSpecConfirm(ctx, buf);
|
|
break;
|
|
|
|
case XLOG_HEAP_LOCK:
|
|
/* we don't care about row level locks for now */
|
|
break;
|
|
|
|
default:
|
|
elog(ERROR, "unexpected RM_HEAP_ID record type: %u", info);
|
|
break;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Ask output plugin whether we want to skip this PREPARE and send
|
|
* this transaction as a regular commit later.
|
|
*/
|
|
static inline bool
|
|
FilterPrepare(LogicalDecodingContext *ctx, TransactionId xid,
|
|
const char *gid)
|
|
{
|
|
/*
|
|
* Skip if decoding of two-phase transactions at PREPARE time is not
|
|
* enabled. In that case, all two-phase transactions are considered
|
|
* filtered out and will be applied as regular transactions at COMMIT
|
|
* PREPARED.
|
|
*/
|
|
if (!ctx->twophase)
|
|
return true;
|
|
|
|
/*
|
|
* The filter_prepare callback is optional. When not supplied, all
|
|
* prepared transactions should go through.
|
|
*/
|
|
if (ctx->callbacks.filter_prepare_cb == NULL)
|
|
return false;
|
|
|
|
return filter_prepare_cb_wrapper(ctx, xid, gid);
|
|
}
|
|
|
|
static inline bool
|
|
FilterByOrigin(LogicalDecodingContext *ctx, RepOriginId origin_id)
|
|
{
|
|
if (ctx->callbacks.filter_by_origin_cb == NULL)
|
|
return false;
|
|
|
|
return filter_by_origin_cb_wrapper(ctx, origin_id);
|
|
}
|
|
|
|
/*
|
|
* Handle rmgr LOGICALMSG_ID records for LogicalDecodingProcessRecord().
|
|
*/
|
|
void
|
|
logicalmsg_decode(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
|
|
{
|
|
SnapBuild *builder = ctx->snapshot_builder;
|
|
XLogReaderState *r = buf->record;
|
|
TransactionId xid = XLogRecGetXid(r);
|
|
uint8 info = XLogRecGetInfo(r) & ~XLR_INFO_MASK;
|
|
RepOriginId origin_id = XLogRecGetOrigin(r);
|
|
Snapshot snapshot;
|
|
xl_logical_message *message;
|
|
|
|
if (info != XLOG_LOGICAL_MESSAGE)
|
|
elog(ERROR, "unexpected RM_LOGICALMSG_ID record type: %u", info);
|
|
|
|
ReorderBufferProcessXid(ctx->reorder, XLogRecGetXid(r), buf->origptr);
|
|
|
|
/*
|
|
* If we don't have snapshot or we are just fast-forwarding, there is no
|
|
* point in decoding messages.
|
|
*/
|
|
if (SnapBuildCurrentState(builder) < SNAPBUILD_FULL_SNAPSHOT ||
|
|
ctx->fast_forward)
|
|
return;
|
|
|
|
message = (xl_logical_message *) XLogRecGetData(r);
|
|
|
|
if (message->dbId != ctx->slot->data.database ||
|
|
FilterByOrigin(ctx, origin_id))
|
|
return;
|
|
|
|
if (message->transactional &&
|
|
!SnapBuildProcessChange(builder, xid, buf->origptr))
|
|
return;
|
|
else if (!message->transactional &&
|
|
(SnapBuildCurrentState(builder) != SNAPBUILD_CONSISTENT ||
|
|
SnapBuildXactNeedsSkip(builder, buf->origptr)))
|
|
return;
|
|
|
|
snapshot = SnapBuildGetOrBuildSnapshot(builder);
|
|
ReorderBufferQueueMessage(ctx->reorder, xid, snapshot, buf->endptr,
|
|
message->transactional,
|
|
message->message, /* first part of message is
|
|
* prefix */
|
|
message->message_size,
|
|
message->message + message->prefix_size);
|
|
}
|
|
|
|
/*
|
|
* Consolidated commit record handling between the different form of commit
|
|
* records.
|
|
*
|
|
* 'two_phase' indicates that caller wants to process the transaction in two
|
|
* phases, first process prepare if not already done and then process
|
|
* commit_prepared.
|
|
*/
|
|
static void
|
|
DecodeCommit(LogicalDecodingContext *ctx, XLogRecordBuffer *buf,
|
|
xl_xact_parsed_commit *parsed, TransactionId xid,
|
|
bool two_phase)
|
|
{
|
|
XLogRecPtr origin_lsn = InvalidXLogRecPtr;
|
|
TimestampTz commit_time = parsed->xact_time;
|
|
RepOriginId origin_id = XLogRecGetOrigin(buf->record);
|
|
int i;
|
|
|
|
if (parsed->xinfo & XACT_XINFO_HAS_ORIGIN)
|
|
{
|
|
origin_lsn = parsed->origin_lsn;
|
|
commit_time = parsed->origin_timestamp;
|
|
}
|
|
|
|
SnapBuildCommitTxn(ctx->snapshot_builder, buf->origptr, xid,
|
|
parsed->nsubxacts, parsed->subxacts,
|
|
parsed->xinfo);
|
|
|
|
/* ----
|
|
* Check whether we are interested in this specific transaction, and tell
|
|
* the reorderbuffer to forget the content of the (sub-)transactions
|
|
* if not.
|
|
*
|
|
* We can't just use ReorderBufferAbort() here, because we need to execute
|
|
* the transaction's invalidations. This currently won't be needed if
|
|
* we're just skipping over the transaction because currently we only do
|
|
* so during startup, to get to the first transaction the client needs. As
|
|
* we have reset the catalog caches before starting to read WAL, and we
|
|
* haven't yet touched any catalogs, there can't be anything to invalidate.
|
|
* But if we're "forgetting" this commit because it happened in another
|
|
* database, the invalidations might be important, because they could be
|
|
* for shared catalogs and we might have loaded data into the relevant
|
|
* syscaches.
|
|
* ---
|
|
*/
|
|
if (DecodeTXNNeedSkip(ctx, buf, parsed->dbId, origin_id))
|
|
{
|
|
for (i = 0; i < parsed->nsubxacts; i++)
|
|
{
|
|
ReorderBufferForget(ctx->reorder, parsed->subxacts[i], buf->origptr);
|
|
}
|
|
ReorderBufferForget(ctx->reorder, xid, buf->origptr);
|
|
|
|
return;
|
|
}
|
|
|
|
/* tell the reorderbuffer about the surviving subtransactions */
|
|
for (i = 0; i < parsed->nsubxacts; i++)
|
|
{
|
|
ReorderBufferCommitChild(ctx->reorder, xid, parsed->subxacts[i],
|
|
buf->origptr, buf->endptr);
|
|
}
|
|
|
|
/*
|
|
* Send the final commit record if the transaction data is already
|
|
* decoded, otherwise, process the entire transaction.
|
|
*/
|
|
if (two_phase)
|
|
{
|
|
ReorderBufferFinishPrepared(ctx->reorder, xid, buf->origptr, buf->endptr,
|
|
SnapBuildGetTwoPhaseAt(ctx->snapshot_builder),
|
|
commit_time, origin_id, origin_lsn,
|
|
parsed->twophase_gid, true);
|
|
}
|
|
else
|
|
{
|
|
ReorderBufferCommit(ctx->reorder, xid, buf->origptr, buf->endptr,
|
|
commit_time, origin_id, origin_lsn);
|
|
}
|
|
|
|
/*
|
|
* Update the decoding stats at transaction prepare/commit/abort.
|
|
* Additionally we send the stats when we spill or stream the changes to
|
|
* avoid losing them in case the decoding is interrupted. It is not clear
|
|
* that sending more or less frequently than this would be better.
|
|
*/
|
|
UpdateDecodingStats(ctx);
|
|
}
|
|
|
|
/*
|
|
* Decode PREPARE record. Similar logic as in DecodeCommit.
|
|
*
|
|
* Note that we don't skip prepare even if have detected concurrent abort
|
|
* because it is quite possible that we had already sent some changes before we
|
|
* detect abort in which case we need to abort those changes in the subscriber.
|
|
* To abort such changes, we do send the prepare and then the rollback prepared
|
|
* which is what happened on the publisher-side as well. Now, we can invent a
|
|
* new abort API wherein in such cases we send abort and skip sending prepared
|
|
* and rollback prepared but then it is not that straightforward because we
|
|
* might have streamed this transaction by that time in which case it is
|
|
* handled when the rollback is encountered. It is not impossible to optimize
|
|
* the concurrent abort case but it can introduce design complexity w.r.t
|
|
* handling different cases so leaving it for now as it doesn't seem worth it.
|
|
*/
|
|
static void
|
|
DecodePrepare(LogicalDecodingContext *ctx, XLogRecordBuffer *buf,
|
|
xl_xact_parsed_prepare *parsed)
|
|
{
|
|
SnapBuild *builder = ctx->snapshot_builder;
|
|
XLogRecPtr origin_lsn = parsed->origin_lsn;
|
|
TimestampTz prepare_time = parsed->xact_time;
|
|
RepOriginId origin_id = XLogRecGetOrigin(buf->record);
|
|
int i;
|
|
TransactionId xid = parsed->twophase_xid;
|
|
|
|
if (parsed->origin_timestamp != 0)
|
|
prepare_time = parsed->origin_timestamp;
|
|
|
|
/*
|
|
* Remember the prepare info for a txn so that it can be used later in
|
|
* commit prepared if required. See ReorderBufferFinishPrepared.
|
|
*/
|
|
if (!ReorderBufferRememberPrepareInfo(ctx->reorder, xid, buf->origptr,
|
|
buf->endptr, prepare_time, origin_id,
|
|
origin_lsn))
|
|
return;
|
|
|
|
/* We can't start streaming unless a consistent state is reached. */
|
|
if (SnapBuildCurrentState(builder) < SNAPBUILD_CONSISTENT)
|
|
{
|
|
ReorderBufferSkipPrepare(ctx->reorder, xid);
|
|
return;
|
|
}
|
|
|
|
/*
|
|
* Check whether we need to process this transaction. See
|
|
* DecodeTXNNeedSkip for the reasons why we sometimes want to skip the
|
|
* transaction.
|
|
*
|
|
* We can't call ReorderBufferForget as we did in DecodeCommit as the txn
|
|
* hasn't yet been committed, removing this txn before a commit might
|
|
* result in the computation of an incorrect restart_lsn. See
|
|
* SnapBuildProcessRunningXacts. But we need to process cache
|
|
* invalidations if there are any for the reasons mentioned in
|
|
* DecodeCommit.
|
|
*/
|
|
if (DecodeTXNNeedSkip(ctx, buf, parsed->dbId, origin_id))
|
|
{
|
|
ReorderBufferSkipPrepare(ctx->reorder, xid);
|
|
ReorderBufferInvalidate(ctx->reorder, xid, buf->origptr);
|
|
return;
|
|
}
|
|
|
|
/* Tell the reorderbuffer about the surviving subtransactions. */
|
|
for (i = 0; i < parsed->nsubxacts; i++)
|
|
{
|
|
ReorderBufferCommitChild(ctx->reorder, xid, parsed->subxacts[i],
|
|
buf->origptr, buf->endptr);
|
|
}
|
|
|
|
/* replay actions of all transaction + subtransactions in order */
|
|
ReorderBufferPrepare(ctx->reorder, xid, parsed->twophase_gid);
|
|
|
|
/*
|
|
* Update the decoding stats at transaction prepare/commit/abort.
|
|
* Additionally we send the stats when we spill or stream the changes to
|
|
* avoid losing them in case the decoding is interrupted. It is not clear
|
|
* that sending more or less frequently than this would be better.
|
|
*/
|
|
UpdateDecodingStats(ctx);
|
|
}
|
|
|
|
|
|
/*
|
|
* Get the data from the various forms of abort records and pass it on to
|
|
* snapbuild.c and reorderbuffer.c.
|
|
*
|
|
* 'two_phase' indicates to finish prepared transaction.
|
|
*/
|
|
static void
|
|
DecodeAbort(LogicalDecodingContext *ctx, XLogRecordBuffer *buf,
|
|
xl_xact_parsed_abort *parsed, TransactionId xid,
|
|
bool two_phase)
|
|
{
|
|
int i;
|
|
XLogRecPtr origin_lsn = InvalidXLogRecPtr;
|
|
TimestampTz abort_time = parsed->xact_time;
|
|
RepOriginId origin_id = XLogRecGetOrigin(buf->record);
|
|
bool skip_xact;
|
|
|
|
if (parsed->xinfo & XACT_XINFO_HAS_ORIGIN)
|
|
{
|
|
origin_lsn = parsed->origin_lsn;
|
|
abort_time = parsed->origin_timestamp;
|
|
}
|
|
|
|
/*
|
|
* Check whether we need to process this transaction. See
|
|
* DecodeTXNNeedSkip for the reasons why we sometimes want to skip the
|
|
* transaction.
|
|
*/
|
|
skip_xact = DecodeTXNNeedSkip(ctx, buf, parsed->dbId, origin_id);
|
|
|
|
/*
|
|
* Send the final rollback record for a prepared transaction unless we
|
|
* need to skip it. For non-two-phase xacts, simply forget the xact.
|
|
*/
|
|
if (two_phase && !skip_xact)
|
|
{
|
|
ReorderBufferFinishPrepared(ctx->reorder, xid, buf->origptr, buf->endptr,
|
|
InvalidXLogRecPtr,
|
|
abort_time, origin_id, origin_lsn,
|
|
parsed->twophase_gid, false);
|
|
}
|
|
else
|
|
{
|
|
for (i = 0; i < parsed->nsubxacts; i++)
|
|
{
|
|
ReorderBufferAbort(ctx->reorder, parsed->subxacts[i],
|
|
buf->record->EndRecPtr, abort_time);
|
|
}
|
|
|
|
ReorderBufferAbort(ctx->reorder, xid, buf->record->EndRecPtr,
|
|
abort_time);
|
|
}
|
|
|
|
/* update the decoding stats */
|
|
UpdateDecodingStats(ctx);
|
|
}
|
|
|
|
/*
|
|
* Parse XLOG_HEAP_INSERT (not MULTI_INSERT!) records into tuplebufs.
|
|
*
|
|
* Deletes can contain the new tuple.
|
|
*/
|
|
static void
|
|
DecodeInsert(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
|
|
{
|
|
Size datalen;
|
|
char *tupledata;
|
|
Size tuplelen;
|
|
XLogReaderState *r = buf->record;
|
|
xl_heap_insert *xlrec;
|
|
ReorderBufferChange *change;
|
|
RelFileLocator target_locator;
|
|
|
|
xlrec = (xl_heap_insert *) XLogRecGetData(r);
|
|
|
|
/*
|
|
* Ignore insert records without new tuples (this does happen when
|
|
* raw_heap_insert marks the TOAST record as HEAP_INSERT_NO_LOGICAL).
|
|
*/
|
|
if (!(xlrec->flags & XLH_INSERT_CONTAINS_NEW_TUPLE))
|
|
return;
|
|
|
|
/* only interested in our database */
|
|
XLogRecGetBlockTag(r, 0, &target_locator, NULL, NULL);
|
|
if (target_locator.dbOid != ctx->slot->data.database)
|
|
return;
|
|
|
|
/* output plugin doesn't look for this origin, no need to queue */
|
|
if (FilterByOrigin(ctx, XLogRecGetOrigin(r)))
|
|
return;
|
|
|
|
change = ReorderBufferGetChange(ctx->reorder);
|
|
if (!(xlrec->flags & XLH_INSERT_IS_SPECULATIVE))
|
|
change->action = REORDER_BUFFER_CHANGE_INSERT;
|
|
else
|
|
change->action = REORDER_BUFFER_CHANGE_INTERNAL_SPEC_INSERT;
|
|
change->origin_id = XLogRecGetOrigin(r);
|
|
|
|
memcpy(&change->data.tp.rlocator, &target_locator, sizeof(RelFileLocator));
|
|
|
|
tupledata = XLogRecGetBlockData(r, 0, &datalen);
|
|
tuplelen = datalen - SizeOfHeapHeader;
|
|
|
|
change->data.tp.newtuple =
|
|
ReorderBufferGetTupleBuf(ctx->reorder, tuplelen);
|
|
|
|
DecodeXLogTuple(tupledata, datalen, change->data.tp.newtuple);
|
|
|
|
change->data.tp.clear_toast_afterwards = true;
|
|
|
|
ReorderBufferQueueChange(ctx->reorder, XLogRecGetXid(r), buf->origptr,
|
|
change,
|
|
xlrec->flags & XLH_INSERT_ON_TOAST_RELATION);
|
|
}
|
|
|
|
/*
|
|
* Parse XLOG_HEAP_UPDATE and XLOG_HEAP_HOT_UPDATE, which have the same layout
|
|
* in the record, from wal into proper tuplebufs.
|
|
*
|
|
* Updates can possibly contain a new tuple and the old primary key.
|
|
*/
|
|
static void
|
|
DecodeUpdate(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
|
|
{
|
|
XLogReaderState *r = buf->record;
|
|
xl_heap_update *xlrec;
|
|
ReorderBufferChange *change;
|
|
char *data;
|
|
RelFileLocator target_locator;
|
|
|
|
xlrec = (xl_heap_update *) XLogRecGetData(r);
|
|
|
|
/* only interested in our database */
|
|
XLogRecGetBlockTag(r, 0, &target_locator, NULL, NULL);
|
|
if (target_locator.dbOid != ctx->slot->data.database)
|
|
return;
|
|
|
|
/* output plugin doesn't look for this origin, no need to queue */
|
|
if (FilterByOrigin(ctx, XLogRecGetOrigin(r)))
|
|
return;
|
|
|
|
change = ReorderBufferGetChange(ctx->reorder);
|
|
change->action = REORDER_BUFFER_CHANGE_UPDATE;
|
|
change->origin_id = XLogRecGetOrigin(r);
|
|
memcpy(&change->data.tp.rlocator, &target_locator, sizeof(RelFileLocator));
|
|
|
|
if (xlrec->flags & XLH_UPDATE_CONTAINS_NEW_TUPLE)
|
|
{
|
|
Size datalen;
|
|
Size tuplelen;
|
|
|
|
data = XLogRecGetBlockData(r, 0, &datalen);
|
|
|
|
tuplelen = datalen - SizeOfHeapHeader;
|
|
|
|
change->data.tp.newtuple =
|
|
ReorderBufferGetTupleBuf(ctx->reorder, tuplelen);
|
|
|
|
DecodeXLogTuple(data, datalen, change->data.tp.newtuple);
|
|
}
|
|
|
|
if (xlrec->flags & XLH_UPDATE_CONTAINS_OLD)
|
|
{
|
|
Size datalen;
|
|
Size tuplelen;
|
|
|
|
/* caution, remaining data in record is not aligned */
|
|
data = XLogRecGetData(r) + SizeOfHeapUpdate;
|
|
datalen = XLogRecGetDataLen(r) - SizeOfHeapUpdate;
|
|
tuplelen = datalen - SizeOfHeapHeader;
|
|
|
|
change->data.tp.oldtuple =
|
|
ReorderBufferGetTupleBuf(ctx->reorder, tuplelen);
|
|
|
|
DecodeXLogTuple(data, datalen, change->data.tp.oldtuple);
|
|
}
|
|
|
|
change->data.tp.clear_toast_afterwards = true;
|
|
|
|
ReorderBufferQueueChange(ctx->reorder, XLogRecGetXid(r), buf->origptr,
|
|
change, false);
|
|
}
|
|
|
|
/*
|
|
* Parse XLOG_HEAP_DELETE from wal into proper tuplebufs.
|
|
*
|
|
* Deletes can possibly contain the old primary key.
|
|
*/
|
|
static void
|
|
DecodeDelete(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
|
|
{
|
|
XLogReaderState *r = buf->record;
|
|
xl_heap_delete *xlrec;
|
|
ReorderBufferChange *change;
|
|
RelFileLocator target_locator;
|
|
|
|
xlrec = (xl_heap_delete *) XLogRecGetData(r);
|
|
|
|
/* only interested in our database */
|
|
XLogRecGetBlockTag(r, 0, &target_locator, NULL, NULL);
|
|
if (target_locator.dbOid != ctx->slot->data.database)
|
|
return;
|
|
|
|
/* output plugin doesn't look for this origin, no need to queue */
|
|
if (FilterByOrigin(ctx, XLogRecGetOrigin(r)))
|
|
return;
|
|
|
|
change = ReorderBufferGetChange(ctx->reorder);
|
|
|
|
if (xlrec->flags & XLH_DELETE_IS_SUPER)
|
|
change->action = REORDER_BUFFER_CHANGE_INTERNAL_SPEC_ABORT;
|
|
else
|
|
change->action = REORDER_BUFFER_CHANGE_DELETE;
|
|
|
|
change->origin_id = XLogRecGetOrigin(r);
|
|
|
|
memcpy(&change->data.tp.rlocator, &target_locator, sizeof(RelFileLocator));
|
|
|
|
/* old primary key stored */
|
|
if (xlrec->flags & XLH_DELETE_CONTAINS_OLD)
|
|
{
|
|
Size datalen = XLogRecGetDataLen(r) - SizeOfHeapDelete;
|
|
Size tuplelen = datalen - SizeOfHeapHeader;
|
|
|
|
Assert(XLogRecGetDataLen(r) > (SizeOfHeapDelete + SizeOfHeapHeader));
|
|
|
|
change->data.tp.oldtuple =
|
|
ReorderBufferGetTupleBuf(ctx->reorder, tuplelen);
|
|
|
|
DecodeXLogTuple((char *) xlrec + SizeOfHeapDelete,
|
|
datalen, change->data.tp.oldtuple);
|
|
}
|
|
|
|
change->data.tp.clear_toast_afterwards = true;
|
|
|
|
ReorderBufferQueueChange(ctx->reorder, XLogRecGetXid(r), buf->origptr,
|
|
change, false);
|
|
}
|
|
|
|
/*
|
|
* Parse XLOG_HEAP_TRUNCATE from wal
|
|
*/
|
|
static void
|
|
DecodeTruncate(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
|
|
{
|
|
XLogReaderState *r = buf->record;
|
|
xl_heap_truncate *xlrec;
|
|
ReorderBufferChange *change;
|
|
|
|
xlrec = (xl_heap_truncate *) XLogRecGetData(r);
|
|
|
|
/* only interested in our database */
|
|
if (xlrec->dbId != ctx->slot->data.database)
|
|
return;
|
|
|
|
/* output plugin doesn't look for this origin, no need to queue */
|
|
if (FilterByOrigin(ctx, XLogRecGetOrigin(r)))
|
|
return;
|
|
|
|
change = ReorderBufferGetChange(ctx->reorder);
|
|
change->action = REORDER_BUFFER_CHANGE_TRUNCATE;
|
|
change->origin_id = XLogRecGetOrigin(r);
|
|
if (xlrec->flags & XLH_TRUNCATE_CASCADE)
|
|
change->data.truncate.cascade = true;
|
|
if (xlrec->flags & XLH_TRUNCATE_RESTART_SEQS)
|
|
change->data.truncate.restart_seqs = true;
|
|
change->data.truncate.nrelids = xlrec->nrelids;
|
|
change->data.truncate.relids = ReorderBufferGetRelids(ctx->reorder,
|
|
xlrec->nrelids);
|
|
memcpy(change->data.truncate.relids, xlrec->relids,
|
|
xlrec->nrelids * sizeof(Oid));
|
|
ReorderBufferQueueChange(ctx->reorder, XLogRecGetXid(r),
|
|
buf->origptr, change, false);
|
|
}
|
|
|
|
/*
|
|
* Decode XLOG_HEAP2_MULTI_INSERT_insert record into multiple tuplebufs.
|
|
*
|
|
* Currently MULTI_INSERT will always contain the full tuples.
|
|
*/
|
|
static void
|
|
DecodeMultiInsert(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
|
|
{
|
|
XLogReaderState *r = buf->record;
|
|
xl_heap_multi_insert *xlrec;
|
|
int i;
|
|
char *data;
|
|
char *tupledata;
|
|
Size tuplelen;
|
|
RelFileLocator rlocator;
|
|
|
|
xlrec = (xl_heap_multi_insert *) XLogRecGetData(r);
|
|
|
|
/*
|
|
* Ignore insert records without new tuples. This happens when a
|
|
* multi_insert is done on a catalog or on a non-persistent relation.
|
|
*/
|
|
if (!(xlrec->flags & XLH_INSERT_CONTAINS_NEW_TUPLE))
|
|
return;
|
|
|
|
/* only interested in our database */
|
|
XLogRecGetBlockTag(r, 0, &rlocator, NULL, NULL);
|
|
if (rlocator.dbOid != ctx->slot->data.database)
|
|
return;
|
|
|
|
/* output plugin doesn't look for this origin, no need to queue */
|
|
if (FilterByOrigin(ctx, XLogRecGetOrigin(r)))
|
|
return;
|
|
|
|
/*
|
|
* We know that this multi_insert isn't for a catalog, so the block should
|
|
* always have data even if a full-page write of it is taken.
|
|
*/
|
|
tupledata = XLogRecGetBlockData(r, 0, &tuplelen);
|
|
Assert(tupledata != NULL);
|
|
|
|
data = tupledata;
|
|
for (i = 0; i < xlrec->ntuples; i++)
|
|
{
|
|
ReorderBufferChange *change;
|
|
xl_multi_insert_tuple *xlhdr;
|
|
int datalen;
|
|
ReorderBufferTupleBuf *tuple;
|
|
HeapTupleHeader header;
|
|
|
|
change = ReorderBufferGetChange(ctx->reorder);
|
|
change->action = REORDER_BUFFER_CHANGE_INSERT;
|
|
change->origin_id = XLogRecGetOrigin(r);
|
|
|
|
memcpy(&change->data.tp.rlocator, &rlocator, sizeof(RelFileLocator));
|
|
|
|
xlhdr = (xl_multi_insert_tuple *) SHORTALIGN(data);
|
|
data = ((char *) xlhdr) + SizeOfMultiInsertTuple;
|
|
datalen = xlhdr->datalen;
|
|
|
|
change->data.tp.newtuple =
|
|
ReorderBufferGetTupleBuf(ctx->reorder, datalen);
|
|
|
|
tuple = change->data.tp.newtuple;
|
|
header = tuple->tuple.t_data;
|
|
|
|
/* not a disk based tuple */
|
|
ItemPointerSetInvalid(&tuple->tuple.t_self);
|
|
|
|
/*
|
|
* We can only figure this out after reassembling the transactions.
|
|
*/
|
|
tuple->tuple.t_tableOid = InvalidOid;
|
|
|
|
tuple->tuple.t_len = datalen + SizeofHeapTupleHeader;
|
|
|
|
memset(header, 0, SizeofHeapTupleHeader);
|
|
|
|
memcpy((char *) tuple->tuple.t_data + SizeofHeapTupleHeader,
|
|
(char *) data,
|
|
datalen);
|
|
header->t_infomask = xlhdr->t_infomask;
|
|
header->t_infomask2 = xlhdr->t_infomask2;
|
|
header->t_hoff = xlhdr->t_hoff;
|
|
|
|
/*
|
|
* Reset toast reassembly state only after the last row in the last
|
|
* xl_multi_insert_tuple record emitted by one heap_multi_insert()
|
|
* call.
|
|
*/
|
|
if (xlrec->flags & XLH_INSERT_LAST_IN_MULTI &&
|
|
(i + 1) == xlrec->ntuples)
|
|
change->data.tp.clear_toast_afterwards = true;
|
|
else
|
|
change->data.tp.clear_toast_afterwards = false;
|
|
|
|
ReorderBufferQueueChange(ctx->reorder, XLogRecGetXid(r),
|
|
buf->origptr, change, false);
|
|
|
|
/* move to the next xl_multi_insert_tuple entry */
|
|
data += datalen;
|
|
}
|
|
Assert(data == tupledata + tuplelen);
|
|
}
|
|
|
|
/*
|
|
* Parse XLOG_HEAP_CONFIRM from wal into a confirmation change.
|
|
*
|
|
* This is pretty trivial, all the state essentially already setup by the
|
|
* speculative insertion.
|
|
*/
|
|
static void
|
|
DecodeSpecConfirm(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
|
|
{
|
|
XLogReaderState *r = buf->record;
|
|
ReorderBufferChange *change;
|
|
RelFileLocator target_locator;
|
|
|
|
/* only interested in our database */
|
|
XLogRecGetBlockTag(r, 0, &target_locator, NULL, NULL);
|
|
if (target_locator.dbOid != ctx->slot->data.database)
|
|
return;
|
|
|
|
/* output plugin doesn't look for this origin, no need to queue */
|
|
if (FilterByOrigin(ctx, XLogRecGetOrigin(r)))
|
|
return;
|
|
|
|
change = ReorderBufferGetChange(ctx->reorder);
|
|
change->action = REORDER_BUFFER_CHANGE_INTERNAL_SPEC_CONFIRM;
|
|
change->origin_id = XLogRecGetOrigin(r);
|
|
|
|
memcpy(&change->data.tp.rlocator, &target_locator, sizeof(RelFileLocator));
|
|
|
|
change->data.tp.clear_toast_afterwards = true;
|
|
|
|
ReorderBufferQueueChange(ctx->reorder, XLogRecGetXid(r), buf->origptr,
|
|
change, false);
|
|
}
|
|
|
|
|
|
/*
|
|
* Read a HeapTuple as WAL logged by heap_insert, heap_update and heap_delete
|
|
* (but not by heap_multi_insert) into a tuplebuf.
|
|
*
|
|
* The size 'len' and the pointer 'data' in the record need to be
|
|
* computed outside as they are record specific.
|
|
*/
|
|
static void
|
|
DecodeXLogTuple(char *data, Size len, ReorderBufferTupleBuf *tuple)
|
|
{
|
|
xl_heap_header xlhdr;
|
|
int datalen = len - SizeOfHeapHeader;
|
|
HeapTupleHeader header;
|
|
|
|
Assert(datalen >= 0);
|
|
|
|
tuple->tuple.t_len = datalen + SizeofHeapTupleHeader;
|
|
header = tuple->tuple.t_data;
|
|
|
|
/* not a disk based tuple */
|
|
ItemPointerSetInvalid(&tuple->tuple.t_self);
|
|
|
|
/* we can only figure this out after reassembling the transactions */
|
|
tuple->tuple.t_tableOid = InvalidOid;
|
|
|
|
/* data is not stored aligned, copy to aligned storage */
|
|
memcpy((char *) &xlhdr,
|
|
data,
|
|
SizeOfHeapHeader);
|
|
|
|
memset(header, 0, SizeofHeapTupleHeader);
|
|
|
|
memcpy(((char *) tuple->tuple.t_data) + SizeofHeapTupleHeader,
|
|
data + SizeOfHeapHeader,
|
|
datalen);
|
|
|
|
header->t_infomask = xlhdr.t_infomask;
|
|
header->t_infomask2 = xlhdr.t_infomask2;
|
|
header->t_hoff = xlhdr.t_hoff;
|
|
}
|
|
|
|
/*
|
|
* Check whether we are interested in this specific transaction.
|
|
*
|
|
* There can be several reasons we might not be interested in this
|
|
* transaction:
|
|
* 1) We might not be interested in decoding transactions up to this
|
|
* LSN. This can happen because we previously decoded it and now just
|
|
* are restarting or if we haven't assembled a consistent snapshot yet.
|
|
* 2) The transaction happened in another database.
|
|
* 3) The output plugin is not interested in the origin.
|
|
* 4) We are doing fast-forwarding
|
|
*/
|
|
static bool
|
|
DecodeTXNNeedSkip(LogicalDecodingContext *ctx, XLogRecordBuffer *buf,
|
|
Oid txn_dbid, RepOriginId origin_id)
|
|
{
|
|
return (SnapBuildXactNeedsSkip(ctx->snapshot_builder, buf->origptr) ||
|
|
(txn_dbid != InvalidOid && txn_dbid != ctx->slot->data.database) ||
|
|
ctx->fast_forward || FilterByOrigin(ctx, origin_id));
|
|
}
|