mirror of
https://github.com/postgres/postgres.git
synced 2025-05-28 05:21:27 +03:00
Refrain from duplicating data in reorderbuffers
If a walsender exits leaving data in reorderbuffers, the next walsender that tries to decode the same transaction would append its decoded data in the same spill files without truncating it first, which effectively duplicate the data. Avoid that by removing any leftover reorderbuffer spill files when a walsender starts. Backpatch to 9.4; this bug has been there from the very beginning of logical decoding. Author: Craig Ringer, revised by me Reviewed by: Álvaro Herrera, Petr Jelínek, Masahiko Sawada
This commit is contained in:
parent
e20dd6a13d
commit
cee1dd1eed
@ -199,6 +199,9 @@ static Size ReorderBufferRestoreChanges(ReorderBuffer *rb, ReorderBufferTXN *txn
|
|||||||
static void ReorderBufferRestoreChange(ReorderBuffer *rb, ReorderBufferTXN *txn,
|
static void ReorderBufferRestoreChange(ReorderBuffer *rb, ReorderBufferTXN *txn,
|
||||||
char *change);
|
char *change);
|
||||||
static void ReorderBufferRestoreCleanup(ReorderBuffer *rb, ReorderBufferTXN *txn);
|
static void ReorderBufferRestoreCleanup(ReorderBuffer *rb, ReorderBufferTXN *txn);
|
||||||
|
static void ReorderBufferCleanupSerializedTXNs(const char *slotname);
|
||||||
|
static void ReorderBufferSerializedPath(char *path, ReplicationSlot *slot,
|
||||||
|
TransactionId xid, XLogSegNo segno);
|
||||||
|
|
||||||
static void ReorderBufferFreeSnap(ReorderBuffer *rb, Snapshot snap);
|
static void ReorderBufferFreeSnap(ReorderBuffer *rb, Snapshot snap);
|
||||||
static Snapshot ReorderBufferCopySnap(ReorderBuffer *rb, Snapshot orig_snap,
|
static Snapshot ReorderBufferCopySnap(ReorderBuffer *rb, Snapshot orig_snap,
|
||||||
@ -217,7 +220,8 @@ static void ReorderBufferToastAppendChunk(ReorderBuffer *rb, ReorderBufferTXN *t
|
|||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Allocate a new ReorderBuffer
|
* Allocate a new ReorderBuffer and clean out any old serialized state from
|
||||||
|
* prior ReorderBuffer instances for the same slot.
|
||||||
*/
|
*/
|
||||||
ReorderBuffer *
|
ReorderBuffer *
|
||||||
ReorderBufferAllocate(void)
|
ReorderBufferAllocate(void)
|
||||||
@ -226,6 +230,8 @@ ReorderBufferAllocate(void)
|
|||||||
HASHCTL hash_ctl;
|
HASHCTL hash_ctl;
|
||||||
MemoryContext new_ctx;
|
MemoryContext new_ctx;
|
||||||
|
|
||||||
|
Assert(MyReplicationSlot != NULL);
|
||||||
|
|
||||||
/* allocate memory in own context, to have better accountability */
|
/* allocate memory in own context, to have better accountability */
|
||||||
new_ctx = AllocSetContextCreate(CurrentMemoryContext,
|
new_ctx = AllocSetContextCreate(CurrentMemoryContext,
|
||||||
"ReorderBuffer",
|
"ReorderBuffer",
|
||||||
@ -268,6 +274,13 @@ ReorderBufferAllocate(void)
|
|||||||
dlist_init(&buffer->toplevel_by_lsn);
|
dlist_init(&buffer->toplevel_by_lsn);
|
||||||
slist_init(&buffer->cached_tuplebufs);
|
slist_init(&buffer->cached_tuplebufs);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Ensure there's no stale data from prior uses of this slot, in case some
|
||||||
|
* prior exit avoided calling ReorderBufferFree. Failure to do this can
|
||||||
|
* produce duplicated txns, and it's very cheap if there's nothing there.
|
||||||
|
*/
|
||||||
|
ReorderBufferCleanupSerializedTXNs(NameStr(MyReplicationSlot->data.name));
|
||||||
|
|
||||||
return buffer;
|
return buffer;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -284,6 +297,9 @@ ReorderBufferFree(ReorderBuffer *rb)
|
|||||||
* memory context.
|
* memory context.
|
||||||
*/
|
*/
|
||||||
MemoryContextDelete(context);
|
MemoryContextDelete(context);
|
||||||
|
|
||||||
|
/* Free disk space used by unconsumed reorder buffers */
|
||||||
|
ReorderBufferCleanupSerializedTXNs(NameStr(MyReplicationSlot->data.name));
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -2073,7 +2089,6 @@ ReorderBufferSerializeTXN(ReorderBuffer *rb, ReorderBufferTXN *txn)
|
|||||||
int fd = -1;
|
int fd = -1;
|
||||||
XLogSegNo curOpenSegNo = 0;
|
XLogSegNo curOpenSegNo = 0;
|
||||||
Size spilled = 0;
|
Size spilled = 0;
|
||||||
char path[MAXPGPATH];
|
|
||||||
|
|
||||||
elog(DEBUG2, "spill %u changes in XID %u to disk",
|
elog(DEBUG2, "spill %u changes in XID %u to disk",
|
||||||
(uint32) txn->nentries_mem, txn->xid);
|
(uint32) txn->nentries_mem, txn->xid);
|
||||||
@ -2100,21 +2115,19 @@ ReorderBufferSerializeTXN(ReorderBuffer *rb, ReorderBufferTXN *txn)
|
|||||||
*/
|
*/
|
||||||
if (fd == -1 || !XLByteInSeg(change->lsn, curOpenSegNo))
|
if (fd == -1 || !XLByteInSeg(change->lsn, curOpenSegNo))
|
||||||
{
|
{
|
||||||
XLogRecPtr recptr;
|
char path[MAXPGPATH];
|
||||||
|
|
||||||
if (fd != -1)
|
if (fd != -1)
|
||||||
CloseTransientFile(fd);
|
CloseTransientFile(fd);
|
||||||
|
|
||||||
XLByteToSeg(change->lsn, curOpenSegNo);
|
XLByteToSeg(change->lsn, curOpenSegNo);
|
||||||
XLogSegNoOffsetToRecPtr(curOpenSegNo, 0, recptr);
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* No need to care about TLIs here, only used during a single run,
|
* No need to care about TLIs here, only used during a single run,
|
||||||
* so each LSN only maps to a specific WAL record.
|
* so each LSN only maps to a specific WAL record.
|
||||||
*/
|
*/
|
||||||
sprintf(path, "pg_replslot/%s/xid-%u-lsn-%X-%X.snap",
|
ReorderBufferSerializedPath(path, MyReplicationSlot, txn->xid,
|
||||||
NameStr(MyReplicationSlot->data.name), txn->xid,
|
curOpenSegNo);
|
||||||
(uint32) (recptr >> 32), (uint32) recptr);
|
|
||||||
|
|
||||||
/* open segment, create it if necessary */
|
/* open segment, create it if necessary */
|
||||||
fd = OpenTransientFile(path,
|
fd = OpenTransientFile(path,
|
||||||
@ -2124,8 +2137,7 @@ ReorderBufferSerializeTXN(ReorderBuffer *rb, ReorderBufferTXN *txn)
|
|||||||
if (fd < 0)
|
if (fd < 0)
|
||||||
ereport(ERROR,
|
ereport(ERROR,
|
||||||
(errcode_for_file_access(),
|
(errcode_for_file_access(),
|
||||||
errmsg("could not open file \"%s\": %m",
|
errmsg("could not open file \"%s\": %m", path)));
|
||||||
path)));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
ReorderBufferSerializeChange(rb, txn, fd, change);
|
ReorderBufferSerializeChange(rb, txn, fd, change);
|
||||||
@ -2343,25 +2355,20 @@ ReorderBufferRestoreChanges(ReorderBuffer *rb, ReorderBufferTXN *txn,
|
|||||||
|
|
||||||
if (*fd == -1)
|
if (*fd == -1)
|
||||||
{
|
{
|
||||||
XLogRecPtr recptr;
|
|
||||||
char path[MAXPGPATH];
|
char path[MAXPGPATH];
|
||||||
|
|
||||||
/* first time in */
|
/* first time in */
|
||||||
if (*segno == 0)
|
if (*segno == 0)
|
||||||
{
|
|
||||||
XLByteToSeg(txn->first_lsn, *segno);
|
XLByteToSeg(txn->first_lsn, *segno);
|
||||||
}
|
|
||||||
|
|
||||||
Assert(*segno != 0 || dlist_is_empty(&txn->changes));
|
Assert(*segno != 0 || dlist_is_empty(&txn->changes));
|
||||||
XLogSegNoOffsetToRecPtr(*segno, 0, recptr);
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* No need to care about TLIs here, only used during a single run,
|
* No need to care about TLIs here, only used during a single run,
|
||||||
* so each LSN only maps to a specific WAL record.
|
* so each LSN only maps to a specific WAL record.
|
||||||
*/
|
*/
|
||||||
sprintf(path, "pg_replslot/%s/xid-%u-lsn-%X-%X.snap",
|
ReorderBufferSerializedPath(path, MyReplicationSlot, txn->xid,
|
||||||
NameStr(MyReplicationSlot->data.name), txn->xid,
|
*segno);
|
||||||
(uint32) (recptr >> 32), (uint32) recptr);
|
|
||||||
|
|
||||||
*fd = OpenTransientFile(path, O_RDONLY | PG_BINARY, 0);
|
*fd = OpenTransientFile(path, O_RDONLY | PG_BINARY, 0);
|
||||||
if (*fd < 0 && errno == ENOENT)
|
if (*fd < 0 && errno == ENOENT)
|
||||||
@ -2597,13 +2604,8 @@ ReorderBufferRestoreCleanup(ReorderBuffer *rb, ReorderBufferTXN *txn)
|
|||||||
for (cur = first; cur <= last; cur++)
|
for (cur = first; cur <= last; cur++)
|
||||||
{
|
{
|
||||||
char path[MAXPGPATH];
|
char path[MAXPGPATH];
|
||||||
XLogRecPtr recptr;
|
|
||||||
|
|
||||||
XLogSegNoOffsetToRecPtr(cur, 0, recptr);
|
ReorderBufferSerializedPath(path, MyReplicationSlot, txn->xid, cur);
|
||||||
|
|
||||||
sprintf(path, "pg_replslot/%s/xid-%u-lsn-%X-%X.snap",
|
|
||||||
NameStr(MyReplicationSlot->data.name), txn->xid,
|
|
||||||
(uint32) (recptr >> 32), (uint32) recptr);
|
|
||||||
if (unlink(path) != 0 && errno != ENOENT)
|
if (unlink(path) != 0 && errno != ENOENT)
|
||||||
ereport(ERROR,
|
ereport(ERROR,
|
||||||
(errcode_for_file_access(),
|
(errcode_for_file_access(),
|
||||||
@ -2611,6 +2613,63 @@ ReorderBufferRestoreCleanup(ReorderBuffer *rb, ReorderBufferTXN *txn)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Remove any leftover serialized reorder buffers from a slot directory after a
|
||||||
|
* prior crash or decoding session exit.
|
||||||
|
*/
|
||||||
|
static void
|
||||||
|
ReorderBufferCleanupSerializedTXNs(const char *slotname)
|
||||||
|
{
|
||||||
|
DIR *spill_dir;
|
||||||
|
struct dirent *spill_de;
|
||||||
|
struct stat statbuf;
|
||||||
|
char path[MAXPGPATH * 2 + 12];
|
||||||
|
|
||||||
|
sprintf(path, "pg_replslot/%s", slotname);
|
||||||
|
|
||||||
|
/* we're only handling directories here, skip if it's not ours */
|
||||||
|
if (lstat(path, &statbuf) == 0 && !S_ISDIR(statbuf.st_mode))
|
||||||
|
return;
|
||||||
|
|
||||||
|
spill_dir = AllocateDir(path);
|
||||||
|
while ((spill_de = ReadDirExtended(spill_dir, path, INFO)) != NULL)
|
||||||
|
{
|
||||||
|
/* only look at names that can be ours */
|
||||||
|
if (strncmp(spill_de->d_name, "xid", 3) == 0)
|
||||||
|
{
|
||||||
|
snprintf(path, sizeof(path),
|
||||||
|
"pg_replslot/%s/%s", slotname,
|
||||||
|
spill_de->d_name);
|
||||||
|
|
||||||
|
if (unlink(path) != 0)
|
||||||
|
ereport(ERROR,
|
||||||
|
(errcode_for_file_access(),
|
||||||
|
errmsg("could not remove file \"%s\" during removal of pg_replslot/%s/*.xid: %m",
|
||||||
|
path, slotname)));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
FreeDir(spill_dir);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Given a replication slot, transaction ID and segment number, fill in the
|
||||||
|
* corresponding spill file into 'path', which is a caller-owned buffer of size
|
||||||
|
* at least MAXPGPATH.
|
||||||
|
*/
|
||||||
|
static void
|
||||||
|
ReorderBufferSerializedPath(char *path, ReplicationSlot *slot, TransactionId xid,
|
||||||
|
XLogSegNo segno)
|
||||||
|
{
|
||||||
|
XLogRecPtr recptr;
|
||||||
|
|
||||||
|
XLogSegNoOffsetToRecPtr(segno, 0, recptr);
|
||||||
|
|
||||||
|
snprintf(path, MAXPGPATH, "pg_replslot/%s/xid-%u-lsn-%X-%X.snap",
|
||||||
|
NameStr(MyReplicationSlot->data.name),
|
||||||
|
xid,
|
||||||
|
(uint32) (recptr >> 32), (uint32) recptr);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Delete all data spilled to disk after we've restarted/crashed. It will be
|
* Delete all data spilled to disk after we've restarted/crashed. It will be
|
||||||
* recreated when the respective slots are reused.
|
* recreated when the respective slots are reused.
|
||||||
@ -2621,15 +2680,9 @@ StartupReorderBuffer(void)
|
|||||||
DIR *logical_dir;
|
DIR *logical_dir;
|
||||||
struct dirent *logical_de;
|
struct dirent *logical_de;
|
||||||
|
|
||||||
DIR *spill_dir;
|
|
||||||
struct dirent *spill_de;
|
|
||||||
|
|
||||||
logical_dir = AllocateDir("pg_replslot");
|
logical_dir = AllocateDir("pg_replslot");
|
||||||
while ((logical_de = ReadDir(logical_dir, "pg_replslot")) != NULL)
|
while ((logical_de = ReadDir(logical_dir, "pg_replslot")) != NULL)
|
||||||
{
|
{
|
||||||
struct stat statbuf;
|
|
||||||
char path[MAXPGPATH * 2 + 12];
|
|
||||||
|
|
||||||
if (strcmp(logical_de->d_name, ".") == 0 ||
|
if (strcmp(logical_de->d_name, ".") == 0 ||
|
||||||
strcmp(logical_de->d_name, "..") == 0)
|
strcmp(logical_de->d_name, "..") == 0)
|
||||||
continue;
|
continue;
|
||||||
@ -2642,33 +2695,7 @@ StartupReorderBuffer(void)
|
|||||||
* ok, has to be a surviving logical slot, iterate and delete
|
* ok, has to be a surviving logical slot, iterate and delete
|
||||||
* everything starting with xid-*
|
* everything starting with xid-*
|
||||||
*/
|
*/
|
||||||
sprintf(path, "pg_replslot/%s", logical_de->d_name);
|
ReorderBufferCleanupSerializedTXNs(logical_de->d_name);
|
||||||
|
|
||||||
/* we're only creating directories here, skip if it's not our's */
|
|
||||||
if (lstat(path, &statbuf) == 0 && !S_ISDIR(statbuf.st_mode))
|
|
||||||
continue;
|
|
||||||
|
|
||||||
spill_dir = AllocateDir(path);
|
|
||||||
while ((spill_de = ReadDir(spill_dir, path)) != NULL)
|
|
||||||
{
|
|
||||||
if (strcmp(spill_de->d_name, ".") == 0 ||
|
|
||||||
strcmp(spill_de->d_name, "..") == 0)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
/* only look at names that can be ours */
|
|
||||||
if (strncmp(spill_de->d_name, "xid", 3) == 0)
|
|
||||||
{
|
|
||||||
sprintf(path, "pg_replslot/%s/%s", logical_de->d_name,
|
|
||||||
spill_de->d_name);
|
|
||||||
|
|
||||||
if (unlink(path) != 0)
|
|
||||||
ereport(PANIC,
|
|
||||||
(errcode_for_file_access(),
|
|
||||||
errmsg("could not remove file \"%s\": %m",
|
|
||||||
path)));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
FreeDir(spill_dir);
|
|
||||||
}
|
}
|
||||||
FreeDir(logical_dir);
|
FreeDir(logical_dir);
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user