mirror of
https://github.com/postgres/postgres.git
synced 2025-11-16 15:02:33 +03:00
Commit 8431e296ea reworked ProcArrayApplyRecoveryInfo to sort XIDs
before adding them to KnownAssignedXids. But the XIDs are sorted using
xidComparator, which compares the XIDs simply as uint32 values, not
logically. KnownAssignedXidsAdd() however expects XIDs in logical order,
and calls TransactionIdFollowsOrEquals() to enforce that. If there are
XIDs for which the two orderings disagree, an error is raised and the
recovery fails/restarts.
Hitting this issue is fairly easy - you just need two transactions, one
started before the 4B limit (e.g. XID 4294967290), the other sometime
after it (e.g. XID 1000). Logically (4294967290 <= 1000) but when
compared using xidComparator we try to add them in the opposite order.
Which makes KnownAssignedXidsAdd() fail with an error like this:
ERROR: out-of-order XID insertion in KnownAssignedXids
This only happens during replica startup, while processing RUNNING_XACTS
records to build the snapshot. Once we reach STANDBY_SNAPSHOT_READY, we
skip these records. So this does not affect already running replicas,
but if you restart (or create) a replica while there are transactions
with XIDs for which the two orderings disagree, you may hit this.
Long-running transactions and frequent replica restarts increase the
likelihood of hitting this issue. Once the replica gets into this state,
it can't be started (even if the old transactions are terminated).
Fixed by sorting the XIDs logically. This is safe here because we're
dealing only with normal XIDs (they are XIDs assigned to backends) that
all come from the same wraparound epoch (otherwise the backends could not
be running at the same time on the primary node). Within such a set the
logical comparison cannot violate the triangle inequality, which is the
hazard that makes xidComparator compare raw values instead.
Investigation and root cause analysis by Abhijit Menon-Sen. Patch by me.
This issue is present in all releases since 9.4, however releases up to
9.6 are EOL already so backpatch to 10 only.
Reviewed-by: Abhijit Menon-Sen
Reviewed-by: Alvaro Herrera
Backpatch-through: 10
Discussion: https://postgr.es/m/36b8a501-5d73-277c-4972-f58a4dce088a%40enterprisedb.com
237 lines
5.2 KiB
C
237 lines
5.2 KiB
C
/*-------------------------------------------------------------------------
|
|
*
|
|
* xid.c
|
|
* POSTGRES transaction identifier and command identifier datatypes.
|
|
*
|
|
* Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
|
|
* Portions Copyright (c) 1994, Regents of the University of California
|
|
*
|
|
*
|
|
* IDENTIFICATION
|
|
* src/backend/utils/adt/xid.c
|
|
*
|
|
*-------------------------------------------------------------------------
|
|
*/
|
|
#include "postgres.h"
|
|
|
|
#include <limits.h>
|
|
|
|
#include "access/multixact.h"
|
|
#include "access/transam.h"
|
|
#include "access/xact.h"
|
|
#include "libpq/pqformat.h"
|
|
#include "utils/builtins.h"
|
|
|
|
/*
 * fmgr convenience macros for the xid and cid datatypes.
 *
 * The PG_GETARG_* forms fetch argument number n as a Datum and convert it
 * to a TransactionId / CommandId.
 */
#define PG_GETARG_TRANSACTIONID(n) DatumGetTransactionId(PG_GETARG_DATUM(n))
|
|
/*
 * Expands to a complete "return" statement, so it can only be used where a
 * statement is allowed (not inside an expression).
 */
#define PG_RETURN_TRANSACTIONID(x) return TransactionIdGetDatum(x)
|
|
|
|
/* CommandId counterpart of PG_GETARG_TRANSACTIONID. */
#define PG_GETARG_COMMANDID(n) DatumGetCommandId(PG_GETARG_DATUM(n))
|
|
/* CommandId counterpart of PG_RETURN_TRANSACTIONID; also a "return" statement. */
#define PG_RETURN_COMMANDID(x) return CommandIdGetDatum(x)
|
|
|
|
|
|
Datum
|
|
xidin(PG_FUNCTION_ARGS)
|
|
{
|
|
char *str = PG_GETARG_CSTRING(0);
|
|
|
|
PG_RETURN_TRANSACTIONID((TransactionId) strtoul(str, NULL, 0));
|
|
}
|
|
|
|
Datum
|
|
xidout(PG_FUNCTION_ARGS)
|
|
{
|
|
TransactionId transactionId = PG_GETARG_TRANSACTIONID(0);
|
|
char *result = (char *) palloc(16);
|
|
|
|
snprintf(result, 16, "%lu", (unsigned long) transactionId);
|
|
PG_RETURN_CSTRING(result);
|
|
}
|
|
|
|
/*
|
|
* xidrecv - converts external binary format to xid
|
|
*/
|
|
Datum
|
|
xidrecv(PG_FUNCTION_ARGS)
|
|
{
|
|
StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
|
|
|
|
PG_RETURN_TRANSACTIONID((TransactionId) pq_getmsgint(buf, sizeof(TransactionId)));
|
|
}
|
|
|
|
/*
|
|
* xidsend - converts xid to binary format
|
|
*/
|
|
Datum
|
|
xidsend(PG_FUNCTION_ARGS)
|
|
{
|
|
TransactionId arg1 = PG_GETARG_TRANSACTIONID(0);
|
|
StringInfoData buf;
|
|
|
|
pq_begintypsend(&buf);
|
|
pq_sendint(&buf, arg1, sizeof(arg1));
|
|
PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
|
|
}
|
|
|
|
/*
|
|
* xideq - are two xids equal?
|
|
*/
|
|
Datum
|
|
xideq(PG_FUNCTION_ARGS)
|
|
{
|
|
TransactionId xid1 = PG_GETARG_TRANSACTIONID(0);
|
|
TransactionId xid2 = PG_GETARG_TRANSACTIONID(1);
|
|
|
|
PG_RETURN_BOOL(TransactionIdEquals(xid1, xid2));
|
|
}
|
|
|
|
/*
|
|
* xidneq - are two xids different?
|
|
*/
|
|
Datum
|
|
xidneq(PG_FUNCTION_ARGS)
|
|
{
|
|
TransactionId xid1 = PG_GETARG_TRANSACTIONID(0);
|
|
TransactionId xid2 = PG_GETARG_TRANSACTIONID(1);
|
|
|
|
PG_RETURN_BOOL(!TransactionIdEquals(xid1, xid2));
|
|
}
|
|
|
|
/*
|
|
* xid_age - compute age of an XID (relative to latest stable xid)
|
|
*/
|
|
Datum
|
|
xid_age(PG_FUNCTION_ARGS)
|
|
{
|
|
TransactionId xid = PG_GETARG_TRANSACTIONID(0);
|
|
TransactionId now = GetStableLatestTransactionId();
|
|
|
|
/* Permanent XIDs are always infinitely old */
|
|
if (!TransactionIdIsNormal(xid))
|
|
PG_RETURN_INT32(INT_MAX);
|
|
|
|
PG_RETURN_INT32((int32) (now - xid));
|
|
}
|
|
|
|
/*
|
|
* mxid_age - compute age of a multi XID (relative to latest stable mxid)
|
|
*/
|
|
Datum
|
|
mxid_age(PG_FUNCTION_ARGS)
|
|
{
|
|
TransactionId xid = PG_GETARG_TRANSACTIONID(0);
|
|
MultiXactId now = ReadNextMultiXactId();
|
|
|
|
if (!MultiXactIdIsValid(xid))
|
|
PG_RETURN_INT32(INT_MAX);
|
|
|
|
PG_RETURN_INT32((int32) (now - xid));
|
|
}
|
|
|
|
/*
|
|
* xidComparator
|
|
* qsort comparison function for XIDs
|
|
*
|
|
* We can't use wraparound comparison for XIDs because that does not respect
|
|
* the triangle inequality! Any old sort order will do.
|
|
*/
|
|
int
|
|
xidComparator(const void *arg1, const void *arg2)
|
|
{
|
|
TransactionId xid1 = *(const TransactionId *) arg1;
|
|
TransactionId xid2 = *(const TransactionId *) arg2;
|
|
|
|
if (xid1 > xid2)
|
|
return 1;
|
|
if (xid1 < xid2)
|
|
return -1;
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* xidLogicalComparator
|
|
* qsort comparison function for XIDs
|
|
*
|
|
* This is used to compare only XIDs from the same epoch (e.g. for backends
|
|
* running at the same time). So there must be only normal XIDs, so there's
|
|
* no issue with triangle inequality.
|
|
*/
|
|
int
|
|
xidLogicalComparator(const void *arg1, const void *arg2)
|
|
{
|
|
TransactionId xid1 = *(const TransactionId *) arg1;
|
|
TransactionId xid2 = *(const TransactionId *) arg2;
|
|
|
|
Assert(TransactionIdIsNormal(xid1));
|
|
Assert(TransactionIdIsNormal(xid2));
|
|
|
|
if (TransactionIdPrecedes(xid1, xid2))
|
|
return -1;
|
|
|
|
if (TransactionIdPrecedes(xid2, xid1))
|
|
return 1;
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*****************************************************************************
|
|
* COMMAND IDENTIFIER ROUTINES *
|
|
*****************************************************************************/
|
|
|
|
/*
|
|
* cidin - converts CommandId to internal representation.
|
|
*/
|
|
Datum
|
|
cidin(PG_FUNCTION_ARGS)
|
|
{
|
|
char *str = PG_GETARG_CSTRING(0);
|
|
|
|
PG_RETURN_COMMANDID((CommandId) strtoul(str, NULL, 0));
|
|
}
|
|
|
|
/*
|
|
* cidout - converts a cid to external representation.
|
|
*/
|
|
Datum
|
|
cidout(PG_FUNCTION_ARGS)
|
|
{
|
|
CommandId c = PG_GETARG_COMMANDID(0);
|
|
char *result = (char *) palloc(16);
|
|
|
|
snprintf(result, 16, "%lu", (unsigned long) c);
|
|
PG_RETURN_CSTRING(result);
|
|
}
|
|
|
|
/*
|
|
* cidrecv - converts external binary format to cid
|
|
*/
|
|
Datum
|
|
cidrecv(PG_FUNCTION_ARGS)
|
|
{
|
|
StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
|
|
|
|
PG_RETURN_COMMANDID((CommandId) pq_getmsgint(buf, sizeof(CommandId)));
|
|
}
|
|
|
|
/*
|
|
* cidsend - converts cid to binary format
|
|
*/
|
|
Datum
|
|
cidsend(PG_FUNCTION_ARGS)
|
|
{
|
|
CommandId arg1 = PG_GETARG_COMMANDID(0);
|
|
StringInfoData buf;
|
|
|
|
pq_begintypsend(&buf);
|
|
pq_sendint(&buf, arg1, sizeof(arg1));
|
|
PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
|
|
}
|
|
|
|
Datum
|
|
cideq(PG_FUNCTION_ARGS)
|
|
{
|
|
CommandId arg1 = PG_GETARG_COMMANDID(0);
|
|
CommandId arg2 = PG_GETARG_COMMANDID(1);
|
|
|
|
PG_RETURN_BOOL(arg1 == arg2);
|
|
}
|