mirror of
https://github.com/postgres/postgres.git
synced 2025-12-04 12:02:48 +03:00
Allow logical replication to transfer data in binary format.
This patch adds a "binary" option to CREATE/ALTER SUBSCRIPTION. When that's set, the publisher will send data using the data type's typsend function if any, rather than typoutput. This is generally faster, if slightly less robust. As committed, we won't try to transfer user-defined array or composite types in binary, for fear that type OIDs won't match at the subscriber. This might be changed later, but it seems like fit material for a follow-on patch. Dave Cramer, reviewed by Daniel Gustafsson, Petr Jelinek, and others; adjusted some by me Discussion: https://postgr.es/m/CADK3HH+R3xMn=8t3Ct+uD+qJ1KD=Hbif5NFMJ+d5DkoCzp6Vgw@mail.gmail.com
This commit is contained in:
@@ -17,7 +17,6 @@
|
||||
#include "catalog/pg_type.h"
|
||||
#include "libpq/pqformat.h"
|
||||
#include "replication/logicalproto.h"
|
||||
#include "utils/builtins.h"
|
||||
#include "utils/lsyscache.h"
|
||||
#include "utils/syscache.h"
|
||||
|
||||
@@ -31,7 +30,7 @@
|
||||
|
||||
static void logicalrep_write_attrs(StringInfo out, Relation rel);
|
||||
static void logicalrep_write_tuple(StringInfo out, Relation rel,
|
||||
HeapTuple tuple);
|
||||
HeapTuple tuple, bool binary);
|
||||
|
||||
static void logicalrep_read_attrs(StringInfo in, LogicalRepRelation *rel);
|
||||
static void logicalrep_read_tuple(StringInfo in, LogicalRepTupleData *tuple);
|
||||
@@ -139,7 +138,7 @@ logicalrep_read_origin(StringInfo in, XLogRecPtr *origin_lsn)
|
||||
* Write INSERT to the output stream.
|
||||
*/
|
||||
void
|
||||
logicalrep_write_insert(StringInfo out, Relation rel, HeapTuple newtuple)
|
||||
logicalrep_write_insert(StringInfo out, Relation rel, HeapTuple newtuple, bool binary)
|
||||
{
|
||||
pq_sendbyte(out, 'I'); /* action INSERT */
|
||||
|
||||
@@ -147,7 +146,7 @@ logicalrep_write_insert(StringInfo out, Relation rel, HeapTuple newtuple)
|
||||
pq_sendint32(out, RelationGetRelid(rel));
|
||||
|
||||
pq_sendbyte(out, 'N'); /* new tuple follows */
|
||||
logicalrep_write_tuple(out, rel, newtuple);
|
||||
logicalrep_write_tuple(out, rel, newtuple, binary);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -179,7 +178,7 @@ logicalrep_read_insert(StringInfo in, LogicalRepTupleData *newtup)
|
||||
*/
|
||||
void
|
||||
logicalrep_write_update(StringInfo out, Relation rel, HeapTuple oldtuple,
|
||||
HeapTuple newtuple)
|
||||
HeapTuple newtuple, bool binary)
|
||||
{
|
||||
pq_sendbyte(out, 'U'); /* action UPDATE */
|
||||
|
||||
@@ -196,11 +195,11 @@ logicalrep_write_update(StringInfo out, Relation rel, HeapTuple oldtuple,
|
||||
pq_sendbyte(out, 'O'); /* old tuple follows */
|
||||
else
|
||||
pq_sendbyte(out, 'K'); /* old key follows */
|
||||
logicalrep_write_tuple(out, rel, oldtuple);
|
||||
logicalrep_write_tuple(out, rel, oldtuple, binary);
|
||||
}
|
||||
|
||||
pq_sendbyte(out, 'N'); /* new tuple follows */
|
||||
logicalrep_write_tuple(out, rel, newtuple);
|
||||
logicalrep_write_tuple(out, rel, newtuple, binary);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -248,7 +247,7 @@ logicalrep_read_update(StringInfo in, bool *has_oldtuple,
|
||||
* Write DELETE to the output stream.
|
||||
*/
|
||||
void
|
||||
logicalrep_write_delete(StringInfo out, Relation rel, HeapTuple oldtuple)
|
||||
logicalrep_write_delete(StringInfo out, Relation rel, HeapTuple oldtuple, bool binary)
|
||||
{
|
||||
Assert(rel->rd_rel->relreplident == REPLICA_IDENTITY_DEFAULT ||
|
||||
rel->rd_rel->relreplident == REPLICA_IDENTITY_FULL ||
|
||||
@@ -264,7 +263,7 @@ logicalrep_write_delete(StringInfo out, Relation rel, HeapTuple oldtuple)
|
||||
else
|
||||
pq_sendbyte(out, 'K'); /* old key follows */
|
||||
|
||||
logicalrep_write_tuple(out, rel, oldtuple);
|
||||
logicalrep_write_tuple(out, rel, oldtuple, binary);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -437,7 +436,7 @@ logicalrep_read_typ(StringInfo in, LogicalRepTyp *ltyp)
|
||||
* Write a tuple to the outputstream, in the most efficient format possible.
|
||||
*/
|
||||
static void
|
||||
logicalrep_write_tuple(StringInfo out, Relation rel, HeapTuple tuple)
|
||||
logicalrep_write_tuple(StringInfo out, Relation rel, HeapTuple tuple, bool binary)
|
||||
{
|
||||
TupleDesc desc;
|
||||
Datum values[MaxTupleAttributeNumber];
|
||||
@@ -474,12 +473,18 @@ logicalrep_write_tuple(StringInfo out, Relation rel, HeapTuple tuple)
|
||||
|
||||
if (isnull[i])
|
||||
{
|
||||
pq_sendbyte(out, 'n'); /* null column */
|
||||
pq_sendbyte(out, LOGICALREP_COLUMN_NULL);
|
||||
continue;
|
||||
}
|
||||
else if (att->attlen == -1 && VARATT_IS_EXTERNAL_ONDISK(values[i]))
|
||||
|
||||
if (att->attlen == -1 && VARATT_IS_EXTERNAL_ONDISK(values[i]))
|
||||
{
|
||||
pq_sendbyte(out, 'u'); /* unchanged toast column */
|
||||
/*
|
||||
* Unchanged toasted datum. (Note that we don't promise to detect
|
||||
* unchanged data in general; this is just a cheap check to avoid
|
||||
* sending large values unnecessarily.)
|
||||
*/
|
||||
pq_sendbyte(out, LOGICALREP_COLUMN_UNCHANGED);
|
||||
continue;
|
||||
}
|
||||
|
||||
@@ -488,20 +493,48 @@ logicalrep_write_tuple(StringInfo out, Relation rel, HeapTuple tuple)
|
||||
elog(ERROR, "cache lookup failed for type %u", att->atttypid);
|
||||
typclass = (Form_pg_type) GETSTRUCT(typtup);
|
||||
|
||||
pq_sendbyte(out, 't'); /* 'text' data follows */
|
||||
/*
|
||||
* Choose whether to send in binary. Obviously, the option must be
|
||||
* requested and the type must have a send function. Also, if the
|
||||
* type is not built-in then it must not be a composite or array type.
|
||||
* Such types contain type OIDs, which will likely not match at the
|
||||
* receiver if it's not a built-in type.
|
||||
*
|
||||
* XXX this could be relaxed if we changed record_recv and array_recv
|
||||
* to be less picky.
|
||||
*
|
||||
* XXX this fails to apply the restriction to domains over such types.
|
||||
*/
|
||||
if (binary &&
|
||||
OidIsValid(typclass->typsend) &&
|
||||
(att->atttypid < FirstGenbkiObjectId ||
|
||||
(typclass->typtype != TYPTYPE_COMPOSITE &&
|
||||
typclass->typelem == InvalidOid)))
|
||||
{
|
||||
bytea *outputbytes;
|
||||
int len;
|
||||
|
||||
outputstr = OidOutputFunctionCall(typclass->typoutput, values[i]);
|
||||
pq_sendcountedtext(out, outputstr, strlen(outputstr), false);
|
||||
pfree(outputstr);
|
||||
pq_sendbyte(out, LOGICALREP_COLUMN_BINARY);
|
||||
outputbytes = OidSendFunctionCall(typclass->typsend, values[i]);
|
||||
len = VARSIZE(outputbytes) - VARHDRSZ;
|
||||
pq_sendint(out, len, 4); /* length */
|
||||
pq_sendbytes(out, VARDATA(outputbytes), len); /* data */
|
||||
pfree(outputbytes);
|
||||
}
|
||||
else
|
||||
{
|
||||
pq_sendbyte(out, LOGICALREP_COLUMN_TEXT);
|
||||
outputstr = OidOutputFunctionCall(typclass->typoutput, values[i]);
|
||||
pq_sendcountedtext(out, outputstr, strlen(outputstr), false);
|
||||
pfree(outputstr);
|
||||
}
|
||||
|
||||
ReleaseSysCache(typtup);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Read tuple in remote format from stream.
|
||||
*
|
||||
* The returned tuple points into the input stringinfo.
|
||||
* Read tuple in logical replication format from stream.
|
||||
*/
|
||||
static void
|
||||
logicalrep_read_tuple(StringInfo in, LogicalRepTupleData *tuple)
|
||||
@@ -512,38 +545,52 @@ logicalrep_read_tuple(StringInfo in, LogicalRepTupleData *tuple)
|
||||
/* Get number of attributes */
|
||||
natts = pq_getmsgint(in, 2);
|
||||
|
||||
memset(tuple->changed, 0, sizeof(tuple->changed));
|
||||
/* Allocate space for per-column values; zero out unused StringInfoDatas */
|
||||
tuple->colvalues = (StringInfoData *) palloc0(natts * sizeof(StringInfoData));
|
||||
tuple->colstatus = (char *) palloc(natts * sizeof(char));
|
||||
|
||||
/* Read the data */
|
||||
for (i = 0; i < natts; i++)
|
||||
{
|
||||
char kind;
|
||||
int len;
|
||||
StringInfo value = &tuple->colvalues[i];
|
||||
|
||||
kind = pq_getmsgbyte(in);
|
||||
tuple->colstatus[i] = kind;
|
||||
|
||||
switch (kind)
|
||||
{
|
||||
case 'n': /* null */
|
||||
tuple->values[i] = NULL;
|
||||
tuple->changed[i] = true;
|
||||
case LOGICALREP_COLUMN_NULL:
|
||||
/* nothing more to do */
|
||||
break;
|
||||
case 'u': /* unchanged column */
|
||||
case LOGICALREP_COLUMN_UNCHANGED:
|
||||
/* we don't receive the value of an unchanged column */
|
||||
tuple->values[i] = NULL;
|
||||
break;
|
||||
case 't': /* text formatted value */
|
||||
{
|
||||
int len;
|
||||
case LOGICALREP_COLUMN_TEXT:
|
||||
len = pq_getmsgint(in, 4); /* read length */
|
||||
|
||||
tuple->changed[i] = true;
|
||||
/* and data */
|
||||
value->data = palloc(len + 1);
|
||||
pq_copymsgbytes(in, value->data, len);
|
||||
value->data[len] = '\0';
|
||||
/* make StringInfo fully valid */
|
||||
value->len = len;
|
||||
value->cursor = 0;
|
||||
value->maxlen = len;
|
||||
break;
|
||||
case LOGICALREP_COLUMN_BINARY:
|
||||
len = pq_getmsgint(in, 4); /* read length */
|
||||
|
||||
len = pq_getmsgint(in, 4); /* read length */
|
||||
|
||||
/* and data */
|
||||
tuple->values[i] = palloc(len + 1);
|
||||
pq_copymsgbytes(in, tuple->values[i], len);
|
||||
tuple->values[i][len] = '\0';
|
||||
}
|
||||
/* and data */
|
||||
value->data = palloc(len + 1);
|
||||
pq_copymsgbytes(in, value->data, len);
|
||||
/* not strictly necessary but per StringInfo practice */
|
||||
value->data[len] = '\0';
|
||||
/* make StringInfo fully valid */
|
||||
value->len = len;
|
||||
value->cursor = 0;
|
||||
value->maxlen = len;
|
||||
break;
|
||||
default:
|
||||
elog(ERROR, "unrecognized data representation type '%c'", kind);
|
||||
@@ -552,7 +599,7 @@ logicalrep_read_tuple(StringInfo in, LogicalRepTupleData *tuple)
|
||||
}
|
||||
|
||||
/*
|
||||
* Write relation attributes to the stream.
|
||||
* Write relation attribute metadata to the stream.
|
||||
*/
|
||||
static void
|
||||
logicalrep_write_attrs(StringInfo out, Relation rel)
|
||||
@@ -611,7 +658,7 @@ logicalrep_write_attrs(StringInfo out, Relation rel)
|
||||
}
|
||||
|
||||
/*
|
||||
* Read relation attribute names from the stream.
|
||||
* Read relation attribute metadata from the stream.
|
||||
*/
|
||||
static void
|
||||
logicalrep_read_attrs(StringInfo in, LogicalRepRelation *rel)
|
||||
|
||||
Reference in New Issue
Block a user