1
0
mirror of https://github.com/postgres/postgres.git synced 2025-07-15 19:21:59 +03:00

Allow specifying row filters for logical replication of tables.

This feature adds row filtering for publication tables. When a publication
is defined or modified, an optional WHERE clause can be specified. Rows
that don't satisfy this WHERE clause will be filtered out. This allows a
set of tables to be partially replicated. The row filter is per table. A
new row filter can be added simply by specifying a WHERE clause after the
table name. The WHERE clause must be enclosed by parentheses.

The row filter WHERE clause for a table added to a publication that
publishes UPDATE and/or DELETE operations must contain only columns that
are covered by REPLICA IDENTITY. The row filter WHERE clause for a table
added to a publication that publishes INSERT can use any column. If the
row filter evaluates to NULL, it is regarded as "false". The WHERE clause
only allows simple expressions that don't have user-defined functions,
user-defined operators, user-defined types, user-defined collations,
non-immutable built-in functions, or references to system columns. These
restrictions could be addressed in the future.

If you choose to do the initial table synchronization, only data that
satisfies the row filters is copied to the subscriber. If the subscription
has several publications in which a table has been published with
different WHERE clauses, rows that satisfy ANY of the expressions will be
copied. If a subscriber is a pre-15 version, the initial table
synchronization won't use row filters even if they are defined in the
publisher.

The row filters are applied before publishing the changes. If the
subscription has several publications in which the same table has been
published with different filters (for the same publish operation), those
expressions get OR'ed together so that rows satisfying any of the
expressions will be replicated.

This means all the other filters become redundant if (a) one of the
publications has no filter at all, (b) one of the publications was
created using FOR ALL TABLES, or (c) one of the publications was created
using FOR ALL TABLES IN SCHEMA and the table belongs to that same schema.

If your publication contains a partitioned table, the publication
parameter publish_via_partition_root determines if it uses the partition's
row filter (if the parameter is false, the default) or the root
partitioned table's row filter.

The psql commands \dRp+ and \d <table-name> will display any row filters.

Author: Hou Zhijie, Euler Taveira, Peter Smith, Ajin Cherian
Reviewed-by: Greg Nancarrow, Haiying Tang, Amit Kapila, Tomas Vondra, Dilip Kumar, Vignesh C, Alvaro Herrera, Andres Freund, Wei Wang
Discussion: https://www.postgresql.org/message-id/flat/CAHE3wggb715X%2BmK_DitLXF25B%3DjE6xyNCH4YOwM860JR7HarGQ%40mail.gmail.com
This commit is contained in:
Amit Kapila
2022-02-22 07:54:12 +05:30
parent ebf6c5249b
commit 52e4f0cd47
33 changed files with 3120 additions and 243 deletions

View File

@ -66,6 +66,7 @@
#include "catalog/schemapg.h"
#include "catalog/storage.h"
#include "commands/policy.h"
#include "commands/publicationcmds.h"
#include "commands/trigger.h"
#include "miscadmin.h"
#include "nodes/makefuncs.h"
@ -2419,8 +2420,8 @@ RelationDestroyRelation(Relation relation, bool remember_tupdesc)
bms_free(relation->rd_pkattr);
bms_free(relation->rd_idattr);
bms_free(relation->rd_hotblockingattr);
if (relation->rd_pubactions)
pfree(relation->rd_pubactions);
if (relation->rd_pubdesc)
pfree(relation->rd_pubdesc);
if (relation->rd_options)
pfree(relation->rd_options);
if (relation->rd_indextuple)
@ -5523,38 +5524,57 @@ RelationGetExclusionInfo(Relation indexRelation,
}
/*
* Get publication actions for the given relation.
* Get the publication information for the given relation.
*
* Traverse all the publications which the relation is in to get the
* publication actions and validate the row filter expressions for such
* publications if any. We consider the row filter expression as invalid if it
* references any column which is not part of REPLICA IDENTITY.
*
* To avoid fetching the publication information repeatedly, we cache the
* publication actions and row filter validation information.
*/
struct PublicationActions *
GetRelationPublicationActions(Relation relation)
void
RelationBuildPublicationDesc(Relation relation, PublicationDesc *pubdesc)
{
List *puboids;
ListCell *lc;
MemoryContext oldcxt;
Oid schemaid;
PublicationActions *pubactions = palloc0(sizeof(PublicationActions));
List *ancestors = NIL;
Oid relid = RelationGetRelid(relation);
/*
* If not publishable, it publishes no actions. (pgoutput_change() will
* ignore it.)
*/
if (!is_publishable_relation(relation))
return pubactions;
{
memset(pubdesc, 0, sizeof(PublicationDesc));
pubdesc->rf_valid_for_update = true;
pubdesc->rf_valid_for_delete = true;
return;
}
if (relation->rd_pubactions)
return memcpy(pubactions, relation->rd_pubactions,
sizeof(PublicationActions));
if (relation->rd_pubdesc)
{
memcpy(pubdesc, relation->rd_pubdesc, sizeof(PublicationDesc));
return;
}
memset(pubdesc, 0, sizeof(PublicationDesc));
pubdesc->rf_valid_for_update = true;
pubdesc->rf_valid_for_delete = true;
/* Fetch the publication membership info. */
puboids = GetRelationPublications(RelationGetRelid(relation));
puboids = GetRelationPublications(relid);
schemaid = RelationGetNamespace(relation);
puboids = list_concat_unique_oid(puboids, GetSchemaPublications(schemaid));
if (relation->rd_rel->relispartition)
{
/* Add publications that the ancestors are in too. */
List *ancestors = get_partition_ancestors(RelationGetRelid(relation));
ListCell *lc;
ancestors = get_partition_ancestors(relid);
foreach(lc, ancestors)
{
@ -5582,35 +5602,53 @@ GetRelationPublicationActions(Relation relation)
pubform = (Form_pg_publication) GETSTRUCT(tup);
pubactions->pubinsert |= pubform->pubinsert;
pubactions->pubupdate |= pubform->pubupdate;
pubactions->pubdelete |= pubform->pubdelete;
pubactions->pubtruncate |= pubform->pubtruncate;
pubdesc->pubactions.pubinsert |= pubform->pubinsert;
pubdesc->pubactions.pubupdate |= pubform->pubupdate;
pubdesc->pubactions.pubdelete |= pubform->pubdelete;
pubdesc->pubactions.pubtruncate |= pubform->pubtruncate;
/*
* Check if all columns referenced in the filter expression are part of
* the REPLICA IDENTITY index or not.
*
* If the publication is FOR ALL TABLES then it means the table has no
* row filters and we can skip the validation.
*/
if (!pubform->puballtables &&
(pubform->pubupdate || pubform->pubdelete) &&
contain_invalid_rfcolumn(pubid, relation, ancestors,
pubform->pubviaroot))
{
if (pubform->pubupdate)
pubdesc->rf_valid_for_update = false;
if (pubform->pubdelete)
pubdesc->rf_valid_for_delete = false;
}
ReleaseSysCache(tup);
/*
* If we know everything is replicated, there is no point to check for
* other publications.
* If we know everything is replicated and the row filter is invalid
* for update and delete, there is no point to check for other
* publications.
*/
if (pubactions->pubinsert && pubactions->pubupdate &&
pubactions->pubdelete && pubactions->pubtruncate)
if (pubdesc->pubactions.pubinsert && pubdesc->pubactions.pubupdate &&
pubdesc->pubactions.pubdelete && pubdesc->pubactions.pubtruncate &&
!pubdesc->rf_valid_for_update && !pubdesc->rf_valid_for_delete)
break;
}
if (relation->rd_pubactions)
if (relation->rd_pubdesc)
{
pfree(relation->rd_pubactions);
relation->rd_pubactions = NULL;
pfree(relation->rd_pubdesc);
relation->rd_pubdesc = NULL;
}
/* Now save copy of the actions in the relcache entry. */
/* Now save copy of the descriptor in the relcache entry. */
oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
relation->rd_pubactions = palloc(sizeof(PublicationActions));
memcpy(relation->rd_pubactions, pubactions, sizeof(PublicationActions));
relation->rd_pubdesc = palloc(sizeof(PublicationDesc));
memcpy(relation->rd_pubdesc, pubdesc, sizeof(PublicationDesc));
MemoryContextSwitchTo(oldcxt);
return pubactions;
}
/*
@ -6163,7 +6201,7 @@ load_relcache_init_file(bool shared)
rel->rd_pkattr = NULL;
rel->rd_idattr = NULL;
rel->rd_hotblockingattr = NULL;
rel->rd_pubactions = NULL;
rel->rd_pubdesc = NULL;
rel->rd_statvalid = false;
rel->rd_statlist = NIL;
rel->rd_fkeyvalid = false;