diff options
author | Tomas Vondra | 2022-03-25 23:45:21 +0000 |
---|---|---|
committer | Tomas Vondra | 2022-03-26 00:01:27 +0000 |
commit | 923def9a533a7d986acfb524139d8b9e5466d0a5 (patch) | |
tree | b6ce8d5bfe8d932e3cc89e52aba68519558e8033 /src/backend/replication/pgoutput/pgoutput.c | |
parent | 05843b1aa49df2ecc9b97c693b755bd1b6f856a9 (diff) |
Allow specifying column lists for logical replication
This allows specifying an optional column list when adding a table to
logical replication. The column list may be specified after the table
name, enclosed in parentheses. Columns not included in this list are not
sent to the subscriber, allowing the schema on the subscriber to be a
subset of the publisher schema.
For UPDATE/DELETE publications, the column list needs to cover all
REPLICA IDENTITY columns. For INSERT publications, the column list is
arbitrary and may omit some REPLICA IDENTITY columns. Furthermore, if
the table uses REPLICA IDENTITY FULL, column list is not allowed.
The column list can contain only simple column references. Complex
expressions, function calls etc. are not allowed. This restriction could
be relaxed in the future.
During the initial table synchronization, only columns included in the
column list are copied to the subscriber. If the subscription has
several publications, containing the same table with different column
lists, columns specified in any of the lists will be copied.
This means all columns are replicated if the table has no column list
at all (which is treated as column list with all columns), or when of
the publications is defined as FOR ALL TABLES (possibly IN SCHEMA that
matches the schema of the table).
For partitioned tables, publish_via_partition_root determines whether
the column list for the root or the leaf relation will be used. If the
parameter is 'false' (the default), the list defined for the leaf
relation is used. Otherwise, the column list for the root partition
will be used.
Psql commands \dRp+ and \d <table-name> now display any column lists.
Author: Tomas Vondra, Alvaro Herrera, Rahila Syed
Reviewed-by: Peter Eisentraut, Alvaro Herrera, Vignesh C, Ibrar Ahmed,
Amit Kapila, Hou zj, Peter Smith, Wang wei, Tang, Shi yu
Discussion: https://postgr.es/m/CAH2L28vddB_NFdRVpuyRBJEBWjz4BSyTB=_ektNRH8NJ1jf95g@mail.gmail.com
Diffstat (limited to 'src/backend/replication/pgoutput/pgoutput.c')
-rw-r--r-- | src/backend/replication/pgoutput/pgoutput.c | 201 |
1 files changed, 174 insertions, 27 deletions
diff --git a/src/backend/replication/pgoutput/pgoutput.c b/src/backend/replication/pgoutput/pgoutput.c index 292e7299d88..893833ea83c 100644 --- a/src/backend/replication/pgoutput/pgoutput.c +++ b/src/backend/replication/pgoutput/pgoutput.c @@ -30,6 +30,7 @@ #include "utils/inval.h" #include "utils/lsyscache.h" #include "utils/memutils.h" +#include "utils/rel.h" #include "utils/syscache.h" #include "utils/varlena.h" @@ -90,7 +91,8 @@ static List *LoadPublications(List *pubnames); static void publication_invalidation_cb(Datum arg, int cacheid, uint32 hashvalue); static void send_relation_and_attrs(Relation relation, TransactionId xid, - LogicalDecodingContext *ctx); + LogicalDecodingContext *ctx, + Bitmapset *columns); static void send_repl_origin(LogicalDecodingContext *ctx, RepOriginId origin_id, XLogRecPtr origin_lsn, bool send_origin); @@ -148,9 +150,6 @@ typedef struct RelationSyncEntry */ ExprState *exprstate[NUM_ROWFILTER_PUBACTIONS]; EState *estate; /* executor state used for row filter */ - MemoryContext cache_expr_cxt; /* private context for exprstate and - * estate, if any */ - TupleTableSlot *new_slot; /* slot for storing new tuple */ TupleTableSlot *old_slot; /* slot for storing old tuple */ @@ -169,6 +168,19 @@ typedef struct RelationSyncEntry * having identical TupleDesc. */ AttrMap *attrmap; + + /* + * Columns included in the publication, or NULL if all columns are + * included implicitly. Note that the attnums in this bitmap are not + * shifted by FirstLowInvalidHeapAttributeNumber. + */ + Bitmapset *columns; + + /* + * Private context to store additional data for this entry - state for + * the row filter expressions, column list, etc. + */ + MemoryContext entry_cxt; } RelationSyncEntry; /* Map used to remember which relation schemas we sent. */ @@ -200,6 +212,11 @@ static bool pgoutput_row_filter(Relation relation, TupleTableSlot *old_slot, RelationSyncEntry *entry, ReorderBufferChangeType *action); +/* column list routines */ +static void pgoutput_column_list_init(PGOutputData *data, + List *publications, + RelationSyncEntry *entry); + /* * Specify output plugin callbacks */ @@ -622,11 +639,11 @@ maybe_send_schema(LogicalDecodingContext *ctx, { Relation ancestor = RelationIdGetRelation(relentry->publish_as_relid); - send_relation_and_attrs(ancestor, xid, ctx); + send_relation_and_attrs(ancestor, xid, ctx, relentry->columns); RelationClose(ancestor); } - send_relation_and_attrs(relation, xid, ctx); + send_relation_and_attrs(relation, xid, ctx, relentry->columns); if (in_streaming) set_schema_sent_in_streamed_txn(relentry, topxid); @@ -639,7 +656,8 @@ maybe_send_schema(LogicalDecodingContext *ctx, */ static void send_relation_and_attrs(Relation relation, TransactionId xid, - LogicalDecodingContext *ctx) + LogicalDecodingContext *ctx, + Bitmapset *columns) { TupleDesc desc = RelationGetDescr(relation); int i; @@ -662,13 +680,17 @@ send_relation_and_attrs(Relation relation, TransactionId xid, if (att->atttypid < FirstGenbkiObjectId) continue; + /* Skip this attribute if it's not present in the column list */ + if (columns != NULL && !bms_is_member(att->attnum, columns)) + continue; + OutputPluginPrepareWrite(ctx, false); logicalrep_write_typ(ctx->out, xid, att->atttypid); OutputPluginWrite(ctx, false); } OutputPluginPrepareWrite(ctx, false); - logicalrep_write_rel(ctx->out, xid, relation); + logicalrep_write_rel(ctx->out, xid, relation, columns); OutputPluginWrite(ctx, false); } @@ -723,6 +745,28 @@ pgoutput_row_filter_exec_expr(ExprState *state, ExprContext *econtext) } /* + * Make sure the per-entry memory context exists. + */ +static void +pgoutput_ensure_entry_cxt(PGOutputData *data, RelationSyncEntry *entry) +{ + Relation relation; + + /* The context may already exist, in which case bail out. */ + if (entry->entry_cxt) + return; + + relation = RelationIdGetRelation(entry->publish_as_relid); + + entry->entry_cxt = AllocSetContextCreate(data->cachectx, + "entry private context", + ALLOCSET_SMALL_SIZES); + + MemoryContextCopyAndSetIdentifier(entry->entry_cxt, + RelationGetRelationName(relation)); +} + +/* * Initialize the row filter. */ static void @@ -842,21 +886,13 @@ pgoutput_row_filter_init(PGOutputData *data, List *publications, { Relation relation = RelationIdGetRelation(entry->publish_as_relid); - Assert(entry->cache_expr_cxt == NULL); - - /* Create the memory context for row filters */ - entry->cache_expr_cxt = AllocSetContextCreate(data->cachectx, - "Row filter expressions", - ALLOCSET_DEFAULT_SIZES); - - MemoryContextCopyAndSetIdentifier(entry->cache_expr_cxt, - RelationGetRelationName(relation)); + pgoutput_ensure_entry_cxt(data, entry); /* * Now all the filters for all pubactions are known. Combine them when * their pubactions are the same. */ - oldctx = MemoryContextSwitchTo(entry->cache_expr_cxt); + oldctx = MemoryContextSwitchTo(entry->entry_cxt); entry->estate = create_estate_for_relation(relation); for (idx = 0; idx < NUM_ROWFILTER_PUBACTIONS; idx++) { @@ -880,6 +916,105 @@ pgoutput_row_filter_init(PGOutputData *data, List *publications, } /* + * Initialize the column list. + */ +static void +pgoutput_column_list_init(PGOutputData *data, List *publications, + RelationSyncEntry *entry) +{ + ListCell *lc; + + /* + * Find if there are any column lists for this relation. If there are, + * build a bitmap merging all the column lists. + * + * All the given publication-table mappings must be checked. + * + * Multiple publications might have multiple column lists for this relation. + * + * FOR ALL TABLES and FOR ALL TABLES IN SCHEMA implies "don't use column + * list" so it takes precedence. + */ + foreach(lc, publications) + { + Publication *pub = lfirst(lc); + HeapTuple cftuple = NULL; + Datum cfdatum = 0; + + /* + * Assume there's no column list. Only if we find pg_publication_rel + * entry with a column list we'll switch it to false. + */ + bool pub_no_list = true; + + /* + * If the publication is FOR ALL TABLES then it is treated the same as if + * there are no column lists (even if other publications have a list). + */ + if (!pub->alltables) + { + /* + * Check for the presence of a column list in this publication. + * + * Note: If we find no pg_publication_rel row, it's a publication + * defined for a whole schema, so it can't have a column list, just + * like a FOR ALL TABLES publication. + */ + cftuple = SearchSysCache2(PUBLICATIONRELMAP, + ObjectIdGetDatum(entry->publish_as_relid), + ObjectIdGetDatum(pub->oid)); + + if (HeapTupleIsValid(cftuple)) + { + /* + * Lookup the column list attribute. + * + * Note: We update the pub_no_list value directly, because if + * the value is NULL, we have no list (and vice versa). + */ + cfdatum = SysCacheGetAttr(PUBLICATIONRELMAP, cftuple, + Anum_pg_publication_rel_prattrs, + &pub_no_list); + + /* + * Build the column list bitmap in the per-entry context. + * + * We need to merge column lists from all publications, so we + * update the same bitmapset. If the column list is null, we + * interpret it as replicating all columns. + */ + if (!pub_no_list) /* when not null */ + { + pgoutput_ensure_entry_cxt(data, entry); + + entry->columns = pub_collist_to_bitmapset(entry->columns, + cfdatum, + entry->entry_cxt); + } + } + } + + /* + * Found a publication with no column list, so we're done. But first + * discard column list we might have from preceding publications. + */ + if (pub_no_list) + { + if (cftuple) + ReleaseSysCache(cftuple); + + bms_free(entry->columns); + entry->columns = NULL; + + break; + } + + ReleaseSysCache(cftuple); + } /* loop all subscribed publications */ + +} + +/* * Initialize the slot for storing new and old tuples, and build the map that * will be used to convert the relation's tuples into the ancestor's format. */ @@ -1243,7 +1378,7 @@ pgoutput_change(LogicalDecodingContext *ctx, ReorderBufferTXN *txn, OutputPluginPrepareWrite(ctx, true); logicalrep_write_insert(ctx->out, xid, targetrel, new_slot, - data->binary); + data->binary, relentry->columns); OutputPluginWrite(ctx, true); break; case REORDER_BUFFER_CHANGE_UPDATE: @@ -1297,11 +1432,13 @@ pgoutput_change(LogicalDecodingContext *ctx, ReorderBufferTXN *txn, { case REORDER_BUFFER_CHANGE_INSERT: logicalrep_write_insert(ctx->out, xid, targetrel, - new_slot, data->binary); + new_slot, data->binary, + relentry->columns); break; case REORDER_BUFFER_CHANGE_UPDATE: logicalrep_write_update(ctx->out, xid, targetrel, - old_slot, new_slot, data->binary); + old_slot, new_slot, data->binary, + relentry->columns); break; case REORDER_BUFFER_CHANGE_DELETE: logicalrep_write_delete(ctx->out, xid, targetrel, @@ -1794,8 +1931,9 @@ get_rel_sync_entry(PGOutputData *data, Relation relation) entry->new_slot = NULL; entry->old_slot = NULL; memset(entry->exprstate, 0, sizeof(entry->exprstate)); - entry->cache_expr_cxt = NULL; + entry->entry_cxt = NULL; entry->publish_as_relid = InvalidOid; + entry->columns = NULL; entry->attrmap = NULL; } @@ -1841,6 +1979,8 @@ get_rel_sync_entry(PGOutputData *data, Relation relation) entry->schema_sent = false; list_free(entry->streamed_txns); entry->streamed_txns = NIL; + bms_free(entry->columns); + entry->columns = NULL; entry->pubactions.pubinsert = false; entry->pubactions.pubupdate = false; entry->pubactions.pubdelete = false; @@ -1865,17 +2005,18 @@ get_rel_sync_entry(PGOutputData *data, Relation relation) /* * Row filter cache cleanups. */ - if (entry->cache_expr_cxt) - MemoryContextDelete(entry->cache_expr_cxt); + if (entry->entry_cxt) + MemoryContextDelete(entry->entry_cxt); - entry->cache_expr_cxt = NULL; + entry->entry_cxt = NULL; entry->estate = NULL; memset(entry->exprstate, 0, sizeof(entry->exprstate)); /* * Build publication cache. We can't use one provided by relcache as - * relcache considers all publications given relation is in, but here - * we only need to consider ones that the subscriber requested. + * relcache considers all publications that the given relation is in, + * but here we only need to consider ones that the subscriber + * requested. */ foreach(lc, data->publications) { @@ -1946,6 +2087,9 @@ get_rel_sync_entry(PGOutputData *data, Relation relation) } /* + * If the relation is to be published, determine actions to + * publish, and list of columns, if appropriate. + * * Don't publish changes for partitioned tables, because * publishing those of its partitions suffices, unless partition * changes won't be published due to pubviaroot being set. @@ -2007,6 +2151,9 @@ get_rel_sync_entry(PGOutputData *data, Relation relation) /* Initialize the row filter */ pgoutput_row_filter_init(data, rel_publications, entry); + + /* Initialize the column list */ + pgoutput_column_list_init(data, rel_publications, entry); } list_free(pubids); |