From 7be51eb4e169672d2029d955cb776e2252e6b7d3 Mon Sep 17 00:00:00 2001
From: Daniel Gustafsson <dgustafsson@postgresql.org>
Date: Wed, 30 Apr 2025 20:36:23 +0200
Subject: [PATCH] Fix assertion failure in snapshot building
Clear any potential stale next_phase_at value from the snapshot
builder which otherwise may trip an assertion check ensuring
that there is no next_phase_at value.
This can be reproduced by running 80 concurrent sessions like
the one below, where $c is a loop counter (this assumes that a
database regress_$c has been created for each value of $c):
echo "
CREATE TABLE replication_example(id SERIAL PRIMARY KEY,
somedata int,
text varchar(120));
SELECT 'init' FROM
pg_create_logical_replication_slot('regression_slot_$c',
'test_decoding');
SELECT data FROM
pg_logical_slot_get_changes('regression_slot_$c', NULL,
NULL, 'include-xids', '0',
'skip-empty-xacts', '1');
" | psql -d regress_$c >>psql.log &
This was originally committed as 48efb23 and backpatched down to
v16, but since then there have been reports of this happening on
v14 and v15 as well, so this is a backpatch of 48efb23 down to v14.
Bug: #17695
Author: Masahiko Sawada <sawada.mshk@gmail.com>
Reviewed-by: Alexander Lakhin <exclusion@gmail.com>
Reported-by: bowenshi <zxwsbg@qq.com>
Reported-by: Alexander Pyhalov <a.pyhalov@postgrespro.ru>
Reported-by: Teja Mupparti
Discussion: https://postgr.es/m/17695-6be9277c9295985f@postgresql.org
Backpatch-through: v14
---
src/backend/replication/logical/snapbuild.c | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/src/backend/replication/logical/snapbuild.c b/src/backend/replication/logical/snapbuild.c
index 8abd669c51e..6025f2727ae 100644
--- a/src/backend/replication/logical/snapbuild.c
+++ b/src/backend/replication/logical/snapbuild.c
@@ -2021,8 +2021,12 @@ SnapBuildRestore(SnapBuild *builder, XLogRecPtr lsn)
if (TransactionIdPrecedes(ondisk.builder.xmin, builder->initial_xmin_horizon))
goto snapshot_not_interesting;
- /* consistent snapshots have no next phase */
+ /*
+ * Consistent snapshots have no next phase. Reset next_phase_at as it is
+ * possible that an old value may remain.
+ */
Assert(ondisk.builder.next_phase_at == InvalidTransactionId);
+ builder->next_phase_at = InvalidTransactionId;
/* ok, we think the snapshot is sensible, copy over everything important */
builder->xmin = ondisk.builder.xmin;
--
2.39.5