From 7be51eb4e169672d2029d955cb776e2252e6b7d3 Mon Sep 17 00:00:00 2001
From: Daniel Gustafsson <dgustafsson@postgresql.org>
Date: Wed, 30 Apr 2025 20:36:23 +0200
Subject: [PATCH] Fix assertion failure in snapshot building

Clear any potential stale next_phase_at value from the snapshot
builder which otherwise may trip an assertion check ensuring
that there is no next_phase_at value.

This can be reproduced by running 80 concurrent sessions like
the below where $c is a loop counter (assumes there has been
1..$c databases created) :

  echo "
    CREATE TABLE replication_example(id SERIAL PRIMARY KEY,
                                     somedata int,
                                     text varchar(120));
    SELECT 'init' FROM
      pg_create_logical_replication_slot('regression_slot_$c',
                                         'test_decoding');
    SELECT data FROM
      pg_logical_slot_get_changes('regression_slot_$c', NULL,
                                  NULL, 'include-xids', '0',
                                  'skip-empty-xacts', '1');
  " | psql -d regress_$c >>psql.log &

This was originally committed as 48efb23 and backpatched down to
v16, but since then there have been reports of this happening on
v14 and v15 as well so this is a backpatch of 48efb23 down to 14.

Bug: #17695
Author: Masahiko Sawada <sawada.mshk@gmail.com>
Reviewed-by: Alexander Lakhin <exclusion@gmail.com>
Reported-by: bowenshi <zxwsbg@qq.com>
Reported-by: Alexander Pyhalov <a.pyhalov@postgrespro.ru>
Reported-by: Teja Mupparti
Discussion: https://postgr.es/m/17695-6be9277c9295985f@postgresql.org
Backpatch-through: v14
---
 src/backend/replication/logical/snapbuild.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/backend/replication/logical/snapbuild.c b/src/backend/replication/logical/snapbuild.c
index 8abd669c51e..6025f2727ae 100644
--- a/src/backend/replication/logical/snapbuild.c
+++ b/src/backend/replication/logical/snapbuild.c
@@ -2021,8 +2021,12 @@ SnapBuildRestore(SnapBuild *builder, XLogRecPtr lsn)
 	if (TransactionIdPrecedes(ondisk.builder.xmin, builder->initial_xmin_horizon))
 		goto snapshot_not_interesting;
 
-	/* consistent snapshots have no next phase */
+	/*
+	 * Consistent snapshots have no next phase. Reset next_phase_at as it is
+	 * possible that an old value may remain.
+	 */
 	Assert(ondisk.builder.next_phase_at == InvalidTransactionId);
+	builder->next_phase_at = InvalidTransactionId;
 
 	/* ok, we think the snapshot is sensible, copy over everything important */
 	builder->xmin = ondisk.builder.xmin;
-- 
2.39.5