From db2fd4f3f3a60f4e36a5d93d7d0867a3705e21e2 Mon Sep 17 00:00:00 2001
From: Michael Paquier <michael@paquier.xyz>
Date: Wed, 27 Sep 2023 16:17:42 +0900
Subject: [PATCH v3] Strengthen use of ArchiveRecoveryRequested and
 InArchiveRecovery

---
 src/backend/access/transam/xlogrecovery.c     | 119 +++++++++++-------
 src/bin/pg_basebackup/t/010_pg_basebackup.pl  |   3 +-
 src/bin/pg_rewind/t/008_min_recovery_point.pl |   1 +
 3 files changed, 78 insertions(+), 45 deletions(-)

diff --git a/src/backend/access/transam/xlogrecovery.c b/src/backend/access/transam/xlogrecovery.c
index becc2bda62..7b5d1d6baa 100644
--- a/src/backend/access/transam/xlogrecovery.c
+++ b/src/backend/access/transam/xlogrecovery.c
@@ -125,15 +125,19 @@ static TimeLineID curFileTLI;
 
 /*
  * When ArchiveRecoveryRequested is set, archive recovery was requested,
- * ie. signal files were present.  When InArchiveRecovery is set, we are
- * currently recovering using offline XLOG archives.  These variables are only
- * valid in the startup process.
+ * ie. signal files or backup_label were present.  When InArchiveRecovery is
+ * set, we are currently recovering using offline XLOG archives.  These
+ + variables are only valid in the startup process.  Note that the presence of
+ * a backup_label file forces archive recovery even if there is no signal
+ * file.
  *
  * When ArchiveRecoveryRequested is true, but InArchiveRecovery is false, we're
  * currently performing crash recovery using only XLOG files in pg_wal, but
  * will switch to using offline XLOG archives as soon as we reach the end of
  * WAL in pg_wal.
-*/
+ *
+ * InArchiveRecovery should never be set without ArchiveRecoveryRequested.
+ */
 bool		ArchiveRecoveryRequested = false;
 bool		InArchiveRecovery = false;
 
@@ -540,42 +544,7 @@ InitWalRecovery(ControlFileData *ControlFile, bool *wasShutdown_ptr,
 	readRecoverySignalFile();
 	validateRecoveryParameters();
 
-	if (ArchiveRecoveryRequested)
-	{
-		if (StandbyModeRequested)
-			ereport(LOG,
-					(errmsg("entering standby mode")));
-		else if (recoveryTarget == RECOVERY_TARGET_XID)
-			ereport(LOG,
-					(errmsg("starting point-in-time recovery to XID %u",
-							recoveryTargetXid)));
-		else if (recoveryTarget == RECOVERY_TARGET_TIME)
-			ereport(LOG,
-					(errmsg("starting point-in-time recovery to %s",
-							timestamptz_to_str(recoveryTargetTime))));
-		else if (recoveryTarget == RECOVERY_TARGET_NAME)
-			ereport(LOG,
-					(errmsg("starting point-in-time recovery to \"%s\"",
-							recoveryTargetName)));
-		else if (recoveryTarget == RECOVERY_TARGET_LSN)
-			ereport(LOG,
-					(errmsg("starting point-in-time recovery to WAL location (LSN) \"%X/%X\"",
-							LSN_FORMAT_ARGS(recoveryTargetLSN))));
-		else if (recoveryTarget == RECOVERY_TARGET_IMMEDIATE)
-			ereport(LOG,
-					(errmsg("starting point-in-time recovery to earliest consistent point")));
-		else
-			ereport(LOG,
-					(errmsg("starting archive recovery")));
-	}
-
-	/*
-	 * Take ownership of the wakeup latch if we're going to sleep during
-	 * recovery.
-	 */
-	if (ArchiveRecoveryRequested)
-		OwnLatch(&XLogRecoveryCtl->recoveryWakeupLatch);
-
+	/* Set the WAL reading processor, as backup_label may need it */
 	private = palloc0(sizeof(XLogPageReadPrivate));
 	xlogreader =
 		XLogReaderAllocate(wal_segment_size, NULL,
@@ -609,11 +578,29 @@ InitWalRecovery(ControlFileData *ControlFile, bool *wasShutdown_ptr,
 	replay_image_masked = (char *) palloc(BLCKSZ);
 	primary_image_masked = (char *) palloc(BLCKSZ);
 
+	/*
+	 * Read the backup_label file.  We want to run this part of the recovery
+	 * process after checking for signal files and perform validation of the
+	 * recovery parameters, as it may be possible that a backup needs to be
+	 * run, but no signal files have been set.
+	 */
 	if (read_backup_label(&CheckPointLoc, &CheckPointTLI, &backupEndRequired,
 						  &backupFromStandby))
 	{
 		List	   *tablespaces = NIL;
 
+		if (!ArchiveRecoveryRequested)
+			ereport(FATAL,
+					(errmsg("could not find recovery.signal or standby.signal when recovering with backup_label"),
+					 errhint("If you are restoring from a backup, touch \"%s/recovery.signal\" or \"%s/standby.signal\" and add required recovery options.",
+							 DataDir, DataDir)));
+
+		/*
+		 * Take ownership of the wakeup latch if we're going to sleep during
+		 * recovery if needed.
+		 */
+		OwnLatch(&XLogRecoveryCtl->recoveryWakeupLatch);
+
 		/*
 		 * Archive recovery was requested, and thanks to the backup label
 		 * file, we know how far we need to replay to reach consistency. Enter
@@ -651,20 +638,20 @@ InitWalRecovery(ControlFileData *ControlFile, bool *wasShutdown_ptr,
 								checkPoint.ThisTimeLineID))
 					ereport(FATAL,
 							(errmsg("could not find redo location referenced by checkpoint record"),
-							 errhint("If you are restoring from a backup, touch \"%s/recovery.signal\" and add required recovery options.\n"
+							 errhint("If you are restoring from a backup, touch \"%s/recovery.signal\" or \"%s/standby.signal\" and add required recovery options.\n"
 									 "If you are not restoring from a backup, try removing the file \"%s/backup_label\".\n"
 									 "Be careful: removing \"%s/backup_label\" will result in a corrupt cluster if restoring from a backup.",
-									 DataDir, DataDir, DataDir)));
+									 DataDir, DataDir, DataDir, DataDir)));
 			}
 		}
 		else
 		{
 			ereport(FATAL,
 					(errmsg("could not locate required checkpoint record"),
-					 errhint("If you are restoring from a backup, touch \"%s/recovery.signal\" and add required recovery options.\n"
+					 errhint("If you are restoring from a backup, touch \"%s/recovery.signal\" or \"%s/standby.signal\" and add required recovery options.\n"
 							 "If you are not restoring from a backup, try removing the file \"%s/backup_label\".\n"
 							 "Be careful: removing \"%s/backup_label\" will result in a corrupt cluster if restoring from a backup.",
-							 DataDir, DataDir, DataDir)));
+							 DataDir, DataDir, DataDir, DataDir)));
 			wasShutdown = false;	/* keep compiler quiet */
 		}
 
@@ -706,6 +693,15 @@ InitWalRecovery(ControlFileData *ControlFile, bool *wasShutdown_ptr,
 	}
 	else
 	{
+		/* No backup_label file has been found if we are here. */
+
+		/*
+		 * Take ownership of the wakeup latch if we're going to sleep during
+		 * recovery if needed.
+		 */
+		if (ArchiveRecoveryRequested)
+			OwnLatch(&XLogRecoveryCtl->recoveryWakeupLatch);
+
 		/*
 		 * If tablespace_map file is present without backup_label file, there
 		 * is no use of such file.  There is no harm in retaining it, but it
@@ -789,6 +785,35 @@ InitWalRecovery(ControlFileData *ControlFile, bool *wasShutdown_ptr,
 		wasShutdown = ((record->xl_info & ~XLR_INFO_MASK) == XLOG_CHECKPOINT_SHUTDOWN);
 	}
 
+	if (ArchiveRecoveryRequested)
+	{
+		if (StandbyModeRequested)
+			ereport(LOG,
+					(errmsg("entering standby mode")));
+		else if (recoveryTarget == RECOVERY_TARGET_XID)
+			ereport(LOG,
+					(errmsg("starting point-in-time recovery to XID %u",
+							recoveryTargetXid)));
+		else if (recoveryTarget == RECOVERY_TARGET_TIME)
+			ereport(LOG,
+					(errmsg("starting point-in-time recovery to %s",
+							timestamptz_to_str(recoveryTargetTime))));
+		else if (recoveryTarget == RECOVERY_TARGET_NAME)
+			ereport(LOG,
+					(errmsg("starting point-in-time recovery to \"%s\"",
+							recoveryTargetName)));
+		else if (recoveryTarget == RECOVERY_TARGET_LSN)
+			ereport(LOG,
+					(errmsg("starting point-in-time recovery to WAL location (LSN) \"%X/%X\"",
+							LSN_FORMAT_ARGS(recoveryTargetLSN))));
+		else if (recoveryTarget == RECOVERY_TARGET_IMMEDIATE)
+			ereport(LOG,
+					(errmsg("starting point-in-time recovery to earliest consistent point")));
+		else
+			ereport(LOG,
+					(errmsg("starting archive recovery")));
+	}
+
 	/*
 	 * If the location of the checkpoint record is not on the expected
 	 * timeline in the history of the requested timeline, we cannot proceed:
@@ -1574,6 +1599,12 @@ ShutdownWalRecovery(void)
 	 */
 	if (ArchiveRecoveryRequested)
 		DisownLatch(&XLogRecoveryCtl->recoveryWakeupLatch);
+
+	/*
+	 * InArchiveRecovery should never have been set without
+	 * ArchiveRecoveryRequested.
+	 */
+	Assert(ArchiveRecoveryRequested || !InArchiveRecovery);
 }
 
 /*
diff --git a/src/bin/pg_basebackup/t/010_pg_basebackup.pl b/src/bin/pg_basebackup/t/010_pg_basebackup.pl
index b9f5e1266b..b9e54f4562 100644
--- a/src/bin/pg_basebackup/t/010_pg_basebackup.pl
+++ b/src/bin/pg_basebackup/t/010_pg_basebackup.pl
@@ -392,7 +392,8 @@ SKIP:
 	my $node2 = PostgreSQL::Test::Cluster->new('replica');
 
 	# Recover main data directory
-	$node2->init_from_backup($node, 'tarbackup2', tar_program => $tar);
+	$node2->init_from_backup($node, 'tarbackup2', tar_program => $tar,
+	    has_restoring => 1);
 
 	# Recover tablespace into a new directory (not where it was!)
 	my $repTsDir = "$tempdir/tblspc1replica";
diff --git a/src/bin/pg_rewind/t/008_min_recovery_point.pl b/src/bin/pg_rewind/t/008_min_recovery_point.pl
index d4c89451e6..1cff5b7019 100644
--- a/src/bin/pg_rewind/t/008_min_recovery_point.pl
+++ b/src/bin/pg_rewind/t/008_min_recovery_point.pl
@@ -152,6 +152,7 @@ move(
 	"$tmp_folder/node_2-postgresql.conf.tmp",
 	"$node_2_pgdata/postgresql.conf");
 
+$node_2->append_conf('standby.signal', '');
 $node_2->start;
 
 # Check contents of the test tables after rewind. The rows inserted in node 3
-- 
2.40.1

