From 8f7975a96f9864b3d62da303f02b7edc6bac1de6 Mon Sep 17 00:00:00 2001
From: Christoph Berg <myon@debian.org>
Date: Fri, 30 May 2025 17:58:35 +0200
Subject: [PATCH v5 2/2] Add mode and flush_unlogged options to CHECKPOINT

Field reports indicate that some users are running CHECKPOINT just
before shutting down to reduce the amount of data that the shutdown
checkpoint has to write out, making restarts faster.

That works well unless big unlogged tables are in play; a regular
CHECKPOINT does not flush these. Hence, add a CHECKPOINT option to force
flushing of all relations. To control the write load during these
checkpoints, add a MODE option to choose between FAST and SPREAD.
---
 doc/src/sgml/ref/checkpoint.sgml    | 82 ++++++++++++++++++++++++-----
 src/backend/parser/gram.y           |  8 +++
 src/backend/tcop/utility.c          | 60 +++++++++++++++++----
 src/bin/psql/tab-complete.in.c      |  7 +++
 src/include/nodes/parsenodes.h      |  1 +
 src/test/regress/expected/stats.out |  6 +--
 src/test/regress/sql/stats.sql      |  6 +--
 7 files changed, 141 insertions(+), 29 deletions(-)

diff --git a/doc/src/sgml/ref/checkpoint.sgml b/doc/src/sgml/ref/checkpoint.sgml
index 10a433e4757..2e95141db91 100644
--- a/doc/src/sgml/ref/checkpoint.sgml
+++ b/doc/src/sgml/ref/checkpoint.sgml
@@ -21,7 +21,12 @@ PostgreSQL documentation
 
  <refsynopsisdiv>
 <synopsis>
-CHECKPOINT
+CHECKPOINT [ ( option [, ...] ) ]
+
+<phrase>where <replaceable class="parameter">option</replaceable> can be one of:</phrase>
+
+    MODE { FAST | SPREAD }
+    FLUSH_UNLOGGED [ <replaceable class="parameter">boolean</replaceable> ]
 </synopsis>
  </refsynopsisdiv>
 
@@ -31,24 +36,32 @@ CHECKPOINT
   <para>
    A checkpoint is a point in the write-ahead log sequence at which
    all data files have been updated to reflect the information in the
-   log.  All data files will be flushed to disk.  Refer to
-   <xref linkend="wal-configuration"/> for more details about what happens
-   during a checkpoint.
+   log.  Should the system crash, recovery will start at the last checkpoint.
+   Refer to <xref linkend="wal-configuration"/> for more details about what
+   happens during a checkpoint.
   </para>
 
   <para>
-   The <command>CHECKPOINT</command> command forces a fast
-   checkpoint when the command is issued, without waiting for a
-   regular checkpoint scheduled by the system (controlled by the settings in
-   <xref linkend="runtime-config-wal-checkpoints"/>).
-   <command>CHECKPOINT</command> is not intended for use during normal
-   operation.
+   Running <command>CHECKPOINT</command> is not required during normal
+   operation; the system schedules checkpoints automatically (controlled by
+   the settings in <xref linkend="runtime-config-wal-checkpoints"/>).
+   However, it can be useful to perform an explicit checkpoint immediately
+   before shutting down the server or performing an online file system backup,
+   if you want the checkpoint implicit in these operations to have to write out
+   less data.  In particular, <literal>UNLOGGED</literal> table data is
+   normally only flushed to disk during a shutdown checkpoint, so you might use
+   the option <literal>FLUSH_UNLOGGED</literal> for an explicit checkpoint right
+   before a shutdown.
   </para>
 
   <para>
    If executed during recovery, the <command>CHECKPOINT</command> command
-   will force a restartpoint (see <xref linkend="wal-configuration"/>)
-   rather than writing a new checkpoint.
+   will force a restartpoint rather than writing a new checkpoint.  (The
+   operation will be a no-op if there is no corresponding checkpoint in the
+   write-ahead log.)  If a checkpoint is already being written when a
+   <command>CHECKPOINT</command> is issued, the running checkpoint is upgraded
+   with this command's <literal>MODE</literal> and
+   <literal>FLUSH_UNLOGGED</literal> options.
   </para>
 
   <para>
@@ -58,6 +71,51 @@ CHECKPOINT
   </para>
  </refsect1>
 
+ <refsect1>
+  <title>Parameters</title>
+
+  <variablelist>
+   <varlistentry>
+    <term><literal>MODE</literal></term>
+    <listitem>
+     <para>
+      The default <literal>FAST</literal> mode causes the checkpoint to be
+      performed as fast as possible.  A <literal>SPREAD</literal> checkpoint
+      will instead spread out the write load as determined by the
+      <xref linkend="guc-checkpoint-completion-target"/> setting, like the
+      system-scheduled checkpoints.
+     </para>
+    </listitem>
+   </varlistentry>
+
+   <varlistentry>
+    <term><literal>FLUSH_UNLOGGED</literal></term>
+    <listitem>
+     <para>
+      Normally, the data files of <literal>UNLOGGED</literal> relations are
+      not flushed to disk during a checkpoint.  Enabling this option causes
+      <literal>UNLOGGED</literal> relations to be flushed as well.  This
+      option is disabled by default.
+     </para>
+    </listitem>
+   </varlistentry>
+
+   <varlistentry>
+    <term><replaceable class="parameter">boolean</replaceable></term>
+    <listitem>
+     <para>
+      Specifies whether the selected option should be turned on or off.
+      You can write <literal>TRUE</literal>, <literal>ON</literal>, or
+      <literal>1</literal> to enable the option, and <literal>FALSE</literal>,
+      <literal>OFF</literal>, or <literal>0</literal> to disable it.  The
+      <replaceable class="parameter">boolean</replaceable> value can also
+      be omitted, in which case <literal>TRUE</literal> is assumed.
+     </para>
+    </listitem>
+   </varlistentry>
+  </variablelist>
+ </refsect1>
+
  <refsect1>
   <title>Compatibility</title>
 
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index 50f53159d58..78ee7aefd92 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -2033,6 +2033,14 @@ CheckPointStmt:
 					CheckPointStmt *n = makeNode(CheckPointStmt);
 
 					$$ = (Node *) n;
+					n->options = NIL;	/* plain CHECKPOINT: no options */
+				}
+			| CHECKPOINT '(' utility_option_list ')'
+				{
+					CheckPointStmt *n = makeNode(CheckPointStmt);
+
+					$$ = (Node *) n;
+					n->options = $3;	/* parenthesized option list */
 				}
 		;
 
diff --git a/src/backend/tcop/utility.c b/src/backend/tcop/utility.c
index cda86ad44b0..73184d6bf88 100644
--- a/src/backend/tcop/utility.c
+++ b/src/backend/tcop/utility.c
@@ -943,17 +943,55 @@ standard_ProcessUtility(PlannedStmt *pstmt,
 			break;
 
 		case T_CheckPointStmt:
-			if (!has_privs_of_role(GetUserId(), ROLE_PG_CHECKPOINT))
-				ereport(ERROR,
-						(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
-				/* translator: %s is name of a SQL command, eg CHECKPOINT */
-						 errmsg("permission denied to execute %s command",
-								"CHECKPOINT"),
-						 errdetail("Only roles with privileges of the \"%s\" role may execute this command.",
-								   "pg_checkpoint")));
-
-			RequestCheckpoint(CHECKPOINT_FAST | CHECKPOINT_WAIT |
-							  (RecoveryInProgress() ? 0 : CHECKPOINT_FORCE));
+			{
+				CheckPointStmt *stmt = (CheckPointStmt *) parsetree;
+				ListCell   *lc;
+				bool		fast = true;	/* MODE defaults to FAST */
+				bool		flush_unlogged = false; /* FLUSH_UNLOGGED defaults to off */
+
+				if (!has_privs_of_role(GetUserId(), ROLE_PG_CHECKPOINT))
+					ereport(ERROR,
+							(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+					/* translator: %s is name of a SQL command, eg CHECKPOINT */
+							 errmsg("permission denied to execute %s command",
+									"CHECKPOINT"),
+							 errdetail("Only roles with privileges of the \"%s\" role may execute this command.",
+									   "pg_checkpoint")));
+
+				/* Apply each option; a later occurrence overrides an earlier one */
+				foreach(lc, stmt->options)
+				{
+					DefElem    *opt = (DefElem *) lfirst(lc);
+
+					if (strcmp(opt->defname, "mode") == 0)
+					{
+						char	   *mode = defGetString(opt);
+						if (strcmp(mode, "fast") == 0)
+							fast = true;
+						else if (strcmp(mode, "spread") == 0)
+							fast = false;
+						else
+							ereport(ERROR,
+									(errcode(ERRCODE_SYNTAX_ERROR),
+									 errmsg("CHECKPOINT option \"%s\" argument \"%s\" is invalid", opt->defname, mode),
+									 errhint("Valid arguments are \"FAST\" and \"SPREAD\"."),
+									 parser_errposition(pstate, opt->location)));
+					}
+					else if (strcmp(opt->defname, "flush_unlogged") == 0)
+						flush_unlogged = defGetBoolean(opt);
+					else
+						ereport(ERROR,
+								(errcode(ERRCODE_SYNTAX_ERROR),
+								 errmsg("unrecognized CHECKPOINT option \"%s\"", opt->defname),
+								 errhint("Valid options are \"MODE\" and \"FLUSH_UNLOGGED\"."),
+								 parser_errposition(pstate, opt->location)));
+				}
+
+				RequestCheckpoint(CHECKPOINT_WAIT |
+								  (fast ? CHECKPOINT_FAST : 0) |
+								  (flush_unlogged ? CHECKPOINT_FLUSH_UNLOGGED : 0) |
+								  (RecoveryInProgress() ? 0 : CHECKPOINT_FORCE));
+			}
 			break;
 
 			/*
diff --git a/src/bin/psql/tab-complete.in.c b/src/bin/psql/tab-complete.in.c
index 2c0b4f28c14..0e61e8b66b8 100644
--- a/src/bin/psql/tab-complete.in.c
+++ b/src/bin/psql/tab-complete.in.c
@@ -3125,6 +3125,13 @@ match_previous_words(int pattern_id,
 		COMPLETE_WITH_VERSIONED_SCHEMA_QUERY(Query_for_list_of_procedures);
 	else if (Matches("CALL", MatchAny))
 		COMPLETE_WITH("(");
+/* CHECKPOINT */
+	else if (Matches("CHECKPOINT"))
+		COMPLETE_WITH("(");
+	else if (Matches("CHECKPOINT", "("))
+		COMPLETE_WITH("MODE", "FLUSH_UNLOGGED");
+	else if (Matches("CHECKPOINT", "(", "MODE"))
+		COMPLETE_WITH("FAST", "SPREAD");
 /* CLOSE */
 	else if (Matches("CLOSE"))
 		COMPLETE_WITH_QUERY_PLUS(Query_for_list_of_cursors,
diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h
index ba12678d1cb..a5497e4a602 100644
--- a/src/include/nodes/parsenodes.h
+++ b/src/include/nodes/parsenodes.h
@@ -4015,6 +4015,7 @@ typedef struct RefreshMatViewStmt
 typedef struct CheckPointStmt
 {
 	NodeTag		type;
+	List	   *options;		/* list of DefElem nodes */
 } CheckPointStmt;
 
 /* ----------------------
diff --git a/src/test/regress/expected/stats.out b/src/test/regress/expected/stats.out
index 776f1ad0e53..e1203b78ff5 100644
--- a/src/test/regress/expected/stats.out
+++ b/src/test/regress/expected/stats.out
@@ -925,9 +925,9 @@ CREATE TEMP TABLE test_stats_temp AS SELECT 17;
 DROP TABLE test_stats_temp;
 -- Checkpoint twice: The checkpointer reports stats after reporting completion
 -- of the checkpoint. But after a second checkpoint we'll see at least the
--- results of the first.
-CHECKPOINT;
-CHECKPOINT;
+-- results of the first. And while at it, test checkpoint options.
+CHECKPOINT (mode fast);
+CHECKPOINT (mode spread, flush_unlogged);
 SELECT num_requested > :rqst_ckpts_before FROM pg_stat_checkpointer;
  ?column? 
 ----------
diff --git a/src/test/regress/sql/stats.sql b/src/test/regress/sql/stats.sql
index 232ab8db8fa..79c0e51c19c 100644
--- a/src/test/regress/sql/stats.sql
+++ b/src/test/regress/sql/stats.sql
@@ -438,9 +438,9 @@ DROP TABLE test_stats_temp;
 
 -- Checkpoint twice: The checkpointer reports stats after reporting completion
 -- of the checkpoint. But after a second checkpoint we'll see at least the
--- results of the first.
-CHECKPOINT;
-CHECKPOINT;
+-- results of the first. And while at it, test checkpoint options.
+CHECKPOINT (mode fast);
+CHECKPOINT (mode spread, flush_unlogged);
 
 SELECT num_requested > :rqst_ckpts_before FROM pg_stat_checkpointer;
 SELECT wal_bytes > :wal_bytes_before FROM pg_stat_wal;
-- 
2.47.2

