Avoid unlikely data-loss scenarios due to rename() without fsync.
author Andres Freund <[email protected]>
Thu, 10 Mar 2016 02:53:53 +0000 (18:53 -0800)
committer Andres Freund <[email protected]>
Thu, 10 Mar 2016 02:53:53 +0000 (18:53 -0800)
Renaming a file using rename(2) is not guaranteed to be durable in the face
of crashes. Use the previously added durable_rename()/durable_link_or_rename()
in various places where we previously just renamed files.

Most of the changed call sites are arguably not critical, but it seems
better to err on the side of too much durability.  The most prominent
known case where the previously missing fsyncs could cause data loss is
crashes at the end of a checkpoint. After the actual checkpoint has been
performed, old WAL files are recycled. When they're filled, their
contents are fdatasynced, but we did not fsync the containing
directory. An OS/hardware crash at an unfortunate moment could then end
up leaving that file with its old name, but new content; WAL replay
would thus not replay it.

Reported-By: Tomas Vondra
Author: Michael Paquier, Tomas Vondra, Andres Freund
Discussion: 56583BDD.9060302@2ndquadrant.com
Backpatch: All supported branches

contrib/pg_stat_statements/pg_stat_statements.c
src/backend/access/transam/timeline.c
src/backend/access/transam/xlog.c
src/backend/access/transam/xlogarchive.c
src/backend/postmaster/pgarch.c
src/backend/replication/logical/origin.c
src/backend/utils/misc/guc.c

index cea84ee4b0f29b1ac3a866658d9e481304036449..17ca1c94a62fa34de120918f4e852aaf9523285c 100644 (file)
@@ -741,11 +741,7 @@ pgss_shmem_shutdown(int code, Datum arg)
    /*
     * Rename file into place, so we atomically replace any old one.
     */
-   if (rename(PGSS_DUMP_FILE ".tmp", PGSS_DUMP_FILE) != 0)
-       ereport(LOG,
-               (errcode_for_file_access(),
-                errmsg("could not rename pg_stat_statement file \"%s\": %m",
-                       PGSS_DUMP_FILE ".tmp")));
+   (void) durable_rename(PGSS_DUMP_FILE ".tmp", PGSS_DUMP_FILE, LOG);
 
    /* Unlink query-texts file; it's not needed while shutdown */
    unlink(PGSS_TEXT_FILE);
index c6862a8a6ec06c0b95f996f89cd58d63a56673f7..25ac70e8f4f759027b689c0059410b3e50ec87f4 100644 (file)
@@ -418,24 +418,10 @@ writeTimeLineHistory(TimeLineID newTLI, TimeLineID parentTLI,
    TLHistoryFilePath(path, newTLI);
 
    /*
-    * Prefer link() to rename() here just to be really sure that we don't
-    * overwrite an existing file.  However, there shouldn't be one, so
-    * rename() is an acceptable substitute except for the truly paranoid.
+    * Perform the rename using link if available, paranoidly trying to avoid
+    * overwriting an existing file (there shouldn't be one).
     */
-#if HAVE_WORKING_LINK
-   if (link(tmppath, path) < 0)
-       ereport(ERROR,
-               (errcode_for_file_access(),
-                errmsg("could not link file \"%s\" to \"%s\": %m",
-                       tmppath, path)));
-   unlink(tmppath);
-#else
-   if (rename(tmppath, path) < 0)
-       ereport(ERROR,
-               (errcode_for_file_access(),
-                errmsg("could not rename file \"%s\" to \"%s\": %m",
-                       tmppath, path)));
-#endif
+   durable_link_or_rename(tmppath, path, ERROR);
 
    /* The history file can be archived immediately. */
    if (XLogArchivingActive())
@@ -508,24 +494,10 @@ writeTimeLineHistoryFile(TimeLineID tli, char *content, int size)
    TLHistoryFilePath(path, tli);
 
    /*
-    * Prefer link() to rename() here just to be really sure that we don't
-    * overwrite an existing logfile.  However, there shouldn't be one, so
-    * rename() is an acceptable substitute except for the truly paranoid.
+    * Perform the rename using link if available, paranoidly trying to avoid
+    * overwriting an existing file (there shouldn't be one).
     */
-#if HAVE_WORKING_LINK
-   if (link(tmppath, path) < 0)
-       ereport(ERROR,
-               (errcode_for_file_access(),
-                errmsg("could not link file \"%s\" to \"%s\": %m",
-                       tmppath, path)));
-   unlink(tmppath);
-#else
-   if (rename(tmppath, path) < 0)
-       ereport(ERROR,
-               (errcode_for_file_access(),
-                errmsg("could not rename file \"%s\" to \"%s\": %m",
-                       tmppath, path)));
-#endif
+   durable_link_or_rename(tmppath, path, ERROR);
 }
 
 /*
index e92a0ada9dab7d66e692d06a51ec8f3f13d0718b..735f4073a38d81a4ac06cd2b29ad9839798591ad 100644 (file)
@@ -3253,34 +3253,16 @@ InstallXLogFileSegment(XLogSegNo *segno, char *tmppath,
    }
 
    /*
-    * Prefer link() to rename() here just to be really sure that we don't
-    * overwrite an existing logfile.  However, there shouldn't be one, so
-    * rename() is an acceptable substitute except for the truly paranoid.
+    * Perform the rename using link if available, paranoidly trying to avoid
+    * overwriting an existing file (there shouldn't be one).
     */
-#if HAVE_WORKING_LINK
-   if (link(tmppath, path) < 0)
+   if (durable_link_or_rename(tmppath, path, LOG) != 0)
    {
        if (use_lock)
            LWLockRelease(ControlFileLock);
-       ereport(LOG,
-               (errcode_for_file_access(),
-                errmsg("could not link file \"%s\" to \"%s\" (initialization of log file): %m",
-                       tmppath, path)));
-       return false;
-   }
-   unlink(tmppath);
-#else
-   if (rename(tmppath, path) < 0)
-   {
-       if (use_lock)
-           LWLockRelease(ControlFileLock);
-       ereport(LOG,
-               (errcode_for_file_access(),
-                errmsg("could not rename file \"%s\" to \"%s\" (initialization of log file): %m",
-                       tmppath, path)));
+       /* durable_link_or_rename already emitted log message */
        return false;
    }
-#endif
 
    if (use_lock)
        LWLockRelease(ControlFileLock);
@@ -5296,11 +5278,7 @@ exitArchiveRecovery(TimeLineID endTLI, XLogRecPtr endOfLog)
     * re-enter archive recovery mode in a subsequent crash.
     */
    unlink(RECOVERY_COMMAND_DONE);
-   if (rename(RECOVERY_COMMAND_FILE, RECOVERY_COMMAND_DONE) != 0)
-       ereport(FATAL,
-               (errcode_for_file_access(),
-                errmsg("could not rename file \"%s\" to \"%s\": %m",
-                       RECOVERY_COMMAND_FILE, RECOVERY_COMMAND_DONE)));
+   durable_rename(RECOVERY_COMMAND_FILE, RECOVERY_COMMAND_DONE, FATAL);
 
    ereport(LOG,
            (errmsg("archive recovery complete")));
@@ -6147,7 +6125,7 @@ StartupXLOG(void)
        if (stat(TABLESPACE_MAP, &st) == 0)
        {
            unlink(TABLESPACE_MAP_OLD);
-           if (rename(TABLESPACE_MAP, TABLESPACE_MAP_OLD) == 0)
+           if (durable_rename(TABLESPACE_MAP, TABLESPACE_MAP_OLD, DEBUG1) == 0)
                ereport(LOG,
                    (errmsg("ignoring file \"%s\" because no file \"%s\" exists",
                            TABLESPACE_MAP, BACKUP_LABEL_FILE),
@@ -6510,11 +6488,7 @@ StartupXLOG(void)
        if (haveBackupLabel)
        {
            unlink(BACKUP_LABEL_OLD);
-           if (rename(BACKUP_LABEL_FILE, BACKUP_LABEL_OLD) != 0)
-               ereport(FATAL,
-                       (errcode_for_file_access(),
-                        errmsg("could not rename file \"%s\" to \"%s\": %m",
-                               BACKUP_LABEL_FILE, BACKUP_LABEL_OLD)));
+           durable_rename(BACKUP_LABEL_FILE, BACKUP_LABEL_OLD, FATAL);
        }
 
        /*
@@ -6527,11 +6501,7 @@ StartupXLOG(void)
        if (haveTblspcMap)
        {
            unlink(TABLESPACE_MAP_OLD);
-           if (rename(TABLESPACE_MAP, TABLESPACE_MAP_OLD) != 0)
-               ereport(FATAL,
-                       (errcode_for_file_access(),
-                        errmsg("could not rename file \"%s\" to \"%s\": %m",
-                               TABLESPACE_MAP, TABLESPACE_MAP_OLD)));
+           durable_rename(TABLESPACE_MAP, TABLESPACE_MAP_OLD, FATAL);
        }
 
        /* Check that the GUCs used to generate the WAL allow recovery */
@@ -7308,11 +7278,7 @@ StartupXLOG(void)
                 */
                XLogArchiveCleanup(partialfname);
 
-               if (rename(origpath, partialpath) != 0)
-                   ereport(ERROR,
-                           (errcode_for_file_access(),
-                        errmsg("could not rename file \"%s\" to \"%s\": %m",
-                               origpath, partialpath)));
+               durable_rename(origpath, partialpath, ERROR);
                XLogArchiveNotify(partialfname);
            }
        }
@@ -10874,7 +10840,7 @@ CancelBackup(void)
    /* remove leftover file from previously canceled backup if it exists */
    unlink(BACKUP_LABEL_OLD);
 
-   if (rename(BACKUP_LABEL_FILE, BACKUP_LABEL_OLD) != 0)
+   if (durable_rename(BACKUP_LABEL_FILE, BACKUP_LABEL_OLD, DEBUG1) != 0)
    {
        ereport(WARNING,
                (errcode_for_file_access(),
@@ -10897,7 +10863,7 @@ CancelBackup(void)
    /* remove leftover file from previously canceled backup if it exists */
    unlink(TABLESPACE_MAP_OLD);
 
-   if (rename(TABLESPACE_MAP, TABLESPACE_MAP_OLD) == 0)
+   if (durable_rename(TABLESPACE_MAP, TABLESPACE_MAP_OLD, DEBUG1) == 0)
    {
        ereport(LOG,
                (errmsg("online backup mode canceled"),
index 7af56a9a740b2c0f81920f4f6a3d38edeb282a49..4538f178acc0d1d4dc4a3fc9cc2f9b93a9af1b5f 100644 (file)
@@ -470,11 +470,7 @@ KeepFileRestoredFromArchive(char *path, char *xlogfname)
        reload = true;
    }
 
-   if (rename(path, xlogfpath) < 0)
-       ereport(ERROR,
-               (errcode_for_file_access(),
-                errmsg("could not rename file \"%s\" to \"%s\": %m",
-                       path, xlogfpath)));
+   durable_rename(path, xlogfpath, ERROR);
 
    /*
     * Create .done file forcibly to prevent the restored segment from being
@@ -580,12 +576,7 @@ XLogArchiveForceDone(const char *xlog)
    StatusFilePath(archiveReady, xlog, ".ready");
    if (stat(archiveReady, &stat_buf) == 0)
    {
-       if (rename(archiveReady, archiveDone) < 0)
-           ereport(WARNING,
-                   (errcode_for_file_access(),
-                    errmsg("could not rename file \"%s\" to \"%s\": %m",
-                           archiveReady, archiveDone)));
-
+       (void) durable_rename(archiveReady, archiveDone, WARNING);
        return;
    }
 
index 4df669e14af88acd3daf50893f39ea0fda324c3a..7a772bfedc03678039e8e1783a4e1432b3e89fb7 100644 (file)
@@ -728,9 +728,5 @@ pgarch_archiveDone(char *xlog)
 
    StatusFilePath(rlogready, xlog, ".ready");
    StatusFilePath(rlogdone, xlog, ".done");
-   if (rename(rlogready, rlogdone) < 0)
-       ereport(WARNING,
-               (errcode_for_file_access(),
-                errmsg("could not rename file \"%s\" to \"%s\": %m",
-                       rlogready, rlogdone)));
+   (void) durable_rename(rlogready, rlogdone, WARNING);
 }
index a4bfd15585b23deea0946866d9ada3fb91f80b22..6f96ce7255e3b2f9afc75080b4b67741a8a93b0e 100644 (file)
@@ -604,29 +604,10 @@ CheckPointReplicationOrigin(void)
                        tmppath)));
    }
 
-   /* fsync the temporary file */
-   if (pg_fsync(tmpfd) != 0)
-   {
-       CloseTransientFile(tmpfd);
-       ereport(PANIC,
-               (errcode_for_file_access(),
-                errmsg("could not fsync file \"%s\": %m",
-                       tmppath)));
-   }
-
    CloseTransientFile(tmpfd);
 
-   /* rename to permanent file, fsync file and directory */
-   if (rename(tmppath, path) != 0)
-   {
-       ereport(PANIC,
-               (errcode_for_file_access(),
-                errmsg("could not rename file \"%s\" to \"%s\": %m",
-                       tmppath, path)));
-   }
-
-   fsync_fname(path, false);
-   fsync_fname("pg_logical", true);
+   /* fsync, rename to permanent file, fsync file and directory */
+   durable_rename(tmppath, path, PANIC);
 }
 
 /*
index 27e07d21e2ead353372e577ed57220bdf508dde2..c37b577dc298880c183fce1e44c28ca389a06d6b 100644 (file)
@@ -6962,11 +6962,7 @@ AlterSystemSetConfigFile(AlterSystemStmt *altersysstmt)
         * at worst it can lose the parameters set by last ALTER SYSTEM
         * command.
         */
-       if (rename(AutoConfTmpFileName, AutoConfFileName) < 0)
-           ereport(ERROR,
-                   (errcode_for_file_access(),
-                    errmsg("could not rename file \"%s\" to \"%s\": %m",
-                           AutoConfTmpFileName, AutoConfFileName)));
+       durable_rename(AutoConfTmpFileName, AutoConfFileName, ERROR);
    }
    PG_CATCH();
    {