diff options
author | Andres Freund | 2017-09-20 05:03:48 +0000 |
---|---|---|
committer | Andres Freund | 2017-09-20 05:03:48 +0000 |
commit | fc49e24fa69a15efacd5b8958115ed9c43c48f9a (patch) | |
tree | a1399d0d533c1cfa864e545a17000e7b6df6f43d /src/bin | |
parent | 5ada1fcd0c30be1b0b793a802cf6da386a6c1925 (diff) |
Make WAL segment size configurable at initdb time.
For performance reasons a larger segment size than the default 16MB
can be useful. A larger segment size has two main benefits: Firstly,
in setups using archiving, it makes it easier to write scripts that
can keep up with higher amounts of WAL; secondly, the WAL has to be
written and synced to disk less frequently.
But at the same time large segment sizes are disadvantageous for
smaller databases. So far the segment size had to be configured at
compile time, often making it unrealistic to choose one fitting to a
particular load. Therefore change it to an initdb-time setting.
This includes a breaking change to the xlogreader.h API, which now
requires the current segment size to be configured. For that and
similar reasons a number of binaries had to be taught how to recognize
the current segment size.
Author: Beena Emerson, editorialized by Andres Freund
Reviewed-By: Andres Freund, David Steele, Kuntal Ghosh, Michael
Paquier, Peter Eisentraut, Robert Haas, Tushar Ahuja
Discussion: https://postgr.es/m/CAOG9ApEAcQ--1ieKbhFzXSQPw_YLmepaa4hNdnY5+ZULpt81Mw@mail.gmail.com
Diffstat (limited to 'src/bin')
-rw-r--r-- | src/bin/initdb/initdb.c | 58 | ||||
-rw-r--r-- | src/bin/pg_basebackup/pg_basebackup.c | 7 | ||||
-rw-r--r-- | src/bin/pg_basebackup/pg_receivewal.c | 16 | ||||
-rw-r--r-- | src/bin/pg_basebackup/receivelog.c | 36 | ||||
-rw-r--r-- | src/bin/pg_basebackup/streamutil.c | 76 | ||||
-rw-r--r-- | src/bin/pg_basebackup/streamutil.h | 2 | ||||
-rw-r--r-- | src/bin/pg_controldata/pg_controldata.c | 15 | ||||
-rw-r--r-- | src/bin/pg_resetwal/pg_resetwal.c | 55 | ||||
-rw-r--r-- | src/bin/pg_rewind/parsexlog.c | 30 | ||||
-rw-r--r-- | src/bin/pg_rewind/pg_rewind.c | 12 | ||||
-rw-r--r-- | src/bin/pg_rewind/pg_rewind.h | 1 | ||||
-rw-r--r-- | src/bin/pg_test_fsync/pg_test_fsync.c | 7 | ||||
-rw-r--r-- | src/bin/pg_upgrade/test.sh | 4 | ||||
-rw-r--r-- | src/bin/pg_waldump/pg_waldump.c | 246 |
14 files changed, 431 insertions, 134 deletions
diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c index 9d1e5d789f6..1d4a138618b 100644 --- a/src/bin/initdb/initdb.c +++ b/src/bin/initdb/initdb.c @@ -59,6 +59,7 @@ #include "sys/mman.h" #endif +#include "access/xlog_internal.h" #include "catalog/catalog.h" #include "catalog/pg_authid.h" #include "catalog/pg_class.h" @@ -141,6 +142,8 @@ static bool sync_only = false; static bool show_setting = false; static bool data_checksums = false; static char *xlog_dir = NULL; +static char *str_wal_segment_size_mb = NULL; +static int wal_segment_size_mb; /* internal vars */ @@ -1000,6 +1003,23 @@ test_config_settings(void) } /* + * Calculate the default wal_size with a "pretty" unit. + */ +static char * +pretty_wal_size(int segment_count) +{ + int sz = wal_segment_size_mb * segment_count; + char *result = pg_malloc(10); + + if ((sz % 1024) == 0) + snprintf(result, 10, "%dGB", sz / 1024); + else + snprintf(result, 10, "%dMB", sz); + + return result; +} + +/* * set up all the config files */ static void @@ -1043,6 +1063,15 @@ setup_config(void) conflines = replace_token(conflines, "#port = 5432", repltok); #endif + /* set default max_wal_size and min_wal_size */ + snprintf(repltok, sizeof(repltok), "min_wal_size = %s", + pretty_wal_size(DEFAULT_MIN_WAL_SEGS)); + conflines = replace_token(conflines, "#min_wal_size = 80MB", repltok); + + snprintf(repltok, sizeof(repltok), "max_wal_size = %s", + pretty_wal_size(DEFAULT_MAX_WAL_SEGS)); + conflines = replace_token(conflines, "#max_wal_size = 1GB", repltok); + snprintf(repltok, sizeof(repltok), "lc_messages = '%s'", escape_quotes(lc_messages)); conflines = replace_token(conflines, "#lc_messages = 'C'", repltok); @@ -1352,8 +1381,9 @@ bootstrap_template1(void) unsetenv("PGCLIENTENCODING"); snprintf(cmd, sizeof(cmd), - "\"%s\" --boot -x1 %s %s %s", + "\"%s\" --boot -x1 -X %u %s %s %s", backend_exec, + wal_segment_size_mb * (1024 * 1024), data_checksums ? "-k" : "", boot_options, debug ? 
"-d 5" : ""); @@ -2293,6 +2323,7 @@ usage(const char *progname) printf(_(" -U, --username=NAME database superuser name\n")); printf(_(" -W, --pwprompt prompt for a password for the new superuser\n")); printf(_(" -X, --waldir=WALDIR location for the write-ahead log directory\n")); + printf(_(" --wal-segsize=SIZE size of wal segment size\n")); printf(_("\nLess commonly used options:\n")); printf(_(" -d, --debug generate lots of debugging output\n")); printf(_(" -k, --data-checksums use data page checksums\n")); @@ -2983,6 +3014,7 @@ main(int argc, char *argv[]) {"no-sync", no_argument, NULL, 'N'}, {"sync-only", no_argument, NULL, 'S'}, {"waldir", required_argument, NULL, 'X'}, + {"wal-segsize", required_argument, NULL, 12}, {"data-checksums", no_argument, NULL, 'k'}, {NULL, 0, NULL, 0} }; @@ -3116,6 +3148,9 @@ main(int argc, char *argv[]) case 'X': xlog_dir = pg_strdup(optarg); break; + case 12: + str_wal_segment_size_mb = pg_strdup(optarg); + break; default: /* getopt_long already emitted a complaint */ fprintf(stderr, _("Try \"%s --help\" for more information.\n"), @@ -3178,6 +3213,27 @@ main(int argc, char *argv[]) check_need_password(authmethodlocal, authmethodhost); + /* set wal segment size */ + if (str_wal_segment_size_mb == NULL) + wal_segment_size_mb = (DEFAULT_XLOG_SEG_SIZE) / (1024 * 1024); + else + { + char *endptr; + + /* check that the argument is a number */ + wal_segment_size_mb = strtol(str_wal_segment_size_mb, &endptr, 10); + + /* verify that wal segment size is valid */ + if (*endptr != '\0' || + !IsValidWalSegSize(wal_segment_size_mb * 1024 * 1024)) + { + fprintf(stderr, + _("%s: --wal-segsize must be a power of two between 1 and 1024\n"), + progname); + exit(1); + } + } + get_restricted_token(progname); setup_pgdata(); diff --git a/src/bin/pg_basebackup/pg_basebackup.c b/src/bin/pg_basebackup/pg_basebackup.c index 51509d150e5..2d039d5a33a 100644 --- a/src/bin/pg_basebackup/pg_basebackup.c +++ b/src/bin/pg_basebackup/pg_basebackup.c @@ -26,6 +26,7 
@@ #include <zlib.h> #endif +#include "access/xlog_internal.h" #include "common/file_utils.h" #include "common/string.h" #include "fe_utils/string_utils.h" @@ -555,7 +556,7 @@ StartLogStreamer(char *startpos, uint32 timeline, char *sysidentifier) } param->startptr = ((uint64) hi) << 32 | lo; /* Round off to even segment position */ - param->startptr -= param->startptr % XLOG_SEG_SIZE; + param->startptr -= XLogSegmentOffset(param->startptr, WalSegSz); #ifndef WIN32 /* Create our background pipe */ @@ -2397,6 +2398,10 @@ main(int argc, char **argv) exit(1); } + /* determine remote server's xlog segment size */ + if (!RetrieveWalSegSize(conn)) + disconnect_and_exit(1); + /* Create pg_wal symlink, if required */ if (xlog_dir) { diff --git a/src/bin/pg_basebackup/pg_receivewal.c b/src/bin/pg_basebackup/pg_receivewal.c index 710a33ab4d2..fbac0df93d8 100644 --- a/src/bin/pg_basebackup/pg_receivewal.c +++ b/src/bin/pg_basebackup/pg_receivewal.c @@ -191,7 +191,7 @@ close_destination_dir(DIR *dest_dir, char *dest_folder) /* * Determine starting location for streaming, based on any existing xlog * segments in the directory. We start at the end of the last one that is - * complete (size matches XLogSegSize), on the timeline with highest ID. + * complete (size matches wal segment size), on the timeline with highest ID. * * If there are no WAL files in the directory, returns InvalidXLogRecPtr. */ @@ -242,7 +242,7 @@ FindStreamingStart(uint32 *tli) /* * Looks like an xlog file. Parse its position. 
*/ - XLogFromFileName(dirent->d_name, &tli, &segno); + XLogFromFileName(dirent->d_name, &tli, &segno, WalSegSz); /* * Check that the segment has the right size, if it's supposed to be @@ -267,7 +267,7 @@ FindStreamingStart(uint32 *tli) disconnect_and_exit(1); } - if (statbuf.st_size != XLOG_SEG_SIZE) + if (statbuf.st_size != WalSegSz) { fprintf(stderr, _("%s: segment file \"%s\" has incorrect size %d, skipping\n"), @@ -308,7 +308,7 @@ FindStreamingStart(uint32 *tli) bytes_out = (buf[3] << 24) | (buf[2] << 16) | (buf[1] << 8) | buf[0]; - if (bytes_out != XLOG_SEG_SIZE) + if (bytes_out != WalSegSz) { fprintf(stderr, _("%s: compressed segment file \"%s\" has incorrect uncompressed size %d, skipping\n"), @@ -349,7 +349,7 @@ FindStreamingStart(uint32 *tli) if (!high_ispartial) high_segno++; - XLogSegNoOffsetToRecPtr(high_segno, 0, high_ptr); + XLogSegNoOffsetToRecPtr(high_segno, 0, high_ptr, WalSegSz); *tli = high_tli; return high_ptr; @@ -410,7 +410,7 @@ StreamLog(void) /* * Always start streaming at the beginning of a segment */ - stream.startpos -= stream.startpos % XLOG_SEG_SIZE; + stream.startpos -= XLogSegmentOffset(stream.startpos, WalSegSz); /* * Start the replication @@ -689,6 +689,10 @@ main(int argc, char **argv) if (!RunIdentifySystem(conn, NULL, NULL, NULL, &db_name)) disconnect_and_exit(1); + /* determine remote server's xlog segment size */ + if (!RetrieveWalSegSize(conn)) + disconnect_and_exit(1); + /* * Check that there is a database associated with connection, none should * be defined in this context. 
diff --git a/src/bin/pg_basebackup/receivelog.c b/src/bin/pg_basebackup/receivelog.c index 888458f4a90..65931f64541 100644 --- a/src/bin/pg_basebackup/receivelog.c +++ b/src/bin/pg_basebackup/receivelog.c @@ -95,17 +95,17 @@ open_walfile(StreamCtl *stream, XLogRecPtr startpoint) ssize_t size; XLogSegNo segno; - XLByteToSeg(startpoint, segno); - XLogFileName(current_walfile_name, stream->timeline, segno); + XLByteToSeg(startpoint, segno, WalSegSz); + XLogFileName(current_walfile_name, stream->timeline, segno, WalSegSz); snprintf(fn, sizeof(fn), "%s%s", current_walfile_name, stream->partial_suffix ? stream->partial_suffix : ""); /* * When streaming to files, if an existing file exists we verify that it's - * either empty (just created), or a complete XLogSegSize segment (in - * which case it has been created and padded). Anything else indicates a - * corrupt file. + * either empty (just created), or a complete WalSegSz segment (in which + * case it has been created and padded). Anything else indicates a corrupt + * file. * * When streaming to tar, no file with this name will exist before, so we * never have to verify a size. @@ -120,7 +120,7 @@ open_walfile(StreamCtl *stream, XLogRecPtr startpoint) progname, fn, stream->walmethod->getlasterror()); return false; } - if (size == XLogSegSize) + if (size == WalSegSz) { /* Already padded file. 
Open it for use */ f = stream->walmethod->open_for_write(current_walfile_name, stream->partial_suffix, 0); @@ -154,7 +154,7 @@ open_walfile(StreamCtl *stream, XLogRecPtr startpoint) ngettext("%s: write-ahead log file \"%s\" has %d byte, should be 0 or %d\n", "%s: write-ahead log file \"%s\" has %d bytes, should be 0 or %d\n", size), - progname, fn, (int) size, XLogSegSize); + progname, fn, (int) size, WalSegSz); return false; } /* File existed and was empty, so fall through and open */ @@ -162,7 +162,8 @@ open_walfile(StreamCtl *stream, XLogRecPtr startpoint) /* No file existed, so create one */ - f = stream->walmethod->open_for_write(current_walfile_name, stream->partial_suffix, XLogSegSize); + f = stream->walmethod->open_for_write(current_walfile_name, + stream->partial_suffix, WalSegSz); if (f == NULL) { fprintf(stderr, @@ -203,7 +204,7 @@ close_walfile(StreamCtl *stream, XLogRecPtr pos) if (stream->partial_suffix) { - if (currpos == XLOG_SEG_SIZE) + if (currpos == WalSegSz) r = stream->walmethod->close(walfile, CLOSE_NORMAL); else { @@ -231,7 +232,7 @@ close_walfile(StreamCtl *stream, XLogRecPtr pos) * new node. This is in line with walreceiver.c always doing a * XLogArchiveForceDone() after a complete segment. */ - if (currpos == XLOG_SEG_SIZE && stream->mark_done) + if (currpos == WalSegSz && stream->mark_done) { /* writes error message if failed */ if (!mark_file_as_archived(stream, current_walfile_name)) @@ -676,7 +677,8 @@ ReceiveXlogStream(PGconn *conn, StreamCtl *stream) * start streaming at the beginning of a segment. 
*/ stream->timeline = newtimeline; - stream->startpos = stream->startpos - (stream->startpos % XLOG_SEG_SIZE); + stream->startpos = stream->startpos - + XLogSegmentOffset(stream->startpos, WalSegSz); continue; } else if (PQresultStatus(res) == PGRES_COMMAND_OK) @@ -1111,7 +1113,7 @@ ProcessXLogDataMsg(PGconn *conn, StreamCtl *stream, char *copybuf, int len, *blockpos = fe_recvint64(©buf[1]); /* Extract WAL location for this block */ - xlogoff = *blockpos % XLOG_SEG_SIZE; + xlogoff = XLogSegmentOffset(*blockpos, WalSegSz); /* * Verify that the initial location in the stream matches where we think @@ -1148,11 +1150,11 @@ ProcessXLogDataMsg(PGconn *conn, StreamCtl *stream, char *copybuf, int len, int bytes_to_write; /* - * If crossing a WAL boundary, only write up until we reach - * XLOG_SEG_SIZE. + * If crossing a WAL boundary, only write up until we reach wal + * segment size. */ - if (xlogoff + bytes_left > XLOG_SEG_SIZE) - bytes_to_write = XLOG_SEG_SIZE - xlogoff; + if (xlogoff + bytes_left > WalSegSz) + bytes_to_write = WalSegSz - xlogoff; else bytes_to_write = bytes_left; @@ -1182,7 +1184,7 @@ ProcessXLogDataMsg(PGconn *conn, StreamCtl *stream, char *copybuf, int len, xlogoff += bytes_to_write; /* Did we reach the end of a WAL segment? 
*/ - if (*blockpos % XLOG_SEG_SIZE == 0) + if (XLogSegmentOffset(*blockpos, WalSegSz) == 0) { if (!close_walfile(stream, *blockpos)) /* Error message written in close_walfile() */ diff --git a/src/bin/pg_basebackup/streamutil.c b/src/bin/pg_basebackup/streamutil.c index 9d40744a349..df17f60596a 100644 --- a/src/bin/pg_basebackup/streamutil.c +++ b/src/bin/pg_basebackup/streamutil.c @@ -25,12 +25,18 @@ #include "receivelog.h" #include "streamutil.h" +#include "access/xlog_internal.h" #include "pqexpbuffer.h" #include "common/fe_memutils.h" #include "datatype/timestamp.h" #define ERRCODE_DUPLICATE_OBJECT "42710" +uint32 WalSegSz; + +/* SHOW command for replication connection was introduced in version 10 */ +#define MINIMUM_VERSION_FOR_SHOW_CMD 100000 + const char *progname; char *connection_string = NULL; char *dbhost = NULL; @@ -232,6 +238,76 @@ GetConnection(void) } /* + * From version 10, explicitly set wal segment size using SHOW wal_segment_size + * since ControlFile is not accessible here. 
+ */ +bool +RetrieveWalSegSize(PGconn *conn) +{ + PGresult *res; + char xlog_unit[3]; + int xlog_val, + multiplier = 1; + + /* check connection existence */ + Assert(conn != NULL); + + /* for previous versions set the default xlog seg size */ + if (PQserverVersion(conn) < MINIMUM_VERSION_FOR_SHOW_CMD) + { + WalSegSz = DEFAULT_XLOG_SEG_SIZE; + return true; + } + + res = PQexec(conn, "SHOW wal_segment_size"); + if (PQresultStatus(res) != PGRES_TUPLES_OK) + { + fprintf(stderr, _("%s: could not send replication command \"%s\": %s\n"), + progname, "SHOW wal_segment_size", PQerrorMessage(conn)); + + PQclear(res); + return false; + } + if (PQntuples(res) != 1 || PQnfields(res) < 1) + { + fprintf(stderr, + _("%s: could not fetch WAL segment size: got %d rows and %d fields, expected %d rows and %d or more fields\n"), + progname, PQntuples(res), PQnfields(res), 1, 1); + + PQclear(res); + return false; + } + + /* fetch xlog value and unit from the result */ + if (sscanf(PQgetvalue(res, 0, 0), "%d%s", &xlog_val, xlog_unit) != 2) + { + fprintf(stderr, _("%s: WAL segment size could not be parsed\n"), + progname); + return false; + } + + /* set the multiplier based on unit to convert xlog_val to bytes */ + if (strcmp(xlog_unit, "MB") == 0) + multiplier = 1024 * 1024; + else if (strcmp(xlog_unit, "GB") == 0) + multiplier = 1024 * 1024 * 1024; + + /* convert and set WalSegSz */ + WalSegSz = xlog_val * multiplier; + + if (!IsValidWalSegSize(WalSegSz)) + { + fprintf(stderr, + _("%s: WAL segment size must be a power of two between 1MB and 1GB, but the remote server reported a value of %d bytes\n"), + progname, WalSegSz); + return false; + } + + PQclear(res); + return true; +} + +/* * Run IDENTIFY_SYSTEM through a given connection and give back to caller * some result information if requested: * - System identifier diff --git a/src/bin/pg_basebackup/streamutil.h b/src/bin/pg_basebackup/streamutil.h index 6f6878679fc..ec227712d56 100644 --- a/src/bin/pg_basebackup/streamutil.h +++ 
b/src/bin/pg_basebackup/streamutil.h @@ -24,6 +24,7 @@ extern char *dbuser; extern char *dbport; extern char *dbname; extern int dbgetpassword; +extern uint32 WalSegSz; /* Connection kept global so we can disconnect easily */ extern PGconn *conn; @@ -39,6 +40,7 @@ extern bool RunIdentifySystem(PGconn *conn, char **sysid, TimeLineID *starttli, XLogRecPtr *startpos, char **db_name); +extern bool RetrieveWalSegSize(PGconn *conn); extern TimestampTz feGetCurrentTimestamp(void); extern void feTimestampDifference(TimestampTz start_time, TimestampTz stop_time, long *secs, int *microsecs); diff --git a/src/bin/pg_controldata/pg_controldata.c b/src/bin/pg_controldata/pg_controldata.c index 2ea893179ab..8cc4fb03419 100644 --- a/src/bin/pg_controldata/pg_controldata.c +++ b/src/bin/pg_controldata/pg_controldata.c @@ -99,6 +99,7 @@ main(int argc, char *argv[]) char xlogfilename[MAXFNAMELEN]; int c; int i; + int WalSegSz; set_pglocale_pgservice(argv[0], PG_TEXTDOMAIN("pg_controldata")); @@ -164,6 +165,15 @@ main(int argc, char *argv[]) "Either the file is corrupt, or it has a different layout than this program\n" "is expecting. The results below are untrustworthy.\n\n")); + /* set wal segment size */ + WalSegSz = ControlFile->xlog_seg_size; + + if (!IsValidWalSegSize(WalSegSz)) + fprintf(stderr, + _("WARNING: WAL segment size specified, %d bytes, is not a power of two between 1MB and 1GB.\n" + "The file is corrupt and the results below are untrustworthy.\n"), + WalSegSz); + /* * This slightly-chintzy coding will work as long as the control file * timestamps are within the range of time_t; that should be the case in @@ -184,8 +194,9 @@ main(int argc, char *argv[]) * Calculate name of the WAL file containing the latest checkpoint's REDO * start point. 
*/ - XLByteToSeg(ControlFile->checkPointCopy.redo, segno); - XLogFileName(xlogfilename, ControlFile->checkPointCopy.ThisTimeLineID, segno); + XLByteToSeg(ControlFile->checkPointCopy.redo, segno, WalSegSz); + XLogFileName(xlogfilename, ControlFile->checkPointCopy.ThisTimeLineID, + segno, WalSegSz); /* * Format system_identifier and mock_authentication_nonce separately to diff --git a/src/bin/pg_resetwal/pg_resetwal.c b/src/bin/pg_resetwal/pg_resetwal.c index ac678317795..25d5547b36d 100644 --- a/src/bin/pg_resetwal/pg_resetwal.c +++ b/src/bin/pg_resetwal/pg_resetwal.c @@ -70,6 +70,7 @@ static MultiXactId set_mxid = 0; static MultiXactOffset set_mxoff = (MultiXactOffset) -1; static uint32 minXlogTli = 0; static XLogSegNo minXlogSegNo = 0; +static int WalSegSz; static void CheckDataVersion(void); static bool ReadControlFile(void); @@ -94,6 +95,7 @@ main(int argc, char *argv[]) char *endptr; char *endptr2; char *DataDir = NULL; + char *log_fname = NULL; int fd; set_pglocale_pgservice(argv[0], PG_TEXTDOMAIN("pg_resetwal")); @@ -265,7 +267,12 @@ main(int argc, char *argv[]) fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname); exit(1); } - XLogFromFileName(optarg, &minXlogTli, &minXlogSegNo); + + /* + * XLogFromFileName requires wal segment size which is not yet + * set. Hence wal details are set later on. + */ + log_fname = pg_strdup(optarg); break; default: @@ -350,6 +357,9 @@ main(int argc, char *argv[]) if (!ReadControlFile()) GuessControlValues(); + if (log_fname != NULL) + XLogFromFileName(log_fname, &minXlogTli, &minXlogSegNo, WalSegSz); + /* * Also look at existing segment files to set up newXlogSegNo */ @@ -573,18 +583,27 @@ ReadControlFile(void) offsetof(ControlFileData, crc)); FIN_CRC32C(crc); - if (EQ_CRC32C(crc, ((ControlFileData *) buffer)->crc)) + if (!EQ_CRC32C(crc, ((ControlFileData *) buffer)->crc)) { - /* Valid data... 
*/ - memcpy(&ControlFile, buffer, sizeof(ControlFile)); - return true; + /* We will use the data but treat it as guessed. */ + fprintf(stderr, + _("%s: pg_control exists but has invalid CRC; proceed with caution\n"), + progname); + guessed = true; } - fprintf(stderr, _("%s: pg_control exists but has invalid CRC; proceed with caution\n"), - progname); - /* We will use the data anyway, but treat it as guessed. */ memcpy(&ControlFile, buffer, sizeof(ControlFile)); - guessed = true; + WalSegSz = ControlFile.xlog_seg_size; + + /* return false if WalSegSz is not valid */ + if (!IsValidWalSegSize(WalSegSz)) + { + fprintf(stderr, + _("%s: pg_control specifies invalid WAL segment size (%d bytes); proceed with caution \n"), + progname, WalSegSz); + guessed = true; + } + return true; } @@ -660,7 +679,7 @@ GuessControlValues(void) ControlFile.blcksz = BLCKSZ; ControlFile.relseg_size = RELSEG_SIZE; ControlFile.xlog_blcksz = XLOG_BLCKSZ; - ControlFile.xlog_seg_size = XLOG_SEG_SIZE; + ControlFile.xlog_seg_size = DEFAULT_XLOG_SEG_SIZE; ControlFile.nameDataLen = NAMEDATALEN; ControlFile.indexMaxKeys = INDEX_MAX_KEYS; ControlFile.toast_max_chunk_size = TOAST_MAX_CHUNK_SIZE; @@ -773,7 +792,8 @@ PrintNewControlValues(void) /* This will be always printed in order to keep format same. */ printf(_("\n\nValues to be changed:\n\n")); - XLogFileName(fname, ControlFile.checkPointCopy.ThisTimeLineID, newXlogSegNo); + XLogFileName(fname, ControlFile.checkPointCopy.ThisTimeLineID, + newXlogSegNo, WalSegSz); printf(_("First log segment after reset: %s\n"), fname); if (set_mxid != 0) @@ -850,7 +870,7 @@ RewriteControlFile(void) * newXlogSegNo. 
*/ XLogSegNoOffsetToRecPtr(newXlogSegNo, SizeOfXLogLongPHD, - ControlFile.checkPointCopy.redo); + ControlFile.checkPointCopy.redo, WalSegSz); ControlFile.checkPointCopy.time = (pg_time_t) time(NULL); ControlFile.state = DB_SHUTDOWNED; @@ -877,7 +897,7 @@ RewriteControlFile(void) ControlFile.max_locks_per_xact = 64; /* Now we can force the recorded xlog seg size to the right thing. */ - ControlFile.xlog_seg_size = XLogSegSize; + ControlFile.xlog_seg_size = WalSegSz; /* Contents are protected with a CRC */ INIT_CRC32C(ControlFile.crc); @@ -1014,7 +1034,7 @@ FindEndOfXLOG(void) * are in virgin territory. */ xlogbytepos = newXlogSegNo * ControlFile.xlog_seg_size; - newXlogSegNo = (xlogbytepos + XLogSegSize - 1) / XLogSegSize; + newXlogSegNo = (xlogbytepos + WalSegSz - 1) / WalSegSz; newXlogSegNo++; } @@ -1151,7 +1171,7 @@ WriteEmptyXLOG(void) page->xlp_pageaddr = ControlFile.checkPointCopy.redo - SizeOfXLogLongPHD; longpage = (XLogLongPageHeader) page; longpage->xlp_sysid = ControlFile.system_identifier; - longpage->xlp_seg_size = XLogSegSize; + longpage->xlp_seg_size = WalSegSz; longpage->xlp_xlog_blcksz = XLOG_BLCKSZ; /* Insert the initial checkpoint record */ @@ -1176,7 +1196,8 @@ WriteEmptyXLOG(void) record->xl_crc = crc; /* Write the first page */ - XLogFilePath(path, ControlFile.checkPointCopy.ThisTimeLineID, newXlogSegNo); + XLogFilePath(path, ControlFile.checkPointCopy.ThisTimeLineID, + newXlogSegNo, WalSegSz); unlink(path); @@ -1202,7 +1223,7 @@ WriteEmptyXLOG(void) /* Fill the rest of the file with zeroes */ memset(buffer, 0, XLOG_BLCKSZ); - for (nbytes = XLOG_BLCKSZ; nbytes < XLogSegSize; nbytes += XLOG_BLCKSZ) + for (nbytes = XLOG_BLCKSZ; nbytes < WalSegSz; nbytes += XLOG_BLCKSZ) { errno = 0; if (write(fd, buffer, XLOG_BLCKSZ) != XLOG_BLCKSZ) diff --git a/src/bin/pg_rewind/parsexlog.c b/src/bin/pg_rewind/parsexlog.c index 1befdbdeea3..0fc71d2a135 100644 --- a/src/bin/pg_rewind/parsexlog.c +++ b/src/bin/pg_rewind/parsexlog.c @@ -69,7 +69,8 @@ 
extractPageMap(const char *datadir, XLogRecPtr startpoint, int tliIndex, private.datadir = datadir; private.tliIndex = tliIndex; - xlogreader = XLogReaderAllocate(&SimpleXLogPageRead, &private); + xlogreader = XLogReaderAllocate(WalSegSz, &SimpleXLogPageRead, + &private); if (xlogreader == NULL) pg_fatal("out of memory\n"); @@ -122,7 +123,8 @@ readOneRecord(const char *datadir, XLogRecPtr ptr, int tliIndex) private.datadir = datadir; private.tliIndex = tliIndex; - xlogreader = XLogReaderAllocate(&SimpleXLogPageRead, &private); + xlogreader = XLogReaderAllocate(WalSegSz, &SimpleXLogPageRead, + &private); if (xlogreader == NULL) pg_fatal("out of memory\n"); @@ -170,11 +172,17 @@ findLastCheckpoint(const char *datadir, XLogRecPtr forkptr, int tliIndex, * header in that case to find the next record. */ if (forkptr % XLOG_BLCKSZ == 0) - forkptr += (forkptr % XLogSegSize == 0) ? SizeOfXLogLongPHD : SizeOfXLogShortPHD; + { + if (XLogSegmentOffset(forkptr, WalSegSz) == 0) + forkptr += SizeOfXLogLongPHD; + else + forkptr += SizeOfXLogShortPHD; + } private.datadir = datadir; private.tliIndex = tliIndex; - xlogreader = XLogReaderAllocate(&SimpleXLogPageRead, &private); + xlogreader = XLogReaderAllocate(WalSegSz, &SimpleXLogPageRead, + &private); if (xlogreader == NULL) pg_fatal("out of memory\n"); @@ -239,21 +247,22 @@ SimpleXLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr, XLogRecPtr targetSegEnd; XLogSegNo targetSegNo; - XLByteToSeg(targetPagePtr, targetSegNo); - XLogSegNoOffsetToRecPtr(targetSegNo + 1, 0, targetSegEnd); - targetPageOff = targetPagePtr % XLogSegSize; + XLByteToSeg(targetPagePtr, targetSegNo, WalSegSz); + XLogSegNoOffsetToRecPtr(targetSegNo + 1, 0, targetSegEnd, WalSegSz); + targetPageOff = XLogSegmentOffset(targetPagePtr, WalSegSz); /* * See if we need to switch to a new segment because the requested record * is not in the currently open one. 
*/ - if (xlogreadfd >= 0 && !XLByteInSeg(targetPagePtr, xlogreadsegno)) + if (xlogreadfd >= 0 && + !XLByteInSeg(targetPagePtr, xlogreadsegno, WalSegSz)) { close(xlogreadfd); xlogreadfd = -1; } - XLByteToSeg(targetPagePtr, xlogreadsegno); + XLByteToSeg(targetPagePtr, xlogreadsegno, WalSegSz); if (xlogreadfd < 0) { @@ -272,7 +281,8 @@ SimpleXLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr, targetHistory[private->tliIndex].begin >= targetSegEnd) private->tliIndex--; - XLogFileName(xlogfname, targetHistory[private->tliIndex].tli, xlogreadsegno); + XLogFileName(xlogfname, targetHistory[private->tliIndex].tli, + xlogreadsegno, WalSegSz); snprintf(xlogfpath, MAXPGPATH, "%s/" XLOGDIR "/%s", private->datadir, xlogfname); diff --git a/src/bin/pg_rewind/pg_rewind.c b/src/bin/pg_rewind/pg_rewind.c index 4bd1a759734..6079156e802 100644 --- a/src/bin/pg_rewind/pg_rewind.c +++ b/src/bin/pg_rewind/pg_rewind.c @@ -44,6 +44,7 @@ static ControlFileData ControlFile_target; static ControlFileData ControlFile_source; const char *progname; +int WalSegSz; /* Configuration options */ char *datadir_target = NULL; @@ -572,8 +573,8 @@ createBackupLabel(XLogRecPtr startpoint, TimeLineID starttli, XLogRecPtr checkpo char buf[1000]; int len; - XLByteToSeg(startpoint, startsegno); - XLogFileName(xlogfilename, starttli, startsegno); + XLByteToSeg(startpoint, startsegno, WalSegSz); + XLogFileName(xlogfilename, starttli, startsegno, WalSegSz); /* * Construct backup label file @@ -631,6 +632,13 @@ digestControlFile(ControlFileData *ControlFile, char *src, size_t size) memcpy(ControlFile, src, sizeof(ControlFileData)); + /* set and validate WalSegSz */ + WalSegSz = ControlFile->xlog_seg_size; + + if (!IsValidWalSegSize(WalSegSz)) + pg_fatal("WAL segment size must be a power of two between 1MB and 1GB, but the control file specifies %d bytes\n", + WalSegSz); + /* Additional checks on control file */ checkControlFile(ControlFile); } diff --git a/src/bin/pg_rewind/pg_rewind.h 
b/src/bin/pg_rewind/pg_rewind.h index 31353dd3548..7bec34ff55d 100644 --- a/src/bin/pg_rewind/pg_rewind.h +++ b/src/bin/pg_rewind/pg_rewind.h @@ -24,6 +24,7 @@ extern char *connstr_source; extern bool debug; extern bool showprogress; extern bool dry_run; +extern int WalSegSz; /* Target history */ extern TimeLineHistoryEntry *targetHistory; diff --git a/src/bin/pg_test_fsync/pg_test_fsync.c b/src/bin/pg_test_fsync/pg_test_fsync.c index c607b5371c0..e6f7ef85579 100644 --- a/src/bin/pg_test_fsync/pg_test_fsync.c +++ b/src/bin/pg_test_fsync/pg_test_fsync.c @@ -64,7 +64,7 @@ static const char *progname; static int secs_per_test = 5; static int needs_unlink = 0; -static char full_buf[XLOG_SEG_SIZE], +static char full_buf[DEFAULT_XLOG_SEG_SIZE], *buf, *filename = FSYNC_FILENAME; static struct timeval start_t, @@ -209,7 +209,7 @@ prepare_buf(void) int ops; /* write random data into buffer */ - for (ops = 0; ops < XLOG_SEG_SIZE; ops++) + for (ops = 0; ops < DEFAULT_XLOG_SEG_SIZE; ops++) full_buf[ops] = random(); buf = (char *) TYPEALIGN(XLOG_BLCKSZ, full_buf); @@ -226,7 +226,8 @@ test_open(void) if ((tmpfile = open(filename, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR)) == -1) die("could not open output file"); needs_unlink = 1; - if (write(tmpfile, full_buf, XLOG_SEG_SIZE) != XLOG_SEG_SIZE) + if (write(tmpfile, full_buf, DEFAULT_XLOG_SEG_SIZE) != + DEFAULT_XLOG_SEG_SIZE) die("write failed"); /* fsync now so that dirty buffers don't skew later tests */ diff --git a/src/bin/pg_upgrade/test.sh b/src/bin/pg_upgrade/test.sh index f4556341f32..1bacf066aaf 100644 --- a/src/bin/pg_upgrade/test.sh +++ b/src/bin/pg_upgrade/test.sh @@ -20,7 +20,9 @@ unset MAKELEVEL # Run a given "initdb" binary and overlay the regression testing # authentication configuration. standard_initdb() { - "$1" -N + # To increase coverage of non-standard segment size without + # increase test runtime, run these tests with a lower setting. 
+ "$1" -N --wal-segsize 1 if [ -n "$TEMP_CONFIG" -a -r "$TEMP_CONFIG" ] then cat "$TEMP_CONFIG" >> "$PGDATA/postgresql.conf" diff --git a/src/bin/pg_waldump/pg_waldump.c b/src/bin/pg_waldump/pg_waldump.c index 5aa3233bd3d..53eca4c8e02 100644 --- a/src/bin/pg_waldump/pg_waldump.c +++ b/src/bin/pg_waldump/pg_waldump.c @@ -13,6 +13,7 @@ #include "postgres.h" #include <dirent.h> +#include <sys/stat.h> #include <unistd.h> #include "access/xlogreader.h" @@ -26,6 +27,8 @@ static const char *progname; +static int WalSegSz; + typedef struct XLogDumpPrivate { TimeLineID timeline; @@ -144,77 +147,166 @@ split_path(const char *path, char **dir, char **fname) } /* - * Try to find the file in several places: - * if directory == NULL: - * fname - * XLOGDIR / fname - * $PGDATA / XLOGDIR / fname - * else - * directory / fname - * directory / XLOGDIR / fname + * Open the file in the valid target directory. * * return a read only fd */ static int -fuzzy_open_file(const char *directory, const char *fname) +open_file_in_directory(const char *directory, const char *fname) { int fd = -1; char fpath[MAXPGPATH]; - if (directory == NULL) + Assert(directory != NULL); + + snprintf(fpath, MAXPGPATH, "%s/%s", directory, fname); + fd = open(fpath, O_RDONLY | PG_BINARY, 0); + + if (fd < 0 && errno != ENOENT) + fatal_error("could not open file \"%s\": %s", + fname, strerror(errno)); + return fd; +} + +/* + * Try to find fname in the given directory. Returns true if it is found, + * false otherwise. If fname is NULL, search the complete directory for any + * file with a valid WAL file name. If file is successfully opened, set the + * wal segment size. + */ +static bool +search_directory(char *directory, char *fname) +{ + int fd = -1; + DIR *xldir; + + /* open file if valid filename is provided */ + if (fname != NULL) + fd = open_file_in_directory(directory, fname); + + /* + * A valid file name is not passed, so search the complete directory. 
If + * we find any file whose name is a valid WAL file name then try to open + * it. If we cannot open it, bail out. + */ + else if ((xldir = opendir(directory)) != NULL) + { + struct dirent *xlde; + + while ((xlde = readdir(xldir)) != NULL) + { + if (IsXLogFileName(xlde->d_name)) + { + fd = open_file_in_directory(directory, xlde->d_name); + fname = xlde->d_name; + break; + } + } + + closedir(xldir); + } + + /* set WalSegSz if file is successfully opened */ + if (fd >= 0) + { + char buf[XLOG_BLCKSZ]; + + if (read(fd, buf, XLOG_BLCKSZ) == XLOG_BLCKSZ) + { + XLogLongPageHeader longhdr = (XLogLongPageHeader) buf; + + WalSegSz = longhdr->xlp_seg_size; + + if (!IsValidWalSegSize(WalSegSz)) + fatal_error("WAL segment size must be a power of two between 1MB and 1GB, but the WAL file \"%s\" header specifies %d bytes", + fname, WalSegSz); + } + else + { + if (errno != 0) + fatal_error("could not read file \"%s\": %s", + fname, strerror(errno)); + else + fatal_error("not enough data in file \"%s\"", fname); + } + close(fd); + return true; + } + + return false; +} + +/* + * Identify the target directory and set WalSegSz. + * + * Try to find the file in several places: + * if directory != NULL: + * directory / + * directory / XLOGDIR / + * else + * . + * XLOGDIR / + * $PGDATA / XLOGDIR / + * + * Set the valid target directory in private->inpath. 
+ */ +static void +identify_target_directory(XLogDumpPrivate *private, char *directory, + char *fname) +{ + char fpath[MAXPGPATH]; + + if (directory != NULL) + { + if (search_directory(directory, fname)) + { + private->inpath = strdup(directory); + return; + } + + /* directory / XLOGDIR */ + snprintf(fpath, MAXPGPATH, "%s/%s", directory, XLOGDIR); + if (search_directory(fpath, fname)) + { + private->inpath = strdup(fpath); + return; + } + } + else { const char *datadir; - /* fname */ - fd = open(fname, O_RDONLY | PG_BINARY, 0); - if (fd < 0 && errno != ENOENT) - return -1; - else if (fd >= 0) - return fd; - - /* XLOGDIR / fname */ - snprintf(fpath, MAXPGPATH, "%s/%s", - XLOGDIR, fname); - fd = open(fpath, O_RDONLY | PG_BINARY, 0); - if (fd < 0 && errno != ENOENT) - return -1; - else if (fd >= 0) - return fd; + /* current directory */ + if (search_directory(".", fname)) + { + private->inpath = strdup("."); + return; + } + /* XLOGDIR */ + if (search_directory(XLOGDIR, fname)) + { + private->inpath = strdup(XLOGDIR); + return; + } datadir = getenv("PGDATA"); - /* $PGDATA / XLOGDIR / fname */ + /* $PGDATA / XLOGDIR */ if (datadir != NULL) { - snprintf(fpath, MAXPGPATH, "%s/%s/%s", - datadir, XLOGDIR, fname); - fd = open(fpath, O_RDONLY | PG_BINARY, 0); - if (fd < 0 && errno != ENOENT) - return -1; - else if (fd >= 0) - return fd; + snprintf(fpath, MAXPGPATH, "%s/%s", datadir, XLOGDIR); + if (search_directory(fpath, fname)) + { + private->inpath = strdup(fpath); + return; + } } } + + /* could not locate WAL file */ + if (fname) + fatal_error("could not locate WAL file \"%s\"", fname); else - { - /* directory / fname */ - snprintf(fpath, MAXPGPATH, "%s/%s", - directory, fname); - fd = open(fpath, O_RDONLY | PG_BINARY, 0); - if (fd < 0 && errno != ENOENT) - return -1; - else if (fd >= 0) - return fd; - - /* directory / XLOGDIR / fname */ - snprintf(fpath, MAXPGPATH, "%s/%s/%s", - directory, XLOGDIR, fname); - fd = open(fpath, O_RDONLY | PG_BINARY, 0); - if (fd < 0 && 
errno != ENOENT) - return -1; - else if (fd >= 0) - return fd; - } - return -1; + fatal_error("could not find any WAL file"); } /* @@ -244,9 +336,9 @@ XLogDumpXLogRead(const char *directory, TimeLineID timeline_id, int segbytes; int readbytes; - startoff = recptr % XLogSegSize; + startoff = XLogSegmentOffset(recptr, WalSegSz); - if (sendFile < 0 || !XLByteInSeg(recptr, sendSegNo)) + if (sendFile < 0 || !XLByteInSeg(recptr, sendSegNo, WalSegSz)) { char fname[MAXFNAMELEN]; int tries; @@ -255,9 +347,9 @@ XLogDumpXLogRead(const char *directory, TimeLineID timeline_id, if (sendFile >= 0) close(sendFile); - XLByteToSeg(recptr, sendSegNo); + XLByteToSeg(recptr, sendSegNo, WalSegSz); - XLogFileName(fname, timeline_id, sendSegNo); + XLogFileName(fname, timeline_id, sendSegNo, WalSegSz); /* * In follow mode there is a short period of time after the server @@ -267,7 +359,7 @@ XLogDumpXLogRead(const char *directory, TimeLineID timeline_id, */ for (tries = 0; tries < 10; tries++) { - sendFile = fuzzy_open_file(directory, fname); + sendFile = open_file_in_directory(directory, fname); if (sendFile >= 0) break; if (errno == ENOENT) @@ -298,7 +390,7 @@ XLogDumpXLogRead(const char *directory, TimeLineID timeline_id, int err = errno; char fname[MAXPGPATH]; - XLogFileName(fname, timeline_id, sendSegNo); + XLogFileName(fname, timeline_id, sendSegNo, WalSegSz); fatal_error("could not seek in log file %s to offset %u: %s", fname, startoff, strerror(err)); @@ -307,8 +399,8 @@ XLogDumpXLogRead(const char *directory, TimeLineID timeline_id, } /* How many bytes are within this segment? 
*/ - if (nbytes > (XLogSegSize - startoff)) - segbytes = XLogSegSize - startoff; + if (nbytes > (WalSegSz - startoff)) + segbytes = WalSegSz - startoff; else segbytes = nbytes; @@ -318,7 +410,7 @@ XLogDumpXLogRead(const char *directory, TimeLineID timeline_id, int err = errno; char fname[MAXPGPATH]; - XLogFileName(fname, timeline_id, sendSegNo); + XLogFileName(fname, timeline_id, sendSegNo, WalSegSz); fatal_error("could not read from log file %s, offset %u, length %d: %s", fname, sendOff, segbytes, strerror(err)); @@ -935,17 +1027,18 @@ main(int argc, char **argv) private.inpath, strerror(errno)); } - fd = fuzzy_open_file(private.inpath, fname); + identify_target_directory(&private, private.inpath, fname); + fd = open_file_in_directory(private.inpath, fname); if (fd < 0) fatal_error("could not open file \"%s\"", fname); close(fd); /* parse position from file */ - XLogFromFileName(fname, &private.timeline, &segno); + XLogFromFileName(fname, &private.timeline, &segno, WalSegSz); if (XLogRecPtrIsInvalid(private.startptr)) - XLogSegNoOffsetToRecPtr(segno, 0, private.startptr); - else if (!XLByteInSeg(private.startptr, segno)) + XLogSegNoOffsetToRecPtr(segno, 0, private.startptr, WalSegSz); + else if (!XLByteInSeg(private.startptr, segno, WalSegSz)) { fprintf(stderr, _("%s: start WAL location %X/%X is not inside file \"%s\"\n"), @@ -958,7 +1051,7 @@ main(int argc, char **argv) /* no second file specified, set end position */ if (!(optind + 1 < argc) && XLogRecPtrIsInvalid(private.endptr)) - XLogSegNoOffsetToRecPtr(segno + 1, 0, private.endptr); + XLogSegNoOffsetToRecPtr(segno + 1, 0, private.endptr, WalSegSz); /* parse ENDSEG if passed */ if (optind + 1 < argc) @@ -968,28 +1061,29 @@ main(int argc, char **argv) /* ignore directory, already have that */ split_path(argv[optind + 1], &directory, &fname); - fd = fuzzy_open_file(private.inpath, fname); + fd = open_file_in_directory(private.inpath, fname); if (fd < 0) fatal_error("could not open file \"%s\"", fname); 
close(fd); /* parse position from file */ - XLogFromFileName(fname, &private.timeline, &endsegno); + XLogFromFileName(fname, &private.timeline, &endsegno, WalSegSz); if (endsegno < segno) fatal_error("ENDSEG %s is before STARTSEG %s", argv[optind + 1], argv[optind]); if (XLogRecPtrIsInvalid(private.endptr)) - XLogSegNoOffsetToRecPtr(endsegno + 1, 0, private.endptr); + XLogSegNoOffsetToRecPtr(endsegno + 1, 0, private.endptr, + WalSegSz); /* set segno to endsegno for check of --end */ segno = endsegno; } - if (!XLByteInSeg(private.endptr, segno) && - private.endptr != (segno + 1) * XLogSegSize) + if (!XLByteInSeg(private.endptr, segno, WalSegSz) && + private.endptr != (segno + 1) * WalSegSz) { fprintf(stderr, _("%s: end WAL location %X/%X is not inside file \"%s\"\n"), @@ -1000,6 +1094,8 @@ main(int argc, char **argv) goto bad_argument; } } + else + identify_target_directory(&private, private.inpath, NULL); /* we don't know what to print */ if (XLogRecPtrIsInvalid(private.startptr)) @@ -1011,7 +1107,8 @@ main(int argc, char **argv) /* done with argument parsing, do the actual work */ /* we have everything we need, start reading */ - xlogreader_state = XLogReaderAllocate(XLogDumpReadPage, &private); + xlogreader_state = XLogReaderAllocate(WalSegSz, XLogDumpReadPage, + &private); if (!xlogreader_state) fatal_error("out of memory"); @@ -1028,7 +1125,8 @@ main(int argc, char **argv) * to the start of a record and also wasn't a pointer to the beginning of * a segment (e.g. we were used in file mode). */ - if (first_record != private.startptr && (private.startptr % XLogSegSize) != 0) + if (first_record != private.startptr && + XLogSegmentOffset(private.startptr, WalSegSz) != 0) printf(ngettext("first record is after %X/%X, at %X/%X, skipping over %u byte\n", "first record is after %X/%X, at %X/%X, skipping over %u bytes\n", (first_record - private.startptr)), |