diff options
Diffstat (limited to 'src/backend/access/transam/varsup.c')
-rw-r--r-- | src/backend/access/transam/varsup.c | 209 |
1 files changed, 207 insertions, 2 deletions
diff --git a/src/backend/access/transam/varsup.c b/src/backend/access/transam/varsup.c index 849a7ce9d6d..f99c697c2f5 100644 --- a/src/backend/access/transam/varsup.c +++ b/src/backend/access/transam/varsup.c @@ -13,12 +13,16 @@ #include "postgres.h" +#include <unistd.h> + #include "access/clog.h" #include "access/commit_ts.h" #include "access/subtrans.h" #include "access/transam.h" #include "access/xact.h" #include "access/xlogutils.h" +#include "catalog/pg_class.h" +#include "catalog/pg_tablespace.h" #include "commands/dbcommands.h" #include "miscadmin.h" #include "postmaster/autovacuum.h" @@ -30,6 +34,15 @@ /* Number of OIDs to prefetch (preallocate) per XLOG write */ #define VAR_OID_PREFETCH 8192 +/* Number of RelFileNumbers to be logged per XLOG write */ +#define VAR_RELNUMBER_PER_XLOG 512 + +/* + * Need to log more if remaining logged RelFileNumbers are less than the + * threshold. Valid range could be between 0 to VAR_RELNUMBER_PER_XLOG - 1. + */ +#define VAR_RELNUMBER_NEW_XLOG_THRESHOLD 256 + /* pointer to "variable cache" in shared memory (set up by shmem.c) */ VariableCache ShmemVariableCache = NULL; @@ -521,8 +534,7 @@ ForceTransactionIdLimitUpdate(void) * wide, counter wraparound will occur eventually, and therefore it is unwise * to assume they are unique unless precautions are taken to make them so. * Hence, this routine should generally not be used directly. The only direct - * callers should be GetNewOidWithIndex() and GetNewRelFileNumber() in - * catalog/catalog.c. + * caller should be GetNewOidWithIndex() in catalog/catalog.c. */ Oid GetNewObjectId(void) @@ -613,6 +625,199 @@ SetNextObjectId(Oid nextOid) } /* + * GetNewRelFileNumber + * + * Similar to GetNewObjectId but instead of new Oid it generates new + * relfilenumber. 
+ */ +RelFileNumber +GetNewRelFileNumber(Oid reltablespace, char relpersistence) +{ + RelFileNumber result; + RelFileNumber nextRelFileNumber, + loggedRelFileNumber, + flushedRelFileNumber; + + StaticAssertStmt(VAR_RELNUMBER_NEW_XLOG_THRESHOLD < VAR_RELNUMBER_PER_XLOG, + "VAR_RELNUMBER_NEW_XLOG_THRESHOLD must be smaller than VAR_RELNUMBER_PER_XLOG"); + + /* safety check, we should never get this far in a HS standby */ + if (RecoveryInProgress()) + elog(ERROR, "cannot assign RelFileNumber during recovery"); + + if (IsBinaryUpgrade) + elog(ERROR, "cannot assign RelFileNumber during binary upgrade"); + + LWLockAcquire(RelFileNumberGenLock, LW_EXCLUSIVE); + + nextRelFileNumber = ShmemVariableCache->nextRelFileNumber; + loggedRelFileNumber = ShmemVariableCache->loggedRelFileNumber; + flushedRelFileNumber = ShmemVariableCache->flushedRelFileNumber; + + Assert(nextRelFileNumber <= flushedRelFileNumber); + Assert(flushedRelFileNumber <= loggedRelFileNumber); + + /* check for the wraparound for the relfilenumber counter */ + if (unlikely(nextRelFileNumber > MAX_RELFILENUMBER)) + elog(ERROR, "relfilenumber is too large"); + + /* + * If the remaining logged relfilenumber values are less than the + * threshold value then log more. Ideally, we can wait until all + * relfilenumbers have been consumed before logging more. Nevertheless, if + * we do that, we must immediately flush the logged wal record because we + * want to ensure that the nextRelFileNumber is always larger than any + * relfilenumber already in use on disk. And, to maintain that invariant, + * we must make sure that the record we log reaches the disk before any new + * files are created with the newly logged range. + * + * So in order to avoid flushing the wal immediately, we always log before + * consuming all the relfilenumbers, and now we only have to flush the newly + * logged relfilenumber wal before consuming the relfilenumber from this + * new range. 
By the time we need to flush this wal, hopefully, those have + * already been flushed with some other XLogFlush operation. + */ + if (loggedRelFileNumber - nextRelFileNumber <= + VAR_RELNUMBER_NEW_XLOG_THRESHOLD) + { + XLogRecPtr recptr; + + loggedRelFileNumber = loggedRelFileNumber + VAR_RELNUMBER_PER_XLOG; + recptr = LogNextRelFileNumber(loggedRelFileNumber); + ShmemVariableCache->loggedRelFileNumber = loggedRelFileNumber; + + /* remember for the future flush */ + ShmemVariableCache->loggedRelFileNumberRecPtr = recptr; + } + + /* + * If nextRelFileNumber has already reached the flushed relfilenumber, + * flush the WAL for the previously logged relfilenumber range. + */ + if (nextRelFileNumber >= flushedRelFileNumber) + { + XLogFlush(ShmemVariableCache->loggedRelFileNumberRecPtr); + ShmemVariableCache->flushedRelFileNumber = loggedRelFileNumber; + } + + result = ShmemVariableCache->nextRelFileNumber; + + /* we should never be using any relfilenumber outside the flushed range */ + Assert(result <= ShmemVariableCache->flushedRelFileNumber); + + (ShmemVariableCache->nextRelFileNumber)++; + + LWLockRelease(RelFileNumberGenLock); + + /* + * Because the RelFileNumber counter only ever increases and never wraps + * around, it should be impossible for the newly-allocated RelFileNumber to + * already be in use. But, if Asserts are enabled, double check that + * there's no main-fork relation file with the new RelFileNumber already on + * disk. + */ +#ifdef USE_ASSERT_CHECKING + { + RelFileLocatorBackend rlocator; + char *rpath; + BackendId backend; + + switch (relpersistence) + { + case RELPERSISTENCE_TEMP: + backend = BackendIdForTempRelations(); + break; + case RELPERSISTENCE_UNLOGGED: + case RELPERSISTENCE_PERMANENT: + backend = InvalidBackendId; + break; + default: + elog(ERROR, "invalid relpersistence: %c", relpersistence); + } + + /* this logic should match RelationInitPhysicalAddr */ + rlocator.locator.spcOid = + reltablespace ? 
reltablespace : MyDatabaseTableSpace; + rlocator.locator.dbOid = (reltablespace == GLOBALTABLESPACE_OID) ? + InvalidOid : MyDatabaseId; + rlocator.locator.relNumber = result; + + /* + * The relpath will vary based on the backend ID, so we must + * initialize that properly here to make sure that any collisions + * based on filename are properly detected. + */ + rlocator.backend = backend; + + /* check for existing file of same name. NOTE(review): rpath is never freed in this block; confirm that is intended. */ + rpath = relpath(rlocator, MAIN_FORKNUM); + Assert(access(rpath, F_OK) != 0); + } +#endif + + return result; +} + +/* + * SetNextRelFileNumber + * + * This may only be called during pg_upgrade; it advances the RelFileNumber + * counter to the specified value if the current value is smaller than the + * input value. + */ +void +SetNextRelFileNumber(RelFileNumber relnumber) +{ + /* safety check, we should never get this far in a HS standby */ + if (RecoveryInProgress()) + elog(ERROR, "cannot set RelFileNumber during recovery"); + + if (!IsBinaryUpgrade) + elog(ERROR, "RelFileNumber can be set only during binary upgrade"); + + LWLockAcquire(RelFileNumberGenLock, LW_EXCLUSIVE); + + /* + * If previous assigned value of the nextRelFileNumber is already higher + * than the current value then nothing to be done. This is possible + * because during upgrade the objects are not created in relfilenumber + * order. + */ + if (relnumber <= ShmemVariableCache->nextRelFileNumber) + { + LWLockRelease(RelFileNumberGenLock); + return; + } + + /* + * If the new relfilenumber to be set is greater than or equal to already + * flushed relfilenumber then log more and flush immediately. + * + * (This is less efficient than GetNewRelFileNumber, which arranges to + * log some new relfilenumbers before the old batch is exhausted in the + * hope that a flush will happen in the background before any values are + * needed from the new batch. 
However, since this is only used during + * binary upgrade, it shouldn't really matter.) 
+ */ + if (relnumber >= ShmemVariableCache->flushedRelFileNumber) + { + RelFileNumber newlogrelnum; + + newlogrelnum = relnumber + VAR_RELNUMBER_PER_XLOG; + XLogFlush(LogNextRelFileNumber(newlogrelnum)); + + /* we have flushed whatever we have logged so no pending flush */ + ShmemVariableCache->loggedRelFileNumber = newlogrelnum; + ShmemVariableCache->flushedRelFileNumber = newlogrelnum; + ShmemVariableCache->loggedRelFileNumberRecPtr = InvalidXLogRecPtr; + } + + ShmemVariableCache->nextRelFileNumber = relnumber; + + LWLockRelease(RelFileNumberGenLock); +} + +/* * StopGeneratingPinnedObjectIds * * This is called once during initdb to force the OID counter up to |