author Noah Misch 2025-04-17 12:00:30 +0000
committer Noah Misch 2025-04-17 12:00:30 +0000
commit f4ece891fc2f3f96f0571720a1ae30db8030681b (patch)
tree 61437530ceb8f305390574162aa6903cd057f0e6 /src/backend/storage/buffer
parent b669293e3432ee8fdcd44854a945837bb18eea5c (diff)
Assert lack of hazardous buffer locks before possible catalog read.
Commit 0bada39c83a150079567a6e97b1a25a198f30ea3 fixed a bug of this kind, which existed in all branches for six days before detection. While the probability of reaching the trouble was low, the disruption was extreme. No new backends could start, and service restoration needed an immediate shutdown. Hence, add this to catch the next bug like it.

The new check in RelationIdGetRelation() suffices to make autovacuum detect the bug in commit 243e9b40f1b2dd09d6e5bf91ebf6e822a2cd3704 that led to commit 0bada39. This also checks in a number of similar places. It replaces each Assert(IsTransactionState()) that pertained to a conditional catalog read.

No back-patch for now, but a back-patch of commit 243e9b4 should back-patch this, too. A back-patch could omit the src/test/regress changes, since back branches won't gain new index columns.

Reported-by: Alexander Lakhin <[email protected]>
Discussion: https://postgr.es/m/[email protected]
Discussion: https://postgr.es/m/[email protected]
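For orientation, a minimal sketch of how the new check likely fits together. This diff is limited to src/backend/storage/buffer, so the definition of AssertCouldGetRelation() is not shown here; the body below is an assumption inferred from the bufmgr.c comment calling AssertBufferLocksPermitCatalogRead() "the core of AssertCouldGetRelation()", not the committed code.

#include "postgres.h"
#include "access/xact.h"		/* IsTransactionState() */
#include "storage/bufmgr.h"		/* AssertBufferLocksPermitCatalogRead() */

/*
 * Sketch (assumed shape): the check that replaces each bare
 * Assert(IsTransactionState()) at a conditional catalog read.
 */
static void
AssertCouldGetRelation(void)
{
	/* Pre-existing requirement: a catalog read needs a transaction. */
	Assert(IsTransactionState());

	/* New requirement: no held buffer lock may deadlock a catalog scan. */
	AssertBufferLocksPermitCatalogRead();
}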
Diffstat (limited to 'src/backend/storage/buffer')
-rw-r--r-- src/backend/storage/buffer/bufmgr.c | 70
1 file changed, 70 insertions(+), 0 deletions(-)
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index 0b0e056eea2..1f2a9fe9976 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -40,6 +40,9 @@
#include "access/tableam.h"
#include "access/xloginsert.h"
#include "access/xlogutils.h"
+#ifdef USE_ASSERT_CHECKING
+#include "catalog/pg_tablespace_d.h"
+#endif
#include "catalog/storage.h"
#include "catalog/storage_xlog.h"
#include "executor/instrument.h"
@@ -541,6 +544,10 @@ static void RelationCopyStorageUsingBuffer(RelFileLocator srclocator,
ForkNumber forkNum, bool permanent);
static void AtProcExit_Buffers(int code, Datum arg);
static void CheckForBufferLeaks(void);
+#ifdef USE_ASSERT_CHECKING
+static void AssertNotCatalogBufferLock(LWLock *lock, LWLockMode mode,
+ void *unused_context);
+#endif
static int rlocator_comparator(const void *p1, const void *p2);
static inline int buffertag_comparator(const BufferTag *ba, const BufferTag *bb);
static inline int ckpt_buforder_comparator(const CkptSortItem *a, const CkptSortItem *b);
@@ -4097,6 +4104,69 @@ CheckForBufferLeaks(void)
#endif
}
+#ifdef USE_ASSERT_CHECKING
+/*
+ * Check for exclusive-locked catalog buffers. This is the core of
+ * AssertCouldGetRelation().
+ *
+ * A backend would self-deadlock on LWLocks if the catalog scan read the
+ * exclusive-locked buffer. The main threat is exclusive-locked buffers of
+ * catalogs used in relcache, because a catcache search on any catalog may
+ * build that catalog's relcache entry. We don't have an inventory of
+ * catalogs relcache uses, so just check buffers of most catalogs.
+ *
+ * It's better to minimize waits while holding an exclusive buffer lock, so it
+ * would be nice to broaden this check not to be catalog-specific. However,
+ * bttextcmp() accesses pg_collation, and non-core opclasses might similarly
+ * read tables. That is deadlock-free as long as there's no loop in the
+ * dependency graph: modifying table A may cause an opclass to read table B,
+ * but it must not cause a read of table A.
+ */
+void
+AssertBufferLocksPermitCatalogRead(void)
+{
+ ForEachLWLockHeldByMe(AssertNotCatalogBufferLock, NULL);
+}
+
+static void
+AssertNotCatalogBufferLock(LWLock *lock, LWLockMode mode,
+ void *unused_context)
+{
+ BufferDesc *bufHdr;
+ BufferTag tag;
+ Oid relid;
+
+ if (mode != LW_EXCLUSIVE)
+ return;
+
+ if (!((BufferDescPadded *) lock > BufferDescriptors &&
+ (BufferDescPadded *) lock < BufferDescriptors + NBuffers))
+ return; /* not a buffer lock */
+
+ bufHdr = (BufferDesc *)
+ ((char *) lock - offsetof(BufferDesc, content_lock));
+ tag = bufHdr->tag;
+
+ /*
+ * This relNumber==relid assumption holds until a catalog experiences
+ * VACUUM FULL or similar. After a command like that, relNumber will be
+ * in the normal (non-catalog) range, and we lose the ability to detect
+ * hazardous access to that catalog. Calling RelidByRelfilenumber() would
+ * close that gap, but RelidByRelfilenumber() might then deadlock with a
+ * held lock.
+ */
+ relid = tag.relNumber;
+
+ if (IsCatalogTextUniqueIndexOid(relid)) /* see comments at the callee */
+ return;
+
+ Assert(!IsCatalogRelationOid(relid));
+ /* Shared rels are always catalogs: detect even after VACUUM FULL. */
+ Assert(tag.spcOid != GLOBALTABLESPACE_OID);
+}
+#endif
+
+
/*
* Helper routine to issue warnings when a buffer is unexpectedly pinned
*/
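To make the hazard concrete, here is a hypothetical sequence that the new assertion would catch in assert-enabled builds. ReadBuffer(), LockBuffer(), and RelationIdGetRelation() are real PostgreSQL APIs; pg_class_rel, blkno, some_relid, and the scenario itself are invented for illustration.

	Buffer		buf = ReadBuffer(pg_class_rel, blkno);

	/* Take the content lock exclusively on a pg_class buffer. */
	LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);

	/*
	 * If this has to build a relcache entry, the catalog scan may try to
	 * share-lock the very buffer held above: a self-deadlock on LWLocks.
	 * With this commit, the check in RelationIdGetRelation() makes
	 * AssertBufferLocksPermitCatalogRead() fail fast here instead of
	 * letting the backend hang.
	 */
	rel = RelationIdGetRelation(some_relid);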