Introduce group locking to prevent parallel processes from deadlocking.

For locking purposes, we now regard heavyweight locks as mutually non-conflicting between cooperating parallel processes. There are some possible pitfalls to this approach that are not to be taken lightly, but it works OK for now and can be changed later if we find a better approach. Without this, it's very easy for parallel queries to silently self-deadlock if the user backend holds strong relation locks. Robert Haas, with help from Amit Kapila. Thanks to Noah Misch and Andres Freund for extensive discussion of possible issues with this approach.
author: Robert Haas 2016-02-07 15:16:13 +0000
committer: Robert Haas 2016-02-07 15:16:13 +0000
commit: a1c1af2a1f6099c039f145c1edb52257f315be51 (patch)
tree: 1cb89288ec139185a57ddda01ea53abf5df0c28a /src/backend/storage/lmgr/deadlock.c
parent: aa2387e2fd532954e88dfd8546ab894b9305123d (diff)
1 files changed, 227 insertions, 52 deletions
diff --git a/src/backend/storage/lmgr/deadlock.c b/src/backend/storage/lmgr/deadlock.c
index a68aaf62072..69f678b5f8d 100644
--- a/src/backend/storage/lmgr/deadlock.c
+++ b/src/backend/storage/lmgr/deadlock.c
@@ -38,6 +38,7 @@ typedef struct
 {
 	PGPROC	   *waiter;			/* the waiting process */
 	PGPROC	   *blocker;		/* the process it is waiting for */
+	LOCK	   *lock;			/* the lock it is waiting for */
 	int			pred;			/* workspace for TopoSort */
 	int			link;			/* workspace for TopoSort */
 } EDGE;
@@ -72,6 +73,9 @@ static bool FindLockCycle(PGPROC *checkProc,
 			  EDGE *softEdges, int *nSoftEdges);
 static bool FindLockCycleRecurse(PGPROC *checkProc, int depth,
 					 EDGE *softEdges, int *nSoftEdges);
+static bool FindLockCycleRecurseMember(PGPROC *checkProc,
+						   PGPROC *checkProcLeader,
+						   int depth, EDGE *softEdges, int *nSoftEdges);
 static bool ExpandConstraints(EDGE *constraints, int nConstraints);
 static bool TopoSort(LOCK *lock, EDGE *constraints, int nConstraints,
 		 PGPROC **ordering);
@@ -449,18 +453,15 @@ FindLockCycleRecurse(PGPROC *checkProc,
 					 EDGE *softEdges,	/* output argument */
 					 int *nSoftEdges)	/* output argument */
 {
-	PGPROC	   *proc;
-	PGXACT	   *pgxact;
-	LOCK	   *lock;
-	PROCLOCK   *proclock;
-	SHM_QUEUE  *procLocks;
-	LockMethod	lockMethodTable;
-	PROC_QUEUE *waitQueue;
-	int			queue_size;
-	int			conflictMask;
 	int			i;
-	int			numLockModes,
-				lm;
+	dlist_iter	iter;
+
+	/*
+	 * If this process is a lock group member, check the leader instead. (Note
+	 * that we might be the leader, in which case this is a no-op.)
+	 */
+	if (checkProc->lockGroupLeader != NULL)
+		checkProc = checkProc->lockGroupLeader;
 
 	/*
 	 * Have we already seen this proc?
@@ -494,13 +495,57 @@ FindLockCycleRecurse(PGPROC *checkProc,
 	visitedProcs[nVisitedProcs++] = checkProc;
 
 	/*
-	 * If the proc is not waiting, we have no outgoing waits-for edges.
+	 * If the process is waiting, there is an outgoing waits-for edge to each
+	 * process that blocks it.
+	 */
+	if (checkProc->links.next != NULL && checkProc->waitLock != NULL &&
+		FindLockCycleRecurseMember(checkProc, checkProc, depth, softEdges,
+								   nSoftEdges))
+		return true;
+
+	/*
+	 * If the process is not waiting, there could still be outgoing waits-for
+	 * edges if it is part of a lock group, because other members of the lock
+	 * group might be waiting even though this process is not.  (Given lock
+	 * groups {A1, A2} and {B1, B2}, if A1 waits for B1 and B2 waits for A2,
+	 * that is a deadlock even neither of B1 and A2 are waiting for anything.)
 	 */
-	if (checkProc->links.next == NULL)
-		return false;
-	lock = checkProc->waitLock;
-	if (lock == NULL)
-		return false;
+	dlist_foreach(iter, &checkProc->lockGroupMembers)
+	{
+		PGPROC	   *memberProc;
+
+		memberProc = dlist_container(PGPROC, lockGroupLink, iter.cur);
+
+		if (memberProc->links.next != NULL && memberProc->waitLock != NULL &&
+			memberProc != checkProc &&
+		  FindLockCycleRecurseMember(memberProc, checkProc, depth, softEdges,
+									 nSoftEdges))
+			return true;
+	}
+
+	return false;
+}
+
+static bool
+FindLockCycleRecurseMember(PGPROC *checkProc,
+						   PGPROC *checkProcLeader,
+						   int depth,
+						   EDGE *softEdges,		/* output argument */
+						   int *nSoftEdges)		/* output argument */
+{
+	PGPROC	   *proc;
+	LOCK	   *lock = checkProc->waitLock;
+	PGXACT	   *pgxact;
+	PROCLOCK   *proclock;
+	SHM_QUEUE  *procLocks;
+	LockMethod	lockMethodTable;
+	PROC_QUEUE *waitQueue;
+	int			queue_size;
+	int			conflictMask;
+	int			i;
+	int			numLockModes,
+				lm;
+
 	lockMethodTable = GetLocksMethodTable(lock);
 	numLockModes = lockMethodTable->numLockModes;
 	conflictMask = lockMethodTable->conflictTab[checkProc->waitLockMode];
@@ -516,11 +561,14 @@ FindLockCycleRecurse(PGPROC *checkProc,
 
 	while (proclock)
 	{
+		PGPROC	   *leader;
+
 		proc = proclock->tag.myProc;
 		pgxact = &ProcGlobal->allPgXact[proc->pgprocno];
+		leader = proc->lockGroupLeader == NULL ? proc : proc->lockGroupLeader;
 
-		/* A proc never blocks itself */
-		if (proc != checkProc)
+		/* A proc never blocks itself or any other lock group member */
+		if (leader != checkProcLeader)
 		{
 			for (lm = 1; lm <= numLockModes; lm++)
 			{
@@ -601,10 +649,20 @@ FindLockCycleRecurse(PGPROC *checkProc,
 
 		for (i = 0; i < queue_size; i++)
 		{
+			PGPROC	   *leader;
+
 			proc = procs[i];
+			leader = proc->lockGroupLeader == NULL ? proc :
+				proc->lockGroupLeader;
 
-			/* Done when we reach the target proc */
-			if (proc == checkProc)
+			/*
+			 * TopoSort will always return an ordering with group members
+			 * adjacent to each other in the wait queue (see comments
+			 * therein). So, as soon as we reach a process in the same lock
+			 * group as checkProc, we know we've found all the conflicts that
+			 * precede any member of the lock group lead by checkProcLeader.
+			 */
+			if (leader == checkProcLeader)
 				break;
 
 			/* Is there a conflict with this guy's request? */
@@ -625,8 +683,9 @@ FindLockCycleRecurse(PGPROC *checkProc,
 					 * Add this edge to the list of soft edges in the cycle
 					 */
 					Assert(*nSoftEdges < MaxBackends);
-					softEdges[*nSoftEdges].waiter = checkProc;
-					softEdges[*nSoftEdges].blocker = proc;
+					softEdges[*nSoftEdges].waiter = checkProcLeader;
+					softEdges[*nSoftEdges].blocker = leader;
+					softEdges[*nSoftEdges].lock = lock;
 					(*nSoftEdges)++;
 					return true;
 				}
@@ -635,20 +694,52 @@ FindLockCycleRecurse(PGPROC *checkProc,
 	}
 	else
 	{
+		PGPROC	   *lastGroupMember = NULL;
+
 		/* Use the true lock wait queue order */
 		waitQueue = &(lock->waitProcs);
-		queue_size = waitQueue->size;
 
-		proc = (PGPROC *) waitQueue->links.next;
+		/*
+		 * Find the last member of the lock group that is present in the wait
+		 * queue.  Anything after this is not a soft lock conflict. If group
+		 * locking is not in use, then we know immediately which process we're
+		 * looking for, but otherwise we've got to search the wait queue to
+		 * find the last process actually present.
+		 */
+		if (checkProc->lockGroupLeader == NULL)
+			lastGroupMember = checkProc;
+		else
+		{
+			proc = (PGPROC *) waitQueue->links.next;
+			queue_size = waitQueue->size;
+			while (queue_size-- > 0)
+			{
+				if (proc->lockGroupLeader == checkProcLeader)
+					lastGroupMember = proc;
+				proc = (PGPROC *) proc->links.next;
+			}
+			Assert(lastGroupMember != NULL);
+		}
 
+		/*
+		 * OK, now rescan (or scan) the queue to identify the soft conflicts.
+		 */
+		queue_size = waitQueue->size;
+		proc = (PGPROC *) waitQueue->links.next;
 		while (queue_size-- > 0)
 		{
+			PGPROC	   *leader;
+
+			leader = proc->lockGroupLeader == NULL ? proc :
+				proc->lockGroupLeader;
+
 			/* Done when we reach the target proc */
-			if (proc == checkProc)
+			if (proc == lastGroupMember)
 				break;
 
 			/* Is there a conflict with this guy's request? */
-			if ((LOCKBIT_ON(proc->waitLockMode) & conflictMask) != 0)
+			if ((LOCKBIT_ON(proc->waitLockMode) & conflictMask) != 0 &&
+				leader != checkProcLeader)
 			{
 				/* This proc soft-blocks checkProc */
 				if (FindLockCycleRecurse(proc, depth + 1,
@@ -665,8 +756,9 @@ FindLockCycleRecurse(PGPROC *checkProc,
 					 * Add this edge to the list of soft edges in the cycle
 					 */
 					Assert(*nSoftEdges < MaxBackends);
-					softEdges[*nSoftEdges].waiter = checkProc;
-					softEdges[*nSoftEdges].blocker = proc;
+					softEdges[*nSoftEdges].waiter = checkProcLeader;
+					softEdges[*nSoftEdges].blocker = leader;
+					softEdges[*nSoftEdges].lock = lock;
 					(*nSoftEdges)++;
 					return true;
 				}
@@ -711,8 +803,7 @@ ExpandConstraints(EDGE *constraints,
 	 */
 	for (i = nConstraints; --i >= 0;)
 	{
-		PGPROC	   *proc = constraints[i].waiter;
-		LOCK	   *lock = proc->waitLock;
+		LOCK	   *lock = constraints[i].lock;
 
 		/* Did we already make a list for this lock? */
 		for (j = nWaitOrders; --j >= 0;)
@@ -778,7 +869,9 @@ TopoSort(LOCK *lock,
 	PGPROC	   *proc;
 	int			i,
 				j,
+				jj,
 				k,
+				kk,
 				last;
 
 	/* First, fill topoProcs[] array with the procs in their current order */
@@ -798,41 +891,95 @@ TopoSort(LOCK *lock,
 	 * stores its list link in constraints[i].link (note any constraint will
 	 * be in just one list). The array index for the before-proc of the i'th
 	 * constraint is remembered in constraints[i].pred.
+	 *
+	 * Note that it's not necessarily the case that every constraint affects
+	 * this particular wait queue.  Prior to group locking, a process could be
+	 * waiting for at most one lock.  But a lock group can be waiting for
+	 * zero, one, or multiple locks.  Since topoProcs[] is an array of the
+	 * processes actually waiting, while constraints[] is an array of group
+	 * leaders, we've got to scan through topoProcs[] for each constraint,
+	 * checking whether both a waiter and a blocker for that group are
+	 * present.  If so, the constraint is relevant to this wait queue; if not,
+	 * it isn't.
 	 */
 	MemSet(beforeConstraints, 0, queue_size * sizeof(int));
 	MemSet(afterConstraints, 0, queue_size * sizeof(int));
 	for (i = 0; i < nConstraints; i++)
 	{
+		/*
+		 * Find a representative process that is on the lock queue and part of
+		 * the waiting lock group.  This may or may not be the leader, which
+		 * may or may not be waiting at all.  If there are any other processes
+		 * in the same lock group on the queue, set their number of
+		 * beforeConstraints to -1 to indicate that they should be emitted
+		 * with their groupmates rather than considered separately.
+		 */
 		proc = constraints[i].waiter;
-		/* Ignore constraint if not for this lock */
-		if (proc->waitLock != lock)
-			continue;
-		/* Find the waiter proc in the array */
+		Assert(proc != NULL);
+		jj = -1;
 		for (j = queue_size; --j >= 0;)
 		{
-			if (topoProcs[j] == proc)
+			PGPROC	   *waiter = topoProcs[j];
+
+			if (waiter == proc || waiter->lockGroupLeader == proc)
+			{
+				Assert(waiter->waitLock == lock);
+				if (jj == -1)
+					jj = j;
+				else
+				{
+					Assert(beforeConstraints[j] <= 0);
+					beforeConstraints[j] = -1;
+				}
 				break;
+			}
 		}
-		Assert(j >= 0);			/* should have found a match */
-		/* Find the blocker proc in the array */
+
+		/* If no matching waiter, constraint is not relevant to this lock. */
+		if (jj < 0)
+			continue;
+
+		/*
+		 * Similarly, find a representative process that is on the lock queue
+		 * and waiting for the blocking lock group.  Again, this could be the
+		 * leader but does not need to be.
+		 */
 		proc = constraints[i].blocker;
+		Assert(proc != NULL);
+		kk = -1;
 		for (k = queue_size; --k >= 0;)
 		{
-			if (topoProcs[k] == proc)
-				break;
+			PGPROC	   *blocker = topoProcs[k];
+
+			if (blocker == proc || blocker->lockGroupLeader == proc)
+			{
+				Assert(blocker->waitLock == lock);
+				if (kk == -1)
+					kk = k;
+				else
+				{
+					Assert(beforeConstraints[k] <= 0);
+					beforeConstraints[k] = -1;
+				}
+			}
 		}
-		Assert(k >= 0);			/* should have found a match */
-		beforeConstraints[j]++; /* waiter must come before */
+
+		/* If no matching blocker, constraint is not relevant to this lock. */
+		if (kk < 0)
+			continue;
+
+		beforeConstraints[jj]++;	/* waiter must come before */
 		/* add this constraint to list of after-constraints for blocker */
-		constraints[i].pred = j;
-		constraints[i].link = afterConstraints[k];
-		afterConstraints[k] = i + 1;
+		constraints[i].pred = jj;
+		constraints[i].link = afterConstraints[kk];
+		afterConstraints[kk] = i + 1;
 	}
+
 	/*--------------------
 	 * Now scan the topoProcs array backwards.  At each step, output the
-	 * last proc that has no remaining before-constraints, and decrease
-	 * the beforeConstraints count of each of the procs it was constrained
-	 * against.
+	 * last proc that has no remaining before-constraints plus any other
+	 * members of the same lock group; then decrease the beforeConstraints
+	 * count of each of the procs it was constrained against.
 	 * i = index of ordering[] entry we want to output this time
 	 * j = search index for topoProcs[]
 	 * k = temp for scanning constraint list for proc j
@@ -840,8 +987,11 @@ TopoSort(LOCK *lock,
 	 *--------------------
 	 */
 	last = queue_size - 1;
-	for (i = queue_size; --i >= 0;)
+	for (i = queue_size - 1; i >= 0;)
 	{
+		int			c;
+		int			nmatches = 0;
+
 		/* Find next candidate to output */
 		while (topoProcs[last] == NULL)
 			last--;
@@ -850,12 +1000,37 @@ TopoSort(LOCK *lock,
 			if (topoProcs[j] != NULL && beforeConstraints[j] == 0)
 				break;
 		}
+
 		/* If no available candidate, topological sort fails */
 		if (j < 0)
 			return false;
-		/* Output candidate, and mark it done by zeroing topoProcs[] entry */
-		ordering[i] = topoProcs[j];
-		topoProcs[j] = NULL;
+
+		/*
+		 * Output everything in the lock group.  There's no point in outputing
+		 * an ordering where members of the same lock group are not
+		 * consecutive on the wait queue: if some other waiter is between two
+		 * requests that belong to the same group, then either it conflicts
+		 * with both of them and is certainly not a solution; or it conflicts
+		 * with at most one of them and is thus isomorphic to an ordering
+		 * where the group members are consecutive.
+		 */
+		proc = topoProcs[j];
+		if (proc->lockGroupLeader != NULL)
+			proc = proc->lockGroupLeader;
+		Assert(proc != NULL);
+		for (c = 0; c <= last; ++c)
+		{
+			if (topoProcs[c] == proc || (topoProcs[c] != NULL &&
+									  topoProcs[c]->lockGroupLeader == proc))
+			{
+				ordering[i - nmatches] = topoProcs[c];
+				topoProcs[c] = NULL;
+				++nmatches;
+			}
+		}
+		Assert(nmatches > 0);
+		i -= nmatches;
+
 		/* Update beforeConstraints counts of its predecessors */
 		for (k = afterConstraints[j]; k > 0; k = constraints[k - 1].link)
 			beforeConstraints[constraints[k - 1].pred]--;
author	Robert Haas	2016-02-07 15:16:13 +0000
committer	Robert Haas	2016-02-07 15:16:13 +0000
commit	a1c1af2a1f6099c039f145c1edb52257f315be51 (patch)
tree	1cb89288ec139185a57ddda01ea53abf5df0c28a /src/backend/storage/lmgr/deadlock.c
parent	aa2387e2fd532954e88dfd8546ab894b9305123d (diff)