author     Alexander Korotkov    2023-03-22 21:13:37 +0000
committer  Alexander Korotkov    2023-03-22 21:26:59 +0000
commit     11470f544e3729c60fab890145b2e839cbc8905e (patch)
tree       8ebd3c5b4aab9023e54e71e892b91a5beb9de8dc /src/backend
parent     764da7710bf68eebb2c0facb2f871bc3c7a705b6 (diff)
Allow locking updated tuples in tuple_update() and tuple_delete()
Currently, in read committed transaction isolation mode (the default), we have
the following sequence of actions when tuple_update()/tuple_delete() finds the
tuple updated by a concurrent transaction:

1. Attempt to update/delete the tuple with tuple_update()/tuple_delete(),
   which returns TM_Updated.
2. Lock the tuple with tuple_lock().
3. Re-evaluate the plan qual (recheck whether we still need to update/delete
   and calculate the new tuple for the update).
4. Make a second attempt to update/delete the tuple with
   tuple_update()/tuple_delete().  This attempt should succeed, since the
   tuple was previously locked.

This patch eliminates step 2 by taking the lock during the first
tuple_update()/tuple_delete() call.  The heap table access method saves some
effort by checking the updated tuple once instead of twice.  Future
undo-based table access methods, which will start from the latest row
version, can immediately place a lock there.

The code in nodeModifyTable.c is simplified by removing the nested
switch/case.

Discussion: https://postgr.es/m/CAPpHfdua-YFw3XTprfutzGp28xXLigFtzNbuFY8yPhqeq6X5kg%40mail.gmail.com
Reviewed-by: Aleksander Alekseev, Pavel Borisov, Vignesh C, Mason Sharp
Reviewed-by: Andres Freund, Chris Travers
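For illustration only (not part of the commit), the sketch below shows the
caller-side pattern this change enables, based on the table_tuple_delete()
signature and the MAKE_LAZY_TTS() usage visible in the diff; the helper name
delete_locking_latest_version and its callback parameters are hypothetical.
The idea is that the caller hands the AM a lazily-materialized EPQ slot, so
that on a concurrent update the AM can lock and fetch the latest row version
itself and return TM_Updated with tmfd->traversed set, letting the caller go
straight to EvalPlanQual() and retry.

/*
 * Hedged sketch, assuming the post-patch table AM API shown in the diff.
 * 'delete_locking_latest_version' is a hypothetical helper, not part of
 * the patch.
 */
static TM_Result
delete_locking_latest_version(Relation rel, ItemPointer tid, EState *estate,
                              TupleTableSlot *(*get_epq_slot) (void *arg),
                              void *callback_arg, TM_FailureData *tmfd)
{
    LazyTupleTableSlot lazySlot;

    /* The EPQ slot is materialized only if the AM actually needs it. */
    MAKE_LAZY_TTS(&lazySlot, get_epq_slot, callback_arg);

    /*
     * On success, returns TM_Ok.  On a concurrent update, the AM locks the
     * latest version, fills the lazy slot, sets tmfd->traversed, and
     * returns TM_Updated; the caller then runs EvalPlanQual() and retries.
     */
    return table_tuple_delete(rel, tid,
                              estate->es_output_cid,
                              estate->es_snapshot,
                              estate->es_crosscheck_snapshot,
                              true,   /* wait for commit */
                              tmfd,
                              false,  /* changingPart */
                              &lazySlot);
}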
Diffstat (limited to 'src/backend')
-rw-r--r--   src/backend/access/heap/heapam_handler.c | 109
-rw-r--r--   src/backend/access/table/tableam.c       |   6
-rw-r--r--   src/backend/executor/nodeModifyTable.c   | 288
3 files changed, 226 insertions, 177 deletions
diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c
index 1ce7c6b9713..9e690074e94 100644
--- a/src/backend/access/heap/heapam_handler.c
+++ b/src/backend/access/heap/heapam_handler.c
@@ -45,6 +45,12 @@
#include "utils/builtins.h"
#include "utils/rel.h"
+static TM_Result heapam_tuple_lock_internal(Relation relation, ItemPointer tid,
+ Snapshot snapshot, TupleTableSlot *slot,
+ CommandId cid, LockTupleMode mode,
+ LockWaitPolicy wait_policy, uint8 flags,
+ TM_FailureData *tmfd, bool updated);
+
static void reform_and_rewrite_tuple(HeapTuple tuple,
Relation OldHeap, Relation NewHeap,
Datum *values, bool *isnull, RewriteState rwstate);
@@ -299,14 +305,46 @@ heapam_tuple_complete_speculative(Relation relation, TupleTableSlot *slot,
static TM_Result
heapam_tuple_delete(Relation relation, ItemPointer tid, CommandId cid,
Snapshot snapshot, Snapshot crosscheck, bool wait,
- TM_FailureData *tmfd, bool changingPart)
+ TM_FailureData *tmfd, bool changingPart,
+ LazyTupleTableSlot *lockedSlot)
{
+ TM_Result result;
+
/*
* Currently Deleting of index tuples are handled at vacuum, in case if
* the storage itself is cleaning the dead tuples by itself, it is the
* time to call the index tuple deletion also.
*/
- return heap_delete(relation, tid, cid, crosscheck, wait, tmfd, changingPart);
+ result = heap_delete(relation, tid, cid, crosscheck, wait,
+ tmfd, changingPart);
+
+ /*
+ * If the tuple has been concurrently updated, then get the lock on it.
+ * (Do this if the caller asked for that by providing a 'lockedSlot'.) With
+ * the lock held, a retry of the delete should succeed even if there are more
+ * concurrent update attempts.
+ */
+ if (result == TM_Updated && lockedSlot)
+ {
+ TupleTableSlot *evalSlot;
+
+ Assert(wait);
+
+ evalSlot = LAZY_TTS_EVAL(lockedSlot);
+ result = heapam_tuple_lock_internal(relation, tid, snapshot,
+ evalSlot, cid, LockTupleExclusive,
+ LockWaitBlock,
+ TUPLE_LOCK_FLAG_FIND_LAST_VERSION,
+ tmfd, true);
+
+ if (result == TM_Ok)
+ {
+ tmfd->traversed = true;
+ return TM_Updated;
+ }
+ }
+
+ return result;
}
@@ -314,7 +352,8 @@ static TM_Result
heapam_tuple_update(Relation relation, ItemPointer otid, TupleTableSlot *slot,
CommandId cid, Snapshot snapshot, Snapshot crosscheck,
bool wait, TM_FailureData *tmfd,
- LockTupleMode *lockmode, TU_UpdateIndexes *update_indexes)
+ LockTupleMode *lockmode, TU_UpdateIndexes *update_indexes,
+ LazyTupleTableSlot *lockedSlot)
{
bool shouldFree = true;
HeapTuple tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree);
@@ -352,6 +391,32 @@ heapam_tuple_update(Relation relation, ItemPointer otid, TupleTableSlot *slot,
if (shouldFree)
pfree(tuple);
+ /*
+ * If the tuple has been concurrently updated, then get the lock on it.
+ * (Do this if the caller asked for that by providing a 'lockedSlot'.) With
+ * the lock held, a retry of the update should succeed even if there are more
+ * concurrent update attempts.
+ */
+ if (result == TM_Updated && lockedSlot)
+ {
+ TupleTableSlot *evalSlot;
+
+ Assert(wait);
+
+ evalSlot = LAZY_TTS_EVAL(lockedSlot);
+ result = heapam_tuple_lock_internal(relation, otid, snapshot,
+ evalSlot, cid, *lockmode,
+ LockWaitBlock,
+ TUPLE_LOCK_FLAG_FIND_LAST_VERSION,
+ tmfd, true);
+
+ if (result == TM_Ok)
+ {
+ tmfd->traversed = true;
+ return TM_Updated;
+ }
+ }
+
return result;
}
@@ -361,9 +426,25 @@ heapam_tuple_lock(Relation relation, ItemPointer tid, Snapshot snapshot,
LockWaitPolicy wait_policy, uint8 flags,
TM_FailureData *tmfd)
{
+ return heapam_tuple_lock_internal(relation, tid, snapshot, slot, cid,
+ mode, wait_policy, flags, tmfd, false);
+}
+
+/*
+ * This routine does the work for heapam_tuple_lock(), but also supports an
+ * `updated` argument to reuse the work done by heapam_tuple_update() or
+ * heapam_tuple_delete() when they have already found that the tuple was
+ * concurrently updated.
+ */
+static TM_Result
+heapam_tuple_lock_internal(Relation relation, ItemPointer tid,
+ Snapshot snapshot, TupleTableSlot *slot,
+ CommandId cid, LockTupleMode mode,
+ LockWaitPolicy wait_policy, uint8 flags,
+ TM_FailureData *tmfd, bool updated)
+{
BufferHeapTupleTableSlot *bslot = (BufferHeapTupleTableSlot *) slot;
TM_Result result;
- Buffer buffer;
+ Buffer buffer = InvalidBuffer;
HeapTuple tuple = &bslot->base.tupdata;
bool follow_updates;
@@ -374,16 +455,26 @@ heapam_tuple_lock(Relation relation, ItemPointer tid, Snapshot snapshot,
tuple_lock_retry:
tuple->t_self = *tid;
- result = heap_lock_tuple(relation, tuple, cid, mode, wait_policy,
- follow_updates, &buffer, tmfd);
+ if (!updated)
+ result = heap_lock_tuple(relation, tuple, cid, mode, wait_policy,
+ follow_updates, &buffer, tmfd);
+ else
+ result = TM_Updated;
if (result == TM_Updated &&
(flags & TUPLE_LOCK_FLAG_FIND_LAST_VERSION))
{
- /* Should not encounter speculative tuple on recheck */
- Assert(!HeapTupleHeaderIsSpeculative(tuple->t_data));
+ if (!updated)
+ {
+ /* Should not encounter speculative tuple on recheck */
+ Assert(!HeapTupleHeaderIsSpeculative(tuple->t_data));
- ReleaseBuffer(buffer);
+ ReleaseBuffer(buffer);
+ }
+ else
+ {
+ updated = false;
+ }
if (!ItemPointerEquals(&tmfd->ctid, &tuple->t_self))
{
diff --git a/src/backend/access/table/tableam.c b/src/backend/access/table/tableam.c
index a5e6c92f35e..2a1a6ced3c7 100644
--- a/src/backend/access/table/tableam.c
+++ b/src/backend/access/table/tableam.c
@@ -306,7 +306,8 @@ simple_table_tuple_delete(Relation rel, ItemPointer tid, Snapshot snapshot)
GetCurrentCommandId(true),
snapshot, InvalidSnapshot,
true /* wait for commit */ ,
- &tmfd, false /* changingPart */ );
+ &tmfd, false /* changingPart */ ,
+ NULL);
switch (result)
{
@@ -355,7 +356,8 @@ simple_table_tuple_update(Relation rel, ItemPointer otid,
GetCurrentCommandId(true),
snapshot, InvalidSnapshot,
true /* wait for commit */ ,
- &tmfd, &lockmode, update_indexes);
+ &tmfd, &lockmode, update_indexes,
+ NULL);
switch (result)
{
diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c
index 93ebfdbb0d8..e3503756818 100644
--- a/src/backend/executor/nodeModifyTable.c
+++ b/src/backend/executor/nodeModifyTable.c
@@ -1325,25 +1325,61 @@ ExecDeletePrologue(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
}
/*
 * The implementation of the LazyTupleTableSlot wrapper for the EPQ slot to be passed
+ * to table_tuple_update()/table_tuple_delete().
+ */
+typedef struct
+{
+ EPQState *epqstate;
+ ResultRelInfo *resultRelInfo;
+} GetEPQSlotArg;
+
+static TupleTableSlot *
+GetEPQSlot(void *arg)
+{
+ GetEPQSlotArg *slotArg = (GetEPQSlotArg *) arg;
+
+ return EvalPlanQualSlot(slotArg->epqstate,
+ slotArg->resultRelInfo->ri_RelationDesc,
+ slotArg->resultRelInfo->ri_RangeTableIndex);
+}
+
+/*
* ExecDeleteAct -- subroutine for ExecDelete
*
* Actually delete the tuple from a plain table.
*
+ * If the 'lockUpdated' flag is set and the target tuple is updated, then
+ * the latest version gets locked and fetched into the EPQ slot.
+ *
* Caller is in charge of doing EvalPlanQual as necessary
*/
static TM_Result
ExecDeleteAct(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
- ItemPointer tupleid, bool changingPart)
+ ItemPointer tupleid, bool changingPart, bool lockUpdated)
{
EState *estate = context->estate;
+ GetEPQSlotArg slotArg = {context->epqstate, resultRelInfo};
+ LazyTupleTableSlot lazyEPQSlot,
+ *lazyEPQSlotPtr;
+ if (lockUpdated)
+ {
+ MAKE_LAZY_TTS(&lazyEPQSlot, GetEPQSlot, &slotArg);
+ lazyEPQSlotPtr = &lazyEPQSlot;
+ }
+ else
+ {
+ lazyEPQSlotPtr = NULL;
+ }
return table_tuple_delete(resultRelInfo->ri_RelationDesc, tupleid,
estate->es_output_cid,
estate->es_snapshot,
estate->es_crosscheck_snapshot,
true /* wait for commit */ ,
&context->tmfd,
- changingPart);
+ changingPart,
+ lazyEPQSlotPtr);
}
/*
@@ -1488,7 +1524,8 @@ ExecDelete(ModifyTableContext *context,
* transaction-snapshot mode transactions.
*/
ldelete:
- result = ExecDeleteAct(context, resultRelInfo, tupleid, changingPart);
+ result = ExecDeleteAct(context, resultRelInfo, tupleid, changingPart,
+ !IsolationUsesXactSnapshot());
switch (result)
{
@@ -1541,103 +1578,49 @@ ldelete:
errmsg("could not serialize access due to concurrent update")));
/*
- * Already know that we're going to need to do EPQ, so
- * fetch tuple directly into the right slot.
+ * ExecDeleteAct() has already locked the old tuple for
+ * us. Now we need to copy it to the right slot.
*/
EvalPlanQualBegin(context->epqstate);
inputslot = EvalPlanQualSlot(context->epqstate, resultRelationDesc,
resultRelInfo->ri_RangeTableIndex);
- result = table_tuple_lock(resultRelationDesc, tupleid,
- estate->es_snapshot,
- inputslot, estate->es_output_cid,
- LockTupleExclusive, LockWaitBlock,
- TUPLE_LOCK_FLAG_FIND_LAST_VERSION,
- &context->tmfd);
-
- switch (result)
+ /*
+ * Save the locked tuple for further processing of the
+ * RETURNING clause.
+ */
+ if (processReturning &&
+ resultRelInfo->ri_projectReturning &&
+ !resultRelInfo->ri_FdwRoutine)
{
- case TM_Ok:
- Assert(context->tmfd.traversed);
+ TupleTableSlot *returningSlot;
- /*
- * Save locked tuple for further processing of
- * RETURNING clause.
- */
- if (processReturning &&
- resultRelInfo->ri_projectReturning &&
- !resultRelInfo->ri_FdwRoutine)
- {
- TupleTableSlot *returningSlot;
-
- returningSlot = ExecGetReturningSlot(estate, resultRelInfo);
- ExecCopySlot(returningSlot, inputslot);
- ExecMaterializeSlot(returningSlot);
- }
-
- epqslot = EvalPlanQual(context->epqstate,
- resultRelationDesc,
- resultRelInfo->ri_RangeTableIndex,
- inputslot);
- if (TupIsNull(epqslot))
- /* Tuple not passing quals anymore, exiting... */
- return NULL;
-
- /*
- * If requested, skip delete and pass back the
- * updated row.
- */
- if (epqreturnslot)
- {
- *epqreturnslot = epqslot;
- return NULL;
- }
- else
- goto ldelete;
-
- case TM_SelfModified:
-
- /*
- * This can be reached when following an update
- * chain from a tuple updated by another session,
- * reaching a tuple that was already updated in
- * this transaction. If previously updated by this
- * command, ignore the delete, otherwise error
- * out.
- *
- * See also TM_SelfModified response to
- * table_tuple_delete() above.
- */
- if (context->tmfd.cmax != estate->es_output_cid)
- ereport(ERROR,
- (errcode(ERRCODE_TRIGGERED_DATA_CHANGE_VIOLATION),
- errmsg("tuple to be deleted was already modified by an operation triggered by the current command"),
- errhint("Consider using an AFTER trigger instead of a BEFORE trigger to propagate changes to other rows.")));
- return NULL;
-
- case TM_Deleted:
- /* tuple already deleted; nothing to do */
- return NULL;
+ returningSlot = ExecGetReturningSlot(estate,
+ resultRelInfo);
+ ExecCopySlot(returningSlot, inputslot);
+ ExecMaterializeSlot(returningSlot);
+ }
- default:
+ Assert(context->tmfd.traversed);
+ epqslot = EvalPlanQual(context->epqstate,
+ resultRelationDesc,
+ resultRelInfo->ri_RangeTableIndex,
+ inputslot);
+ if (TupIsNull(epqslot))
+ /* Tuple not passing quals anymore, exiting... */
+ return NULL;
- /*
- * TM_Invisible should be impossible because we're
- * waiting for updated row versions, and would
- * already have errored out if the first version
- * is invisible.
- *
- * TM_Updated should be impossible, because we're
- * locking the latest version via
- * TUPLE_LOCK_FLAG_FIND_LAST_VERSION.
- */
- elog(ERROR, "unexpected table_tuple_lock status: %u",
- result);
- return NULL;
+ /*
+ * If requested, skip delete and pass back the updated
+ * row.
+ */
+ if (epqreturnslot)
+ {
+ *epqreturnslot = epqslot;
+ return NULL;
}
-
- Assert(false);
- break;
+ else
+ goto ldelete;
}
case TM_Deleted:
@@ -1982,12 +1965,15 @@ ExecUpdatePrepareSlot(ResultRelInfo *resultRelInfo,
static TM_Result
ExecUpdateAct(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
ItemPointer tupleid, HeapTuple oldtuple, TupleTableSlot *slot,
- bool canSetTag, UpdateContext *updateCxt)
+ bool canSetTag, bool lockUpdated, UpdateContext *updateCxt)
{
EState *estate = context->estate;
Relation resultRelationDesc = resultRelInfo->ri_RelationDesc;
bool partition_constraint_failed;
TM_Result result;
+ GetEPQSlotArg slotArg = {context->epqstate, resultRelInfo};
+ LazyTupleTableSlot lazyEPQSlot,
+ *lazyEPQSlotPtr;
updateCxt->crossPartUpdate = false;
@@ -2113,13 +2099,23 @@ lreplace:
* for referential integrity updates in transaction-snapshot mode
* transactions.
*/
+ if (lockUpdated)
+ {
+ MAKE_LAZY_TTS(&lazyEPQSlot, GetEPQSlot, &slotArg);
+ lazyEPQSlotPtr = &lazyEPQSlot;
+ }
+ else
+ {
+ lazyEPQSlotPtr = NULL;
+ }
result = table_tuple_update(resultRelationDesc, tupleid, slot,
estate->es_output_cid,
estate->es_snapshot,
estate->es_crosscheck_snapshot,
true /* wait for commit */ ,
&context->tmfd, &updateCxt->lockmode,
- &updateCxt->updateIndexes);
+ &updateCxt->updateIndexes,
+ lazyEPQSlotPtr);
if (result == TM_Ok)
updateCxt->updated = true;
@@ -2273,7 +2269,7 @@ ExecCrossPartitionUpdateForeignKey(ModifyTableContext *context,
static TupleTableSlot *
ExecUpdate(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
ItemPointer tupleid, HeapTuple oldtuple, TupleTableSlot *slot,
- bool canSetTag)
+ bool canSetTag, bool locked)
{
EState *estate = context->estate;
Relation resultRelationDesc = resultRelInfo->ri_RelationDesc;
@@ -2335,7 +2331,8 @@ ExecUpdate(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
*/
redo_act:
result = ExecUpdateAct(context, resultRelInfo, tupleid, oldtuple, slot,
- canSetTag, &updateCxt);
+ canSetTag, !IsolationUsesXactSnapshot(),
+ &updateCxt);
/*
* If ExecUpdateAct reports that a cross-partition update was done,
@@ -2394,81 +2391,39 @@ redo_act:
ereport(ERROR,
(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
errmsg("could not serialize access due to concurrent update")));
+ Assert(!locked);
/*
- * Already know that we're going to need to do EPQ, so
- * fetch tuple directly into the right slot.
+ * ExecUpdateAct() has already locked the old tuple for
+ * us. Now we need to copy it to the right slot.
*/
inputslot = EvalPlanQualSlot(context->epqstate, resultRelationDesc,
resultRelInfo->ri_RangeTableIndex);
- result = table_tuple_lock(resultRelationDesc, tupleid,
- estate->es_snapshot,
- inputslot, estate->es_output_cid,
- updateCxt.lockmode, LockWaitBlock,
- TUPLE_LOCK_FLAG_FIND_LAST_VERSION,
- &context->tmfd);
-
- switch (result)
- {
- case TM_Ok:
- Assert(context->tmfd.traversed);
-
- /* Make sure ri_oldTupleSlot is initialized. */
- if (unlikely(!resultRelInfo->ri_projectNewInfoValid))
- ExecInitUpdateProjection(context->mtstate,
- resultRelInfo);
-
- /*
- * Save the locked tuple for further calculation
- * of the new tuple.
- */
- oldSlot = resultRelInfo->ri_oldTupleSlot;
- ExecCopySlot(oldSlot, inputslot);
- ExecMaterializeSlot(oldSlot);
-
- epqslot = EvalPlanQual(context->epqstate,
- resultRelationDesc,
- resultRelInfo->ri_RangeTableIndex,
- inputslot);
- if (TupIsNull(epqslot))
- /* Tuple not passing quals anymore, exiting... */
- return NULL;
-
- slot = ExecGetUpdateNewTuple(resultRelInfo,
- epqslot, oldSlot);
- goto redo_act;
-
- case TM_Deleted:
- /* tuple already deleted; nothing to do */
- return NULL;
-
- case TM_SelfModified:
-
- /*
- * This can be reached when following an update
- * chain from a tuple updated by another session,
- * reaching a tuple that was already updated in
- * this transaction. If previously modified by
- * this command, ignore the redundant update,
- * otherwise error out.
- *
- * See also TM_SelfModified response to
- * table_tuple_update() above.
- */
- if (context->tmfd.cmax != estate->es_output_cid)
- ereport(ERROR,
- (errcode(ERRCODE_TRIGGERED_DATA_CHANGE_VIOLATION),
- errmsg("tuple to be updated was already modified by an operation triggered by the current command"),
- errhint("Consider using an AFTER trigger instead of a BEFORE trigger to propagate changes to other rows.")));
- return NULL;
+ /* Make sure ri_oldTupleSlot is initialized. */
+ if (unlikely(!resultRelInfo->ri_projectNewInfoValid))
+ ExecInitUpdateProjection(context->mtstate,
+ resultRelInfo);
- default:
- /* see table_tuple_lock call in ExecDelete() */
- elog(ERROR, "unexpected table_tuple_lock status: %u",
- result);
- return NULL;
- }
+ /*
+ * Save the locked tuple for further calculation of the
+ * new tuple.
+ */
+ oldSlot = resultRelInfo->ri_oldTupleSlot;
+ ExecCopySlot(oldSlot, inputslot);
+ ExecMaterializeSlot(oldSlot);
+ Assert(context->tmfd.traversed);
+
+ epqslot = EvalPlanQual(context->epqstate,
+ resultRelationDesc,
+ resultRelInfo->ri_RangeTableIndex,
+ inputslot);
+ if (TupIsNull(epqslot))
+ /* Tuple not passing quals anymore, exiting... */
+ return NULL;
+ slot = ExecGetUpdateNewTuple(resultRelInfo,
+ epqslot, oldSlot);
+ goto redo_act;
}
break;
@@ -2710,7 +2665,7 @@ ExecOnConflictUpdate(ModifyTableContext *context,
*returning = ExecUpdate(context, resultRelInfo,
conflictTid, NULL,
resultRelInfo->ri_onConflict->oc_ProjSlot,
- canSetTag);
+ canSetTag, true);
/*
* Clear out existing tuple, as there might not be another conflict among
@@ -2913,7 +2868,7 @@ lmerge_matched:
break; /* concurrent update/delete */
}
result = ExecUpdateAct(context, resultRelInfo, tupleid, NULL,
- newslot, false, &updateCxt);
+ newslot, false, false, &updateCxt);
if (result == TM_Ok && updateCxt.updated)
{
ExecUpdateEpilogue(context, &updateCxt, resultRelInfo,
@@ -2931,7 +2886,8 @@ lmerge_matched:
return true; /* "do nothing" */
break; /* concurrent update/delete */
}
- result = ExecDeleteAct(context, resultRelInfo, tupleid, false);
+ result = ExecDeleteAct(context, resultRelInfo, tupleid,
+ false, false);
if (result == TM_Ok)
{
ExecDeleteEpilogue(context, resultRelInfo, tupleid, NULL,
@@ -3837,7 +3793,7 @@ ExecModifyTable(PlanState *pstate)
/* Now apply the update. */
slot = ExecUpdate(&context, resultRelInfo, tupleid, oldtuple,
- slot, node->canSetTag);
+ slot, node->canSetTag, false);
break;
case CMD_DELETE: