Diffstat (limited to 'src/backend')
42 files changed, 3140 insertions, 325 deletions
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index e84c1743f4f..7ea9a77e7ea 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -2061,8 +2061,17 @@ FreeBulkInsertState(BulkInsertState bistate) * This causes rows to be frozen, which is an MVCC violation and * requires explicit options chosen by user. * + * HEAP_INSERT_SPECULATIVE is used on so-called "speculative insertions", + * which can be backed out afterwards without aborting the whole transaction. + * Other sessions can wait for the speculative insertion to be confirmed, + * turning it into a regular tuple, or aborted, as if it never existed. + * Speculatively inserted tuples behave as "value locks" of short duration, + * used to implement INSERT ... ON CONFLICT. + * * Note that these options will be applied when inserting into the heap's * TOAST table, too, if the tuple requires any out-of-line data. + * FIXME: Do we mark TOAST tuples as speculative too? What about confirming + * or aborting them? * * The BulkInsertState object (if any; bistate can be NULL for default * behavior) is also just passed through to RelationGetBufferForTuple. @@ -2115,7 +2124,8 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid, /* NO EREPORT(ERROR) from here till changes are logged */ START_CRIT_SECTION(); - RelationPutHeapTuple(relation, buffer, heaptup); + RelationPutHeapTuple(relation, buffer, heaptup, + (options & HEAP_INSERT_SPECULATIVE) != 0); if (PageIsAllVisible(BufferGetPage(buffer))) { @@ -2169,7 +2179,11 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid, } xlrec.offnum = ItemPointerGetOffsetNumber(&heaptup->t_self); - xlrec.flags = all_visible_cleared ? XLOG_HEAP_ALL_VISIBLE_CLEARED : 0; + xlrec.flags = 0; + if (all_visible_cleared) + xlrec.flags |= XLH_INSERT_ALL_VISIBLE_CLEARED; + if (options & HEAP_INSERT_SPECULATIVE) + xlrec.flags |= XLH_INSERT_IS_SPECULATIVE; Assert(ItemPointerGetBlockNumber(&heaptup->t_self) == BufferGetBlockNumber(buffer)); /* @@ -2179,7 +2193,7 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid, */ if (RelationIsLogicallyLogged(relation)) { - xlrec.flags |= XLOG_HEAP_CONTAINS_NEW_TUPLE; + xlrec.flags |= XLH_INSERT_CONTAINS_NEW_TUPLE; bufflags |= REGBUF_KEEP_DATA; } @@ -2224,6 +2238,7 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid, */ CacheInvalidateHeapTuple(relation, heaptup, NULL); + /* Note: speculative insertions are counted too, even if aborted later */ pgstat_count_heap_insert(relation, 1); /* @@ -2395,7 +2410,7 @@ heap_multi_insert(Relation relation, HeapTuple *tuples, int ntuples, * RelationGetBufferForTuple has ensured that the first tuple fits. * Put that on the page, and then as many other tuples as fit.
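The new flag is easiest to understand from the caller's side. Below is a condensed, hypothetical sketch of the lifecycle this patch expects (modeled on the executor changes in nodeModifyTable.c, which fall largely outside this excerpt). `rel`, `tuple`, and `conflicted` stand in for caller state; `SpeculativeInsertionLockAcquire()`/`Release()` and `HeapTupleHeaderSetSpeculativeToken()` come from the lock-manager and header-macro parts of the patchset, so treat their exact shapes as assumptions:

```c
/*
 * Hypothetical caller sketch, not the literal executor code: take a
 * speculative-token lock, stamp the token into the tuple, insert with
 * HEAP_INSERT_SPECULATIVE, then either confirm or back out.
 */
uint32		specToken;

specToken = SpeculativeInsertionLockAcquire(GetCurrentTransactionId());
HeapTupleHeaderSetSpeculativeToken(tuple->t_data, specToken);

heap_insert(rel, tuple, GetCurrentCommandId(true),
			HEAP_INSERT_SPECULATIVE, NULL);

/* ... insert index entries; they may report a conflict ... */

if (conflicted)
	heap_abort_speculative(rel, tuple);		/* back the tuple out */
else
	heap_finish_speculative(rel, tuple);	/* turn it into a regular tuple */

SpeculativeInsertionLockRelease(GetCurrentTransactionId());
```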
*/ - RelationPutHeapTuple(relation, buffer, heaptuples[ndone]); + RelationPutHeapTuple(relation, buffer, heaptuples[ndone], false); for (nthispage = 1; ndone + nthispage < ntuples; nthispage++) { HeapTuple heaptup = heaptuples[ndone + nthispage]; @@ -2403,7 +2418,7 @@ heap_multi_insert(Relation relation, HeapTuple *tuples, int ntuples, if (PageGetHeapFreeSpace(page) < MAXALIGN(heaptup->t_len) + saveFreeSpace) break; - RelationPutHeapTuple(relation, buffer, heaptup); + RelationPutHeapTuple(relation, buffer, heaptup, false); /* * We don't use heap_multi_insert for catalog tuples yet, but @@ -2463,7 +2478,7 @@ heap_multi_insert(Relation relation, HeapTuple *tuples, int ntuples, /* the rest of the scratch space is used for tuple data */ tupledata = scratchptr; - xlrec->flags = all_visible_cleared ? XLOG_HEAP_ALL_VISIBLE_CLEARED : 0; + xlrec->flags = all_visible_cleared ? XLH_INSERT_ALL_VISIBLE_CLEARED : 0; xlrec->ntuples = nthispage; /* @@ -2498,7 +2513,7 @@ heap_multi_insert(Relation relation, HeapTuple *tuples, int ntuples, Assert((scratchptr - scratch) < BLCKSZ); if (need_tuple_data) - xlrec->flags |= XLOG_HEAP_CONTAINS_NEW_TUPLE; + xlrec->flags |= XLH_INSERT_CONTAINS_NEW_TUPLE; /* * Signal that this is the last xl_heap_multi_insert record @@ -2506,7 +2521,7 @@ heap_multi_insert(Relation relation, HeapTuple *tuples, int ntuples, * decoding so it knows when to cleanup temporary data. */ if (ndone + nthispage == ntuples) - xlrec->flags |= XLOG_HEAP_LAST_MULTI_INSERT; + xlrec->flags |= XLH_INSERT_LAST_IN_MULTI; if (init) { @@ -2914,7 +2929,12 @@ l1: MarkBufferDirty(buffer); - /* XLOG stuff */ + /* + * XLOG stuff + * + * NB: heap_abort_speculative() uses the same xlog record and replay + * routines. + */ if (RelationNeedsWAL(relation)) { xl_heap_delete xlrec; @@ -2924,7 +2944,7 @@ l1: if (RelationIsAccessibleInLogicalDecoding(relation)) log_heap_new_cid(relation, &tp); - xlrec.flags = all_visible_cleared ? XLOG_HEAP_ALL_VISIBLE_CLEARED : 0; + xlrec.flags = all_visible_cleared ? XLH_DELETE_ALL_VISIBLE_CLEARED : 0; xlrec.infobits_set = compute_infobits(tp.t_data->t_infomask, tp.t_data->t_infomask2); xlrec.offnum = ItemPointerGetOffsetNumber(&tp.t_self); @@ -2933,9 +2953,9 @@ l1: if (old_key_tuple != NULL) { if (relation->rd_rel->relreplident == REPLICA_IDENTITY_FULL) - xlrec.flags |= XLOG_HEAP_CONTAINS_OLD_TUPLE; + xlrec.flags |= XLH_DELETE_CONTAINS_OLD_TUPLE; else - xlrec.flags |= XLOG_HEAP_CONTAINS_OLD_KEY; + xlrec.flags |= XLH_DELETE_CONTAINS_OLD_KEY; } XLogBeginInsert(); @@ -3742,7 +3762,7 @@ l2: HeapTupleClearHeapOnly(newtup); } - RelationPutHeapTuple(relation, newbuf, heaptup); /* insert new tuple */ + RelationPutHeapTuple(relation, newbuf, heaptup, false); /* insert new tuple */ if (!already_marked) { @@ -4133,14 +4153,16 @@ get_mxact_status_for_lock(LockTupleMode mode, bool is_update) * * Function result may be: * HeapTupleMayBeUpdated: lock was successfully acquired + * HeapTupleInvisible: lock failed because tuple was never visible to us * HeapTupleSelfUpdated: lock failed because tuple updated by self * HeapTupleUpdated: lock failed because tuple updated by other xact * HeapTupleWouldBlock: lock couldn't be acquired and wait_policy is skip * - * In the failure cases, the routine fills *hufd with the tuple's t_ctid, - * t_xmax (resolving a possible MultiXact, if necessary), and t_cmax - * (the last only for HeapTupleSelfUpdated, since we - * cannot obtain cmax from a combocid generated by another transaction). 
+ * In the failure cases other than HeapTupleInvisible, the routine fills + * *hufd with the tuple's t_ctid, t_xmax (resolving a possible MultiXact, + * if necessary), and t_cmax (the last only for HeapTupleSelfUpdated, + * since we cannot obtain cmax from a combocid generated by another + * transaction). * See comments for struct HeapUpdateFailureData for additional info. * * See README.tuplock for a thorough explanation of this mechanism. @@ -4179,8 +4201,15 @@ l3: if (result == HeapTupleInvisible) { - UnlockReleaseBuffer(*buffer); - elog(ERROR, "attempted to lock invisible tuple"); + LockBuffer(*buffer, BUFFER_LOCK_UNLOCK); + + /* + * This is possible, but only when locking a tuple for ON CONFLICT + * UPDATE. We return this value here rather than throwing an error in + * order to give that case the opportunity to throw a more specific + * error. + */ + return HeapTupleInvisible; } else if (result == HeapTupleBeingUpdated) { @@ -5417,6 +5446,234 @@ heap_lock_updated_tuple(Relation rel, HeapTuple tuple, ItemPointer ctid, return HeapTupleMayBeUpdated; } +/* + * heap_finish_speculative - mark speculative insertion as successful + * + * To successfully finish a speculative insertion we have to clear the + * speculative token from the tuple. To do so, the t_ctid field, which will + * contain a speculative token value, is modified in place to point to the + * tuple itself, which is characteristic of a newly inserted ordinary tuple. + * + * NB: It is not OK to commit without either finishing or aborting a + * speculative insertion. We could treat speculative tuples of committed + * transactions implicitly as completed, but then we would have to be prepared + * to deal with speculative tokens on committed tuples. That wouldn't be + * difficult - no-one looks at the ctid field of a tuple with invalid xmax - + * but clearing the token at completion isn't very expensive either. + * An explicit confirmation WAL record also makes logical decoding simpler. + */ +void +heap_finish_speculative(Relation relation, HeapTuple tuple) +{ + Buffer buffer; + Page page; + OffsetNumber offnum; + ItemId lp = NULL; + HeapTupleHeader htup; + + buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(&(tuple->t_self))); + LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); + page = (Page) BufferGetPage(buffer); + + offnum = ItemPointerGetOffsetNumber(&(tuple->t_self)); + if (PageGetMaxOffsetNumber(page) >= offnum) + lp = PageGetItemId(page, offnum); + + if (PageGetMaxOffsetNumber(page) < offnum || !ItemIdIsNormal(lp)) + elog(ERROR, "heap_finish_speculative: invalid lp"); + + htup = (HeapTupleHeader) PageGetItem(page, lp); + + /* SpecTokenOffsetNumber should be distinguishable from any real offset */ + StaticAssertStmt(MaxOffsetNumber < SpecTokenOffsetNumber, + "invalid speculative token constant"); + + /* NO EREPORT(ERROR) from here till changes are logged */ + START_CRIT_SECTION(); + + Assert(HeapTupleHeaderIsSpeculative(tuple->t_data)); + + MarkBufferDirty(buffer); + + /* + * Replace the speculative insertion token with a real t_ctid, + * pointing to itself as it does on regular tuples.
+ */ + htup->t_ctid = tuple->t_self; + + /* XLOG stuff */ + if (RelationNeedsWAL(relation)) + { + xl_heap_confirm xlrec; + XLogRecPtr recptr; + + xlrec.offnum = ItemPointerGetOffsetNumber(&tuple->t_self); + + XLogBeginInsert(); + + /* We want the same filtering on this as on a plain insert */ + XLogIncludeOrigin(); + + XLogRegisterData((char *) &xlrec, SizeOfHeapConfirm); + XLogRegisterBuffer(0, buffer, REGBUF_STANDARD); + + recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_CONFIRM); + + PageSetLSN(page, recptr); + } + + END_CRIT_SECTION(); + + UnlockReleaseBuffer(buffer); +} + +/* + * heap_abort_speculative - kill a speculatively inserted tuple + * + * Marks a tuple that was speculatively inserted in the same command as dead, + * by setting its xmin to invalid. That makes it immediately appear as dead + * to all transactions, including our own. In particular, it makes + * HeapTupleSatisfiesDirty() regard the tuple as dead, so that another backend + * inserting a duplicate key value won't unnecessarily wait for our whole + * transaction to finish (it'll just wait for our speculative insertion to + * finish). + * + * Killing the tuple prevents "unprincipled deadlocks", which are deadlocks + * that arise due to a mutual dependency that is not user visible. By + * definition, unprincipled deadlocks cannot be prevented by the user + * reordering lock acquisition in client code, because the implementation level + * lock acquisitions are not under the user's direct control. If speculative + * inserters did not take this precaution, then under high concurrency they + * could deadlock with each other, which would not be acceptable. + * + * This is somewhat redundant with heap_delete, but we prefer to have a + * dedicated routine with stripped down requirements. + * + * This routine does not affect logical decoding, as logical decoding only + * looks at confirmation records. + */ +void +heap_abort_speculative(Relation relation, HeapTuple tuple) +{ + TransactionId xid = GetCurrentTransactionId(); + ItemPointer tid = &(tuple->t_self); + ItemId lp; + HeapTupleData tp; + Page page; + BlockNumber block; + Buffer buffer; + + Assert(ItemPointerIsValid(tid)); + + block = ItemPointerGetBlockNumber(tid); + buffer = ReadBuffer(relation, block); + page = BufferGetPage(buffer); + + LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); + + /* + * Page can't be all visible, we just inserted into it, and are still + * running. + */ + Assert(!PageIsAllVisible(page)); + + lp = PageGetItemId(page, ItemPointerGetOffsetNumber(tid)); + Assert(ItemIdIsNormal(lp)); + + tp.t_tableOid = RelationGetRelid(relation); + tp.t_data = (HeapTupleHeader) PageGetItem(page, lp); + tp.t_len = ItemIdGetLength(lp); + tp.t_self = *tid; + + /* + * Sanity check that the tuple really is a speculatively inserted tuple, + * inserted by us. + */ + if (tp.t_data->t_choice.t_heap.t_xmin != xid) + elog(ERROR, "attempted to kill a tuple inserted by another transaction"); + if (!HeapTupleHeaderIsSpeculative(tp.t_data)) + elog(ERROR, "attempted to kill a non-speculative tuple"); + Assert(!HeapTupleHeaderIsHeapOnly(tp.t_data)); + + /* + * No need to check for serializable conflicts here. There is never a + * need for a combocid, either. No need to extract replica identity, or + * do anything special with infomask bits. + */ + + START_CRIT_SECTION(); + + /* + * The tuple will become DEAD immediately. Flag that this page is + * immediately a candidate for pruning by setting xmin to + * RecentGlobalXmin. That's not pretty, but it doesn't seem worth + * inventing a nicer API for this.
+ */ + Assert(TransactionIdIsValid(RecentGlobalXmin)); + PageSetPrunable(page, RecentGlobalXmin); + + /* store transaction information of xact deleting the tuple */ + tp.t_data->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED); + tp.t_data->t_infomask2 &= ~HEAP_KEYS_UPDATED; + + /* + * Set the tuple header xmin to InvalidTransactionId. This makes the + * tuple immediately invisible to everyone. (In particular, to any + * transactions waiting on the speculative token, woken up later.) + */ + HeapTupleHeaderSetXmin(tp.t_data, InvalidTransactionId); + + /* Clear the speculative insertion token too */ + tp.t_data->t_ctid = tp.t_self; + + MarkBufferDirty(buffer); + + /* + * XLOG stuff + * + * The WAL records generated here match heap_delete(). The same recovery + * routines are used. + */ + if (RelationNeedsWAL(relation)) + { + xl_heap_delete xlrec; + XLogRecPtr recptr; + + xlrec.flags = XLH_DELETE_IS_SUPER; + xlrec.infobits_set = compute_infobits(tp.t_data->t_infomask, + tp.t_data->t_infomask2); + xlrec.offnum = ItemPointerGetOffsetNumber(&tp.t_self); + xlrec.xmax = xid; + + XLogBeginInsert(); + XLogRegisterData((char *) &xlrec, SizeOfHeapDelete); + XLogRegisterBuffer(0, buffer, REGBUF_STANDARD); + + /* No replica identity & replication origin logged */ + + recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_DELETE); + + PageSetLSN(page, recptr); + } + + END_CRIT_SECTION(); + + LockBuffer(buffer, BUFFER_LOCK_UNLOCK); + + if (HeapTupleHasExternal(&tp)) + toast_delete(relation, &tp); + + /* + * Never need to mark tuple for invalidation, since catalogs don't support + * speculative insertion + */ + + /* Now we can release the buffer */ + ReleaseBuffer(buffer); + + /* count deletion, as we counted the insertion too */ + pgstat_count_heap_delete(relation); +} /* * heap_inplace_update - update a tuple "in place" (ie, overwrite it) @@ -6732,22 +6989,22 @@ log_heap_update(Relation reln, Buffer oldbuf, /* Prepare main WAL data chain */ xlrec.flags = 0; if (all_visible_cleared) - xlrec.flags |= XLOG_HEAP_ALL_VISIBLE_CLEARED; + xlrec.flags |= XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED; if (new_all_visible_cleared) - xlrec.flags |= XLOG_HEAP_NEW_ALL_VISIBLE_CLEARED; + xlrec.flags |= XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED; if (prefixlen > 0) - xlrec.flags |= XLOG_HEAP_PREFIX_FROM_OLD; + xlrec.flags |= XLH_UPDATE_PREFIX_FROM_OLD; if (suffixlen > 0) - xlrec.flags |= XLOG_HEAP_SUFFIX_FROM_OLD; + xlrec.flags |= XLH_UPDATE_SUFFIX_FROM_OLD; if (need_tuple_data) { - xlrec.flags |= XLOG_HEAP_CONTAINS_NEW_TUPLE; + xlrec.flags |= XLH_UPDATE_CONTAINS_NEW_TUPLE; if (old_key_tuple) { if (reln->rd_rel->relreplident == REPLICA_IDENTITY_FULL) - xlrec.flags |= XLOG_HEAP_CONTAINS_OLD_TUPLE; + xlrec.flags |= XLH_UPDATE_CONTAINS_OLD_TUPLE; else - xlrec.flags |= XLOG_HEAP_CONTAINS_OLD_KEY; + xlrec.flags |= XLH_UPDATE_CONTAINS_OLD_KEY; } } @@ -7378,7 +7635,7 @@ heap_xlog_delete(XLogReaderState *record) * The visibility map may need to be fixed even if the heap page is * already up-to-date.
*/ - if (xlrec->flags & XLOG_HEAP_ALL_VISIBLE_CLEARED) + if (xlrec->flags & XLH_DELETE_ALL_VISIBLE_CLEARED) { Relation reln = CreateFakeRelcacheEntry(target_node); Buffer vmbuffer = InvalidBuffer; @@ -7406,13 +7663,16 @@ heap_xlog_delete(XLogReaderState *record) HeapTupleHeaderClearHotUpdated(htup); fix_infomask_from_infobits(xlrec->infobits_set, &htup->t_infomask, &htup->t_infomask2); - HeapTupleHeaderSetXmax(htup, xlrec->xmax); + if (!(xlrec->flags & XLH_DELETE_IS_SUPER)) + HeapTupleHeaderSetXmax(htup, xlrec->xmax); + else + HeapTupleHeaderSetXmin(htup, InvalidTransactionId); HeapTupleHeaderSetCmax(htup, FirstCommandId, false); /* Mark the page as a candidate for pruning */ PageSetPrunable(page, XLogRecGetXid(record)); - if (xlrec->flags & XLOG_HEAP_ALL_VISIBLE_CLEARED) + if (xlrec->flags & XLH_DELETE_ALL_VISIBLE_CLEARED) PageClearAllVisible(page); /* Make sure there is no forward chain link in t_ctid */ @@ -7453,7 +7713,7 @@ heap_xlog_insert(XLogReaderState *record) * The visibility map may need to be fixed even if the heap page is * already up-to-date. */ - if (xlrec->flags & XLOG_HEAP_ALL_VISIBLE_CLEARED) + if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED) { Relation reln = CreateFakeRelcacheEntry(target_node); Buffer vmbuffer = InvalidBuffer; @@ -7516,7 +7776,7 @@ heap_xlog_insert(XLogReaderState *record) PageSetLSN(page, lsn); - if (xlrec->flags & XLOG_HEAP_ALL_VISIBLE_CLEARED) + if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED) PageClearAllVisible(page); MarkBufferDirty(buffer); @@ -7573,7 +7833,7 @@ heap_xlog_multi_insert(XLogReaderState *record) * The visibility map may need to be fixed even if the heap page is * already up-to-date. */ - if (xlrec->flags & XLOG_HEAP_ALL_VISIBLE_CLEARED) + if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED) { Relation reln = CreateFakeRelcacheEntry(rnode); Buffer vmbuffer = InvalidBuffer; @@ -7655,7 +7915,7 @@ heap_xlog_multi_insert(XLogReaderState *record) PageSetLSN(page, lsn); - if (xlrec->flags & XLOG_HEAP_ALL_VISIBLE_CLEARED) + if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED) PageClearAllVisible(page); MarkBufferDirty(buffer); @@ -7728,7 +7988,7 @@ heap_xlog_update(XLogReaderState *record, bool hot_update) * The visibility map may need to be fixed even if the heap page is * already up-to-date. */ - if (xlrec->flags & XLOG_HEAP_ALL_VISIBLE_CLEARED) + if (xlrec->flags & XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED) { Relation reln = CreateFakeRelcacheEntry(rnode); Buffer vmbuffer = InvalidBuffer; @@ -7783,7 +8043,7 @@ heap_xlog_update(XLogReaderState *record, bool hot_update) /* Mark the page as a candidate for pruning */ PageSetPrunable(page, XLogRecGetXid(record)); - if (xlrec->flags & XLOG_HEAP_ALL_VISIBLE_CLEARED) + if (xlrec->flags & XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED) PageClearAllVisible(page); PageSetLSN(page, lsn); @@ -7812,7 +8072,7 @@ heap_xlog_update(XLogReaderState *record, bool hot_update) * The visibility map may need to be fixed even if the heap page is * already up-to-date. 
*/ - if (xlrec->flags & XLOG_HEAP_NEW_ALL_VISIBLE_CLEARED) + if (xlrec->flags & XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED) { Relation reln = CreateFakeRelcacheEntry(rnode); Buffer vmbuffer = InvalidBuffer; @@ -7840,13 +8100,13 @@ heap_xlog_update(XLogReaderState *record, bool hot_update) if (PageGetMaxOffsetNumber(page) + 1 < offnum) elog(PANIC, "heap_update_redo: invalid max offset number"); - if (xlrec->flags & XLOG_HEAP_PREFIX_FROM_OLD) + if (xlrec->flags & XLH_UPDATE_PREFIX_FROM_OLD) { Assert(newblk == oldblk); memcpy(&prefixlen, recdata, sizeof(uint16)); recdata += sizeof(uint16); } - if (xlrec->flags & XLOG_HEAP_SUFFIX_FROM_OLD) + if (xlrec->flags & XLH_UPDATE_SUFFIX_FROM_OLD) { Assert(newblk == oldblk); memcpy(&suffixlen, recdata, sizeof(uint16)); @@ -7918,7 +8178,7 @@ heap_xlog_update(XLogReaderState *record, bool hot_update) if (offnum == InvalidOffsetNumber) elog(PANIC, "heap_update_redo: failed to add tuple"); - if (xlrec->flags & XLOG_HEAP_NEW_ALL_VISIBLE_CLEARED) + if (xlrec->flags & XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED) PageClearAllVisible(page); freespace = PageGetHeapFreeSpace(page); /* needed to update FSM below */ @@ -7952,6 +8212,42 @@ heap_xlog_update(XLogReaderState *record, bool hot_update) } static void +heap_xlog_confirm(XLogReaderState *record) +{ + XLogRecPtr lsn = record->EndRecPtr; + xl_heap_confirm *xlrec = (xl_heap_confirm *) XLogRecGetData(record); + Buffer buffer; + Page page; + OffsetNumber offnum; + ItemId lp = NULL; + HeapTupleHeader htup; + + if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO) + { + page = BufferGetPage(buffer); + + offnum = xlrec->offnum; + if (PageGetMaxOffsetNumber(page) >= offnum) + lp = PageGetItemId(page, offnum); + + if (PageGetMaxOffsetNumber(page) < offnum || !ItemIdIsNormal(lp)) + elog(PANIC, "heap_confirm_redo: invalid lp"); + + htup = (HeapTupleHeader) PageGetItem(page, lp); + + /* + * Confirm tuple as actually inserted + */ + ItemPointerSet(&htup->t_ctid, BufferGetBlockNumber(buffer), offnum); + + PageSetLSN(page, lsn); + MarkBufferDirty(buffer); + } + if (BufferIsValid(buffer)) + UnlockReleaseBuffer(buffer); +} + +static void heap_xlog_lock(XLogReaderState *record) { XLogRecPtr lsn = record->EndRecPtr; @@ -8101,6 +8397,9 @@ heap_redo(XLogReaderState *record) case XLOG_HEAP_HOT_UPDATE: heap_xlog_update(record, true); break; + case XLOG_HEAP_CONFIRM: + heap_xlog_confirm(record); + break; case XLOG_HEAP_LOCK: heap_xlog_lock(record); break; diff --git a/src/backend/access/heap/hio.c b/src/backend/access/heap/hio.c index 6d091f63af0..a9f0ca35e49 100644 --- a/src/backend/access/heap/hio.c +++ b/src/backend/access/heap/hio.c @@ -35,12 +35,17 @@ void RelationPutHeapTuple(Relation relation, Buffer buffer, - HeapTuple tuple) + HeapTuple tuple, + bool token) { Page pageHeader; OffsetNumber offnum; - ItemId itemId; - Item item; + + /* + * A tuple that's being inserted speculatively should already have its + * token set. 
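The hio.c assertion that follows ("should already have its token set") leans on a token convention whose definitions live in headers outside this diffstat (itemptr.h and htup_details.h in the patchset). The sketch below paraphrases that convention; the exact constant value is an assumption:

```c
/*
 * Assumed sketch of the speculative-token convention: the token is stored
 * in t_ctid's block-number half, and the offset half is set to a reserved
 * value larger than any real offset (hence the StaticAssertStmt against
 * MaxOffsetNumber in heap_finish_speculative above).
 */
#define SpecTokenOffsetNumber	0xfffe	/* assumed value */

#define HeapTupleHeaderIsSpeculative(tup) \
	(ItemPointerGetOffsetNumber(&(tup)->t_ctid) == SpecTokenOffsetNumber)

#define HeapTupleHeaderSetSpeculativeToken(tup, token) \
	ItemPointerSet(&(tup)->t_ctid, (token), SpecTokenOffsetNumber)
```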
+ */ + Assert(!token || HeapTupleHeaderIsSpeculative(tuple->t_data)); /* Add the tuple to the page */ pageHeader = BufferGetPage(buffer); @@ -54,10 +59,18 @@ RelationPutHeapTuple(Relation relation, /* Update tuple->t_self to the actual position where it was stored */ ItemPointerSet(&(tuple->t_self), BufferGetBlockNumber(buffer), offnum); - /* Insert the correct position into CTID of the stored tuple, too */ - itemId = PageGetItemId(pageHeader, offnum); - item = PageGetItem(pageHeader, itemId); - ((HeapTupleHeader) item)->t_ctid = tuple->t_self; + /* + * Insert the correct position into CTID of the stored tuple, too + * (unless this is a speculative insertion, in which case the token is + * held in the CTID field instead) + */ + if (!token) + { + ItemId itemId = PageGetItemId(pageHeader, offnum); + Item item = PageGetItem(pageHeader, itemId); + + ((HeapTupleHeader) item)->t_ctid = tuple->t_self; + } } /* diff --git a/src/backend/access/heap/tuptoaster.c b/src/backend/access/heap/tuptoaster.c index 8464e8794f6..274155ad0c7 100644 --- a/src/backend/access/heap/tuptoaster.c +++ b/src/backend/access/heap/tuptoaster.c @@ -523,6 +523,14 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup, bool toast_delold[MaxHeapAttributeNumber]; /* + * Ignore the INSERT_SPECULATIVE option. Speculative insertions/super + * deletions just insert/delete the toast values normally. It seems + * easiest to deal with that here, instead of in, potentially, multiple + * callers. + */ + options &= ~HEAP_INSERT_SPECULATIVE; + + /* * We should only ever be called for tuples of plain relations or * materialized views --- recursing on a toast rel is bad news. */ diff --git a/src/backend/access/nbtree/nbtinsert.c b/src/backend/access/nbtree/nbtinsert.c index ef68a7145fc..4a60c5fa2c8 100644 --- a/src/backend/access/nbtree/nbtinsert.c +++ b/src/backend/access/nbtree/nbtinsert.c @@ -51,7 +51,8 @@ static Buffer _bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf); static TransactionId _bt_check_unique(Relation rel, IndexTuple itup, Relation heapRel, Buffer buf, OffsetNumber offset, ScanKey itup_scankey, - IndexUniqueCheck checkUnique, bool *is_unique); + IndexUniqueCheck checkUnique, bool *is_unique, + uint32 *speculativeToken); static void _bt_findinsertloc(Relation rel, Buffer *bufptr, OffsetNumber *offsetptr, @@ -159,17 +160,27 @@ top: */ if (checkUnique != UNIQUE_CHECK_NO) { - TransactionId xwait; + TransactionId xwait; + uint32 speculativeToken; offset = _bt_binsrch(rel, buf, natts, itup_scankey, false); xwait = _bt_check_unique(rel, itup, heapRel, buf, offset, itup_scankey, - checkUnique, &is_unique); + checkUnique, &is_unique, &speculativeToken); if (TransactionIdIsValid(xwait)) { /* Have to wait for the other guy ... */ _bt_relbuf(rel, buf); + /* + * If it's a speculative insertion, wait for it to finish (i.e. + * to go ahead with the insertion, or to kill the tuple). Otherwise + * wait for the transaction to finish as usual. + */ + if (speculativeToken) + SpeculativeInsertionWait(xwait, speculativeToken); + else + XactLockTableWait(xwait, rel, &itup->t_tid, XLTW_InsertIndex); + /* start over... */ _bt_freestack(stack); goto top; @@ -213,7 +224,10 @@ top: * * Returns InvalidTransactionId if there is no conflict, else an xact ID * we must wait for to see if it commits a conflicting tuple. If an actual - * conflict is detected, no return --- just ereport().
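The token that _bt_doinsert() waits on is handed back through SnapshotDirty.speculativeToken, consumed just below in _bt_check_unique(). The producer side is in tqual.c, outside this diffstat, so the fragment here is an assumption about its shape rather than the literal change:

```c
/*
 * Assumed fragment of HeapTupleSatisfiesDirty() in this patch: for a tuple
 * whose inserting transaction is still in progress, report the XID as
 * before and, if the insertion is speculative, the token to wait on.
 */
snapshot->xmin = HeapTupleHeaderGetRawXmin(tuple);
snapshot->speculativeToken = HeapTupleHeaderIsSpeculative(tuple) ?
	HeapTupleHeaderGetSpeculativeToken(tuple) : 0;
```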
If an xact ID is + returned, and the conflicting tuple still has a speculative insertion in + progress, *speculativeToken is set to non-zero, and the caller can wait for + the verdict on the insertion using SpeculativeInsertionWait(). * * However, if checkUnique == UNIQUE_CHECK_PARTIAL, we always return * InvalidTransactionId because we don't want to wait. In this case we @@ -223,7 +237,8 @@ top: static TransactionId _bt_check_unique(Relation rel, IndexTuple itup, Relation heapRel, Buffer buf, OffsetNumber offset, ScanKey itup_scankey, - IndexUniqueCheck checkUnique, bool *is_unique) + IndexUniqueCheck checkUnique, bool *is_unique, + uint32 *speculativeToken) { TupleDesc itupdesc = RelationGetDescr(rel); int natts = rel->rd_rel->relnatts; @@ -340,6 +355,7 @@ _bt_check_unique(Relation rel, IndexTuple itup, Relation heapRel, if (nbuf != InvalidBuffer) _bt_relbuf(rel, nbuf); /* Tell _bt_doinsert to wait... */ + *speculativeToken = SnapshotDirty.speculativeToken; return xwait; } diff --git a/src/backend/access/rmgrdesc/heapdesc.c b/src/backend/access/rmgrdesc/heapdesc.c index 4f06a2637ae..f4a1b002cf1 100644 --- a/src/backend/access/rmgrdesc/heapdesc.c +++ b/src/backend/access/rmgrdesc/heapdesc.c @@ -75,6 +75,12 @@ heap_desc(StringInfo buf, XLogReaderState *record) xlrec->new_offnum, xlrec->new_xmax); } + else if (info == XLOG_HEAP_CONFIRM) + { + xl_heap_confirm *xlrec = (xl_heap_confirm *) rec; + + appendStringInfo(buf, "off %u", xlrec->offnum); + } else if (info == XLOG_HEAP_LOCK) { xl_heap_lock *xlrec = (xl_heap_lock *) rec; @@ -177,6 +183,9 @@ heap_identify(uint8 info) case XLOG_HEAP_HOT_UPDATE | XLOG_HEAP_INIT_PAGE: id = "HOT_UPDATE+INIT"; break; + case XLOG_HEAP_CONFIRM: + id = "CONFIRM"; + break; case XLOG_HEAP_LOCK: id = "LOCK"; break; diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c index ac3b785b5a7..8c8a9eafeea 100644 --- a/src/backend/catalog/index.c +++ b/src/backend/catalog/index.c @@ -1665,6 +1665,10 @@ BuildIndexInfo(Relation index) /* other info */ ii->ii_Unique = indexStruct->indisunique; ii->ii_ReadyForInserts = IndexIsReady(indexStruct); + /* assume not doing speculative insertion for now */ + ii->ii_UniqueOps = NULL; + ii->ii_UniqueProcs = NULL; + ii->ii_UniqueStrats = NULL; /* initialize index-build state to default */ ii->ii_Concurrent = false; @@ -1674,6 +1678,53 @@ BuildIndexInfo(Relation index) } /* ---------------- + * BuildSpeculativeIndexInfo + * Add extra state to IndexInfo record + * + * For unique indexes, we usually don't want to add info to the IndexInfo for + * checking uniqueness, since the B-Tree AM handles that directly. However, + * in the case of speculative insertion, additional support is required. + * + * Do this processing here rather than in BuildIndexInfo() to not incur the + * overhead in the common non-speculative cases. + * ---------------- + */ +void +BuildSpeculativeIndexInfo(Relation index, IndexInfo *ii) +{ + int ncols = index->rd_rel->relnatts; + int i; + + /* + * fetch info for checking unique indexes + */ + Assert(ii->ii_Unique); + + if (index->rd_rel->relam != BTREE_AM_OID) + elog(ERROR, "unexpected non-btree speculative unique index"); + + ii->ii_UniqueOps = (Oid *) palloc(sizeof(Oid) * ncols); + ii->ii_UniqueProcs = (Oid *) palloc(sizeof(Oid) * ncols); + ii->ii_UniqueStrats = (uint16 *) palloc(sizeof(uint16) * ncols); + + /* + * We have to look up the operator's strategy number. This + * provides a cross-check that the operator does match the index.
+ */ + /* We need the func OIDs and strategy numbers too */ + for (i = 0; i < ncols; i++) + { + ii->ii_UniqueStrats[i] = BTEqualStrategyNumber; + ii->ii_UniqueOps[i] = + get_opfamily_member(index->rd_opfamily[i], + index->rd_opcintype[i], + index->rd_opcintype[i], + ii->ii_UniqueStrats[i]); + ii->ii_UniqueProcs[i] = get_opcode(ii->ii_UniqueOps[i]); + } +} + +/* ---------------- * FormIndexDatum * Construct values[] and isnull[] arrays for a new index tuple. * @@ -2612,7 +2663,7 @@ IndexCheckExclusion(Relation heapRelation, check_exclusion_constraint(heapRelation, indexRelation, indexInfo, &(heapTuple->t_self), values, isnull, - estate, true, false); + estate, true); } heap_endscan(scan); diff --git a/src/backend/catalog/indexing.c b/src/backend/catalog/indexing.c index fe123addac0..0231084c7c9 100644 --- a/src/backend/catalog/indexing.c +++ b/src/backend/catalog/indexing.c @@ -46,7 +46,7 @@ CatalogOpenIndexes(Relation heapRel) resultRelInfo->ri_RelationDesc = heapRel; resultRelInfo->ri_TrigDesc = NULL; /* we don't fire triggers */ - ExecOpenIndices(resultRelInfo); + ExecOpenIndices(resultRelInfo, false); return resultRelInfo; } diff --git a/src/backend/catalog/sql_features.txt b/src/backend/catalog/sql_features.txt index 332926424b6..cc0f8c45a6d 100644 --- a/src/backend/catalog/sql_features.txt +++ b/src/backend/catalog/sql_features.txt @@ -229,7 +229,7 @@ F311 Schema definition statement 02 CREATE TABLE for persistent base tables YES F311 Schema definition statement 03 CREATE VIEW YES F311 Schema definition statement 04 CREATE VIEW: WITH CHECK OPTION YES F311 Schema definition statement 05 GRANT statement YES -F312 MERGE statement NO +F312 MERGE statement NO Consider INSERT ... ON CONFLICT DO UPDATE F313 Enhanced MERGE statement NO F314 MERGE statement with DELETE branch NO F321 User authorization YES diff --git a/src/backend/commands/constraint.c b/src/backend/commands/constraint.c index 561d8fae574..e49affba9ee 100644 --- a/src/backend/commands/constraint.c +++ b/src/backend/commands/constraint.c @@ -172,7 +172,7 @@ unique_key_recheck(PG_FUNCTION_ARGS) */ check_exclusion_constraint(trigdata->tg_relation, indexRel, indexInfo, &(new_row->t_self), values, isnull, - estate, false, false); + estate, false); } /* diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c index aa8ae4b9bcd..00a2417a099 100644 --- a/src/backend/commands/copy.c +++ b/src/backend/commands/copy.c @@ -2284,7 +2284,7 @@ CopyFrom(CopyState cstate) 1, /* dummy rangetable index */ 0); - ExecOpenIndices(resultRelInfo); + ExecOpenIndices(resultRelInfo, false); estate->es_result_relations = resultRelInfo; estate->es_num_result_relations = 1; @@ -2439,7 +2439,8 @@ CopyFrom(CopyState cstate) if (resultRelInfo->ri_NumIndices > 0) recheckIndexes = ExecInsertIndexTuples(slot, &(tuple->t_self), - estate); + estate, false, NULL, + NIL); /* AFTER ROW INSERT Triggers */ ExecARInsertTriggers(estate, resultRelInfo, tuple, @@ -2553,7 +2554,7 @@ CopyFromInsertBatch(CopyState cstate, EState *estate, CommandId mycid, ExecStoreTuple(bufferedTuples[i], myslot, InvalidBuffer, false); recheckIndexes = ExecInsertIndexTuples(myslot, &(bufferedTuples[i]->t_self), - estate); + estate, false, NULL, NIL); ExecARInsertTriggers(estate, resultRelInfo, bufferedTuples[i], recheckIndexes); diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c index f4cc90183a4..c5452e3cb6a 100644 --- a/src/backend/commands/explain.c +++ b/src/backend/commands/explain.c @@ -103,7 +103,8 @@ static void ExplainIndexScanDetails(Oid 
indexid, ScanDirection indexorderdir, static void ExplainScanTarget(Scan *plan, ExplainState *es); static void ExplainModifyTarget(ModifyTable *plan, ExplainState *es); static void ExplainTargetRel(Plan *plan, Index rti, ExplainState *es); -static void show_modifytable_info(ModifyTableState *mtstate, ExplainState *es); +static void show_modifytable_info(ModifyTableState *mtstate, List *ancestors, + ExplainState *es); static void ExplainMemberNodes(List *plans, PlanState **planstates, List *ancestors, ExplainState *es); static void ExplainSubPlans(List *plans, List *ancestors, @@ -744,6 +745,9 @@ ExplainPreScanNode(PlanState *planstate, Bitmapset **rels_used) case T_ModifyTable: *rels_used = bms_add_member(*rels_used, ((ModifyTable *) plan)->nominalRelation); + if (((ModifyTable *) plan)->exclRelRTI) + *rels_used = bms_add_member(*rels_used, + ((ModifyTable *) plan)->exclRelRTI); break; default: break; @@ -1466,7 +1470,8 @@ ExplainNode(PlanState *planstate, List *ancestors, planstate, es); break; case T_ModifyTable: - show_modifytable_info((ModifyTableState *) planstate, es); + show_modifytable_info((ModifyTableState *) planstate, ancestors, + es); break; case T_Hash: show_hash_info((HashState *) planstate, es); @@ -2317,18 +2322,22 @@ ExplainTargetRel(Plan *plan, Index rti, ExplainState *es) /* * Show extra information for a ModifyTable node * - * We have two objectives here. First, if there's more than one target table - * or it's different from the nominal target, identify the actual target(s). - * Second, give FDWs a chance to display extra info about foreign targets. + * We have three objectives here. First, if there's more than one target + * table or it's different from the nominal target, identify the actual + * target(s). Second, give FDWs a chance to display extra info about foreign + * targets. Third, show information about ON CONFLICT. */ static void -show_modifytable_info(ModifyTableState *mtstate, ExplainState *es) +show_modifytable_info(ModifyTableState *mtstate, List *ancestors, + ExplainState *es) { ModifyTable *node = (ModifyTable *) mtstate->ps.plan; const char *operation; const char *foperation; bool labeltargets; int j; + List *idxNames = NIL; + ListCell *lst; switch (node->operation) { @@ -2414,6 +2423,55 @@ show_modifytable_info(ModifyTableState *mtstate, ExplainState *es) } } + /* Gather names of ON CONFLICT arbiter indexes */ + foreach(lst, node->arbiterIndexes) + { + char *indexname = get_rel_name(lfirst_oid(lst)); + + idxNames = lappend(idxNames, indexname); + } + + if (node->onConflictAction != ONCONFLICT_NONE) + { + ExplainProperty("Conflict Resolution", + node->onConflictAction == ONCONFLICT_NOTHING ? 
+ "NOTHING" : "UPDATE", + false, es); + + /* + * Don't display arbiter indexes at all when DO NOTHING variant + * implicitly ignores all conflicts + */ + if (idxNames) + ExplainPropertyList("Conflict Arbiter Indexes", idxNames, es); + + /* ON CONFLICT DO UPDATE WHERE qual is specially displayed */ + if (node->onConflictWhere) + { + show_upper_qual((List *) node->onConflictWhere, "Conflict Filter", + &mtstate->ps, ancestors, es); + show_instrumentation_count("Rows Removed by Conflict Filter", 1, &mtstate->ps, es); + } + + /* EXPLAIN ANALYZE display of actual outcome for each tuple proposed */ + if (es->analyze && mtstate->ps.instrument) + { + double total; + double insert_path; + double other_path; + + InstrEndLoop(mtstate->mt_plans[0]->instrument); + + /* count the number of source rows */ + total = mtstate->mt_plans[0]->instrument->ntuples; + other_path = mtstate->ps.instrument->nfiltered2; + insert_path = total - other_path; + + ExplainPropertyFloat("Tuples Inserted", insert_path, 0, es); + ExplainPropertyFloat("Conflicting Tuples", other_path, 0, es); + } + } + if (labeltargets) ExplainCloseGroup("Target Tables", "Target Tables", false, es); } diff --git a/src/backend/commands/trigger.c b/src/backend/commands/trigger.c index 222e7fce854..b537ca5e661 100644 --- a/src/backend/commands/trigger.c +++ b/src/backend/commands/trigger.c @@ -2421,21 +2421,10 @@ ExecBRUpdateTriggers(EState *estate, EPQState *epqstate, TupleTableSlot *newSlot; int i; Bitmapset *updatedCols; - Bitmapset *keyCols; LockTupleMode lockmode; - /* - * Compute lock mode to use. If columns that are part of the key have not - * been modified, then we can use a weaker lock, allowing for better - * concurrency. - */ - updatedCols = GetUpdatedColumns(relinfo, estate); - keyCols = RelationGetIndexAttrBitmap(relinfo->ri_RelationDesc, - INDEX_ATTR_BITMAP_KEY); - if (bms_overlap(keyCols, updatedCols)) - lockmode = LockTupleExclusive; - else - lockmode = LockTupleNoKeyExclusive; + /* Determine lock mode to use */ + lockmode = ExecUpdateLockMode(estate, relinfo); Assert(HeapTupleIsValid(fdw_trigtuple) ^ ItemPointerIsValid(tupleid)); if (fdw_trigtuple == NULL) @@ -2476,6 +2465,7 @@ ExecBRUpdateTriggers(EState *estate, EPQState *epqstate, TRIGGER_EVENT_ROW | TRIGGER_EVENT_BEFORE; LocTriggerData.tg_relation = relinfo->ri_RelationDesc; + updatedCols = GetUpdatedColumns(relinfo, estate); for (i = 0; i < trigdesc->numtriggers; i++) { Trigger *trigger = &trigdesc->triggers[i]; @@ -2783,6 +2773,9 @@ ltrmark:; */ return NULL; + case HeapTupleInvisible: + elog(ERROR, "attempted to lock invisible tuple"); + default: ReleaseBuffer(buffer); elog(ERROR, "unrecognized heap_lock_tuple status: %u", test); diff --git a/src/backend/executor/execIndexing.c b/src/backend/executor/execIndexing.c index a697682b20e..e7cf72b3875 100644 --- a/src/backend/executor/execIndexing.c +++ b/src/backend/executor/execIndexing.c @@ -50,6 +50,50 @@ * to the caller. The caller must re-check them later by calling * check_exclusion_constraint(). * + * Speculative insertion + * --------------------- + * + * Speculative insertion is a is a two-phase mechanism, used to implement + * INSERT ... ON CONFLICT DO UPDATE/NOTHING. The tuple is first inserted + * to the heap and update the indexes as usual, but if a constraint is + * violated, we can still back out the insertion without aborting the whole + * transaction. In an INSERT ... ON CONFLICT statement, if a conflict is + * detected, the inserted tuple is backed out and the ON CONFLICT action is + * executed instead. 
+ * + * Insertion into a unique index works as usual: the index AM checks for + * duplicate keys atomically with the insertion. But instead of throwing + * an error on a conflict, the speculatively inserted heap tuple is backed + * out. + * + * Exclusion constraints are slightly more complicated. As mentioned + * earlier, there is a risk of deadlock when two backends insert the same + * key concurrently. That was not a problem for regular insertions, where + * one of the transactions has to be aborted anyway, but with a speculative + * insertion we cannot let a deadlock happen, because we only want to back + * out the speculatively inserted tuple on conflict, not abort the whole + * transaction. + * + * When a backend detects that the speculative insertion conflicts with + * another in-progress tuple, it has two options: + * + * 1. back out the speculatively inserted tuple, then wait for the other + * transaction, and retry. Or, + * 2. wait for the other transaction, with the speculatively inserted tuple + * still in place. + * + * If two backends insert at the same time, and both try to wait for each + * other, they will deadlock. So option 2 is not acceptable. Option 1 + * avoids the deadlock, but it is prone to a livelock instead. Both + * transactions will wake up immediately as the other transaction backs + * out. Then they both retry, and conflict with each other again, lather, + * rinse, repeat. + * + * To avoid the livelock, one of the backends must back out first, and then + * wait, while the other one waits without backing out. It doesn't matter + * which one backs out, so we employ an arbitrary rule that the transaction + * with the higher XID backs out. + * * * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California @@ -63,12 +107,30 @@ #include "postgres.h" #include "access/relscan.h" +#include "access/xact.h" #include "catalog/index.h" #include "executor/executor.h" #include "nodes/nodeFuncs.h" #include "storage/lmgr.h" #include "utils/tqual.h" +/* waitMode argument to check_exclusion_or_unique_constraint() */ +typedef enum +{ + CEOUC_WAIT, + CEOUC_NOWAIT, + CEOUC_LIVELOCK_PREVENTING_WAIT +} CEOUC_WAIT_MODE; + +static bool check_exclusion_or_unique_constraint(Relation heap, Relation index, + IndexInfo *indexInfo, + ItemPointer tupleid, + Datum *values, bool *isnull, + EState *estate, bool newIndex, + CEOUC_WAIT_MODE waitMode, + bool violationOK, + ItemPointer conflictTid); + static bool index_recheck_constraint(Relation index, Oid *constr_procs, Datum *existing_values, bool *existing_isnull, Datum *new_values); @@ -84,7 +146,7 @@ static bool index_recheck_constraint(Relation index, Oid *constr_procs, * ---------------------------------------------------------------- */ void -ExecOpenIndices(ResultRelInfo *resultRelInfo) +ExecOpenIndices(ResultRelInfo *resultRelInfo, bool speculative) { Relation resultRelation = resultRelInfo->ri_RelationDesc; List *indexoidlist; @@ -137,6 +199,13 @@ ExecOpenIndices(ResultRelInfo *resultRelInfo) /* extract index key information from the index's pg_index info */ ii = BuildIndexInfo(indexDesc); + /* + * If the indexes are to be used for speculative insertion, add extra + * information required by unique index entries.
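The "higher XID backs out" rule lands in code further down, in check_exclusion_or_unique_constraint(). Condensed into one predicate (a paraphrase of the condition that appears verbatim later in this diff, using that function's local names):

```c
/*
 * Paraphrase of the wait decision in check_exclusion_or_unique_constraint():
 * in livelock-preventing mode, wait only when the conflicting tuple is a
 * speculative insertion and our XID is the older one; the younger inserter
 * backs out and retries instead.
 */
bool		wait =
	TransactionIdIsValid(xwait) &&
	(waitMode == CEOUC_WAIT ||
	 (waitMode == CEOUC_LIVELOCK_PREVENTING_WAIT &&
	  DirtySnapshot.speculativeToken &&
	  TransactionIdPrecedes(GetCurrentTransactionId(), xwait)));
```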
+ */ + if (speculative && ii->ii_Unique) + BuildSpeculativeIndexInfo(indexDesc, ii); + relationDescs[i] = indexDesc; indexInfoArray[i] = ii; i++; @@ -186,7 +255,9 @@ ExecCloseIndices(ResultRelInfo *resultRelInfo) * Unique and exclusion constraints are enforced at the same * time. This returns a list of index OIDs for any unique or * exclusion constraints that are deferred and that had - * potential (unconfirmed) conflicts. + * potential (unconfirmed) conflicts. (If noDupErr == true, + * the same is done for non-deferred constraints, and whether + * the conflict was a speculative conflict is reported to the + * caller via *specConflict.) * * CAUTION: this must not be called for a HOT update. * We can't defend against that here for lack of info. @@ -196,7 +267,10 @@ ExecCloseIndices(ResultRelInfo *resultRelInfo) List * ExecInsertIndexTuples(TupleTableSlot *slot, ItemPointer tupleid, - EState *estate) + EState *estate, + bool noDupErr, + bool *specConflict, + List *arbiterIndexes) { List *result = NIL; ResultRelInfo *resultRelInfo; @@ -236,12 +310,17 @@ ExecInsertIndexTuples(TupleTableSlot *slot, IndexInfo *indexInfo; IndexUniqueCheck checkUnique; bool satisfiesConstraint; + bool arbiter; if (indexRelation == NULL) continue; indexInfo = indexInfoArray[i]; + /* Record if speculative insertion arbiter */ + arbiter = list_member_oid(arbiterIndexes, + indexRelation->rd_index->indexrelid); + /* If the index is marked as read-only, ignore it */ if (!indexInfo->ii_ReadyForInserts) continue; @@ -288,9 +367,14 @@ ExecInsertIndexTuples(TupleTableSlot *slot, * For a deferrable unique index, we tell the index AM to just detect * possible non-uniqueness, and we add the index OID to the result * list if further checking is needed. + * + * For a speculative insertion (used by INSERT ... ON CONFLICT), do + * the same as for a deferrable unique index. */ if (!indexRelation->rd_index->indisunique) checkUnique = UNIQUE_CHECK_NO; + else if (noDupErr && (arbiterIndexes == NIL || arbiter)) + checkUnique = UNIQUE_CHECK_PARTIAL; else if (indexRelation->rd_index->indimmediate) checkUnique = UNIQUE_CHECK_YES; else @@ -308,8 +392,11 @@ ExecInsertIndexTuples(TupleTableSlot *slot, * If the index has an associated exclusion constraint, check that. * This is simpler than the process for uniqueness checks since we * always insert first and then check. If the constraint is deferred, - * we check now anyway, but don't throw error on violation; instead - * we'll queue a recheck event. + * we check now anyway, but don't throw error on violation or wait for + * a conclusive outcome from a concurrent insertion; instead we'll + * queue a recheck event. Similarly, noDupErr callers (speculative + * inserters) will recheck later, and wait for a conclusive outcome + * then.
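How a speculative inserter is meant to consume the new arguments is easiest to see from the caller. A hypothetical condensation (the real caller is nodeModifyTable.c's ExecInsert, whose full text falls outside this excerpt; the retry label is illustrative):

```c
/* Hypothetical caller-side sketch of the extended API. */
bool		specConflict = false;
List	   *recheckIndexes;

recheckIndexes = ExecInsertIndexTuples(slot, &(tuple->t_self), estate,
									   true,	/* noDupErr */
									   &specConflict,
									   arbiterIndexes);
if (specConflict)
{
	/* lost a race against another speculative inserter: back out, retry */
	heap_abort_speculative(resultRelationDesc, tuple);
	goto retry;					/* illustrative retry point */
}
```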
* * An index for an exclusion constraint can't also be UNIQUE (not an * essential property, we just don't allow it in the grammar), so no * @@ -317,13 +404,31 @@ ExecInsertIndexTuples(TupleTableSlot *slot, */ if (indexInfo->ii_ExclusionOps != NULL) { - bool errorOK = !indexRelation->rd_index->indimmediate; + bool violationOK; + CEOUC_WAIT_MODE waitMode; + + if (noDupErr) + { + violationOK = true; + waitMode = CEOUC_LIVELOCK_PREVENTING_WAIT; + } + else if (!indexRelation->rd_index->indimmediate) + { + violationOK = true; + waitMode = CEOUC_NOWAIT; + } + else + { + violationOK = false; + waitMode = CEOUC_WAIT; + } satisfiesConstraint = - check_exclusion_constraint(heapRelation, - indexRelation, indexInfo, - tupleid, values, isnull, - estate, false, errorOK); + check_exclusion_or_unique_constraint(heapRelation, + indexRelation, indexInfo, + tupleid, values, isnull, + estate, false, + waitMode, violationOK, NULL); } if ((checkUnique == UNIQUE_CHECK_PARTIAL || @@ -333,46 +438,213 @@ ExecInsertIndexTuples(TupleTableSlot *slot, /* * The tuple potentially violates the uniqueness or exclusion * constraint, so make a note of the index so that we can re-check - * it later. + * it later. Speculative inserters are told if there was a + * speculative conflict, since that always requires a restart. */ result = lappend_oid(result, RelationGetRelid(indexRelation)); + if (indexRelation->rd_index->indimmediate && specConflict) + *specConflict = true; } } return result; } +/* ---------------------------------------------------------------- + * ExecCheckIndexConstraints + * + * This routine checks if a tuple violates any unique or + * exclusion constraints. Returns true if there is no conflict. + * Otherwise returns false, and the TID of the conflicting + * tuple is returned in *conflictTid. + * + * If 'arbiterIndexes' is given, only those indexes are checked. + * NIL means all indexes. + * + * Note that this doesn't lock the values in any way, so it's + * possible that a conflicting tuple is inserted immediately + * after this returns. But this can be used for a pre-check + * before insertion. + * ---------------------------------------------------------------- + */ +bool +ExecCheckIndexConstraints(TupleTableSlot *slot, + EState *estate, ItemPointer conflictTid, + List *arbiterIndexes) +{ + ResultRelInfo *resultRelInfo; + int i; + int numIndices; + RelationPtr relationDescs; + Relation heapRelation; + IndexInfo **indexInfoArray; + ExprContext *econtext; + Datum values[INDEX_MAX_KEYS]; + bool isnull[INDEX_MAX_KEYS]; + ItemPointerData invalidItemPtr; + bool checkedIndex = false; + + ItemPointerSetInvalid(conflictTid); + ItemPointerSetInvalid(&invalidItemPtr); + + /* + * Get information from the result relation info structure. + */ + resultRelInfo = estate->es_result_relation_info; + numIndices = resultRelInfo->ri_NumIndices; + relationDescs = resultRelInfo->ri_IndexRelationDescs; + indexInfoArray = resultRelInfo->ri_IndexRelationInfo; + heapRelation = resultRelInfo->ri_RelationDesc; + + /* + * We will use the EState's per-tuple context for evaluating predicates + * and index expressions (creating it if it's not already there). + */ + econtext = GetPerTupleExprContext(estate); + + /* Arrange for econtext's scan tuple to be the tuple under test */ + econtext->ecxt_scantuple = slot; + + /* + * For each index, form index tuple and check if it satisfies the + * constraint.
+ */ + for (i = 0; i < numIndices; i++) + { + Relation indexRelation = relationDescs[i]; + IndexInfo *indexInfo; + bool satisfiesConstraint; + + if (indexRelation == NULL) + continue; + + indexInfo = indexInfoArray[i]; + + if (!indexInfo->ii_Unique && !indexInfo->ii_ExclusionOps) + continue; + + /* If the index is marked as read-only, ignore it */ + if (!indexInfo->ii_ReadyForInserts) + continue; + + /* When specific arbiter indexes requested, only examine them */ + if (arbiterIndexes != NIL && + !list_member_oid(arbiterIndexes, + indexRelation->rd_index->indexrelid)) + continue; + + if (!indexRelation->rd_index->indimmediate) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("ON CONFLICT does not support deferrable unique constraints/exclusion constraints as arbiters"), + errtableconstraint(heapRelation, + RelationGetRelationName(indexRelation)))); + + checkedIndex = true; + + /* Check for partial index */ + if (indexInfo->ii_Predicate != NIL) + { + List *predicate; + + /* + * If predicate state not set up yet, create it (in the estate's + * per-query context) + */ + predicate = indexInfo->ii_PredicateState; + if (predicate == NIL) + { + predicate = (List *) + ExecPrepareExpr((Expr *) indexInfo->ii_Predicate, + estate); + indexInfo->ii_PredicateState = predicate; + } + + /* Skip this index-update if the predicate isn't satisfied */ + if (!ExecQual(predicate, econtext, false)) + continue; + } + + /* + * FormIndexDatum fills in its values and isnull parameters with the + * appropriate values for the column(s) of the index. + */ + FormIndexDatum(indexInfo, + slot, + estate, + values, + isnull); + + satisfiesConstraint = + check_exclusion_or_unique_constraint(heapRelation, indexRelation, + indexInfo, &invalidItemPtr, + values, isnull, estate, false, + CEOUC_WAIT, true, + conflictTid); + if (!satisfiesConstraint) + return false; + } + + if (arbiterIndexes != NIL && !checkedIndex) + elog(ERROR, "unexpected failure to find arbiter index"); + + return true; +} + /* - * Check for violation of an exclusion constraint + * Check for violation of an exclusion or unique constraint * * heap: the table containing the new tuple - * index: the index supporting the exclusion constraint + * index: the index supporting the constraint * indexInfo: info about the index, including the exclusion properties - * tupleid: heap TID of the new tuple we have just inserted + * tupleid: heap TID of the new tuple we have just inserted (invalid if we + * haven't inserted a new tuple yet) * values, isnull: the *index* column values computed for the new tuple * estate: an EState we can do evaluation in * newIndex: if true, we are trying to build a new index (this affects * only the wording of error messages) - * errorOK: if true, don't throw error for violation + * waitMode: whether to wait for concurrent inserters/deleters + * violationOK: if true, don't throw error for violation + * conflictTid: if not-NULL, the TID of the conflicting tuple is returned here * * Returns true if OK, false if actual or potential violation * - * When errorOK is true, we report violation without waiting to see if any - * concurrent transaction has committed or not; so the violation is only - * potential, and the caller must recheck sometime later. This behavior - * is convenient for deferred exclusion checks; we need not bother queuing - * a deferred event if there is definitely no conflict at insertion time.
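The intended caller pattern for the pre-check, sketched under the assumption that it mirrors the ON CONFLICT executor path this patch adds elsewhere (`onconflict` and the surrounding control flow are illustrative):

```c
/* Hypothetical DO NOTHING path: pre-check, and skip the row on conflict. */
ItemPointerData conflictTid;

if (!ExecCheckIndexConstraints(slot, estate, &conflictTid, arbiterIndexes))
{
	if (onconflict == ONCONFLICT_NOTHING)
		return NULL;			/* conflict found: ignore this row */
	/* for DO UPDATE, lock and update the tuple at conflictTid instead */
}
```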
+ * 'waitMode' determines what happens if a conflict is detected with a tuple + * that was inserted or deleted by a transaction that's still running. + * CEOUC_WAIT means that we wait for the transaction to commit, before + * throwing an error or returning. CEOUC_NOWAIT means that we report the + * violation immediately; so the violation is only potential, and the caller + * must recheck sometime later. This behavior is convenient for deferred + * exclusion checks; we need not bother queuing a deferred event if there is + * definitely no conflict at insertion time. + * + * CEOUC_LIVELOCK_PREVENTING_WAIT is like CEOUC_NOWAIT, but we will sometimes + * wait anyway, to prevent livelocking if two transactions try inserting at + * the same time. This is used with speculative insertions, for INSERT ON + * CONFLICT statements. (See notes in file header) * - * When errorOK is false, we'll throw error on violation, so a false result - * is impossible. + * If violationOK is true, we just report the potential or actual violation to + * the caller by returning 'false'. Otherwise we throw a descriptive error + * message here. When violationOK is false, a false result is impossible. + * + * Note: The indexam is normally responsible for checking unique constraints, + * so this normally only needs to be used for exclusion constraints. But this + * function is also called when doing a "pre-check" for conflicts on a unique + * constraint, when doing speculative insertion. Caller may use the returned + * conflict TID to take further steps. */ -bool -check_exclusion_constraint(Relation heap, Relation index, IndexInfo *indexInfo, - ItemPointer tupleid, Datum *values, bool *isnull, - EState *estate, bool newIndex, bool errorOK) +static bool +check_exclusion_or_unique_constraint(Relation heap, Relation index, + IndexInfo *indexInfo, + ItemPointer tupleid, + Datum *values, bool *isnull, + EState *estate, bool newIndex, + CEOUC_WAIT_MODE waitMode, + bool violationOK, + ItemPointer conflictTid) { - Oid *constr_procs = indexInfo->ii_ExclusionProcs; - uint16 *constr_strats = indexInfo->ii_ExclusionStrats; + Oid *constr_procs; + uint16 *constr_strats; Oid *index_collations = index->rd_indcollation; int index_natts = index->rd_index->indnatts; IndexScanDesc index_scan; @@ -386,6 +658,17 @@ check_exclusion_constraint(Relation heap, Relation index, IndexInfo *indexInfo, TupleTableSlot *existing_slot; TupleTableSlot *save_scantuple; + if (indexInfo->ii_ExclusionOps) + { + constr_procs = indexInfo->ii_ExclusionProcs; + constr_strats = indexInfo->ii_ExclusionStrats; + } + else + { + constr_procs = indexInfo->ii_UniqueProcs; + constr_strats = indexInfo->ii_UniqueStrats; + } + /* * If any of the input values are NULL, the constraint check is assumed to * pass (i.e., we assume the operators are strict). @@ -450,7 +733,8 @@ retry: /* * Ignore the entry for the tuple we're trying to check. */ - if (ItemPointerEquals(tupleid, &tup->t_self)) + if (ItemPointerIsValid(tupleid) && + ItemPointerEquals(tupleid, &tup->t_self)) { if (found_self) /* should not happen */ elog(ERROR, "found self tuple multiple times in index \"%s\"", @@ -480,39 +764,47 @@ retry: } /* - * At this point we have either a conflict or a potential conflict. If - * we're not supposed to raise error, just return the fact of the - * potential conflict without waiting to see if it's real. - */ - if (errorOK) - { - conflict = true; - break; - } - - /* + * At this point we have either a conflict or a potential conflict. 
+ * * If an in-progress transaction is affecting the visibility of this - * tuple, we need to wait for it to complete and then recheck. For - * simplicity we do rechecking by just restarting the whole scan --- - * this case probably doesn't happen often enough to be worth trying - * harder, and anyway we don't want to hold any index internal locks - * while waiting. + * tuple, we need to wait for it to complete and then recheck (unless + * the caller requested not to). For simplicity we do rechecking by + * just restarting the whole scan --- this case probably doesn't + * happen often enough to be worth trying harder, and anyway we don't + * want to hold any index internal locks while waiting. */ xwait = TransactionIdIsValid(DirtySnapshot.xmin) ? DirtySnapshot.xmin : DirtySnapshot.xmax; - if (TransactionIdIsValid(xwait)) + if (TransactionIdIsValid(xwait) && + (waitMode == CEOUC_WAIT || + (waitMode == CEOUC_LIVELOCK_PREVENTING_WAIT && + DirtySnapshot.speculativeToken && + TransactionIdPrecedes(GetCurrentTransactionId(), xwait)))) { ctid_wait = tup->t_data->t_ctid; index_endscan(index_scan); - XactLockTableWait(xwait, heap, &ctid_wait, - XLTW_RecheckExclusionConstr); + if (DirtySnapshot.speculativeToken) + SpeculativeInsertionWait(DirtySnapshot.xmin, + DirtySnapshot.speculativeToken); + else + XactLockTableWait(xwait, heap, &ctid_wait, + XLTW_RecheckExclusionConstr); goto retry; } /* - * We have a definite conflict. Report it. + * We have a definite conflict (or a potential one, but the caller + * didn't want to wait). Return it to caller, or report it. */ + if (violationOK) + { + conflict = true; + if (conflictTid) + *conflictTid = tup->t_self; + break; + } + error_new = BuildIndexValueDescription(index, values, isnull); error_existing = BuildIndexValueDescription(index, existing_values, existing_isnull); @@ -544,10 +836,10 @@ retry: /* * Ordinarily, at this point the search should have found the originally - * inserted tuple, unless we exited the loop early because of conflict. - * However, it is possible to define exclusion constraints for which that - * wouldn't be true --- for instance, if the operator is <>. So we no - * longer complain if found_self is still false. + * inserted tuple (if any), unless we exited the loop early because of + * conflict. However, it is possible to define exclusion constraints for + * which that wouldn't be true --- for instance, if the operator is <>. + * So we no longer complain if found_self is still false. */ econtext->ecxt_scantuple = save_scantuple; @@ -558,6 +850,25 @@ retry: } /* + * Check for violation of an exclusion constraint + * + * This is a dumbed down version of check_exclusion_or_unique_constraint + * for external callers. They don't need all the special modes. + */ +void +check_exclusion_constraint(Relation heap, Relation index, + IndexInfo *indexInfo, + ItemPointer tupleid, + Datum *values, bool *isnull, + EState *estate, bool newIndex) +{ + (void) check_exclusion_or_unique_constraint(heap, index, indexInfo, tupleid, + values, isnull, + estate, newIndex, + CEOUC_WAIT, false, NULL); +} + +/* * Check existing tuple's index values to see if it really matches the * exclusion condition against the new_values. Returns true if conflict. 
*/ diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c index 4272d9bc155..0dee9491788 100644 --- a/src/backend/executor/execMain.c +++ b/src/backend/executor/execMain.c @@ -1813,6 +1813,12 @@ ExecWithCheckOptions(WCOKind kind, ResultRelInfo *resultRelInfo, errmsg("new row violates row level security policy for \"%s\"", wco->relname))); break; + case WCO_RLS_CONFLICT_CHECK: + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("new row violates row level security policy (USING expression) for \"%s\"", + wco->relname))); + break; default: elog(ERROR, "unrecognized WCO kind: %u", wco->kind); break; @@ -1973,6 +1979,31 @@ ExecBuildSlotValueDescription(Oid reloid, /* + * ExecUpdateLockMode -- find the appropriate UPDATE tuple lock mode for a + * given ResultRelInfo + */ +LockTupleMode +ExecUpdateLockMode(EState *estate, ResultRelInfo *relinfo) +{ + Bitmapset *keyCols; + Bitmapset *updatedCols; + + /* + * Compute lock mode to use. If columns that are part of the key have not + * been modified, then we can use a weaker lock, allowing for better + * concurrency. + */ + updatedCols = GetUpdatedColumns(relinfo, estate); + keyCols = RelationGetIndexAttrBitmap(relinfo->ri_RelationDesc, + INDEX_ATTR_BITMAP_KEY); + + if (bms_overlap(keyCols, updatedCols)) + return LockTupleExclusive; + + return LockTupleNoKeyExclusive; +} + +/* * ExecFindRowMark -- find the ExecRowMark struct for given rangetable index */ ExecRowMark * @@ -2186,8 +2217,9 @@ EvalPlanQualFetch(EState *estate, Relation relation, int lockmode, * recycled and reused for an unrelated tuple. This implies that * the latest version of the row was deleted, so we need do * nothing. (Should be safe to examine xmin without getting - * buffer's content lock, since xmin never changes in an existing - * tuple.) + * buffer's content lock. We assume reading a TransactionId to be + * atomic, and Xmin never changes in an existing tuple, except to + * invalid or frozen, and neither of those can match priorXmax.) */ if (!TransactionIdEquals(HeapTupleHeaderGetXmin(tuple.t_data), priorXmax)) @@ -2268,11 +2300,12 @@ EvalPlanQualFetch(EState *estate, Relation relation, int lockmode, * case, so as to avoid the "Halloween problem" of * repeated update attempts. In the latter case it might * be sensible to fetch the updated tuple instead, but - * doing so would require changing heap_lock_tuple as well - * as heap_update and heap_delete to not complain about - * updating "invisible" tuples, which seems pretty scary. - * So for now, treat the tuple as deleted and do not - * process. + * doing so would require changing heap_update and + * heap_delete to not complain about updating "invisible" + * tuples, which seems pretty scary (heap_lock_tuple will + * not complain, but few callers expect HeapTupleInvisible, + * and we're not one of them). So for now, treat the tuple + * as deleted and do not process. 
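A worked reading of ExecUpdateLockMode() above, under an assumed schema:

/*
 * Illustration (assumed schema): with
 *     CREATE TABLE t (id int PRIMARY KEY, val text);
 * an ON CONFLICT DO UPDATE whose SET list touches only "val" leaves
 * keyCols and updatedCols disjoint, so the row is locked with
 * LockTupleNoKeyExclusive; a SET list that assigns to "id" overlaps the
 * key bitmap and forces the stronger LockTupleExclusive.
 */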
*/ ReleaseBuffer(buffer); return NULL; @@ -2287,6 +2320,9 @@ EvalPlanQualFetch(EState *estate, Relation relation, int lockmode, ereport(ERROR, (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), errmsg("could not serialize access due to concurrent update"))); + + /* Should not encounter speculative tuple on recheck */ + Assert(!HeapTupleHeaderIsSpeculative(tuple.t_data)); if (!ItemPointerEquals(&hufd.ctid, &tuple.t_self)) { /* it was updated, so look at the updated version */ @@ -2302,6 +2338,9 @@ EvalPlanQualFetch(EState *estate, Relation relation, int lockmode, ReleaseBuffer(buffer); return NULL; + case HeapTupleInvisible: + elog(ERROR, "attempted to lock invisible tuple"); + default: ReleaseBuffer(buffer); elog(ERROR, "unrecognized heap_lock_tuple status: %u", diff --git a/src/backend/executor/nodeLockRows.c b/src/backend/executor/nodeLockRows.c index bb6df47a95d..5ae106c06ad 100644 --- a/src/backend/executor/nodeLockRows.c +++ b/src/backend/executor/nodeLockRows.c @@ -152,10 +152,11 @@ lnext: * case, so as to avoid the "Halloween problem" of repeated * update attempts. In the latter case it might be sensible * to fetch the updated tuple instead, but doing so would - * require changing heap_lock_tuple as well as heap_update and - * heap_delete to not complain about updating "invisible" - * tuples, which seems pretty scary. So for now, treat the - * tuple as deleted and do not process. + * require changing heap_update and heap_delete to not complain + * about updating "invisible" tuples, which seems pretty scary + * (heap_lock_tuple will not complain, but few callers expect + * HeapTupleInvisible, and we're not one of them). So for now, + * treat the tuple as deleted and do not process. */ goto lnext; @@ -228,6 +229,9 @@ lnext: /* Continue loop until we have all target tuples */ break; + case HeapTupleInvisible: + elog(ERROR, "attempted to lock invisible tuple"); + default: elog(ERROR, "unrecognized heap_lock_tuple status: %u", test); diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c index 31666edfa8a..34435c7e50a 100644 --- a/src/backend/executor/nodeModifyTable.c +++ b/src/backend/executor/nodeModifyTable.c @@ -46,12 +46,22 @@ #include "miscadmin.h" #include "nodes/nodeFuncs.h" #include "storage/bufmgr.h" +#include "storage/lmgr.h" #include "utils/builtins.h" #include "utils/memutils.h" #include "utils/rel.h" #include "utils/tqual.h" +static bool ExecOnConflictUpdate(ModifyTableState *mtstate, + ResultRelInfo *resultRelInfo, + ItemPointer conflictTid, + TupleTableSlot *planSlot, + TupleTableSlot *excludedSlot, + EState *estate, + bool canSetTag, + TupleTableSlot **returning); + /* * Verify that the tuples to be produced by INSERT or UPDATE match the * target relation's rowtype @@ -151,6 +161,51 @@ ExecProcessReturning(ProjectionInfo *projectReturning, return ExecProject(projectReturning, NULL); } +/* + * ExecCheckHeapTupleVisible -- verify heap tuple is visible + * + * It would not be consistent with guarantees of the higher isolation levels to + * proceed with avoiding insertion (taking speculative insertion's alternative + * path) on the basis of another tuple that is not visible to MVCC snapshot. + * Check for the need to raise a serialization failure, and do so as necessary. 
+ */ +static void +ExecCheckHeapTupleVisible(EState *estate, + HeapTuple tuple, + Buffer buffer) +{ + if (!IsolationUsesXactSnapshot()) + return; + + if (!HeapTupleSatisfiesVisibility(tuple, estate->es_snapshot, buffer)) + ereport(ERROR, + (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), + errmsg("could not serialize access due to concurrent update"))); +} + +/* + * ExecCheckTIDVisible -- convenience variant of ExecCheckHeapTupleVisible() + */ +static void +ExecCheckTIDVisible(EState *estate, + ResultRelInfo *relinfo, + ItemPointer tid) +{ + Relation rel = relinfo->ri_RelationDesc; + Buffer buffer; + HeapTupleData tuple; + + /* Redundantly check isolation level */ + if (!IsolationUsesXactSnapshot()) + return; + + tuple.t_self = *tid; + if (!heap_fetch(rel, SnapshotAny, &tuple, &buffer, false, NULL)) + elog(ERROR, "failed to fetch conflicting tuple for ON CONFLICT"); + ExecCheckHeapTupleVisible(estate, &tuple, buffer); + ReleaseBuffer(buffer); +} + /* ---------------------------------------------------------------- * ExecInsert * @@ -161,8 +216,11 @@ ExecProcessReturning(ProjectionInfo *projectReturning, * ---------------------------------------------------------------- */ static TupleTableSlot * -ExecInsert(TupleTableSlot *slot, +ExecInsert(ModifyTableState *mtstate, + TupleTableSlot *slot, TupleTableSlot *planSlot, + List *arbiterIndexes, + OnConflictAction onconflict, EState *estate, bool canSetTag) { @@ -199,7 +257,15 @@ ExecInsert(TupleTableSlot *slot, if (resultRelationDesc->rd_rel->relhasoids) HeapTupleSetOid(tuple, InvalidOid); - /* BEFORE ROW INSERT Triggers */ + /* + * BEFORE ROW INSERT Triggers. + * + * Note: We fire BEFORE ROW TRIGGERS for every attempted insertion in an + * INSERT ... ON CONFLICT statement. We cannot check for constraint + * violations before firing these triggers, because they can change the + * values to insert. Also, they can run arbitrary user-defined code with + * side-effects that we can't cancel by just not inserting the tuple. + */ if (resultRelInfo->ri_TrigDesc && resultRelInfo->ri_TrigDesc->trig_insert_before_row) { @@ -268,21 +334,132 @@ ExecInsert(TupleTableSlot *slot, if (resultRelationDesc->rd_att->constr) ExecConstraints(resultRelInfo, slot, estate); - /* - * insert the tuple - * - * Note: heap_insert returns the tid (location) of the new tuple in - * the t_self field. - */ - newId = heap_insert(resultRelationDesc, tuple, - estate->es_output_cid, 0, NULL); + if (onconflict != ONCONFLICT_NONE && resultRelInfo->ri_NumIndices > 0) + { + /* Perform a speculative insertion. */ + uint32 specToken; + ItemPointerData conflictTid; + bool specConflict; - /* - * insert index entries for tuple - */ - if (resultRelInfo->ri_NumIndices > 0) + /* + * Do a non-conclusive check for conflicts first. + * + * We're not holding any locks yet, so this doesn't guarantee that + * the later insert won't conflict. But it avoids leaving behind + * a lot of canceled speculative insertions, if you run a lot of + * INSERT ON CONFLICT statements that do conflict. + * + * We loop back here if we find a conflict below, either during + * the pre-check, or when we re-check after inserting the tuple + * speculatively. See the executor README for a full discussion + * of speculative insertion. + */ + vlock: + specConflict = false; + if (!ExecCheckIndexConstraints(slot, estate, &conflictTid, + arbiterIndexes)) + { + /* committed conflict tuple found */ + if (onconflict == ONCONFLICT_UPDATE) + { + /* + * In case of ON CONFLICT DO UPDATE, execute the UPDATE + * part. 
Be prepared to retry if the UPDATE fails because
+				 * of another concurrent UPDATE/DELETE to the conflict
+				 * tuple.
+				 */
+				TupleTableSlot *returning = NULL;
+
+				if (ExecOnConflictUpdate(mtstate, resultRelInfo,
+										 &conflictTid, planSlot, slot,
+										 estate, canSetTag, &returning))
+				{
+					InstrCountFiltered2(&mtstate->ps, 1);
+					return returning;
+				}
+				else
+					goto vlock;
+			}
+			else
+			{
+				/*
+				 * In case of ON CONFLICT DO NOTHING, do nothing.
+				 * However, verify that the tuple is visible to the
+				 * executor's MVCC snapshot at higher isolation levels.
+				 */
+				Assert(onconflict == ONCONFLICT_NOTHING);
+				ExecCheckTIDVisible(estate, resultRelInfo, &conflictTid);
+				InstrCountFiltered2(&mtstate->ps, 1);
+				return NULL;
+			}
+		}
+
+		/*
+		 * Before we start insertion proper, acquire our "speculative
+		 * insertion lock".  Others can use that to wait for us to decide
+		 * if we're going to go ahead with the insertion, instead of
+		 * waiting for the whole transaction to complete.
+		 */
+		specToken = SpeculativeInsertionLockAcquire(GetCurrentTransactionId());
+		HeapTupleHeaderSetSpeculativeToken(tuple->t_data, specToken);
+
+		/* insert the tuple, with the speculative token */
+		newId = heap_insert(resultRelationDesc, tuple,
+							estate->es_output_cid,
+							HEAP_INSERT_SPECULATIVE,
+							NULL);
+
+		/* insert index entries for tuple */
 		recheckIndexes = ExecInsertIndexTuples(slot, &(tuple->t_self),
-											   estate);
+											   estate, true, &specConflict,
+											   arbiterIndexes);
+
+		/* adjust the tuple's state accordingly */
+		if (!specConflict)
+			heap_finish_speculative(resultRelationDesc, tuple);
+		else
+			heap_abort_speculative(resultRelationDesc, tuple);
+
+		/*
+		 * Wake up anyone waiting for our decision.  They will re-check
+		 * the tuple, see that it's no longer speculative, and wait on our
+		 * XID as if this was a regularly inserted tuple all along.  Or if
+		 * we killed the tuple, they will see it's dead, and proceed as if
+		 * the tuple never existed.
+		 */
+		SpeculativeInsertionLockRelease(GetCurrentTransactionId());
+
+		/*
+		 * If there was a conflict, start from the beginning.  We'll do
+		 * the pre-check again, which will now find the conflicting tuple
+		 * (unless it aborts before we get there).
+		 */
+		if (specConflict)
+		{
+			list_free(recheckIndexes);
+			goto vlock;
+		}
+
+		/* Since there was no insertion conflict, we're done */
+	}
+	else
+	{
+		/*
+		 * insert the tuple normally.
+		 *
+		 * Note: heap_insert returns the tid (location) of the new tuple
+		 * in the t_self field.
+		 */
+		newId = heap_insert(resultRelationDesc, tuple,
+							estate->es_output_cid,
+							0, NULL);
+
+		/* insert index entries for tuple */
+		if (resultRelInfo->ri_NumIndices > 0)
+			recheckIndexes = ExecInsertIndexTuples(slot, &(tuple->t_self),
+												   estate, false, NULL,
+												   arbiterIndexes);
+	}
 	}

 	if (canSetTag)
@@ -800,7 +977,7 @@ lreplace:;
 		 */
 		if (resultRelInfo->ri_NumIndices > 0 && !HeapTupleIsHeapOnly(tuple))
 			recheckIndexes = ExecInsertIndexTuples(slot, &(tuple->t_self),
-												   estate);
+												   estate, false, NULL, NIL);
 	}

 	if (canSetTag)
@@ -832,6 +1009,190 @@ lreplace:;
 	return NULL;
 }

+/*
+ * ExecOnConflictUpdate --- execute UPDATE of INSERT ON CONFLICT DO UPDATE
+ *
+ * Try to lock tuple for update as part of speculative insertion.  If
+ * a qual originating from ON CONFLICT DO UPDATE is satisfied, update
+ * (but still lock row, even though it may not satisfy estate's
+ * snapshot).
+ *
+ * Returns true if we're done (with or without an update), or false if
+ * the caller must retry the INSERT from scratch.
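Condensed, the speculative path that the vlock loop above implements is (a sketch of the control flow, not compilable code):

/*
 * vlock:
 *   pre-check the arbiter indexes with ExecCheckIndexConstraints();
 *     on a committed conflict: DO UPDATE or DO NOTHING (may goto vlock);
 *   SpeculativeInsertionLockAcquire(xid);         publish our token
 *   heap_insert(..., HEAP_INSERT_SPECULATIVE);    tuple marked speculative
 *   ExecInsertIndexTuples(..., &specConflict);    arbiter indexes recheck
 *   if (specConflict)
 *       heap_abort_speculative(...);              back the tuple out
 *   else
 *       heap_finish_speculative(...);             confirm it
 *   SpeculativeInsertionLockRelease(xid);         wake up waiters
 *   if (specConflict)
 *       goto vlock;                               retry from the pre-check
 */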
+ */ +static bool +ExecOnConflictUpdate(ModifyTableState *mtstate, + ResultRelInfo *resultRelInfo, + ItemPointer conflictTid, + TupleTableSlot *planSlot, + TupleTableSlot *excludedSlot, + EState *estate, + bool canSetTag, + TupleTableSlot **returning) +{ + ExprContext *econtext = mtstate->ps.ps_ExprContext; + Relation relation = resultRelInfo->ri_RelationDesc; + List *onConflictSetWhere = resultRelInfo->ri_onConflictSetWhere; + HeapTupleData tuple; + HeapUpdateFailureData hufd; + LockTupleMode lockmode; + HTSU_Result test; + Buffer buffer; + + /* Determine lock mode to use */ + lockmode = ExecUpdateLockMode(estate, resultRelInfo); + + /* + * Lock tuple for update. Don't follow updates when tuple cannot be + * locked without doing so. A row locking conflict here means our + * previous conclusion that the tuple is conclusively committed is not + * true anymore. + */ + tuple.t_self = *conflictTid; + test = heap_lock_tuple(relation, &tuple, estate->es_output_cid, + lockmode, LockWaitBlock, false, &buffer, + &hufd); + switch (test) + { + case HeapTupleMayBeUpdated: + /* success! */ + break; + + case HeapTupleInvisible: + + /* + * This can occur when a just inserted tuple is updated again in + * the same command. E.g. because multiple rows with the same + * conflicting key values are inserted. + * + * This is somewhat similar to the ExecUpdate() + * HeapTupleSelfUpdated case. We do not want to proceed because + * it would lead to the same row being updated a second time in + * some unspecified order, and in contrast to plain UPDATEs + * there's no historical behavior to break. + * + * It is the user's responsibility to prevent this situation from + * occurring. These problems are why SQL-2003 similarly specifies + * that for SQL MERGE, an exception must be raised in the event of + * an attempt to update the same row twice. + */ + if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmin(tuple.t_data))) + ereport(ERROR, + (errcode(ERRCODE_CARDINALITY_VIOLATION), + errmsg("ON CONFLICT DO UPDATE command cannot affect row a second time"), + errhint("Ensure that no rows proposed for insertion within the same command have duplicate constrained values."))); + + /* This shouldn't happen */ + elog(ERROR, "attempted to lock invisible tuple"); + + case HeapTupleSelfUpdated: + + /* + * This state should never be reached. As a dirty snapshot is used + * to find conflicting tuples, speculative insertion wouldn't have + * seen this row to conflict with. + */ + elog(ERROR, "unexpected self-updated tuple"); + + case HeapTupleUpdated: + if (IsolationUsesXactSnapshot()) + ereport(ERROR, + (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), + errmsg("could not serialize access due to concurrent update"))); + + /* + * Tell caller to try again from the very start. + * + * It does not make sense to use the usual EvalPlanQual() style + * loop here, as the new version of the row might not conflict + * anymore, or the conflicting tuple has actually been deleted. + */ + ReleaseBuffer(buffer); + return false; + + default: + elog(ERROR, "unrecognized heap_lock_tuple status: %u", test); + } + + /* + * Success, the tuple is locked. + * + * Reset per-tuple memory context to free any expression evaluation + * storage allocated in the previous cycle. + */ + ResetExprContext(econtext); + + /* + * Verify that the tuple is visible to our MVCC snapshot if the current + * isolation level mandates that. + * + * It's not sufficient to rely on the check within ExecUpdate() as e.g. + * CONFLICT ... 
WHERE clause may prevent us from reaching that. + * + * This means we only ever continue when a new command in the current + * transaction could see the row, even though in READ COMMITTED mode the + * tuple will not be visible according to the current statement's + * snapshot. This is in line with the way UPDATE deals with newer tuple + * versions. + */ + ExecCheckHeapTupleVisible(estate, &tuple, buffer); + + /* Store target's existing tuple in the state's dedicated slot */ + ExecStoreTuple(&tuple, mtstate->mt_existing, buffer, false); + + /* + * Make tuple and any needed join variables available to ExecQual and + * ExecProject. The EXCLUDED tuple is installed in ecxt_innertuple, while + * the target's existing tuple is installed in the scantuple. EXCLUDED has + * been made to reference INNER_VAR in setrefs.c, but there is no other + * redirection. + */ + econtext->ecxt_scantuple = mtstate->mt_existing; + econtext->ecxt_innertuple = excludedSlot; + econtext->ecxt_outertuple = NULL; + + if (!ExecQual(onConflictSetWhere, econtext, false)) + { + ReleaseBuffer(buffer); + InstrCountFiltered1(&mtstate->ps, 1); + return true; /* done with the tuple */ + } + + if (resultRelInfo->ri_WithCheckOptions != NIL) + { + /* + * Check target's existing tuple against UPDATE-applicable USING + * security barrier quals (if any), enforced here as RLS checks/WCOs. + * + * The rewriter creates UPDATE RLS checks/WCOs for UPDATE security + * quals, and stores them as WCOs of "kind" WCO_RLS_CONFLICT_CHECK, + * but that's almost the extent of its special handling for ON + * CONFLICT DO UPDATE. + * + * The rewriter will also have associated UPDATE applicable straight + * RLS checks/WCOs for the benefit of the ExecUpdate() call that + * follows. INSERTs and UPDATEs naturally have mutually exclusive WCO + * kinds, so there is no danger of spurious over-enforcement in the + * INSERT or UPDATE path. 
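To make the slot arrangement above concrete (schema assumed purely for illustration):

/*
 * Illustration: for
 *     INSERT INTO t VALUES (1, 'new')
 *     ON CONFLICT (id) DO UPDATE SET val = EXCLUDED.val
 *     WHERE t.val IS DISTINCT FROM EXCLUDED.val;
 * the DO UPDATE WHERE clause and SET list are evaluated with
 *     econtext->ecxt_scantuple  = the existing row in "t" (target), and
 *     econtext->ecxt_innertuple = the proposed row ("EXCLUDED").
 */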
+		 */
+		ExecWithCheckOptions(WCO_RLS_CONFLICT_CHECK, resultRelInfo,
+							 mtstate->mt_existing,
+							 mtstate->ps.state);
+	}
+
+	/* Project the new tuple version */
+	ExecProject(resultRelInfo->ri_onConflictSetProj, NULL);
+
+	/* Execute UPDATE with projection */
+	*returning = ExecUpdate(&tuple.t_data->t_ctid, NULL,
+							mtstate->mt_conflproj, planSlot,
+							&mtstate->mt_epqstate, mtstate->ps.state,
+							canSetTag);
+
+	ReleaseBuffer(buffer);
+	return true;
+}
+
 /*
  * Process BEFORE EACH STATEMENT triggers
  */
@@ -843,6 +1204,9 @@ fireBSTriggers(ModifyTableState *node)
 	{
 		case CMD_INSERT:
 			ExecBSInsertTriggers(node->ps.state, node->resultRelInfo);
+			if (node->mt_onconflict == ONCONFLICT_UPDATE)
+				ExecBSUpdateTriggers(node->ps.state,
+									 node->resultRelInfo);
 			break;
 		case CMD_UPDATE:
 			ExecBSUpdateTriggers(node->ps.state, node->resultRelInfo);
@@ -865,6 +1229,9 @@ fireASTriggers(ModifyTableState *node)
 	switch (node->operation)
 	{
 		case CMD_INSERT:
+			if (node->mt_onconflict == ONCONFLICT_UPDATE)
+				ExecASUpdateTriggers(node->ps.state,
+									 node->resultRelInfo);
 			ExecASInsertTriggers(node->ps.state, node->resultRelInfo);
 			break;
 		case CMD_UPDATE:
@@ -1062,7 +1429,9 @@ ExecModifyTable(ModifyTableState *node)
 		switch (operation)
 		{
 			case CMD_INSERT:
-				slot = ExecInsert(slot, planSlot, estate, node->canSetTag);
+				slot = ExecInsert(node, slot, planSlot,
+								  node->mt_arbiterindexes, node->mt_onconflict,
+								  estate, node->canSetTag);
 				break;
 			case CMD_UPDATE:
 				slot = ExecUpdate(tupleid, oldtuple, slot, planSlot,
@@ -1137,6 +1506,8 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags)
 	mtstate->resultRelInfo = estate->es_result_relations + node->resultRelIndex;
 	mtstate->mt_arowmarks = (List **) palloc0(sizeof(List *) * nplans);
 	mtstate->mt_nplans = nplans;
+	mtstate->mt_onconflict = node->onConflictAction;
+	mtstate->mt_arbiterindexes = node->arbiterIndexes;

 	/* set up epqstate with dummy subplan data for the moment */
 	EvalPlanQualInit(&mtstate->mt_epqstate, estate, NULL, NIL, node->epqParam);
@@ -1175,7 +1546,7 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags)
 		if (resultRelInfo->ri_RelationDesc->rd_rel->relhasindex &&
 			operation != CMD_DELETE &&
 			resultRelInfo->ri_IndexRelationDescs == NULL)
-			ExecOpenIndices(resultRelInfo);
+			ExecOpenIndices(resultRelInfo, mtstate->mt_onconflict != ONCONFLICT_NONE);

 		/* Now init the plan for this result rel */
 		estate->es_result_relation_info = resultRelInfo;
@@ -1280,6 +1651,58 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags)
 	}

 	/*
+	 * If needed, initialize target list, projection and qual for ON CONFLICT
+	 * DO UPDATE.
+ */ + resultRelInfo = mtstate->resultRelInfo; + if (node->onConflictAction == ONCONFLICT_UPDATE) + { + ExprContext *econtext; + ExprState *setexpr; + TupleDesc tupDesc; + + /* insert may only have one plan, inheritance is not expanded */ + Assert(nplans == 1); + + /* already exists if created by RETURNING processing above */ + if (mtstate->ps.ps_ExprContext == NULL) + ExecAssignExprContext(estate, &mtstate->ps); + + econtext = mtstate->ps.ps_ExprContext; + + /* initialize slot for the existing tuple */ + mtstate->mt_existing = ExecInitExtraTupleSlot(mtstate->ps.state); + ExecSetSlotDescriptor(mtstate->mt_existing, + resultRelInfo->ri_RelationDesc->rd_att); + + mtstate->mt_excludedtlist = node->exclRelTlist; + + /* create target slot for UPDATE SET projection */ + tupDesc = ExecTypeFromTL((List *) node->onConflictSet, + false); + mtstate->mt_conflproj = ExecInitExtraTupleSlot(mtstate->ps.state); + ExecSetSlotDescriptor(mtstate->mt_conflproj, tupDesc); + + /* build UPDATE SET expression and projection state */ + setexpr = ExecInitExpr((Expr *) node->onConflictSet, &mtstate->ps); + resultRelInfo->ri_onConflictSetProj = + ExecBuildProjectionInfo((List *) setexpr, econtext, + mtstate->mt_conflproj, + resultRelInfo->ri_RelationDesc->rd_att); + + /* build DO UPDATE WHERE clause expression */ + if (node->onConflictWhere) + { + ExprState *qualexpr; + + qualexpr = ExecInitExpr((Expr *) node->onConflictWhere, + mtstate->mt_plans[0]); + + resultRelInfo->ri_onConflictSetWhere = (List *) qualexpr; + } + } + + /* * If we have any secondary relations in an UPDATE or DELETE, they need to * be treated like non-locked relations in SELECT FOR UPDATE, ie, the * EvalPlanQual mechanism needs to be told about them. Locate the diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index 1b02be287ce..a3139d3eb5d 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -81,6 +81,7 @@ _copyPlannedStmt(const PlannedStmt *from) COPY_SCALAR_FIELD(queryId); COPY_SCALAR_FIELD(hasReturning); COPY_SCALAR_FIELD(hasModifyingCTE); + COPY_SCALAR_FIELD(isUpsert); COPY_SCALAR_FIELD(canSetTag); COPY_SCALAR_FIELD(transientPlan); COPY_NODE_FIELD(planTree); @@ -185,6 +186,12 @@ _copyModifyTable(const ModifyTable *from) COPY_NODE_FIELD(fdwPrivLists); COPY_NODE_FIELD(rowMarks); COPY_SCALAR_FIELD(epqParam); + COPY_SCALAR_FIELD(onConflictAction); + COPY_NODE_FIELD(arbiterIndexes); + COPY_NODE_FIELD(onConflictSet); + COPY_NODE_FIELD(onConflictWhere); + COPY_SCALAR_FIELD(exclRelRTI); + COPY_NODE_FIELD(exclRelTlist); return newnode; } @@ -1787,6 +1794,22 @@ _copyCurrentOfExpr(const CurrentOfExpr *from) } /* + * _copyInferenceElem + */ +static InferenceElem * +_copyInferenceElem(const InferenceElem *from) +{ + InferenceElem *newnode = makeNode(InferenceElem); + + COPY_NODE_FIELD(expr); + COPY_SCALAR_FIELD(infercollid); + COPY_SCALAR_FIELD(inferopfamily); + COPY_SCALAR_FIELD(inferopcinputtype); + + return newnode; +} + +/* * _copyTargetEntry */ static TargetEntry * @@ -1852,6 +1875,26 @@ _copyFromExpr(const FromExpr *from) return newnode; } +/* + * _copyOnConflictExpr + */ +static OnConflictExpr * +_copyOnConflictExpr(const OnConflictExpr *from) +{ + OnConflictExpr *newnode = makeNode(OnConflictExpr); + + COPY_SCALAR_FIELD(action); + COPY_NODE_FIELD(arbiterElems); + COPY_NODE_FIELD(arbiterWhere); + COPY_NODE_FIELD(onConflictSet); + COPY_NODE_FIELD(onConflictWhere); + COPY_SCALAR_FIELD(constraint); + COPY_SCALAR_FIELD(exclRelIndex); + COPY_NODE_FIELD(exclRelTlist); + + return newnode; +} + /* 
**************************************************************** * relation.h copy functions * @@ -2135,6 +2178,33 @@ _copyWithClause(const WithClause *from) return newnode; } +static InferClause * +_copyInferClause(const InferClause *from) +{ + InferClause *newnode = makeNode(InferClause); + + COPY_NODE_FIELD(indexElems); + COPY_NODE_FIELD(whereClause); + COPY_STRING_FIELD(conname); + COPY_LOCATION_FIELD(location); + + return newnode; +} + +static OnConflictClause * +_copyOnConflictClause(const OnConflictClause *from) +{ + OnConflictClause *newnode = makeNode(OnConflictClause); + + COPY_SCALAR_FIELD(action); + COPY_NODE_FIELD(infer); + COPY_NODE_FIELD(targetList); + COPY_NODE_FIELD(whereClause); + COPY_LOCATION_FIELD(location); + + return newnode; +} + static CommonTableExpr * _copyCommonTableExpr(const CommonTableExpr *from) { @@ -2552,6 +2622,7 @@ _copyQuery(const Query *from) COPY_NODE_FIELD(jointree); COPY_NODE_FIELD(targetList); COPY_NODE_FIELD(withCheckOptions); + COPY_NODE_FIELD(onConflict); COPY_NODE_FIELD(returningList); COPY_NODE_FIELD(groupClause); COPY_NODE_FIELD(havingQual); @@ -2575,6 +2646,7 @@ _copyInsertStmt(const InsertStmt *from) COPY_NODE_FIELD(relation); COPY_NODE_FIELD(cols); COPY_NODE_FIELD(selectStmt); + COPY_NODE_FIELD(onConflictClause); COPY_NODE_FIELD(returningList); COPY_NODE_FIELD(withClause); @@ -4283,6 +4355,9 @@ copyObject(const void *from) case T_CurrentOfExpr: retval = _copyCurrentOfExpr(from); break; + case T_InferenceElem: + retval = _copyInferenceElem(from); + break; case T_TargetEntry: retval = _copyTargetEntry(from); break; @@ -4295,6 +4370,9 @@ copyObject(const void *from) case T_FromExpr: retval = _copyFromExpr(from); break; + case T_OnConflictExpr: + retval = _copyOnConflictExpr(from); + break; /* * RELATION NODES @@ -4753,6 +4831,12 @@ copyObject(const void *from) case T_WithClause: retval = _copyWithClause(from); break; + case T_InferClause: + retval = _copyInferClause(from); + break; + case T_OnConflictClause: + retval = _copyOnConflictClause(from); + break; case T_CommonTableExpr: retval = _copyCommonTableExpr(from); break; diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c index 1b9a83b93ed..7c86e919a49 100644 --- a/src/backend/nodes/equalfuncs.c +++ b/src/backend/nodes/equalfuncs.c @@ -683,6 +683,17 @@ _equalCurrentOfExpr(const CurrentOfExpr *a, const CurrentOfExpr *b) } static bool +_equalInferenceElem(const InferenceElem *a, const InferenceElem *b) +{ + COMPARE_NODE_FIELD(expr); + COMPARE_SCALAR_FIELD(infercollid); + COMPARE_SCALAR_FIELD(inferopfamily); + COMPARE_SCALAR_FIELD(inferopcinputtype); + + return true; +} + +static bool _equalTargetEntry(const TargetEntry *a, const TargetEntry *b) { COMPARE_NODE_FIELD(expr); @@ -728,6 +739,20 @@ _equalFromExpr(const FromExpr *a, const FromExpr *b) return true; } +static bool +_equalOnConflictExpr(const OnConflictExpr *a, const OnConflictExpr *b) +{ + COMPARE_SCALAR_FIELD(action); + COMPARE_NODE_FIELD(arbiterElems); + COMPARE_NODE_FIELD(arbiterWhere); + COMPARE_NODE_FIELD(onConflictSet); + COMPARE_NODE_FIELD(onConflictWhere); + COMPARE_SCALAR_FIELD(constraint); + COMPARE_SCALAR_FIELD(exclRelIndex); + COMPARE_NODE_FIELD(exclRelTlist); + + return true; +} /* * Stuff from relation.h @@ -868,6 +893,7 @@ _equalQuery(const Query *a, const Query *b) COMPARE_NODE_FIELD(jointree); COMPARE_NODE_FIELD(targetList); COMPARE_NODE_FIELD(withCheckOptions); + COMPARE_NODE_FIELD(onConflict); COMPARE_NODE_FIELD(returningList); COMPARE_NODE_FIELD(groupClause); COMPARE_NODE_FIELD(havingQual); 
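For orientation amid the node-support boilerplate, this is the shape of OnConflictExpr as exercised by the copy and equal functions above (a reconstructed sketch; the authoritative declaration lives in the node headers, and the field comments are inferred from usage):

typedef struct OnConflictExpr
{
    NodeTag     type;
    OnConflictAction action;        /* DO NOTHING or DO UPDATE */
    List       *arbiterElems;       /* unique-index inference elements */
    Node       *arbiterWhere;       /* inference WHERE clause */
    List       *onConflictSet;      /* DO UPDATE SET targetlist */
    Node       *onConflictWhere;    /* DO UPDATE ... WHERE qual */
    Oid         constraint;         /* ON constraint_name variant */
    int         exclRelIndex;       /* RT index of EXCLUDED pseudo-relation */
    List       *exclRelTlist;       /* targetlist of EXCLUDED */
} OnConflictExpr;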
@@ -889,6 +915,7 @@ _equalInsertStmt(const InsertStmt *a, const InsertStmt *b) COMPARE_NODE_FIELD(relation); COMPARE_NODE_FIELD(cols); COMPARE_NODE_FIELD(selectStmt); + COMPARE_NODE_FIELD(onConflictClause); COMPARE_NODE_FIELD(returningList); COMPARE_NODE_FIELD(withClause); @@ -2434,6 +2461,29 @@ _equalWithClause(const WithClause *a, const WithClause *b) } static bool +_equalInferClause(const InferClause *a, const InferClause *b) +{ + COMPARE_NODE_FIELD(indexElems); + COMPARE_NODE_FIELD(whereClause); + COMPARE_STRING_FIELD(conname); + COMPARE_LOCATION_FIELD(location); + + return true; +} + +static bool +_equalOnConflictClause(const OnConflictClause *a, const OnConflictClause *b) +{ + COMPARE_SCALAR_FIELD(action); + COMPARE_NODE_FIELD(infer); + COMPARE_NODE_FIELD(targetList); + COMPARE_NODE_FIELD(whereClause); + COMPARE_LOCATION_FIELD(location); + + return true; +} + +static bool _equalCommonTableExpr(const CommonTableExpr *a, const CommonTableExpr *b) { COMPARE_STRING_FIELD(ctename); @@ -2712,6 +2762,9 @@ equal(const void *a, const void *b) case T_CurrentOfExpr: retval = _equalCurrentOfExpr(a, b); break; + case T_InferenceElem: + retval = _equalInferenceElem(a, b); + break; case T_TargetEntry: retval = _equalTargetEntry(a, b); break; @@ -2721,6 +2774,9 @@ equal(const void *a, const void *b) case T_FromExpr: retval = _equalFromExpr(a, b); break; + case T_OnConflictExpr: + retval = _equalOnConflictExpr(a, b); + break; case T_JoinExpr: retval = _equalJoinExpr(a, b); break; @@ -3169,6 +3225,12 @@ equal(const void *a, const void *b) case T_WithClause: retval = _equalWithClause(a, b); break; + case T_InferClause: + retval = _equalInferClause(a, b); + break; + case T_OnConflictClause: + retval = _equalOnConflictClause(a, b); + break; case T_CommonTableExpr: retval = _equalCommonTableExpr(a, b); break; diff --git a/src/backend/nodes/nodeFuncs.c b/src/backend/nodes/nodeFuncs.c index d6f1f5bb6d7..4135f9c3cfc 100644 --- a/src/backend/nodes/nodeFuncs.c +++ b/src/backend/nodes/nodeFuncs.c @@ -235,6 +235,13 @@ exprType(const Node *expr) case T_CurrentOfExpr: type = BOOLOID; break; + case T_InferenceElem: + { + const InferenceElem *n = (const InferenceElem *) expr; + + type = exprType((Node *) n->expr); + } + break; case T_PlaceHolderVar: type = exprType((Node *) ((const PlaceHolderVar *) expr)->phexpr); break; @@ -894,6 +901,9 @@ exprCollation(const Node *expr) case T_CurrentOfExpr: coll = InvalidOid; /* result is always boolean */ break; + case T_InferenceElem: + coll = exprCollation((Node *) ((const InferenceElem *) expr)->expr); + break; case T_PlaceHolderVar: coll = exprCollation((Node *) ((const PlaceHolderVar *) expr)->phexpr); break; @@ -1484,6 +1494,12 @@ exprLocation(const Node *expr) case T_WithClause: loc = ((const WithClause *) expr)->location; break; + case T_InferClause: + loc = ((const InferClause *) expr)->location; + break; + case T_OnConflictClause: + loc = ((const OnConflictClause *) expr)->location; + break; case T_CommonTableExpr: loc = ((const CommonTableExpr *) expr)->location; break; @@ -1491,6 +1507,10 @@ exprLocation(const Node *expr) /* just use argument's location */ loc = exprLocation((Node *) ((const PlaceHolderVar *) expr)->phexpr); break; + case T_InferenceElem: + /* just use nested expr's location */ + loc = exprLocation((Node *) ((const InferenceElem *) expr)->expr); + break; default: /* for any other node type it's just unknown... 
*/ loc = -1; @@ -1890,6 +1910,20 @@ expression_tree_walker(Node *node, return true; } break; + case T_OnConflictExpr: + { + OnConflictExpr *onconflict = (OnConflictExpr *) node; + + if (walker((Node *) onconflict->arbiterElems, context)) + return true; + if (walker(onconflict->arbiterWhere, context)) + return true; + if (walker(onconflict->onConflictSet, context)) + return true; + if (walker(onconflict->onConflictWhere, context)) + return true; + } + break; case T_JoinExpr: { JoinExpr *join = (JoinExpr *) node; @@ -1920,6 +1954,8 @@ expression_tree_walker(Node *node, break; case T_PlaceHolderVar: return walker(((PlaceHolderVar *) node)->phexpr, context); + case T_InferenceElem: + return walker(((InferenceElem *) node)->expr, context); case T_AppendRelInfo: { AppendRelInfo *appinfo = (AppendRelInfo *) node; @@ -1968,6 +2004,8 @@ query_tree_walker(Query *query, return true; if (walker((Node *) query->withCheckOptions, context)) return true; + if (walker((Node *) query->onConflict, context)) + return true; if (walker((Node *) query->returningList, context)) return true; if (walker((Node *) query->jointree, context)) @@ -2594,6 +2632,20 @@ expression_tree_mutator(Node *node, return (Node *) newnode; } break; + case T_OnConflictExpr: + { + OnConflictExpr *oc = (OnConflictExpr *) node; + OnConflictExpr *newnode; + + FLATCOPY(newnode, oc, OnConflictExpr); + MUTATE(newnode->arbiterElems, oc->arbiterElems, List *); + MUTATE(newnode->arbiterWhere, oc->arbiterWhere, Node *); + MUTATE(newnode->onConflictSet, oc->onConflictSet, List *); + MUTATE(newnode->onConflictWhere, oc->onConflictWhere, Node *); + + return (Node *) newnode; + } + break; case T_JoinExpr: { JoinExpr *join = (JoinExpr *) node; @@ -2630,6 +2682,16 @@ expression_tree_mutator(Node *node, return (Node *) newnode; } break; + case T_InferenceElem: + { + InferenceElem *inferenceelemdexpr = (InferenceElem *) node; + InferenceElem *newnode; + + FLATCOPY(newnode, inferenceelemdexpr, InferenceElem); + MUTATE(newnode->expr, newnode->expr, Node *); + return (Node *) newnode; + } + break; case T_AppendRelInfo: { AppendRelInfo *appinfo = (AppendRelInfo *) node; @@ -2709,6 +2771,7 @@ query_tree_mutator(Query *query, MUTATE(query->targetList, query->targetList, List *); MUTATE(query->withCheckOptions, query->withCheckOptions, List *); + MUTATE(query->onConflict, query->onConflict, OnConflictExpr *); MUTATE(query->returningList, query->returningList, List *); MUTATE(query->jointree, query->jointree, FromExpr *); MUTATE(query->setOperations, query->setOperations, Node *); @@ -2978,6 +3041,8 @@ raw_expression_tree_walker(Node *node, return true; if (walker(stmt->selectStmt, context)) return true; + if (walker(stmt->onConflictClause, context)) + return true; if (walker(stmt->returningList, context)) return true; if (walker(stmt->withClause, context)) @@ -3217,6 +3282,28 @@ raw_expression_tree_walker(Node *node, break; case T_WithClause: return walker(((WithClause *) node)->ctes, context); + case T_InferClause: + { + InferClause *stmt = (InferClause *) node; + + if (walker(stmt->indexElems, context)) + return true; + if (walker(stmt->whereClause, context)) + return true; + } + break; + case T_OnConflictClause: + { + OnConflictClause *stmt = (OnConflictClause *) node; + + if (walker(stmt->infer, context)) + return true; + if (walker(stmt->targetList, context)) + return true; + if (walker(stmt->whereClause, context)) + return true; + } + break; case T_CommonTableExpr: return walker(((CommonTableExpr *) node)->ctequery, context); default: diff --git 
a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index d5ddd0b3592..bc891d391f5 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -243,6 +243,7 @@ _outPlannedStmt(StringInfo str, const PlannedStmt *node) WRITE_UINT_FIELD(queryId); WRITE_BOOL_FIELD(hasReturning); WRITE_BOOL_FIELD(hasModifyingCTE); + WRITE_BOOL_FIELD(isUpsert); WRITE_BOOL_FIELD(canSetTag); WRITE_BOOL_FIELD(transientPlan); WRITE_NODE_FIELD(planTree); @@ -337,6 +338,12 @@ _outModifyTable(StringInfo str, const ModifyTable *node) WRITE_NODE_FIELD(fdwPrivLists); WRITE_NODE_FIELD(rowMarks); WRITE_INT_FIELD(epqParam); + WRITE_ENUM_FIELD(onConflictAction, OnConflictAction); + WRITE_NODE_FIELD(arbiterIndexes); + WRITE_NODE_FIELD(onConflictSet); + WRITE_NODE_FIELD(onConflictWhere); + WRITE_INT_FIELD(exclRelRTI); + WRITE_NODE_FIELD(exclRelTlist); } static void @@ -1437,6 +1444,17 @@ _outCurrentOfExpr(StringInfo str, const CurrentOfExpr *node) } static void +_outInferenceElem(StringInfo str, const InferenceElem *node) +{ + WRITE_NODE_TYPE("INFERENCEELEM"); + + WRITE_NODE_FIELD(expr); + WRITE_OID_FIELD(infercollid); + WRITE_OID_FIELD(inferopfamily); + WRITE_OID_FIELD(inferopcinputtype); +} + +static void _outTargetEntry(StringInfo str, const TargetEntry *node) { WRITE_NODE_TYPE("TARGETENTRY"); @@ -1482,6 +1500,21 @@ _outFromExpr(StringInfo str, const FromExpr *node) WRITE_NODE_FIELD(quals); } +static void +_outOnConflictExpr(StringInfo str, const OnConflictExpr *node) +{ + WRITE_NODE_TYPE("ONCONFLICTEXPR"); + + WRITE_ENUM_FIELD(action, OnConflictAction); + WRITE_NODE_FIELD(arbiterElems); + WRITE_NODE_FIELD(arbiterWhere); + WRITE_NODE_FIELD(onConflictSet); + WRITE_NODE_FIELD(onConflictWhere); + WRITE_OID_FIELD(constraint); + WRITE_INT_FIELD(exclRelIndex); + WRITE_NODE_FIELD(exclRelTlist); +} + /***************************************************************************** * * Stuff from relation.h. 
@@ -2319,6 +2352,7 @@ _outQuery(StringInfo str, const Query *node) WRITE_NODE_FIELD(jointree); WRITE_NODE_FIELD(targetList); WRITE_NODE_FIELD(withCheckOptions); + WRITE_NODE_FIELD(onConflict); WRITE_NODE_FIELD(returningList); WRITE_NODE_FIELD(groupClause); WRITE_NODE_FIELD(havingQual); @@ -3112,6 +3146,9 @@ _outNode(StringInfo str, const void *obj) case T_CurrentOfExpr: _outCurrentOfExpr(str, obj); break; + case T_InferenceElem: + _outInferenceElem(str, obj); + break; case T_TargetEntry: _outTargetEntry(str, obj); break; @@ -3124,7 +3161,9 @@ _outNode(StringInfo str, const void *obj) case T_FromExpr: _outFromExpr(str, obj); break; - + case T_OnConflictExpr: + _outOnConflictExpr(str, obj); + break; case T_Path: _outPath(str, obj); break; diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c index d1ced0cc4b3..8136306e1e5 100644 --- a/src/backend/nodes/readfuncs.c +++ b/src/backend/nodes/readfuncs.c @@ -214,6 +214,7 @@ _readQuery(void) READ_NODE_FIELD(jointree); READ_NODE_FIELD(targetList); READ_NODE_FIELD(withCheckOptions); + READ_NODE_FIELD(onConflict); READ_NODE_FIELD(returningList); READ_NODE_FIELD(groupClause); READ_NODE_FIELD(havingQual); @@ -1131,6 +1132,22 @@ _readCurrentOfExpr(void) } /* + * _readInferenceElem + */ +static InferenceElem * +_readInferenceElem(void) +{ + READ_LOCALS(InferenceElem); + + READ_NODE_FIELD(expr); + READ_OID_FIELD(infercollid); + READ_OID_FIELD(inferopfamily); + READ_OID_FIELD(inferopcinputtype); + + READ_DONE(); +} + +/* * _readTargetEntry */ static TargetEntry * @@ -1196,6 +1213,25 @@ _readFromExpr(void) READ_DONE(); } +/* + * _readOnConflictExpr + */ +static OnConflictExpr * +_readOnConflictExpr(void) +{ + READ_LOCALS(OnConflictExpr); + + READ_ENUM_FIELD(action, OnConflictAction); + READ_NODE_FIELD(arbiterElems); + READ_NODE_FIELD(arbiterWhere); + READ_NODE_FIELD(onConflictSet); + READ_NODE_FIELD(onConflictWhere); + READ_OID_FIELD(constraint); + READ_INT_FIELD(exclRelIndex); + READ_NODE_FIELD(exclRelTlist); + + READ_DONE(); +} /* * Stuff from parsenodes.h. 
@@ -1395,6 +1431,8 @@ parseNodeString(void) return_value = _readSetToDefault(); else if (MATCH("CURRENTOFEXPR", 13)) return_value = _readCurrentOfExpr(); + else if (MATCH("INFERENCEELEM", 13)) + return_value = _readInferenceElem(); else if (MATCH("TARGETENTRY", 11)) return_value = _readTargetEntry(); else if (MATCH("RANGETBLREF", 11)) @@ -1403,6 +1441,8 @@ parseNodeString(void) return_value = _readJoinExpr(); else if (MATCH("FROMEXPR", 8)) return_value = _readFromExpr(); + else if (MATCH("ONCONFLICTEXPR", 14)) + return_value = _readOnConflictExpr(); else if (MATCH("RTE", 3)) return_value = _readRangeTblEntry(); else if (MATCH("RANGETBLFUNCTION", 16)) diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index eeb2a417643..3246332d6e3 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -4868,7 +4868,7 @@ make_modifytable(PlannerInfo *root, Index nominalRelation, List *resultRelations, List *subplans, List *withCheckOptionLists, List *returningLists, - List *rowMarks, int epqParam) + List *rowMarks, OnConflictExpr *onconflict, int epqParam) { ModifyTable *node = makeNode(ModifyTable); Plan *plan = &node->plan; @@ -4918,6 +4918,30 @@ make_modifytable(PlannerInfo *root, node->resultRelations = resultRelations; node->resultRelIndex = -1; /* will be set correctly in setrefs.c */ node->plans = subplans; + if (!onconflict) + { + node->onConflictAction = ONCONFLICT_NONE; + node->onConflictSet = NIL; + node->onConflictWhere = NULL; + node->arbiterIndexes = NIL; + } + else + { + node->onConflictAction = onconflict->action; + node->onConflictSet = onconflict->onConflictSet; + node->onConflictWhere = onconflict->onConflictWhere; + + /* + * If a set of unique index inference elements was provided (an + * INSERT...ON CONFLICT "inference specification"), then infer + * appropriate unique indexes (or throw an error if none are + * available). 
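A small example of the inference that infer_arbiter_indexes() performs (schema assumed for illustration):

/*
 * Illustration:
 *     CREATE TABLE t (a int, b int, c int, UNIQUE (a, b));
 *
 *     INSERT INTO t ... ON CONFLICT (b, a) DO NOTHING;  infers UNIQUE (a, b)
 *     INSERT INTO t ... ON CONFLICT (a) DO NOTHING;     no usable index: error
 *
 * Column order in the specification is irrelevant, but the set of
 * columns/expressions must match some unique index exactly.
 */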
+		 */
+		node->arbiterIndexes = infer_arbiter_indexes(root);
+
+		node->exclRelRTI = onconflict->exclRelIndex;
+		node->exclRelTlist = onconflict->exclRelTlist;
+	}
 	node->withCheckOptionLists = withCheckOptionLists;
 	node->returningLists = returningLists;
 	node->rowMarks = rowMarks;
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index ea4d4c55cbd..c80d45acaa9 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -243,6 +243,8 @@ standard_planner(Query *parse, int cursorOptions, ParamListInfo boundParams)
 	result->queryId = parse->queryId;
 	result->hasReturning = (parse->returningList != NIL);
 	result->hasModifyingCTE = parse->hasModifyingCTE;
+	result->isUpsert =
+		(parse->onConflict && parse->onConflict->action == ONCONFLICT_UPDATE);
 	result->canSetTag = parse->canSetTag;
 	result->transientPlan = glob->transientPlan;
 	result->planTree = top_plan;
@@ -462,6 +464,17 @@ subquery_planner(PlannerGlobal *glob, Query *parse,
 	parse->limitCount = preprocess_expression(root, parse->limitCount,
 											  EXPRKIND_LIMIT);

+	if (parse->onConflict)
+	{
+		parse->onConflict->onConflictSet = (List *)
+			preprocess_expression(root, (Node *) parse->onConflict->onConflictSet,
+								  EXPRKIND_TARGET);
+
+		parse->onConflict->onConflictWhere =
+			preprocess_expression(root, (Node *) parse->onConflict->onConflictWhere,
+								  EXPRKIND_QUAL);
+	}
+
 	root->append_rel_list = (List *)
 		preprocess_expression(root, (Node *) root->append_rel_list,
 							  EXPRKIND_APPINFO);
@@ -612,6 +625,7 @@ subquery_planner(PlannerGlobal *glob, Query *parse,
 											withCheckOptionLists,
 											returningLists,
 											rowMarks,
+											parse->onConflict,
 											SS_assign_special_param(root));
 		}
 	}
@@ -802,6 +816,8 @@ inheritance_planner(PlannerInfo *root)
 	List	   *rowMarks;
 	ListCell   *lc;

+	Assert(parse->commandType != CMD_INSERT);
+
 	/*
 	 * We generate a modified instance of the original Query for each target
 	 * relation, plan that, and put all the plans into a list that will be
@@ -1046,6 +1062,8 @@ inheritance_planner(PlannerInfo *root)
 		if (parse->returningList)
 			returningLists = lappend(returningLists,
 									 subroot.parse->returningList);
+
+		Assert(!parse->onConflict);
 	}

 	/* Mark result as unordered (probably unnecessary) */
@@ -1095,6 +1113,7 @@ inheritance_planner(PlannerInfo *root)
 									 withCheckOptionLists,
 									 returningLists,
 									 rowMarks,
+									 NULL,
 									 SS_assign_special_param(root));
 }
@@ -1228,6 +1247,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
 	bool		use_hashed_grouping = false;
 	WindowFuncLists *wflists = NULL;
 	List	   *activeWindows = NIL;
+	OnConflictExpr *onconfl;

 	MemSet(&agg_costs, 0, sizeof(AggClauseCosts));

@@ -1242,6 +1262,13 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
 	/* Preprocess targetlist */
 	tlist = preprocess_targetlist(root, tlist);

+	onconfl = parse->onConflict;
+	if (onconfl)
+		onconfl->onConflictSet =
+			preprocess_onconflict_targetlist(onconfl->onConflictSet,
+											 parse->resultRelation,
+											 parse->rtable);
+
 	/*
 	 * Expand any rangetable entries that have security barrier quals.
 	 * This may add new security barrier subquery RTEs to the rangetable.
diff --git a/src/backend/optimizer/plan/setrefs.c b/src/backend/optimizer/plan/setrefs.c
index b7d6ff11223..612d32571af 100644
--- a/src/backend/optimizer/plan/setrefs.c
+++ b/src/backend/optimizer/plan/setrefs.c
@@ -739,7 +739,35 @@ set_plan_refs(PlannerInfo *root, Plan *plan, int rtoffset)
 				splan->plan.targetlist = copyObject(linitial(newRL));
 			}

+			/*
+			 * We treat ModifyTable with ON CONFLICT as a form of 'pseudo
+			 * join', where the inner side is the EXCLUDED tuple.
Therefore
+			 * use fix_join_expr to set up the relevant variables to
+			 * INNER_VAR. We explicitly don't create any OUTER_VARs as
+			 * those are already used by RETURNING and it seems better to
+			 * be non-conflicting.
+			 */
+			if (splan->onConflictSet)
+			{
+				indexed_tlist *itlist;
+
+				itlist = build_tlist_index(splan->exclRelTlist);
+
+				splan->onConflictSet =
+					fix_join_expr(root, splan->onConflictSet,
+								  NULL, itlist,
+								  linitial_int(splan->resultRelations),
+								  rtoffset);
+
+				splan->onConflictWhere = (Node *)
+					fix_join_expr(root, (List *) splan->onConflictWhere,
+								  NULL, itlist,
+								  linitial_int(splan->resultRelations),
+								  rtoffset);
+			}
+
 			splan->nominalRelation += rtoffset;
+			splan->exclRelRTI += rtoffset;

 			foreach(l, splan->resultRelations)
 			{
@@ -1846,7 +1874,8 @@ search_indexed_tlist_for_sortgroupref(Node *node,
 * inner_itlist = NULL and acceptable_rel = the ID of the target relation.
 *
 * 'clauses' is the targetlist or list of join clauses
- * 'outer_itlist' is the indexed target list of the outer join relation
+ * 'outer_itlist' is the indexed target list of the outer join relation,
+ *		or NULL
 * 'inner_itlist' is the indexed target list of the inner join relation,
 *		or NULL
 * 'acceptable_rel' is either zero or the rangetable index of a relation
@@ -1886,12 +1915,17 @@ fix_join_expr_mutator(Node *node, fix_join_expr_context *context)
 		Var		   *var = (Var *) node;

 		/* First look for the var in the input tlists */
-		newvar = search_indexed_tlist_for_var(var,
-											  context->outer_itlist,
-											  OUTER_VAR,
-											  context->rtoffset);
-		if (newvar)
-			return (Node *) newvar;
+		if (context->outer_itlist)
+		{
+			newvar = search_indexed_tlist_for_var(var,
+												  context->outer_itlist,
+												  OUTER_VAR,
+												  context->rtoffset);
+			if (newvar)
+				return (Node *) newvar;
+		}
+
+		/* Then in the inner */
 		if (context->inner_itlist)
 		{
 			newvar = search_indexed_tlist_for_var(var,
@@ -1920,7 +1954,7 @@ fix_join_expr_mutator(Node *node, fix_join_expr_context *context)
 		PlaceHolderVar *phv = (PlaceHolderVar *) node;

 		/* See if the PlaceHolderVar has bubbled up from a lower plan node */
-		if (context->outer_itlist->has_ph_vars)
+		if (context->outer_itlist && context->outer_itlist->has_ph_vars)
 		{
 			newvar = search_indexed_tlist_for_non_var((Node *) phv,
 													  context->outer_itlist,
@@ -1943,7 +1977,7 @@ fix_join_expr_mutator(Node *node, fix_join_expr_context *context)
 	if (IsA(node, Param))
 		return fix_param_node(context->root, (Param *) node);
 	/* Try matching more complex expressions too, if tlists have any */
-	if (context->outer_itlist->has_non_vars)
+	if (context->outer_itlist && context->outer_itlist->has_non_vars)
 	{
 		newvar = search_indexed_tlist_for_non_var(node,
 												  context->outer_itlist,
diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c
index acfd0bcfbe5..0220672fc43 100644
--- a/src/backend/optimizer/plan/subselect.c
+++ b/src/backend/optimizer/plan/subselect.c
@@ -2340,6 +2340,10 @@ finalize_plan(PlannerInfo *root, Plan *plan, Bitmapset *valid_params,
 							  locally_added_param);
 			finalize_primnode((Node *) mtplan->returningLists,
 							  &context);
+			finalize_primnode((Node *) mtplan->onConflictSet,
+							  &context);
+			finalize_primnode((Node *) mtplan->onConflictWhere,
+							  &context);
 			foreach(l, mtplan->plans)
 			{
 				context.paramids =
diff --git a/src/backend/optimizer/prep/prepjointree.c b/src/backend/optimizer/prep/prepjointree.c
index 50acfe40e97..4f0dc80d025 100644
--- a/src/backend/optimizer/prep/prepjointree.c
+++ b/src/backend/optimizer/prep/prepjointree.c
@@ -1030,6 +1030,9 @@ pull_up_simple_subquery(PlannerInfo *root, Node *jtnode,
RangeTblEntry *rte, pullup_replace_vars((Node *) parse->targetList, &rvcontext); parse->returningList = (List *) pullup_replace_vars((Node *) parse->returningList, &rvcontext); + if (parse->onConflict) + parse->onConflict->onConflictSet = (List *) + pullup_replace_vars((Node *) parse->onConflict->onConflictSet, &rvcontext); replace_vars_in_jointree((Node *) parse->jointree, &rvcontext, lowest_nulling_outer_join); Assert(parse->setOperations == NULL); @@ -1605,6 +1608,9 @@ pull_up_simple_values(PlannerInfo *root, Node *jtnode, RangeTblEntry *rte) pullup_replace_vars((Node *) parse->targetList, &rvcontext); parse->returningList = (List *) pullup_replace_vars((Node *) parse->returningList, &rvcontext); + if (parse->onConflict) + parse->onConflict->onConflictSet = (List *) + pullup_replace_vars((Node *) parse->onConflict->onConflictSet, &rvcontext); replace_vars_in_jointree((Node *) parse->jointree, &rvcontext, NULL); Assert(parse->setOperations == NULL); parse->havingQual = pullup_replace_vars(parse->havingQual, &rvcontext); diff --git a/src/backend/optimizer/prep/preptlist.c b/src/backend/optimizer/prep/preptlist.c index 580c8467703..6b0c689e0c9 100644 --- a/src/backend/optimizer/prep/preptlist.c +++ b/src/backend/optimizer/prep/preptlist.c @@ -181,6 +181,19 @@ preprocess_targetlist(PlannerInfo *root, List *tlist) return tlist; } +/* + * preprocess_onconflict_targetlist + * Process ON CONFLICT SET targetlist. + * + * Returns the new targetlist. + */ +List * +preprocess_onconflict_targetlist(List *tlist, int result_relation, List *range_table) +{ + return expand_targetlist(tlist, CMD_UPDATE, result_relation, range_table); +} + + /***************************************************************************** * * TARGETLIST EXPANSION diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c index 068ab39dd43..8bcc5064a37 100644 --- a/src/backend/optimizer/util/plancat.c +++ b/src/backend/optimizer/util/plancat.c @@ -25,6 +25,7 @@ #include "access/transam.h" #include "access/xlog.h" #include "catalog/catalog.h" +#include "catalog/dependency.h" #include "catalog/heap.h" #include "foreign/fdwapi.h" #include "miscadmin.h" @@ -50,6 +51,8 @@ int constraint_exclusion = CONSTRAINT_EXCLUSION_PARTITION; get_relation_info_hook_type get_relation_info_hook = NULL; +static bool infer_collation_opclass_match(InferenceElem *elem, Relation idxRel, + Bitmapset *inferAttrs, List *idxExprs); static int32 get_rel_data_width(Relation rel, int32 *attr_widths); static List *get_relation_constraints(PlannerInfo *root, Oid relationObjectId, RelOptInfo *rel, @@ -400,6 +403,355 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent, } /* + * infer_arbiter_indexes - + * Determine the unique indexes used to arbitrate speculative insertion. + * + * Uses user-supplied inference clause expressions and predicate to match a + * unique index from those defined and ready on the heap relation (target). + * An exact match is required on columns/expressions (although they can appear + * in any order). However, the predicate given by the user need only restrict + * insertion to a subset of some part of the table covered by some particular + * unique index (in particular, a partial unique index) in order to be + * inferred. + * + * The implementation does not consider which B-Tree operator class any + * particular available unique index attribute uses, unless one was specified + * in the inference specification. The same is true of collations. 
In + * particular, there is no system dependency on the default operator class for + * the purposes of inference. If no opclass (or collation) is specified, then + * all matching indexes (that may or may not match the default in terms of + * each attribute opclass/collation) are used for inference. + */ +List * +infer_arbiter_indexes(PlannerInfo *root) +{ + OnConflictExpr *onconflict = root->parse->onConflict; + /* Iteration state */ + Relation relation; + Oid relationObjectId; + Oid indexOidFromConstraint = InvalidOid; + List *indexList; + ListCell *l; + + /* Normalized inference attributes and inference expressions: */ + Bitmapset *inferAttrs = NULL; + List *inferElems = NIL; + + /* Result */ + List *candidates = NIL; + + /* + * Quickly return NIL for ON CONFLICT DO NOTHING without an inference + * specification or named constraint. ON CONFLICT DO UPDATE statements + * must always provide one or the other (but parser ought to have caught + * that already). + */ + if (onconflict->arbiterElems == NIL && + onconflict->constraint == InvalidOid) + return NIL; + + /* + * We need not lock the relation since it was already locked, either by + * the rewriter or when expand_inherited_rtentry() added it to the query's + * rangetable. + */ + relationObjectId = rt_fetch(root->parse->resultRelation, + root->parse->rtable)->relid; + + relation = heap_open(relationObjectId, NoLock); + + /* + * Build normalized/BMS representation of plain indexed attributes, as + * well as direct list of inference elements. This is required for + * matching the cataloged definition of indexes. + */ + foreach(l, onconflict->arbiterElems) + { + InferenceElem *elem; + Var *var; + int attno; + + elem = (InferenceElem *) lfirst(l); + + /* + * Parse analysis of inference elements performs full parse analysis + * of Vars, even for non-expression indexes (in contrast with utility + * command related use of IndexElem). However, indexes are cataloged + * with simple attribute numbers for non-expression indexes. Those + * are handled later. + */ + if (!IsA(elem->expr, Var)) + { + inferElems = lappend(inferElems, elem->expr); + continue; + } + + var = (Var *) elem->expr; + attno = var->varattno; + + if (attno < 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_COLUMN_REFERENCE), + errmsg("system columns cannot be used in an ON CONFLICT clause"))); + else if (attno == 0) + elog(ERROR, "whole row unique index inference specifications are not valid"); + + inferAttrs = bms_add_member(inferAttrs, attno); + } + + /* + * Lookup named constraint's index. This is not immediately returned + * because some additional sanity checks are required. + */ + if (onconflict->constraint != InvalidOid) + { + indexOidFromConstraint = get_constraint_index(onconflict->constraint); + + if (indexOidFromConstraint == InvalidOid) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("constraint in ON CONFLICT clause has no associated index"))); + } + + indexList = RelationGetIndexList(relation); + + /* + * Using that representation, iterate through the list of indexes on the + * target relation to try and find a match + */ + foreach(l, indexList) + { + Oid indexoid = lfirst_oid(l); + Relation idxRel; + Form_pg_index idxForm; + Bitmapset *indexedAttrs = NULL; + List *idxExprs; + List *predExprs; + List *whereExplicit; + AttrNumber natt; + ListCell *el; + + /* + * Extract info from the relation descriptor for the index. We know + * that this is a target, so get lock type it is known will ultimately + * be required by the executor. 
+ * + * Let executor complain about !indimmediate case directly, because + * enforcement needs to occur there anyway when an inference clause is + * omitted. + */ + idxRel = index_open(indexoid, RowExclusiveLock); + idxForm = idxRel->rd_index; + + if (!IndexIsValid(idxForm)) + goto next; + + /* + * If the index is valid, but cannot yet be used, ignore it. See + * src/backend/access/heap/README.HOT for discussion. + */ + if (idxForm->indcheckxmin && + !TransactionIdPrecedes(HeapTupleHeaderGetXmin(idxRel->rd_indextuple->t_data), + TransactionXmin)) + goto next; + + /* + * Look for match on "ON constraint_name" variant, which may not be + * unique constraint. This can only be a constraint name. + */ + if (indexOidFromConstraint == idxForm->indexrelid) + { + if (!idxForm->indisunique && onconflict->action == ONCONFLICT_UPDATE) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("ON CONFLICT DO UPDATE not supported with exclusion constraints"))); + + list_free(indexList); + index_close(idxRel, NoLock); + heap_close(relation, NoLock); + candidates = lappend_oid(candidates, idxForm->indexrelid); + return candidates; + } + else if (indexOidFromConstraint != InvalidOid) + { + /* No point in further work for index in named constraint case */ + goto next; + } + + /* + * Only considering conventional inference at this point (not named + * constraints), so index under consideration can be immediately + * skipped if it's not unique + */ + if (!idxForm->indisunique) + goto next; + + /* Build BMS representation of cataloged index attributes */ + for (natt = 0; natt < idxForm->indnatts; natt++) + { + int attno = idxRel->rd_index->indkey.values[natt]; + + if (attno < 0) + elog(ERROR, "system column in index"); + + if (attno != 0) + indexedAttrs = bms_add_member(indexedAttrs, attno); + } + + /* Non-expression attributes (if any) must match */ + if (!bms_equal(indexedAttrs, inferAttrs)) + goto next; + + /* Expression attributes (if any) must match */ + idxExprs = RelationGetIndexExpressions(idxRel); + foreach(el, onconflict->arbiterElems) + { + InferenceElem *elem = (InferenceElem *) lfirst(el); + + /* + * Ensure that collation/opclass aspects of inference expression + * element match. Even though this loop is primarily concerned + * with matching expressions, it is a convenient point to check + * this for both expressions and ordinary (non-expression) + * attributes appearing as inference elements. + */ + if (!infer_collation_opclass_match(elem, idxRel, inferAttrs, + idxExprs)) + goto next; + + /* + * Plain Vars don't factor into count of expression elements, and + * the question of whether or not they satisfy the index + * definition has already been considered (they must). + */ + if (IsA(elem->expr, Var)) + continue; + + /* + * Might as well avoid redundant check in the rare cases where + * infer_collation_opclass_match() is required to do real work. + * Otherwise, check that element expression appears in cataloged + * index definition. + */ + if (elem->infercollid != InvalidOid || + elem->inferopfamily != InvalidOid || + list_member(idxExprs, elem->expr)) + continue; + + goto next; + } + + /* + * Now that all inference elements were matched, ensure that the + * expression elements from inference clause are not missing any + * cataloged expressions. This does the right thing when unique + * indexes redundantly repeat the same attribute, or if attributes + * redundantly appear multiple times within an inference clause. 
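An example of the expression matching performed by the loop above (schema assumed):

/*
 * Illustration: a unique expression index
 *     CREATE UNIQUE INDEX ON users (lower(email));
 * is inferred by ON CONFLICT (lower(email)), because the element's
 * expression appears in the cataloged index definition, while plain
 * ON CONFLICT (email) does not match it.  The check that follows
 * likewise rejects an inference clause that omits any cataloged
 * index expression.
 */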
+ */ + if (list_difference(idxExprs, inferElems) != NIL) + goto next; + + /* + * Any user-supplied ON CONFLICT unique index inference WHERE clause + * need only be implied by the cataloged index definitions predicate. + */ + predExprs = RelationGetIndexPredicate(idxRel); + whereExplicit = make_ands_implicit((Expr *) onconflict->arbiterWhere); + + if (!predicate_implied_by(predExprs, whereExplicit)) + goto next; + + candidates = lappend_oid(candidates, idxForm->indexrelid); +next: + index_close(idxRel, NoLock); + } + + list_free(indexList); + heap_close(relation, NoLock); + + if (candidates == NIL) + ereport(ERROR, + (errcode(ERRCODE_INVALID_COLUMN_REFERENCE), + errmsg("there is no unique or exclusion constraint matching the ON CONFLICT specification"))); + + return candidates; +} + +/* + * infer_collation_opclass_match - ensure infer element opclass/collation match + * + * Given unique index inference element from inference specification, if + * collation was specified, or if opclass (represented here as opfamily + + * opcintype) was specified, verify that there is at least one matching + * indexed attribute (occasionally, there may be more). Skip this in the + * common case where inference specification does not include collation or + * opclass (instead matching everything, regardless of cataloged + * collation/opclass of indexed attribute). + * + * At least historically, Postgres has not offered collations or opclasses + * with alternative-to-default notions of equality, so these additional + * criteria should only be required infrequently. + * + * Don't give up immediately when an inference element matches some attribute + * cataloged as indexed but not matching additional opclass/collation + * criteria. This is done so that the implementation is as forgiving as + * possible of redundancy within cataloged index attributes (or, less + * usefully, within inference specification elements). If collations actually + * differ between apparently redundantly indexed attributes (redundant within + * or across indexes), then there really is no redundancy as such. + * + * Note that if an inference element specifies an opclass and a collation at + * once, both must match in at least one particular attribute within index + * catalog definition in order for that inference element to be considered + * inferred/satisfied. + */ +static bool +infer_collation_opclass_match(InferenceElem *elem, Relation idxRel, + Bitmapset *inferAttrs, List *idxExprs) +{ + AttrNumber natt; + + /* + * If inference specification element lacks collation/opclass, then no + * need to check for exact match. 
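As an aside on the predicate_implied_by() test above: a partial unique index qualifies as an arbiter only when its cataloged predicate follows from the user-supplied inference WHERE clause. A minimal sketch, using hypothetical names not taken from the patch:

    CREATE TABLE tickets (id int, state text);
    CREATE UNIQUE INDEX tickets_open_id ON tickets (id) WHERE state = 'open';

    -- Inferred: the index predicate follows from the clause's WHERE.
    INSERT INTO tickets VALUES (1, 'open')
    ON CONFLICT (id) WHERE state = 'open' DO NOTHING;

    -- Not inferred: with no WHERE clause the partial index is rejected, and
    -- since no other unique index matches, this raises the "there is no
    -- unique or exclusion constraint matching the ON CONFLICT specification"
    -- error seen above.
    INSERT INTO tickets VALUES (1, 'open') ON CONFLICT (id) DO NOTHING;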
+ */ + if (elem->infercollid == InvalidOid && elem->inferopfamily == InvalidOid) + return true; + + for (natt = 1; natt <= idxRel->rd_att->natts; natt++) + { + Oid opfamily = idxRel->rd_opfamily[natt - 1]; + Oid opcinputtype = idxRel->rd_opcintype[natt - 1]; + Oid collation = idxRel->rd_indcollation[natt - 1]; + + if (elem->inferopfamily != InvalidOid && + (elem->inferopfamily != opfamily || + elem->inferopcinputtype != opcinputtype)) + { + /* Attribute needed to match opclass, but didn't */ + continue; + } + + if (elem->infercollid != InvalidOid && + elem->infercollid != collation) + { + /* Attribute needed to match collation, but didn't */ + continue; + } + + if ((IsA(elem->expr, Var) && + bms_is_member(((Var *) elem->expr)->varattno, inferAttrs)) || + list_member(idxExprs, elem->expr)) + { + /* Found one match - good enough */ + return true; + } + } + + return false; +} + +/* * estimate_rel_size - estimate # pages and # tuples in a table or index * * We also estimate the fraction of the pages that are marked all-visible in diff --git a/src/backend/parser/analyze.c b/src/backend/parser/analyze.c index 2d320d100b8..3eb4feabfd6 100644 --- a/src/backend/parser/analyze.c +++ b/src/backend/parser/analyze.c @@ -52,6 +52,8 @@ static Query *transformDeleteStmt(ParseState *pstate, DeleteStmt *stmt); static Query *transformInsertStmt(ParseState *pstate, InsertStmt *stmt); static List *transformInsertRow(ParseState *pstate, List *exprlist, List *stmtcols, List *icolumns, List *attrnos); +static OnConflictExpr *transformOnConflictClause(ParseState *pstate, + OnConflictClause *onConflictClause); static int count_rowexpr_columns(ParseState *pstate, Node *expr); static Query *transformSelectStmt(ParseState *pstate, SelectStmt *stmt); static Query *transformValuesClause(ParseState *pstate, SelectStmt *stmt); @@ -62,6 +64,8 @@ static void determineRecursiveColTypes(ParseState *pstate, Node *larg, List *nrtargetlist); static Query *transformUpdateStmt(ParseState *pstate, UpdateStmt *stmt); static List *transformReturningList(ParseState *pstate, List *returningList); +static List *transformUpdateTargetList(ParseState *pstate, + List *targetList); static Query *transformDeclareCursorStmt(ParseState *pstate, DeclareCursorStmt *stmt); static Query *transformExplainStmt(ParseState *pstate, @@ -419,6 +423,8 @@ transformInsertStmt(ParseState *pstate, InsertStmt *stmt) ListCell *icols; ListCell *attnos; ListCell *lc; + bool isOnConflictUpdate; + AclMode targetPerms; /* There can't be any outer WITH to worry about */ Assert(pstate->p_ctenamespace == NIL); @@ -434,6 +440,9 @@ transformInsertStmt(ParseState *pstate, InsertStmt *stmt) qry->hasModifyingCTE = pstate->p_hasModifyingCTE; } + isOnConflictUpdate = (stmt->onConflictClause && + stmt->onConflictClause->action == ONCONFLICT_UPDATE); + /* * We have three cases to deal with: DEFAULT VALUES (selectStmt == NULL), * VALUES list, or general SELECT input. We special-case VALUES, both for @@ -478,8 +487,11 @@ transformInsertStmt(ParseState *pstate, InsertStmt *stmt) * mentioned in the SELECT part. Note that the target table is not added * to the joinlist or namespace. 
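Returning to infer_collation_opclass_match() above: because the grammar reuses index_params, an inference element may carry an opclass or COLLATE clause, in which case at least one indexed attribute must match it exactly. A sketch under hypothetical names (text_pattern_ops is a stock btree opclass):

    CREATE TABLE codes (code text);
    CREATE UNIQUE INDEX codes_code_key ON codes (code text_pattern_ops);

    -- Matches only because the cataloged opclass agrees:
    INSERT INTO codes VALUES ('abc')
    ON CONFLICT (code text_pattern_ops) DO NOTHING;

    -- With no opclass given, any unique index on "code" is accepted,
    -- whatever its cataloged opclass/collation:
    INSERT INTO codes VALUES ('abc') ON CONFLICT (code) DO NOTHING;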
*/ + targetPerms = ACL_INSERT; + if (isOnConflictUpdate) + targetPerms |= ACL_UPDATE; qry->resultRelation = setTargetTable(pstate, stmt->relation, - false, false, ACL_INSERT); + false, false, targetPerms); /* Validate stmt->cols list, or build default list if no list given */ icolumns = checkInsertTargets(pstate, stmt->cols, &attrnos); @@ -740,6 +752,11 @@ transformInsertStmt(ParseState *pstate, InsertStmt *stmt) attnos = lnext(attnos); } + /* Process ON CONFLICT, if any. */ + if (stmt->onConflictClause) + qry->onConflict = transformOnConflictClause(pstate, + stmt->onConflictClause); + /* * If we have a RETURNING clause, we need to add the target relation to * the query namespace before processing it, so that Var references in @@ -850,6 +867,85 @@ transformInsertRow(ParseState *pstate, List *exprlist, } /* + * transformOnConflictClause - + * transforms an OnConflictClause in an INSERT + */ +static OnConflictExpr * +transformOnConflictClause(ParseState *pstate, + OnConflictClause *onConflictClause) +{ + List *arbiterElems; + Node *arbiterWhere; + Oid arbiterConstraint; + List *onConflictSet = NIL; + Node *onConflictWhere = NULL; + RangeTblEntry *exclRte = NULL; + int exclRelIndex = 0; + List *exclRelTlist = NIL; + OnConflictExpr *result; + + /* Process the arbiter clause, ON CONFLICT (...) */ + transformOnConflictArbiter(pstate, onConflictClause, &arbiterElems, + &arbiterWhere, &arbiterConstraint); + + /* Process DO UPDATE */ + if (onConflictClause->action == ONCONFLICT_UPDATE) + { + exclRte = addRangeTableEntryForRelation(pstate, + pstate->p_target_relation, + makeAlias("excluded", NIL), + false, false); + exclRelIndex = list_length(pstate->p_rtable); + + /* + * Build a targetlist for the EXCLUDED pseudo relation. For + * simplicity we do that here, because expandRelAttrs() happens to + * nearly do the right thing; specifically it also works with views. + * It'd be more proper to instead scan some pseudo scan node, but it + * doesn't seem worth the amount of code required. + * + * The only caveat of this hack is that the permissions expandRelAttrs + * adds have to be reset. markVarForSelectPriv() will add the exact + * required permissions back. + */ + exclRelTlist = expandRelAttrs(pstate, exclRte, + exclRelIndex, 0, -1); + exclRte->requiredPerms = 0; + exclRte->selectedCols = NULL; + + /* + * Add EXCLUDED and the target RTE to the namespace, so that they can + * be used in the UPDATE statement. + */ + addRTEtoQuery(pstate, exclRte, false, true, true); + addRTEtoQuery(pstate, pstate->p_target_rangetblentry, + false, true, true); + + onConflictSet = + transformUpdateTargetList(pstate, onConflictClause->targetList); + + onConflictWhere = transformWhereClause(pstate, + onConflictClause->whereClause, + EXPR_KIND_WHERE, "WHERE"); + } + + /* Finally, build ON CONFLICT DO [NOTHING | UPDATE] expression */ + result = makeNode(OnConflictExpr); + + result->action = onConflictClause->action; + result->arbiterElems = arbiterElems; + result->arbiterWhere = arbiterWhere; + result->constraint = arbiterConstraint; + result->onConflictSet = onConflictSet; + result->onConflictWhere = onConflictWhere; + result->exclRelIndex = exclRelIndex; + result->exclRelTlist = exclRelTlist; + + return result; +} + + +/* + * count_rowexpr_columns - + * get number of columns contained in a ROW() expression; + * return -1 if expression isn't a RowExpr or a Var referencing one.
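The EXCLUDED pseudo-relation built in transformOnConflictClause() above is what lets a DO UPDATE's SET list and WHERE clause refer to the tuple that was proposed for insertion. A usage sketch against a hypothetical table:

    CREATE TABLE counters (key text PRIMARY KEY, hits int);

    INSERT INTO counters VALUES ('home', 1)
    ON CONFLICT (key) DO UPDATE
    SET hits = counters.hits + EXCLUDED.hits;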
@@ -1899,10 +1995,7 @@ transformUpdateStmt(ParseState *pstate, UpdateStmt *stmt) { Query *qry = makeNode(Query); ParseNamespaceItem *nsitem; - RangeTblEntry *target_rte; Node *qual; - ListCell *origTargetList; - ListCell *tl; qry->commandType = CMD_UPDATE; pstate->p_is_update = true; @@ -1937,23 +2030,41 @@ transformUpdateStmt(ParseState *pstate, UpdateStmt *stmt) nsitem->p_lateral_only = false; nsitem->p_lateral_ok = true; - qry->targetList = transformTargetList(pstate, stmt->targetList, - EXPR_KIND_UPDATE_SOURCE); - qual = transformWhereClause(pstate, stmt->whereClause, EXPR_KIND_WHERE, "WHERE"); qry->returningList = transformReturningList(pstate, stmt->returningList); + /* + * Now we are done with SELECT-like processing, and can get on with + * transforming the target list to match the UPDATE target columns. + */ + qry->targetList = transformUpdateTargetList(pstate, stmt->targetList); + qry->rtable = pstate->p_rtable; qry->jointree = makeFromExpr(pstate->p_joinlist, qual); qry->hasSubLinks = pstate->p_hasSubLinks; - /* - * Now we are done with SELECT-like processing, and can get on with - * transforming the target list to match the UPDATE target columns. - */ + assign_query_collations(pstate, qry); + + return qry; +} + +/* + * transformUpdateTargetList - + * handle SET clause in UPDATE/INSERT ... ON CONFLICT UPDATE + */ +static List * +transformUpdateTargetList(ParseState *pstate, List *origTlist) +{ + List *tlist = NIL; + RangeTblEntry *target_rte; + ListCell *orig_tl; + ListCell *tl; + + tlist = transformTargetList(pstate, origTlist, + EXPR_KIND_UPDATE_SOURCE); /* Prepare to assign non-conflicting resnos to resjunk attributes */ if (pstate->p_next_resno <= pstate->p_target_relation->rd_rel->relnatts) @@ -1961,9 +2072,9 @@ transformUpdateStmt(ParseState *pstate, UpdateStmt *stmt) /* Prepare non-junk columns for assignment to target table */ target_rte = pstate->p_target_rangetblentry; - origTargetList = list_head(stmt->targetList); + orig_tl = list_head(origTlist); - foreach(tl, qry->targetList) + foreach(tl, tlist) { TargetEntry *tle = (TargetEntry *) lfirst(tl); ResTarget *origTarget; @@ -1981,9 +2092,9 @@ transformUpdateStmt(ParseState *pstate, UpdateStmt *stmt) tle->resname = NULL; continue; } - if (origTargetList == NULL) + if (orig_tl == NULL) elog(ERROR, "UPDATE target count mismatch --- internal error"); - origTarget = (ResTarget *) lfirst(origTargetList); + origTarget = (ResTarget *) lfirst(orig_tl); Assert(IsA(origTarget, ResTarget)); attrno = attnameAttNum(pstate->p_target_relation, @@ -2005,14 +2116,12 @@ transformUpdateStmt(ParseState *pstate, UpdateStmt *stmt) target_rte->updatedCols = bms_add_member(target_rte->updatedCols, attrno - FirstLowInvalidHeapAttributeNumber); - origTargetList = lnext(origTargetList); + orig_tl = lnext(orig_tl); } - if (origTargetList != NULL) + if (orig_tl != NULL) elog(ERROR, "UPDATE target count mismatch --- internal error"); - assign_query_collations(pstate, qry); - - return qry; + return tlist; } /* diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index 0180530a309..7a4c07365c1 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -217,6 +217,8 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); RangeVar *range; IntoClause *into; WithClause *with; + InferClause *infer; + OnConflictClause *onconflict; A_Indices *aind; ResTarget *target; struct PrivTarget *privtarget; @@ -318,7 +320,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); 
opt_class opt_inline_handler opt_validator validator_clause opt_collate -%type <range> qualified_name OptConstrFromTable +%type <range> qualified_name insert_target OptConstrFromTable %type <str> all_Op MathOp @@ -344,7 +346,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); OptTableElementList TableElementList OptInherit definition OptTypedTableElementList TypedTableElementList reloptions opt_reloptions - OptWith opt_distinct opt_definition func_args func_args_list + OptWith distinct_clause opt_all_clause opt_definition func_args func_args_list func_args_with_defaults func_args_with_defaults_list aggr_args aggr_args_list func_as createfunc_opt_list alterfunc_opt_list @@ -389,7 +391,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); %type <node> for_locking_item %type <list> for_locking_clause opt_for_locking_clause for_locking_items %type <list> locked_rels_list -%type <boolean> opt_all +%type <boolean> all_or_distinct %type <node> join_outer join_qual %type <jtype> join_type @@ -418,6 +420,8 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); %type <defelt> SeqOptElem %type <istmt> insert_rest +%type <infer> opt_conf_expr +%type <onconflict> opt_on_conflict %type <vsetstmt> generic_set set_rest set_rest_more generic_reset reset_rest SetResetClause FunctionSetResetClause @@ -557,8 +561,8 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); CACHE CALLED CASCADE CASCADED CASE CAST CATALOG_P CHAIN CHAR_P CHARACTER CHARACTERISTICS CHECK CHECKPOINT CLASS CLOSE CLUSTER COALESCE COLLATE COLLATION COLUMN COMMENT COMMENTS COMMIT - COMMITTED CONCURRENTLY CONFIGURATION CONNECTION CONSTRAINT CONSTRAINTS - CONTENT_P CONTINUE_P CONVERSION_P COPY COST CREATE + COMMITTED CONCURRENTLY CONFIGURATION CONFLICT CONNECTION CONSTRAINT + CONSTRAINTS CONTENT_P CONTINUE_P CONVERSION_P COPY COST CREATE CROSS CSV CURRENT_P CURRENT_CATALOG CURRENT_DATE CURRENT_ROLE CURRENT_SCHEMA CURRENT_TIME CURRENT_TIMESTAMP CURRENT_USER CURSOR CYCLE @@ -9436,15 +9440,35 @@ DeallocateStmt: DEALLOCATE name *****************************************************************************/ InsertStmt: - opt_with_clause INSERT INTO qualified_name insert_rest returning_clause + opt_with_clause INSERT INTO insert_target insert_rest + opt_on_conflict returning_clause { $5->relation = $4; - $5->returningList = $6; + $5->onConflictClause = $6; + $5->returningList = $7; $5->withClause = $1; $$ = (Node *) $5; } ; +/* + * Can't easily make AS optional here, because VALUES in insert_rest would + * have a shift/reduce conflict with VALUES as an optional alias. We could + * easily allow unreserved_keywords as optional aliases, but that'd be an odd + * divergence from other places. So just require AS for now.
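The insert_target production that follows accepts an alias for the target table (with a mandatory AS, per the comment above), chiefly so a DO UPDATE clause can distinguish the existing row from EXCLUDED without spelling out the full table name. A sketch with hypothetical names:

    INSERT INTO distributors AS d (did, dname)
    VALUES (8, 'Anvil')
    ON CONFLICT (did) DO UPDATE
    SET dname = EXCLUDED.dname
    WHERE d.dname IS DISTINCT FROM EXCLUDED.dname;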
+ */ +insert_target: + qualified_name + { + $$ = $1; + } + | qualified_name AS ColId + { + $1->alias = makeAlias($3, NIL); + $$ = $1; + } + ; + insert_rest: SelectStmt { @@ -9484,6 +9508,56 @@ insert_column_item: } ; +opt_on_conflict: + ON CONFLICT opt_conf_expr DO UPDATE SET set_clause_list where_clause + { + $$ = makeNode(OnConflictClause); + $$->action = ONCONFLICT_UPDATE; + $$->infer = $3; + $$->targetList = $7; + $$->whereClause = $8; + $$->location = @1; + } + | + ON CONFLICT opt_conf_expr DO NOTHING + { + $$ = makeNode(OnConflictClause); + $$->action = ONCONFLICT_NOTHING; + $$->infer = $3; + $$->targetList = NIL; + $$->whereClause = NULL; + $$->location = @1; + } + | /*EMPTY*/ + { + $$ = NULL; + } + ; + +opt_conf_expr: + '(' index_params ')' where_clause + { + $$ = makeNode(InferClause); + $$->indexElems = $2; + $$->whereClause = $4; + $$->conname = NULL; + $$->location = @1; + } + | + ON CONSTRAINT name + { + $$ = makeNode(InferClause); + $$->indexElems = NIL; + $$->whereClause = NULL; + $$->conname = $3; + $$->location = @1; + } + | /*EMPTY*/ + { + $$ = NULL; + } + ; + returning_clause: RETURNING target_list { $$ = $2; } | /* EMPTY */ { $$ = NIL; } @@ -9870,7 +9944,21 @@ select_clause: * However, this is not checked by the grammar; parse analysis must check it. */ simple_select: - SELECT opt_distinct opt_target_list + SELECT opt_all_clause opt_target_list + into_clause from_clause where_clause + group_clause having_clause window_clause + { + SelectStmt *n = makeNode(SelectStmt); + n->targetList = $3; + n->intoClause = $4; + n->fromClause = $5; + n->whereClause = $6; + n->groupClause = $7; + n->havingClause = $8; + n->windowClause = $9; + $$ = (Node *)n; + } + | SELECT distinct_clause target_list into_clause from_clause where_clause group_clause having_clause window_clause { @@ -9905,15 +9993,15 @@ simple_select: n->fromClause = list_make1($2); $$ = (Node *)n; } - | select_clause UNION opt_all select_clause + | select_clause UNION all_or_distinct select_clause { $$ = makeSetOp(SETOP_UNION, $3, $1, $4); } - | select_clause INTERSECT opt_all select_clause + | select_clause INTERSECT all_or_distinct select_clause { $$ = makeSetOp(SETOP_INTERSECT, $3, $1, $4); } - | select_clause EXCEPT opt_all select_clause + | select_clause EXCEPT all_or_distinct select_clause { $$ = makeSetOp(SETOP_EXCEPT, $3, $1, $4); } @@ -10052,7 +10140,8 @@ opt_table: TABLE {} | /*EMPTY*/ {} ; -opt_all: ALL { $$ = TRUE; } +all_or_distinct: + ALL { $$ = TRUE; } | DISTINCT { $$ = FALSE; } | /*EMPTY*/ { $$ = FALSE; } ; @@ -10060,10 +10149,13 @@ opt_all: ALL { $$ = TRUE; } /* We use (NIL) as a placeholder to indicate that all target expressions * should be placed in the DISTINCT list during parsetree analysis. 
*/ -opt_distinct: +distinct_clause: DISTINCT { $$ = list_make1(NIL); } | DISTINCT ON '(' expr_list ')' { $$ = $4; } - | ALL { $$ = NIL; } + ; + +opt_all_clause: + ALL { $$ = NIL;} | /*EMPTY*/ { $$ = NIL; } ; @@ -13367,6 +13459,7 @@ unreserved_keyword: | COMMIT | COMMITTED | CONFIGURATION + | CONFLICT | CONNECTION | CONSTRAINTS | CONTENT_P diff --git a/src/backend/parser/parse_clause.c b/src/backend/parser/parse_clause.c index 8d90b5098a1..73c505ed85b 100644 --- a/src/backend/parser/parse_clause.c +++ b/src/backend/parser/parse_clause.c @@ -16,7 +16,9 @@ #include "postgres.h" #include "access/heapam.h" +#include "catalog/catalog.h" #include "catalog/heap.h" +#include "catalog/pg_constraint.h" #include "catalog/pg_type.h" #include "commands/defrem.h" #include "nodes/makefuncs.h" @@ -32,6 +34,7 @@ #include "parser/parse_oper.h" #include "parser/parse_relation.h" #include "parser/parse_target.h" +#include "parser/parse_type.h" #include "rewrite/rewriteManip.h" #include "utils/guc.h" #include "utils/lsyscache.h" @@ -75,6 +78,8 @@ static TargetEntry *findTargetlistEntrySQL99(ParseState *pstate, Node *node, List **tlist, ParseExprKind exprKind); static int get_matching_location(int sortgroupref, List *sortgrouprefs, List *exprs); +static List *resolve_unique_index_expr(ParseState *pstate, InferClause * infer, + Relation heapRel); static List *addTargetToGroupList(ParseState *pstate, TargetEntry *tle, List *grouplist, List *targetlist, int location, bool resolveUnknown); @@ -2167,6 +2172,204 @@ get_matching_location(int sortgroupref, List *sortgrouprefs, List *exprs) } /* + * resolve_unique_index_expr + * Infer a unique index from a list of indexElems, for ON + * CONFLICT clause + * + * Perform parse analysis of expressions and columns appearing within ON + * CONFLICT clause. During planning, the returned list of expressions is used + * to infer which unique index to use. + */ +static List * +resolve_unique_index_expr(ParseState *pstate, InferClause *infer, + Relation heapRel) +{ + List *result = NIL; + ListCell *l; + + foreach(l, infer->indexElems) + { + IndexElem *ielem = (IndexElem *) lfirst(l); + InferenceElem *pInfer = makeNode(InferenceElem); + Node *parse; + + /* + * Raw grammar re-uses CREATE INDEX infrastructure for unique index + * inference clause, and so will accept opclasses by name and so on. + * + * Make no attempt to match ASC or DESC ordering or NULLS FIRST/NULLS + * LAST ordering, since those are not significant for inference + * purposes (any unique index matching the inference specification in + * other regards is accepted indifferently). Actively reject this as + * wrong-headed. + */ + if (ielem->ordering != SORTBY_DEFAULT) + ereport(ERROR, + (errcode(ERRCODE_INVALID_COLUMN_REFERENCE), + errmsg("ASC/DESC is not allowed in ON CONFLICT clause"), + parser_errposition(pstate, + exprLocation((Node *) infer)))); + if (ielem->nulls_ordering != SORTBY_NULLS_DEFAULT) + ereport(ERROR, + (errcode(ERRCODE_INVALID_COLUMN_REFERENCE), + errmsg("NULLS FIRST/LAST is not allowed in ON CONFLICT clause"), + parser_errposition(pstate, + exprLocation((Node *) infer)))); + + if (!ielem->expr) + { + /* Simple index attribute */ + ColumnRef *n; + + /* + * Grammar won't have built raw expression for us in event of + * plain column reference. Create one directly, and perform + * expression transformation. Planner expects this, and performs + * its own normalization for the purposes of matching against + * pg_index. 
+ */ + n = makeNode(ColumnRef); + n->fields = list_make1(makeString(ielem->name)); + /* Location is approximately that of inference specification */ + n->location = infer->location; + parse = (Node *) n; + } + else + { + /* Do parse transformation of the raw expression */ + parse = (Node *) ielem->expr; + } + + /* + * transformExpr() should have already rejected subqueries, + * aggregates, and window functions, based on the EXPR_KIND_ for an + * index expression. Expressions returning sets won't have been + * rejected, but don't bother doing so here; there should be no + * available expression unique index to match any such expression + * against anyway. + */ + pInfer->expr = transformExpr(pstate, parse, EXPR_KIND_INDEX_EXPRESSION); + + /* Perform lookup of collation and operator class as required */ + if (!ielem->collation) + pInfer->infercollid = InvalidOid; + else + pInfer->infercollid = LookupCollation(pstate, ielem->collation, + exprLocation(pInfer->expr)); + + if (!ielem->opclass) + { + pInfer->inferopfamily = InvalidOid; + pInfer->inferopcinputtype = InvalidOid; + } + else + { + Oid opclass = get_opclass_oid(BTREE_AM_OID, ielem->opclass, + false); + + pInfer->inferopfamily = get_opclass_family(opclass); + pInfer->inferopcinputtype = get_opclass_input_type(opclass); + } + + result = lappend(result, pInfer); + } + + return result; +} + +/* + * transformOnConflictArbiter - + * transform arbiter expressions in an ON CONFLICT clause. + * + * Transformed expressions are used to infer one unique index relation to + * serve as an ON CONFLICT arbiter. Partial unique indexes may be inferred + * using the WHERE clause from the inference specification. + */ +void +transformOnConflictArbiter(ParseState *pstate, + OnConflictClause *onConflictClause, + List **arbiterExpr, Node **arbiterWhere, + Oid *constraint) +{ + InferClause *infer = onConflictClause->infer; + + *arbiterExpr = NIL; + *arbiterWhere = NULL; + *constraint = InvalidOid; + + if (onConflictClause->action == ONCONFLICT_UPDATE && !infer) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("ON CONFLICT DO UPDATE requires inference specification or constraint name"), + errhint("For example, ON CONFLICT (<column>)."), + parser_errposition(pstate, + exprLocation((Node *) onConflictClause)))); + + /* + * To simplify certain aspects of its design, speculative insertion into + * system catalogs is disallowed. + */ + if (IsCatalogRelation(pstate->p_target_relation)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("ON CONFLICT not supported with system catalog tables"), + parser_errposition(pstate, + exprLocation((Node *) onConflictClause)))); + + /* Same applies to table used by logical decoding as catalog table */ + if (RelationIsUsedAsCatalogTable(pstate->p_target_relation)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("ON CONFLICT not supported on table \"%s\" used as a catalog table", + RelationGetRelationName(pstate->p_target_relation)), + parser_errposition(pstate, + exprLocation((Node *) onConflictClause)))); + + /* ON CONFLICT DO NOTHING does not require an inference clause */ + if (infer) + { + List *save_namespace; + + /* + * While we process the arbiter expressions, accept only + * non-qualified references to the target table. Hide any other + * relations.
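transformOnConflictArbiter() above thus insists on an arbiter for DO UPDATE while leaving it optional for DO NOTHING. Sketched against a hypothetical table (the default primary-key constraint name t_pkey is assumed):

    CREATE TABLE t (id int PRIMARY KEY, v text);

    -- Rejected: DO UPDATE without an inference specification or constraint name.
    INSERT INTO t VALUES (1, 'x') ON CONFLICT DO UPDATE SET v = EXCLUDED.v;

    -- Accepted: DO NOTHING may omit the arbiter entirely ...
    INSERT INTO t VALUES (1, 'x') ON CONFLICT DO NOTHING;

    -- ... or either action may name a constraint directly.
    INSERT INTO t VALUES (1, 'x') ON CONFLICT ON CONSTRAINT t_pkey DO NOTHING;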
+ */ + save_namespace = pstate->p_namespace; + pstate->p_namespace = NIL; + addRTEtoQuery(pstate, pstate->p_target_rangetblentry, + false, false, true); + + if (infer->indexElems) + *arbiterExpr = resolve_unique_index_expr(pstate, infer, + pstate->p_target_relation); + + /* + * Handling inference WHERE clause (for partial unique index + * inference) + */ + if (infer->whereClause) + *arbiterWhere = transformExpr(pstate, infer->whereClause, + EXPR_KIND_INDEX_PREDICATE); + + pstate->p_namespace = save_namespace; + + if (infer->conname) + *constraint = get_relation_constraint_oid(RelationGetRelid(pstate->p_target_relation), + infer->conname, false); + } + + /* + * It's convenient to form a list of expressions based on the + * representation used by CREATE INDEX, since the same restrictions are + * appropriate (e.g. on subqueries). However, from here on, a dedicated + * primnode representation is used for inference elements, and so + * assign_query_collations() can be trusted to do the right thing with the + * post parse analysis query tree inference clause representation. + */ +} + +/* * addTargetToSortList * If the given targetlist entry isn't already in the SortGroupClause * list, add it to the end of the list, using the given sort ordering diff --git a/src/backend/parser/parse_collate.c b/src/backend/parser/parse_collate.c index 7c6a11c7575..4c85b708d3b 100644 --- a/src/backend/parser/parse_collate.c +++ b/src/backend/parser/parse_collate.c @@ -479,9 +479,11 @@ assign_collations_walker(Node *node, assign_collations_context *context) parser_errposition(context->pstate, loccontext.location2))); break; + case T_InferenceElem: case T_RangeTblRef: case T_JoinExpr: case T_FromExpr: + case T_OnConflictExpr: case T_SortGroupClause: (void) expression_tree_walker(node, assign_collations_walker, diff --git a/src/backend/parser/parse_target.c b/src/backend/parser/parse_target.c index 2d85cf08e70..59973ba9c3c 100644 --- a/src/backend/parser/parse_target.c +++ b/src/backend/parser/parse_target.c @@ -537,11 +537,12 @@ transformAssignedExpr(ParseState *pstate, /* * updateTargetListEntry() - * This is used in UPDATE statements only. It prepares an UPDATE - * TargetEntry for assignment to a column of the target table. - * This includes coercing the given value to the target column's type - * (if necessary), and dealing with any subfield names or subscripts - * attached to the target column itself. + * This is used in UPDATE statements (and ON CONFLICT DO UPDATE) + * only. It prepares an UPDATE TargetEntry for assignment to a + * column of the target table. This includes coercing the given + * value to the target column's type (if necessary), and dealing with + * any subfield names or subscripts attached to the target column + * itself. 
* * pstate parse state * tle target list entry to be modified diff --git a/src/backend/replication/logical/decode.c b/src/backend/replication/logical/decode.c index 88424964ef3..ea388182692 100644 --- a/src/backend/replication/logical/decode.c +++ b/src/backend/replication/logical/decode.c @@ -64,6 +64,8 @@ static void DecodeInsert(LogicalDecodingContext *ctx, XLogRecordBuffer *buf); static void DecodeUpdate(LogicalDecodingContext *ctx, XLogRecordBuffer *buf); static void DecodeDelete(LogicalDecodingContext *ctx, XLogRecordBuffer *buf); static void DecodeMultiInsert(LogicalDecodingContext *ctx, XLogRecordBuffer *buf); +static void DecodeSpecConfirm(LogicalDecodingContext *ctx, XLogRecordBuffer *buf); + static void DecodeCommit(LogicalDecodingContext *ctx, XLogRecordBuffer *buf, xl_xact_parsed_commit *parsed, TransactionId xid); static void DecodeAbort(LogicalDecodingContext *ctx, XLogRecordBuffer *buf, @@ -414,6 +416,11 @@ DecodeHeapOp(LogicalDecodingContext *ctx, XLogRecordBuffer *buf) ReorderBufferXidSetCatalogChanges(ctx->reorder, xid, buf->origptr); break; + case XLOG_HEAP_CONFIRM: + if (SnapBuildProcessChange(builder, xid, buf->origptr)) + DecodeSpecConfirm(ctx, buf); + break; + case XLOG_HEAP_LOCK: /* we don't care about row level locks for now */ break; @@ -564,11 +571,15 @@ DecodeInsert(LogicalDecodingContext *ctx, XLogRecordBuffer *buf) return; change = ReorderBufferGetChange(ctx->reorder); - change->action = REORDER_BUFFER_CHANGE_INSERT; + if (!(xlrec->flags & XLH_INSERT_IS_SPECULATIVE)) + change->action = REORDER_BUFFER_CHANGE_INSERT; + else + change->action = REORDER_BUFFER_CHANGE_INTERNAL_SPEC_INSERT; change->origin_id = XLogRecGetOrigin(r); + memcpy(&change->data.tp.relnode, &target_node, sizeof(RelFileNode)); - if (xlrec->flags & XLOG_HEAP_CONTAINS_NEW_TUPLE) + if (xlrec->flags & XLH_INSERT_CONTAINS_NEW_TUPLE) { Size tuplelen; char *tupledata = XLogRecGetBlockData(r, 0, &tuplelen); @@ -615,7 +626,7 @@ DecodeUpdate(LogicalDecodingContext *ctx, XLogRecordBuffer *buf) change->origin_id = XLogRecGetOrigin(r); memcpy(&change->data.tp.relnode, &target_node, sizeof(RelFileNode)); - if (xlrec->flags & XLOG_HEAP_CONTAINS_NEW_TUPLE) + if (xlrec->flags & XLH_UPDATE_CONTAINS_NEW_TUPLE) { data = XLogRecGetBlockData(r, 0, &datalen); @@ -624,7 +635,7 @@ DecodeUpdate(LogicalDecodingContext *ctx, XLogRecordBuffer *buf) DecodeXLogTuple(data, datalen, change->data.tp.newtuple); } - if (xlrec->flags & XLOG_HEAP_CONTAINS_OLD) + if (xlrec->flags & XLH_UPDATE_CONTAINS_OLD) { /* caution, remaining data in record is not aligned */ data = XLogRecGetData(r) + SizeOfHeapUpdate; @@ -660,6 +671,13 @@ DecodeDelete(LogicalDecodingContext *ctx, XLogRecordBuffer *buf) if (target_node.dbNode != ctx->slot->data.database) return; + /* + * Super deletions are irrelevant for logical decoding, it's driven by the + * confirmation records. 
+ */ + if (xlrec->flags & XLH_DELETE_IS_SUPER) + return; + /* output plugin doesn't look for this origin, no need to queue */ if (FilterByOrigin(ctx, XLogRecGetOrigin(r))) return; @@ -671,7 +689,7 @@ DecodeDelete(LogicalDecodingContext *ctx, XLogRecordBuffer *buf) memcpy(&change->data.tp.relnode, &target_node, sizeof(RelFileNode)); /* old primary key stored */ - if (xlrec->flags & XLOG_HEAP_CONTAINS_OLD) + if (xlrec->flags & XLH_DELETE_CONTAINS_OLD) { Assert(XLogRecGetDataLen(r) > (SizeOfHeapDelete + SizeOfHeapHeader)); @@ -737,7 +755,7 @@ DecodeMultiInsert(LogicalDecodingContext *ctx, XLogRecordBuffer *buf) * We decode the tuple in pretty much the same way as DecodeXLogTuple, * but since the layout is slightly different, we can't use it here. */ - if (xlrec->flags & XLOG_HEAP_CONTAINS_NEW_TUPLE) + if (xlrec->flags & XLH_INSERT_CONTAINS_NEW_TUPLE) { change->data.tp.newtuple = ReorderBufferGetTupleBuf(ctx->reorder); @@ -775,7 +793,7 @@ DecodeMultiInsert(LogicalDecodingContext *ctx, XLogRecordBuffer *buf) * xl_multi_insert_tuple record emitted by one heap_multi_insert() * call. */ - if (xlrec->flags & XLOG_HEAP_LAST_MULTI_INSERT && + if (xlrec->flags & XLH_INSERT_LAST_IN_MULTI && (i + 1) == xlrec->ntuples) change->data.tp.clear_toast_afterwards = true; else @@ -788,6 +806,40 @@ DecodeMultiInsert(LogicalDecodingContext *ctx, XLogRecordBuffer *buf) } /* + * Parse XLOG_HEAP_CONFIRM from WAL into a confirmation change. + * + * This is pretty trivial; all the state is essentially already set up by the + * speculative insertion. + */ +static void +DecodeSpecConfirm(LogicalDecodingContext *ctx, XLogRecordBuffer *buf) +{ + XLogReaderState *r = buf->record; + ReorderBufferChange *change; + RelFileNode target_node; + + /* only interested in our database */ + XLogRecGetBlockTag(r, 0, &target_node, NULL, NULL); + if (target_node.dbNode != ctx->slot->data.database) + return; + + /* output plugin doesn't look for this origin, no need to queue */ + if (FilterByOrigin(ctx, XLogRecGetOrigin(r))) + return; + + change = ReorderBufferGetChange(ctx->reorder); + change->action = REORDER_BUFFER_CHANGE_INTERNAL_SPEC_CONFIRM; + change->origin_id = XLogRecGetOrigin(r); + + memcpy(&change->data.tp.relnode, &target_node, sizeof(RelFileNode)); + + change->data.tp.clear_toast_afterwards = true; + + ReorderBufferQueueChange(ctx->reorder, XLogRecGetXid(r), buf->origptr, change); +} + + +/* + * Read a HeapTuple as WAL logged by heap_insert, heap_update and heap_delete + * (but not by heap_multi_insert) into a tuplebuf.
* diff --git a/src/backend/replication/logical/reorderbuffer.c b/src/backend/replication/logical/reorderbuffer.c index c9c1d1036e0..57854b0aa57 100644 --- a/src/backend/replication/logical/reorderbuffer.c +++ b/src/backend/replication/logical/reorderbuffer.c @@ -401,6 +401,7 @@ ReorderBufferReturnChange(ReorderBuffer *rb, ReorderBufferChange *change) case REORDER_BUFFER_CHANGE_INSERT: case REORDER_BUFFER_CHANGE_UPDATE: case REORDER_BUFFER_CHANGE_DELETE: + case REORDER_BUFFER_CHANGE_INTERNAL_SPEC_INSERT: if (change->data.tp.newtuple) { ReorderBufferReturnTupleBuf(rb, change->data.tp.newtuple); @@ -420,8 +421,9 @@ ReorderBufferReturnChange(ReorderBuffer *rb, ReorderBufferChange *change) change->data.snapshot = NULL; } break; + /* no data in addition to the struct itself */ + case REORDER_BUFFER_CHANGE_INTERNAL_SPEC_CONFIRM: case REORDER_BUFFER_CHANGE_INTERNAL_COMMAND_ID: - break; case REORDER_BUFFER_CHANGE_INTERNAL_TUPLECID: break; } @@ -1317,6 +1319,7 @@ ReorderBufferCommit(ReorderBuffer *rb, TransactionId xid, PG_TRY(); { ReorderBufferChange *change; + ReorderBufferChange *specinsert = NULL; if (using_subtxn) BeginInternalSubTransaction("replay"); @@ -1333,6 +1336,17 @@ ReorderBufferCommit(ReorderBuffer *rb, TransactionId xid, switch (change->action) { + case REORDER_BUFFER_CHANGE_INTERNAL_SPEC_CONFIRM: + /* + * Confirmation for speculative insertion arrived. Simply + * use as a normal record. It'll be cleaned up at the end + * of INSERT processing. + */ + Assert(specinsert->data.tp.oldtuple == NULL); + change = specinsert; + change->action = REORDER_BUFFER_CHANGE_INSERT; + + /* intentionally fall through */ case REORDER_BUFFER_CHANGE_INSERT: case REORDER_BUFFER_CHANGE_UPDATE: case REORDER_BUFFER_CHANGE_DELETE: @@ -1348,7 +1362,7 @@ ReorderBufferCommit(ReorderBuffer *rb, TransactionId xid, if (reloid == InvalidOid && change->data.tp.newtuple == NULL && change->data.tp.oldtuple == NULL) - continue; + goto change_done; else if (reloid == InvalidOid) elog(ERROR, "could not map filenode \"%s\" to relation OID", relpathperm(change->data.tp.relnode, @@ -1362,50 +1376,92 @@ ReorderBufferCommit(ReorderBuffer *rb, TransactionId xid, relpathperm(change->data.tp.relnode, MAIN_FORKNUM)); - if (RelationIsLogicallyLogged(relation)) + if (!RelationIsLogicallyLogged(relation)) + goto change_done; + + /* + * For now ignore sequence changes entirely. Most of + * the time they don't log changes using records we + * understand, so it doesn't make sense to handle the + * few cases we do. + */ + if (relation->rd_rel->relkind == RELKIND_SEQUENCE) + goto change_done; + + /* user-triggered change */ + if (!IsToastRelation(relation)) { + ReorderBufferToastReplace(rb, txn, relation, change); + rb->apply_change(rb, txn, relation, change); + /* - * For now ignore sequence changes entirely. Most of - * the time they don't log changes using records we - * understand, so it doesn't make sense to handle the - * few cases we do. + * Only clear reassembled toast chunks if we're + * sure they're not required anymore. The creator + * of the tuple tells us. */ - if (relation->rd_rel->relkind == RELKIND_SEQUENCE) - { - } - /* user-triggered change */ - else if (!IsToastRelation(relation)) - { - ReorderBufferToastReplace(rb, txn, relation, change); - rb->apply_change(rb, txn, relation, change); - - /* - * Only clear reassembled toast chunks if we're - * sure they're not required anymore. The creator - * of the tuple tells us. 
- */ - if (change->data.tp.clear_toast_afterwards) - ReorderBufferToastReset(rb, txn); - } - /* we're not interested in toast deletions */ - else if (change->action == REORDER_BUFFER_CHANGE_INSERT) - { - /* - * Need to reassemble the full toasted Datum in - * memory, to ensure the chunks don't get reused - * till we're done remove it from the list of this - * transaction's changes. Otherwise it will get - * freed/reused while restoring spooled data from - * disk. - */ - dlist_delete(&change->node); - ReorderBufferToastAppendChunk(rb, txn, relation, - change); - } + */ + if (change->data.tp.clear_toast_afterwards) + ReorderBufferToastReset(rb, txn); + } + /* we're not interested in toast deletions */ + else if (change->action == REORDER_BUFFER_CHANGE_INSERT) + { + /* + * Need to reassemble the full toasted Datum in + * memory, to ensure the chunks don't get reused till + * we're done; remove it from the list of this + * transaction's changes. Otherwise it will get + * freed/reused while restoring spooled data from + * disk. + */ + dlist_delete(&change->node); + ReorderBufferToastAppendChunk(rb, txn, relation, + change); + } + + change_done: + /* + * Either the speculative insertion was confirmed, or it was + * unsuccessful and the record isn't needed anymore. + */ + if (specinsert != NULL) + { + ReorderBufferReturnChange(rb, specinsert); + specinsert = NULL; + } + if (relation != NULL) + { + RelationClose(relation); + relation = NULL; } - RelationClose(relation); break; + + case REORDER_BUFFER_CHANGE_INTERNAL_SPEC_INSERT: + /* + * Speculative insertions are dealt with by delaying the + * processing of the insert until the confirmation record + * arrives. For that we simply unlink the record from the + * chain, so it does not get freed/reused while restoring + * spooled data from disk. + * + * This is safe in the face of concurrent catalog changes + * because the relevant relation can't be changed between + * speculative insertion and confirmation due to + * CheckTableNotInUse() and locking. + */ + + /* clear out a pending (and thus failed) speculation */ + if (specinsert != NULL) + { + ReorderBufferReturnChange(rb, specinsert); + specinsert = NULL; + } + + /* and memorize the pending insertion */ + dlist_delete(&change->node); + specinsert = change; + break; + case REORDER_BUFFER_CHANGE_INTERNAL_SNAPSHOT: /* get rid of the old */ TeardownHistoricSnapshot(false); @@ -1474,6 +1530,17 @@ ReorderBufferCommit(ReorderBuffer *rb, TransactionId xid, } } + /* + * If there's a speculative insertion remaining, just clean it up; it + * can't have been successful, otherwise we'd have gotten a confirmation + * record.
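Given the buffering above, a speculative insertion only ever reaches an output plugin as a plain INSERT (once its confirmation record arrives), or not at all. One way to observe this from SQL, assuming wal_level=logical, the contrib test_decoding plugin, and the same hypothetical table t as earlier:

    SELECT pg_create_logical_replication_slot('regression_slot', 'test_decoding');
    INSERT INTO t VALUES (1, 'x') ON CONFLICT (id) DO NOTHING;
    SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL);
    -- Shows an ordinary INSERT change if the tuple was kept; if the
    -- speculative insertion was backed out, no change appears at all.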
+ */ + if (specinsert) + { + ReorderBufferReturnChange(rb, specinsert); + specinsert = NULL; + } + /* clean up the iterator */ ReorderBufferIterTXNFinish(rb, iterstate); iterstate = NULL; @@ -2001,11 +2068,11 @@ ReorderBufferSerializeChange(ReorderBuffer *rb, ReorderBufferTXN *txn, switch (change->action) { + /* fall through these, they're all similar enough */ case REORDER_BUFFER_CHANGE_INSERT: - /* fall through */ case REORDER_BUFFER_CHANGE_UPDATE: - /* fall through */ case REORDER_BUFFER_CHANGE_DELETE: + case REORDER_BUFFER_CHANGE_INTERNAL_SPEC_INSERT: { char *data; ReorderBufferTupleBuf *oldtup, @@ -2083,9 +2150,8 @@ ReorderBufferSerializeChange(ReorderBuffer *rb, ReorderBufferTXN *txn, } break; } + case REORDER_BUFFER_CHANGE_INTERNAL_SPEC_CONFIRM: case REORDER_BUFFER_CHANGE_INTERNAL_COMMAND_ID: - /* ReorderBufferChange contains everything important */ - break; case REORDER_BUFFER_CHANGE_INTERNAL_TUPLECID: /* ReorderBufferChange contains everything important */ break; @@ -2256,11 +2322,11 @@ ReorderBufferRestoreChange(ReorderBuffer *rb, ReorderBufferTXN *txn, /* restore individual stuff */ switch (change->action) { + /* fall through these, they're all similar enough */ case REORDER_BUFFER_CHANGE_INSERT: - /* fall through */ case REORDER_BUFFER_CHANGE_UPDATE: - /* fall through */ case REORDER_BUFFER_CHANGE_DELETE: + case REORDER_BUFFER_CHANGE_INTERNAL_SPEC_INSERT: if (change->data.tp.newtuple) { Size len = offsetof(ReorderBufferTupleBuf, t_data) + @@ -2309,6 +2375,7 @@ ReorderBufferRestoreChange(ReorderBuffer *rb, ReorderBufferTXN *txn, break; } /* the base struct contains all the data, easy peasy */ + case REORDER_BUFFER_CHANGE_INTERNAL_SPEC_CONFIRM: case REORDER_BUFFER_CHANGE_INTERNAL_COMMAND_ID: case REORDER_BUFFER_CHANGE_INTERNAL_TUPLECID: break; diff --git a/src/backend/rewrite/rewriteHandler.c b/src/backend/rewrite/rewriteHandler.c index 0fc47cb786c..39302a410b8 100644 --- a/src/backend/rewrite/rewriteHandler.c +++ b/src/backend/rewrite/rewriteHandler.c @@ -52,7 +52,10 @@ static Query *rewriteRuleAction(Query *parsetree, CmdType event, bool *returning_flag); static List *adjustJoinTreeList(Query *parsetree, bool removert, int rt_index); -static void rewriteTargetListIU(Query *parsetree, Relation target_relation, +static List *rewriteTargetListIU(List *targetList, + CmdType commandType, + Relation target_relation, + int result_rti, List **attrno_list); static TargetEntry *process_matched_tle(TargetEntry *src_tle, TargetEntry *prior_tle, @@ -66,7 +69,7 @@ static void markQueryForLocking(Query *qry, Node *jtnode, LockClauseStrength strength, LockWaitPolicy waitPolicy, bool pushedDown); static List *matchLocks(CmdType event, RuleLock *rulelocks, - int varno, Query *parsetree); + int varno, Query *parsetree, bool *hasUpdate); static Query *fireRIRrules(Query *parsetree, List *activeRIRs, bool forUpdatePushedDown); static bool view_has_instead_trigger(Relation view, CmdType event); @@ -679,11 +682,13 @@ adjustJoinTreeList(Query *parsetree, bool removert, int rt_index) * order of the original tlist's non-junk entries. This is needed for * processing VALUES RTEs. 
*/ -static void -rewriteTargetListIU(Query *parsetree, Relation target_relation, +static List* +rewriteTargetListIU(List *targetList, + CmdType commandType, + Relation target_relation, + int result_rti, List **attrno_list) { - CmdType commandType = parsetree->commandType; TargetEntry **new_tles; List *new_tlist = NIL; List *junk_tlist = NIL; @@ -709,7 +714,7 @@ rewriteTargetListIU(Query *parsetree, Relation target_relation, new_tles = (TargetEntry **) palloc0(numattrs * sizeof(TargetEntry *)); next_junk_attrno = numattrs + 1; - foreach(temp, parsetree->targetList) + foreach(temp, targetList) { TargetEntry *old_tle = (TargetEntry *) lfirst(temp); @@ -827,7 +832,7 @@ rewriteTargetListIU(Query *parsetree, Relation target_relation, { Node *new_expr; - new_expr = (Node *) makeVar(parsetree->resultRelation, + new_expr = (Node *) makeVar(result_rti, attrno, att_tup->atttypid, att_tup->atttypmod, @@ -846,7 +851,7 @@ rewriteTargetListIU(Query *parsetree, Relation target_relation, pfree(new_tles); - parsetree->targetList = list_concat(new_tlist, junk_tlist); + return list_concat(new_tlist, junk_tlist); } @@ -1288,7 +1293,8 @@ static List * matchLocks(CmdType event, RuleLock *rulelocks, int varno, - Query *parsetree) + Query *parsetree, + bool *hasUpdate) { List *matching_locks = NIL; int nlocks; @@ -1309,6 +1315,9 @@ matchLocks(CmdType event, { RewriteRule *oneLock = rulelocks->rules[i]; + if (oneLock->event == CMD_UPDATE) + *hasUpdate = true; + /* * Suppress ON INSERT/UPDATE/DELETE rules that are disabled or * configured to not fire during the current sessions replication @@ -1766,8 +1775,8 @@ fireRIRrules(Query *parsetree, List *activeRIRs, bool forUpdatePushedDown) /* * Fetch any new security quals that must be applied to this RTE. */ - get_row_security_policies(parsetree, rte, rt_index, - &securityQuals, &withCheckOptions, + get_row_security_policies(parsetree, parsetree->commandType, rte, + rt_index, &securityQuals, &withCheckOptions, &hasRowSecurity, &hasSubLinks); if (securityQuals != NIL || withCheckOptions != NIL) @@ -2642,6 +2651,18 @@ rewriteTargetView(Query *parsetree, Relation view) tle->resno - FirstLowInvalidHeapAttributeNumber); } + if (parsetree->onConflict) + { + foreach(lc, parsetree->onConflict->onConflictSet) + { + TargetEntry *tle = (TargetEntry *) lfirst(lc); + + if (!tle->resjunk) + modified_cols = bms_add_member(modified_cols, + tle->resno - FirstLowInvalidHeapAttributeNumber); + } + } + auto_update_detail = view_cols_are_auto_updatable(viewquery, modified_cols, NULL, @@ -2999,6 +3020,7 @@ RewriteQuery(Query *parsetree, List *rewrite_events) CmdType event = parsetree->commandType; bool instead = false; bool returning = false; + bool updatableview = false; Query *qual_product = NULL; List *rewritten = NIL; ListCell *lc1; @@ -3081,6 +3103,7 @@ RewriteQuery(Query *parsetree, List *rewrite_events) Relation rt_entry_relation; List *locks; List *product_queries; + bool hasUpdate = false; result_relation = parsetree->resultRelation; Assert(result_relation != 0); @@ -3123,19 +3146,41 @@ RewriteQuery(Query *parsetree, List *rewrite_events) List *attrnos; /* Process the main targetlist ... */ - rewriteTargetListIU(parsetree, rt_entry_relation, &attrnos); + parsetree->targetList = rewriteTargetListIU(parsetree->targetList, + parsetree->commandType, + rt_entry_relation, + parsetree->resultRelation, + &attrnos); /* ... 
and the VALUES expression lists */ rewriteValuesRTE(values_rte, rt_entry_relation, attrnos); } else { /* Process just the main targetlist */ - rewriteTargetListIU(parsetree, rt_entry_relation, NULL); + parsetree->targetList = + rewriteTargetListIU(parsetree->targetList, + parsetree->commandType, + rt_entry_relation, + parsetree->resultRelation, NULL); + } + + if (parsetree->onConflict && + parsetree->onConflict->action == ONCONFLICT_UPDATE) + { + parsetree->onConflict->onConflictSet = + rewriteTargetListIU(parsetree->onConflict->onConflictSet, + CMD_UPDATE, + rt_entry_relation, + parsetree->resultRelation, + NULL); } } else if (event == CMD_UPDATE) { - rewriteTargetListIU(parsetree, rt_entry_relation, NULL); + parsetree->targetList = + rewriteTargetListIU(parsetree->targetList, + parsetree->commandType, rt_entry_relation, + parsetree->resultRelation, NULL); rewriteTargetListUD(parsetree, rt_entry, rt_entry_relation); } else if (event == CMD_DELETE) @@ -3149,7 +3194,7 @@ RewriteQuery(Query *parsetree, List *rewrite_events) * Collect and apply the appropriate rules. */ locks = matchLocks(event, rt_entry_relation->rd_rules, - result_relation, parsetree); + result_relation, parsetree, &hasUpdate); product_queries = fireRules(parsetree, result_relation, @@ -3198,6 +3243,7 @@ RewriteQuery(Query *parsetree, List *rewrite_events) */ instead = true; returning = true; + updatableview = true; } /* @@ -3278,6 +3324,17 @@ RewriteQuery(Query *parsetree, List *rewrite_events) } } + /* + * Updatable views are supported by ON CONFLICT, so don't prevent that + * case from proceeding + */ + if (parsetree->onConflict && + (product_queries != NIL || hasUpdate) && + !updatableview) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("INSERT with ON CONFLICT clause cannot be used with table that has INSERT or UPDATE rules"))); + heap_close(rt_entry_relation, NoLock); } diff --git a/src/backend/rewrite/rowsecurity.c b/src/backend/rewrite/rowsecurity.c index b0b308118f4..2c095ce88ac 100644 --- a/src/backend/rewrite/rowsecurity.c +++ b/src/backend/rewrite/rowsecurity.c @@ -89,9 +89,10 @@ row_security_policy_hook_type row_security_policy_hook_restrictive = NULL; * set to true if any of the quals returned contain sublinks. */ void -get_row_security_policies(Query* root, RangeTblEntry* rte, int rt_index, - List **securityQuals, List **withCheckOptions, - bool *hasRowSecurity, bool *hasSubLinks) +get_row_security_policies(Query* root, CmdType commandType, RangeTblEntry* rte, + int rt_index, List **securityQuals, + List **withCheckOptions, bool *hasRowSecurity, + bool *hasSubLinks) { Expr *rowsec_expr = NULL; Expr *rowsec_with_check_expr = NULL; @@ -159,7 +160,7 @@ get_row_security_policies(Query* root, RangeTblEntry* rte, int rt_index, /* Grab the built-in policies which should be applied to this relation. */ rel = heap_open(rte->relid, NoLock); - rowsec_policies = pull_row_security_policies(root->commandType, rel, + rowsec_policies = pull_row_security_policies(commandType, rel, user_id); /* @@ -201,7 +202,7 @@ get_row_security_policies(Query* root, RangeTblEntry* rte, int rt_index, */ if (row_security_policy_hook_restrictive) { - hook_policies_restrictive = (*row_security_policy_hook_restrictive)(root->commandType, rel); + hook_policies_restrictive = (*row_security_policy_hook_restrictive)(commandType, rel); /* Build the expression from any policies returned. 
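Stepping back to the rewriter change above, the new check in RewriteQuery() makes the INSERT/UPDATE-rule limitation user-visible as an error. A sketch with hypothetical names:

    CREATE TABLE t (id int PRIMARY KEY);
    CREATE TABLE audit (note text);
    CREATE RULE t_audit AS ON INSERT TO t DO ALSO
        INSERT INTO audit VALUES ('inserted');

    -- ERROR: INSERT with ON CONFLICT clause cannot be used with table that
    -- has INSERT or UPDATE rules
    INSERT INTO t VALUES (1) ON CONFLICT (id) DO NOTHING;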
*/ if (hook_policies_restrictive != NIL) @@ -214,7 +215,7 @@ get_row_security_policies(Query* root, RangeTblEntry* rte, int rt_index, if (row_security_policy_hook_permissive) { - hook_policies_permissive = (*row_security_policy_hook_permissive)(root->commandType, rel); + hook_policies_permissive = (*row_security_policy_hook_permissive)(commandType, rel); /* Build the expression from any policies returned. */ if (hook_policies_permissive != NIL) @@ -242,7 +243,7 @@ get_row_security_policies(Query* root, RangeTblEntry* rte, int rt_index, * WITH CHECK policy (this will be a copy of the USING policy, if no * explicit WITH CHECK policy exists). */ - if (root->commandType == CMD_INSERT || root->commandType == CMD_UPDATE) + if (commandType == CMD_INSERT || commandType == CMD_UPDATE) { /* * WITH CHECK OPTIONS wants a WCO node which wraps each Expr, so @@ -259,7 +260,7 @@ get_row_security_policies(Query* root, RangeTblEntry* rte, int rt_index, WithCheckOption *wco; wco = (WithCheckOption *) makeNode(WithCheckOption); - wco->kind = root->commandType == CMD_INSERT ? WCO_RLS_INSERT_CHECK : + wco->kind = commandType == CMD_INSERT ? WCO_RLS_INSERT_CHECK : WCO_RLS_UPDATE_CHECK; wco->relname = pstrdup(RelationGetRelationName(rel)); wco->qual = (Node *) hook_with_check_expr_restrictive; @@ -276,7 +277,7 @@ get_row_security_policies(Query* root, RangeTblEntry* rte, int rt_index, WithCheckOption *wco; wco = (WithCheckOption *) makeNode(WithCheckOption); - wco->kind = root->commandType == CMD_INSERT ? WCO_RLS_INSERT_CHECK : + wco->kind = commandType == CMD_INSERT ? WCO_RLS_INSERT_CHECK : WCO_RLS_UPDATE_CHECK; wco->relname = pstrdup(RelationGetRelationName(rel)); wco->qual = (Node *) rowsec_with_check_expr; @@ -289,7 +290,7 @@ get_row_security_policies(Query* root, RangeTblEntry* rte, int rt_index, WithCheckOption *wco; wco = (WithCheckOption *) makeNode(WithCheckOption); - wco->kind = root->commandType == CMD_INSERT ? WCO_RLS_INSERT_CHECK : + wco->kind = commandType == CMD_INSERT ? WCO_RLS_INSERT_CHECK : WCO_RLS_UPDATE_CHECK; wco->relname = pstrdup(RelationGetRelationName(rel)); wco->qual = (Node *) hook_with_check_expr_permissive; @@ -312,19 +313,72 @@ get_row_security_policies(Query* root, RangeTblEntry* rte, int rt_index, combined_qual_eval = makeBoolExpr(OR_EXPR, combined_quals, -1); wco = (WithCheckOption *) makeNode(WithCheckOption); - wco->kind = root->commandType == CMD_INSERT ? WCO_RLS_INSERT_CHECK : + wco->kind = commandType == CMD_INSERT ? WCO_RLS_INSERT_CHECK : WCO_RLS_UPDATE_CHECK; wco->relname = pstrdup(RelationGetRelationName(rel)); wco->qual = (Node *) combined_qual_eval; wco->cascaded = false; *withCheckOptions = lappend(*withCheckOptions, wco); } + + /* + * ON CONFLICT DO UPDATE has an RTE that is subject to both INSERT and + * UPDATE RLS enforcement. Those are enforced (as a special, distinct + * kind of WCO) on the target tuple. + * + * Make a second, recursive pass over the RTE for this, gathering + * UPDATE-applicable RLS checks/WCOs, and gathering and converting + * UPDATE-applicable security quals into WCO_RLS_CONFLICT_CHECK RLS + * checks/WCOs. Finally, these distinct kinds of RLS checks/WCOs are + * concatenated with our own INSERT-applicable list. 
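The net effect of the recursion described above is that INSERT ... ON CONFLICT DO UPDATE is checked against both the INSERT and UPDATE policies of the target, with the UPDATE-applicable USING quals enforced against the conflicting tuple as WCO_RLS_CONFLICT_CHECK options rather than used as filters. A sketch with hypothetical policies:

    CREATE TABLE docs (id int PRIMARY KEY, owner text, body text);
    ALTER TABLE docs ENABLE ROW LEVEL SECURITY;
    CREATE POLICY docs_ins ON docs FOR INSERT
        WITH CHECK (owner = current_user);
    CREATE POLICY docs_upd ON docs FOR UPDATE
        USING (owner = current_user);

    -- If the conflicting row is owned by someone else, the UPDATE path
    -- errors out instead of silently skipping the row:
    INSERT INTO docs VALUES (1, current_user, 'new')
    ON CONFLICT (id) DO UPDATE SET body = EXCLUDED.body;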
+ */ + if (root->onConflict && root->onConflict->action == ONCONFLICT_UPDATE && + commandType == CMD_INSERT) + { + List *conflictSecurityQuals = NIL; + List *conflictWCOs = NIL; + ListCell *item; + bool conflictHasRowSecurity = false; + bool conflictHasSublinks = false; + + /* Assume that RTE is target resultRelation */ + get_row_security_policies(root, CMD_UPDATE, rte, rt_index, + &conflictSecurityQuals, &conflictWCOs, + &conflictHasRowSecurity, + &conflictHasSublinks); + + if (conflictHasRowSecurity) + *hasRowSecurity = true; + if (conflictHasSublinks) + *hasSubLinks = true; + + /* + * Append WITH CHECK OPTIONs/RLS checks, which should not conflict + * between this INSERT and the auxiliary UPDATE. + */ + *withCheckOptions = list_concat(*withCheckOptions, + conflictWCOs); + + foreach(item, conflictSecurityQuals) + { + Expr *conflict_rowsec_expr = (Expr *) lfirst(item); + WithCheckOption *wco; + + wco = (WithCheckOption *) makeNode(WithCheckOption); + + wco->kind = WCO_RLS_CONFLICT_CHECK; + wco->relname = pstrdup(RelationGetRelationName(rel)); + wco->qual = (Node *) copyObject(conflict_rowsec_expr); + wco->cascaded = false; + *withCheckOptions = lappend(*withCheckOptions, wco); + } + } + } /* For SELECT, UPDATE, and DELETE, set the security quals */ - if (root->commandType == CMD_SELECT - || root->commandType == CMD_UPDATE - || root->commandType == CMD_DELETE) + if (commandType == CMD_SELECT + || commandType == CMD_UPDATE + || commandType == CMD_DELETE) { /* restrictive policies can simply be added to the list first */ if (hook_expr_restrictive) diff --git a/src/backend/storage/lmgr/lmgr.c b/src/backend/storage/lmgr/lmgr.c index d13a1673344..c0529497496 100644 --- a/src/backend/storage/lmgr/lmgr.c +++ b/src/backend/storage/lmgr/lmgr.c @@ -26,6 +26,24 @@ /* + * Per-backend counter for generating speculative insertion tokens. + * + * This may wrap around, but that's OK as it's only used for the short + * duration between inserting a tuple and checking that there are no (unique) + * constraint violations. It's theoretically possible that a backend sees a + * tuple that was speculatively inserted by another backend, but before it has + * started waiting on the token, the other backend completes its insertion, + * and then performs 2^32 unrelated insertions. And after all that, the + * first backend finally calls SpeculativeInsertionLockAcquire(), with the + * intention of waiting for the first insertion to complete, but ends up + * waiting for the latest unrelated insertion instead. Even then, nothing + * particularly bad happens: in the worst case they deadlock, causing one of + * the transactions to abort. + */ +static uint32 speculativeInsertionToken = 0; + + +/* + * Struct to hold context info for transaction lock waits. + * + * 'oper' is the operation that needs to wait for the other transaction; 'rel' @@ -576,6 +594,73 @@ ConditionalXactLockTableWait(TransactionId xid) } /* + * SpeculativeInsertionLockAcquire + * + * Insert a lock showing that the given transaction ID is inserting a tuple, + * but hasn't yet decided whether it's going to keep it. The lock can then be + * used to wait for the decision to go ahead with the insertion, or to abort + * it. + * + * The token is used to distinguish multiple insertions by the same + * transaction. It is returned to the caller. + */ +uint32 +SpeculativeInsertionLockAcquire(TransactionId xid) +{ + LOCKTAG tag; + + speculativeInsertionToken++; + + /* + * Check for wrap-around. Zero means no token is held, so don't use that.
+ */ + if (speculativeInsertionToken == 0) + speculativeInsertionToken = 1; + + SET_LOCKTAG_SPECULATIVE_INSERTION(tag, xid, speculativeInsertionToken); + + (void) LockAcquire(&tag, ExclusiveLock, false, false); + + return speculativeInsertionToken; +} + +/* + * SpeculativeInsertionLockRelease + * + * Delete the lock showing that the given transaction is speculatively + * inserting a tuple. + */ +void +SpeculativeInsertionLockRelease(TransactionId xid) +{ + LOCKTAG tag; + + SET_LOCKTAG_SPECULATIVE_INSERTION(tag, xid, speculativeInsertionToken); + + LockRelease(&tag, ExclusiveLock, false); +} + +/* + * SpeculativeInsertionWait + * + * Wait for the specified transaction to finish or abort the insertion of a + * tuple. + */ +void +SpeculativeInsertionWait(TransactionId xid, uint32 token) +{ + LOCKTAG tag; + + SET_LOCKTAG_SPECULATIVE_INSERTION(tag, xid, token); + + Assert(TransactionIdIsValid(xid)); + Assert(token != 0); + + (void) LockAcquire(&tag, ShareLock, false, false); + LockRelease(&tag, ShareLock, false); +} + +/* * XactLockTableWaitErrorContextCb * Error context callback for transaction lock waits. */ @@ -873,6 +958,12 @@ DescribeLockTag(StringInfo buf, const LOCKTAG *tag) tag->locktag_field1, tag->locktag_field2); break; + case LOCKTAG_SPECULATIVE_TOKEN: + appendStringInfo(buf, + _("speculative token %u of transaction %u"), + tag->locktag_field2, + tag->locktag_field1); + break; case LOCKTAG_OBJECT: appendStringInfo(buf, _("object %u of class %u of database %u"), diff --git a/src/backend/tcop/pquery.c b/src/backend/tcop/pquery.c index 9c14e8abdf8..bcffd85754c 100644 --- a/src/backend/tcop/pquery.c +++ b/src/backend/tcop/pquery.c @@ -202,8 +202,14 @@ ProcessQuery(PlannedStmt *plan, lastOid = queryDesc->estate->es_lastoid; else lastOid = InvalidOid; - snprintf(completionTag, COMPLETION_TAG_BUFSIZE, - "INSERT %u %u", lastOid, queryDesc->estate->es_processed); + if (plan->isUpsert) + snprintf(completionTag, COMPLETION_TAG_BUFSIZE, + "UPSERT %u %u", + lastOid, queryDesc->estate->es_processed); + else + snprintf(completionTag, COMPLETION_TAG_BUFSIZE, + "INSERT %u %u", + lastOid, queryDesc->estate->es_processed); break; case CMD_UPDATE: snprintf(completionTag, COMPLETION_TAG_BUFSIZE, @@ -1356,7 +1362,10 @@ PortalRunMulti(Portal portal, bool isTopLevel, * 0" here because technically there is no query of the matching tag type, * and printing a non-zero count for a different query type seems wrong, * e.g. an INSERT that does an UPDATE instead should not print "0 1" if - * one row was updated. See QueryRewrite(), step 3, for details. + * one row was updated (unless the ON CONFLICT DO UPDATE, or "UPSERT" + * variant of INSERT was used to update the row, where it's logically a + * direct effect of the top level command). See QueryRewrite(), step 3, + * for details. 
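With the ProcessQuery() change above, the command tag reported to the client distinguishes the two INSERT paths. In a psql session, and again assuming the hypothetical table t, that would look roughly like:

    =# INSERT INTO t VALUES (1, 'x') ON CONFLICT (id) DO UPDATE SET v = EXCLUDED.v;
    UPSERT 0 1
    =# INSERT INTO t VALUES (2, 'y');
    INSERT 0 1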
diff --git a/src/backend/utils/adt/lockfuncs.c b/src/backend/utils/adt/lockfuncs.c
index 491824dd6bf..9d53a8b6a32 100644
--- a/src/backend/utils/adt/lockfuncs.c
+++ b/src/backend/utils/adt/lockfuncs.c
@@ -29,6 +29,7 @@ static const char *const LockTagTypeNames[] = {
 	"tuple",
 	"transactionid",
 	"virtualxid",
+	"speculative token",
 	"object",
 	"userlock",
 	"advisory"
diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c
index 69267bdb918..4b3cd85ad90 100644
--- a/src/backend/utils/adt/ruleutils.c
+++ b/src/backend/utils/adt/ruleutils.c
@@ -354,6 +354,9 @@ static void get_select_query_def(Query *query, deparse_context *context,
 				 TupleDesc resultDesc);
 static void get_insert_query_def(Query *query, deparse_context *context);
 static void get_update_query_def(Query *query, deparse_context *context);
+static void get_update_query_targetlist_def(Query *query, List *targetList,
+								deparse_context *context,
+								RangeTblEntry *rte);
 static void get_delete_query_def(Query *query, deparse_context *context);
 static void get_utility_query_def(Query *query, deparse_context *context);
 static void get_basic_select_query(Query *query, deparse_context *context,
@@ -3846,15 +3849,23 @@ set_deparse_planstate(deparse_namespace *dpns, PlanState *ps)
 	 * For a SubqueryScan, pretend the subplan is INNER referent.  (We don't
 	 * use OUTER because that could someday conflict with the normal meaning.)
 	 * Likewise, for a CteScan, pretend the subquery's plan is INNER referent.
+	 * For ON CONFLICT .. UPDATE we just need the inner tlist to point to the
+	 * excluded expression's tlist.  (Similarly to the SubqueryScan case, we
+	 * don't want to reuse OUTER: it's used for RETURNING in some ModifyTable
+	 * cases, although not for INSERT .. ON CONFLICT.)
 	 */
 	if (IsA(ps, SubqueryScanState))
 		dpns->inner_planstate = ((SubqueryScanState *) ps)->subplan;
 	else if (IsA(ps, CteScanState))
 		dpns->inner_planstate = ((CteScanState *) ps)->cteplanstate;
+	else if (IsA(ps, ModifyTableState))
+		dpns->inner_planstate = ps;
 	else
 		dpns->inner_planstate = innerPlanState(ps);
 
-	if (dpns->inner_planstate)
+	if (IsA(ps, ModifyTableState))
+		dpns->inner_tlist = ((ModifyTableState *) ps)->mt_excludedtlist;
+	else if (dpns->inner_planstate)
 		dpns->inner_tlist = dpns->inner_planstate->plan->targetlist;
 	else
 		dpns->inner_tlist = NIL;
@@ -5302,6 +5313,32 @@ get_insert_query_def(Query *query, deparse_context *context)
 		appendStringInfoString(buf, "DEFAULT VALUES");
 	}
 
+	/* Add ON CONFLICT if present */
+	if (query->onConflict)
+	{
+		OnConflictExpr *confl = query->onConflict;
+
+		if (confl->action == ONCONFLICT_NOTHING)
+		{
+			appendStringInfoString(buf, " ON CONFLICT DO NOTHING");
+		}
+		else
+		{
+			appendStringInfoString(buf, " ON CONFLICT DO UPDATE SET ");
+			/* Deparse targetlist */
+			get_update_query_targetlist_def(query, confl->onConflictSet,
+											context, rte);
+
+			/* Add a WHERE clause if given */
+			if (confl->onConflictWhere != NULL)
+			{
+				appendContextKeyword(context, " WHERE ",
+									 -PRETTYINDENT_STD, PRETTYINDENT_STD, 1);
+				get_rule_expr(confl->onConflictWhere, context, false);
+			}
+		}
+	}
+
 	/* Add RETURNING if present */
 	if (query->returningList)
 	{
@@ -5321,12 +5358,6 @@ get_update_query_def(Query *query, deparse_context *context)
 {
 	StringInfo	buf = context->buf;
 	RangeTblEntry *rte;
-	List	   *ma_sublinks;
-	ListCell   *next_ma_cell;
-	SubLink    *cur_ma_sublink;
-	int			remaining_ma_columns;
-	const char *sep;
-	ListCell   *l;
 
 	/* Insert the WITH clause if given */
 	get_with_clause(query, context);
@@ -5349,6 +5380,46 @@ get_update_query_def(Query *query, deparse_context *context)
 				quote_identifier(rte->alias->aliasname));
 	appendStringInfoString(buf, " SET ");
 
+	/* Deparse targetlist */
+	get_update_query_targetlist_def(query, query->targetList, context, rte);
+
+	/* Add the FROM clause if needed */
+	get_from_clause(query, " FROM ", context);
+
+	/* Add a WHERE clause if given */
+	if (query->jointree->quals != NULL)
+	{
+		appendContextKeyword(context, " WHERE ",
+							 -PRETTYINDENT_STD, PRETTYINDENT_STD, 1);
+		get_rule_expr(query->jointree->quals, context, false);
+	}
+
+	/* Add RETURNING if present */
+	if (query->returningList)
+	{
+		appendContextKeyword(context, " RETURNING",
+							 -PRETTYINDENT_STD, PRETTYINDENT_STD, 1);
+		get_target_list(query->returningList, context, NULL);
+	}
+}
+
+
+/* ----------
+ * get_update_query_targetlist_def			- Parse back an UPDATE targetlist
+ * ----------
+ */
+static void
+get_update_query_targetlist_def(Query *query, List *targetList,
+								deparse_context *context, RangeTblEntry *rte)
+{
+	StringInfo	buf = context->buf;
+	ListCell   *l;
+	ListCell   *next_ma_cell;
+	int			remaining_ma_columns;
+	const char *sep;
+	SubLink    *cur_ma_sublink;
+	List	   *ma_sublinks;
+
 	/*
 	 * Prepare to deal with MULTIEXPR assignments: collect the source SubLinks
 	 * into a list.  We expect them to appear, in ID order, in resjunk tlist
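For reference, the clause shapes get_insert_query_def() now appends can be sketched standalone. The helper below is hypothetical and not part of the patch; it uses a plain char buffer in place of StringInfo and string constants in place of the deparsed targetlist and qual, but mirrors the same branch structure (nothing appended, a bare DO NOTHING clause, or DO UPDATE SET followed by an optional WHERE).

#include <stdio.h>
#include <string.h>

typedef enum { OC_NONE, OC_NOTHING, OC_UPDATE } OCAction;

static void
append_on_conflict(char *buf, size_t bufsize, OCAction action,
                   const char *set_list, const char *where_qual)
{
    size_t  used = strlen(buf);

    if (action == OC_NOTHING)
        snprintf(buf + used, bufsize - used, " ON CONFLICT DO NOTHING");
    else if (action == OC_UPDATE)
        snprintf(buf + used, bufsize - used,
                 " ON CONFLICT DO UPDATE SET %s%s%s",
                 set_list,
                 where_qual ? " WHERE " : "",
                 where_qual ? where_qual : "");
    /* OC_NONE: plain INSERT, nothing appended */
}

int
main(void)
{
    char    q[256] = "INSERT INTO t (k, v) VALUES (1, 'x')";

    append_on_conflict(q, sizeof(q), OC_UPDATE, "v = 'x'", "t.k = 1");
    puts(q);  /* ... ON CONFLICT DO UPDATE SET v = 'x' WHERE t.k = 1 */
    return 0;
}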
@@ -5357,7 +5428,7 @@ get_update_query_def(Query *query, deparse_context *context)
 	ma_sublinks = NIL;
 	if (query->hasSubLinks)		/* else there can't be any */
 	{
-		foreach(l, query->targetList)
+		foreach(l, targetList)
 		{
 			TargetEntry *tle = (TargetEntry *) lfirst(l);
 
@@ -5379,7 +5450,7 @@ get_update_query_def(Query *query, deparse_context *context)
 
 	/* Add the comma separated list of 'attname = value' */
 	sep = "";
-	foreach(l, query->targetList)
+	foreach(l, targetList)
 	{
 		TargetEntry *tle = (TargetEntry *) lfirst(l);
 		Node	   *expr;
@@ -5470,25 +5541,6 @@ get_update_query_def(Query *query, deparse_context *context)
 
 		get_rule_expr(expr, context, false);
 	}
-
-	/* Add the FROM clause if needed */
-	get_from_clause(query, " FROM ", context);
-
-	/* Add a WHERE clause if given */
-	if (query->jointree->quals != NULL)
-	{
-		appendContextKeyword(context, " WHERE ",
-							 -PRETTYINDENT_STD, PRETTYINDENT_STD, 1);
-		get_rule_expr(query->jointree->quals, context, false);
-	}
-
-	/* Add RETURNING if present */
-	if (query->returningList)
-	{
-		appendContextKeyword(context, " RETURNING",
-							 -PRETTYINDENT_STD, PRETTYINDENT_STD, 1);
-		get_target_list(query->returningList, context, NULL);
-	}
 }
diff --git a/src/backend/utils/time/tqual.c b/src/backend/utils/time/tqual.c
index a4a478d1142..b4284d6d94f 100644
--- a/src/backend/utils/time/tqual.c
+++ b/src/backend/utils/time/tqual.c
@@ -405,6 +405,13 @@ HeapTupleSatisfiesToast(HeapTuple htup, Snapshot snapshot,
 			}
 		}
 	}
+	/*
+	 * An invalid Xmin can be left behind by a speculative insertion that
+	 * is cancelled by super-deleting the tuple.  We shouldn't see any of
+	 * those in TOAST tables, but better safe than sorry.
+	 */
+	else if (!TransactionIdIsValid(HeapTupleHeaderGetXmin(tuple)))
+		return false;
 
 	/* otherwise assume the tuple is valid for TOAST. */
 	return true;
@@ -714,8 +721,11 @@ HeapTupleSatisfiesUpdate(HeapTuple htup, CommandId curcid,
 * output argument to return the xids of concurrent xacts that affected the
 * tuple.  snapshot->xmin is set to the tuple's xmin if that is another
 * transaction that's still in progress; or to InvalidTransactionId if the
- * tuple's xmin is committed good, committed dead, or my own xact.  Similarly
- * for snapshot->xmax and the tuple's xmax.
+ * tuple's xmin is committed good, committed dead, or my own xact.
+ * Similarly for snapshot->xmax and the tuple's xmax.  If the tuple was
+ * inserted speculatively, meaning that the inserter might still back down
+ * on the insertion without aborting the whole transaction, the associated
+ * token is also returned in snapshot->speculativeToken.
 */
 bool
 HeapTupleSatisfiesDirty(HeapTuple htup, Snapshot snapshot,
@@ -727,6 +737,7 @@ HeapTupleSatisfiesDirty(HeapTuple htup, Snapshot snapshot,
 	Assert(htup->t_tableOid != InvalidOid);
 
 	snapshot->xmin = snapshot->xmax = InvalidTransactionId;
+	snapshot->speculativeToken = 0;
 
 	if (!HeapTupleHeaderXminCommitted(tuple))
 	{
@@ -808,6 +819,20 @@ HeapTupleSatisfiesDirty(HeapTuple htup, Snapshot snapshot,
 		}
 		else if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmin(tuple)))
 		{
+			/*
+			 * Return the speculative token to the caller.  The caller can
+			 * worry about xmax itself, since it requires a conclusively
+			 * locked row version, and a concurrent update to this tuple
+			 * would be a conflict for its purposes anyway.
+			 */
+			if (HeapTupleHeaderIsSpeculative(tuple))
+			{
+				snapshot->speculativeToken =
+					HeapTupleHeaderGetSpeculativeToken(tuple);
+
+				Assert(snapshot->speculativeToken != 0);
+			}
+
 			snapshot->xmin = HeapTupleHeaderGetRawXmin(tuple);
 			/* XXX shouldn't we fall through to look at xmax? */
 			return true;		/* in insertion by other */
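Putting the tqual.c and lmgr.c pieces together: when HeapTupleSatisfiesDirty() reports an in-progress inserter, a would-be conflicting inserter can take the narrower wait if a token is present. The sketch below is schematic only; DirtySnapshot, wait_for_token() and wait_for_xact() are simplified stand-ins for the backend's SnapshotData, SpeculativeInsertionWait() and XactLockTableWait().

#include <stdint.h>
#include <stdio.h>

typedef uint32_t TransactionId;        /* stand-in for the backend typedef */

typedef struct DirtySnapshot
{
    TransactionId xmin;                /* in-progress inserter, if any */
    uint32_t      speculativeToken;    /* nonzero while insertion in doubt */
} DirtySnapshot;

/* Stand-ins for SpeculativeInsertionWait() and XactLockTableWait() */
static void
wait_for_token(TransactionId xid, uint32_t token)
{
    printf("waiting on speculative token %u of transaction %u\n", token, xid);
}

static void
wait_for_xact(TransactionId xid)
{
    printf("waiting for transaction %u to commit or abort\n", xid);
}

/*
 * After the dirty-snapshot check says "in insertion by other": with a
 * token, wait only until the inserter confirms or super-deletes; without
 * one, fall back to waiting for its whole transaction.
 */
static void
wait_for_inserter(const DirtySnapshot *snap)
{
    if (snap->speculativeToken != 0)
        wait_for_token(snap->xmin, snap->speculativeToken);
    else
        wait_for_xact(snap->xmin);
    /* ...then re-fetch the tuple and re-check visibility... */
}

int
main(void)
{
    DirtySnapshot spec = {.xmin = 1234, .speculativeToken = 7};
    DirtySnapshot plain = {.xmin = 1234, .speculativeToken = 0};

    wait_for_inserter(&spec);
    wait_for_inserter(&plain);
    return 0;
}

The short wait is the point of the whole mechanism: two sessions inserting the same key must not block on each other's transactions (that is how ordinary unique-index waits deadlock under ON CONFLICT), only on each other's pending insertions.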