Diffstat (limited to 'src/backend')
42 files changed, 3140 insertions, 325 deletions
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index e84c1743f4f..7ea9a77e7ea 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -2061,8 +2061,17 @@ FreeBulkInsertState(BulkInsertState bistate) * This causes rows to be frozen, which is an MVCC violation and * requires explicit options chosen by user. * + * HEAP_INSERT_SPECULATIVE is used on so-called "speculative insertions", + * which can be backed out afterwards without aborting the whole transaction. + * Other sessions can wait for the speculative insertion to be confirmed, + * turning it into a regular tuple, or aborted, as if it never existed. + * Speculatively inserted tuples behave as "value locks" of short duration, + * used to implement INSERT ... ON CONFLICT. + * * Note that these options will be applied when inserting into the heap's * TOAST table, too, if the tuple requires any out-of-line data. + * FIXME: Do we mark TOAST tuples as speculative too? What about confirming + * or aborting them? * * The BulkInsertState object (if any; bistate can be NULL for default * behavior) is also just passed through to RelationGetBufferForTuple. @@ -2115,7 +2124,8 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid, /* NO EREPORT(ERROR) from here till changes are logged */ START_CRIT_SECTION(); - RelationPutHeapTuple(relation, buffer, heaptup); + RelationPutHeapTuple(relation, buffer, heaptup, + (options & HEAP_INSERT_SPECULATIVE) != 0); if (PageIsAllVisible(BufferGetPage(buffer))) { @@ -2169,7 +2179,11 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid, } xlrec.offnum = ItemPointerGetOffsetNumber(&heaptup->t_self); - xlrec.flags = all_visible_cleared ? XLOG_HEAP_ALL_VISIBLE_CLEARED : 0; + xlrec.flags = 0; + if (all_visible_cleared) + xlrec.flags |= XLH_INSERT_ALL_VISIBLE_CLEARED; + if (options & HEAP_INSERT_SPECULATIVE) + xlrec.flags |= XLH_INSERT_IS_SPECULATIVE; Assert(ItemPointerGetBlockNumber(&heaptup->t_self) == BufferGetBlockNumber(buffer)); /* @@ -2179,7 +2193,7 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid, */ if (RelationIsLogicallyLogged(relation)) { - xlrec.flags |= XLOG_HEAP_CONTAINS_NEW_TUPLE; + xlrec.flags |= XLH_INSERT_CONTAINS_NEW_TUPLE; bufflags |= REGBUF_KEEP_DATA; } @@ -2224,6 +2238,7 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid, */ CacheInvalidateHeapTuple(relation, heaptup, NULL); + /* Note: speculative insertions are counted too, even if aborted later */ pgstat_count_heap_insert(relation, 1); /* @@ -2395,7 +2410,7 @@ heap_multi_insert(Relation relation, HeapTuple *tuples, int ntuples, * RelationGetBufferForTuple has ensured that the first tuple fits. * Put that on the page, and then as many other tuples as fit.
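The new flag is easiest to understand from the caller's side. Below is a condensed, hypothetical sketch of the lifecycle this patch expects (modeled on the executor changes in nodeModifyTable.c, which fall largely outside this excerpt). `rel`, `tuple`, and `conflicted` stand in for caller state; `SpeculativeInsertionLockAcquire()`/`Release()` and `HeapTupleHeaderSetSpeculativeToken()` come from the lock-manager and header-macro parts of the patchset, so treat their exact shapes as assumptions:

```c
/*
 * Hypothetical caller sketch, not the literal executor code: take a
 * speculative-token lock, stamp the token into the tuple, insert with
 * HEAP_INSERT_SPECULATIVE, then either confirm or back out.
 */
uint32		specToken;

specToken = SpeculativeInsertionLockAcquire(GetCurrentTransactionId());
HeapTupleHeaderSetSpeculativeToken(tuple->t_data, specToken);

heap_insert(rel, tuple, GetCurrentCommandId(true),
			HEAP_INSERT_SPECULATIVE, NULL);

/* ... insert index entries; they may report a conflict ... */

if (conflicted)
	heap_abort_speculative(rel, tuple);		/* back the tuple out */
else
	heap_finish_speculative(rel, tuple);	/* turn it into a regular tuple */

SpeculativeInsertionLockRelease(GetCurrentTransactionId());
```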
*/ - RelationPutHeapTuple(relation, buffer, heaptuples[ndone]); + RelationPutHeapTuple(relation, buffer, heaptuples[ndone], false); for (nthispage = 1; ndone + nthispage < ntuples; nthispage++) { HeapTuple heaptup = heaptuples[ndone + nthispage]; @@ -2403,7 +2418,7 @@ heap_multi_insert(Relation relation, HeapTuple *tuples, int ntuples, if (PageGetHeapFreeSpace(page) < MAXALIGN(heaptup->t_len) + saveFreeSpace) break; - RelationPutHeapTuple(relation, buffer, heaptup); + RelationPutHeapTuple(relation, buffer, heaptup, false); /* * We don't use heap_multi_insert for catalog tuples yet, but @@ -2463,7 +2478,7 @@ heap_multi_insert(Relation relation, HeapTuple *tuples, int ntuples, /* the rest of the scratch space is used for tuple data */ tupledata = scratchptr; - xlrec->flags = all_visible_cleared ? XLOG_HEAP_ALL_VISIBLE_CLEARED : 0; + xlrec->flags = all_visible_cleared ? XLH_INSERT_ALL_VISIBLE_CLEARED : 0; xlrec->ntuples = nthispage; /* @@ -2498,7 +2513,7 @@ heap_multi_insert(Relation relation, HeapTuple *tuples, int ntuples, Assert((scratchptr - scratch) < BLCKSZ); if (need_tuple_data) - xlrec->flags |= XLOG_HEAP_CONTAINS_NEW_TUPLE; + xlrec->flags |= XLH_INSERT_CONTAINS_NEW_TUPLE; /* * Signal that this is the last xl_heap_multi_insert record @@ -2506,7 +2521,7 @@ heap_multi_insert(Relation relation, HeapTuple *tuples, int ntuples, * decoding so it knows when to cleanup temporary data. */ if (ndone + nthispage == ntuples) - xlrec->flags |= XLOG_HEAP_LAST_MULTI_INSERT; + xlrec->flags |= XLH_INSERT_LAST_IN_MULTI; if (init) { @@ -2914,7 +2929,12 @@ l1: MarkBufferDirty(buffer); - /* XLOG stuff */ + /* + * XLOG stuff + * + * NB: heap_abort_speculative() uses the same xlog record and replay + * routines. + */ if (RelationNeedsWAL(relation)) { xl_heap_delete xlrec; @@ -2924,7 +2944,7 @@ l1: if (RelationIsAccessibleInLogicalDecoding(relation)) log_heap_new_cid(relation, &tp); - xlrec.flags = all_visible_cleared ? XLOG_HEAP_ALL_VISIBLE_CLEARED : 0; + xlrec.flags = all_visible_cleared ? XLH_DELETE_ALL_VISIBLE_CLEARED : 0; xlrec.infobits_set = compute_infobits(tp.t_data->t_infomask, tp.t_data->t_infomask2); xlrec.offnum = ItemPointerGetOffsetNumber(&tp.t_self); @@ -2933,9 +2953,9 @@ l1: if (old_key_tuple != NULL) { if (relation->rd_rel->relreplident == REPLICA_IDENTITY_FULL) - xlrec.flags |= XLOG_HEAP_CONTAINS_OLD_TUPLE; + xlrec.flags |= XLH_DELETE_CONTAINS_OLD_TUPLE; else - xlrec.flags |= XLOG_HEAP_CONTAINS_OLD_KEY; + xlrec.flags |= XLH_DELETE_CONTAINS_OLD_KEY; } XLogBeginInsert(); @@ -3742,7 +3762,7 @@ l2: HeapTupleClearHeapOnly(newtup); } - RelationPutHeapTuple(relation, newbuf, heaptup); /* insert new tuple */ + RelationPutHeapTuple(relation, newbuf, heaptup, false); /* insert new tuple */ if (!already_marked) { @@ -4133,14 +4153,16 @@ get_mxact_status_for_lock(LockTupleMode mode, bool is_update) * * Function result may be: * HeapTupleMayBeUpdated: lock was successfully acquired + * HeapTupleInvisible: lock failed because tuple was never visible to us * HeapTupleSelfUpdated: lock failed because tuple updated by self * HeapTupleUpdated: lock failed because tuple updated by other xact * HeapTupleWouldBlock: lock couldn't be acquired and wait_policy is skip * - * In the failure cases, the routine fills *hufd with the tuple's t_ctid, - * t_xmax (resolving a possible MultiXact, if necessary), and t_cmax - * (the last only for HeapTupleSelfUpdated, since we - * cannot obtain cmax from a combocid generated by another transaction). 
+ * In the failure cases other than HeapTupleInvisible, the routine fills + * *hufd with the tuple's t_ctid, t_xmax (resolving a possible MultiXact, + * if necessary), and t_cmax (the last only for HeapTupleSelfUpdated, + * since we cannot obtain cmax from a combocid generated by another + * transaction). * See comments for struct HeapUpdateFailureData for additional info. * * See README.tuplock for a thorough explanation of this mechanism. @@ -4179,8 +4201,15 @@ l3: if (result == HeapTupleInvisible) { - UnlockReleaseBuffer(*buffer); - elog(ERROR, "attempted to lock invisible tuple"); + LockBuffer(*buffer, BUFFER_LOCK_UNLOCK); + + /* + * This is possible, but only when locking a tuple for ON CONFLICT + * UPDATE. We return this value here rather than throwing an error in + * order to give that case the opportunity to throw a more specific + * error. + */ + return HeapTupleInvisible; } else if (result == HeapTupleBeingUpdated) { @@ -5417,6 +5446,234 @@ heap_lock_updated_tuple(Relation rel, HeapTuple tuple, ItemPointer ctid, return HeapTupleMayBeUpdated; } +/* + * heap_finish_speculative - mark speculative insertion as successful + * + * To successfully finish a speculative insertion we have to clear the + * speculative token from the tuple. To do so, the t_ctid field, which will + * contain a speculative token value, is modified in place to point to the + * tuple itself, which is characteristic of a newly inserted ordinary tuple. + * + * NB: It is not OK to commit without either finishing or aborting a + * speculative insertion. We could treat speculative tuples of committed + * transactions implicitly as completed, but then we would have to be prepared + * to deal with speculative tokens on committed tuples. That wouldn't be + * difficult - no-one looks at the ctid field of a tuple with invalid xmax - + * but clearing the token at completion isn't very expensive either. + * An explicit confirmation WAL record also makes logical decoding simpler. + */ +void +heap_finish_speculative(Relation relation, HeapTuple tuple) +{ + Buffer buffer; + Page page; + OffsetNumber offnum; + ItemId lp = NULL; + HeapTupleHeader htup; + + buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(&(tuple->t_self))); + LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); + page = (Page) BufferGetPage(buffer); + + offnum = ItemPointerGetOffsetNumber(&(tuple->t_self)); + if (PageGetMaxOffsetNumber(page) >= offnum) + lp = PageGetItemId(page, offnum); + + if (PageGetMaxOffsetNumber(page) < offnum || !ItemIdIsNormal(lp)) + elog(ERROR, "heap_finish_speculative: invalid lp"); + + htup = (HeapTupleHeader) PageGetItem(page, lp); + + /* SpecTokenOffsetNumber should be distinguishable from any real offset */ + StaticAssertStmt(MaxOffsetNumber < SpecTokenOffsetNumber, + "invalid speculative token constant"); + + /* NO EREPORT(ERROR) from here till changes are logged */ + START_CRIT_SECTION(); + + Assert(HeapTupleHeaderIsSpeculative(tuple->t_data)); + + MarkBufferDirty(buffer); + + /* + * Replace the speculative insertion token with a real t_ctid, + * pointing to itself as it does on regular tuples.
+ */ + htup->t_ctid = tuple->t_self; + + /* XLOG stuff */ + if (RelationNeedsWAL(relation)) + { + xl_heap_confirm xlrec; + XLogRecPtr recptr; + + xlrec.offnum = ItemPointerGetOffsetNumber(&tuple->t_self); + + XLogBeginInsert(); + + /* We want the same filtering on this as on a plain insert */ + XLogIncludeOrigin(); + + XLogRegisterData((char *) &xlrec, SizeOfHeapConfirm); + XLogRegisterBuffer(0, buffer, REGBUF_STANDARD); + + recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_CONFIRM); + + PageSetLSN(page, recptr); + } + + END_CRIT_SECTION(); + + UnlockReleaseBuffer(buffer); +} + +/* + * heap_abort_speculative - kill a speculatively inserted tuple + * + * Marks a tuple that was speculatively inserted in the same command as dead, + * by setting its xmin to invalid. That makes it immediately appear as dead + * to all transactions, including our own. In particular, it makes + * HeapTupleSatisfiesDirty() regard the tuple as dead, so that another backend + * inserting a duplicate key value won't unnecessarily wait for our whole + * transaction to finish (it'll just wait for our speculative insertion to + * finish). + * + * Killing the tuple prevents "unprincipled deadlocks", which are deadlocks + * that arise due to a mutual dependency that is not user visible. By + * definition, unprincipled deadlocks cannot be prevented by the user + * reordering lock acquisition in client code, because the implementation level + * lock acquisitions are not under the user's direct control. If speculative + * inserters did not take this precaution, then under high concurrency they + * could deadlock with each other, which would not be acceptable. + * + * This is somewhat redundant with heap_delete, but we prefer to have a + * dedicated routine with stripped down requirements. + * + * This routine does not affect logical decoding, as logical decoding only + * looks at confirmation records. + */ +void +heap_abort_speculative(Relation relation, HeapTuple tuple) +{ + TransactionId xid = GetCurrentTransactionId(); + ItemPointer tid = &(tuple->t_self); + ItemId lp; + HeapTupleData tp; + Page page; + BlockNumber block; + Buffer buffer; + + Assert(ItemPointerIsValid(tid)); + + block = ItemPointerGetBlockNumber(tid); + buffer = ReadBuffer(relation, block); + page = BufferGetPage(buffer); + + LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); + + /* + * Page can't be all visible, we just inserted into it, and are still + * running. + */ + Assert(!PageIsAllVisible(page)); + + lp = PageGetItemId(page, ItemPointerGetOffsetNumber(tid)); + Assert(ItemIdIsNormal(lp)); + + tp.t_tableOid = RelationGetRelid(relation); + tp.t_data = (HeapTupleHeader) PageGetItem(page, lp); + tp.t_len = ItemIdGetLength(lp); + tp.t_self = *tid; + + /* + * Sanity check that the tuple really is a speculatively inserted tuple, + * inserted by us. + */ + if (tp.t_data->t_choice.t_heap.t_xmin != xid) + elog(ERROR, "attempted to kill a tuple inserted by another transaction"); + if (!HeapTupleHeaderIsSpeculative(tp.t_data)) + elog(ERROR, "attempted to kill a non-speculative tuple"); + Assert(!HeapTupleHeaderIsHeapOnly(tp.t_data)); + + /* + * No need to check for serializable conflicts here. There is never a + * need for a combocid, either. No need to extract replica identity, or + * do anything special with infomask bits. + */ + + START_CRIT_SECTION(); + + /* + * The tuple will become DEAD immediately. Flag that this page is + * immediately a candidate for pruning by setting xmin to + * RecentGlobalXmin. That's not pretty, but it doesn't seem worth + * inventing a nicer API for this.
+ */ + Assert(TransactionIdIsValid(RecentGlobalXmin)); + PageSetPrunable(page, RecentGlobalXmin); + + /* store transaction information of xact deleting the tuple */ + tp.t_data->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED); + tp.t_data->t_infomask2 &= ~HEAP_KEYS_UPDATED; + + /* + * Set the tuple header xmin to InvalidTransactionId. This makes the + * tuple immediately invisible to everyone. (In particular, to any + * transactions waiting on the speculative token, woken up later.) + */ + HeapTupleHeaderSetXmin(tp.t_data, InvalidTransactionId); + + /* Clear the speculative insertion token too */ + tp.t_data->t_ctid = tp.t_self; + + MarkBufferDirty(buffer); + + /* + * XLOG stuff + * + * The WAL records generated here match heap_delete(). The same recovery + * routines are used. + */ + if (RelationNeedsWAL(relation)) + { + xl_heap_delete xlrec; + XLogRecPtr recptr; + + xlrec.flags = XLH_DELETE_IS_SUPER; + xlrec.infobits_set = compute_infobits(tp.t_data->t_infomask, + tp.t_data->t_infomask2); + xlrec.offnum = ItemPointerGetOffsetNumber(&tp.t_self); + xlrec.xmax = xid; + + XLogBeginInsert(); + XLogRegisterData((char *) &xlrec, SizeOfHeapDelete); + XLogRegisterBuffer(0, buffer, REGBUF_STANDARD); + + /* No replica identity & replication origin logged */ + + recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_DELETE); + + PageSetLSN(page, recptr); + } + + END_CRIT_SECTION(); + + LockBuffer(buffer, BUFFER_LOCK_UNLOCK); + + if (HeapTupleHasExternal(&tp)) + toast_delete(relation, &tp); + + /* + * Never need to mark tuple for invalidation, since catalogs don't support + * speculative insertion + */ + + /* Now we can release the buffer */ + ReleaseBuffer(buffer); + + /* count deletion, as we counted the insertion too */ + pgstat_count_heap_delete(relation); +} /* * heap_inplace_update - update a tuple "in place" (ie, overwrite it) @@ -6732,22 +6989,22 @@ log_heap_update(Relation reln, Buffer oldbuf, /* Prepare main WAL data chain */ xlrec.flags = 0; if (all_visible_cleared) - xlrec.flags |= XLOG_HEAP_ALL_VISIBLE_CLEARED; + xlrec.flags |= XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED; if (new_all_visible_cleared) - xlrec.flags |= XLOG_HEAP_NEW_ALL_VISIBLE_CLEARED; + xlrec.flags |= XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED; if (prefixlen > 0) - xlrec.flags |= XLOG_HEAP_PREFIX_FROM_OLD; + xlrec.flags |= XLH_UPDATE_PREFIX_FROM_OLD; if (suffixlen > 0) - xlrec.flags |= XLOG_HEAP_SUFFIX_FROM_OLD; + xlrec.flags |= XLH_UPDATE_SUFFIX_FROM_OLD; if (need_tuple_data) { - xlrec.flags |= XLOG_HEAP_CONTAINS_NEW_TUPLE; + xlrec.flags |= XLH_UPDATE_CONTAINS_NEW_TUPLE; if (old_key_tuple) { if (reln->rd_rel->relreplident == REPLICA_IDENTITY_FULL) - xlrec.flags |= XLOG_HEAP_CONTAINS_OLD_TUPLE; + xlrec.flags |= XLH_UPDATE_CONTAINS_OLD_TUPLE; else - xlrec.flags |= XLOG_HEAP_CONTAINS_OLD_KEY; + xlrec.flags |= XLH_UPDATE_CONTAINS_OLD_KEY; } } @@ -7378,7 +7635,7 @@ heap_xlog_delete(XLogReaderState *record) * The visibility map may need to be fixed even if the heap page is * already up-to-date.
*/ - if (xlrec->flags & XLOG_HEAP_ALL_VISIBLE_CLEARED) + if (xlrec->flags & XLH_DELETE_ALL_VISIBLE_CLEARED) { Relation reln = CreateFakeRelcacheEntry(target_node); Buffer vmbuffer = InvalidBuffer; @@ -7406,13 +7663,16 @@ heap_xlog_delete(XLogReaderState *record) HeapTupleHeaderClearHotUpdated(htup); fix_infomask_from_infobits(xlrec->infobits_set, &htup->t_infomask, &htup->t_infomask2); - HeapTupleHeaderSetXmax(htup, xlrec->xmax); + if (!(xlrec->flags & XLH_DELETE_IS_SUPER)) + HeapTupleHeaderSetXmax(htup, xlrec->xmax); + else + HeapTupleHeaderSetXmin(htup, InvalidTransactionId); HeapTupleHeaderSetCmax(htup, FirstCommandId, false); /* Mark the page as a candidate for pruning */ PageSetPrunable(page, XLogRecGetXid(record)); - if (xlrec->flags & XLOG_HEAP_ALL_VISIBLE_CLEARED) + if (xlrec->flags & XLH_DELETE_ALL_VISIBLE_CLEARED) PageClearAllVisible(page); /* Make sure there is no forward chain link in t_ctid */ @@ -7453,7 +7713,7 @@ heap_xlog_insert(XLogReaderState *record) * The visibility map may need to be fixed even if the heap page is * already up-to-date. */ - if (xlrec->flags & XLOG_HEAP_ALL_VISIBLE_CLEARED) + if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED) { Relation reln = CreateFakeRelcacheEntry(target_node); Buffer vmbuffer = InvalidBuffer; @@ -7516,7 +7776,7 @@ heap_xlog_insert(XLogReaderState *record) PageSetLSN(page, lsn); - if (xlrec->flags & XLOG_HEAP_ALL_VISIBLE_CLEARED) + if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED) PageClearAllVisible(page); MarkBufferDirty(buffer); @@ -7573,7 +7833,7 @@ heap_xlog_multi_insert(XLogReaderState *record) * The visibility map may need to be fixed even if the heap page is * already up-to-date. */ - if (xlrec->flags & XLOG_HEAP_ALL_VISIBLE_CLEARED) + if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED) { Relation reln = CreateFakeRelcacheEntry(rnode); Buffer vmbuffer = InvalidBuffer; @@ -7655,7 +7915,7 @@ heap_xlog_multi_insert(XLogReaderState *record) PageSetLSN(page, lsn); - if (xlrec->flags & XLOG_HEAP_ALL_VISIBLE_CLEARED) + if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED) PageClearAllVisible(page); MarkBufferDirty(buffer); @@ -7728,7 +7988,7 @@ heap_xlog_update(XLogReaderState *record, bool hot_update) * The visibility map may need to be fixed even if the heap page is * already up-to-date. */ - if (xlrec->flags & XLOG_HEAP_ALL_VISIBLE_CLEARED) + if (xlrec->flags & XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED) { Relation reln = CreateFakeRelcacheEntry(rnode); Buffer vmbuffer = InvalidBuffer; @@ -7783,7 +8043,7 @@ heap_xlog_update(XLogReaderState *record, bool hot_update) /* Mark the page as a candidate for pruning */ PageSetPrunable(page, XLogRecGetXid(record)); - if (xlrec->flags & XLOG_HEAP_ALL_VISIBLE_CLEARED) + if (xlrec->flags & XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED) PageClearAllVisible(page); PageSetLSN(page, lsn); @@ -7812,7 +8072,7 @@ heap_xlog_update(XLogReaderState *record, bool hot_update) * The visibility map may need to be fixed even if the heap page is * already up-to-date. 
*/ - if (xlrec->flags & XLOG_HEAP_NEW_ALL_VISIBLE_CLEARED) + if (xlrec->flags & XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED) { Relation reln = CreateFakeRelcacheEntry(rnode); Buffer vmbuffer = InvalidBuffer; @@ -7840,13 +8100,13 @@ heap_xlog_update(XLogReaderState *record, bool hot_update) if (PageGetMaxOffsetNumber(page) + 1 < offnum) elog(PANIC, "heap_update_redo: invalid max offset number"); - if (xlrec->flags & XLOG_HEAP_PREFIX_FROM_OLD) + if (xlrec->flags & XLH_UPDATE_PREFIX_FROM_OLD) { Assert(newblk == oldblk); memcpy(&prefixlen, recdata, sizeof(uint16)); recdata += sizeof(uint16); } - if (xlrec->flags & XLOG_HEAP_SUFFIX_FROM_OLD) + if (xlrec->flags & XLH_UPDATE_SUFFIX_FROM_OLD) { Assert(newblk == oldblk); memcpy(&suffixlen, recdata, sizeof(uint16)); @@ -7918,7 +8178,7 @@ heap_xlog_update(XLogReaderState *record, bool hot_update) if (offnum == InvalidOffsetNumber) elog(PANIC, "heap_update_redo: failed to add tuple"); - if (xlrec->flags & XLOG_HEAP_NEW_ALL_VISIBLE_CLEARED) + if (xlrec->flags & XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED) PageClearAllVisible(page); freespace = PageGetHeapFreeSpace(page); /* needed to update FSM below */ @@ -7952,6 +8212,42 @@ heap_xlog_update(XLogReaderState *record, bool hot_update) } static void +heap_xlog_confirm(XLogReaderState *record) +{ + XLogRecPtr lsn = record->EndRecPtr; + xl_heap_confirm *xlrec = (xl_heap_confirm *) XLogRecGetData(record); + Buffer buffer; + Page page; + OffsetNumber offnum; + ItemId lp = NULL; + HeapTupleHeader htup; + + if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO) + { + page = BufferGetPage(buffer); + + offnum = xlrec->offnum; + if (PageGetMaxOffsetNumber(page) >= offnum) + lp = PageGetItemId(page, offnum); + + if (PageGetMaxOffsetNumber(page) < offnum || !ItemIdIsNormal(lp)) + elog(PANIC, "heap_confirm_redo: invalid lp"); + + htup = (HeapTupleHeader) PageGetItem(page, lp); + + /* + * Confirm tuple as actually inserted + */ + ItemPointerSet(&htup->t_ctid, BufferGetBlockNumber(buffer), offnum); + + PageSetLSN(page, lsn); + MarkBufferDirty(buffer); + } + if (BufferIsValid(buffer)) + UnlockReleaseBuffer(buffer); +} + +static void heap_xlog_lock(XLogReaderState *record) { XLogRecPtr lsn = record->EndRecPtr; @@ -8101,6 +8397,9 @@ heap_redo(XLogReaderState *record) case XLOG_HEAP_HOT_UPDATE: heap_xlog_update(record, true); break; + case XLOG_HEAP_CONFIRM: + heap_xlog_confirm(record); + break; case XLOG_HEAP_LOCK: heap_xlog_lock(record); break; diff --git a/src/backend/access/heap/hio.c b/src/backend/access/heap/hio.c index 6d091f63af0..a9f0ca35e49 100644 --- a/src/backend/access/heap/hio.c +++ b/src/backend/access/heap/hio.c @@ -35,12 +35,17 @@ void RelationPutHeapTuple(Relation relation, Buffer buffer, - HeapTuple tuple) + HeapTuple tuple, + bool token) { Page pageHeader; OffsetNumber offnum; - ItemId itemId; - Item item; + + /* + * A tuple that's being inserted speculatively should already have its + * token set. 
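The hio.c assertion that follows ("should already have its token set") leans on a token convention whose definitions live in headers outside this diffstat (itemptr.h and htup_details.h in the patchset). The sketch below paraphrases that convention; the exact constant value is an assumption:

```c
/*
 * Assumed sketch of the speculative-token convention: the token is stored
 * in t_ctid's block-number half, and the offset half is set to a reserved
 * value larger than any real offset (hence the StaticAssertStmt against
 * MaxOffsetNumber in heap_finish_speculative above).
 */
#define SpecTokenOffsetNumber	0xfffe	/* assumed value */

#define HeapTupleHeaderIsSpeculative(tup) \
	(ItemPointerGetOffsetNumber(&(tup)->t_ctid) == SpecTokenOffsetNumber)

#define HeapTupleHeaderSetSpeculativeToken(tup, token) \
	ItemPointerSet(&(tup)->t_ctid, (token), SpecTokenOffsetNumber)
```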
+ */ + Assert(!token || HeapTupleHeaderIsSpeculative(tuple->t_data)); /* Add the tuple to the page */ pageHeader = BufferGetPage(buffer); @@ -54,10 +59,18 @@ RelationPutHeapTuple(Relation relation, /* Update tuple->t_self to the actual position where it was stored */ ItemPointerSet(&(tuple->t_self), BufferGetBlockNumber(buffer), offnum); - /* Insert the correct position into CTID of the stored tuple, too */ - itemId = PageGetItemId(pageHeader, offnum); - item = PageGetItem(pageHeader, itemId); - ((HeapTupleHeader) item)->t_ctid = tuple->t_self; + /* + * Insert the correct position into CTID of the stored tuple, too + * (unless this is a speculative insertion, in which case the token is + * held in the CTID field instead) + */ + if (!token) + { + ItemId itemId = PageGetItemId(pageHeader, offnum); + Item item = PageGetItem(pageHeader, itemId); + + ((HeapTupleHeader) item)->t_ctid = tuple->t_self; + } } /* diff --git a/src/backend/access/heap/tuptoaster.c b/src/backend/access/heap/tuptoaster.c index 8464e8794f6..274155ad0c7 100644 --- a/src/backend/access/heap/tuptoaster.c +++ b/src/backend/access/heap/tuptoaster.c @@ -523,6 +523,14 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup, bool toast_delold[MaxHeapAttributeNumber]; /* + * Ignore the INSERT_SPECULATIVE option. Speculative insertions/super + * deletions just insert/delete the toast values normally. It seems + * easiest to deal with that here, instead of in, potentially, multiple + * callers. + */ + options &= ~HEAP_INSERT_SPECULATIVE; + + /* * We should only ever be called for tuples of plain relations or * materialized views --- recursing on a toast rel is bad news. */ diff --git a/src/backend/access/nbtree/nbtinsert.c b/src/backend/access/nbtree/nbtinsert.c index ef68a7145fc..4a60c5fa2c8 100644 --- a/src/backend/access/nbtree/nbtinsert.c +++ b/src/backend/access/nbtree/nbtinsert.c @@ -51,7 +51,8 @@ static Buffer _bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf); static TransactionId _bt_check_unique(Relation rel, IndexTuple itup, Relation heapRel, Buffer buf, OffsetNumber offset, ScanKey itup_scankey, - IndexUniqueCheck checkUnique, bool *is_unique); + IndexUniqueCheck checkUnique, bool *is_unique, + uint32 *speculativeToken); static void _bt_findinsertloc(Relation rel, Buffer *bufptr, OffsetNumber *offsetptr, @@ -159,17 +160,27 @@ top: */ if (checkUnique != UNIQUE_CHECK_NO) { - TransactionId xwait; + TransactionId xwait; + uint32 speculativeToken; offset = _bt_binsrch(rel, buf, natts, itup_scankey, false); xwait = _bt_check_unique(rel, itup, heapRel, buf, offset, itup_scankey, - checkUnique, &is_unique); + checkUnique, &is_unique, &speculativeToken); if (TransactionIdIsValid(xwait)) { /* Have to wait for the other guy ... */ _bt_relbuf(rel, buf); + /* + * If it's a speculative insertion, wait for it to finish (i.e. + * to go ahead with the insertion, or to kill the tuple). Otherwise + * wait for the transaction to finish as usual. + */ + if (speculativeToken) + SpeculativeInsertionWait(xwait, speculativeToken); + else + XactLockTableWait(xwait, rel, &itup->t_tid, XLTW_InsertIndex); + /* start over... */ _bt_freestack(stack); goto top; @@ -213,7 +224,10 @@ top: * * Returns InvalidTransactionId if there is no conflict, else an xact ID * we must wait for to see if it commits a conflicting tuple. If an actual - * conflict is detected, no return --- just ereport().
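The token that _bt_doinsert() waits on is handed back through SnapshotDirty.speculativeToken, consumed just below in _bt_check_unique(). The producer side is in tqual.c, outside this diffstat, so the fragment here is an assumption about its shape rather than the literal change:

```c
/*
 * Assumed fragment of HeapTupleSatisfiesDirty() in this patch: for a tuple
 * whose inserting transaction is still in progress, report the XID as
 * before and, if the insertion is speculative, the token to wait on.
 */
snapshot->xmin = HeapTupleHeaderGetRawXmin(tuple);
snapshot->speculativeToken = HeapTupleHeaderIsSpeculative(tuple) ?
	HeapTupleHeaderGetSpeculativeToken(tuple) : 0;
```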
If an xact ID is + returned, and the conflicting tuple still has a speculative insertion in + progress, *speculativeToken is set to non-zero, and the caller can wait for + the verdict on the insertion using SpeculativeInsertionWait(). * * However, if checkUnique == UNIQUE_CHECK_PARTIAL, we always return * InvalidTransactionId because we don't want to wait. In this case we @@ -223,7 +237,8 @@ top: static TransactionId _bt_check_unique(Relation rel, IndexTuple itup, Relation heapRel, Buffer buf, OffsetNumber offset, ScanKey itup_scankey, - IndexUniqueCheck checkUnique, bool *is_unique) + IndexUniqueCheck checkUnique, bool *is_unique, + uint32 *speculativeToken) { TupleDesc itupdesc = RelationGetDescr(rel); int natts = rel->rd_rel->relnatts; @@ -340,6 +355,7 @@ _bt_check_unique(Relation rel, IndexTuple itup, Relation heapRel, if (nbuf != InvalidBuffer) _bt_relbuf(rel, nbuf); /* Tell _bt_doinsert to wait... */ + *speculativeToken = SnapshotDirty.speculativeToken; return xwait; } diff --git a/src/backend/access/rmgrdesc/heapdesc.c b/src/backend/access/rmgrdesc/heapdesc.c index 4f06a2637ae..f4a1b002cf1 100644 --- a/src/backend/access/rmgrdesc/heapdesc.c +++ b/src/backend/access/rmgrdesc/heapdesc.c @@ -75,6 +75,12 @@ heap_desc(StringInfo buf, XLogReaderState *record) xlrec->new_offnum, xlrec->new_xmax); } + else if (info == XLOG_HEAP_CONFIRM) + { + xl_heap_confirm *xlrec = (xl_heap_confirm *) rec; + + appendStringInfo(buf, "off %u", xlrec->offnum); + } else if (info == XLOG_HEAP_LOCK) { xl_heap_lock *xlrec = (xl_heap_lock *) rec; @@ -177,6 +183,9 @@ heap_identify(uint8 info) case XLOG_HEAP_HOT_UPDATE | XLOG_HEAP_INIT_PAGE: id = "HOT_UPDATE+INIT"; break; + case XLOG_HEAP_CONFIRM: + id = "CONFIRM"; + break; case XLOG_HEAP_LOCK: id = "LOCK"; break; diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c index ac3b785b5a7..8c8a9eafeea 100644 --- a/src/backend/catalog/index.c +++ b/src/backend/catalog/index.c @@ -1665,6 +1665,10 @@ BuildIndexInfo(Relation index) /* other info */ ii->ii_Unique = indexStruct->indisunique; ii->ii_ReadyForInserts = IndexIsReady(indexStruct); + /* assume not doing speculative insertion for now */ + ii->ii_UniqueOps = NULL; + ii->ii_UniqueProcs = NULL; + ii->ii_UniqueStrats = NULL; /* initialize index-build state to default */ ii->ii_Concurrent = false; @@ -1674,6 +1678,53 @@ BuildIndexInfo(Relation index) } /* ---------------- + * BuildSpeculativeIndexInfo + * Add extra state to IndexInfo record + * + * For unique indexes, we usually don't want to add info to the IndexInfo for + * checking uniqueness, since the B-Tree AM handles that directly. However, + * in the case of speculative insertion, additional support is required. + * + * Do this processing here rather than in BuildIndexInfo() to not incur the + * overhead in the common non-speculative cases. + * ---------------- + */ +void +BuildSpeculativeIndexInfo(Relation index, IndexInfo *ii) +{ + int ncols = index->rd_rel->relnatts; + int i; + + /* + * fetch info for checking unique indexes + */ + Assert(ii->ii_Unique); + + if (index->rd_rel->relam != BTREE_AM_OID) + elog(ERROR, "unexpected non-btree speculative unique index"); + + ii->ii_UniqueOps = (Oid *) palloc(sizeof(Oid) * ncols); + ii->ii_UniqueProcs = (Oid *) palloc(sizeof(Oid) * ncols); + ii->ii_UniqueStrats = (uint16 *) palloc(sizeof(uint16) * ncols); + + /* + * We have to look up the operator's strategy number. This + * provides a cross-check that the operator does match the index.
+ */ + /* We need the func OIDs and strategy numbers too */ + for (i = 0; i < ncols; i++) + { + ii->ii_UniqueStrats[i] = BTEqualStrategyNumber; + ii->ii_UniqueOps[i] = + get_opfamily_member(index->rd_opfamily[i], + index->rd_opcintype[i], + index->rd_opcintype[i], + ii->ii_UniqueStrats[i]); + ii->ii_UniqueProcs[i] = get_opcode(ii->ii_UniqueOps[i]); + } +} + +/* ---------------- * FormIndexDatum * Construct values[] and isnull[] arrays for a new index tuple. * @@ -2612,7 +2663,7 @@ IndexCheckExclusion(Relation heapRelation, check_exclusion_constraint(heapRelation, indexRelation, indexInfo, &(heapTuple->t_self), values, isnull, - estate, true, false); + estate, true); } heap_endscan(scan); diff --git a/src/backend/catalog/indexing.c b/src/backend/catalog/indexing.c index fe123addac0..0231084c7c9 100644 --- a/src/backend/catalog/indexing.c +++ b/src/backend/catalog/indexing.c @@ -46,7 +46,7 @@ CatalogOpenIndexes(Relation heapRel) resultRelInfo->ri_RelationDesc = heapRel; resultRelInfo->ri_TrigDesc = NULL; /* we don't fire triggers */ - ExecOpenIndices(resultRelInfo); + ExecOpenIndices(resultRelInfo, false); return resultRelInfo; } diff --git a/src/backend/catalog/sql_features.txt b/src/backend/catalog/sql_features.txt index 332926424b6..cc0f8c45a6d 100644 --- a/src/backend/catalog/sql_features.txt +++ b/src/backend/catalog/sql_features.txt @@ -229,7 +229,7 @@ F311 Schema definition statement 02 CREATE TABLE for persistent base tables YES F311 Schema definition statement 03 CREATE VIEW YES F311 Schema definition statement 04 CREATE VIEW: WITH CHECK OPTION YES F311 Schema definition statement 05 GRANT statement YES -F312 MERGE statement NO +F312 MERGE statement NO Consider INSERT ... ON CONFLICT DO UPDATE F313 Enhanced MERGE statement NO F314 MERGE statement with DELETE branch NO F321 User authorization YES diff --git a/src/backend/commands/constraint.c b/src/backend/commands/constraint.c index 561d8fae574..e49affba9ee 100644 --- a/src/backend/commands/constraint.c +++ b/src/backend/commands/constraint.c @@ -172,7 +172,7 @@ unique_key_recheck(PG_FUNCTION_ARGS) */ check_exclusion_constraint(trigdata->tg_relation, indexRel, indexInfo, &(new_row->t_self), values, isnull, - estate, false, false); + estate, false); } /* diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c index aa8ae4b9bcd..00a2417a099 100644 --- a/src/backend/commands/copy.c +++ b/src/backend/commands/copy.c @@ -2284,7 +2284,7 @@ CopyFrom(CopyState cstate) 1, /* dummy rangetable index */ 0); - ExecOpenIndices(resultRelInfo); + ExecOpenIndices(resultRelInfo, false); estate->es_result_relations = resultRelInfo; estate->es_num_result_relations = 1; @@ -2439,7 +2439,8 @@ CopyFrom(CopyState cstate) if (resultRelInfo->ri_NumIndices > 0) recheckIndexes = ExecInsertIndexTuples(slot, &(tuple->t_self), - estate); + estate, false, NULL, + NIL); /* AFTER ROW INSERT Triggers */ ExecARInsertTriggers(estate, resultRelInfo, tuple, @@ -2553,7 +2554,7 @@ CopyFromInsertBatch(CopyState cstate, EState *estate, CommandId mycid, ExecStoreTuple(bufferedTuples[i], myslot, InvalidBuffer, false); recheckIndexes = ExecInsertIndexTuples(myslot, &(bufferedTuples[i]->t_self), - estate); + estate, false, NULL, NIL); ExecARInsertTriggers(estate, resultRelInfo, bufferedTuples[i], recheckIndexes); diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c index f4cc90183a4..c5452e3cb6a 100644 --- a/src/backend/commands/explain.c +++ b/src/backend/commands/explain.c @@ -103,7 +103,8 @@ static void ExplainIndexScanDetails(Oid 
indexid, ScanDirection indexorderdir, static void ExplainScanTarget(Scan *plan, ExplainState *es); static void ExplainModifyTarget(ModifyTable *plan, ExplainState *es); static void ExplainTargetRel(Plan *plan, Index rti, ExplainState *es); -static void show_modifytable_info(ModifyTableState *mtstate, ExplainState *es); +static void show_modifytable_info(ModifyTableState *mtstate, List *ancestors, + ExplainState *es); static void ExplainMemberNodes(List *plans, PlanState **planstates, List *ancestors, ExplainState *es); static void ExplainSubPlans(List *plans, List *ancestors, @@ -744,6 +745,9 @@ ExplainPreScanNode(PlanState *planstate, Bitmapset **rels_used) case T_ModifyTable: *rels_used = bms_add_member(*rels_used, ((ModifyTable *) plan)->nominalRelation); + if (((ModifyTable *) plan)->exclRelRTI) + *rels_used = bms_add_member(*rels_used, + ((ModifyTable *) plan)->exclRelRTI); break; default: break; @@ -1466,7 +1470,8 @@ ExplainNode(PlanState *planstate, List *ancestors, planstate, es); break; case T_ModifyTable: - show_modifytable_info((ModifyTableState *) planstate, es); + show_modifytable_info((ModifyTableState *) planstate, ancestors, + es); break; case T_Hash: show_hash_info((HashState *) planstate, es); @@ -2317,18 +2322,22 @@ ExplainTargetRel(Plan *plan, Index rti, ExplainState *es) /* * Show extra information for a ModifyTable node * - * We have two objectives here. First, if there's more than one target table - * or it's different from the nominal target, identify the actual target(s). - * Second, give FDWs a chance to display extra info about foreign targets. + * We have three objectives here. First, if there's more than one target + * table or it's different from the nominal target, identify the actual + * target(s). Second, give FDWs a chance to display extra info about foreign + * targets. Third, show information about ON CONFLICT. */ static void -show_modifytable_info(ModifyTableState *mtstate, ExplainState *es) +show_modifytable_info(ModifyTableState *mtstate, List *ancestors, + ExplainState *es) { ModifyTable *node = (ModifyTable *) mtstate->ps.plan; const char *operation; const char *foperation; bool labeltargets; int j; + List *idxNames = NIL; + ListCell *lst; switch (node->operation) { @@ -2414,6 +2423,55 @@ show_modifytable_info(ModifyTableState *mtstate, ExplainState *es) } } + /* Gather names of ON CONFLICT arbiter indexes */ + foreach(lst, node->arbiterIndexes) + { + char *indexname = get_rel_name(lfirst_oid(lst)); + + idxNames = lappend(idxNames, indexname); + } + + if (node->onConflictAction != ONCONFLICT_NONE) + { + ExplainProperty("Conflict Resolution", + node->onConflictAction == ONCONFLICT_NOTHING ? 
+ "NOTHING" : "UPDATE", + false, es); + + /* + * Don't display arbiter indexes at all when DO NOTHING variant + * implicitly ignores all conflicts + */ + if (idxNames) + ExplainPropertyList("Conflict Arbiter Indexes", idxNames, es); + + /* ON CONFLICT DO UPDATE WHERE qual is specially displayed */ + if (node->onConflictWhere) + { + show_upper_qual((List *) node->onConflictWhere, "Conflict Filter", + &mtstate->ps, ancestors, es); + show_instrumentation_count("Rows Removed by Conflict Filter", 1, &mtstate->ps, es); + } + + /* EXPLAIN ANALYZE display of actual outcome for each tuple proposed */ + if (es->analyze && mtstate->ps.instrument) + { + double total; + double insert_path; + double other_path; + + InstrEndLoop(mtstate->mt_plans[0]->instrument); + + /* count the number of source rows */ + total = mtstate->mt_plans[0]->instrument->ntuples; + other_path = mtstate->ps.instrument->nfiltered2; + insert_path = total - other_path; + + ExplainPropertyFloat("Tuples Inserted", insert_path, 0, es); + ExplainPropertyFloat("Conflicting Tuples", other_path, 0, es); + } + } + if (labeltargets) ExplainCloseGroup("Target Tables", "Target Tables", false, es); } diff --git a/src/backend/commands/trigger.c b/src/backend/commands/trigger.c index 222e7fce854..b537ca5e661 100644 --- a/src/backend/commands/trigger.c +++ b/src/backend/commands/trigger.c @@ -2421,21 +2421,10 @@ ExecBRUpdateTriggers(EState *estate, EPQState *epqstate, TupleTableSlot *newSlot; int i; Bitmapset *updatedCols; - Bitmapset *keyCols; LockTupleMode lockmode; - /* - * Compute lock mode to use. If columns that are part of the key have not - * been modified, then we can use a weaker lock, allowing for better - * concurrency. - */ - updatedCols = GetUpdatedColumns(relinfo, estate); - keyCols = RelationGetIndexAttrBitmap(relinfo->ri_RelationDesc, - INDEX_ATTR_BITMAP_KEY); - if (bms_overlap(keyCols, updatedCols)) - lockmode = LockTupleExclusive; - else - lockmode = LockTupleNoKeyExclusive; + /* Determine lock mode to use */ + lockmode = ExecUpdateLockMode(estate, relinfo); Assert(HeapTupleIsValid(fdw_trigtuple) ^ ItemPointerIsValid(tupleid)); if (fdw_trigtuple == NULL) @@ -2476,6 +2465,7 @@ ExecBRUpdateTriggers(EState *estate, EPQState *epqstate, TRIGGER_EVENT_ROW | TRIGGER_EVENT_BEFORE; LocTriggerData.tg_relation = relinfo->ri_RelationDesc; + updatedCols = GetUpdatedColumns(relinfo, estate); for (i = 0; i < trigdesc->numtriggers; i++) { Trigger *trigger = &trigdesc->triggers[i]; @@ -2783,6 +2773,9 @@ ltrmark:; */ return NULL; + case HeapTupleInvisible: + elog(ERROR, "attempted to lock invisible tuple"); + default: ReleaseBuffer(buffer); elog(ERROR, "unrecognized heap_lock_tuple status: %u", test); diff --git a/src/backend/executor/execIndexing.c b/src/backend/executor/execIndexing.c index a697682b20e..e7cf72b3875 100644 --- a/src/backend/executor/execIndexing.c +++ b/src/backend/executor/execIndexing.c @@ -50,6 +50,50 @@ * to the caller. The caller must re-check them later by calling * check_exclusion_constraint(). * + * Speculative insertion + * --------------------- + * + * Speculative insertion is a is a two-phase mechanism, used to implement + * INSERT ... ON CONFLICT DO UPDATE/NOTHING. The tuple is first inserted + * to the heap and update the indexes as usual, but if a constraint is + * violated, we can still back out the insertion without aborting the whole + * transaction. In an INSERT ... ON CONFLICT statement, if a conflict is + * detected, the inserted tuple is backed out and the ON CONFLICT action is + * executed instead. 
+ * + * Insertion into a unique index works as usual: the index AM checks for + * duplicate keys atomically with the insertion. But instead of throwing + * an error on a conflict, the speculatively inserted heap tuple is backed + * out. + * + * Exclusion constraints are slightly more complicated. As mentioned + * earlier, there is a risk of deadlock when two backends insert the same + * key concurrently. That was not a problem for regular insertions, where + * one of the transactions has to be aborted anyway, but with a speculative + * insertion we cannot let a deadlock happen, because we only want to back + * out the speculatively inserted tuple on conflict, not abort the whole + * transaction. + * + * When a backend detects that the speculative insertion conflicts with + * another in-progress tuple, it has two options: + * + * 1. back out the speculatively inserted tuple, then wait for the other + * transaction, and retry. Or, + * 2. wait for the other transaction, with the speculatively inserted tuple + * still in place. + * + * If two backends insert at the same time, and both try to wait for each + * other, they will deadlock. So option 2 is not acceptable. Option 1 + * avoids the deadlock, but it is prone to a livelock instead. Both + * transactions will wake up immediately as the other transaction backs + * out. Then they both retry, and conflict with each other again, lather, + * rinse, repeat. + * + * To avoid the livelock, one of the backends must back out first, and then + * wait, while the other one waits without backing out. It doesn't matter + * which one backs out, so we employ an arbitrary rule that the transaction + * with the higher XID backs out. + * * * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California @@ -63,12 +107,30 @@ #include "postgres.h" #include "access/relscan.h" +#include "access/xact.h" #include "catalog/index.h" #include "executor/executor.h" #include "nodes/nodeFuncs.h" #include "storage/lmgr.h" #include "utils/tqual.h" +/* waitMode argument to check_exclusion_or_unique_constraint() */ +typedef enum +{ + CEOUC_WAIT, + CEOUC_NOWAIT, + CEOUC_LIVELOCK_PREVENTING_WAIT +} CEOUC_WAIT_MODE; + +static bool check_exclusion_or_unique_constraint(Relation heap, Relation index, + IndexInfo *indexInfo, + ItemPointer tupleid, + Datum *values, bool *isnull, + EState *estate, bool newIndex, + CEOUC_WAIT_MODE waitMode, + bool violationOK, + ItemPointer conflictTid); + static bool index_recheck_constraint(Relation index, Oid *constr_procs, Datum *existing_values, bool *existing_isnull, Datum *new_values); @@ -84,7 +146,7 @@ static bool index_recheck_constraint(Relation index, Oid *constr_procs, * ---------------------------------------------------------------- */ void -ExecOpenIndices(ResultRelInfo *resultRelInfo) +ExecOpenIndices(ResultRelInfo *resultRelInfo, bool speculative) { Relation resultRelation = resultRelInfo->ri_RelationDesc; List *indexoidlist; @@ -137,6 +199,13 @@ ExecOpenIndices(ResultRelInfo *resultRelInfo) /* extract index key information from the index's pg_index info */ ii = BuildIndexInfo(indexDesc); + /* + * If the indexes are to be used for speculative insertion, add extra + * information required by unique index entries.
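The "higher XID backs out" rule lands in code further down, in check_exclusion_or_unique_constraint(). Condensed into one predicate (a paraphrase of the condition that appears verbatim later in this diff, using that function's local names):

```c
/*
 * Paraphrase of the wait decision in check_exclusion_or_unique_constraint():
 * in livelock-preventing mode, wait only when the conflicting tuple is a
 * speculative insertion and our XID is the older one; the younger inserter
 * backs out and retries instead.
 */
bool		wait =
	TransactionIdIsValid(xwait) &&
	(waitMode == CEOUC_WAIT ||
	 (waitMode == CEOUC_LIVELOCK_PREVENTING_WAIT &&
	  DirtySnapshot.speculativeToken &&
	  TransactionIdPrecedes(GetCurrentTransactionId(), xwait)));
```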
+ */ + if (speculative && ii->ii_Unique) + BuildSpeculativeIndexInfo(indexDesc, ii); + relationDescs[i] = indexDesc; indexInfoArray[i] = ii; i++; @@ -186,7 +255,9 @@ ExecCloseIndices(ResultRelInfo *resultRelInfo) * Unique and exclusion constraints are enforced at the same * time. This returns a list of index OIDs for any unique or * exclusion constraints that are deferred and that had - * potential (unconfirmed) conflicts. + * potential (unconfirmed) conflicts. (If noDupErr == true, + * the same is done for non-deferred constraints, and whether + * the conflict was a speculative conflict is reported to the + * caller via *specConflict.) * * CAUTION: this must not be called for a HOT update. * We can't defend against that here for lack of info. @@ -196,7 +267,10 @@ ExecCloseIndices(ResultRelInfo *resultRelInfo) List * ExecInsertIndexTuples(TupleTableSlot *slot, ItemPointer tupleid, - EState *estate) + EState *estate, + bool noDupErr, + bool *specConflict, + List *arbiterIndexes) { List *result = NIL; ResultRelInfo *resultRelInfo; @@ -236,12 +310,17 @@ ExecInsertIndexTuples(TupleTableSlot *slot, IndexInfo *indexInfo; IndexUniqueCheck checkUnique; bool satisfiesConstraint; + bool arbiter; if (indexRelation == NULL) continue; indexInfo = indexInfoArray[i]; + /* Record if speculative insertion arbiter */ + arbiter = list_member_oid(arbiterIndexes, + indexRelation->rd_index->indexrelid); + /* If the index is marked as read-only, ignore it */ if (!indexInfo->ii_ReadyForInserts) continue; @@ -288,9 +367,14 @@ ExecInsertIndexTuples(TupleTableSlot *slot, * For a deferrable unique index, we tell the index AM to just detect * possible non-uniqueness, and we add the index OID to the result * list if further checking is needed. + * + * For a speculative insertion (used by INSERT ... ON CONFLICT), do + * the same as for a deferrable unique index. */ if (!indexRelation->rd_index->indisunique) checkUnique = UNIQUE_CHECK_NO; + else if (noDupErr && (arbiterIndexes == NIL || arbiter)) + checkUnique = UNIQUE_CHECK_PARTIAL; else if (indexRelation->rd_index->indimmediate) checkUnique = UNIQUE_CHECK_YES; else @@ -308,8 +392,11 @@ ExecInsertIndexTuples(TupleTableSlot *slot, * If the index has an associated exclusion constraint, check that. * This is simpler than the process for uniqueness checks since we * always insert first and then check. If the constraint is deferred, - * we check now anyway, but don't throw error on violation; instead - * we'll queue a recheck event. + * we check now anyway, but don't throw error on violation or wait for + * a conclusive outcome from a concurrent insertion; instead we'll + * queue a recheck event. Similarly, noDupErr callers (speculative + * inserters) will recheck later, and wait for a conclusive outcome + * then.
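How a speculative inserter is meant to consume the new arguments is easiest to see from the caller. A hypothetical condensation (the real caller is nodeModifyTable.c's ExecInsert, whose full text falls outside this excerpt; the retry label is illustrative):

```c
/* Hypothetical caller-side sketch of the extended API. */
bool		specConflict = false;
List	   *recheckIndexes;

recheckIndexes = ExecInsertIndexTuples(slot, &(tuple->t_self), estate,
									   true,	/* noDupErr */
									   &specConflict,
									   arbiterIndexes);
if (specConflict)
{
	/* lost a race against another speculative inserter: back out, retry */
	heap_abort_speculative(resultRelationDesc, tuple);
	goto retry;					/* illustrative retry point */
}
```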
* * An index for an exclusion constraint can't also be UNIQUE (not an * essential property, we just don't allow it in the grammar), so no * @@ -317,13 +404,31 @@ ExecInsertIndexTuples(TupleTableSlot *slot, */ if (indexInfo->ii_ExclusionOps != NULL) { - bool errorOK = !indexRelation->rd_index->indimmediate; + bool violationOK; + CEOUC_WAIT_MODE waitMode; + + if (noDupErr) + { + violationOK = true; + waitMode = CEOUC_LIVELOCK_PREVENTING_WAIT; + } + else if (!indexRelation->rd_index->indimmediate) + { + violationOK = true; + waitMode = CEOUC_NOWAIT; + } + else + { + violationOK = false; + waitMode = CEOUC_WAIT; + } satisfiesConstraint = - check_exclusion_constraint(heapRelation, - indexRelation, indexInfo, - tupleid, values, isnull, - estate, false, errorOK); + check_exclusion_or_unique_constraint(heapRelation, + indexRelation, indexInfo, + tupleid, values, isnull, + estate, false, + waitMode, violationOK, NULL); } if ((checkUnique == UNIQUE_CHECK_PARTIAL || @@ -333,46 +438,213 @@ ExecInsertIndexTuples(TupleTableSlot *slot, /* * The tuple potentially violates the uniqueness or exclusion * constraint, so make a note of the index so that we can re-check - * it later. + * it later. Speculative inserters are told if there was a + * speculative conflict, since that always requires a restart. */ result = lappend_oid(result, RelationGetRelid(indexRelation)); + if (indexRelation->rd_index->indimmediate && specConflict) + *specConflict = true; } } return result; } +/* ---------------------------------------------------------------- + * ExecCheckIndexConstraints + * + * This routine checks if a tuple violates any unique or + * exclusion constraints. Returns true if there is no conflict. + * Otherwise returns false, and the TID of the conflicting + * tuple is returned in *conflictTid. + * + * If 'arbiterIndexes' is given, only those indexes are checked. + * NIL means all indexes. + * + * Note that this doesn't lock the values in any way, so it's + * possible that a conflicting tuple is inserted immediately + * after this returns. But this can be used for a pre-check + * before insertion. + * ---------------------------------------------------------------- + */ +bool +ExecCheckIndexConstraints(TupleTableSlot *slot, + EState *estate, ItemPointer conflictTid, + List *arbiterIndexes) +{ + ResultRelInfo *resultRelInfo; + int i; + int numIndices; + RelationPtr relationDescs; + Relation heapRelation; + IndexInfo **indexInfoArray; + ExprContext *econtext; + Datum values[INDEX_MAX_KEYS]; + bool isnull[INDEX_MAX_KEYS]; + ItemPointerData invalidItemPtr; + bool checkedIndex = false; + + ItemPointerSetInvalid(conflictTid); + ItemPointerSetInvalid(&invalidItemPtr); + + /* + * Get information from the result relation info structure. + */ + resultRelInfo = estate->es_result_relation_info; + numIndices = resultRelInfo->ri_NumIndices; + relationDescs = resultRelInfo->ri_IndexRelationDescs; + indexInfoArray = resultRelInfo->ri_IndexRelationInfo; + heapRelation = resultRelInfo->ri_RelationDesc; + + /* + * We will use the EState's per-tuple context for evaluating predicates + * and index expressions (creating it if it's not already there). + */ + econtext = GetPerTupleExprContext(estate); + + /* Arrange for econtext's scan tuple to be the tuple under test */ + econtext->ecxt_scantuple = slot; + + /* + * For each index, form index tuple and check if it satisfies the + * constraint.
+ */ + for (i = 0; i < numIndices; i++) + { + Relation indexRelation = relationDescs[i]; + IndexInfo *indexInfo; + bool satisfiesConstraint; + + if (indexRelation == NULL) + continue; + + indexInfo = indexInfoArray[i]; + + if (!indexInfo->ii_Unique && !indexInfo->ii_ExclusionOps) + continue; + + /* If the index is marked as read-only, ignore it */ + if (!indexInfo->ii_ReadyForInserts) + continue; + + /* When specific arbiter indexes requested, only examine them */ + if (arbiterIndexes != NIL && + !list_member_oid(arbiterIndexes, + indexRelation->rd_index->indexrelid)) + continue; + + if (!indexRelation->rd_index->indimmediate) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("ON CONFLICT does not support deferrable unique constraints/exclusion constraints as arbiters"), + errtableconstraint(heapRelation, + RelationGetRelationName(indexRelation)))); + + checkedIndex = true; + + /* Check for partial index */ + if (indexInfo->ii_Predicate != NIL) + { + List *predicate; + + /* + * If predicate state not set up yet, create it (in the estate's + * per-query context) + */ + predicate = indexInfo->ii_PredicateState; + if (predicate == NIL) + { + predicate = (List *) + ExecPrepareExpr((Expr *) indexInfo->ii_Predicate, + estate); + indexInfo->ii_PredicateState = predicate; + } + + /* Skip this index-update if the predicate isn't satisfied */ + if (!ExecQual(predicate, econtext, false)) + continue; + } + + /* + * FormIndexDatum fills in its values and isnull parameters with the + * appropriate values for the column(s) of the index. + */ + FormIndexDatum(indexInfo, + slot, + estate, + values, + isnull); + + satisfiesConstraint = + check_exclusion_or_unique_constraint(heapRelation, indexRelation, + indexInfo, &invalidItemPtr, + values, isnull, estate, false, + CEOUC_WAIT, true, + conflictTid); + if (!satisfiesConstraint) + return false; + } + + if (arbiterIndexes != NIL && !checkedIndex) + elog(ERROR, "unexpected failure to find arbiter index"); + + return true; +} + /* - * Check for violation of an exclusion constraint + * Check for violation of an exclusion or unique constraint * * heap: the table containing the new tuple - * index: the index supporting the exclusion constraint + * index: the index supporting the constraint * indexInfo: info about the index, including the exclusion properties - * tupleid: heap TID of the new tuple we have just inserted + * tupleid: heap TID of the new tuple we have just inserted (invalid if we + * haven't inserted a new tuple yet) * values, isnull: the *index* column values computed for the new tuple * estate: an EState we can do evaluation in * newIndex: if true, we are trying to build a new index (this affects * only the wording of error messages) - * errorOK: if true, don't throw error for violation + * waitMode: whether to wait for concurrent inserters/deleters + * violationOK: if true, don't throw error for violation + * conflictTid: if not-NULL, the TID of the conflicting tuple is returned here * * Returns true if OK, false if actual or potential violation * - * When errorOK is true, we report violation without waiting to see if any - * concurrent transaction has committed or not; so the violation is only - * potential, and the caller must recheck sometime later. This behavior - * is convenient for deferred exclusion checks; we need not bother queuing - * a deferred event if there is definitely no conflict at insertion time.
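The intended caller pattern for the pre-check, sketched under the assumption that it mirrors the ON CONFLICT executor path this patch adds elsewhere (`onconflict` and the surrounding control flow are illustrative):

```c
/* Hypothetical DO NOTHING path: pre-check, and skip the row on conflict. */
ItemPointerData conflictTid;

if (!ExecCheckIndexConstraints(slot, estate, &conflictTid, arbiterIndexes))
{
	if (onconflict == ONCONFLICT_NOTHING)
		return NULL;			/* conflict found: ignore this row */
	/* for DO UPDATE, lock and update the tuple at conflictTid instead */
}
```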
+ * 'waitMode' determines what happens if a conflict is detected with a tuple + * that was inserted or deleted by a transaction that's still running. + * CEOUC_WAIT means that we wait for the transaction to commit, before + * throwing an error or returning. CEOUC_NOWAIT means that we report the + * violation immediately; so the violation is only potential, and the caller + * must recheck sometime later. This behavior is convenient for deferred + * exclusion checks; we need not bother queuing a deferred event if there is + * definitely no conflict at insertion time. + * + * CEOUC_LIVELOCK_PREVENTING_WAIT is like CEOUC_NOWAIT, but we will sometimes + * wait anyway, to prevent livelocking if two transactions try inserting at + * the same time. This is used with speculative insertions, for INSERT ON + * CONFLICT statements. (See notes in file header) * - * When errorOK is false, we'll throw error on violation, so a false result - * is impossible. + * If violationOK is true, we just report the potential or actual violation to + * the caller by returning 'false'. Otherwise we throw a descriptive error + * message here. When violationOK is false, a false result is impossible. + * + * Note: The indexam is normally responsible for checking unique constraints, + * so this normally only needs to be used for exclusion constraints. But this + * function is also called when doing a "pre-check" for conflicts on a unique + * constraint, when doing speculative insertion. Caller may use the returned + * conflict TID to take further steps. */ -bool -check_exclusion_constraint(Relation heap, Relation index, IndexInfo *indexInfo, - ItemPointer tupleid, Datum *values, bool *isnull, - EState *estate, bool newIndex, bool errorOK) +static bool +check_exclusion_or_unique_constraint(Relation heap, Relation index, + IndexInfo *indexInfo, + ItemPointer tupleid, + Datum *values, bool *isnull, + EState *estate, bool newIndex, + CEOUC_WAIT_MODE waitMode, + bool violationOK, + ItemPointer conflictTid) { - Oid *constr_procs = indexInfo->ii_ExclusionProcs; - uint16 *constr_strats = indexInfo->ii_ExclusionStrats; + Oid *constr_procs; + uint16 *constr_strats; Oid *index_collations = index->rd_indcollation; int index_natts = index->rd_index->indnatts; IndexScanDesc index_scan; @@ -386,6 +658,17 @@ check_exclusion_constraint(Relation heap, Relation index, IndexInfo *indexInfo, TupleTableSlot *existing_slot; TupleTableSlot *save_scantuple; + if (indexInfo->ii_ExclusionOps) + { + constr_procs = indexInfo->ii_ExclusionProcs; + constr_strats = indexInfo->ii_ExclusionStrats; + } + else + { + constr_procs = indexInfo->ii_UniqueProcs; + constr_strats = indexInfo->ii_UniqueStrats; + } + /* * If any of the input values are NULL, the constraint check is assumed to * pass (i.e., we assume the operators are strict). @@ -450,7 +733,8 @@ retry: /* * Ignore the entry for the tuple we're trying to check. */ - if (ItemPointerEquals(tupleid, &tup->t_self)) + if (ItemPointerIsValid(tupleid) && + ItemPointerEquals(tupleid, &tup->t_self)) { if (found_self) /* should not happen */ elog(ERROR, "found self tuple multiple times in index \"%s\"", @@ -480,39 +764,47 @@ retry: } /* - * At this point we have either a conflict or a potential conflict. If - * we're not supposed to raise error, just return the fact of the - * potential conflict without waiting to see if it's real. - */ - if (errorOK) - { - conflict = true; - break; - } - - /* + * At this point we have either a conflict or a potential conflict. 
+ * * If an in-progress transaction is affecting the visibility of this - * tuple, we need to wait for it to complete and then recheck. For - * simplicity we do rechecking by just restarting the whole scan --- - * this case probably doesn't happen often enough to be worth trying - * harder, and anyway we don't want to hold any index internal locks - * while waiting. + * tuple, we need to wait for it to complete and then recheck (unless + * the caller requested not to). For simplicity we do rechecking by + * just restarting the whole scan --- this case probably doesn't + * happen often enough to be worth trying harder, and anyway we don't + * want to hold any index internal locks while waiting. */ xwait = TransactionIdIsValid(DirtySnapshot.xmin) ? DirtySnapshot.xmin : DirtySnapshot.xmax; - if (TransactionIdIsValid(xwait)) + if (TransactionIdIsValid(xwait) && + (waitMode == CEOUC_WAIT || + (waitMode == CEOUC_LIVELOCK_PREVENTING_WAIT && + DirtySnapshot.speculativeToken && + TransactionIdPrecedes(GetCurrentTransactionId(), xwait)))) { ctid_wait = tup->t_data->t_ctid; index_endscan(index_scan); - XactLockTableWait(xwait, heap, &ctid_wait, - XLTW_RecheckExclusionConstr); + if (DirtySnapshot.speculativeToken) + SpeculativeInsertionWait(DirtySnapshot.xmin, + DirtySnapshot.speculativeToken); + else + XactLockTableWait(xwait, heap, &ctid_wait, + XLTW_RecheckExclusionConstr); goto retry; } /* - * We have a definite conflict. Report it. + * We have a definite conflict (or a potential one, but the caller + * didn't want to wait). Return it to caller, or report it. */ + if (violationOK) + { + conflict = true; + if (conflictTid) + *conflictTid = tup->t_self; + break; + } + error_new = BuildIndexValueDescription(index, values, isnull); error_existing = BuildIndexValueDescription(index, existing_values, existing_isnull); @@ -544,10 +836,10 @@ retry: /* * Ordinarily, at this point the search should have found the originally - * inserted tuple, unless we exited the loop early because of conflict. - * However, it is possible to define exclusion constraints for which that - * wouldn't be true --- for instance, if the operator is <>. So we no - * longer complain if found_self is still false. + * inserted tuple (if any), unless we exited the loop early because of + * conflict. However, it is possible to define exclusion constraints for + * which that wouldn't be true --- for instance, if the operator is <>. + * So we no longer complain if found_self is still false. */ econtext->ecxt_scantuple = save_scantuple; @@ -558,6 +850,25 @@ retry: } /* + * Check for violation of an exclusion constraint + * + * This is a dumbed down version of check_exclusion_or_unique_constraint + * for external callers. They don't need all the special modes. + */ +void +check_exclusion_constraint(Relation heap, Relation index, + IndexInfo *indexInfo, + ItemPointer tupleid, + Datum *values, bool *isnull, + EState *estate, bool newIndex) +{ + (void) check_exclusion_or_unique_constraint(heap, index, indexInfo, tupleid, + values, isnull, + estate, newIndex, + CEOUC_WAIT, false, NULL); +} + +/* * Check existing tuple's index values to see if it really matches the * exclusion condition against the new_values. Returns true if conflict. 
*/ diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c index 4272d9bc155..0dee9491788 100644 --- a/src/backend/executor/execMain.c +++ b/src/backend/executor/execMain.c @@ -1813,6 +1813,12 @@ ExecWithCheckOptions(WCOKind kind, ResultRelInfo *resultRelInfo, errmsg("new row violates row level security policy for \"%s\"", wco->relname))); break; + case WCO_RLS_CONFLICT_CHECK: + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("new row violates row level security policy (USING expression) for \"%s\"", + wco->relname))); + break; default: elog(ERROR, "unrecognized WCO kind: %u", wco->kind); break; @@ -1973,6 +1979,31 @@ ExecBuildSlotValueDescription(Oid reloid, /* + * ExecUpdateLockMode -- find the appropriate UPDATE tuple lock mode for a + * given ResultRelInfo + */ +LockTupleMode +ExecUpdateLockMode(EState *estate, ResultRelInfo *relinfo) +{ + Bitmapset *keyCols; + Bitmapset *updatedCols; + + /* + * Compute lock mode to use. If columns that are part of the key have not + * been modified, then we can use a weaker lock, allowing for better + * concurrency. + */ + updatedCols = GetUpdatedColumns(relinfo, estate); + keyCols = RelationGetIndexAttrBitmap(relinfo->ri_RelationDesc, + INDEX_ATTR_BITMAP_KEY); + + if (bms_overlap(keyCols, updatedCols)) + return LockTupleExclusive; + + return LockTupleNoKeyExclusive; +} + +/* * ExecFindRowMark -- find the ExecRowMark struct for given rangetable index */ ExecRowMark * @@ -2186,8 +2217,9 @@ EvalPlanQualFetch(EState *estate, Relation relation, int lockmode, * recycled and reused for an unrelated tuple. This implies that * the latest version of the row was deleted, so we need do * nothing. (Should be safe to examine xmin without getting - * buffer's content lock, since xmin never changes in an existing - * tuple.) + * buffer's content lock. We assume reading a TransactionId to be + * atomic, and Xmin never changes in an existing tuple, except to + * invalid or frozen, and neither of those can match priorXmax.) */ if (!TransactionIdEquals(HeapTupleHeaderGetXmin(tuple.t_data), priorXmax)) @@ -2268,11 +2300,12 @@ EvalPlanQualFetch(EState *estate, Relation relation, int lockmode, * case, so as to avoid the "Halloween problem" of * repeated update attempts. In the latter case it might * be sensible to fetch the updated tuple instead, but - * doing so would require changing heap_lock_tuple as well - * as heap_update and heap_delete to not complain about - * updating "invisible" tuples, which seems pretty scary. - * So for now, treat the tuple as deleted and do not - * process. + * doing so would require changing heap_update and + * heap_delete to not complain about updating "invisible" + * tuples, which seems pretty scary (heap_lock_tuple will + * not complain, but few callers expect HeapTupleInvisible, + * and we're not one of them). So for now, treat the tuple + * as deleted and do not process. 
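A worked reading of ExecUpdateLockMode() above, under an assumed schema:

/*
 * Illustration (assumed schema): with
 *     CREATE TABLE t (id int PRIMARY KEY, val text);
 * an ON CONFLICT DO UPDATE whose SET list touches only "val" leaves
 * keyCols and updatedCols disjoint, so the row is locked with
 * LockTupleNoKeyExclusive; a SET list that assigns to "id" overlaps the
 * key bitmap and forces the stronger LockTupleExclusive.
 */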
*/ ReleaseBuffer(buffer); return NULL; @@ -2287,6 +2320,9 @@ EvalPlanQualFetch(EState *estate, Relation relation, int lockmode, ereport(ERROR, (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), errmsg("could not serialize access due to concurrent update"))); + + /* Should not encounter speculative tuple on recheck */ + Assert(!HeapTupleHeaderIsSpeculative(tuple.t_data)); if (!ItemPointerEquals(&hufd.ctid, &tuple.t_self)) { /* it was updated, so look at the updated version */ @@ -2302,6 +2338,9 @@ EvalPlanQualFetch(EState *estate, Relation relation, int lockmode, ReleaseBuffer(buffer); return NULL; + case HeapTupleInvisible: + elog(ERROR, "attempted to lock invisible tuple"); + default: ReleaseBuffer(buffer); elog(ERROR, "unrecognized heap_lock_tuple status: %u", diff --git a/src/backend/executor/nodeLockRows.c b/src/backend/executor/nodeLockRows.c index bb6df47a95d..5ae106c06ad 100644 --- a/src/backend/executor/nodeLockRows.c +++ b/src/backend/executor/nodeLockRows.c @@ -152,10 +152,11 @@ lnext: * case, so as to avoid the "Halloween problem" of repeated * update attempts. In the latter case it might be sensible * to fetch the updated tuple instead, but doing so would - * require changing heap_lock_tuple as well as heap_update and - * heap_delete to not complain about updating "invisible" - * tuples, which seems pretty scary. So for now, treat the - * tuple as deleted and do not process. + * require changing heap_update and heap_delete to not complain + * about updating "invisible" tuples, which seems pretty scary + * (heap_lock_tuple will not complain, but few callers expect + * HeapTupleInvisible, and we're not one of them). So for now, + * treat the tuple as deleted and do not process. */ goto lnext; @@ -228,6 +229,9 @@ lnext: /* Continue loop until we have all target tuples */ break; + case HeapTupleInvisible: + elog(ERROR, "attempted to lock invisible tuple"); + default: elog(ERROR, "unrecognized heap_lock_tuple status: %u", test); diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c index 31666edfa8a..34435c7e50a 100644 --- a/src/backend/executor/nodeModifyTable.c +++ b/src/backend/executor/nodeModifyTable.c @@ -46,12 +46,22 @@ #include "miscadmin.h" #include "nodes/nodeFuncs.h" #include "storage/bufmgr.h" +#include "storage/lmgr.h" #include "utils/builtins.h" #include "utils/memutils.h" #include "utils/rel.h" #include "utils/tqual.h" +static bool ExecOnConflictUpdate(ModifyTableState *mtstate, + ResultRelInfo *resultRelInfo, + ItemPointer conflictTid, + TupleTableSlot *planSlot, + TupleTableSlot *excludedSlot, + EState *estate, + bool canSetTag, + TupleTableSlot **returning); + /* * Verify that the tuples to be produced by INSERT or UPDATE match the * target relation's rowtype @@ -151,6 +161,51 @@ ExecProcessReturning(ProjectionInfo *projectReturning, return ExecProject(projectReturning, NULL); } +/* + * ExecCheckHeapTupleVisible -- verify heap tuple is visible + * + * It would not be consistent with guarantees of the higher isolation levels to + * proceed with avoiding insertion (taking speculative insertion's alternative + * path) on the basis of another tuple that is not visible to MVCC snapshot. + * Check for the need to raise a serialization failure, and do so as necessary. 
+ */ +static void +ExecCheckHeapTupleVisible(EState *estate, + HeapTuple tuple, + Buffer buffer) +{ + if (!IsolationUsesXactSnapshot()) + return; + + if (!HeapTupleSatisfiesVisibility(tuple, estate->es_snapshot, buffer)) + ereport(ERROR, + (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), + errmsg("could not serialize access due to concurrent update"))); +} + +/* + * ExecCheckTIDVisible -- convenience variant of ExecCheckHeapTupleVisible() + */ +static void +ExecCheckTIDVisible(EState *estate, + ResultRelInfo *relinfo, + ItemPointer tid) +{ + Relation rel = relinfo->ri_RelationDesc; + Buffer buffer; + HeapTupleData tuple; + + /* Redundantly check isolation level */ + if (!IsolationUsesXactSnapshot()) + return; + + tuple.t_self = *tid; + if (!heap_fetch(rel, SnapshotAny, &tuple, &buffer, false, NULL)) + elog(ERROR, "failed to fetch conflicting tuple for ON CONFLICT"); + ExecCheckHeapTupleVisible(estate, &tuple, buffer); + ReleaseBuffer(buffer); +} + /* ---------------------------------------------------------------- * ExecInsert * @@ -161,8 +216,11 @@ ExecProcessReturning(ProjectionInfo *projectReturning, * ---------------------------------------------------------------- */ static TupleTableSlot * -ExecInsert(TupleTableSlot *slot, +ExecInsert(ModifyTableState *mtstate, + TupleTableSlot *slot, TupleTableSlot *planSlot, + List *arbiterIndexes, + OnConflictAction onconflict, EState *estate, bool canSetTag) { @@ -199,7 +257,15 @@ ExecInsert(TupleTableSlot *slot, if (resultRelationDesc->rd_rel->relhasoids) HeapTupleSetOid(tuple, InvalidOid); - /* BEFORE ROW INSERT Triggers */ + /* + * BEFORE ROW INSERT Triggers. + * + * Note: We fire BEFORE ROW TRIGGERS for every attempted insertion in an + * INSERT ... ON CONFLICT statement. We cannot check for constraint + * violations before firing these triggers, because they can change the + * values to insert. Also, they can run arbitrary user-defined code with + * side-effects that we can't cancel by just not inserting the tuple. + */ if (resultRelInfo->ri_TrigDesc && resultRelInfo->ri_TrigDesc->trig_insert_before_row) { @@ -268,21 +334,132 @@ ExecInsert(TupleTableSlot *slot, if (resultRelationDesc->rd_att->constr) ExecConstraints(resultRelInfo, slot, estate); - /* - * insert the tuple - * - * Note: heap_insert returns the tid (location) of the new tuple in - * the t_self field. - */ - newId = heap_insert(resultRelationDesc, tuple, - estate->es_output_cid, 0, NULL); + if (onconflict != ONCONFLICT_NONE && resultRelInfo->ri_NumIndices > 0) + { + /* Perform a speculative insertion. */ + uint32 specToken; + ItemPointerData conflictTid; + bool specConflict; - /* - * insert index entries for tuple - */ - if (resultRelInfo->ri_NumIndices > 0) + /* + * Do a non-conclusive check for conflicts first. + * + * We're not holding any locks yet, so this doesn't guarantee that + * the later insert won't conflict. But it avoids leaving behind + * a lot of canceled speculative insertions, if you run a lot of + * INSERT ON CONFLICT statements that do conflict. + * + * We loop back here if we find a conflict below, either during + * the pre-check, or when we re-check after inserting the tuple + * speculatively. See the executor README for a full discussion + * of speculative insertion. + */ + vlock: + specConflict = false; + if (!ExecCheckIndexConstraints(slot, estate, &conflictTid, + arbiterIndexes)) + { + /* committed conflict tuple found */ + if (onconflict == ONCONFLICT_UPDATE) + { + /* + * In case of ON CONFLICT DO UPDATE, execute the UPDATE + * part. 
Be prepared to retry if the UPDATE fails because
+				 * of another concurrent UPDATE/DELETE to the conflict
+				 * tuple.
+				 */
+				TupleTableSlot *returning = NULL;
+
+				if (ExecOnConflictUpdate(mtstate, resultRelInfo,
+										 &conflictTid, planSlot, slot,
+										 estate, canSetTag, &returning))
+				{
+					InstrCountFiltered2(&mtstate->ps, 1);
+					return returning;
+				}
+				else
+					goto vlock;
+			}
+			else
+			{
+				/*
+				 * In case of ON CONFLICT DO NOTHING, do nothing.
+				 * However, verify that the tuple is visible to the
+				 * executor's MVCC snapshot at higher isolation levels.
+				 */
+				Assert(onconflict == ONCONFLICT_NOTHING);
+				ExecCheckTIDVisible(estate, resultRelInfo, &conflictTid);
+				InstrCountFiltered2(&mtstate->ps, 1);
+				return NULL;
+			}
+		}
+
+		/*
+		 * Before we start insertion proper, acquire our "speculative
+		 * insertion lock".  Others can use that to wait for us to decide
+		 * if we're going to go ahead with the insertion, instead of
+		 * waiting for the whole transaction to complete.
+		 */
+		specToken = SpeculativeInsertionLockAcquire(GetCurrentTransactionId());
+		HeapTupleHeaderSetSpeculativeToken(tuple->t_data, specToken);
+
+		/* insert the tuple, with the speculative token */
+		newId = heap_insert(resultRelationDesc, tuple,
+							estate->es_output_cid,
+							HEAP_INSERT_SPECULATIVE,
+							NULL);
+
+		/* insert index entries for tuple */
 		recheckIndexes = ExecInsertIndexTuples(slot, &(tuple->t_self),
-											   estate);
+											   estate, true, &specConflict,
+											   arbiterIndexes);
+
+		/* adjust the tuple's state accordingly */
+		if (!specConflict)
+			heap_finish_speculative(resultRelationDesc, tuple);
+		else
+			heap_abort_speculative(resultRelationDesc, tuple);
+
+		/*
+		 * Wake up anyone waiting for our decision.  They will re-check
+		 * the tuple, see that it's no longer speculative, and wait on our
+		 * XID as if this was a regularly inserted tuple all along.  Or if
+		 * we killed the tuple, they will see it's dead, and proceed as if
+		 * the tuple never existed.
+		 */
+		SpeculativeInsertionLockRelease(GetCurrentTransactionId());
+
+		/*
+		 * If there was a conflict, start from the beginning.  We'll do
+		 * the pre-check again, which will now find the conflicting tuple
+		 * (unless it aborts before we get there).
+		 */
+		if (specConflict)
+		{
+			list_free(recheckIndexes);
+			goto vlock;
+		}
+
+		/* Since there was no insertion conflict, we're done */
+	}
+	else
+	{
+		/*
+		 * insert the tuple normally.
+		 *
+		 * Note: heap_insert returns the tid (location) of the new tuple
+		 * in the t_self field.
+		 */
+		newId = heap_insert(resultRelationDesc, tuple,
+							estate->es_output_cid,
+							0, NULL);
+
+		/* insert index entries for tuple */
+		if (resultRelInfo->ri_NumIndices > 0)
+			recheckIndexes = ExecInsertIndexTuples(slot, &(tuple->t_self),
+												   estate, false, NULL,
+												   arbiterIndexes);
+	}
 	}

 	if (canSetTag)
@@ -800,7 +977,7 @@ lreplace:;
 		 */
 		if (resultRelInfo->ri_NumIndices > 0 && !HeapTupleIsHeapOnly(tuple))
 			recheckIndexes = ExecInsertIndexTuples(slot, &(tuple->t_self),
-												   estate);
+												   estate, false, NULL, NIL);
 	}

 	if (canSetTag)
@@ -832,6 +1009,190 @@ lreplace:;
 	return NULL;
 }

+/*
+ * ExecOnConflictUpdate --- execute UPDATE of INSERT ON CONFLICT DO UPDATE
+ *
+ * Try to lock tuple for update as part of speculative insertion.  If
+ * a qual originating from ON CONFLICT DO UPDATE is satisfied, update
+ * (but still lock row, even though it may not satisfy estate's
+ * snapshot).
+ *
+ * Returns true if we're done (with or without an update), or false if
+ * the caller must retry the INSERT from scratch.
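Condensed, the speculative path that the vlock loop above implements is (a sketch of the control flow, not compilable code):

/*
 * vlock:
 *   pre-check the arbiter indexes with ExecCheckIndexConstraints();
 *     on a committed conflict: DO UPDATE or DO NOTHING (may goto vlock);
 *   SpeculativeInsertionLockAcquire(xid);         publish our token
 *   heap_insert(..., HEAP_INSERT_SPECULATIVE);    tuple marked speculative
 *   ExecInsertIndexTuples(..., &specConflict);    arbiter indexes recheck
 *   if (specConflict)
 *       heap_abort_speculative(...);              back the tuple out
 *   else
 *       heap_finish_speculative(...);             confirm it
 *   SpeculativeInsertionLockRelease(xid);         wake up waiters
 *   if (specConflict)
 *       goto vlock;                               retry from the pre-check
 */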
+ */ +static bool +ExecOnConflictUpdate(ModifyTableState *mtstate, + ResultRelInfo *resultRelInfo, + ItemPointer conflictTid, + TupleTableSlot *planSlot, + TupleTableSlot *excludedSlot, + EState *estate, + bool canSetTag, + TupleTableSlot **returning) +{ + ExprContext *econtext = mtstate->ps.ps_ExprContext; + Relation relation = resultRelInfo->ri_RelationDesc; + List *onConflictSetWhere = resultRelInfo->ri_onConflictSetWhere; + HeapTupleData tuple; + HeapUpdateFailureData hufd; + LockTupleMode lockmode; + HTSU_Result test; + Buffer buffer; + + /* Determine lock mode to use */ + lockmode = ExecUpdateLockMode(estate, resultRelInfo); + + /* + * Lock tuple for update. Don't follow updates when tuple cannot be + * locked without doing so. A row locking conflict here means our + * previous conclusion that the tuple is conclusively committed is not + * true anymore. + */ + tuple.t_self = *conflictTid; + test = heap_lock_tuple(relation, &tuple, estate->es_output_cid, + lockmode, LockWaitBlock, false, &buffer, + &hufd); + switch (test) + { + case HeapTupleMayBeUpdated: + /* success! */ + break; + + case HeapTupleInvisible: + + /* + * This can occur when a just inserted tuple is updated again in + * the same command. E.g. because multiple rows with the same + * conflicting key values are inserted. + * + * This is somewhat similar to the ExecUpdate() + * HeapTupleSelfUpdated case. We do not want to proceed because + * it would lead to the same row being updated a second time in + * some unspecified order, and in contrast to plain UPDATEs + * there's no historical behavior to break. + * + * It is the user's responsibility to prevent this situation from + * occurring. These problems are why SQL-2003 similarly specifies + * that for SQL MERGE, an exception must be raised in the event of + * an attempt to update the same row twice. + */ + if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmin(tuple.t_data))) + ereport(ERROR, + (errcode(ERRCODE_CARDINALITY_VIOLATION), + errmsg("ON CONFLICT DO UPDATE command cannot affect row a second time"), + errhint("Ensure that no rows proposed for insertion within the same command have duplicate constrained values."))); + + /* This shouldn't happen */ + elog(ERROR, "attempted to lock invisible tuple"); + + case HeapTupleSelfUpdated: + + /* + * This state should never be reached. As a dirty snapshot is used + * to find conflicting tuples, speculative insertion wouldn't have + * seen this row to conflict with. + */ + elog(ERROR, "unexpected self-updated tuple"); + + case HeapTupleUpdated: + if (IsolationUsesXactSnapshot()) + ereport(ERROR, + (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), + errmsg("could not serialize access due to concurrent update"))); + + /* + * Tell caller to try again from the very start. + * + * It does not make sense to use the usual EvalPlanQual() style + * loop here, as the new version of the row might not conflict + * anymore, or the conflicting tuple has actually been deleted. + */ + ReleaseBuffer(buffer); + return false; + + default: + elog(ERROR, "unrecognized heap_lock_tuple status: %u", test); + } + + /* + * Success, the tuple is locked. + * + * Reset per-tuple memory context to free any expression evaluation + * storage allocated in the previous cycle. + */ + ResetExprContext(econtext); + + /* + * Verify that the tuple is visible to our MVCC snapshot if the current + * isolation level mandates that. + * + * It's not sufficient to rely on the check within ExecUpdate() as e.g. + * CONFLICT ... 
WHERE clause may prevent us from reaching that. + * + * This means we only ever continue when a new command in the current + * transaction could see the row, even though in READ COMMITTED mode the + * tuple will not be visible according to the current statement's + * snapshot. This is in line with the way UPDATE deals with newer tuple + * versions. + */ + ExecCheckHeapTupleVisible(estate, &tuple, buffer); + + /* Store target's existing tuple in the state's dedicated slot */ + ExecStoreTuple(&tuple, mtstate->mt_existing, buffer, false); + + /* + * Make tuple and any needed join variables available to ExecQual and + * ExecProject. The EXCLUDED tuple is installed in ecxt_innertuple, while + * the target's existing tuple is installed in the scantuple. EXCLUDED has + * been made to reference INNER_VAR in setrefs.c, but there is no other + * redirection. + */ + econtext->ecxt_scantuple = mtstate->mt_existing; + econtext->ecxt_innertuple = excludedSlot; + econtext->ecxt_outertuple = NULL; + + if (!ExecQual(onConflictSetWhere, econtext, false)) + { + ReleaseBuffer(buffer); + InstrCountFiltered1(&mtstate->ps, 1); + return true; /* done with the tuple */ + } + + if (resultRelInfo->ri_WithCheckOptions != NIL) + { + /* + * Check target's existing tuple against UPDATE-applicable USING + * security barrier quals (if any), enforced here as RLS checks/WCOs. + * + * The rewriter creates UPDATE RLS checks/WCOs for UPDATE security + * quals, and stores them as WCOs of "kind" WCO_RLS_CONFLICT_CHECK, + * but that's almost the extent of its special handling for ON + * CONFLICT DO UPDATE. + * + * The rewriter will also have associated UPDATE applicable straight + * RLS checks/WCOs for the benefit of the ExecUpdate() call that + * follows. INSERTs and UPDATEs naturally have mutually exclusive WCO + * kinds, so there is no danger of spurious over-enforcement in the + * INSERT or UPDATE path. 
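To make the slot arrangement above concrete (schema assumed purely for illustration):

/*
 * Illustration: for
 *     INSERT INTO t VALUES (1, 'new')
 *     ON CONFLICT (id) DO UPDATE SET val = EXCLUDED.val
 *     WHERE t.val IS DISTINCT FROM EXCLUDED.val;
 * the DO UPDATE WHERE clause and SET list are evaluated with
 *     econtext->ecxt_scantuple  = the existing row in "t" (target), and
 *     econtext->ecxt_innertuple = the proposed row ("EXCLUDED").
 */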
+		 */
+		ExecWithCheckOptions(WCO_RLS_CONFLICT_CHECK, resultRelInfo,
+							 mtstate->mt_existing,
+							 mtstate->ps.state);
+	}
+
+	/* Project the new tuple version */
+	ExecProject(resultRelInfo->ri_onConflictSetProj, NULL);
+
+	/* Execute UPDATE with projection */
+	*returning = ExecUpdate(&tuple.t_data->t_ctid, NULL,
+							mtstate->mt_conflproj, planSlot,
+							&mtstate->mt_epqstate, mtstate->ps.state,
+							canSetTag);
+
+	ReleaseBuffer(buffer);
+	return true;
+}
+
 /*
  * Process BEFORE EACH STATEMENT triggers
  */
@@ -843,6 +1204,9 @@ fireBSTriggers(ModifyTableState *node)
 	{
 		case CMD_INSERT:
 			ExecBSInsertTriggers(node->ps.state, node->resultRelInfo);
+			if (node->mt_onconflict == ONCONFLICT_UPDATE)
+				ExecBSUpdateTriggers(node->ps.state,
+									 node->resultRelInfo);
 			break;
 		case CMD_UPDATE:
 			ExecBSUpdateTriggers(node->ps.state, node->resultRelInfo);
@@ -865,6 +1229,9 @@ fireASTriggers(ModifyTableState *node)
 	switch (node->operation)
 	{
 		case CMD_INSERT:
+			if (node->mt_onconflict == ONCONFLICT_UPDATE)
+				ExecASUpdateTriggers(node->ps.state,
+									 node->resultRelInfo);
 			ExecASInsertTriggers(node->ps.state, node->resultRelInfo);
 			break;
 		case CMD_UPDATE:
@@ -1062,7 +1429,9 @@ ExecModifyTable(ModifyTableState *node)
 		switch (operation)
 		{
 			case CMD_INSERT:
-				slot = ExecInsert(slot, planSlot, estate, node->canSetTag);
+				slot = ExecInsert(node, slot, planSlot,
+								  node->mt_arbiterindexes, node->mt_onconflict,
+								  estate, node->canSetTag);
 				break;
 			case CMD_UPDATE:
 				slot = ExecUpdate(tupleid, oldtuple, slot, planSlot,
@@ -1137,6 +1506,8 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags)
 	mtstate->resultRelInfo = estate->es_result_relations + node->resultRelIndex;
 	mtstate->mt_arowmarks = (List **) palloc0(sizeof(List *) * nplans);
 	mtstate->mt_nplans = nplans;
+	mtstate->mt_onconflict = node->onConflictAction;
+	mtstate->mt_arbiterindexes = node->arbiterIndexes;

 	/* set up epqstate with dummy subplan data for the moment */
 	EvalPlanQualInit(&mtstate->mt_epqstate, estate, NULL, NIL, node->epqParam);
@@ -1175,7 +1546,7 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags)
 		if (resultRelInfo->ri_RelationDesc->rd_rel->relhasindex &&
 			operation != CMD_DELETE &&
 			resultRelInfo->ri_IndexRelationDescs == NULL)
-			ExecOpenIndices(resultRelInfo);
+			ExecOpenIndices(resultRelInfo, mtstate->mt_onconflict != ONCONFLICT_NONE);

 		/* Now init the plan for this result rel */
 		estate->es_result_relation_info = resultRelInfo;
@@ -1280,6 +1651,58 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags)
 	}

 	/*
+	 * If needed, initialize target list, projection and qual for ON CONFLICT
+	 * DO UPDATE.
+ */ + resultRelInfo = mtstate->resultRelInfo; + if (node->onConflictAction == ONCONFLICT_UPDATE) + { + ExprContext *econtext; + ExprState *setexpr; + TupleDesc tupDesc; + + /* insert may only have one plan, inheritance is not expanded */ + Assert(nplans == 1); + + /* already exists if created by RETURNING processing above */ + if (mtstate->ps.ps_ExprContext == NULL) + ExecAssignExprContext(estate, &mtstate->ps); + + econtext = mtstate->ps.ps_ExprContext; + + /* initialize slot for the existing tuple */ + mtstate->mt_existing = ExecInitExtraTupleSlot(mtstate->ps.state); + ExecSetSlotDescriptor(mtstate->mt_existing, + resultRelInfo->ri_RelationDesc->rd_att); + + mtstate->mt_excludedtlist = node->exclRelTlist; + + /* create target slot for UPDATE SET projection */ + tupDesc = ExecTypeFromTL((List *) node->onConflictSet, + false); + mtstate->mt_conflproj = ExecInitExtraTupleSlot(mtstate->ps.state); + ExecSetSlotDescriptor(mtstate->mt_conflproj, tupDesc); + + /* build UPDATE SET expression and projection state */ + setexpr = ExecInitExpr((Expr *) node->onConflictSet, &mtstate->ps); + resultRelInfo->ri_onConflictSetProj = + ExecBuildProjectionInfo((List *) setexpr, econtext, + mtstate->mt_conflproj, + resultRelInfo->ri_RelationDesc->rd_att); + + /* build DO UPDATE WHERE clause expression */ + if (node->onConflictWhere) + { + ExprState *qualexpr; + + qualexpr = ExecInitExpr((Expr *) node->onConflictWhere, + mtstate->mt_plans[0]); + + resultRelInfo->ri_onConflictSetWhere = (List *) qualexpr; + } + } + + /* * If we have any secondary relations in an UPDATE or DELETE, they need to * be treated like non-locked relations in SELECT FOR UPDATE, ie, the * EvalPlanQual mechanism needs to be told about them. Locate the diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index 1b02be287ce..a3139d3eb5d 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -81,6 +81,7 @@ _copyPlannedStmt(const PlannedStmt *from) COPY_SCALAR_FIELD(queryId); COPY_SCALAR_FIELD(hasReturning); COPY_SCALAR_FIELD(hasModifyingCTE); + COPY_SCALAR_FIELD(isUpsert); COPY_SCALAR_FIELD(canSetTag); COPY_SCALAR_FIELD(transientPlan); COPY_NODE_FIELD(planTree); @@ -185,6 +186,12 @@ _copyModifyTable(const ModifyTable *from) COPY_NODE_FIELD(fdwPrivLists); COPY_NODE_FIELD(rowMarks); COPY_SCALAR_FIELD(epqParam); + COPY_SCALAR_FIELD(onConflictAction); + COPY_NODE_FIELD(arbiterIndexes); + COPY_NODE_FIELD(onConflictSet); + COPY_NODE_FIELD(onConflictWhere); + COPY_SCALAR_FIELD(exclRelRTI); + COPY_NODE_FIELD(exclRelTlist); return newnode; } @@ -1787,6 +1794,22 @@ _copyCurrentOfExpr(const CurrentOfExpr *from) } /* + * _copyInferenceElem + */ +static InferenceElem * +_copyInferenceElem(const InferenceElem *from) +{ + InferenceElem *newnode = makeNode(InferenceElem); + + COPY_NODE_FIELD(expr); + COPY_SCALAR_FIELD(infercollid); + COPY_SCALAR_FIELD(inferopfamily); + COPY_SCALAR_FIELD(inferopcinputtype); + + return newnode; +} + +/* * _copyTargetEntry */ static TargetEntry * @@ -1852,6 +1875,26 @@ _copyFromExpr(const FromExpr *from) return newnode; } +/* + * _copyOnConflictExpr + */ +static OnConflictExpr * +_copyOnConflictExpr(const OnConflictExpr *from) +{ + OnConflictExpr *newnode = makeNode(OnConflictExpr); + + COPY_SCALAR_FIELD(action); + COPY_NODE_FIELD(arbiterElems); + COPY_NODE_FIELD(arbiterWhere); + COPY_NODE_FIELD(onConflictSet); + COPY_NODE_FIELD(onConflictWhere); + COPY_SCALAR_FIELD(constraint); + COPY_SCALAR_FIELD(exclRelIndex); + COPY_NODE_FIELD(exclRelTlist); + + return newnode; +} + /* 
**************************************************************** * relation.h copy functions * @@ -2135,6 +2178,33 @@ _copyWithClause(const WithClause *from) return newnode; } +static InferClause * +_copyInferClause(const InferClause *from) +{ + InferClause *newnode = makeNode(InferClause); + + COPY_NODE_FIELD(indexElems); + COPY_NODE_FIELD(whereClause); + COPY_STRING_FIELD(conname); + COPY_LOCATION_FIELD(location); + + return newnode; +} + +static OnConflictClause * +_copyOnConflictClause(const OnConflictClause *from) +{ + OnConflictClause *newnode = makeNode(OnConflictClause); + + COPY_SCALAR_FIELD(action); + COPY_NODE_FIELD(infer); + COPY_NODE_FIELD(targetList); + COPY_NODE_FIELD(whereClause); + COPY_LOCATION_FIELD(location); + + return newnode; +} + static CommonTableExpr * _copyCommonTableExpr(const CommonTableExpr *from) { @@ -2552,6 +2622,7 @@ _copyQuery(const Query *from) COPY_NODE_FIELD(jointree); COPY_NODE_FIELD(targetList); COPY_NODE_FIELD(withCheckOptions); + COPY_NODE_FIELD(onConflict); COPY_NODE_FIELD(returningList); COPY_NODE_FIELD(groupClause); COPY_NODE_FIELD(havingQual); @@ -2575,6 +2646,7 @@ _copyInsertStmt(const InsertStmt *from) COPY_NODE_FIELD(relation); COPY_NODE_FIELD(cols); COPY_NODE_FIELD(selectStmt); + COPY_NODE_FIELD(onConflictClause); COPY_NODE_FIELD(returningList); COPY_NODE_FIELD(withClause); @@ -4283,6 +4355,9 @@ copyObject(const void *from) case T_CurrentOfExpr: retval = _copyCurrentOfExpr(from); break; + case T_InferenceElem: + retval = _copyInferenceElem(from); + break; case T_TargetEntry: retval = _copyTargetEntry(from); break; @@ -4295,6 +4370,9 @@ copyObject(const void *from) case T_FromExpr: retval = _copyFromExpr(from); break; + case T_OnConflictExpr: + retval = _copyOnConflictExpr(from); + break; /* * RELATION NODES @@ -4753,6 +4831,12 @@ copyObject(const void *from) case T_WithClause: retval = _copyWithClause(from); break; + case T_InferClause: + retval = _copyInferClause(from); + break; + case T_OnConflictClause: + retval = _copyOnConflictClause(from); + break; case T_CommonTableExpr: retval = _copyCommonTableExpr(from); break; diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c index 1b9a83b93ed..7c86e919a49 100644 --- a/src/backend/nodes/equalfuncs.c +++ b/src/backend/nodes/equalfuncs.c @@ -683,6 +683,17 @@ _equalCurrentOfExpr(const CurrentOfExpr *a, const CurrentOfExpr *b) } static bool +_equalInferenceElem(const InferenceElem *a, const InferenceElem *b) +{ + COMPARE_NODE_FIELD(expr); + COMPARE_SCALAR_FIELD(infercollid); + COMPARE_SCALAR_FIELD(inferopfamily); + COMPARE_SCALAR_FIELD(inferopcinputtype); + + return true; +} + +static bool _equalTargetEntry(const TargetEntry *a, const TargetEntry *b) { COMPARE_NODE_FIELD(expr); @@ -728,6 +739,20 @@ _equalFromExpr(const FromExpr *a, const FromExpr *b) return true; } +static bool +_equalOnConflictExpr(const OnConflictExpr *a, const OnConflictExpr *b) +{ + COMPARE_SCALAR_FIELD(action); + COMPARE_NODE_FIELD(arbiterElems); + COMPARE_NODE_FIELD(arbiterWhere); + COMPARE_NODE_FIELD(onConflictSet); + COMPARE_NODE_FIELD(onConflictWhere); + COMPARE_SCALAR_FIELD(constraint); + COMPARE_SCALAR_FIELD(exclRelIndex); + COMPARE_NODE_FIELD(exclRelTlist); + + return true; +} /* * Stuff from relation.h @@ -868,6 +893,7 @@ _equalQuery(const Query *a, const Query *b) COMPARE_NODE_FIELD(jointree); COMPARE_NODE_FIELD(targetList); COMPARE_NODE_FIELD(withCheckOptions); + COMPARE_NODE_FIELD(onConflict); COMPARE_NODE_FIELD(returningList); COMPARE_NODE_FIELD(groupClause); COMPARE_NODE_FIELD(havingQual); 
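For orientation amid the node-support boilerplate, this is the shape of OnConflictExpr as exercised by the copy and equal functions above (a reconstructed sketch; the authoritative declaration lives in the node headers, and the field comments are inferred from usage):

typedef struct OnConflictExpr
{
    NodeTag     type;
    OnConflictAction action;        /* DO NOTHING or DO UPDATE */
    List       *arbiterElems;       /* unique-index inference elements */
    Node       *arbiterWhere;       /* inference WHERE clause */
    List       *onConflictSet;      /* DO UPDATE SET targetlist */
    Node       *onConflictWhere;    /* DO UPDATE ... WHERE qual */
    Oid         constraint;         /* ON constraint_name variant */
    int         exclRelIndex;       /* RT index of EXCLUDED pseudo-relation */
    List       *exclRelTlist;       /* targetlist of EXCLUDED */
} OnConflictExpr;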
@@ -889,6 +915,7 @@ _equalInsertStmt(const InsertStmt *a, const InsertStmt *b) COMPARE_NODE_FIELD(relation); COMPARE_NODE_FIELD(cols); COMPARE_NODE_FIELD(selectStmt); + COMPARE_NODE_FIELD(onConflictClause); COMPARE_NODE_FIELD(returningList); COMPARE_NODE_FIELD(withClause); @@ -2434,6 +2461,29 @@ _equalWithClause(const WithClause *a, const WithClause *b) } static bool +_equalInferClause(const InferClause *a, const InferClause *b) +{ + COMPARE_NODE_FIELD(indexElems); + COMPARE_NODE_FIELD(whereClause); + COMPARE_STRING_FIELD(conname); + COMPARE_LOCATION_FIELD(location); + + return true; +} + +static bool +_equalOnConflictClause(const OnConflictClause *a, const OnConflictClause *b) +{ + COMPARE_SCALAR_FIELD(action); + COMPARE_NODE_FIELD(infer); + COMPARE_NODE_FIELD(targetList); + COMPARE_NODE_FIELD(whereClause); + COMPARE_LOCATION_FIELD(location); + + return true; +} + +static bool _equalCommonTableExpr(const CommonTableExpr *a, const CommonTableExpr *b) { COMPARE_STRING_FIELD(ctename); @@ -2712,6 +2762,9 @@ equal(const void *a, const void *b) case T_CurrentOfExpr: retval = _equalCurrentOfExpr(a, b); break; + case T_InferenceElem: + retval = _equalInferenceElem(a, b); + break; case T_TargetEntry: retval = _equalTargetEntry(a, b); break; @@ -2721,6 +2774,9 @@ equal(const void *a, const void *b) case T_FromExpr: retval = _equalFromExpr(a, b); break; + case T_OnConflictExpr: + retval = _equalOnConflictExpr(a, b); + break; case T_JoinExpr: retval = _equalJoinExpr(a, b); break; @@ -3169,6 +3225,12 @@ equal(const void *a, const void *b) case T_WithClause: retval = _equalWithClause(a, b); break; + case T_InferClause: + retval = _equalInferClause(a, b); + break; + case T_OnConflictClause: + retval = _equalOnConflictClause(a, b); + break; case T_CommonTableExpr: retval = _equalCommonTableExpr(a, b); break; diff --git a/src/backend/nodes/nodeFuncs.c b/src/backend/nodes/nodeFuncs.c index d6f1f5bb6d7..4135f9c3cfc 100644 --- a/src/backend/nodes/nodeFuncs.c +++ b/src/backend/nodes/nodeFuncs.c @@ -235,6 +235,13 @@ exprType(const Node *expr) case T_CurrentOfExpr: type = BOOLOID; break; + case T_InferenceElem: + { + const InferenceElem *n = (const InferenceElem *) expr; + + type = exprType((Node *) n->expr); + } + break; case T_PlaceHolderVar: type = exprType((Node *) ((const PlaceHolderVar *) expr)->phexpr); break; @@ -894,6 +901,9 @@ exprCollation(const Node *expr) case T_CurrentOfExpr: coll = InvalidOid; /* result is always boolean */ break; + case T_InferenceElem: + coll = exprCollation((Node *) ((const InferenceElem *) expr)->expr); + break; case T_PlaceHolderVar: coll = exprCollation((Node *) ((const PlaceHolderVar *) expr)->phexpr); break; @@ -1484,6 +1494,12 @@ exprLocation(const Node *expr) case T_WithClause: loc = ((const WithClause *) expr)->location; break; + case T_InferClause: + loc = ((const InferClause *) expr)->location; + break; + case T_OnConflictClause: + loc = ((const OnConflictClause *) expr)->location; + break; case T_CommonTableExpr: loc = ((const CommonTableExpr *) expr)->location; break; @@ -1491,6 +1507,10 @@ exprLocation(const Node *expr) /* just use argument's location */ loc = exprLocation((Node *) ((const PlaceHolderVar *) expr)->phexpr); break; + case T_InferenceElem: + /* just use nested expr's location */ + loc = exprLocation((Node *) ((const InferenceElem *) expr)->expr); + break; default: /* for any other node type it's just unknown... 
*/ loc = -1; @@ -1890,6 +1910,20 @@ expression_tree_walker(Node *node, return true; } break; + case T_OnConflictExpr: + { + OnConflictExpr *onconflict = (OnConflictExpr *) node; + + if (walker((Node *) onconflict->arbiterElems, context)) + return true; + if (walker(onconflict->arbiterWhere, context)) + return true; + if (walker(onconflict->onConflictSet, context)) + return true; + if (walker(onconflict->onConflictWhere, context)) + return true; + } + break; case T_JoinExpr: { JoinExpr *join = (JoinExpr *) node; @@ -1920,6 +1954,8 @@ expression_tree_walker(Node *node, break; case T_PlaceHolderVar: return walker(((PlaceHolderVar *) node)->phexpr, context); + case T_InferenceElem: + return walker(((InferenceElem *) node)->expr, context); case T_AppendRelInfo: { AppendRelInfo *appinfo = (AppendRelInfo *) node; @@ -1968,6 +2004,8 @@ query_tree_walker(Query *query, return true; if (walker((Node *) query->withCheckOptions, context)) return true; + if (walker((Node *) query->onConflict, context)) + return true; if (walker((Node *) query->returningList, context)) return true; if (walker((Node *) query->jointree, context)) @@ -2594,6 +2632,20 @@ expression_tree_mutator(Node *node, return (Node *) newnode; } break; + case T_OnConflictExpr: + { + OnConflictExpr *oc = (OnConflictExpr *) node; + OnConflictExpr *newnode; + + FLATCOPY(newnode, oc, OnConflictExpr); + MUTATE(newnode->arbiterElems, oc->arbiterElems, List *); + MUTATE(newnode->arbiterWhere, oc->arbiterWhere, Node *); + MUTATE(newnode->onConflictSet, oc->onConflictSet, List *); + MUTATE(newnode->onConflictWhere, oc->onConflictWhere, Node *); + + return (Node *) newnode; + } + break; case T_JoinExpr: { JoinExpr *join = (JoinExpr *) node; @@ -2630,6 +2682,16 @@ expression_tree_mutator(Node *node, return (Node *) newnode; } break; + case T_InferenceElem: + { + InferenceElem *inferenceelemdexpr = (InferenceElem *) node; + InferenceElem *newnode; + + FLATCOPY(newnode, inferenceelemdexpr, InferenceElem); + MUTATE(newnode->expr, newnode->expr, Node *); + return (Node *) newnode; + } + break; case T_AppendRelInfo: { AppendRelInfo *appinfo = (AppendRelInfo *) node; @@ -2709,6 +2771,7 @@ query_tree_mutator(Query *query, MUTATE(query->targetList, query->targetList, List *); MUTATE(query->withCheckOptions, query->withCheckOptions, List *); + MUTATE(query->onConflict, query->onConflict, OnConflictExpr *); MUTATE(query->returningList, query->returningList, List *); MUTATE(query->jointree, query->jointree, FromExpr *); MUTATE(query->setOperations, query->setOperations, Node *); @@ -2978,6 +3041,8 @@ raw_expression_tree_walker(Node *node, return true; if (walker(stmt->selectStmt, context)) return true; + if (walker(stmt->onConflictClause, context)) + return true; if (walker(stmt->returningList, context)) return true; if (walker(stmt->withClause, context)) @@ -3217,6 +3282,28 @@ raw_expression_tree_walker(Node *node, break; case T_WithClause: return walker(((WithClause *) node)->ctes, context); + case T_InferClause: + { + InferClause *stmt = (InferClause *) node; + + if (walker(stmt->indexElems, context)) + return true; + if (walker(stmt->whereClause, context)) + return true; + } + break; + case T_OnConflictClause: + { + OnConflictClause *stmt = (OnConflictClause *) node; + + if (walker(stmt->infer, context)) + return true; + if (walker(stmt->targetList, context)) + return true; + if (walker(stmt->whereClause, context)) + return true; + } + break; case T_CommonTableExpr: return walker(((CommonTableExpr *) node)->ctequery, context); default: diff --git 
a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index d5ddd0b3592..bc891d391f5 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -243,6 +243,7 @@ _outPlannedStmt(StringInfo str, const PlannedStmt *node) WRITE_UINT_FIELD(queryId); WRITE_BOOL_FIELD(hasReturning); WRITE_BOOL_FIELD(hasModifyingCTE); + WRITE_BOOL_FIELD(isUpsert); WRITE_BOOL_FIELD(canSetTag); WRITE_BOOL_FIELD(transientPlan); WRITE_NODE_FIELD(planTree); @@ -337,6 +338,12 @@ _outModifyTable(StringInfo str, const ModifyTable *node) WRITE_NODE_FIELD(fdwPrivLists); WRITE_NODE_FIELD(rowMarks); WRITE_INT_FIELD(epqParam); + WRITE_ENUM_FIELD(onConflictAction, OnConflictAction); + WRITE_NODE_FIELD(arbiterIndexes); + WRITE_NODE_FIELD(onConflictSet); + WRITE_NODE_FIELD(onConflictWhere); + WRITE_INT_FIELD(exclRelRTI); + WRITE_NODE_FIELD(exclRelTlist); } static void @@ -1437,6 +1444,17 @@ _outCurrentOfExpr(StringInfo str, const CurrentOfExpr *node) } static void +_outInferenceElem(StringInfo str, const InferenceElem *node) +{ + WRITE_NODE_TYPE("INFERENCEELEM"); + + WRITE_NODE_FIELD(expr); + WRITE_OID_FIELD(infercollid); + WRITE_OID_FIELD(inferopfamily); + WRITE_OID_FIELD(inferopcinputtype); +} + +static void _outTargetEntry(StringInfo str, const TargetEntry *node) { WRITE_NODE_TYPE("TARGETENTRY"); @@ -1482,6 +1500,21 @@ _outFromExpr(StringInfo str, const FromExpr *node) WRITE_NODE_FIELD(quals); } +static void +_outOnConflictExpr(StringInfo str, const OnConflictExpr *node) +{ + WRITE_NODE_TYPE("ONCONFLICTEXPR"); + + WRITE_ENUM_FIELD(action, OnConflictAction); + WRITE_NODE_FIELD(arbiterElems); + WRITE_NODE_FIELD(arbiterWhere); + WRITE_NODE_FIELD(onConflictSet); + WRITE_NODE_FIELD(onConflictWhere); + WRITE_OID_FIELD(constraint); + WRITE_INT_FIELD(exclRelIndex); + WRITE_NODE_FIELD(exclRelTlist); +} + /***************************************************************************** * * Stuff from relation.h. 
@@ -2319,6 +2352,7 @@ _outQuery(StringInfo str, const Query *node) WRITE_NODE_FIELD(jointree); WRITE_NODE_FIELD(targetList); WRITE_NODE_FIELD(withCheckOptions); + WRITE_NODE_FIELD(onConflict); WRITE_NODE_FIELD(returningList); WRITE_NODE_FIELD(groupClause); WRITE_NODE_FIELD(havingQual); @@ -3112,6 +3146,9 @@ _outNode(StringInfo str, const void *obj) case T_CurrentOfExpr: _outCurrentOfExpr(str, obj); break; + case T_InferenceElem: + _outInferenceElem(str, obj); + break; case T_TargetEntry: _outTargetEntry(str, obj); break; @@ -3124,7 +3161,9 @@ _outNode(StringInfo str, const void *obj) case T_FromExpr: _outFromExpr(str, obj); break; - + case T_OnConflictExpr: + _outOnConflictExpr(str, obj); + break; case T_Path: _outPath(str, obj); break; diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c index d1ced0cc4b3..8136306e1e5 100644 --- a/src/backend/nodes/readfuncs.c +++ b/src/backend/nodes/readfuncs.c @@ -214,6 +214,7 @@ _readQuery(void) READ_NODE_FIELD(jointree); READ_NODE_FIELD(targetList); READ_NODE_FIELD(withCheckOptions); + READ_NODE_FIELD(onConflict); READ_NODE_FIELD(returningList); READ_NODE_FIELD(groupClause); READ_NODE_FIELD(havingQual); @@ -1131,6 +1132,22 @@ _readCurrentOfExpr(void) } /* + * _readInferenceElem + */ +static InferenceElem * +_readInferenceElem(void) +{ + READ_LOCALS(InferenceElem); + + READ_NODE_FIELD(expr); + READ_OID_FIELD(infercollid); + READ_OID_FIELD(inferopfamily); + READ_OID_FIELD(inferopcinputtype); + + READ_DONE(); +} + +/* * _readTargetEntry */ static TargetEntry * @@ -1196,6 +1213,25 @@ _readFromExpr(void) READ_DONE(); } +/* + * _readOnConflictExpr + */ +static OnConflictExpr * +_readOnConflictExpr(void) +{ + READ_LOCALS(OnConflictExpr); + + READ_ENUM_FIELD(action, OnConflictAction); + READ_NODE_FIELD(arbiterElems); + READ_NODE_FIELD(arbiterWhere); + READ_NODE_FIELD(onConflictSet); + READ_NODE_FIELD(onConflictWhere); + READ_OID_FIELD(constraint); + READ_INT_FIELD(exclRelIndex); + READ_NODE_FIELD(exclRelTlist); + + READ_DONE(); +} /* * Stuff from parsenodes.h. 
@@ -1395,6 +1431,8 @@ parseNodeString(void) return_value = _readSetToDefault(); else if (MATCH("CURRENTOFEXPR", 13)) return_value = _readCurrentOfExpr(); + else if (MATCH("INFERENCEELEM", 13)) + return_value = _readInferenceElem(); else if (MATCH("TARGETENTRY", 11)) return_value = _readTargetEntry(); else if (MATCH("RANGETBLREF", 11)) @@ -1403,6 +1441,8 @@ parseNodeString(void) return_value = _readJoinExpr(); else if (MATCH("FROMEXPR", 8)) return_value = _readFromExpr(); + else if (MATCH("ONCONFLICTEXPR", 14)) + return_value = _readOnConflictExpr(); else if (MATCH("RTE", 3)) return_value = _readRangeTblEntry(); else if (MATCH("RANGETBLFUNCTION", 16)) diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index eeb2a417643..3246332d6e3 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -4868,7 +4868,7 @@ make_modifytable(PlannerInfo *root, Index nominalRelation, List *resultRelations, List *subplans, List *withCheckOptionLists, List *returningLists, - List *rowMarks, int epqParam) + List *rowMarks, OnConflictExpr *onconflict, int epqParam) { ModifyTable *node = makeNode(ModifyTable); Plan *plan = &node->plan; @@ -4918,6 +4918,30 @@ make_modifytable(PlannerInfo *root, node->resultRelations = resultRelations; node->resultRelIndex = -1; /* will be set correctly in setrefs.c */ node->plans = subplans; + if (!onconflict) + { + node->onConflictAction = ONCONFLICT_NONE; + node->onConflictSet = NIL; + node->onConflictWhere = NULL; + node->arbiterIndexes = NIL; + } + else + { + node->onConflictAction = onconflict->action; + node->onConflictSet = onconflict->onConflictSet; + node->onConflictWhere = onconflict->onConflictWhere; + + /* + * If a set of unique index inference elements was provided (an + * INSERT...ON CONFLICT "inference specification"), then infer + * appropriate unique indexes (or throw an error if none are + * available). 
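A small example of the inference that infer_arbiter_indexes() performs (schema assumed for illustration):

/*
 * Illustration:
 *     CREATE TABLE t (a int, b int, c int, UNIQUE (a, b));
 *
 *     INSERT INTO t ... ON CONFLICT (b, a) DO NOTHING;  infers UNIQUE (a, b)
 *     INSERT INTO t ... ON CONFLICT (a) DO NOTHING;     no usable index: error
 *
 * Column order in the specification is irrelevant, but the set of
 * columns/expressions must match some unique index exactly.
 */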
+		 */
+		node->arbiterIndexes = infer_arbiter_indexes(root);
+
+		node->exclRelRTI = onconflict->exclRelIndex;
+		node->exclRelTlist = onconflict->exclRelTlist;
+	}
 	node->withCheckOptionLists = withCheckOptionLists;
 	node->returningLists = returningLists;
 	node->rowMarks = rowMarks;
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index ea4d4c55cbd..c80d45acaa9 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -243,6 +243,8 @@ standard_planner(Query *parse, int cursorOptions, ParamListInfo boundParams)
 	result->queryId = parse->queryId;
 	result->hasReturning = (parse->returningList != NIL);
 	result->hasModifyingCTE = parse->hasModifyingCTE;
+	result->isUpsert =
+		(parse->onConflict && parse->onConflict->action == ONCONFLICT_UPDATE);
 	result->canSetTag = parse->canSetTag;
 	result->transientPlan = glob->transientPlan;
 	result->planTree = top_plan;
@@ -462,6 +464,17 @@ subquery_planner(PlannerGlobal *glob, Query *parse,
 	parse->limitCount = preprocess_expression(root, parse->limitCount,
 											  EXPRKIND_LIMIT);

+	if (parse->onConflict)
+	{
+		parse->onConflict->onConflictSet = (List *)
+			preprocess_expression(root, (Node *) parse->onConflict->onConflictSet,
+								  EXPRKIND_TARGET);
+
+		parse->onConflict->onConflictWhere =
+			preprocess_expression(root, (Node *) parse->onConflict->onConflictWhere,
+								  EXPRKIND_QUAL);
+	}
+
 	root->append_rel_list = (List *)
 		preprocess_expression(root, (Node *) root->append_rel_list,
 							  EXPRKIND_APPINFO);
@@ -612,6 +625,7 @@ subquery_planner(PlannerGlobal *glob, Query *parse,
 											withCheckOptionLists,
 											returningLists,
 											rowMarks,
+											parse->onConflict,
 											SS_assign_special_param(root));
 		}
 	}
@@ -802,6 +816,8 @@ inheritance_planner(PlannerInfo *root)
 	List	   *rowMarks;
 	ListCell   *lc;

+	Assert(parse->commandType != CMD_INSERT);
+
 	/*
 	 * We generate a modified instance of the original Query for each target
 	 * relation, plan that, and put all the plans into a list that will be
@@ -1046,6 +1062,8 @@ inheritance_planner(PlannerInfo *root)
 		if (parse->returningList)
 			returningLists = lappend(returningLists,
 									 subroot.parse->returningList);
+
+		Assert(!parse->onConflict);
 	}

 	/* Mark result as unordered (probably unnecessary) */
@@ -1095,6 +1113,7 @@ inheritance_planner(PlannerInfo *root)
 									 withCheckOptionLists,
 									 returningLists,
 									 rowMarks,
+									 NULL,
 									 SS_assign_special_param(root));
 }
@@ -1228,6 +1247,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
 	bool		use_hashed_grouping = false;
 	WindowFuncLists *wflists = NULL;
 	List	   *activeWindows = NIL;
+	OnConflictExpr *onconfl;

 	MemSet(&agg_costs, 0, sizeof(AggClauseCosts));

@@ -1242,6 +1262,13 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
 	/* Preprocess targetlist */
 	tlist = preprocess_targetlist(root, tlist);

+	onconfl = parse->onConflict;
+	if (onconfl)
+		onconfl->onConflictSet =
+			preprocess_onconflict_targetlist(onconfl->onConflictSet,
+											 parse->resultRelation,
+											 parse->rtable);
+
 	/*
 	 * Expand any rangetable entries that have security barrier quals.
 	 * This may add new security barrier subquery RTEs to the rangetable.
diff --git a/src/backend/optimizer/plan/setrefs.c b/src/backend/optimizer/plan/setrefs.c
index b7d6ff11223..612d32571af 100644
--- a/src/backend/optimizer/plan/setrefs.c
+++ b/src/backend/optimizer/plan/setrefs.c
@@ -739,7 +739,35 @@ set_plan_refs(PlannerInfo *root, Plan *plan, int rtoffset)
 				splan->plan.targetlist = copyObject(linitial(newRL));
 			}

+			/*
+			 * We treat ModifyTable with ON CONFLICT as a form of 'pseudo
+			 * join', where the inner side is the EXCLUDED tuple.
Therefore
+			 * use fix_join_expr to set up the relevant variables to
+			 * INNER_VAR. We explicitly don't create any OUTER_VARs as
+			 * those are already used by RETURNING and it seems better to
+			 * be non-conflicting.
+			 */
+			if (splan->onConflictSet)
+			{
+				indexed_tlist *itlist;
+
+				itlist = build_tlist_index(splan->exclRelTlist);
+
+				splan->onConflictSet =
+					fix_join_expr(root, splan->onConflictSet,
+								  NULL, itlist,
+								  linitial_int(splan->resultRelations),
+								  rtoffset);
+
+				splan->onConflictWhere = (Node *)
+					fix_join_expr(root, (List *) splan->onConflictWhere,
+								  NULL, itlist,
+								  linitial_int(splan->resultRelations),
+								  rtoffset);
+			}
+
 			splan->nominalRelation += rtoffset;
+			splan->exclRelRTI += rtoffset;

 			foreach(l, splan->resultRelations)
 			{
@@ -1846,7 +1874,8 @@ search_indexed_tlist_for_sortgroupref(Node *node,
 * inner_itlist = NULL and acceptable_rel = the ID of the target relation.
 *
 * 'clauses' is the targetlist or list of join clauses
- * 'outer_itlist' is the indexed target list of the outer join relation
+ * 'outer_itlist' is the indexed target list of the outer join relation,
+ *		or NULL
 * 'inner_itlist' is the indexed target list of the inner join relation,
 *		or NULL
 * 'acceptable_rel' is either zero or the rangetable index of a relation
@@ -1886,12 +1915,17 @@ fix_join_expr_mutator(Node *node, fix_join_expr_context *context)
 		Var		   *var = (Var *) node;

 		/* First look for the var in the input tlists */
-		newvar = search_indexed_tlist_for_var(var,
-											  context->outer_itlist,
-											  OUTER_VAR,
-											  context->rtoffset);
-		if (newvar)
-			return (Node *) newvar;
+		if (context->outer_itlist)
+		{
+			newvar = search_indexed_tlist_for_var(var,
+												  context->outer_itlist,
+												  OUTER_VAR,
+												  context->rtoffset);
+			if (newvar)
+				return (Node *) newvar;
+		}
+
+		/* Then in the inner */
 		if (context->inner_itlist)
 		{
 			newvar = search_indexed_tlist_for_var(var,
@@ -1920,7 +1954,7 @@ fix_join_expr_mutator(Node *node, fix_join_expr_context *context)
 		PlaceHolderVar *phv = (PlaceHolderVar *) node;

 		/* See if the PlaceHolderVar has bubbled up from a lower plan node */
-		if (context->outer_itlist->has_ph_vars)
+		if (context->outer_itlist && context->outer_itlist->has_ph_vars)
 		{
 			newvar = search_indexed_tlist_for_non_var((Node *) phv,
 													  context->outer_itlist,
@@ -1943,7 +1977,7 @@ fix_join_expr_mutator(Node *node, fix_join_expr_context *context)
 	if (IsA(node, Param))
 		return fix_param_node(context->root, (Param *) node);
 	/* Try matching more complex expressions too, if tlists have any */
-	if (context->outer_itlist->has_non_vars)
+	if (context->outer_itlist && context->outer_itlist->has_non_vars)
 	{
 		newvar = search_indexed_tlist_for_non_var(node,
 												  context->outer_itlist,
diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c
index acfd0bcfbe5..0220672fc43 100644
--- a/src/backend/optimizer/plan/subselect.c
+++ b/src/backend/optimizer/plan/subselect.c
@@ -2340,6 +2340,10 @@ finalize_plan(PlannerInfo *root, Plan *plan, Bitmapset *valid_params,
 							  locally_added_param);
 			finalize_primnode((Node *) mtplan->returningLists,
 							  &context);
+			finalize_primnode((Node *) mtplan->onConflictSet,
+							  &context);
+			finalize_primnode((Node *) mtplan->onConflictWhere,
+							  &context);
 			foreach(l, mtplan->plans)
 			{
 				context.paramids =
diff --git a/src/backend/optimizer/prep/prepjointree.c b/src/backend/optimizer/prep/prepjointree.c
index 50acfe40e97..4f0dc80d025 100644
--- a/src/backend/optimizer/prep/prepjointree.c
+++ b/src/backend/optimizer/prep/prepjointree.c
@@ -1030,6 +1030,9 @@ pull_up_simple_subquery(PlannerInfo *root, Node *jtnode,
RangeTblEntry *rte, pullup_replace_vars((Node *) parse->targetList, &rvcontext); parse->returningList = (List *) pullup_replace_vars((Node *) parse->returningList, &rvcontext); + if (parse->onConflict) + parse->onConflict->onConflictSet = (List *) + pullup_replace_vars((Node *) parse->onConflict->onConflictSet, &rvcontext); replace_vars_in_jointree((Node *) parse->jointree, &rvcontext, lowest_nulling_outer_join); Assert(parse->setOperations == NULL); @@ -1605,6 +1608,9 @@ pull_up_simple_values(PlannerInfo *root, Node *jtnode, RangeTblEntry *rte) pullup_replace_vars((Node *) parse->targetList, &rvcontext); parse->returningList = (List *) pullup_replace_vars((Node *) parse->returningList, &rvcontext); + if (parse->onConflict) + parse->onConflict->onConflictSet = (List *) + pullup_replace_vars((Node *) parse->onConflict->onConflictSet, &rvcontext); replace_vars_in_jointree((Node *) parse->jointree, &rvcontext, NULL); Assert(parse->setOperations == NULL); parse->havingQual = pullup_replace_vars(parse->havingQual, &rvcontext); diff --git a/src/backend/optimizer/prep/preptlist.c b/src/backend/optimizer/prep/preptlist.c index 580c8467703..6b0c689e0c9 100644 --- a/src/backend/optimizer/prep/preptlist.c +++ b/src/backend/optimizer/prep/preptlist.c @@ -181,6 +181,19 @@ preprocess_targetlist(PlannerInfo *root, List *tlist) return tlist; } +/* + * preprocess_onconflict_targetlist + * Process ON CONFLICT SET targetlist. + * + * Returns the new targetlist. + */ +List * +preprocess_onconflict_targetlist(List *tlist, int result_relation, List *range_table) +{ + return expand_targetlist(tlist, CMD_UPDATE, result_relation, range_table); +} + + /***************************************************************************** * * TARGETLIST EXPANSION diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c index 068ab39dd43..8bcc5064a37 100644 --- a/src/backend/optimizer/util/plancat.c +++ b/src/backend/optimizer/util/plancat.c @@ -25,6 +25,7 @@ #include "access/transam.h" #include "access/xlog.h" #include "catalog/catalog.h" +#include "catalog/dependency.h" #include "catalog/heap.h" #include "foreign/fdwapi.h" #include "miscadmin.h" @@ -50,6 +51,8 @@ int constraint_exclusion = CONSTRAINT_EXCLUSION_PARTITION; get_relation_info_hook_type get_relation_info_hook = NULL; +static bool infer_collation_opclass_match(InferenceElem *elem, Relation idxRel, + Bitmapset *inferAttrs, List *idxExprs); static int32 get_rel_data_width(Relation rel, int32 *attr_widths); static List *get_relation_constraints(PlannerInfo *root, Oid relationObjectId, RelOptInfo *rel, @@ -400,6 +403,355 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent, } /* + * infer_arbiter_indexes - + * Determine the unique indexes used to arbitrate speculative insertion. + * + * Uses user-supplied inference clause expressions and predicate to match a + * unique index from those defined and ready on the heap relation (target). + * An exact match is required on columns/expressions (although they can appear + * in any order). However, the predicate given by the user need only restrict + * insertion to a subset of some part of the table covered by some particular + * unique index (in particular, a partial unique index) in order to be + * inferred. + * + * The implementation does not consider which B-Tree operator class any + * particular available unique index attribute uses, unless one was specified + * in the inference specification. The same is true of collations. 
In + * particular, there is no system dependency on the default operator class for + * the purposes of inference. If no opclass (or collation) is specified, then + * all matching indexes (that may or may not match the default in terms of + * each attribute opclass/collation) are used for inference. + */ +List * +infer_arbiter_indexes(PlannerInfo *root) +{ + OnConflictExpr *onconflict = root->parse->onConflict; + /* Iteration state */ + Relation relation; + Oid relationObjectId; + Oid indexOidFromConstraint = InvalidOid; + List *indexList; + ListCell *l; + + /* Normalized inference attributes and inference expressions: */ + Bitmapset *inferAttrs = NULL; + List *inferElems = NIL; + + /* Result */ + List *candidates = NIL; + + /* + * Quickly return NIL for ON CONFLICT DO NOTHING without an inference + * specification or named constraint. ON CONFLICT DO UPDATE statements + * must always provide one or the other (but parser ought to have caught + * that already). + */ + if (onconflict->arbiterElems == NIL && + onconflict->constraint == InvalidOid) + return NIL; + + /* + * We need not lock the relation since it was already locked, either by + * the rewriter or when expand_inherited_rtentry() added it to the query's + * rangetable. + */ + relationObjectId = rt_fetch(root->parse->resultRelation, + root->parse->rtable)->relid; + + relation = heap_open(relationObjectId, NoLock); + + /* + * Build normalized/BMS representation of plain indexed attributes, as + * well as direct list of inference elements. This is required for + * matching the cataloged definition of indexes. + */ + foreach(l, onconflict->arbiterElems) + { + InferenceElem *elem; + Var *var; + int attno; + + elem = (InferenceElem *) lfirst(l); + + /* + * Parse analysis of inference elements performs full parse analysis + * of Vars, even for non-expression indexes (in contrast with utility + * command related use of IndexElem). However, indexes are cataloged + * with simple attribute numbers for non-expression indexes. Those + * are handled later. + */ + if (!IsA(elem->expr, Var)) + { + inferElems = lappend(inferElems, elem->expr); + continue; + } + + var = (Var *) elem->expr; + attno = var->varattno; + + if (attno < 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_COLUMN_REFERENCE), + errmsg("system columns cannot be used in an ON CONFLICT clause"))); + else if (attno == 0) + elog(ERROR, "whole row unique index inference specifications are not valid"); + + inferAttrs = bms_add_member(inferAttrs, attno); + } + + /* + * Lookup named constraint's index. This is not immediately returned + * because some additional sanity checks are required. + */ + if (onconflict->constraint != InvalidOid) + { + indexOidFromConstraint = get_constraint_index(onconflict->constraint); + + if (indexOidFromConstraint == InvalidOid) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("constraint in ON CONFLICT clause has no associated index"))); + } + + indexList = RelationGetIndexList(relation); + + /* + * Using that representation, iterate through the list of indexes on the + * target relation to try and find a match + */ + foreach(l, indexList) + { + Oid indexoid = lfirst_oid(l); + Relation idxRel; + Form_pg_index idxForm; + Bitmapset *indexedAttrs = NULL; + List *idxExprs; + List *predExprs; + List *whereExplicit; + AttrNumber natt; + ListCell *el; + + /* + * Extract info from the relation descriptor for the index. We know + * that this is a target, so get lock type it is known will ultimately + * be required by the executor. 
+ * + * Let executor complain about !indimmediate case directly, because + * enforcement needs to occur there anyway when an inference clause is + * omitted. + */ + idxRel = index_open(indexoid, RowExclusiveLock); + idxForm = idxRel->rd_index; + + if (!IndexIsValid(idxForm)) + goto next; + + /* + * If the index is valid, but cannot yet be used, ignore it. See + * src/backend/access/heap/README.HOT for discussion. + */ + if (idxForm->indcheckxmin && + !TransactionIdPrecedes(HeapTupleHeaderGetXmin(idxRel->rd_indextuple->t_data), + TransactionXmin)) + goto next; + + /* + * Look for match on "ON constraint_name" variant, which may not be + * unique constraint. This can only be a constraint name. + */ + if (indexOidFromConstraint == idxForm->indexrelid) + { + if (!idxForm->indisunique && onconflict->action == ONCONFLICT_UPDATE) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("ON CONFLICT DO UPDATE not supported with exclusion constraints"))); + + list_free(indexList); + index_close(idxRel, NoLock); + heap_close(relation, NoLock); + candidates = lappend_oid(candidates, idxForm->indexrelid); + return candidates; + } + else if (indexOidFromConstraint != InvalidOid) + { + /* No point in further work for index in named constraint case */ + goto next; + } + + /* + * Only considering conventional inference at this point (not named + * constraints), so index under consideration can be immediately + * skipped if it's not unique + */ + if (!idxForm->indisunique) + goto next; + + /* Build BMS representation of cataloged index attributes */ + for (natt = 0; natt < idxForm->indnatts; natt++) + { + int attno = idxRel->rd_index->indkey.values[natt]; + + if (attno < 0) + elog(ERROR, "system column in index"); + + if (attno != 0) + indexedAttrs = bms_add_member(indexedAttrs, attno); + } + + /* Non-expression attributes (if any) must match */ + if (!bms_equal(indexedAttrs, inferAttrs)) + goto next; + + /* Expression attributes (if any) must match */ + idxExprs = RelationGetIndexExpressions(idxRel); + foreach(el, onconflict->arbiterElems) + { + InferenceElem *elem = (InferenceElem *) lfirst(el); + + /* + * Ensure that collation/opclass aspects of inference expression + * element match. Even though this loop is primarily concerned + * with matching expressions, it is a convenient point to check + * this for both expressions and ordinary (non-expression) + * attributes appearing as inference elements. + */ + if (!infer_collation_opclass_match(elem, idxRel, inferAttrs, + idxExprs)) + goto next; + + /* + * Plain Vars don't factor into count of expression elements, and + * the question of whether or not they satisfy the index + * definition has already been considered (they must). + */ + if (IsA(elem->expr, Var)) + continue; + + /* + * Might as well avoid redundant check in the rare cases where + * infer_collation_opclass_match() is required to do real work. + * Otherwise, check that element expression appears in cataloged + * index definition. + */ + if (elem->infercollid != InvalidOid || + elem->inferopfamily != InvalidOid || + list_member(idxExprs, elem->expr)) + continue; + + goto next; + } + + /* + * Now that all inference elements were matched, ensure that the + * expression elements from inference clause are not missing any + * cataloged expressions. This does the right thing when unique + * indexes redundantly repeat the same attribute, or if attributes + * redundantly appear multiple times within an inference clause. 
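An example of the expression matching performed by the loop above (schema assumed):

/*
 * Illustration: a unique expression index
 *     CREATE UNIQUE INDEX ON users (lower(email));
 * is inferred by ON CONFLICT (lower(email)), because the element's
 * expression appears in the cataloged index definition, while plain
 * ON CONFLICT (email) does not match it.  The check that follows
 * likewise rejects an inference clause that omits any cataloged
 * index expression.
 */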
+ */ + if (list_difference(idxExprs, inferElems) != NIL) + goto next; + + /* + * Any user-supplied ON CONFLICT unique index inference WHERE clause + * need only be implied by the cataloged index definitions predicate. + */ + predExprs = RelationGetIndexPredicate(idxRel); + whereExplicit = make_ands_implicit((Expr *) onconflict->arbiterWhere); + + if (!predicate_implied_by(predExprs, whereExplicit)) + goto next; + + candidates = lappend_oid(candidates, idxForm->indexrelid); +next: + index_close(idxRel, NoLock); + } + + list_free(indexList); + heap_close(relation, NoLock); + + if (candidates == NIL) + ereport(ERROR, + (errcode(ERRCODE_INVALID_COLUMN_REFERENCE), + errmsg("there is no unique or exclusion constraint matching the ON CONFLICT specification"))); + + return candidates; +} + +/* + * infer_collation_opclass_match - ensure infer element opclass/collation match + * + * Given unique index inference element from inference specification, if + * collation was specified, or if opclass (represented here as opfamily + + * opcintype) was specified, verify that there is at least one matching + * indexed attribute (occasionally, there may be more). Skip this in the + * common case where inference specification does not include collation or + * opclass (instead matching everything, regardless of cataloged + * collation/opclass of indexed attribute). + * + * At least historically, Postgres has not offered collations or opclasses + * with alternative-to-default notions of equality, so these additional + * criteria should only be required infrequently. + * + * Don't give up immediately when an inference element matches some attribute + * cataloged as indexed but not matching additional opclass/collation + * criteria. This is done so that the implementation is as forgiving as + * possible of redundancy within cataloged index attributes (or, less + * usefully, within inference specification elements). If collations actually + * differ between apparently redundantly indexed attributes (redundant within + * or across indexes), then there really is no redundancy as such. + * + * Note that if an inference element specifies an opclass and a collation at + * once, both must match in at least one particular attribute within index + * catalog definition in order for that inference element to be considered + * inferred/satisfied. + */ +static bool +infer_collation_opclass_match(InferenceElem *elem, Relation idxRel, + Bitmapset *inferAttrs, List *idxExprs) +{ + AttrNumber natt; + + /* + * If inference specification element lacks collation/opclass, then no + * need to check for exact match. 
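As an aside on the predicate_implied_by() test above: a partial unique index qualifies as an arbiter only when its cataloged predicate follows from the user-supplied inference WHERE clause. A minimal sketch, using hypothetical names not taken from the patch:

    CREATE TABLE tickets (id int, state text);
    CREATE UNIQUE INDEX tickets_open_id ON tickets (id) WHERE state = 'open';

    -- Inferred: the index predicate follows from the clause's WHERE.
    INSERT INTO tickets VALUES (1, 'open')
    ON CONFLICT (id) WHERE state = 'open' DO NOTHING;

    -- Not inferred: with no WHERE clause the partial index is rejected, and
    -- since no other unique index matches, this raises the "there is no
    -- unique or exclusion constraint matching the ON CONFLICT specification"
    -- error seen above.
    INSERT INTO tickets VALUES (1, 'open') ON CONFLICT (id) DO NOTHING;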
+ */ + if (elem->infercollid == InvalidOid && elem->inferopfamily == InvalidOid) + return true; + + for (natt = 1; natt <= idxRel->rd_att->natts; natt++) + { + Oid opfamily = idxRel->rd_opfamily[natt - 1]; + Oid opcinputtype = idxRel->rd_opcintype[natt - 1]; + Oid collation = idxRel->rd_indcollation[natt - 1]; + + if (elem->inferopfamily != InvalidOid && + (elem->inferopfamily != opfamily || + elem->inferopcinputtype != opcinputtype)) + { + /* Attribute needed to match opclass, but didn't */ + continue; + } + + if (elem->infercollid != InvalidOid && + elem->infercollid != collation) + { + /* Attribute needed to match collation, but didn't */ + continue; + } + + if ((IsA(elem->expr, Var) && + bms_is_member(((Var *) elem->expr)->varattno, inferAttrs)) || + list_member(idxExprs, elem->expr)) + { + /* Found one match - good enough */ + return true; + } + } + + return false; +} + +/* * estimate_rel_size - estimate # pages and # tuples in a table or index * * We also estimate the fraction of the pages that are marked all-visible in diff --git a/src/backend/parser/analyze.c b/src/backend/parser/analyze.c index 2d320d100b8..3eb4feabfd6 100644 --- a/src/backend/parser/analyze.c +++ b/src/backend/parser/analyze.c @@ -52,6 +52,8 @@ static Query *transformDeleteStmt(ParseState *pstate, DeleteStmt *stmt); static Query *transformInsertStmt(ParseState *pstate, InsertStmt *stmt); static List *transformInsertRow(ParseState *pstate, List *exprlist, List *stmtcols, List *icolumns, List *attrnos); +static OnConflictExpr *transformOnConflictClause(ParseState *pstate, + OnConflictClause *onConflictClause); static int count_rowexpr_columns(ParseState *pstate, Node *expr); static Query *transformSelectStmt(ParseState *pstate, SelectStmt *stmt); static Query *transformValuesClause(ParseState *pstate, SelectStmt *stmt); @@ -62,6 +64,8 @@ static void determineRecursiveColTypes(ParseState *pstate, Node *larg, List *nrtargetlist); static Query *transformUpdateStmt(ParseState *pstate, UpdateStmt *stmt); static List *transformReturningList(ParseState *pstate, List *returningList); +static List *transformUpdateTargetList(ParseState *pstate, + List *targetList); static Query *transformDeclareCursorStmt(ParseState *pstate, DeclareCursorStmt *stmt); static Query *transformExplainStmt(ParseState *pstate, @@ -419,6 +423,8 @@ transformInsertStmt(ParseState *pstate, InsertStmt *stmt) ListCell *icols; ListCell *attnos; ListCell *lc; + bool isOnConflictUpdate; + AclMode targetPerms; /* There can't be any outer WITH to worry about */ Assert(pstate->p_ctenamespace == NIL); @@ -434,6 +440,9 @@ transformInsertStmt(ParseState *pstate, InsertStmt *stmt) qry->hasModifyingCTE = pstate->p_hasModifyingCTE; } + isOnConflictUpdate = (stmt->onConflictClause && + stmt->onConflictClause->action == ONCONFLICT_UPDATE); + /* * We have three cases to deal with: DEFAULT VALUES (selectStmt == NULL), * VALUES list, or general SELECT input. We special-case VALUES, both for @@ -478,8 +487,11 @@ transformInsertStmt(ParseState *pstate, InsertStmt *stmt) * mentioned in the SELECT part. Note that the target table is not added * to the joinlist or namespace. 
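Returning to infer_collation_opclass_match() above: because the grammar reuses index_params, an inference element may carry an opclass or COLLATE clause, in which case at least one indexed attribute must match it exactly. A sketch under hypothetical names (text_pattern_ops is a stock btree opclass):

    CREATE TABLE codes (code text);
    CREATE UNIQUE INDEX codes_code_key ON codes (code text_pattern_ops);

    -- Matches only because the cataloged opclass agrees:
    INSERT INTO codes VALUES ('abc')
    ON CONFLICT (code text_pattern_ops) DO NOTHING;

    -- With no opclass given, any unique index on "code" is accepted,
    -- whatever its cataloged opclass/collation:
    INSERT INTO codes VALUES ('abc') ON CONFLICT (code) DO NOTHING;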
*/ + targetPerms = ACL_INSERT; + if (isOnConflictUpdate) + targetPerms |= ACL_UPDATE; qry->resultRelation = setTargetTable(pstate, stmt->relation, - false, false, ACL_INSERT); + false, false, targetPerms); /* Validate stmt->cols list, or build default list if no list given */ icolumns = checkInsertTargets(pstate, stmt->cols, &attrnos); @@ -740,6 +752,11 @@ transformInsertStmt(ParseState *pstate, InsertStmt *stmt) attnos = lnext(attnos); } + /* Process ON CONFLICT, if any. */ + if (stmt->onConflictClause) + qry->onConflict = transformOnConflictClause(pstate, + stmt->onConflictClause); + /* * If we have a RETURNING clause, we need to add the target relation to * the query namespace before processing it, so that Var references in @@ -850,6 +867,85 @@ transformInsertRow(ParseState *pstate, List *exprlist, } /* + * transformOnConflictClause - + * transforms an OnConflictClause in an INSERT + */ +static OnConflictExpr * +transformOnConflictClause(ParseState *pstate, + OnConflictClause *onConflictClause) +{ + List *arbiterElems; + Node *arbiterWhere; + Oid arbiterConstraint; + List *onConflictSet = NIL; + Node *onConflictWhere = NULL; + RangeTblEntry *exclRte = NULL; + int exclRelIndex = 0; + List *exclRelTlist = NIL; + OnConflictExpr *result; + + /* Process the arbiter clause, ON CONFLICT (...) */ + transformOnConflictArbiter(pstate, onConflictClause, &arbiterElems, + &arbiterWhere, &arbiterConstraint); + + /* Process DO UPDATE */ + if (onConflictClause->action == ONCONFLICT_UPDATE) + { + exclRte = addRangeTableEntryForRelation(pstate, + pstate->p_target_relation, + makeAlias("excluded", NIL), + false, false); + exclRelIndex = list_length(pstate->p_rtable); + + /* + * Build a targetlist for the EXCLUDED pseudo relation. For + * simplicity we do that here, because expandRelAttrs() happens to + * nearly do the right thing; specifically it also works with views. + * It'd be more proper to instead scan some pseudo scan node, but it + * doesn't seem worth the amount of code required. + * + * The only caveat of this hack is that the permissions expandRelAttrs + * adds have to be reset. markVarForSelectPriv() will add the exact + * required permissions back. + */ + exclRelTlist = expandRelAttrs(pstate, exclRte, + exclRelIndex, 0, -1); + exclRte->requiredPerms = 0; + exclRte->selectedCols = NULL; + + /* + * Add EXCLUDED and the target RTE to the namespace, so that they can + * be used in the UPDATE statement. + */ + addRTEtoQuery(pstate, exclRte, false, true, true); + addRTEtoQuery(pstate, pstate->p_target_rangetblentry, + false, true, true); + + onConflictSet = + transformUpdateTargetList(pstate, onConflictClause->targetList); + + onConflictWhere = transformWhereClause(pstate, + onConflictClause->whereClause, + EXPR_KIND_WHERE, "WHERE"); + } + + /* Finally, build ON CONFLICT DO [NOTHING | UPDATE] expression */ + result = makeNode(OnConflictExpr); + + result->action = onConflictClause->action; + result->arbiterElems = arbiterElems; + result->arbiterWhere = arbiterWhere; + result->constraint = arbiterConstraint; + result->onConflictSet = onConflictSet; + result->onConflictWhere = onConflictWhere; + result->exclRelIndex = exclRelIndex; + result->exclRelTlist = exclRelTlist; + + return result; +} + + +/* + * count_rowexpr_columns - + * get number of columns contained in a ROW() expression; + * return -1 if expression isn't a RowExpr or a Var referencing one.
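The EXCLUDED pseudo-relation built in transformOnConflictClause() above is what lets a DO UPDATE's SET list and WHERE clause refer to the tuple that was proposed for insertion. A usage sketch against a hypothetical table:

    CREATE TABLE counters (key text PRIMARY KEY, hits int);

    INSERT INTO counters VALUES ('home', 1)
    ON CONFLICT (key) DO UPDATE
    SET hits = counters.hits + EXCLUDED.hits;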
@@ -1899,10 +1995,7 @@ transformUpdateStmt(ParseState *pstate, UpdateStmt *stmt) { Query *qry = makeNode(Query); ParseNamespaceItem *nsitem; - RangeTblEntry *target_rte; Node *qual; - ListCell *origTargetList; - ListCell *tl; qry->commandType = CMD_UPDATE; pstate->p_is_update = true; @@ -1937,23 +2030,41 @@ transformUpdateStmt(ParseState *pstate, UpdateStmt *stmt) nsitem->p_lateral_only = false; nsitem->p_lateral_ok = true; - qry->targetList = transformTargetList(pstate, stmt->targetList, - EXPR_KIND_UPDATE_SOURCE); - qual = transformWhereClause(pstate, stmt->whereClause, EXPR_KIND_WHERE, "WHERE"); qry->returningList = transformReturningList(pstate, stmt->returningList); + /* + * Now we are done with SELECT-like processing, and can get on with + * transforming the target list to match the UPDATE target columns. + */ + qry->targetList = transformUpdateTargetList(pstate, stmt->targetList); + qry->rtable = pstate->p_rtable; qry->jointree = makeFromExpr(pstate->p_joinlist, qual); qry->hasSubLinks = pstate->p_hasSubLinks; - /* - * Now we are done with SELECT-like processing, and can get on with - * transforming the target list to match the UPDATE target columns. - */ + assign_query_collations(pstate, qry); + + return qry; +} + +/* + * transformUpdateTargetList - + * handle SET clause in UPDATE/INSERT ... ON CONFLICT UPDATE + */ +static List * +transformUpdateTargetList(ParseState *pstate, List *origTlist) +{ + List *tlist = NIL; + RangeTblEntry *target_rte; + ListCell *orig_tl; + ListCell *tl; + + tlist = transformTargetList(pstate, origTlist, + EXPR_KIND_UPDATE_SOURCE); /* Prepare to assign non-conflicting resnos to resjunk attributes */ if (pstate->p_next_resno <= pstate->p_target_relation->rd_rel->relnatts) @@ -1961,9 +2072,9 @@ transformUpdateStmt(ParseState *pstate, UpdateStmt *stmt) /* Prepare non-junk columns for assignment to target table */ target_rte = pstate->p_target_rangetblentry; - origTargetList = list_head(stmt->targetList); + orig_tl = list_head(origTlist); - foreach(tl, qry->targetList) + foreach(tl, tlist) { TargetEntry *tle = (TargetEntry *) lfirst(tl); ResTarget *origTarget; @@ -1981,9 +2092,9 @@ transformUpdateStmt(ParseState *pstate, UpdateStmt *stmt) tle->resname = NULL; continue; } - if (origTargetList == NULL) + if (orig_tl == NULL) elog(ERROR, "UPDATE target count mismatch --- internal error"); - origTarget = (ResTarget *) lfirst(origTargetList); + origTarget = (ResTarget *) lfirst(orig_tl); Assert(IsA(origTarget, ResTarget)); attrno = attnameAttNum(pstate->p_target_relation, @@ -2005,14 +2116,12 @@ transformUpdateStmt(ParseState *pstate, UpdateStmt *stmt) target_rte->updatedCols = bms_add_member(target_rte->updatedCols, attrno - FirstLowInvalidHeapAttributeNumber); - origTargetList = lnext(origTargetList); + orig_tl = lnext(orig_tl); } - if (origTargetList != NULL) + if (orig_tl != NULL) elog(ERROR, "UPDATE target count mismatch --- internal error"); - assign_query_collations(pstate, qry); - - return qry; + return tlist; } /* diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index 0180530a309..7a4c07365c1 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -217,6 +217,8 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); RangeVar *range; IntoClause *into; WithClause *with; + InferClause *infer; + OnConflictClause *onconflict; A_Indices *aind; ResTarget *target; struct PrivTarget *privtarget; @@ -318,7 +320,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); 
opt_class opt_inline_handler opt_validator validator_clause opt_collate -%type <range> qualified_name OptConstrFromTable +%type <range> qualified_name insert_target OptConstrFromTable %type <str> all_Op MathOp @@ -344,7 +346,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); OptTableElementList TableElementList OptInherit definition OptTypedTableElementList TypedTableElementList reloptions opt_reloptions - OptWith opt_distinct opt_definition func_args func_args_list + OptWith distinct_clause opt_all_clause opt_definition func_args func_args_list func_args_with_defaults func_args_with_defaults_list aggr_args aggr_args_list func_as createfunc_opt_list alterfunc_opt_list @@ -389,7 +391,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); %type <node> for_locking_item %type <list> for_locking_clause opt_for_locking_clause for_locking_items %type <list> locked_rels_list -%type <boolean> opt_all +%type <boolean> all_or_distinct %type <node> join_outer join_qual %type <jtype> join_type @@ -418,6 +420,8 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); %type <defelt> SeqOptElem %type <istmt> insert_rest +%type <infer> opt_conf_expr +%type <onconflict> opt_on_conflict %type <vsetstmt> generic_set set_rest set_rest_more generic_reset reset_rest SetResetClause FunctionSetResetClause @@ -557,8 +561,8 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); CACHE CALLED CASCADE CASCADED CASE CAST CATALOG_P CHAIN CHAR_P CHARACTER CHARACTERISTICS CHECK CHECKPOINT CLASS CLOSE CLUSTER COALESCE COLLATE COLLATION COLUMN COMMENT COMMENTS COMMIT - COMMITTED CONCURRENTLY CONFIGURATION CONNECTION CONSTRAINT CONSTRAINTS - CONTENT_P CONTINUE_P CONVERSION_P COPY COST CREATE + COMMITTED CONCURRENTLY CONFIGURATION CONFLICT CONNECTION CONSTRAINT + CONSTRAINTS CONTENT_P CONTINUE_P CONVERSION_P COPY COST CREATE CROSS CSV CURRENT_P CURRENT_CATALOG CURRENT_DATE CURRENT_ROLE CURRENT_SCHEMA CURRENT_TIME CURRENT_TIMESTAMP CURRENT_USER CURSOR CYCLE @@ -9436,15 +9440,35 @@ DeallocateStmt: DEALLOCATE name *****************************************************************************/ InsertStmt: - opt_with_clause INSERT INTO qualified_name insert_rest returning_clause + opt_with_clause INSERT INTO insert_target insert_rest + opt_on_conflict returning_clause { $5->relation = $4; - $5->returningList = $6; + $5->onConflictClause = $6; + $5->returningList = $7; $5->withClause = $1; $$ = (Node *) $5; } ; +/* + * Can't easily make AS optional here, because VALUES in insert_rest would + * have a shift/reduce conflict with VALUES as an optional alias. We could + * easily allow unreserved_keywords as optional aliases, but that'd be an odd + * divergence from other places. So just require AS for now.
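The insert_target production that follows accepts an alias for the target table (with a mandatory AS, per the comment above), chiefly so a DO UPDATE clause can distinguish the existing row from EXCLUDED without spelling out the full table name. A sketch with hypothetical names:

    INSERT INTO distributors AS d (did, dname)
    VALUES (8, 'Anvil')
    ON CONFLICT (did) DO UPDATE
    SET dname = EXCLUDED.dname
    WHERE d.dname IS DISTINCT FROM EXCLUDED.dname;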
+ */ +insert_target: + qualified_name + { + $$ = $1; + } + | qualified_name AS ColId + { + $1->alias = makeAlias($3, NIL); + $$ = $1; + } + ; + insert_rest: SelectStmt { @@ -9484,6 +9508,56 @@ insert_column_item: } ; +opt_on_conflict: + ON CONFLICT opt_conf_expr DO UPDATE SET set_clause_list where_clause + { + $$ = makeNode(OnConflictClause); + $$->action = ONCONFLICT_UPDATE; + $$->infer = $3; + $$->targetList = $7; + $$->whereClause = $8; + $$->location = @1; + } + | + ON CONFLICT opt_conf_expr DO NOTHING + { + $$ = makeNode(OnConflictClause); + $$->action = ONCONFLICT_NOTHING; + $$->infer = $3; + $$->targetList = NIL; + $$->whereClause = NULL; + $$->location = @1; + } + | /*EMPTY*/ + { + $$ = NULL; + } + ; + +opt_conf_expr: + '(' index_params ')' where_clause + { + $$ = makeNode(InferClause); + $$->indexElems = $2; + $$->whereClause = $4; + $$->conname = NULL; + $$->location = @1; + } + | + ON CONSTRAINT name + { + $$ = makeNode(InferClause); + $$->indexElems = NIL; + $$->whereClause = NULL; + $$->conname = $3; + $$->location = @1; + } + | /*EMPTY*/ + { + $$ = NULL; + } + ; + returning_clause: RETURNING target_list { $$ = $2; } | /* EMPTY */ { $$ = NIL; } @@ -9870,7 +9944,21 @@ select_clause: * However, this is not checked by the grammar; parse analysis must check it. */ simple_select: - SELECT opt_distinct opt_target_list + SELECT opt_all_clause opt_target_list + into_clause from_clause where_clause + group_clause having_clause window_clause + { + SelectStmt *n = makeNode(SelectStmt); + n->targetList = $3; + n->intoClause = $4; + n->fromClause = $5; + n->whereClause = $6; + n->groupClause = $7; + n->havingClause = $8; + n->windowClause = $9; + $$ = (Node *)n; + } + | SELECT distinct_clause target_list into_clause from_clause where_clause group_clause having_clause window_clause { @@ -9905,15 +9993,15 @@ simple_select: n->fromClause = list_make1($2); $$ = (Node *)n; } - | select_clause UNION opt_all select_clause + | select_clause UNION all_or_distinct select_clause { $$ = makeSetOp(SETOP_UNION, $3, $1, $4); } - | select_clause INTERSECT opt_all select_clause + | select_clause INTERSECT all_or_distinct select_clause { $$ = makeSetOp(SETOP_INTERSECT, $3, $1, $4); } - | select_clause EXCEPT opt_all select_clause + | select_clause EXCEPT all_or_distinct select_clause { $$ = makeSetOp(SETOP_EXCEPT, $3, $1, $4); } @@ -10052,7 +10140,8 @@ opt_table: TABLE {} | /*EMPTY*/ {} ; -opt_all: ALL { $$ = TRUE; } +all_or_distinct: + ALL { $$ = TRUE; } | DISTINCT { $$ = FALSE; } | /*EMPTY*/ { $$ = FALSE; } ; @@ -10060,10 +10149,13 @@ opt_all: ALL { $$ = TRUE; } /* We use (NIL) as a placeholder to indicate that all target expressions * should be placed in the DISTINCT list during parsetree analysis. 
*/ -opt_distinct: +distinct_clause: DISTINCT { $$ = list_make1(NIL); } | DISTINCT ON '(' expr_list ')' { $$ = $4; } - | ALL { $$ = NIL; } + ; + +opt_all_clause: + ALL { $$ = NIL;} | /*EMPTY*/ { $$ = NIL; } ; @@ -13367,6 +13459,7 @@ unreserved_keyword: | COMMIT | COMMITTED | CONFIGURATION + | CONFLICT | CONNECTION | CONSTRAINTS | CONTENT_P diff --git a/src/backend/parser/parse_clause.c b/src/backend/parser/parse_clause.c index 8d90b5098a1..73c505ed85b 100644 --- a/src/backend/parser/parse_clause.c +++ b/src/backend/parser/parse_clause.c @@ -16,7 +16,9 @@ #include "postgres.h" #include "access/heapam.h" +#include "catalog/catalog.h" #include "catalog/heap.h" +#include "catalog/pg_constraint.h" #include "catalog/pg_type.h" #include "commands/defrem.h" #include "nodes/makefuncs.h" @@ -32,6 +34,7 @@ #include "parser/parse_oper.h" #include "parser/parse_relation.h" #include "parser/parse_target.h" +#include "parser/parse_type.h" #include "rewrite/rewriteManip.h" #include "utils/guc.h" #include "utils/lsyscache.h" @@ -75,6 +78,8 @@ static TargetEntry *findTargetlistEntrySQL99(ParseState *pstate, Node *node, List **tlist, ParseExprKind exprKind); static int get_matching_location(int sortgroupref, List *sortgrouprefs, List *exprs); +static List *resolve_unique_index_expr(ParseState *pstate, InferClause * infer, + Relation heapRel); static List *addTargetToGroupList(ParseState *pstate, TargetEntry *tle, List *grouplist, List *targetlist, int location, bool resolveUnknown); @@ -2167,6 +2172,204 @@ get_matching_location(int sortgroupref, List *sortgrouprefs, List *exprs) } /* + * resolve_unique_index_expr + * Infer a unique index from a list of indexElems, for ON + * CONFLICT clause + * + * Perform parse analysis of expressions and columns appearing within ON + * CONFLICT clause. During planning, the returned list of expressions is used + * to infer which unique index to use. + */ +static List * +resolve_unique_index_expr(ParseState *pstate, InferClause *infer, + Relation heapRel) +{ + List *result = NIL; + ListCell *l; + + foreach(l, infer->indexElems) + { + IndexElem *ielem = (IndexElem *) lfirst(l); + InferenceElem *pInfer = makeNode(InferenceElem); + Node *parse; + + /* + * Raw grammar re-uses CREATE INDEX infrastructure for unique index + * inference clause, and so will accept opclasses by name and so on. + * + * Make no attempt to match ASC or DESC ordering or NULLS FIRST/NULLS + * LAST ordering, since those are not significant for inference + * purposes (any unique index matching the inference specification in + * other regards is accepted indifferently). Actively reject this as + * wrong-headed. + */ + if (ielem->ordering != SORTBY_DEFAULT) + ereport(ERROR, + (errcode(ERRCODE_INVALID_COLUMN_REFERENCE), + errmsg("ASC/DESC is not allowed in ON CONFLICT clause"), + parser_errposition(pstate, + exprLocation((Node *) infer)))); + if (ielem->nulls_ordering != SORTBY_NULLS_DEFAULT) + ereport(ERROR, + (errcode(ERRCODE_INVALID_COLUMN_REFERENCE), + errmsg("NULLS FIRST/LAST is not allowed in ON CONFLICT clause"), + parser_errposition(pstate, + exprLocation((Node *) infer)))); + + if (!ielem->expr) + { + /* Simple index attribute */ + ColumnRef *n; + + /* + * Grammar won't have built raw expression for us in event of + * plain column reference. Create one directly, and perform + * expression transformation. Planner expects this, and performs + * its own normalization for the purposes of matching against + * pg_index. 
+ */ + n = makeNode(ColumnRef); + n->fields = list_make1(makeString(ielem->name)); + /* Location is approximately that of inference specification */ + n->location = infer->location; + parse = (Node *) n; + } + else + { + /* Do parse transformation of the raw expression */ + parse = (Node *) ielem->expr; + } + + /* + * transformExpr() should have already rejected subqueries, + * aggregates, and window functions, based on the EXPR_KIND_ for an + * index expression. Expressions returning sets won't have been + * rejected, but don't bother doing so here; there should be no + * available expression unique index to match any such expression + * against anyway. + */ + pInfer->expr = transformExpr(pstate, parse, EXPR_KIND_INDEX_EXPRESSION); + + /* Perform lookup of collation and operator class as required */ + if (!ielem->collation) + pInfer->infercollid = InvalidOid; + else + pInfer->infercollid = LookupCollation(pstate, ielem->collation, + exprLocation(pInfer->expr)); + + if (!ielem->opclass) + { + pInfer->inferopfamily = InvalidOid; + pInfer->inferopcinputtype = InvalidOid; + } + else + { + Oid opclass = get_opclass_oid(BTREE_AM_OID, ielem->opclass, + false); + + pInfer->inferopfamily = get_opclass_family(opclass); + pInfer->inferopcinputtype = get_opclass_input_type(opclass); + } + + result = lappend(result, pInfer); + } + + return result; +} + +/* + * transformOnConflictArbiter - + * transform arbiter expressions in an ON CONFLICT clause. + * + * Transformed expressions are used to infer one unique index relation to + * serve as an ON CONFLICT arbiter. Partial unique indexes may be inferred + * using the WHERE clause from the inference specification. + */ +void +transformOnConflictArbiter(ParseState *pstate, + OnConflictClause *onConflictClause, + List **arbiterExpr, Node **arbiterWhere, + Oid *constraint) +{ + InferClause *infer = onConflictClause->infer; + + *arbiterExpr = NIL; + *arbiterWhere = NULL; + *constraint = InvalidOid; + + if (onConflictClause->action == ONCONFLICT_UPDATE && !infer) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("ON CONFLICT DO UPDATE requires inference specification or constraint name"), + errhint("For example, ON CONFLICT (<column>)."), + parser_errposition(pstate, + exprLocation((Node *) onConflictClause)))); + + /* + * To simplify certain aspects of its design, speculative insertion into + * system catalogs is disallowed. + */ + if (IsCatalogRelation(pstate->p_target_relation)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("ON CONFLICT not supported with system catalog tables"), + parser_errposition(pstate, + exprLocation((Node *) onConflictClause)))); + + /* Same applies to table used by logical decoding as catalog table */ + if (RelationIsUsedAsCatalogTable(pstate->p_target_relation)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("ON CONFLICT not supported on table \"%s\" used as a catalog table", + RelationGetRelationName(pstate->p_target_relation)), + parser_errposition(pstate, + exprLocation((Node *) onConflictClause)))); + + /* ON CONFLICT DO NOTHING does not require an inference clause */ + if (infer) + { + List *save_namespace; + + /* + * While we process the arbiter expressions, accept only + * non-qualified references to the target table. Hide any other + * relations.
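transformOnConflictArbiter() above thus insists on an arbiter for DO UPDATE while leaving it optional for DO NOTHING. Sketched against a hypothetical table (the default primary-key constraint name t_pkey is assumed):

    CREATE TABLE t (id int PRIMARY KEY, v text);

    -- Rejected: DO UPDATE without an inference specification or constraint name.
    INSERT INTO t VALUES (1, 'x') ON CONFLICT DO UPDATE SET v = EXCLUDED.v;

    -- Accepted: DO NOTHING may omit the arbiter entirely ...
    INSERT INTO t VALUES (1, 'x') ON CONFLICT DO NOTHING;

    -- ... or either action may name a constraint directly.
    INSERT INTO t VALUES (1, 'x') ON CONFLICT ON CONSTRAINT t_pkey DO NOTHING;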
+ */ + save_namespace = pstate->p_namespace; + pstate->p_namespace = NIL; + addRTEtoQuery(pstate, pstate->p_target_rangetblentry, + false, false, true); + + if (infer->indexElems) + *arbiterExpr = resolve_unique_index_expr(pstate, infer, + pstate->p_target_relation); + + /* + * Handling inference WHERE clause (for partial unique index + * inference) + */ + if (infer->whereClause) + *arbiterWhere = transformExpr(pstate, infer->whereClause, + EXPR_KIND_INDEX_PREDICATE); + + pstate->p_namespace = save_namespace; + + if (infer->conname) + *constraint = get_relation_constraint_oid(RelationGetRelid(pstate->p_target_relation), + infer->conname, false); + } + + /* + * It's convenient to form a list of expressions based on the + * representation used by CREATE INDEX, since the same restrictions are + * appropriate (e.g. on subqueries). However, from here on, a dedicated + * primnode representation is used for inference elements, and so + * assign_query_collations() can be trusted to do the right thing with the + * post parse analysis query tree inference clause representation. + */ +} + +/* * addTargetToSortList * If the given targetlist entry isn't already in the SortGroupClause * list, add it to the end of the list, using the given sort ordering diff --git a/src/backend/parser/parse_collate.c b/src/backend/parser/parse_collate.c index 7c6a11c7575..4c85b708d3b 100644 --- a/src/backend/parser/parse_collate.c +++ b/src/backend/parser/parse_collate.c @@ -479,9 +479,11 @@ assign_collations_walker(Node *node, assign_collations_context *context) parser_errposition(context->pstate, loccontext.location2))); break; + case T_InferenceElem: case T_RangeTblRef: case T_JoinExpr: case T_FromExpr: + case T_OnConflictExpr: case T_SortGroupClause: (void) expression_tree_walker(node, assign_collations_walker, diff --git a/src/backend/parser/parse_target.c b/src/backend/parser/parse_target.c index 2d85cf08e70..59973ba9c3c 100644 --- a/src/backend/parser/parse_target.c +++ b/src/backend/parser/parse_target.c @@ -537,11 +537,12 @@ transformAssignedExpr(ParseState *pstate, /* * updateTargetListEntry() - * This is used in UPDATE statements only. It prepares an UPDATE - * TargetEntry for assignment to a column of the target table. - * This includes coercing the given value to the target column's type - * (if necessary), and dealing with any subfield names or subscripts - * attached to the target column itself. + * This is used in UPDATE statements (and ON CONFLICT DO UPDATE) + * only. It prepares an UPDATE TargetEntry for assignment to a + * column of the target table. This includes coercing the given + * value to the target column's type (if necessary), and dealing with + * any subfield names or subscripts attached to the target column + * itself. 
* * pstate parse state * tle target list entry to be modified diff --git a/src/backend/replication/logical/decode.c b/src/backend/replication/logical/decode.c index 88424964ef3..ea388182692 100644 --- a/src/backend/replication/logical/decode.c +++ b/src/backend/replication/logical/decode.c @@ -64,6 +64,8 @@ static void DecodeInsert(LogicalDecodingContext *ctx, XLogRecordBuffer *buf); static void DecodeUpdate(LogicalDecodingContext *ctx, XLogRecordBuffer *buf); static void DecodeDelete(LogicalDecodingContext *ctx, XLogRecordBuffer *buf); static void DecodeMultiInsert(LogicalDecodingContext *ctx, XLogRecordBuffer *buf); +static void DecodeSpecConfirm(LogicalDecodingContext *ctx, XLogRecordBuffer *buf); + static void DecodeCommit(LogicalDecodingContext *ctx, XLogRecordBuffer *buf, xl_xact_parsed_commit *parsed, TransactionId xid); static void DecodeAbort(LogicalDecodingContext *ctx, XLogRecordBuffer *buf, @@ -414,6 +416,11 @@ DecodeHeapOp(LogicalDecodingContext *ctx, XLogRecordBuffer *buf) ReorderBufferXidSetCatalogChanges(ctx->reorder, xid, buf->origptr); break; + case XLOG_HEAP_CONFIRM: + if (SnapBuildProcessChange(builder, xid, buf->origptr)) + DecodeSpecConfirm(ctx, buf); + break; + case XLOG_HEAP_LOCK: /* we don't care about row level locks for now */ break; @@ -564,11 +571,15 @@ DecodeInsert(LogicalDecodingContext *ctx, XLogRecordBuffer *buf) return; change = ReorderBufferGetChange(ctx->reorder); - change->action = REORDER_BUFFER_CHANGE_INSERT; + if (!(xlrec->flags & XLH_INSERT_IS_SPECULATIVE)) + change->action = REORDER_BUFFER_CHANGE_INSERT; + else + change->action = REORDER_BUFFER_CHANGE_INTERNAL_SPEC_INSERT; change->origin_id = XLogRecGetOrigin(r); + memcpy(&change->data.tp.relnode, &target_node, sizeof(RelFileNode)); - if (xlrec->flags & XLOG_HEAP_CONTAINS_NEW_TUPLE) + if (xlrec->flags & XLH_INSERT_CONTAINS_NEW_TUPLE) { Size tuplelen; char *tupledata = XLogRecGetBlockData(r, 0, &tuplelen); @@ -615,7 +626,7 @@ DecodeUpdate(LogicalDecodingContext *ctx, XLogRecordBuffer *buf) change->origin_id = XLogRecGetOrigin(r); memcpy(&change->data.tp.relnode, &target_node, sizeof(RelFileNode)); - if (xlrec->flags & XLOG_HEAP_CONTAINS_NEW_TUPLE) + if (xlrec->flags & XLH_UPDATE_CONTAINS_NEW_TUPLE) { data = XLogRecGetBlockData(r, 0, &datalen); @@ -624,7 +635,7 @@ DecodeUpdate(LogicalDecodingContext *ctx, XLogRecordBuffer *buf) DecodeXLogTuple(data, datalen, change->data.tp.newtuple); } - if (xlrec->flags & XLOG_HEAP_CONTAINS_OLD) + if (xlrec->flags & XLH_UPDATE_CONTAINS_OLD) { /* caution, remaining data in record is not aligned */ data = XLogRecGetData(r) + SizeOfHeapUpdate; @@ -660,6 +671,13 @@ DecodeDelete(LogicalDecodingContext *ctx, XLogRecordBuffer *buf) if (target_node.dbNode != ctx->slot->data.database) return; + /* + * Super deletions are irrelevant for logical decoding, it's driven by the + * confirmation records. 
+ */ + if (xlrec->flags & XLH_DELETE_IS_SUPER) + return; + /* output plugin doesn't look for this origin, no need to queue */ if (FilterByOrigin(ctx, XLogRecGetOrigin(r))) return; @@ -671,7 +689,7 @@ DecodeDelete(LogicalDecodingContext *ctx, XLogRecordBuffer *buf) memcpy(&change->data.tp.relnode, &target_node, sizeof(RelFileNode)); /* old primary key stored */ - if (xlrec->flags & XLOG_HEAP_CONTAINS_OLD) + if (xlrec->flags & XLH_DELETE_CONTAINS_OLD) { Assert(XLogRecGetDataLen(r) > (SizeOfHeapDelete + SizeOfHeapHeader)); @@ -737,7 +755,7 @@ DecodeMultiInsert(LogicalDecodingContext *ctx, XLogRecordBuffer *buf) * We decode the tuple in pretty much the same way as DecodeXLogTuple, * but since the layout is slightly different, we can't use it here. */ - if (xlrec->flags & XLOG_HEAP_CONTAINS_NEW_TUPLE) + if (xlrec->flags & XLH_INSERT_CONTAINS_NEW_TUPLE) { change->data.tp.newtuple = ReorderBufferGetTupleBuf(ctx->reorder); @@ -775,7 +793,7 @@ DecodeMultiInsert(LogicalDecodingContext *ctx, XLogRecordBuffer *buf) * xl_multi_insert_tuple record emitted by one heap_multi_insert() * call. */ - if (xlrec->flags & XLOG_HEAP_LAST_MULTI_INSERT && + if (xlrec->flags & XLH_INSERT_LAST_IN_MULTI && (i + 1) == xlrec->ntuples) change->data.tp.clear_toast_afterwards = true; else @@ -788,6 +806,40 @@ DecodeMultiInsert(LogicalDecodingContext *ctx, XLogRecordBuffer *buf) } /* + * Parse XLOG_HEAP_CONFIRM from WAL into a confirmation change. + * + * This is pretty trivial; all the state is essentially already set up by the + * speculative insertion. + */ +static void +DecodeSpecConfirm(LogicalDecodingContext *ctx, XLogRecordBuffer *buf) +{ + XLogReaderState *r = buf->record; + ReorderBufferChange *change; + RelFileNode target_node; + + /* only interested in our database */ + XLogRecGetBlockTag(r, 0, &target_node, NULL, NULL); + if (target_node.dbNode != ctx->slot->data.database) + return; + + /* output plugin doesn't look for this origin, no need to queue */ + if (FilterByOrigin(ctx, XLogRecGetOrigin(r))) + return; + + change = ReorderBufferGetChange(ctx->reorder); + change->action = REORDER_BUFFER_CHANGE_INTERNAL_SPEC_CONFIRM; + change->origin_id = XLogRecGetOrigin(r); + + memcpy(&change->data.tp.relnode, &target_node, sizeof(RelFileNode)); + + change->data.tp.clear_toast_afterwards = true; + + ReorderBufferQueueChange(ctx->reorder, XLogRecGetXid(r), buf->origptr, change); +} + + +/* + * Read a HeapTuple as WAL logged by heap_insert, heap_update and heap_delete + * (but not by heap_multi_insert) into a tuplebuf.
* diff --git a/src/backend/replication/logical/reorderbuffer.c b/src/backend/replication/logical/reorderbuffer.c index c9c1d1036e0..57854b0aa57 100644 --- a/src/backend/replication/logical/reorderbuffer.c +++ b/src/backend/replication/logical/reorderbuffer.c @@ -401,6 +401,7 @@ ReorderBufferReturnChange(ReorderBuffer *rb, ReorderBufferChange *change) case REORDER_BUFFER_CHANGE_INSERT: case REORDER_BUFFER_CHANGE_UPDATE: case REORDER_BUFFER_CHANGE_DELETE: + case REORDER_BUFFER_CHANGE_INTERNAL_SPEC_INSERT: if (change->data.tp.newtuple) { ReorderBufferReturnTupleBuf(rb, change->data.tp.newtuple); @@ -420,8 +421,9 @@ ReorderBufferReturnChange(ReorderBuffer *rb, ReorderBufferChange *change) change->data.snapshot = NULL; } break; + /* no data in addition to the struct itself */ + case REORDER_BUFFER_CHANGE_INTERNAL_SPEC_CONFIRM: case REORDER_BUFFER_CHANGE_INTERNAL_COMMAND_ID: - break; case REORDER_BUFFER_CHANGE_INTERNAL_TUPLECID: break; } @@ -1317,6 +1319,7 @@ ReorderBufferCommit(ReorderBuffer *rb, TransactionId xid, PG_TRY(); { ReorderBufferChange *change; + ReorderBufferChange *specinsert = NULL; if (using_subtxn) BeginInternalSubTransaction("replay"); @@ -1333,6 +1336,17 @@ ReorderBufferCommit(ReorderBuffer *rb, TransactionId xid, switch (change->action) { + case REORDER_BUFFER_CHANGE_INTERNAL_SPEC_CONFIRM: + /* + * Confirmation for speculative insertion arrived. Simply + * use as a normal record. It'll be cleaned up at the end + * of INSERT processing. + */ + Assert(specinsert->data.tp.oldtuple == NULL); + change = specinsert; + change->action = REORDER_BUFFER_CHANGE_INSERT; + + /* intentionally fall through */ case REORDER_BUFFER_CHANGE_INSERT: case REORDER_BUFFER_CHANGE_UPDATE: case REORDER_BUFFER_CHANGE_DELETE: @@ -1348,7 +1362,7 @@ ReorderBufferCommit(ReorderBuffer *rb, TransactionId xid, if (reloid == InvalidOid && change->data.tp.newtuple == NULL && change->data.tp.oldtuple == NULL) - continue; + goto change_done; else if (reloid == InvalidOid) elog(ERROR, "could not map filenode \"%s\" to relation OID", relpathperm(change->data.tp.relnode, @@ -1362,50 +1376,92 @@ ReorderBufferCommit(ReorderBuffer *rb, TransactionId xid, relpathperm(change->data.tp.relnode, MAIN_FORKNUM)); - if (RelationIsLogicallyLogged(relation)) + if (!RelationIsLogicallyLogged(relation)) + goto change_done; + + /* + * For now ignore sequence changes entirely. Most of + * the time they don't log changes using records we + * understand, so it doesn't make sense to handle the + * few cases we do. + */ + if (relation->rd_rel->relkind == RELKIND_SEQUENCE) + goto change_done; + + /* user-triggered change */ + if (!IsToastRelation(relation)) { + ReorderBufferToastReplace(rb, txn, relation, change); + rb->apply_change(rb, txn, relation, change); + /* - * For now ignore sequence changes entirely. Most of - * the time they don't log changes using records we - * understand, so it doesn't make sense to handle the - * few cases we do. + * Only clear reassembled toast chunks if we're + * sure they're not required anymore. The creator + * of the tuple tells us. */ - if (relation->rd_rel->relkind == RELKIND_SEQUENCE) - { - } - /* user-triggered change */ - else if (!IsToastRelation(relation)) - { - ReorderBufferToastReplace(rb, txn, relation, change); - rb->apply_change(rb, txn, relation, change); - - /* - * Only clear reassembled toast chunks if we're - * sure they're not required anymore. The creator - * of the tuple tells us. 
- */ - if (change->data.tp.clear_toast_afterwards) - ReorderBufferToastReset(rb, txn); - } - /* we're not interested in toast deletions */ - else if (change->action == REORDER_BUFFER_CHANGE_INSERT) - { - /* - * Need to reassemble the full toasted Datum in - * memory, to ensure the chunks don't get reused - * till we're done remove it from the list of this - * transaction's changes. Otherwise it will get - * freed/reused while restoring spooled data from - * disk. - */ - dlist_delete(&change->node); - ReorderBufferToastAppendChunk(rb, txn, relation, - change); - } + */ + if (change->data.tp.clear_toast_afterwards) + ReorderBufferToastReset(rb, txn); + } + /* we're not interested in toast deletions */ + else if (change->action == REORDER_BUFFER_CHANGE_INSERT) + { + /* + * Need to reassemble the full toasted Datum in + * memory, to ensure the chunks don't get reused till + * we're done; remove it from the list of this + * transaction's changes. Otherwise it will get + * freed/reused while restoring spooled data from + * disk. + */ + dlist_delete(&change->node); + ReorderBufferToastAppendChunk(rb, txn, relation, + change); + } + + change_done: + /* + * Either the speculative insertion was confirmed, or it was + * unsuccessful and the record isn't needed anymore. + */ + if (specinsert != NULL) + { + ReorderBufferReturnChange(rb, specinsert); + specinsert = NULL; + } + if (relation != NULL) + { + RelationClose(relation); + relation = NULL; } - RelationClose(relation); break; + + case REORDER_BUFFER_CHANGE_INTERNAL_SPEC_INSERT: + /* + * Speculative insertions are dealt with by delaying the + * processing of the insert until the confirmation record + * arrives. For that we simply unlink the record from the + * chain, so it does not get freed/reused while restoring + * spooled data from disk. + * + * This is safe in the face of concurrent catalog changes + * because the relevant relation can't be changed between + * speculative insertion and confirmation due to + * CheckTableNotInUse() and locking. + */ + + /* clear out a pending (and thus failed) speculation */ + if (specinsert != NULL) + { + ReorderBufferReturnChange(rb, specinsert); + specinsert = NULL; + } + + /* and memorize the pending insertion */ + dlist_delete(&change->node); + specinsert = change; + break; + case REORDER_BUFFER_CHANGE_INTERNAL_SNAPSHOT: /* get rid of the old */ TeardownHistoricSnapshot(false); @@ -1474,6 +1530,17 @@ ReorderBufferCommit(ReorderBuffer *rb, TransactionId xid, } } + /* + * If there's a speculative insertion remaining, just clean it up; it + * can't have been successful, otherwise we'd have gotten a confirmation + * record.
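Given the buffering above, a speculative insertion only ever reaches an output plugin as a plain INSERT (once its confirmation record arrives), or not at all. One way to observe this from SQL, assuming wal_level=logical, the contrib test_decoding plugin, and the same hypothetical table t as earlier:

    SELECT pg_create_logical_replication_slot('regression_slot', 'test_decoding');
    INSERT INTO t VALUES (1, 'x') ON CONFLICT (id) DO NOTHING;
    SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL);
    -- Shows an ordinary INSERT change if the tuple was kept; if the
    -- speculative insertion was backed out, no change appears at all.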
+ */ + if (specinsert) + { + ReorderBufferReturnChange(rb, specinsert); + specinsert = NULL; + } + /* clean up the iterator */ ReorderBufferIterTXNFinish(rb, iterstate); iterstate = NULL; @@ -2001,11 +2068,11 @@ ReorderBufferSerializeChange(ReorderBuffer *rb, ReorderBufferTXN *txn, switch (change->action) { + /* fall through these, they're all similar enough */ case REORDER_BUFFER_CHANGE_INSERT: - /* fall through */ case REORDER_BUFFER_CHANGE_UPDATE: - /* fall through */ case REORDER_BUFFER_CHANGE_DELETE: + case REORDER_BUFFER_CHANGE_INTERNAL_SPEC_INSERT: { char *data; ReorderBufferTupleBuf *oldtup, @@ -2083,9 +2150,8 @@ ReorderBufferSerializeChange(ReorderBuffer *rb, ReorderBufferTXN *txn, } break; } + case REORDER_BUFFER_CHANGE_INTERNAL_SPEC_CONFIRM: case REORDER_BUFFER_CHANGE_INTERNAL_COMMAND_ID: - /* ReorderBufferChange contains everything important */ - break; case REORDER_BUFFER_CHANGE_INTERNAL_TUPLECID: /* ReorderBufferChange contains everything important */ break; @@ -2256,11 +2322,11 @@ ReorderBufferRestoreChange(ReorderBuffer *rb, ReorderBufferTXN *txn, /* restore individual stuff */ switch (change->action) { + /* fall through these, they're all similar enough */ case REORDER_BUFFER_CHANGE_INSERT: - /* fall through */ case REORDER_BUFFER_CHANGE_UPDATE: - /* fall through */ case REORDER_BUFFER_CHANGE_DELETE: + case REORDER_BUFFER_CHANGE_INTERNAL_SPEC_INSERT: if (change->data.tp.newtuple) { Size len = offsetof(ReorderBufferTupleBuf, t_data) + @@ -2309,6 +2375,7 @@ ReorderBufferRestoreChange(ReorderBuffer *rb, ReorderBufferTXN *txn, break; } /* the base struct contains all the data, easy peasy */ + case REORDER_BUFFER_CHANGE_INTERNAL_SPEC_CONFIRM: case REORDER_BUFFER_CHANGE_INTERNAL_COMMAND_ID: case REORDER_BUFFER_CHANGE_INTERNAL_TUPLECID: break; diff --git a/src/backend/rewrite/rewriteHandler.c b/src/backend/rewrite/rewriteHandler.c index 0fc47cb786c..39302a410b8 100644 --- a/src/backend/rewrite/rewriteHandler.c +++ b/src/backend/rewrite/rewriteHandler.c @@ -52,7 +52,10 @@ static Query *rewriteRuleAction(Query *parsetree, CmdType event, bool *returning_flag); static List *adjustJoinTreeList(Query *parsetree, bool removert, int rt_index); -static void rewriteTargetListIU(Query *parsetree, Relation target_relation, +static List *rewriteTargetListIU(List *targetList, + CmdType commandType, + Relation target_relation, + int result_rti, List **attrno_list); static TargetEntry *process_matched_tle(TargetEntry *src_tle, TargetEntry *prior_tle, @@ -66,7 +69,7 @@ static void markQueryForLocking(Query *qry, Node *jtnode, LockClauseStrength strength, LockWaitPolicy waitPolicy, bool pushedDown); static List *matchLocks(CmdType event, RuleLock *rulelocks, - int varno, Query *parsetree); + int varno, Query *parsetree, bool *hasUpdate); static Query *fireRIRrules(Query *parsetree, List *activeRIRs, bool forUpdatePushedDown); static bool view_has_instead_trigger(Relation view, CmdType event); @@ -679,11 +682,13 @@ adjustJoinTreeList(Query *parsetree, bool removert, int rt_index) * order of the original tlist's non-junk entries. This is needed for * processing VALUES RTEs. 
*/ -static void -rewriteTargetListIU(Query *parsetree, Relation target_relation, +static List* +rewriteTargetListIU(List *targetList, + CmdType commandType, + Relation target_relation, + int result_rti, List **attrno_list) { - CmdType commandType = parsetree->commandType; TargetEntry **new_tles; List *new_tlist = NIL; List *junk_tlist = NIL; @@ -709,7 +714,7 @@ rewriteTargetListIU(Query *parsetree, Relation target_relation, new_tles = (TargetEntry **) palloc0(numattrs * sizeof(TargetEntry *)); next_junk_attrno = numattrs + 1; - foreach(temp, parsetree->targetList) + foreach(temp, targetList) { TargetEntry *old_tle = (TargetEntry *) lfirst(temp); @@ -827,7 +832,7 @@ rewriteTargetListIU(Query *parsetree, Relation target_relation, { Node *new_expr; - new_expr = (Node *) makeVar(parsetree->resultRelation, + new_expr = (Node *) makeVar(result_rti, attrno, att_tup->atttypid, att_tup->atttypmod, @@ -846,7 +851,7 @@ rewriteTargetListIU(Query *parsetree, Relation target_relation, pfree(new_tles); - parsetree->targetList = list_concat(new_tlist, junk_tlist); + return list_concat(new_tlist, junk_tlist); } @@ -1288,7 +1293,8 @@ static List * matchLocks(CmdType event, RuleLock *rulelocks, int varno, - Query *parsetree) + Query *parsetree, + bool *hasUpdate) { List *matching_locks = NIL; int nlocks; @@ -1309,6 +1315,9 @@ matchLocks(CmdType event, { RewriteRule *oneLock = rulelocks->rules[i]; + if (oneLock->event == CMD_UPDATE) + *hasUpdate = true; + /* * Suppress ON INSERT/UPDATE/DELETE rules that are disabled or * configured to not fire during the current sessions replication @@ -1766,8 +1775,8 @@ fireRIRrules(Query *parsetree, List *activeRIRs, bool forUpdatePushedDown) /* * Fetch any new security quals that must be applied to this RTE. */ - get_row_security_policies(parsetree, rte, rt_index, - &securityQuals, &withCheckOptions, + get_row_security_policies(parsetree, parsetree->commandType, rte, + rt_index, &securityQuals, &withCheckOptions, &hasRowSecurity, &hasSubLinks); if (securityQuals != NIL || withCheckOptions != NIL) @@ -2642,6 +2651,18 @@ rewriteTargetView(Query *parsetree, Relation view) tle->resno - FirstLowInvalidHeapAttributeNumber); } + if (parsetree->onConflict) + { + foreach(lc, parsetree->onConflict->onConflictSet) + { + TargetEntry *tle = (TargetEntry *) lfirst(lc); + + if (!tle->resjunk) + modified_cols = bms_add_member(modified_cols, + tle->resno - FirstLowInvalidHeapAttributeNumber); + } + } + auto_update_detail = view_cols_are_auto_updatable(viewquery, modified_cols, NULL, @@ -2999,6 +3020,7 @@ RewriteQuery(Query *parsetree, List *rewrite_events) CmdType event = parsetree->commandType; bool instead = false; bool returning = false; + bool updatableview = false; Query *qual_product = NULL; List *rewritten = NIL; ListCell *lc1; @@ -3081,6 +3103,7 @@ RewriteQuery(Query *parsetree, List *rewrite_events) Relation rt_entry_relation; List *locks; List *product_queries; + bool hasUpdate = false; result_relation = parsetree->resultRelation; Assert(result_relation != 0); @@ -3123,19 +3146,41 @@ RewriteQuery(Query *parsetree, List *rewrite_events) List *attrnos; /* Process the main targetlist ... */ - rewriteTargetListIU(parsetree, rt_entry_relation, &attrnos); + parsetree->targetList = rewriteTargetListIU(parsetree->targetList, + parsetree->commandType, + rt_entry_relation, + parsetree->resultRelation, + &attrnos); /* ... 
and the VALUES expression lists */ rewriteValuesRTE(values_rte, rt_entry_relation, attrnos); } else { /* Process just the main targetlist */ - rewriteTargetListIU(parsetree, rt_entry_relation, NULL); + parsetree->targetList = + rewriteTargetListIU(parsetree->targetList, + parsetree->commandType, + rt_entry_relation, + parsetree->resultRelation, NULL); + } + + if (parsetree->onConflict && + parsetree->onConflict->action == ONCONFLICT_UPDATE) + { + parsetree->onConflict->onConflictSet = + rewriteTargetListIU(parsetree->onConflict->onConflictSet, + CMD_UPDATE, + rt_entry_relation, + parsetree->resultRelation, + NULL); } } else if (event == CMD_UPDATE) { - rewriteTargetListIU(parsetree, rt_entry_relation, NULL); + parsetree->targetList = + rewriteTargetListIU(parsetree->targetList, + parsetree->commandType, rt_entry_relation, + parsetree->resultRelation, NULL); rewriteTargetListUD(parsetree, rt_entry, rt_entry_relation); } else if (event == CMD_DELETE) @@ -3149,7 +3194,7 @@ RewriteQuery(Query *parsetree, List *rewrite_events) * Collect and apply the appropriate rules. */ locks = matchLocks(event, rt_entry_relation->rd_rules, - result_relation, parsetree); + result_relation, parsetree, &hasUpdate); product_queries = fireRules(parsetree, result_relation, @@ -3198,6 +3243,7 @@ RewriteQuery(Query *parsetree, List *rewrite_events) */ instead = true; returning = true; + updatableview = true; } /* @@ -3278,6 +3324,17 @@ RewriteQuery(Query *parsetree, List *rewrite_events) } } + /* + * Updatable views are supported by ON CONFLICT, so don't prevent that + * case from proceeding + */ + if (parsetree->onConflict && + (product_queries != NIL || hasUpdate) && + !updatableview) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("INSERT with ON CONFLICT clause cannot be used with table that has INSERT or UPDATE rules"))); + heap_close(rt_entry_relation, NoLock); } diff --git a/src/backend/rewrite/rowsecurity.c b/src/backend/rewrite/rowsecurity.c index b0b308118f4..2c095ce88ac 100644 --- a/src/backend/rewrite/rowsecurity.c +++ b/src/backend/rewrite/rowsecurity.c @@ -89,9 +89,10 @@ row_security_policy_hook_type row_security_policy_hook_restrictive = NULL; * set to true if any of the quals returned contain sublinks. */ void -get_row_security_policies(Query* root, RangeTblEntry* rte, int rt_index, - List **securityQuals, List **withCheckOptions, - bool *hasRowSecurity, bool *hasSubLinks) +get_row_security_policies(Query* root, CmdType commandType, RangeTblEntry* rte, + int rt_index, List **securityQuals, + List **withCheckOptions, bool *hasRowSecurity, + bool *hasSubLinks) { Expr *rowsec_expr = NULL; Expr *rowsec_with_check_expr = NULL; @@ -159,7 +160,7 @@ get_row_security_policies(Query* root, RangeTblEntry* rte, int rt_index, /* Grab the built-in policies which should be applied to this relation. */ rel = heap_open(rte->relid, NoLock); - rowsec_policies = pull_row_security_policies(root->commandType, rel, + rowsec_policies = pull_row_security_policies(commandType, rel, user_id); /* @@ -201,7 +202,7 @@ get_row_security_policies(Query* root, RangeTblEntry* rte, int rt_index, */ if (row_security_policy_hook_restrictive) { - hook_policies_restrictive = (*row_security_policy_hook_restrictive)(root->commandType, rel); + hook_policies_restrictive = (*row_security_policy_hook_restrictive)(commandType, rel); /* Build the expression from any policies returned. 
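Stepping back to the rewriter change above, the new check in RewriteQuery() makes the INSERT/UPDATE-rule limitation user-visible as an error. A sketch with hypothetical names:

    CREATE TABLE t (id int PRIMARY KEY);
    CREATE TABLE audit (note text);
    CREATE RULE t_audit AS ON INSERT TO t DO ALSO
        INSERT INTO audit VALUES ('inserted');

    -- ERROR: INSERT with ON CONFLICT clause cannot be used with table that
    -- has INSERT or UPDATE rules
    INSERT INTO t VALUES (1) ON CONFLICT (id) DO NOTHING;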
*/ if (hook_policies_restrictive != NIL) @@ -214,7 +215,7 @@ get_row_security_policies(Query* root, RangeTblEntry* rte, int rt_index, if (row_security_policy_hook_permissive) { - hook_policies_permissive = (*row_security_policy_hook_permissive)(root->commandType, rel); + hook_policies_permissive = (*row_security_policy_hook_permissive)(commandType, rel); /* Build the expression from any policies returned. */ if (hook_policies_permissive != NIL) @@ -242,7 +243,7 @@ get_row_security_policies(Query* root, RangeTblEntry* rte, int rt_index, * WITH CHECK policy (this will be a copy of the USING policy, if no * explicit WITH CHECK policy exists). */ - if (root->commandType == CMD_INSERT || root->commandType == CMD_UPDATE) + if (commandType == CMD_INSERT || commandType == CMD_UPDATE) { /* * WITH CHECK OPTIONS wants a WCO node which wraps each Expr, so @@ -259,7 +260,7 @@ get_row_security_policies(Query* root, RangeTblEntry* rte, int rt_index, WithCheckOption *wco; wco = (WithCheckOption *) makeNode(WithCheckOption); - wco->kind = root->commandType == CMD_INSERT ? WCO_RLS_INSERT_CHECK : + wco->kind = commandType == CMD_INSERT ? WCO_RLS_INSERT_CHECK : WCO_RLS_UPDATE_CHECK; wco->relname = pstrdup(RelationGetRelationName(rel)); wco->qual = (Node *) hook_with_check_expr_restrictive; @@ -276,7 +277,7 @@ get_row_security_policies(Query* root, RangeTblEntry* rte, int rt_index, WithCheckOption *wco; wco = (WithCheckOption *) makeNode(WithCheckOption); - wco->kind = root->commandType == CMD_INSERT ? WCO_RLS_INSERT_CHECK : + wco->kind = commandType == CMD_INSERT ? WCO_RLS_INSERT_CHECK : WCO_RLS_UPDATE_CHECK; wco->relname = pstrdup(RelationGetRelationName(rel)); wco->qual = (Node *) rowsec_with_check_expr; @@ -289,7 +290,7 @@ get_row_security_policies(Query* root, RangeTblEntry* rte, int rt_index, WithCheckOption *wco; wco = (WithCheckOption *) makeNode(WithCheckOption); - wco->kind = root->commandType == CMD_INSERT ? WCO_RLS_INSERT_CHECK : + wco->kind = commandType == CMD_INSERT ? WCO_RLS_INSERT_CHECK : WCO_RLS_UPDATE_CHECK; wco->relname = pstrdup(RelationGetRelationName(rel)); wco->qual = (Node *) hook_with_check_expr_permissive; @@ -312,19 +313,72 @@ get_row_security_policies(Query* root, RangeTblEntry* rte, int rt_index, combined_qual_eval = makeBoolExpr(OR_EXPR, combined_quals, -1); wco = (WithCheckOption *) makeNode(WithCheckOption); - wco->kind = root->commandType == CMD_INSERT ? WCO_RLS_INSERT_CHECK : + wco->kind = commandType == CMD_INSERT ? WCO_RLS_INSERT_CHECK : WCO_RLS_UPDATE_CHECK; wco->relname = pstrdup(RelationGetRelationName(rel)); wco->qual = (Node *) combined_qual_eval; wco->cascaded = false; *withCheckOptions = lappend(*withCheckOptions, wco); } + + /* + * ON CONFLICT DO UPDATE has an RTE that is subject to both INSERT and + * UPDATE RLS enforcement. Those are enforced (as a special, distinct + * kind of WCO) on the target tuple. + * + * Make a second, recursive pass over the RTE for this, gathering + * UPDATE-applicable RLS checks/WCOs, and gathering and converting + * UPDATE-applicable security quals into WCO_RLS_CONFLICT_CHECK RLS + * checks/WCOs. Finally, these distinct kinds of RLS checks/WCOs are + * concatenated with our own INSERT-applicable list. 
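The net effect of the recursion described above is that INSERT ... ON CONFLICT DO UPDATE is checked against both the INSERT and UPDATE policies of the target, with the UPDATE-applicable USING quals enforced against the conflicting tuple as WCO_RLS_CONFLICT_CHECK options rather than used as filters. A sketch with hypothetical policies:

    CREATE TABLE docs (id int PRIMARY KEY, owner text, body text);
    ALTER TABLE docs ENABLE ROW LEVEL SECURITY;
    CREATE POLICY docs_ins ON docs FOR INSERT
        WITH CHECK (owner = current_user);
    CREATE POLICY docs_upd ON docs FOR UPDATE
        USING (owner = current_user);

    -- If the conflicting row is owned by someone else, the UPDATE path
    -- errors out instead of silently skipping the row:
    INSERT INTO docs VALUES (1, current_user, 'new')
    ON CONFLICT (id) DO UPDATE SET body = EXCLUDED.body;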
+ */ + if (root->onConflict && root->onConflict->action == ONCONFLICT_UPDATE && + commandType == CMD_INSERT) + { + List *conflictSecurityQuals = NIL; + List *conflictWCOs = NIL; + ListCell *item; + bool conflictHasRowSecurity = false; + bool conflictHasSublinks = false; + + /* Assume that RTE is target resultRelation */ + get_row_security_policies(root, CMD_UPDATE, rte, rt_index, + &conflictSecurityQuals, &conflictWCOs, + &conflictHasRowSecurity, + &conflictHasSublinks); + + if (conflictHasRowSecurity) + *hasRowSecurity = true; + if (conflictHasSublinks) + *hasSubLinks = true; + + /* + * Append WITH CHECK OPTIONs/RLS checks, which should not conflict + * between this INSERT and the auxiliary UPDATE. + */ + *withCheckOptions = list_concat(*withCheckOptions, + conflictWCOs); + + foreach(item, conflictSecurityQuals) + { + Expr *conflict_rowsec_expr = (Expr *) lfirst(item); + WithCheckOption *wco; + + wco = (WithCheckOption *) makeNode(WithCheckOption); + + wco->kind = WCO_RLS_CONFLICT_CHECK; + wco->relname = pstrdup(RelationGetRelationName(rel)); + wco->qual = (Node *) copyObject(conflict_rowsec_expr); + wco->cascaded = false; + *withCheckOptions = lappend(*withCheckOptions, wco); + } + } + } /* For SELECT, UPDATE, and DELETE, set the security quals */ - if (root->commandType == CMD_SELECT - || root->commandType == CMD_UPDATE - || root->commandType == CMD_DELETE) + if (commandType == CMD_SELECT + || commandType == CMD_UPDATE + || commandType == CMD_DELETE) { /* restrictive policies can simply be added to the list first */ if (hook_expr_restrictive) diff --git a/src/backend/storage/lmgr/lmgr.c b/src/backend/storage/lmgr/lmgr.c index d13a1673344..c0529497496 100644 --- a/src/backend/storage/lmgr/lmgr.c +++ b/src/backend/storage/lmgr/lmgr.c @@ -26,6 +26,24 @@ /* + * Per-backend counter for generating speculative insertion tokens. + * + * This may wrap around, but that's OK as it's only used for the short + * duration between inserting a tuple and checking that there are no (unique) + * constraint violations. It's theoretically possible that a backend sees a + * tuple that was speculatively inserted by another backend, but before it has + * started waiting on the token, the other backend completes its insertion, + * and then performs 2^32 unrelated insertions. And after all that, the + * first backend finally calls SpeculativeInsertionLockAcquire(), with the + * intention of waiting for the first insertion to complete, but ends up + * waiting for the latest unrelated insertion instead. Even then, nothing + * particularly bad happens: in the worst case they deadlock, causing one of + * the transactions to abort. + */ +static uint32 speculativeInsertionToken = 0; + + +/* + * Struct to hold context info for transaction lock waits. + * + * 'oper' is the operation that needs to wait for the other transaction; 'rel' @@ -576,6 +594,73 @@ ConditionalXactLockTableWait(TransactionId xid) } /* + * SpeculativeInsertionLockAcquire + * + * Insert a lock showing that the given transaction ID is inserting a tuple, + * but hasn't yet decided whether it's going to keep it. The lock can then be + * used to wait for the decision to go ahead with the insertion, or to abort + * it. + * + * The token is used to distinguish multiple insertions by the same + * transaction. It is returned to the caller. + */ +uint32 +SpeculativeInsertionLockAcquire(TransactionId xid) +{ + LOCKTAG tag; + + speculativeInsertionToken++; + + /* + * Check for wrap-around. Zero means no token is held, so don't use that.
+ */ + if (speculativeInsertionToken == 0) + speculativeInsertionToken = 1; + + SET_LOCKTAG_SPECULATIVE_INSERTION(tag, xid, speculativeInsertionToken); + + (void) LockAcquire(&tag, ExclusiveLock, false, false); + + return speculativeInsertionToken; +} + +/* + * SpeculativeInsertionLockRelease + * + * Delete the lock showing that the given transaction is speculatively + * inserting a tuple. + */ +void +SpeculativeInsertionLockRelease(TransactionId xid) +{ + LOCKTAG tag; + + SET_LOCKTAG_SPECULATIVE_INSERTION(tag, xid, speculativeInsertionToken); + + LockRelease(&tag, ExclusiveLock, false); +} + +/* + * SpeculativeInsertionWait + * + * Wait for the specified transaction to finish or abort the insertion of a + * tuple. + */ +void +SpeculativeInsertionWait(TransactionId xid, uint32 token) +{ + LOCKTAG tag; + + SET_LOCKTAG_SPECULATIVE_INSERTION(tag, xid, token); + + Assert(TransactionIdIsValid(xid)); + Assert(token != 0); + + (void) LockAcquire(&tag, ShareLock, false, false); + LockRelease(&tag, ShareLock, false); +} + +/* * XactLockTableWaitErrorContextCb * Error context callback for transaction lock waits. */ @@ -873,6 +958,12 @@ DescribeLockTag(StringInfo buf, const LOCKTAG *tag) tag->locktag_field1, tag->locktag_field2); break; + case LOCKTAG_SPECULATIVE_TOKEN: + appendStringInfo(buf, + _("speculative token %u of transaction %u"), + tag->locktag_field2, + tag->locktag_field1); + break; case LOCKTAG_OBJECT: appendStringInfo(buf, _("object %u of class %u of database %u"), diff --git a/src/backend/tcop/pquery.c b/src/backend/tcop/pquery.c index 9c14e8abdf8..bcffd85754c 100644 --- a/src/backend/tcop/pquery.c +++ b/src/backend/tcop/pquery.c @@ -202,8 +202,14 @@ ProcessQuery(PlannedStmt *plan, lastOid = queryDesc->estate->es_lastoid; else lastOid = InvalidOid; - snprintf(completionTag, COMPLETION_TAG_BUFSIZE, - "INSERT %u %u", lastOid, queryDesc->estate->es_processed); + if (plan->isUpsert) + snprintf(completionTag, COMPLETION_TAG_BUFSIZE, + "UPSERT %u %u", + lastOid, queryDesc->estate->es_processed); + else + snprintf(completionTag, COMPLETION_TAG_BUFSIZE, + "INSERT %u %u", + lastOid, queryDesc->estate->es_processed); break; case CMD_UPDATE: snprintf(completionTag, COMPLETION_TAG_BUFSIZE, @@ -1356,7 +1362,10 @@ PortalRunMulti(Portal portal, bool isTopLevel, * 0" here because technically there is no query of the matching tag type, * and printing a non-zero count for a different query type seems wrong, * e.g. an INSERT that does an UPDATE instead should not print "0 1" if - * one row was updated. See QueryRewrite(), step 3, for details. + * one row was updated (unless the ON CONFLICT DO UPDATE, or "UPSERT" + * variant of INSERT was used to update the row, where it's logically a + * direct effect of the top level command). See QueryRewrite(), step 3, + * for details. 
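With the ProcessQuery() change above, the command tag reported to the client distinguishes the two INSERT paths. In a psql session, and again assuming the hypothetical table t, that would look roughly like:

    =# INSERT INTO t VALUES (1, 'x') ON CONFLICT (id) DO UPDATE SET v = EXCLUDED.v;
    UPSERT 0 1
    =# INSERT INTO t VALUES (2, 'y');
    INSERT 0 1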
diff --git a/src/backend/utils/adt/lockfuncs.c b/src/backend/utils/adt/lockfuncs.c
index 491824dd6bf..9d53a8b6a32 100644
--- a/src/backend/utils/adt/lockfuncs.c
+++ b/src/backend/utils/adt/lockfuncs.c
@@ -29,6 +29,7 @@ static const char *const LockTagTypeNames[] = {
 	"tuple",
 	"transactionid",
 	"virtualxid",
+	"speculative token",
 	"object",
 	"userlock",
 	"advisory"
diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c
index 69267bdb918..4b3cd85ad90 100644
--- a/src/backend/utils/adt/ruleutils.c
+++ b/src/backend/utils/adt/ruleutils.c
@@ -354,6 +354,9 @@ static void get_select_query_def(Query *query, deparse_context *context,
 				 TupleDesc resultDesc);
 static void get_insert_query_def(Query *query, deparse_context *context);
 static void get_update_query_def(Query *query, deparse_context *context);
+static void get_update_query_targetlist_def(Query *query, List *targetList,
+								deparse_context *context,
+								RangeTblEntry *rte);
 static void get_delete_query_def(Query *query, deparse_context *context);
 static void get_utility_query_def(Query *query, deparse_context *context);
 static void get_basic_select_query(Query *query, deparse_context *context,
@@ -3846,15 +3849,23 @@ set_deparse_planstate(deparse_namespace *dpns, PlanState *ps)
 	 * For a SubqueryScan, pretend the subplan is INNER referent.  (We don't
 	 * use OUTER because that could someday conflict with the normal meaning.)
 	 * Likewise, for a CteScan, pretend the subquery's plan is INNER referent.
+	 * For ON CONFLICT .. UPDATE we just need the inner tlist to point to the
+	 * excluded expression's tlist.  (Similarly to the SubqueryScan case, we
+	 * don't want to reuse OUTER: it's used for RETURNING in some ModifyTable
+	 * cases, although not for INSERT .. ON CONFLICT.)
 	 */
 	if (IsA(ps, SubqueryScanState))
 		dpns->inner_planstate = ((SubqueryScanState *) ps)->subplan;
 	else if (IsA(ps, CteScanState))
 		dpns->inner_planstate = ((CteScanState *) ps)->cteplanstate;
+	else if (IsA(ps, ModifyTableState))
+		dpns->inner_planstate = ps;
 	else
 		dpns->inner_planstate = innerPlanState(ps);
 
-	if (dpns->inner_planstate)
+	if (IsA(ps, ModifyTableState))
+		dpns->inner_tlist = ((ModifyTableState *) ps)->mt_excludedtlist;
+	else if (dpns->inner_planstate)
 		dpns->inner_tlist = dpns->inner_planstate->plan->targetlist;
 	else
 		dpns->inner_tlist = NIL;
@@ -5302,6 +5313,32 @@ get_insert_query_def(Query *query, deparse_context *context)
 		appendStringInfoString(buf, "DEFAULT VALUES");
 	}
 
+	/* Add ON CONFLICT if present */
+	if (query->onConflict)
+	{
+		OnConflictExpr *confl = query->onConflict;
+
+		if (confl->action == ONCONFLICT_NOTHING)
+		{
+			appendStringInfoString(buf, " ON CONFLICT DO NOTHING");
+		}
+		else
+		{
+			appendStringInfoString(buf, " ON CONFLICT DO UPDATE SET ");
+			/* Deparse targetlist */
+			get_update_query_targetlist_def(query, confl->onConflictSet,
+											context, rte);
+
+			/* Add a WHERE clause if given */
+			if (confl->onConflictWhere != NULL)
+			{
+				appendContextKeyword(context, " WHERE ",
+									 -PRETTYINDENT_STD, PRETTYINDENT_STD, 1);
+				get_rule_expr(confl->onConflictWhere, context, false);
+			}
+		}
+	}
+
 	/* Add RETURNING if present */
 	if (query->returningList)
 	{
@@ -5321,12 +5358,6 @@ get_update_query_def(Query *query, deparse_context *context)
 {
 	StringInfo	buf = context->buf;
 	RangeTblEntry *rte;
-	List	   *ma_sublinks;
-	ListCell   *next_ma_cell;
-	SubLink    *cur_ma_sublink;
-	int			remaining_ma_columns;
-	const char *sep;
-	ListCell   *l;
 
 	/* Insert the WITH clause if given */
 	get_with_clause(query, context);
@@ -5349,6 +5380,46 @@ get_update_query_def(Query *query, deparse_context *context)
 				quote_identifier(rte->alias->aliasname));
 	appendStringInfoString(buf, " SET ");
 
+	/* Deparse targetlist */
+	get_update_query_targetlist_def(query, query->targetList, context, rte);
+
+	/* Add the FROM clause if needed */
+	get_from_clause(query, " FROM ", context);
+
+	/* Add a WHERE clause if given */
+	if (query->jointree->quals != NULL)
+	{
+		appendContextKeyword(context, " WHERE ",
+							 -PRETTYINDENT_STD, PRETTYINDENT_STD, 1);
+		get_rule_expr(query->jointree->quals, context, false);
+	}
+
+	/* Add RETURNING if present */
+	if (query->returningList)
+	{
+		appendContextKeyword(context, " RETURNING",
+							 -PRETTYINDENT_STD, PRETTYINDENT_STD, 1);
+		get_target_list(query->returningList, context, NULL);
+	}
+}
+
+
+/* ----------
+ * get_update_query_targetlist_def			- Parse back an UPDATE targetlist
+ * ----------
+ */
+static void
+get_update_query_targetlist_def(Query *query, List *targetList,
+								deparse_context *context, RangeTblEntry *rte)
+{
+	StringInfo	buf = context->buf;
+	ListCell   *l;
+	ListCell   *next_ma_cell;
+	int			remaining_ma_columns;
+	const char *sep;
+	SubLink    *cur_ma_sublink;
+	List	   *ma_sublinks;
+
 	/*
 	 * Prepare to deal with MULTIEXPR assignments: collect the source SubLinks
 	 * into a list.  We expect them to appear, in ID order, in resjunk tlist
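For reference, the clause shapes get_insert_query_def() now appends can be sketched standalone. The helper below is hypothetical and not part of the patch; it uses a plain char buffer in place of StringInfo and string constants in place of the deparsed targetlist and qual, but mirrors the same branch structure (nothing appended, a bare DO NOTHING clause, or DO UPDATE SET followed by an optional WHERE).

#include <stdio.h>
#include <string.h>

typedef enum { OC_NONE, OC_NOTHING, OC_UPDATE } OCAction;

static void
append_on_conflict(char *buf, size_t bufsize, OCAction action,
                   const char *set_list, const char *where_qual)
{
    size_t  used = strlen(buf);

    if (action == OC_NOTHING)
        snprintf(buf + used, bufsize - used, " ON CONFLICT DO NOTHING");
    else if (action == OC_UPDATE)
        snprintf(buf + used, bufsize - used,
                 " ON CONFLICT DO UPDATE SET %s%s%s",
                 set_list,
                 where_qual ? " WHERE " : "",
                 where_qual ? where_qual : "");
    /* OC_NONE: plain INSERT, nothing appended */
}

int
main(void)
{
    char    q[256] = "INSERT INTO t (k, v) VALUES (1, 'x')";

    append_on_conflict(q, sizeof(q), OC_UPDATE, "v = 'x'", "t.k = 1");
    puts(q);  /* ... ON CONFLICT DO UPDATE SET v = 'x' WHERE t.k = 1 */
    return 0;
}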
@@ -5357,7 +5428,7 @@ get_update_query_def(Query *query, deparse_context *context)
 	ma_sublinks = NIL;
 	if (query->hasSubLinks)		/* else there can't be any */
 	{
-		foreach(l, query->targetList)
+		foreach(l, targetList)
 		{
 			TargetEntry *tle = (TargetEntry *) lfirst(l);
 
@@ -5379,7 +5450,7 @@ get_update_query_def(Query *query, deparse_context *context)
 
 	/* Add the comma separated list of 'attname = value' */
 	sep = "";
-	foreach(l, query->targetList)
+	foreach(l, targetList)
 	{
 		TargetEntry *tle = (TargetEntry *) lfirst(l);
 		Node	   *expr;
@@ -5470,25 +5541,6 @@ get_update_query_def(Query *query, deparse_context *context)
 
 		get_rule_expr(expr, context, false);
 	}
-
-	/* Add the FROM clause if needed */
-	get_from_clause(query, " FROM ", context);
-
-	/* Add a WHERE clause if given */
-	if (query->jointree->quals != NULL)
-	{
-		appendContextKeyword(context, " WHERE ",
-							 -PRETTYINDENT_STD, PRETTYINDENT_STD, 1);
-		get_rule_expr(query->jointree->quals, context, false);
-	}
-
-	/* Add RETURNING if present */
-	if (query->returningList)
-	{
-		appendContextKeyword(context, " RETURNING",
-							 -PRETTYINDENT_STD, PRETTYINDENT_STD, 1);
-		get_target_list(query->returningList, context, NULL);
-	}
 }
diff --git a/src/backend/utils/time/tqual.c b/src/backend/utils/time/tqual.c
index a4a478d1142..b4284d6d94f 100644
--- a/src/backend/utils/time/tqual.c
+++ b/src/backend/utils/time/tqual.c
@@ -405,6 +405,13 @@ HeapTupleSatisfiesToast(HeapTuple htup, Snapshot snapshot,
 			}
 		}
 	}
+	/*
+	 * An invalid Xmin can be left behind by a speculative insertion that
+	 * is cancelled by super-deleting the tuple.  We shouldn't see any of
+	 * those in TOAST tables, but better safe than sorry.
+	 */
+	else if (!TransactionIdIsValid(HeapTupleHeaderGetXmin(tuple)))
+		return false;
 
 	/* otherwise assume the tuple is valid for TOAST. */
 	return true;
@@ -714,8 +721,11 @@ HeapTupleSatisfiesUpdate(HeapTuple htup, CommandId curcid,
 * output argument to return the xids of concurrent xacts that affected the
 * tuple.  snapshot->xmin is set to the tuple's xmin if that is another
 * transaction that's still in progress; or to InvalidTransactionId if the
- * tuple's xmin is committed good, committed dead, or my own xact.  Similarly
- * for snapshot->xmax and the tuple's xmax.
+ * tuple's xmin is committed good, committed dead, or my own xact.
+ * Similarly for snapshot->xmax and the tuple's xmax.  If the tuple was
+ * inserted speculatively, meaning that the inserter might still back down
+ * on the insertion without aborting the whole transaction, the associated
+ * token is also returned in snapshot->speculativeToken.
 */
 bool
 HeapTupleSatisfiesDirty(HeapTuple htup, Snapshot snapshot,
@@ -727,6 +737,7 @@ HeapTupleSatisfiesDirty(HeapTuple htup, Snapshot snapshot,
 	Assert(htup->t_tableOid != InvalidOid);
 
 	snapshot->xmin = snapshot->xmax = InvalidTransactionId;
+	snapshot->speculativeToken = 0;
 
 	if (!HeapTupleHeaderXminCommitted(tuple))
 	{
@@ -808,6 +819,20 @@ HeapTupleSatisfiesDirty(HeapTuple htup, Snapshot snapshot,
 		}
 		else if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmin(tuple)))
 		{
+			/*
+			 * Return the speculative token to the caller.  The caller can
+			 * worry about xmax itself, since it requires a conclusively
+			 * locked row version, and a concurrent update to this tuple
+			 * would be a conflict for its purposes anyway.
+			 */
+			if (HeapTupleHeaderIsSpeculative(tuple))
+			{
+				snapshot->speculativeToken =
+					HeapTupleHeaderGetSpeculativeToken(tuple);
+
+				Assert(snapshot->speculativeToken != 0);
+			}
+
 			snapshot->xmin = HeapTupleHeaderGetRawXmin(tuple);
 			/* XXX shouldn't we fall through to look at xmax? */
 			return true;		/* in insertion by other */
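Putting the tqual.c and lmgr.c pieces together: when HeapTupleSatisfiesDirty() reports an in-progress inserter, a would-be conflicting inserter can take the narrower wait if a token is present. The sketch below is schematic only; DirtySnapshot, wait_for_token() and wait_for_xact() are simplified stand-ins for the backend's SnapshotData, SpeculativeInsertionWait() and XactLockTableWait().

#include <stdint.h>
#include <stdio.h>

typedef uint32_t TransactionId;        /* stand-in for the backend typedef */

typedef struct DirtySnapshot
{
    TransactionId xmin;                /* in-progress inserter, if any */
    uint32_t      speculativeToken;    /* nonzero while insertion in doubt */
} DirtySnapshot;

/* Stand-ins for SpeculativeInsertionWait() and XactLockTableWait() */
static void
wait_for_token(TransactionId xid, uint32_t token)
{
    printf("waiting on speculative token %u of transaction %u\n", token, xid);
}

static void
wait_for_xact(TransactionId xid)
{
    printf("waiting for transaction %u to commit or abort\n", xid);
}

/*
 * After the dirty-snapshot check says "in insertion by other": with a
 * token, wait only until the inserter confirms or super-deletes; without
 * one, fall back to waiting for its whole transaction.
 */
static void
wait_for_inserter(const DirtySnapshot *snap)
{
    if (snap->speculativeToken != 0)
        wait_for_token(snap->xmin, snap->speculativeToken);
    else
        wait_for_xact(snap->xmin);
    /* ...then re-fetch the tuple and re-check visibility... */
}

int
main(void)
{
    DirtySnapshot spec = {.xmin = 1234, .speculativeToken = 7};
    DirtySnapshot plain = {.xmin = 1234, .speculativeToken = 0};

    wait_for_inserter(&spec);
    wait_for_inserter(&plain);
    return 0;
}

The short wait is the point of the whole mechanism: two sessions inserting the same key must not block on each other's transactions (that is how ordinary unique-index waits deadlock under ON CONFLICT), only on each other's pending insertions.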