From 30859930528b50e55f1a3ea66b6a9a3704c8df62 Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Wed, 22 Jan 2025 11:58:20 -0500
Subject: [PATCH] Repair incorrect handling of
 AfterTriggerSharedData.ats_modifiedcols.

This patch fixes two distinct errors that both ultimately trace
to commit 71d60e2aa, which added the ats_modifiedcols field.

The more severe error is that ats_modifiedcols wasn't accounted for
in afterTriggerAddEvent's scanning loop that looks for a pre-existing
duplicate AfterTriggerSharedData.  Thus, a new event could be
incorrectly matched to an AfterTriggerSharedData that has a different
value of ats_modifiedcols, resulting in the wrong tg_updatedcols
bitmap getting passed to the trigger whenever it finally gets fired.
We'd not noticed because (a) few triggers consult tg_updatedcols,
and (b) we had no tests exercising a case where such a trigger was
called as an AFTER trigger.  In the test case added by this commit,
contrib/lo's trigger fails to remove a large object when expected
because (without this fix) it thinks the LO OID column hasn't changed.

The other problem was introduced by commit ce5aaea8c, which copied the
modified-columns bitmap into trigger-related storage.  It made a copy
for every trigger event, whereas what we really want is to make a new
copy only when we make a new AfterTriggerSharedData entry.  (We could
imagine adding extra logic to reduce the number of bitmap copies still
more, but it doesn't look worthwhile at the moment.)  In a simple test
of an UPDATE of 10000000 rows with a single AFTER trigger, this thinko
roughly tripled the amount of memory consumed by the pending-triggers
data structures, from 160446744 to 480443440 bytes.

Fixing the first problem requires introducing a bms_equal() call into
afterTriggerAddEvent's scanning loop, which is slightly annoying from
a speed perspective.  However, getting rid of the excessive bms_copy()
calls from the second problem balances that out; overall speed of
trigger operations is the same or slightly better, in my tests.

Discussion: https://postgr.es/m/3496294.1737501591@sss.pgh.pa.us
Backpatch-through: 13
---
 contrib/lo/expected/lo.out     | 69 ++++++++++++++++++++++++++++++++++
 contrib/lo/sql/lo.sql          | 40 ++++++++++++++++++++
 src/backend/commands/trigger.c | 18 ++++-----
 3 files changed, 117 insertions(+), 10 deletions(-)

diff --git a/contrib/lo/expected/lo.out b/contrib/lo/expected/lo.out
index c63e4b1c704..1b6c5a85649 100644
--- a/contrib/lo/expected/lo.out
+++ b/contrib/lo/expected/lo.out
@@ -47,4 +47,73 @@ SELECT lo_get(43214);
 DELETE FROM image;
 SELECT lo_get(43214);
 ERROR:  large object 43214 does not exist
+-- Now let's try it with an AFTER trigger
+DROP TRIGGER t_raster ON image;
+CREATE CONSTRAINT TRIGGER t_raster AFTER UPDATE OR DELETE ON image
+    DEFERRABLE INITIALLY DEFERRED
+    FOR EACH ROW EXECUTE PROCEDURE lo_manage(raster);
+SELECT lo_create(43223);
+ lo_create 
+-----------
+     43223
+(1 row)
+
+SELECT lo_create(43224);
+ lo_create 
+-----------
+     43224
+(1 row)
+
+SELECT lo_create(43225);
+ lo_create 
+-----------
+     43225
+(1 row)
+
+INSERT INTO image (title, raster) VALUES ('beautiful image', 43223);
+SELECT lo_get(43223);
+ lo_get 
+--------
+ \x
+(1 row)
+
+UPDATE image SET raster = 43224 WHERE title = 'beautiful image';
+SELECT lo_get(43223);  -- gone
+ERROR:  large object 43223 does not exist
+SELECT lo_get(43224);
+ lo_get 
+--------
+ \x
+(1 row)
+
+-- test updating of unrelated column
+UPDATE image SET title = 'beautiful picture' WHERE title = 'beautiful image';
+SELECT lo_get(43224);
+ lo_get 
+--------
+ \x
+(1 row)
+
+-- this case used to be buggy
+BEGIN;
+UPDATE image SET title = 'beautiful image' WHERE title = 'beautiful picture';
+UPDATE image SET raster = 43225 WHERE title = 'beautiful image';
+SELECT lo_get(43224);
+ lo_get 
+--------
+ \x
+(1 row)
+
+COMMIT;
+SELECT lo_get(43224);  -- gone
+ERROR:  large object 43224 does not exist
+SELECT lo_get(43225);
+ lo_get 
+--------
+ \x
+(1 row)
+
+DELETE FROM image;
+SELECT lo_get(43225);  -- gone
+ERROR:  large object 43225 does not exist
 DROP TABLE image;
diff --git a/contrib/lo/sql/lo.sql b/contrib/lo/sql/lo.sql
index 77039509245..99c3bd1fab3 100644
--- a/contrib/lo/sql/lo.sql
+++ b/contrib/lo/sql/lo.sql
@@ -27,4 +27,44 @@ DELETE FROM image;
 
 SELECT lo_get(43214);
 
+-- Now let's try it with an AFTER trigger
+
+DROP TRIGGER t_raster ON image;
+
+CREATE CONSTRAINT TRIGGER t_raster AFTER UPDATE OR DELETE ON image
+    DEFERRABLE INITIALLY DEFERRED
+    FOR EACH ROW EXECUTE PROCEDURE lo_manage(raster);
+
+SELECT lo_create(43223);
+SELECT lo_create(43224);
+SELECT lo_create(43225);
+
+INSERT INTO image (title, raster) VALUES ('beautiful image', 43223);
+
+SELECT lo_get(43223);
+
+UPDATE image SET raster = 43224 WHERE title = 'beautiful image';
+
+SELECT lo_get(43223);  -- gone
+SELECT lo_get(43224);
+
+-- test updating of unrelated column
+UPDATE image SET title = 'beautiful picture' WHERE title = 'beautiful image';
+
+SELECT lo_get(43224);
+
+-- this case used to be buggy
+BEGIN;
+UPDATE image SET title = 'beautiful image' WHERE title = 'beautiful picture';
+UPDATE image SET raster = 43225 WHERE title = 'beautiful image';
+SELECT lo_get(43224);
+COMMIT;
+
+SELECT lo_get(43224);  -- gone
+SELECT lo_get(43225);
+
+DELETE FROM image;
+
+SELECT lo_get(43225);  -- gone
+
 DROP TABLE image;
diff --git a/src/backend/commands/trigger.c b/src/backend/commands/trigger.c
index c852b137f06..d04b4beed91 100644
--- a/src/backend/commands/trigger.c
+++ b/src/backend/commands/trigger.c
@@ -3723,13 +3723,6 @@ afterTriggerCopyBitmap(Bitmapset *src)
 	if (src == NULL)
 		return NULL;
 
-	/* Create event context if we didn't already */
-	if (afterTriggers.event_cxt == NULL)
-		afterTriggers.event_cxt =
-			AllocSetContextCreate(TopTransactionContext,
-								  "AfterTriggerEvents",
-								  ALLOCSET_DEFAULT_SIZES);
-
 	oldcxt = MemoryContextSwitchTo(afterTriggers.event_cxt);
 
 	dst = bms_copy(src);
@@ -3831,16 +3824,21 @@ afterTriggerAddEvent(AfterTriggerEventList *events,
 		 (char *) newshared >= chunk->endfree;
 		 newshared--)
 	{
+		/* compare fields roughly by probability of them being different */
 		if (newshared->ats_tgoid == evtshared->ats_tgoid &&
-			newshared->ats_relid == evtshared->ats_relid &&
 			newshared->ats_event == evtshared->ats_event &&
+			newshared->ats_firing_id == 0 &&
 			newshared->ats_table == evtshared->ats_table &&
-			newshared->ats_firing_id == 0)
+			newshared->ats_relid == evtshared->ats_relid &&
+			bms_equal(newshared->ats_modifiedcols,
+					  evtshared->ats_modifiedcols))
 			break;
 	}
 	if ((char *) newshared < chunk->endfree)
 	{
 		*newshared = *evtshared;
+		/* now we must make a suitably-long-lived copy of the bitmap */
+		newshared->ats_modifiedcols = afterTriggerCopyBitmap(evtshared->ats_modifiedcols);
 		newshared->ats_firing_id = 0;	/* just to be sure */
 		chunk->endfree = (char *) newshared;
 	}
@@ -5857,7 +5855,7 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo,
 			new_shared.ats_table = transition_capture->tcs_private;
 		else
 			new_shared.ats_table = NULL;
-		new_shared.ats_modifiedcols = afterTriggerCopyBitmap(modifiedCols);
+		new_shared.ats_modifiedcols = modifiedCols;
 
 		afterTriggerAddEvent(&afterTriggers.query_stack[afterTriggers.query_depth].events,
 							 &new_event, &new_shared);
-- 
2.39.5