From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Tue, 16 Apr 2024 15:22:39 +0000 (-0400)
Subject: Fix generation of EC join conditions at the wrong plan level.
X-Git-Tag: REL_14_12~19
X-Git-Url: http://git.postgresql.org/gitweb/?a=commitdiff_plain;h=ab2402268c04401533f163d959cdccd0e124f526;p=postgresql.git

Fix generation of EC join conditions at the wrong plan level.

get_baserel_parampathinfo previously assumed without checking that
the results of generate_join_implied_equalities "necessarily satisfy
join_clause_is_movable_into".  This turns out to be wrong in the
presence of outer joins, because the generated clauses could include
Vars that mustn't be evaluated below a relevant outer join.  That
led to applying clauses at the wrong plan level and possibly getting
incorrect query results.  We must check each clause's nullable_relids,
and really the right thing to do is test join_clause_is_movable_into.

However, trying to fix it that way exposes an oversight in
equivclass.c: it wasn't careful about marking join clauses for
appendrel children with the correct clause_relids.  That caused the
modified get_baserel_parampathinfo code to reject some clauses it
still needs to accept.  (See parallel commit for HEAD/v16 for more
commentary about that.)

Per bug #18429 from BenoƮt Ryder.  This misbehavior existed for
a long time before commit 2489d76c4, so patch v12-v15 this way.

Discussion: https://postgr.es/m/18429-8982d4a348cc86c6@postgresql.org
---

diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c
index 01d14df2c8c..8a61dd5af82 100644
--- a/src/backend/optimizer/path/equivclass.c
+++ b/src/backend/optimizer/path/equivclass.c
@@ -1880,6 +1880,21 @@ create_join_clause(PlannerInfo *root,
 												  rightem->em_nullable_relids),
 										ec->ec_min_security);
 
+	/*
+	 * If either EM is a child, force the clause's clause_relids to include
+	 * the relid(s) of the child rel.  In normal cases it would already, but
+	 * not if we are considering appendrel child relations with pseudoconstant
+	 * translated variables (i.e., UNION ALL sub-selects with constant output
+	 * items).  We must do this so that join_clause_is_movable_into() will
+	 * think that the clause should be evaluated at the correct place.
+	 */
+	if (leftem->em_is_child)
+		rinfo->clause_relids = bms_add_members(rinfo->clause_relids,
+											   leftem->em_relids);
+	if (rightem->em_is_child)
+		rinfo->clause_relids = bms_add_members(rinfo->clause_relids,
+											   rightem->em_relids);
+
 	/* Mark the clause as redundant, or not */
 	rinfo->parent_ec = parent_ec;
 
diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c
index e105a4d5f1d..836bdaac069 100644
--- a/src/backend/optimizer/util/relnode.c
+++ b/src/backend/optimizer/util/relnode.c
@@ -1297,6 +1297,7 @@ get_baserel_parampathinfo(PlannerInfo *root, RelOptInfo *baserel,
 	ParamPathInfo *ppi;
 	Relids		joinrelids;
 	List	   *pclauses;
+	List	   *eqclauses;
 	double		rows;
 	ListCell   *lc;
 
@@ -1330,14 +1331,24 @@ get_baserel_parampathinfo(PlannerInfo *root, RelOptInfo *baserel,
 	}
 
 	/*
-	 * Add in joinclauses generated by EquivalenceClasses, too.  (These
-	 * necessarily satisfy join_clause_is_movable_into.)
+	 * Add in joinclauses generated by EquivalenceClasses, too.  In principle
+	 * these should always satisfy join_clause_is_movable_into; but if we are
+	 * below an outer join the clauses might contain Vars that should only be
+	 * evaluated above the join, so we have to check.
 	 */
-	pclauses = list_concat(pclauses,
-						   generate_join_implied_equalities(root,
-															joinrelids,
-															required_outer,
-															baserel));
+	eqclauses = generate_join_implied_equalities(root,
+												 joinrelids,
+												 required_outer,
+												 baserel);
+	foreach(lc, eqclauses)
+	{
+		RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
+
+		if (join_clause_is_movable_into(rinfo,
+										baserel->relids,
+										joinrelids))
+			pclauses = lappend(pclauses, rinfo);
+	}
 
 	/* Estimate the number of rows returned by the parameterized scan */
 	rows = get_parameterized_baserel_size(root, baserel, pclauses);
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out
index 8eaa964d462..2eca3782e60 100644
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -5949,6 +5949,37 @@ select * from
  3 | 3
 (6 rows)
 
+-- check for generation of join EC conditions at wrong level (bug #18429)
+explain (costs off)
+select * from (
+  select arrayd.ad, coalesce(c.hundred, 0) as h
+  from unnest(array[1]) as arrayd(ad)
+  left join lateral (
+    select hundred from tenk1 where unique2 = arrayd.ad
+  ) c on true
+) c2
+where c2.h * c2.ad = c2.h * (c2.ad + 1);
+                                              QUERY PLAN                                               
+-------------------------------------------------------------------------------------------------------
+ Nested Loop Left Join
+   Filter: ((COALESCE(tenk1.hundred, 0) * arrayd.ad) = (COALESCE(tenk1.hundred, 0) * (arrayd.ad + 1)))
+   ->  Function Scan on unnest arrayd
+   ->  Index Scan using tenk1_unique2 on tenk1
+         Index Cond: (unique2 = arrayd.ad)
+(5 rows)
+
+select * from (
+  select arrayd.ad, coalesce(c.hundred, 0) as h
+  from unnest(array[1]) as arrayd(ad)
+  left join lateral (
+    select hundred from tenk1 where unique2 = arrayd.ad
+  ) c on true
+) c2
+where c2.h * c2.ad = c2.h * (c2.ad + 1);
+ ad | h 
+----+---
+(0 rows)
+
 -- check the number of columns specified
 SELECT * FROM (int8_tbl i cross join int4_tbl j) ss(a,b,c,d);
 ERROR:  join expression "ss" has 3 columns available but 4 columns specified
diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql
index f8ed53fcc43..9d622d18a12 100644
--- a/src/test/regress/sql/join.sql
+++ b/src/test/regress/sql/join.sql
@@ -2029,6 +2029,25 @@ select * from
    (select q1.v)
   ) as q2;
 
+-- check for generation of join EC conditions at wrong level (bug #18429)
+explain (costs off)
+select * from (
+  select arrayd.ad, coalesce(c.hundred, 0) as h
+  from unnest(array[1]) as arrayd(ad)
+  left join lateral (
+    select hundred from tenk1 where unique2 = arrayd.ad
+  ) c on true
+) c2
+where c2.h * c2.ad = c2.h * (c2.ad + 1);
+select * from (
+  select arrayd.ad, coalesce(c.hundred, 0) as h
+  from unnest(array[1]) as arrayd(ad)
+  left join lateral (
+    select hundred from tenk1 where unique2 = arrayd.ad
+  ) c on true
+) c2
+where c2.h * c2.ad = c2.h * (c2.ad + 1);
+
 -- check the number of columns specified
 SELECT * FROM (int8_tbl i cross join int4_tbl j) ss(a,b,c,d);