summaryrefslogtreecommitdiff
path: root/src/backend/optimizer/path
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/optimizer/path')
-rw-r--r--src/backend/optimizer/path/Makefile4
-rw-r--r--src/backend/optimizer/path/allpaths.c12
-rw-r--r--src/backend/optimizer/path/costsize.c54
-rw-r--r--src/backend/optimizer/path/equivclass.c1662
-rw-r--r--src/backend/optimizer/path/indxpath.c457
-rw-r--r--src/backend/optimizer/path/joinpath.c187
-rw-r--r--src/backend/optimizer/path/joinrels.c10
-rw-r--r--src/backend/optimizer/path/pathkeys.c1523
8 files changed, 2552 insertions, 1357 deletions
diff --git a/src/backend/optimizer/path/Makefile b/src/backend/optimizer/path/Makefile
index 7b5fce139de..833b3f59a37 100644
--- a/src/backend/optimizer/path/Makefile
+++ b/src/backend/optimizer/path/Makefile
@@ -4,7 +4,7 @@
# Makefile for optimizer/path
#
# IDENTIFICATION
-# $PostgreSQL: pgsql/src/backend/optimizer/path/Makefile,v 1.17 2007/01/20 17:16:11 petere Exp $
+# $PostgreSQL: pgsql/src/backend/optimizer/path/Makefile,v 1.18 2007/01/20 20:45:38 tgl Exp $
#
#-------------------------------------------------------------------------
@@ -12,7 +12,7 @@ subdir = src/backend/optimizer/path
top_builddir = ../../../..
include $(top_builddir)/src/Makefile.global
-OBJS = allpaths.o clausesel.o costsize.o indxpath.o \
+OBJS = allpaths.o clausesel.o costsize.o equivclass.o indxpath.o \
joinpath.o joinrels.o orindxpath.o pathkeys.o tidpath.o
all: SUBSYS.o
diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c
index 01178d93ddd..cbe78c3abd5 100644
--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/optimizer/path/allpaths.c,v 1.156 2007/01/09 02:14:12 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/optimizer/path/allpaths.c,v 1.157 2007/01/20 20:45:38 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -326,6 +326,16 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
appinfo);
/*
+ * We have to make child entries in the EquivalenceClass data
+ * structures as well.
+ */
+ if (rel->has_eclass_joins)
+ {
+ add_child_rel_equivalences(root, appinfo, rel, childrel);
+ childrel->has_eclass_joins = true;
+ }
+
+ /*
* Copy the parent's attr_needed data as well, with appropriate
* adjustment of relids and attribute numbers.
*/
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index e1c003b5047..22cbf8e2b92 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -54,7 +54,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.174 2007/01/10 18:06:03 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.175 2007/01/20 20:45:38 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -1258,8 +1258,6 @@ cost_mergejoin(MergePath *path, PlannerInfo *root)
Path *outer_path = path->jpath.outerjoinpath;
Path *inner_path = path->jpath.innerjoinpath;
List *mergeclauses = path->path_mergeclauses;
- Oid *mergeFamilies = path->path_mergeFamilies;
- int *mergeStrategies = path->path_mergeStrategies;
List *outersortkeys = path->outersortkeys;
List *innersortkeys = path->innersortkeys;
Cost startup_cost = 0;
@@ -1268,7 +1266,6 @@ cost_mergejoin(MergePath *path, PlannerInfo *root)
Selectivity merge_selec;
QualCost merge_qual_cost;
QualCost qp_qual_cost;
- RestrictInfo *firstclause;
double outer_path_rows = PATH_ROWS(outer_path);
double inner_path_rows = PATH_ROWS(inner_path);
double outer_rows,
@@ -1347,32 +1344,47 @@ cost_mergejoin(MergePath *path, PlannerInfo *root)
* inputs that will actually need to be scanned. We use only the first
* (most significant) merge clause for this purpose.
*
- * Since this calculation is somewhat expensive, and will be the same for
- * all mergejoin paths associated with the merge clause, we cache the
- * results in the RestrictInfo node. XXX that won't work anymore once
- * we support multiple possible orderings!
+ * XXX mergejoinscansel is a bit expensive, can we cache its results?
*/
if (mergeclauses && path->jpath.jointype != JOIN_FULL)
{
- firstclause = (RestrictInfo *) linitial(mergeclauses);
- if (firstclause->left_mergescansel < 0) /* not computed yet? */
- mergejoinscansel(root, (Node *) firstclause->clause,
- mergeFamilies[0],
- mergeStrategies[0],
- &firstclause->left_mergescansel,
- &firstclause->right_mergescansel);
-
- if (bms_is_subset(firstclause->left_relids, outer_path->parent->relids))
+ RestrictInfo *firstclause = (RestrictInfo *) linitial(mergeclauses);
+ List *opathkeys;
+ List *ipathkeys;
+ PathKey *opathkey;
+ PathKey *ipathkey;
+ Selectivity leftscansel,
+ rightscansel;
+
+ /* Get the input pathkeys to determine the sort-order details */
+ opathkeys = outersortkeys ? outersortkeys : outer_path->pathkeys;
+ ipathkeys = innersortkeys ? innersortkeys : inner_path->pathkeys;
+ Assert(opathkeys);
+ Assert(ipathkeys);
+ opathkey = (PathKey *) linitial(opathkeys);
+ ipathkey = (PathKey *) linitial(ipathkeys);
+ /* debugging check */
+ if (opathkey->pk_opfamily != ipathkey->pk_opfamily ||
+ opathkey->pk_strategy != ipathkey->pk_strategy ||
+ opathkey->pk_nulls_first != ipathkey->pk_nulls_first)
+ elog(ERROR, "left and right pathkeys do not match in mergejoin");
+
+ mergejoinscansel(root, (Node *) firstclause->clause,
+ opathkey->pk_opfamily, opathkey->pk_strategy,
+ &leftscansel, &rightscansel);
+
+ if (bms_is_subset(firstclause->left_relids,
+ outer_path->parent->relids))
{
/* left side of clause is outer */
- outerscansel = firstclause->left_mergescansel;
- innerscansel = firstclause->right_mergescansel;
+ outerscansel = leftscansel;
+ innerscansel = rightscansel;
}
else
{
/* left side of clause is inner */
- outerscansel = firstclause->right_mergescansel;
- innerscansel = firstclause->left_mergescansel;
+ outerscansel = rightscansel;
+ innerscansel = leftscansel;
}
if (path->jpath.jointype == JOIN_LEFT)
outerscansel = 1.0;
diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c
new file mode 100644
index 00000000000..063e8d5d014
--- /dev/null
+++ b/src/backend/optimizer/path/equivclass.c
@@ -0,0 +1,1662 @@
+/*-------------------------------------------------------------------------
+ *
+ * equivclass.c
+ * Routines for managing EquivalenceClasses
+ *
+ * See src/backend/optimizer/README for discussion of EquivalenceClasses.
+ *
+ *
+ * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ * $PostgreSQL: pgsql/src/backend/optimizer/path/equivclass.c,v 1.1 2007/01/20 20:45:39 tgl Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/skey.h"
+#include "optimizer/clauses.h"
+#include "optimizer/cost.h"
+#include "optimizer/paths.h"
+#include "optimizer/planmain.h"
+#include "optimizer/prep.h"
+#include "optimizer/var.h"
+#include "utils/lsyscache.h"
+
+
+static void add_eq_member(EquivalenceClass *ec, Expr *expr, Relids relids,
+ bool is_child, Oid datatype);
+static void generate_base_implied_equalities_const(PlannerInfo *root,
+ EquivalenceClass *ec);
+static void generate_base_implied_equalities_no_const(PlannerInfo *root,
+ EquivalenceClass *ec);
+static void generate_base_implied_equalities_broken(PlannerInfo *root,
+ EquivalenceClass *ec);
+static List *generate_join_implied_equalities_normal(PlannerInfo *root,
+ EquivalenceClass *ec,
+ RelOptInfo *joinrel,
+ RelOptInfo *outer_rel,
+ RelOptInfo *inner_rel);
+static List *generate_join_implied_equalities_broken(PlannerInfo *root,
+ EquivalenceClass *ec,
+ RelOptInfo *joinrel,
+ RelOptInfo *outer_rel,
+ RelOptInfo *inner_rel);
+static Oid select_equality_operator(EquivalenceClass *ec,
+ Oid lefttype, Oid righttype);
+static void reconsider_outer_join_clause(PlannerInfo *root,
+ RestrictInfo *rinfo,
+ bool outer_on_left);
+static void reconsider_full_join_clause(PlannerInfo *root,
+ RestrictInfo *rinfo);
+
+
+/*
+ * process_equivalence
+ * The given clause has a mergejoinable operator and can be applied without
+ * any delay by an outer join, so its two sides can be considered equal
+ * anywhere they are both computable; moreover that equality can be
+ * extended transitively. Record this knowledge in the EquivalenceClass
+ * data structure. Returns TRUE if successful, FALSE if not (in which
+ * case caller should treat the clause as ordinary, not an equivalence).
+ *
+ * If below_outer_join is true, then the clause was found below the nullable
+ * side of an outer join, so its sides might validly be both NULL rather than
+ * strictly equal. We can still deduce equalities in such cases, but we take
+ * care to mark an EquivalenceClass if it came from any such clauses. Also,
+ * we have to check that both sides are either pseudo-constants or strict
+ * functions of Vars, else they might not both go to NULL above the outer
+ * join. (This is the reason why we need a failure return. It's more
+ * convenient to check this case here than at the call sites...)
+ *
+ * Note: constructing merged EquivalenceClasses is a standard UNION-FIND
+ * problem, for which there exist better data structures than simple lists.
+ * If this code ever proves to be a bottleneck then it could be sped up ---
+ * but for now, simple is beautiful.
+ *
+ * Note: this is only called during planner startup, not during GEQO
+ * exploration, so we need not worry about whether we're in the right
+ * memory context.
+ */
+bool
+process_equivalence(PlannerInfo *root, RestrictInfo *restrictinfo,
+ bool below_outer_join)
+{
+ Expr *clause = restrictinfo->clause;
+ Oid opno,
+ item1_type,
+ item2_type;
+ Expr *item1;
+ Expr *item2;
+ Relids item1_relids,
+ item2_relids;
+ List *opfamilies;
+ EquivalenceClass *ec1,
+ *ec2;
+ ListCell *lc1;
+
+ /* Extract info from given clause */
+ Assert(is_opclause(clause));
+ opno = ((OpExpr *) clause)->opno;
+ item1 = (Expr *) get_leftop(clause);
+ item2 = (Expr *) get_rightop(clause);
+ item1_relids = restrictinfo->left_relids;
+ item2_relids = restrictinfo->right_relids;
+
+ /*
+ * If below outer join, check for strictness, else reject.
+ */
+ if (below_outer_join)
+ {
+ if (!bms_is_empty(item1_relids) &&
+ contain_nonstrict_functions((Node *) item1))
+ return false; /* LHS is non-strict but not constant */
+ if (!bms_is_empty(item2_relids) &&
+ contain_nonstrict_functions((Node *) item2))
+ return false; /* RHS is non-strict but not constant */
+ }
+
+ /*
+ * We use the declared input types of the operator, not exprType() of
+ * the inputs, as the nominal datatypes for opfamily lookup. This
+ * presumes that btree operators are always registered with amoplefttype
+ * and amoprighttype equal to their declared input types. We will need
+ * this info anyway to build EquivalenceMember nodes, and by extracting
+ * it now we can use type comparisons to short-circuit some equal() tests.
+ */
+ op_input_types(opno, &item1_type, &item2_type);
+
+ opfamilies = restrictinfo->mergeopfamilies;
+
+ /*
+ * Sweep through the existing EquivalenceClasses looking for matches
+ * to item1 and item2. These are the possible outcomes:
+ *
+ * 1. We find both in the same EC. The equivalence is already known,
+ * so there's nothing to do.
+ *
+ * 2. We find both in different ECs. Merge the two ECs together.
+ *
+ * 3. We find just one. Add the other to its EC.
+ *
+ * 4. We find neither. Make a new, two-entry EC.
+ *
+ * Note: since all ECs are built through this process, it's impossible
+ * that we'd match an item in more than one existing EC. It is possible
+ * to match more than once within an EC, if someone fed us something silly
+ * like "WHERE X=X". (However, we can't simply discard such clauses,
+ * since they should fail when X is null; so we will build a 2-member
+ * EC to ensure the correct restriction clause gets generated. Hence
+ * there is no shortcut here for item1 and item2 equal.)
+ */
+ ec1 = ec2 = NULL;
+ foreach(lc1, root->eq_classes)
+ {
+ EquivalenceClass *cur_ec = (EquivalenceClass *) lfirst(lc1);
+ ListCell *lc2;
+
+ /* Never match to a volatile EC */
+ if (cur_ec->ec_has_volatile)
+ continue;
+
+ /*
+ * A "match" requires matching sets of btree opfamilies. Use of
+ * equal() for this test has implications discussed in the comments
+ * for get_mergejoin_opfamilies().
+ */
+ if (!equal(opfamilies, cur_ec->ec_opfamilies))
+ continue;
+
+ foreach(lc2, cur_ec->ec_members)
+ {
+ EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc2);
+
+ Assert(!cur_em->em_is_child); /* no children yet */
+
+ /*
+ * If below an outer join, don't match constants: they're not
+ * as constant as they look.
+ */
+ if ((below_outer_join || cur_ec->ec_below_outer_join) &&
+ cur_em->em_is_const)
+ continue;
+
+ if (!ec1 &&
+ item1_type == cur_em->em_datatype &&
+ equal(item1, cur_em->em_expr))
+ {
+ ec1 = cur_ec;
+ if (ec2)
+ break;
+ }
+
+ if (!ec2 &&
+ item2_type == cur_em->em_datatype &&
+ equal(item2, cur_em->em_expr))
+ {
+ ec2 = cur_ec;
+ if (ec1)
+ break;
+ }
+ }
+
+ if (ec1 && ec2)
+ break;
+ }
+
+ /* Sweep finished, what did we find? */
+
+ if (ec1 && ec2)
+ {
+ /* If case 1, nothing to do, except add to sources */
+ if (ec1 == ec2)
+ {
+ ec1->ec_sources = lappend(ec1->ec_sources, restrictinfo);
+ ec1->ec_below_outer_join |= below_outer_join;
+ return true;
+ }
+
+ /*
+ * Case 2: need to merge ec1 and ec2. We add ec2's items to ec1,
+ * then set ec2's ec_merged link to point to ec1 and remove ec2
+ * from the eq_classes list. We cannot simply delete ec2 because
+ * that could leave dangling pointers in existing PathKeys. We
+ * leave it behind with a link so that the merged EC can be found.
+ */
+ ec1->ec_members = list_concat(ec1->ec_members, ec2->ec_members);
+ ec1->ec_sources = list_concat(ec1->ec_sources, ec2->ec_sources);
+ ec1->ec_relids = bms_join(ec1->ec_relids, ec2->ec_relids);
+ ec1->ec_has_const |= ec2->ec_has_const;
+ /* can't need to set has_volatile */
+ ec1->ec_below_outer_join |= ec2->ec_below_outer_join;
+ ec2->ec_merged = ec1;
+ root->eq_classes = list_delete_ptr(root->eq_classes, ec2);
+ /* just to avoid debugging confusion w/ dangling pointers: */
+ ec2->ec_members = NIL;
+ ec2->ec_sources = NIL;
+ ec2->ec_relids = NULL;
+ ec1->ec_sources = lappend(ec1->ec_sources, restrictinfo);
+ ec1->ec_below_outer_join |= below_outer_join;
+ }
+ else if (ec1)
+ {
+ /* Case 3: add item2 to ec1 */
+ add_eq_member(ec1, item2, item2_relids, false, item2_type);
+ ec1->ec_sources = lappend(ec1->ec_sources, restrictinfo);
+ ec1->ec_below_outer_join |= below_outer_join;
+ }
+ else if (ec2)
+ {
+ /* Case 3: add item1 to ec2 */
+ add_eq_member(ec2, item1, item1_relids, false, item1_type);
+ ec2->ec_sources = lappend(ec2->ec_sources, restrictinfo);
+ ec2->ec_below_outer_join |= below_outer_join;
+ }
+ else
+ {
+ /* Case 4: make a new, two-entry EC */
+ EquivalenceClass *ec = makeNode(EquivalenceClass);
+
+ ec->ec_opfamilies = opfamilies;
+ ec->ec_members = NIL;
+ ec->ec_sources = list_make1(restrictinfo);
+ ec->ec_relids = NULL;
+ ec->ec_has_const = false;
+ ec->ec_has_volatile = false;
+ ec->ec_below_outer_join = below_outer_join;
+ ec->ec_broken = false;
+ ec->ec_merged = NULL;
+ add_eq_member(ec, item1, item1_relids, false, item1_type);
+ add_eq_member(ec, item2, item2_relids, false, item2_type);
+
+ root->eq_classes = lappend(root->eq_classes, ec);
+ }
+
+ return true;
+}
+
+/*
+ * add_eq_member - build a new EquivalenceMember and add it to an EC
+ */
+static void
+add_eq_member(EquivalenceClass *ec, Expr *expr, Relids relids,
+ bool is_child, Oid datatype)
+{
+ EquivalenceMember *em = makeNode(EquivalenceMember);
+
+ em->em_expr = expr;
+ em->em_relids = relids;
+ em->em_is_const = false;
+ em->em_is_child = is_child;
+ em->em_datatype = datatype;
+
+ if (bms_is_empty(relids))
+ {
+ /*
+ * No Vars, assume it's a pseudoconstant. This is correct for
+ * entries generated from process_equivalence(), because a WHERE
+ * clause can't contain aggregates and non-volatility was checked
+ * before process_equivalence() ever got called. But
+ * get_eclass_for_sort_expr() has to work harder. We put the tests
+ * there not here to save cycles in the equivalence case.
+ */
+ Assert(!is_child);
+ em->em_is_const = true;
+ ec->ec_has_const = true;
+ /* it can't affect ec_relids */
+ }
+ else if (!is_child) /* child members don't add to ec_relids */
+ {
+ ec->ec_relids = bms_add_members(ec->ec_relids, relids);
+ }
+ ec->ec_members = lappend(ec->ec_members, em);
+}
+
+
+/*
+ * get_eclass_for_sort_expr
+ * Given an expression and opfamily info, find an existing equivalence
+ * class it is a member of; if none, build a new single-member
+ * EquivalenceClass for it.
+ *
+ * This can be used safely both before and after EquivalenceClass merging;
+ * since it never causes merging it does not invalidate any existing ECs
+ * or PathKeys.
+ *
+ * Note: opfamilies must be chosen consistently with the way
+ * process_equivalence() would do; that is, generated from a mergejoinable
+ * equality operator. Else we might fail to detect valid equivalences,
+ * generating poor (but not incorrect) plans.
+ */
+EquivalenceClass *
+get_eclass_for_sort_expr(PlannerInfo *root,
+ Expr *expr,
+ Oid expr_datatype,
+ List *opfamilies)
+{
+ EquivalenceClass *newec;
+ ListCell *lc1;
+ MemoryContext oldcontext;
+
+ /*
+ * Scan through the existing EquivalenceClasses for a match
+ */
+ foreach(lc1, root->eq_classes)
+ {
+ EquivalenceClass *cur_ec = (EquivalenceClass *) lfirst(lc1);
+ ListCell *lc2;
+
+ /* we allow matching to a volatile EC here */
+
+ if (!equal(opfamilies, cur_ec->ec_opfamilies))
+ continue;
+
+ foreach(lc2, cur_ec->ec_members)
+ {
+ EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc2);
+
+ /*
+ * If below an outer join, don't match constants: they're not
+ * as constant as they look.
+ */
+ if (cur_ec->ec_below_outer_join &&
+ cur_em->em_is_const)
+ continue;
+
+ if (expr_datatype == cur_em->em_datatype &&
+ equal(expr, cur_em->em_expr))
+ return cur_ec; /* Match! */
+ }
+ }
+
+ /*
+ * No match, so build a new single-member EC
+ *
+ * Here, we must be sure that we construct the EC in the right context.
+ * We can assume, however, that the passed expr is long-lived.
+ */
+ oldcontext = MemoryContextSwitchTo(root->planner_cxt);
+
+ newec = makeNode(EquivalenceClass);
+ newec->ec_opfamilies = list_copy(opfamilies);
+ newec->ec_members = NIL;
+ newec->ec_sources = NIL;
+ newec->ec_relids = NULL;
+ newec->ec_has_const = false;
+ newec->ec_has_volatile = contain_volatile_functions((Node *) expr);
+ newec->ec_below_outer_join = false;
+ newec->ec_broken = false;
+ newec->ec_merged = NULL;
+ add_eq_member(newec, expr, pull_varnos((Node *) expr),
+ false, expr_datatype);
+
+ /*
+ * add_eq_member doesn't check for volatile functions or aggregates,
+ * but such could appear in sort expressions, so we have to check
+ * whether its const-marking was correct.
+ */
+ if (newec->ec_has_const)
+ {
+ if (newec->ec_has_volatile || contain_agg_clause((Node *) expr))
+ {
+ newec->ec_has_const = false;
+ ((EquivalenceMember *) linitial(newec->ec_members))->em_is_const = false;
+ }
+ }
+
+ root->eq_classes = lappend(root->eq_classes, newec);
+
+ MemoryContextSwitchTo(oldcontext);
+
+ return newec;
+}
+
+
+/*
+ * generate_base_implied_equalities
+ * Generate any restriction clauses that we can deduce from equivalence
+ * classes.
+ *
+ * When an EC contains pseudoconstants, our strategy is to generate
+ * "member = const1" clauses where const1 is the first constant member, for
+ * every other member (including other constants). If we are able to do this
+ * then we don't need any "var = var" comparisons because we've successfully
+ * constrained all the vars at their points of creation. If we fail to
+ * generate any of these clauses due to lack of cross-type operators, we fall
+ * back to the "ec_broken" strategy described below. (XXX if there are
+ * multiple constants of different types, it's possible that we might succeed
+ * in forming all the required clauses if we started from a different const
+ * member; but this seems a sufficiently hokey corner case to not be worth
+ * spending lots of cycles on.)
+ *
+ * For ECs that contain no pseudoconstants, we generate derived clauses
+ * "member1 = member2" for each pair of members belonging to the same base
+ * relation (actually, if there are more than two for the same base relation,
+ * we only need enough clauses to link each to each other). This provides
+ * the base case for the recursion: each row emitted by a base relation scan
+ * will constrain all computable members of the EC to be equal. As each
+ * join path is formed, we'll add additional derived clauses on-the-fly
+ * to maintain this invariant (see generate_join_implied_equalities).
+ *
+ * If the opfamilies used by the EC do not provide complete sets of cross-type
+ * equality operators, it is possible that we will fail to generate a clause
+ * that must be generated to maintain the invariant. (An example: given
+ * "WHERE a.x = b.y AND b.y = a.z", the scheme breaks down if we cannot
+ * generate "a.x = a.z" as a restriction clause for A.) In this case we mark
+ * the EC "ec_broken" and fall back to regurgitating its original source
+ * RestrictInfos at appropriate times. We do not try to retract any derived
+ * clauses already generated from the broken EC, so the resulting plan could
+ * be poor due to bad selectivity estimates caused by redundant clauses. But
+ * the correct solution to that is to fix the opfamilies ...
+ *
+ * Equality clauses derived by this function are passed off to
+ * process_implied_equality (in plan/initsplan.c) to be inserted into the
+ * restrictinfo datastructures. Note that this must be called after initial
+ * scanning of the quals and before Path construction begins.
+ */
+void
+generate_base_implied_equalities(PlannerInfo *root)
+{
+ ListCell *lc;
+ Index rti;
+
+ foreach(lc, root->eq_classes)
+ {
+ EquivalenceClass *ec = (EquivalenceClass *) lfirst(lc);
+
+ Assert(ec->ec_merged == NULL); /* else shouldn't be in list */
+ Assert(!ec->ec_broken); /* not yet anyway... */
+
+ /* Single-member ECs won't generate any deductions */
+ if (list_length(ec->ec_members) <= 1)
+ continue;
+
+ if (ec->ec_has_const)
+ generate_base_implied_equalities_const(root, ec);
+ else
+ generate_base_implied_equalities_no_const(root, ec);
+
+ /* Recover if we failed to generate required derived clauses */
+ if (ec->ec_broken)
+ generate_base_implied_equalities_broken(root, ec);
+ }
+
+ /*
+ * This is also a handy place to mark base rels (which should all
+ * exist by now) with flags showing whether they have pending eclass
+ * joins.
+ */
+ for (rti = 1; rti < root->simple_rel_array_size; rti++)
+ {
+ RelOptInfo *brel = root->simple_rel_array[rti];
+
+ if (brel == NULL)
+ continue;
+
+ brel->has_eclass_joins = has_relevant_eclass_joinclause(root, brel);
+ }
+}
+
+/*
+ * generate_base_implied_equalities when EC contains pseudoconstant(s)
+ */
+static void
+generate_base_implied_equalities_const(PlannerInfo *root,
+ EquivalenceClass *ec)
+{
+ EquivalenceMember *const_em = NULL;
+ ListCell *lc;
+
+ /* Find the constant member to use */
+ foreach(lc, ec->ec_members)
+ {
+ EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc);
+
+ if (cur_em->em_is_const)
+ {
+ const_em = cur_em;
+ break;
+ }
+ }
+ Assert(const_em != NULL);
+
+ /* Generate a derived equality against each other member */
+ foreach(lc, ec->ec_members)
+ {
+ EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc);
+ Oid eq_op;
+
+ Assert(!cur_em->em_is_child); /* no children yet */
+ if (cur_em == const_em)
+ continue;
+ eq_op = select_equality_operator(ec,
+ cur_em->em_datatype,
+ const_em->em_datatype);
+ if (!OidIsValid(eq_op))
+ {
+ /* failed... */
+ ec->ec_broken = true;
+ break;
+ }
+ process_implied_equality(root, eq_op,
+ cur_em->em_expr, const_em->em_expr,
+ ec->ec_relids,
+ ec->ec_below_outer_join,
+ cur_em->em_is_const);
+ }
+}
+
+/*
+ * generate_base_implied_equalities when EC contains no pseudoconstants
+ */
+static void
+generate_base_implied_equalities_no_const(PlannerInfo *root,
+ EquivalenceClass *ec)
+{
+ EquivalenceMember **prev_ems;
+ ListCell *lc;
+
+ /*
+ * We scan the EC members once and track the last-seen member for each
+ * base relation. When we see another member of the same base relation,
+ * we generate "prev_mem = cur_mem". This results in the minimum number
+ * of derived clauses, but it's possible that it will fail when a different
+ * ordering would succeed. XXX FIXME: use a UNION-FIND algorithm similar
+ * to the way we build merged ECs. (Use a list-of-lists for each rel.)
+ */
+ prev_ems = (EquivalenceMember **)
+ palloc0(root->simple_rel_array_size * sizeof(EquivalenceMember *));
+
+ foreach(lc, ec->ec_members)
+ {
+ EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc);
+ int relid;
+
+ Assert(!cur_em->em_is_child); /* no children yet */
+ if (bms_membership(cur_em->em_relids) != BMS_SINGLETON)
+ continue;
+ relid = bms_singleton_member(cur_em->em_relids);
+ Assert(relid < root->simple_rel_array_size);
+
+ if (prev_ems[relid] != NULL)
+ {
+ EquivalenceMember *prev_em = prev_ems[relid];
+ Oid eq_op;
+
+ eq_op = select_equality_operator(ec,
+ prev_em->em_datatype,
+ cur_em->em_datatype);
+ if (!OidIsValid(eq_op))
+ {
+ /* failed... */
+ ec->ec_broken = true;
+ break;
+ }
+ process_implied_equality(root, eq_op,
+ prev_em->em_expr, cur_em->em_expr,
+ ec->ec_relids,
+ ec->ec_below_outer_join,
+ false);
+ }
+ prev_ems[relid] = cur_em;
+ }
+
+ pfree(prev_ems);
+
+ /*
+ * We also have to make sure that all the Vars used in the member
+ * clauses will be available at any join node we might try to reference
+ * them at. For the moment we force all the Vars to be available at
+ * all join nodes for this eclass. Perhaps this could be improved by
+ * doing some pre-analysis of which members we prefer to join, but it's
+ * no worse than what happened in the pre-8.3 code.
+ */
+ foreach(lc, ec->ec_members)
+ {
+ EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc);
+ List *vars = pull_var_clause((Node *) cur_em->em_expr, false);
+
+ add_vars_to_targetlist(root, vars, ec->ec_relids);
+ list_free(vars);
+ }
+}
+
+/*
+ * generate_base_implied_equalities cleanup after failure
+ *
+ * What we must do here is push any zero- or one-relation source RestrictInfos
+ * of the EC back into the main restrictinfo datastructures. Multi-relation
+ * clauses will be regurgitated later by generate_join_implied_equalities().
+ * (We do it this way to maintain continuity with the case that ec_broken
+ * becomes set only after we've gone up a join level or two.)
+ */
+static void
+generate_base_implied_equalities_broken(PlannerInfo *root,
+ EquivalenceClass *ec)
+{
+ ListCell *lc;
+
+ foreach(lc, ec->ec_sources)
+ {
+ RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(lc);
+
+ if (bms_membership(restrictinfo->required_relids) != BMS_MULTIPLE)
+ distribute_restrictinfo_to_rels(root, restrictinfo);
+ }
+}
+
+
+/*
+ * generate_join_implied_equalities
+ * Generate any join clauses that we can deduce from equivalence classes.
+ *
+ * At a join node, we must enforce restriction clauses sufficient to ensure
+ * that all equivalence-class members computable at that node are equal.
+ * Since the set of clauses to enforce can vary depending on which subset
+ * relations are the inputs, we have to compute this afresh for each join
+ * path pair. Hence a fresh List of RestrictInfo nodes is built and passed
+ * back on each call.
+ *
+ * The results are sufficient for use in merge, hash, and plain nestloop join
+ * methods. We do not worry here about selecting clauses that are optimal
+ * for use in a nestloop-with-inner-indexscan join, however. indxpath.c makes
+ * its own selections of clauses to use, and if the ones we pick here are
+ * redundant with those, the extras will be eliminated in createplan.c.
+ */
+List *
+generate_join_implied_equalities(PlannerInfo *root,
+ RelOptInfo *joinrel,
+ RelOptInfo *outer_rel,
+ RelOptInfo *inner_rel)
+{
+ List *result = NIL;
+ ListCell *lc;
+
+ foreach(lc, root->eq_classes)
+ {
+ EquivalenceClass *ec = (EquivalenceClass *) lfirst(lc);
+ List *sublist = NIL;
+
+ /* ECs containing consts do not need any further enforcement */
+ if (ec->ec_has_const)
+ continue;
+
+ /* Single-member ECs won't generate any deductions */
+ if (list_length(ec->ec_members) <= 1)
+ continue;
+
+ /* We can quickly ignore any that don't overlap the join, too */
+ if (!bms_overlap(ec->ec_relids, joinrel->relids))
+ continue;
+
+ if (!ec->ec_broken)
+ sublist = generate_join_implied_equalities_normal(root,
+ ec,
+ joinrel,
+ outer_rel,
+ inner_rel);
+
+ /* Recover if we failed to generate required derived clauses */
+ if (ec->ec_broken)
+ sublist = generate_join_implied_equalities_broken(root,
+ ec,
+ joinrel,
+ outer_rel,
+ inner_rel);
+
+ result = list_concat(result, sublist);
+ }
+
+ return result;
+}
+
+/*
+ * generate_join_implied_equalities for a still-valid EC
+ */
+static List *
+generate_join_implied_equalities_normal(PlannerInfo *root,
+ EquivalenceClass *ec,
+ RelOptInfo *joinrel,
+ RelOptInfo *outer_rel,
+ RelOptInfo *inner_rel)
+{
+ List *result = NIL;
+ List *new_members = NIL;
+ List *outer_members = NIL;
+ List *inner_members = NIL;
+ ListCell *lc1;
+
+ /*
+ * First, scan the EC to identify member values that are computable
+ * at the outer rel, at the inner rel, or at this relation but not in
+ * either input rel. The outer-rel members should already be enforced
+ * equal, likewise for the inner-rel members. We'll need to create
+ * clauses to enforce that any newly computable members are all equal
+ * to each other as well as to at least one input member, plus enforce
+ * at least one outer-rel member equal to at least one inner-rel member.
+ */
+ foreach(lc1, ec->ec_members)
+ {
+ EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc1);
+
+ if (cur_em->em_is_child)
+ continue; /* ignore children here */
+ if (!bms_is_subset(cur_em->em_relids, joinrel->relids))
+ continue; /* ignore --- not computable yet */
+
+ if (bms_is_subset(cur_em->em_relids, outer_rel->relids))
+ outer_members = lappend(outer_members, cur_em);
+ else if (bms_is_subset(cur_em->em_relids, inner_rel->relids))
+ inner_members = lappend(inner_members, cur_em);
+ else
+ new_members = lappend(new_members, cur_em);
+ }
+
+ /*
+ * First, select the joinclause if needed. We can equate any one outer
+ * member to any one inner member, but we have to find a datatype
+ * combination for which an opfamily member operator exists. If we
+ * have choices, we prefer simple Var members (possibly with RelabelType)
+ * since these are (a) cheapest to compute at runtime and (b) most likely
+ * to have useful statistics. Also, if enable_hashjoin is on, we prefer
+ * operators that are also hashjoinable.
+ */
+ if (outer_members && inner_members)
+ {
+ EquivalenceMember *best_outer_em = NULL;
+ EquivalenceMember *best_inner_em = NULL;
+ Oid best_eq_op = InvalidOid;
+ int best_score = -1;
+ RestrictInfo *rinfo;
+
+ foreach(lc1, outer_members)
+ {
+ EquivalenceMember *outer_em = (EquivalenceMember *) lfirst(lc1);
+ ListCell *lc2;
+
+ foreach(lc2, inner_members)
+ {
+ EquivalenceMember *inner_em = (EquivalenceMember *) lfirst(lc2);
+ Oid eq_op;
+ int score;
+
+ eq_op = select_equality_operator(ec,
+ outer_em->em_datatype,
+ inner_em->em_datatype);
+ if (!OidIsValid(eq_op))
+ continue;
+ score = 0;
+ if (IsA(outer_em->em_expr, Var) ||
+ (IsA(outer_em->em_expr, RelabelType) &&
+ IsA(((RelabelType *) outer_em->em_expr)->arg, Var)))
+ score++;
+ if (IsA(inner_em->em_expr, Var) ||
+ (IsA(inner_em->em_expr, RelabelType) &&
+ IsA(((RelabelType *) inner_em->em_expr)->arg, Var)))
+ score++;
+ if (!enable_hashjoin || op_hashjoinable(eq_op))
+ score++;
+ if (score > best_score)
+ {
+ best_outer_em = outer_em;
+ best_inner_em = inner_em;
+ best_eq_op = eq_op;
+ best_score = score;
+ if (best_score == 3)
+ break; /* no need to look further */
+ }
+ }
+ if (best_score == 3)
+ break; /* no need to look further */
+ }
+ if (best_score < 0)
+ {
+ /* failed... */
+ ec->ec_broken = true;
+ return NIL;
+ }
+
+ rinfo = build_implied_join_equality(best_eq_op,
+ best_outer_em->em_expr,
+ best_inner_em->em_expr,
+ ec->ec_relids);
+ /* mark restrictinfo as redundant with other joinclauses */
+ rinfo->parent_ec = ec;
+ /* we can set these too, rather than letting them be looked up later */
+ rinfo->left_ec = ec;
+ rinfo->right_ec = ec;
+
+ result = lappend(result, rinfo);
+ }
+
+ /*
+ * Now deal with building restrictions for any expressions that involve
+ * Vars from both sides of the join. We have to equate all of these to
+ * each other as well as to at least one old member (if any).
+ *
+ * XXX as in generate_base_implied_equalities_no_const, we could be a
+ * lot smarter here to avoid unnecessary failures in cross-type situations.
+ * For now, use the same left-to-right method used there.
+ */
+ if (new_members)
+ {
+ List *old_members = list_concat(outer_members, inner_members);
+ EquivalenceMember *prev_em = NULL;
+ RestrictInfo *rinfo;
+
+ /* For now, arbitrarily take the first old_member as the one to use */
+ if (old_members)
+ new_members = lappend(new_members, linitial(old_members));
+
+ foreach(lc1, new_members)
+ {
+ EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc1);
+
+ if (prev_em != NULL)
+ {
+ Oid eq_op;
+
+ eq_op = select_equality_operator(ec,
+ prev_em->em_datatype,
+ cur_em->em_datatype);
+ if (!OidIsValid(eq_op))
+ {
+ /* failed... */
+ ec->ec_broken = true;
+ return NIL;
+ }
+ rinfo = build_implied_join_equality(eq_op,
+ prev_em->em_expr,
+ cur_em->em_expr,
+ ec->ec_relids);
+
+ /* do NOT set parent_ec, this qual is not redundant! */
+
+ /* we can set these, though */
+ rinfo->left_ec = ec;
+ rinfo->right_ec = ec;
+
+ result = lappend(result, rinfo);
+ }
+ prev_em = cur_em;
+ }
+ }
+
+ return result;
+}
+
+/*
+ * generate_join_implied_equalities cleanup after failure
+ *
+ * Return any original RestrictInfos that are enforceable at this join.
+ */
+static List *
+generate_join_implied_equalities_broken(PlannerInfo *root,
+ EquivalenceClass *ec,
+ RelOptInfo *joinrel,
+ RelOptInfo *outer_rel,
+ RelOptInfo *inner_rel)
+{
+ List *result = NIL;
+ ListCell *lc;
+
+ foreach(lc, ec->ec_sources)
+ {
+ RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(lc);
+
+ if (bms_is_subset(restrictinfo->required_relids, joinrel->relids) &&
+ !bms_is_subset(restrictinfo->required_relids, outer_rel->relids) &&
+ !bms_is_subset(restrictinfo->required_relids, inner_rel->relids))
+ result = lappend(result, restrictinfo);
+ }
+
+ return result;
+}
+
+
+/*
+ * select_equality_operator
+ * Select a suitable equality operator for comparing two EC members
+ *
+ * Returns InvalidOid if no operator can be found for this datatype combination
+ */
+static Oid
+select_equality_operator(EquivalenceClass *ec, Oid lefttype, Oid righttype)
+{
+ ListCell *lc;
+
+ foreach(lc, ec->ec_opfamilies)
+ {
+ Oid opfamily = lfirst_oid(lc);
+ Oid opno;
+
+ opno = get_opfamily_member(opfamily, lefttype, righttype,
+ BTEqualStrategyNumber);
+ if (OidIsValid(opno))
+ return opno;
+ }
+ return InvalidOid;
+}
+
+
+/*
+ * reconsider_outer_join_clauses
+ * Re-examine any outer-join clauses that were set aside by
+ * distribute_qual_to_rels(), and either create EquivalenceClasses
+ * to replace them or push them out into the regular join-clause lists.
+ *
+ * When we have mergejoinable clauses A = B that are outer-join clauses,
+ * we can't blindly combine them with other clauses A = C to deduce B = C,
+ * since in fact the "equality" A = B won't necessarily hold above the
+ * outer join (one of the variables might be NULL instead). Nonetheless
+ * there are cases where we can add qual clauses using transitivity.
+ *
+ * One case that we look for here is an outer-join clause OUTERVAR = INNERVAR
+ * for which there is also an equivalence clause OUTERVAR = CONSTANT.
+ * It is safe and useful to push a clause INNERVAR = CONSTANT into the
+ * evaluation of the inner (nullable) relation, because any inner rows not
+ * meeting this condition will not contribute to the outer-join result anyway.
+ * (Any outer rows they could join to will be eliminated by the pushed-down
+ * equivalence clause.)
+ *
+ * Note that the above rule does not work for full outer joins; nor is it
+ * very interesting to consider cases where the equivalence clause involves
+ * relations entirely outside the outer join, since such clauses couldn't
+ * be pushed into the inner side's scan anyway. So the restriction to
+ * outervar = pseudoconstant is not really giving up anything.
+ *
+ * For full-join cases, we can only do something useful if it's a FULL JOIN
+ * USING and a merged column has an equivalence MERGEDVAR = CONSTANT.
+ * By the time it gets here, the merged column will look like
+ * COALESCE(LEFTVAR, RIGHTVAR)
+ * and we will have a full-join clause LEFTVAR = RIGHTVAR that we can match
+ * the COALESCE expression to. In this situation we can push LEFTVAR = CONSTANT
+ * and RIGHTVAR = CONSTANT into the input relations, since any rows not
+ * meeting these conditions cannot contribute to the join result.
+ *
+ * Again, there isn't any traction to be gained by trying to deal with
+ * clauses comparing a mergedvar to a non-pseudoconstant. So we can make
+ * use of the EquivalenceClasses to search for matching variables that were
+ * equivalenced to constants. The interesting outer-join clauses were
+ * accumulated for us by distribute_qual_to_rels.
+ *
+ * When we find one of these cases, we implement the changes we want by
+ * generating a new equivalence clause INNERVAR = CONSTANT (or LEFTVAR, etc)
+ * and pushing it into the EquivalenceClass structures. This is because we
+ * may already know that INNERVAR is equivalenced to some other var(s), and
+ * we'd like the constant to propagate to them too. Note that it would be
+ * unsafe to merge any existing EC for INNERVAR with the OUTERVAR's EC ---
+ * that could result in propagating constant restrictions from
+ * INNERVAR to OUTERVAR, which would be very wrong.
+ *
+ * If we don't find any match for a set-aside outer join clause, we must
+ * throw it back into the regular joinclause processing by passing it to
+ * distribute_restrictinfo_to_rels().
+ */
+void
+reconsider_outer_join_clauses(PlannerInfo *root)
+{
+ ListCell *lc;
+
+ /* Process the LEFT JOIN clauses */
+ foreach(lc, root->left_join_clauses)
+ {
+ RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
+
+ reconsider_outer_join_clause(root, rinfo, true);
+ }
+ /* And the RIGHT JOIN clauses */
+ foreach(lc, root->right_join_clauses)
+ {
+ RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
+
+ reconsider_outer_join_clause(root, rinfo, false);
+ }
+ /* And the FULL JOIN clauses */
+ foreach(lc, root->full_join_clauses)
+ {
+ RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
+
+ reconsider_full_join_clause(root, rinfo);
+ }
+}
+
+/*
+ * reconsider_outer_join_clauses for a single LEFT/RIGHT JOIN clause
+ */
+static void
+reconsider_outer_join_clause(PlannerInfo *root, RestrictInfo *rinfo,
+ bool outer_on_left)
+{
+ Expr *outervar,
+ *innervar;
+ Oid left_type,
+ right_type,
+ inner_datatype;
+ ListCell *lc1;
+
+ /* Extract needed info from the clause */
+ Assert(is_opclause(rinfo->clause));
+ op_input_types(((OpExpr *) rinfo->clause)->opno,
+ &left_type, &right_type);
+ if (outer_on_left)
+ {
+ outervar = (Expr *) get_leftop(rinfo->clause);
+ innervar = (Expr *) get_rightop(rinfo->clause);
+ inner_datatype = right_type;
+ }
+ else
+ {
+ outervar = (Expr *) get_rightop(rinfo->clause);
+ innervar = (Expr *) get_leftop(rinfo->clause);
+ inner_datatype = left_type;
+ }
+
+ /* Scan EquivalenceClasses for a match to outervar */
+ foreach(lc1, root->eq_classes)
+ {
+ EquivalenceClass *cur_ec = (EquivalenceClass *) lfirst(lc1);
+ bool match;
+ ListCell *lc2;
+
+ /* Ignore EC unless it contains pseudoconstants */
+ if (!cur_ec->ec_has_const)
+ continue;
+ /* Never match to a volatile EC */
+ if (cur_ec->ec_has_volatile)
+ continue;
+ /* It has to match the outer-join clause as to opfamilies, too */
+ if (!equal(rinfo->mergeopfamilies, cur_ec->ec_opfamilies))
+ continue;
+ /* Does it contain a match to outervar? */
+ match = false;
+ foreach(lc2, cur_ec->ec_members)
+ {
+ EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc2);
+
+ if (equal(outervar, cur_em->em_expr))
+ {
+ match = true;
+ break;
+ }
+ }
+ if (!match)
+ continue; /* no match, so ignore this EC */
+
+ /*
+ * Yes it does! Try to generate a clause INNERVAR = CONSTANT for
+ * each CONSTANT in the EC. Note that we must succeed with at
+ * least one constant before we can decide to throw away the
+ * outer-join clause.
+ */
+ match = false;
+ foreach(lc2, cur_ec->ec_members)
+ {
+ EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc2);
+ Oid eq_op;
+ RestrictInfo *newrinfo;
+
+ if (!cur_em->em_is_const)
+ continue; /* ignore non-const members */
+ eq_op = select_equality_operator(cur_ec,
+ inner_datatype,
+ cur_em->em_datatype);
+ if (!OidIsValid(eq_op))
+ continue; /* can't generate equality */
+ newrinfo = build_implied_join_equality(eq_op,
+ innervar,
+ cur_em->em_expr,
+ cur_ec->ec_relids);
+ if (process_equivalence(root, newrinfo, true))
+ match = true;
+ }
+
+ /*
+ * If we were able to equate INNERVAR to any constant, we're done, and
+ * we can throw away the outer-join clause as redundant. Otherwise,
+ * fall out of the search loop, since we know the OUTERVAR appears in
+ * at most one EC.
+ */
+ if (match)
+ return;
+ else
+ break;
+ }
+
+ /* We did not find a match, so throw it back into regular processing */
+ distribute_restrictinfo_to_rels(root, rinfo);
+}
+
+/*
+ * reconsider_outer_join_clauses for a single FULL JOIN clause
+ */
+static void
+reconsider_full_join_clause(PlannerInfo *root, RestrictInfo *rinfo)
+{
+ Expr *leftvar;
+ Expr *rightvar;
+ Oid left_type,
+ right_type;
+ ListCell *lc1;
+
+ /* Extract needed info from the clause */
+ Assert(is_opclause(rinfo->clause));
+ leftvar = (Expr *) get_leftop(rinfo->clause);
+ rightvar = (Expr *) get_rightop(rinfo->clause);
+ op_input_types(((OpExpr *) rinfo->clause)->opno,
+ &left_type, &right_type);
+
+ foreach(lc1, root->eq_classes)
+ {
+ EquivalenceClass *cur_ec = (EquivalenceClass *) lfirst(lc1);
+ EquivalenceMember *coal_em = NULL;
+ bool match;
+ bool matchleft;
+ bool matchright;
+ ListCell *lc2;
+
+ /* Ignore EC unless it contains pseudoconstants */
+ if (!cur_ec->ec_has_const)
+ continue;
+ /* Never match to a volatile EC */
+ if (cur_ec->ec_has_volatile)
+ continue;
+ /* It has to match the outer-join clause as to opfamilies, too */
+ if (!equal(rinfo->mergeopfamilies, cur_ec->ec_opfamilies))
+ continue;
+
+ /*
+ * Does it contain a COALESCE(leftvar, rightvar) construct?
+ *
+ * We can assume the COALESCE() inputs are in the same order as
+ * the join clause, since both were automatically generated in the
+ * cases we care about.
+ *
+ * XXX currently this may fail to match in cross-type cases
+ * because the COALESCE will contain typecast operations while the
+ * join clause may not (if there is a cross-type mergejoin
+ * operator available for the two column types). Is it OK to strip
+ * implicit coercions from the COALESCE arguments?
+ */
+ match = false;
+ foreach(lc2, cur_ec->ec_members)
+ {
+ coal_em = (EquivalenceMember *) lfirst(lc2);
+ if (IsA(coal_em->em_expr, CoalesceExpr))
+ {
+ CoalesceExpr *cexpr = (CoalesceExpr *) coal_em->em_expr;
+ Node *cfirst;
+ Node *csecond;
+
+ if (list_length(cexpr->args) != 2)
+ continue;
+ cfirst = (Node *) linitial(cexpr->args);
+ csecond = (Node *) lsecond(cexpr->args);
+
+ if (equal(leftvar, cfirst) && equal(rightvar, csecond))
+ {
+ match = true;
+ break;
+ }
+ }
+ }
+ if (!match)
+ continue; /* no match, so ignore this EC */
+
+ /*
+ * Yes it does! Try to generate clauses LEFTVAR = CONSTANT and
+ * RIGHTVAR = CONSTANT for each CONSTANT in the EC. Note that we
+ * must succeed with at least one constant for each var before
+ * we can decide to throw away the outer-join clause.
+ */
+ matchleft = matchright = false;
+ foreach(lc2, cur_ec->ec_members)
+ {
+ EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc2);
+ Oid eq_op;
+ RestrictInfo *newrinfo;
+
+ if (!cur_em->em_is_const)
+ continue; /* ignore non-const members */
+ eq_op = select_equality_operator(cur_ec,
+ left_type,
+ cur_em->em_datatype);
+ if (OidIsValid(eq_op))
+ {
+ newrinfo = build_implied_join_equality(eq_op,
+ leftvar,
+ cur_em->em_expr,
+ cur_ec->ec_relids);
+ if (process_equivalence(root, newrinfo, true))
+ matchleft = true;
+ }
+ eq_op = select_equality_operator(cur_ec,
+ right_type,
+ cur_em->em_datatype);
+ if (OidIsValid(eq_op))
+ {
+ newrinfo = build_implied_join_equality(eq_op,
+ rightvar,
+ cur_em->em_expr,
+ cur_ec->ec_relids);
+ if (process_equivalence(root, newrinfo, true))
+ matchright = true;
+ }
+ }
+
+ /*
+ * If we were able to equate both vars to constants, we're done, and
+ * we can throw away the full-join clause as redundant. Moreover,
+ * we can remove the COALESCE entry from the EC, since the added
+ * restrictions ensure it will always have the expected value.
+ * (We don't bother trying to update ec_relids or ec_sources.)
+ */
+ if (matchleft && matchright)
+ {
+ cur_ec->ec_members = list_delete_ptr(cur_ec->ec_members, coal_em);
+ return;
+ }
+ /*
+ * Otherwise, fall out of the search loop, since we know the COALESCE
+ * appears in at most one EC (XXX might stop being true if we allow
+ * stripping of coercions above?)
+ */
+ break;
+ }
+
+ /* We did not find a match, so throw it back into regular processing */
+ distribute_restrictinfo_to_rels(root, rinfo);
+}
+
+
+/*
+ * exprs_known_equal
+ * Detect whether two expressions are known equal due to equivalence
+ * relationships.
+ *
+ * Actually, this only shows that the expressions are equal according
+ * to some opfamily's notion of equality --- but we only use it for
+ * selectivity estimation, so a fuzzy idea of equality is OK.
+ *
+ * Note: does not bother to check for "equal(item1, item2)"; caller must
+ * check that case if it's possible to pass identical items.
+ */
+bool
+exprs_known_equal(PlannerInfo *root, Node *item1, Node *item2)
+{
+ ListCell *lc1;
+
+ foreach(lc1, root->eq_classes)
+ {
+ EquivalenceClass *ec = (EquivalenceClass *) lfirst(lc1);
+ bool item1member = false;
+ bool item2member = false;
+ ListCell *lc2;
+
+ /* Never match to a volatile EC */
+ if (ec->ec_has_volatile)
+ continue;
+
+ foreach(lc2, ec->ec_members)
+ {
+ EquivalenceMember *em = (EquivalenceMember *) lfirst(lc2);
+
+ if (equal(item1, em->em_expr))
+ item1member = true;
+ else if (equal(item2, em->em_expr))
+ item2member = true;
+ /* Exit as soon as equality is proven */
+ if (item1member && item2member)
+ return true;
+ }
+ }
+ return false;
+}
+
+
+/*
+ * add_child_rel_equivalences
+ * Search for EC members that reference (only) the parent_rel, and
+ * add transformed members referencing the child_rel.
+ *
+ * We only need to do this for ECs that could generate join conditions,
+ * since the child members are only used for creating inner-indexscan paths.
+ *
+ * parent_rel and child_rel could be derived from appinfo, but since the
+ * caller has already computed them, we might as well just pass them in.
+ */
+void
+add_child_rel_equivalences(PlannerInfo *root,
+ AppendRelInfo *appinfo,
+ RelOptInfo *parent_rel,
+ RelOptInfo *child_rel)
+{
+ ListCell *lc1;
+
+ foreach(lc1, root->eq_classes)
+ {
+ EquivalenceClass *cur_ec = (EquivalenceClass *) lfirst(lc1);
+ ListCell *lc2;
+
+ /*
+ * Won't generate joinclauses if const or single-member (the latter
+ * test covers the volatile case too)
+ */
+ if (cur_ec->ec_has_const || list_length(cur_ec->ec_members) <= 1)
+ continue;
+
+ /* No point in searching if parent rel not mentioned in eclass */
+ if (!bms_is_subset(parent_rel->relids, cur_ec->ec_relids))
+ continue;
+
+ foreach(lc2, cur_ec->ec_members)
+ {
+ EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc2);
+
+ /* Does it reference (only) parent_rel? */
+ if (bms_equal(cur_em->em_relids, parent_rel->relids))
+ {
+ /* Yes, generate transformed child version */
+ Expr *child_expr;
+
+ child_expr = (Expr *)
+ adjust_appendrel_attrs((Node *) cur_em->em_expr,
+ appinfo);
+ add_eq_member(cur_ec, child_expr, child_rel->relids,
+ true, cur_em->em_datatype);
+ }
+ }
+ }
+}
+
+
+/*
+ * find_eclass_clauses_for_index_join
+ * Create joinclauses usable for a nestloop-with-inner-indexscan
+ * scanning the given inner rel with the specified set of outer rels.
+ */
+List *
+find_eclass_clauses_for_index_join(PlannerInfo *root, RelOptInfo *rel,
+ Relids outer_relids)
+{
+ List *result = NIL;
+ bool is_child_rel = (rel->reloptkind == RELOPT_OTHER_MEMBER_REL);
+ ListCell *lc1;
+
+ foreach(lc1, root->eq_classes)
+ {
+ EquivalenceClass *cur_ec = (EquivalenceClass *) lfirst(lc1);
+ ListCell *lc2;
+
+ /*
+ * Won't generate joinclauses if const or single-member (the latter
+ * test covers the volatile case too)
+ */
+ if (cur_ec->ec_has_const || list_length(cur_ec->ec_members) <= 1)
+ continue;
+
+ /*
+ * No point in searching if rel not mentioned in eclass (but we
+ * can't tell that for a child rel).
+ */
+ if (!is_child_rel &&
+ !bms_is_subset(rel->relids, cur_ec->ec_relids))
+ continue;
+ /* ... nor if no overlap with outer_relids */
+ if (!bms_overlap(outer_relids, cur_ec->ec_relids))
+ continue;
+
+ /* Scan members, looking for indexable columns */
+ foreach(lc2, cur_ec->ec_members)
+ {
+ EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc2);
+ EquivalenceMember *best_outer_em = NULL;
+ Oid best_eq_op = InvalidOid;
+ ListCell *lc3;
+
+ if (!bms_equal(cur_em->em_relids, rel->relids) ||
+ !eclass_matches_any_index(cur_ec, cur_em, rel))
+ continue;
+
+ /*
+ * Found one, so try to generate a join clause. This is like
+ * generate_join_implied_equalities_normal, except simpler
+ * since our only preference item is to pick a Var on the
+ * outer side. We only need one join clause per index col.
+ */
+ foreach(lc3, cur_ec->ec_members)
+ {
+ EquivalenceMember *outer_em = (EquivalenceMember *) lfirst(lc3);
+ Oid eq_op;
+
+ if (!bms_is_subset(outer_em->em_relids, outer_relids))
+ continue;
+ eq_op = select_equality_operator(cur_ec,
+ cur_em->em_datatype,
+ outer_em->em_datatype);
+ if (!OidIsValid(eq_op))
+ continue;
+ best_outer_em = outer_em;
+ best_eq_op = eq_op;
+ if (IsA(outer_em->em_expr, Var) ||
+ (IsA(outer_em->em_expr, RelabelType) &&
+ IsA(((RelabelType *) outer_em->em_expr)->arg, Var)))
+ break; /* no need to look further */
+ }
+
+ if (best_outer_em)
+ {
+ /* Found a suitable joinclause */
+ RestrictInfo *rinfo;
+
+ rinfo = build_implied_join_equality(best_eq_op,
+ cur_em->em_expr,
+ best_outer_em->em_expr,
+ cur_ec->ec_relids);
+ /* mark restrictinfo as redundant with other joinclauses */
+ rinfo->parent_ec = cur_ec;
+ /* we can set these too */
+ rinfo->left_ec = cur_ec;
+ rinfo->right_ec = cur_ec;
+
+ result = lappend(result, rinfo);
+ /*
+ * Note: we keep scanning here because we want to provide
+ * a clause for every possible indexcol.
+ */
+ }
+ }
+ }
+
+ return result;
+}
+
+
+/*
+ * have_relevant_eclass_joinclause
+ * Detect whether there is an EquivalenceClass that could produce
+ * a joinclause between the two given relations.
+ *
+ * This is essentially a very cut-down version of
+ * generate_join_implied_equalities(). Note it's OK to occasionally say "yes"
+ * incorrectly. Hence we don't bother with details like whether the lack of a
+ * cross-type operator might prevent the clause from actually being generated.
+ */
+bool
+have_relevant_eclass_joinclause(PlannerInfo *root,
+ RelOptInfo *rel1, RelOptInfo *rel2)
+{
+ ListCell *lc1;
+
+ foreach(lc1, root->eq_classes)
+ {
+ EquivalenceClass *ec = (EquivalenceClass *) lfirst(lc1);
+ bool has_rel1;
+ bool has_rel2;
+ ListCell *lc2;
+
+ /*
+ * Won't generate joinclauses if const or single-member (the latter
+ * test covers the volatile case too)
+ */
+ if (ec->ec_has_const || list_length(ec->ec_members) <= 1)
+ continue;
+
+ /*
+ * Note we don't test ec_broken; if we did, we'd need a separate code
+ * path to look through ec_sources. Checking the members anyway is OK
+ * as a possibly-overoptimistic heuristic.
+ */
+
+ /* Needn't scan if it couldn't contain members from each rel */
+ if (!bms_overlap(rel1->relids, ec->ec_relids) ||
+ !bms_overlap(rel2->relids, ec->ec_relids))
+ continue;
+
+ /* Scan the EC to see if it has member(s) in each rel */
+ has_rel1 = has_rel2 = false;
+ foreach(lc2, ec->ec_members)
+ {
+ EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc2);
+
+ if (cur_em->em_is_child)
+ continue; /* ignore children here */
+ if (bms_is_subset(cur_em->em_relids, rel1->relids))
+ {
+ has_rel1 = true;
+ if (has_rel2)
+ break;
+ }
+ if (bms_is_subset(cur_em->em_relids, rel2->relids))
+ {
+ has_rel2 = true;
+ if (has_rel1)
+ break;
+ }
+ }
+
+ if (has_rel1 && has_rel2)
+ return true;
+ }
+
+ return false;
+}
+
+
+/*
+ * has_relevant_eclass_joinclause
+ * Detect whether there is an EquivalenceClass that could produce
+ * a joinclause between the given relation and anything else.
+ *
+ * This is the same as have_relevant_eclass_joinclause with the other rel
+ * implicitly defined as "everything else in the query".
+ */
+bool
+has_relevant_eclass_joinclause(PlannerInfo *root, RelOptInfo *rel1)
+{
+ ListCell *lc1;
+
+ foreach(lc1, root->eq_classes)
+ {
+ EquivalenceClass *ec = (EquivalenceClass *) lfirst(lc1);
+ bool has_rel1;
+ bool has_rel2;
+ ListCell *lc2;
+
+ /*
+ * Won't generate joinclauses if const or single-member (the latter
+ * test covers the volatile case too)
+ */
+ if (ec->ec_has_const || list_length(ec->ec_members) <= 1)
+ continue;
+
+ /*
+ * Note we don't test ec_broken; if we did, we'd need a separate code
+ * path to look through ec_sources. Checking the members anyway is OK
+ * as a possibly-overoptimistic heuristic.
+ */
+
+ /* Needn't scan if it couldn't contain members from each rel */
+ if (!bms_overlap(rel1->relids, ec->ec_relids) ||
+ bms_is_subset(ec->ec_relids, rel1->relids))
+ continue;
+
+ /* Scan the EC to see if it has member(s) in each rel */
+ has_rel1 = has_rel2 = false;
+ foreach(lc2, ec->ec_members)
+ {
+ EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc2);
+
+ if (cur_em->em_is_child)
+ continue; /* ignore children here */
+ if (bms_is_subset(cur_em->em_relids, rel1->relids))
+ {
+ has_rel1 = true;
+ if (has_rel2)
+ break;
+ }
+ if (!bms_overlap(cur_em->em_relids, rel1->relids))
+ {
+ has_rel2 = true;
+ if (has_rel1)
+ break;
+ }
+ }
+
+ if (has_rel1 && has_rel2)
+ return true;
+ }
+
+ return false;
+}
+
+
+/*
+ * eclass_useful_for_merging
+ * Detect whether the EC could produce any mergejoinable join clauses
+ * against the specified relation.
+ *
+ * This is just a heuristic test and doesn't have to be exact; it's better
+ * to say "yes" incorrectly than "no". Hence we don't bother with details
+ * like whether the lack of a cross-type operator might prevent the clause
+ * from actually being generated.
+ */
+bool
+eclass_useful_for_merging(EquivalenceClass *eclass,
+ RelOptInfo *rel)
+{
+ ListCell *lc;
+
+ Assert(!eclass->ec_merged);
+
+ /*
+ * Won't generate joinclauses if const or single-member (the latter
+ * test covers the volatile case too)
+ */
+ if (eclass->ec_has_const || list_length(eclass->ec_members) <= 1)
+ return false;
+
+ /*
+ * Note we don't test ec_broken; if we did, we'd need a separate code
+ * path to look through ec_sources. Checking the members anyway is OK
+ * as a possibly-overoptimistic heuristic.
+ */
+
+ /* If rel already includes all members of eclass, no point in searching */
+ if (bms_is_subset(eclass->ec_relids, rel->relids))
+ return false;
+
+ /* To join, we need a member not in the given rel */
+ foreach(lc, eclass->ec_members)
+ {
+ EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc);
+
+ if (!cur_em->em_is_child &&
+ !bms_overlap(cur_em->em_relids, rel->relids))
+ return true;
+ }
+
+ return false;
+}
diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c
index 3fd52d48500..0146cacf4e5 100644
--- a/src/backend/optimizer/path/indxpath.c
+++ b/src/backend/optimizer/path/indxpath.c
@@ -9,7 +9,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/optimizer/path/indxpath.c,v 1.215 2007/01/09 02:14:12 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/optimizer/path/indxpath.c,v 1.216 2007/01/20 20:45:39 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -32,7 +32,6 @@
#include "optimizer/var.h"
#include "utils/builtins.h"
#include "utils/lsyscache.h"
-#include "utils/memutils.h"
#include "utils/pg_locale.h"
#include "utils/selfuncs.h"
@@ -72,21 +71,11 @@ static bool match_rowcompare_to_indexcol(IndexOptInfo *index,
Oid opfamily,
RowCompareExpr *clause,
Relids outer_relids);
-static Relids indexable_outerrelids(RelOptInfo *rel);
+static Relids indexable_outerrelids(PlannerInfo *root, RelOptInfo *rel);
static bool matches_any_index(RestrictInfo *rinfo, RelOptInfo *rel,
Relids outer_relids);
static List *find_clauses_for_join(PlannerInfo *root, RelOptInfo *rel,
Relids outer_relids, bool isouterjoin);
-static ScanDirection match_variant_ordering(PlannerInfo *root,
- IndexOptInfo *index,
- List *restrictclauses);
-static List *identify_ignorable_ordering_cols(PlannerInfo *root,
- IndexOptInfo *index,
- List *restrictclauses);
-static bool match_index_to_query_keys(PlannerInfo *root,
- IndexOptInfo *index,
- ScanDirection indexscandir,
- List *ignorables);
static bool match_boolean_index_clause(Node *clause, int indexcol,
IndexOptInfo *index);
static bool match_special_index_operator(Expr *clause, Oid opfamily,
@@ -157,7 +146,7 @@ create_index_paths(PlannerInfo *root, RelOptInfo *rel)
* participate in such join clauses. We'll use this set later to
* recognize outer rels that are equivalent for joining purposes.
*/
- rel->index_outer_relids = indexable_outerrelids(rel);
+ rel->index_outer_relids = indexable_outerrelids(root, rel);
/*
* Find all the index paths that are directly usable for this relation
@@ -351,8 +340,7 @@ find_usable_indexes(PlannerInfo *root, RelOptInfo *rel,
if (index_is_ordered && istoplevel && outer_rel == NULL)
{
index_pathkeys = build_index_pathkeys(root, index,
- ForwardScanDirection,
- true);
+ ForwardScanDirection);
useful_pathkeys = truncate_useless_pathkeys(root, rel,
index_pathkeys);
}
@@ -378,23 +366,21 @@ find_usable_indexes(PlannerInfo *root, RelOptInfo *rel,
}
/*
- * 4. If the index is ordered, and there is a requested query ordering
- * that we failed to match, consider variant ways of achieving the
- * ordering. Again, this is only interesting at top level.
+ * 4. If the index is ordered, a backwards scan might be
+ * interesting. Again, this is only interesting at top level.
*/
- if (index_is_ordered && istoplevel && outer_rel == NULL &&
- root->query_pathkeys != NIL &&
- pathkeys_useful_for_ordering(root, useful_pathkeys) == 0)
+ if (index_is_ordered && istoplevel && outer_rel == NULL)
{
- ScanDirection scandir;
-
- scandir = match_variant_ordering(root, index, restrictclauses);
- if (!ScanDirectionIsNoMovement(scandir))
+ index_pathkeys = build_index_pathkeys(root, index,
+ BackwardScanDirection);
+ useful_pathkeys = truncate_useless_pathkeys(root, rel,
+ index_pathkeys);
+ if (useful_pathkeys != NIL)
{
ipath = create_index_path(root, index,
restrictclauses,
- root->query_pathkeys,
- scandir,
+ useful_pathkeys,
+ BackwardScanDirection,
outer_rel);
result = lappend(result, ipath);
}
@@ -1207,19 +1193,6 @@ check_partial_indexes(PlannerInfo *root, RelOptInfo *rel)
List *restrictinfo_list = rel->baserestrictinfo;
ListCell *ilist;
- /*
- * Note: if Postgres tried to optimize queries by forming equivalence
- * classes over equi-joined attributes (i.e., if it recognized that a
- * qualification such as "where a.b=c.d and a.b=5" could make use of an
- * index on c.d), then we could use that equivalence class info here with
- * joininfo lists to do more complete tests for the usability of a partial
- * index. For now, the test only uses restriction clauses (those in
- * baserestrictinfo). --Nels, Dec '92
- *
- * XXX as of 7.1, equivalence class info *is* available. Consider
- * improving this code as foreseen by Nels.
- */
-
foreach(ilist, rel->indexlist)
{
IndexOptInfo *index = (IndexOptInfo *) lfirst(ilist);
@@ -1242,18 +1215,19 @@ check_partial_indexes(PlannerInfo *root, RelOptInfo *rel)
* for the specified table. Returns a set of relids.
*/
static Relids
-indexable_outerrelids(RelOptInfo *rel)
+indexable_outerrelids(PlannerInfo *root, RelOptInfo *rel)
{
Relids outer_relids = NULL;
- ListCell *l;
+ bool is_child_rel = (rel->reloptkind == RELOPT_OTHER_MEMBER_REL);
+ ListCell *lc1;
/*
* Examine each joinclause in the joininfo list to see if it matches any
* key of any index. If so, add the clause's other rels to the result.
*/
- foreach(l, rel->joininfo)
+ foreach(lc1, rel->joininfo)
{
- RestrictInfo *joininfo = (RestrictInfo *) lfirst(l);
+ RestrictInfo *joininfo = (RestrictInfo *) lfirst(lc1);
Relids other_rels;
other_rels = bms_difference(joininfo->required_relids, rel->relids);
@@ -1263,6 +1237,71 @@ indexable_outerrelids(RelOptInfo *rel)
bms_free(other_rels);
}
+ /*
+ * We also have to look through the query's EquivalenceClasses to see
+ * if any of them could generate indexable join conditions for this rel.
+ */
+ if (rel->has_eclass_joins)
+ {
+ foreach(lc1, root->eq_classes)
+ {
+ EquivalenceClass *cur_ec = (EquivalenceClass *) lfirst(lc1);
+ Relids other_rels = NULL;
+ bool found_index = false;
+ ListCell *lc2;
+
+ /*
+ * Won't generate joinclauses if const or single-member (the latter
+ * test covers the volatile case too)
+ */
+ if (cur_ec->ec_has_const || list_length(cur_ec->ec_members) <= 1)
+ continue;
+
+ /*
+ * Note we don't test ec_broken; if we did, we'd need a separate
+ * code path to look through ec_sources. Checking the members
+ * anyway is OK as a possibly-overoptimistic heuristic.
+ */
+
+ /*
+ * No point in searching if rel not mentioned in eclass (but we
+ * can't tell that for a child rel).
+ */
+ if (!is_child_rel &&
+ !bms_is_subset(rel->relids, cur_ec->ec_relids))
+ continue;
+
+ /*
+ * Scan members, looking for both an index match and join
+ * candidates
+ */
+ foreach(lc2, cur_ec->ec_members)
+ {
+ EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc2);
+
+ /* Join candidate? */
+ if (!cur_em->em_is_child &&
+ !bms_overlap(cur_em->em_relids, rel->relids))
+ {
+ other_rels = bms_add_members(other_rels,
+ cur_em->em_relids);
+ continue;
+ }
+
+ /* Check for index match (only need one) */
+ if (!found_index &&
+ bms_equal(cur_em->em_relids, rel->relids) &&
+ eclass_matches_any_index(cur_ec, cur_em, rel))
+ found_index = true;
+ }
+
+ if (found_index)
+ outer_relids = bms_join(outer_relids, other_rels);
+ else
+ bms_free(other_rels);
+ }
+ }
+
return outer_relids;
}
@@ -1340,6 +1379,42 @@ matches_any_index(RestrictInfo *rinfo, RelOptInfo *rel, Relids outer_relids)
}
/*
+ * eclass_matches_any_index
+ * Workhorse for indexable_outerrelids: see if an EquivalenceClass member
+ * can be matched to any index column of the given rel.
+ *
+ * This is also exported for use by find_eclass_clauses_for_index_join.
+ */
+bool
+eclass_matches_any_index(EquivalenceClass *ec, EquivalenceMember *em,
+ RelOptInfo *rel)
+{
+ ListCell *l;
+
+ foreach(l, rel->indexlist)
+ {
+ IndexOptInfo *index = (IndexOptInfo *) lfirst(l);
+ int indexcol = 0;
+ Oid *families = index->opfamily;
+
+ do
+ {
+ Oid curFamily = families[0];
+
+ if (list_member_oid(ec->ec_opfamilies, curFamily) &&
+ match_index_to_operand((Node *) em->em_expr, indexcol, index))
+ return true;
+
+ indexcol++;
+ families++;
+ } while (!DoneMatchingIndexKeys(families));
+ }
+
+ return false;
+}
+
+
+/*
* best_inner_indexscan
* Finds the best available inner indexscan for a nestloop join
* with the given rel on the inside and the given outer_rel outside.
@@ -1393,12 +1468,12 @@ best_inner_indexscan(PlannerInfo *root, RelOptInfo *rel,
return NULL;
/*
- * Otherwise, we have to do path selection in the memory context of the
- * given rel, so that any created path can be safely attached to the rel's
- * cache of best inner paths. (This is not currently an issue for normal
- * planning, but it is an issue for GEQO planning.)
+ * Otherwise, we have to do path selection in the main planning context,
+ * so that any created path can be safely attached to the rel's cache of
+ * best inner paths. (This is not currently an issue for normal planning,
+ * but it is an issue for GEQO planning.)
*/
- oldcontext = MemoryContextSwitchTo(GetMemoryChunkContext(rel));
+ oldcontext = MemoryContextSwitchTo(root->planner_cxt);
/*
* Intersect the given outer relids with index_outer_relids to find the
@@ -1539,7 +1614,12 @@ find_clauses_for_join(PlannerInfo *root, RelOptInfo *rel,
Relids join_relids;
ListCell *l;
- /* Look for joinclauses that are usable with given outer_relids */
+ /*
+ * Look for joinclauses that are usable with given outer_relids. Note
+ * we'll take anything that's applicable to the join whether it has
+ * anything to do with an index or not; since we're only building a list,
+ * it's not worth filtering more finely here.
+ */
join_relids = bms_union(rel->relids, outer_relids);
foreach(l, rel->joininfo)
@@ -1557,276 +1637,27 @@ find_clauses_for_join(PlannerInfo *root, RelOptInfo *rel,
bms_free(join_relids);
- /* if no join clause was matched then forget it, per comments above */
+ /*
+ * Also check to see if any EquivalenceClasses can produce a relevant
+ * joinclause. Since all such clauses are effectively pushed-down,
+ * this doesn't apply to outer joins.
+ */
+ if (!isouterjoin && rel->has_eclass_joins)
+ clause_list = list_concat(clause_list,
+ find_eclass_clauses_for_index_join(root,
+ rel,
+ outer_relids));
+
+ /* If no join clause was matched then forget it, per comments above */
if (clause_list == NIL)
return NIL;
- /*
- * We can also use any plain restriction clauses for the rel. We put
- * these at the front of the clause list for the convenience of
- * remove_redundant_join_clauses, which can never remove non-join clauses
- * and hence won't be able to get rid of a non-join clause if it appears
- * after a join clause it is redundant with.
- */
+ /* We can also use any plain restriction clauses for the rel */
clause_list = list_concat(list_copy(rel->baserestrictinfo), clause_list);
- /*
- * We may now have clauses that are known redundant. Get rid of 'em.
- */
- if (list_length(clause_list) > 1)
- {
- clause_list = remove_redundant_join_clauses(root,
- clause_list,
- isouterjoin);
- }
-
return clause_list;
}
-/****************************************************************************
- * ---- ROUTINES TO HANDLE PATHKEYS ----
- ****************************************************************************/
-
-/*
- * match_variant_ordering
- * Try to match an index's ordering to the query's requested ordering
- *
- * This is used when the index is ordered but a naive comparison fails to
- * match its ordering (pathkeys) to root->query_pathkeys. It may be that
- * we need to scan the index backwards. Also, a less naive comparison can
- * help for both forward and backward indexscans. Columns of the index
- * that have an equality restriction clause can be ignored in the match;
- * that is, an index on (x,y) can be considered to match the ordering of
- * ... WHERE x = 42 ORDER BY y;
- *
- * Note: it would be possible to similarly ignore useless ORDER BY items;
- * that is, an index on just y could be considered to match the ordering of
- * ... WHERE x = 42 ORDER BY x, y;
- * But proving that this is safe would require finding a btree opfamily
- * containing both the = operator and the < or > operator in the ORDER BY
- * item. That's significantly more expensive than what we do here, since
- * we'd have to look at restriction clauses unrelated to the current index
- * and search for opfamilies without any hint from the index. The practical
- * use-cases seem to be mostly covered by ignoring index columns, so that's
- * all we do for now.
- *
- * Inputs:
- * 'index' is the index of interest.
- * 'restrictclauses' is the list of sublists of restriction clauses
- * matching the columns of the index (NIL if none)
- *
- * If able to match the requested query pathkeys, returns either
- * ForwardScanDirection or BackwardScanDirection to indicate the proper index
- * scan direction. If no match, returns NoMovementScanDirection.
- */
-static ScanDirection
-match_variant_ordering(PlannerInfo *root,
- IndexOptInfo *index,
- List *restrictclauses)
-{
- List *ignorables;
-
- /*
- * Forget the whole thing if not a btree index; our check for ignorable
- * columns assumes we are dealing with btree opfamilies. (It'd be possible
- * to factor out just the try for backwards indexscan, but considering
- * that we presently have no orderable indexes except btrees anyway, it's
- * hardly worth contorting this code for that case.)
- *
- * Note: if you remove this, you probably need to put in a check on
- * amoptionalkey to prevent possible clauseless scan on an index that
- * won't cope.
- */
- if (index->relam != BTREE_AM_OID)
- return NoMovementScanDirection;
-
- /*
- * Figure out which index columns can be optionally ignored because they
- * have an equality constraint. This is the same set for either forward
- * or backward scan, so we do it just once.
- */
- ignorables = identify_ignorable_ordering_cols(root, index,
- restrictclauses);
-
- /*
- * Try to match to forward scan, then backward scan. However, we can skip
- * the forward-scan case if there are no ignorable columns, because
- * find_usable_indexes() would have found the match already.
- */
- if (ignorables &&
- match_index_to_query_keys(root, index, ForwardScanDirection,
- ignorables))
- return ForwardScanDirection;
-
- if (match_index_to_query_keys(root, index, BackwardScanDirection,
- ignorables))
- return BackwardScanDirection;
-
- return NoMovementScanDirection;
-}
-
-/*
- * identify_ignorable_ordering_cols
- * Determine which index columns can be ignored for ordering purposes
- *
- * Returns an integer List of column numbers (1-based) of ignorable
- * columns. The ignorable columns are those that have equality constraints
- * against pseudoconstants.
- */
-static List *
-identify_ignorable_ordering_cols(PlannerInfo *root,
- IndexOptInfo *index,
- List *restrictclauses)
-{
- List *result = NIL;
- int indexcol = 0; /* note this is 0-based */
- ListCell *l;
-
- /* restrictclauses is either NIL or has a sublist per column */
- foreach(l, restrictclauses)
- {
- List *sublist = (List *) lfirst(l);
- Oid opfamily = index->opfamily[indexcol];
- ListCell *l2;
-
- foreach(l2, sublist)
- {
- RestrictInfo *rinfo = (RestrictInfo *) lfirst(l2);
- OpExpr *clause = (OpExpr *) rinfo->clause;
- Oid clause_op;
- int op_strategy;
- bool varonleft;
- bool ispc;
-
- /* First check for boolean-index cases. */
- if (IsBooleanOpfamily(opfamily))
- {
- if (match_boolean_index_clause((Node *) clause, indexcol,
- index))
- {
- /*
- * The clause means either col = TRUE or col = FALSE; we
- * do not care which, it's an equality constraint either
- * way.
- */
- result = lappend_int(result, indexcol + 1);
- break;
- }
- }
-
- /* Otherwise, ignore if not a binary opclause */
- if (!is_opclause(clause) || list_length(clause->args) != 2)
- continue;
-
- /* Determine left/right sides and check the operator */
- clause_op = clause->opno;
- if (match_index_to_operand(linitial(clause->args), indexcol,
- index))
- {
- /* clause_op is correct */
- varonleft = true;
- }
- else
- {
- Assert(match_index_to_operand(lsecond(clause->args), indexcol,
- index));
- /* Must flip operator to get the opfamily member */
- clause_op = get_commutator(clause_op);
- varonleft = false;
- }
- if (!OidIsValid(clause_op))
- continue; /* ignore non match, per next comment */
- op_strategy = get_op_opfamily_strategy(clause_op, opfamily);
-
- /*
- * You might expect to see Assert(op_strategy != 0) here, but you
- * won't: the clause might contain a special indexable operator
- * rather than an ordinary opfamily member. Currently none of the
- * special operators are very likely to expand to an equality
- * operator; we do not bother to check, but just assume no match.
- */
- if (op_strategy != BTEqualStrategyNumber)
- continue;
-
- /* Now check that other side is pseudoconstant */
- if (varonleft)
- ispc = is_pseudo_constant_clause_relids(lsecond(clause->args),
- rinfo->right_relids);
- else
- ispc = is_pseudo_constant_clause_relids(linitial(clause->args),
- rinfo->left_relids);
- if (ispc)
- {
- result = lappend_int(result, indexcol + 1);
- break;
- }
- }
- indexcol++;
- }
- return result;
-}
-
-/*
- * match_index_to_query_keys
- * Check a single scan direction for "intelligent" match to query keys
- *
- * 'index' is the index of interest.
- * 'indexscandir' is the scan direction to consider
- * 'ignorables' is an integer list of indexes of ignorable index columns
- *
- * Returns TRUE on successful match (ie, the query_pathkeys can be considered
- * to match this index).
- */
-static bool
-match_index_to_query_keys(PlannerInfo *root,
- IndexOptInfo *index,
- ScanDirection indexscandir,
- List *ignorables)
-{
- List *index_pathkeys;
- ListCell *index_cell;
- int index_col;
- ListCell *r;
-
- /* Get the pathkeys that exactly describe the index */
- index_pathkeys = build_index_pathkeys(root, index, indexscandir, false);
-
- /*
- * Can we match to the query's requested pathkeys? The inner loop skips
- * over ignorable index columns while trying to match.
- */
- index_cell = list_head(index_pathkeys);
- index_col = 0;
-
- foreach(r, root->query_pathkeys)
- {
- List *rsubkey = (List *) lfirst(r);
-
- for (;;)
- {
- List *isubkey;
-
- if (index_cell == NULL)
- return false;
- isubkey = (List *) lfirst(index_cell);
- index_cell = lnext(index_cell);
- index_col++; /* index_col is now 1-based */
-
- /*
- * Since we are dealing with canonicalized pathkeys, pointer
- * comparison is sufficient to determine a match.
- */
- if (rsubkey == isubkey)
- break; /* matched current query pathkey */
-
- if (!list_member_int(ignorables, index_col))
- return false; /* definite failure to match */
- /* otherwise loop around and try to match to next index col */
- }
- }
-
- return true;
-}
/****************************************************************************
* ---- PATH CREATION UTILITIES ----
diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c
index 2885b021546..3a68d79ca9e 100644
--- a/src/backend/optimizer/path/joinpath.c
+++ b/src/backend/optimizer/path/joinpath.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/optimizer/path/joinpath.c,v 1.110 2007/01/10 18:06:03 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/optimizer/path/joinpath.c,v 1.111 2007/01/20 20:45:39 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -16,7 +16,6 @@
#include <math.h>
-#include "access/skey.h"
#include "optimizer/cost.h"
#include "optimizer/pathnode.h"
#include "optimizer/paths.h"
@@ -40,10 +39,6 @@ static List *select_mergejoin_clauses(RelOptInfo *joinrel,
RelOptInfo *innerrel,
List *restrictlist,
JoinType jointype);
-static void build_mergejoin_strat_arrays(List *mergeclauses,
- Oid **mergefamilies,
- int **mergestrategies,
- bool **mergenullsfirst);
/*
@@ -205,9 +200,9 @@ sort_inner_and_outer(PlannerInfo *root,
*
* Actually, it's not quite true that every mergeclause ordering will
* generate a different path order, because some of the clauses may be
- * redundant. Therefore, what we do is convert the mergeclause list to a
- * list of canonical pathkeys, and then consider different orderings of
- * the pathkeys.
+ * partially redundant (refer to the same EquivalenceClasses). Therefore,
+ * what we do is convert the mergeclause list to a list of canonical
+ * pathkeys, and then consider different orderings of the pathkeys.
*
* Generating a path for *every* permutation of the pathkeys doesn't seem
* like a winning strategy; the cost in planning time is too high. For
@@ -216,75 +211,58 @@ sort_inner_and_outer(PlannerInfo *root,
* mergejoin without re-sorting against any other possible mergejoin
* partner path. But if we've not guessed the right ordering of secondary
* keys, we may end up evaluating clauses as qpquals when they could have
- * been done as mergeclauses. We need to figure out a better way. (Two
- * possible approaches: look at all the relevant index relations to
- * suggest plausible sort orders, or make just one output path and somehow
- * mark it as having a sort-order that can be rearranged freely.)
+ * been done as mergeclauses. (In practice, it's rare that there's more
+ * than two or three mergeclauses, so expending a huge amount of thought
+ * on that is probably not worth it.)
+ *
+ * The pathkey order returned by select_outer_pathkeys_for_merge() has
+ * some heuristics behind it (see that function), so be sure to try it
+ * exactly as-is as well as making variants.
*/
- all_pathkeys = make_pathkeys_for_mergeclauses(root,
- mergeclause_list,
- outerrel);
+ all_pathkeys = select_outer_pathkeys_for_merge(root,
+ mergeclause_list,
+ joinrel);
foreach(l, all_pathkeys)
{
List *front_pathkey = (List *) lfirst(l);
- List *cur_pathkeys;
List *cur_mergeclauses;
- Oid *mergefamilies;
- int *mergestrategies;
- bool *mergenullsfirst;
List *outerkeys;
List *innerkeys;
List *merge_pathkeys;
- /* Make a pathkey list with this guy first. */
+ /* Make a pathkey list with this guy first */
if (l != list_head(all_pathkeys))
- cur_pathkeys = lcons(front_pathkey,
- list_delete_ptr(list_copy(all_pathkeys),
- front_pathkey));
+ outerkeys = lcons(front_pathkey,
+ list_delete_ptr(list_copy(all_pathkeys),
+ front_pathkey));
else
- cur_pathkeys = all_pathkeys; /* no work at first one... */
+ outerkeys = all_pathkeys; /* no work at first one... */
- /*
- * Select mergeclause(s) that match this sort ordering. If we had
- * redundant merge clauses then we will get a subset of the original
- * clause list. There had better be some match, however...
- */
+ /* Sort the mergeclauses into the corresponding ordering */
cur_mergeclauses = find_mergeclauses_for_pathkeys(root,
- cur_pathkeys,
+ outerkeys,
+ true,
mergeclause_list);
- Assert(cur_mergeclauses != NIL);
- /* Forget it if can't use all the clauses in right/full join */
- if (useallclauses &&
- list_length(cur_mergeclauses) != list_length(mergeclause_list))
- continue;
+ /* Should have used them all... */
+ Assert(list_length(cur_mergeclauses) == list_length(mergeclause_list));
- /*
- * Build sort pathkeys for both sides.
- *
- * Note: it's possible that the cheapest paths will already be sorted
- * properly. create_mergejoin_path will detect that case and suppress
- * an explicit sort step, so we needn't do so here.
- */
- outerkeys = make_pathkeys_for_mergeclauses(root,
- cur_mergeclauses,
- outerrel);
- innerkeys = make_pathkeys_for_mergeclauses(root,
- cur_mergeclauses,
- innerrel);
- /* Build pathkeys representing output sort order. */
+ /* Build sort pathkeys for the inner side */
+ innerkeys = make_inner_pathkeys_for_merge(root,
+ cur_mergeclauses,
+ outerkeys);
+
+ /* Build pathkeys representing output sort order */
merge_pathkeys = build_join_pathkeys(root, joinrel, jointype,
outerkeys);
- /* Build opfamily info for execution */
- build_mergejoin_strat_arrays(cur_mergeclauses,
- &mergefamilies,
- &mergestrategies,
- &mergenullsfirst);
-
/*
* And now we can make the path.
+ *
+ * Note: it's possible that the cheapest paths will already be sorted
+ * properly. create_mergejoin_path will detect that case and suppress
+ * an explicit sort step, so we needn't do so here.
*/
add_path(joinrel, (Path *)
create_mergejoin_path(root,
@@ -295,9 +273,6 @@ sort_inner_and_outer(PlannerInfo *root,
restrictlist,
merge_pathkeys,
cur_mergeclauses,
- mergefamilies,
- mergestrategies,
- mergenullsfirst,
outerkeys,
innerkeys));
}
@@ -427,9 +402,6 @@ match_unsorted_outer(PlannerInfo *root,
Path *outerpath = (Path *) lfirst(l);
List *merge_pathkeys;
List *mergeclauses;
- Oid *mergefamilies;
- int *mergestrategies;
- bool *mergenullsfirst;
List *innersortkeys;
List *trialsortkeys;
Path *cheapest_startup_inner;
@@ -510,6 +482,7 @@ match_unsorted_outer(PlannerInfo *root,
/* Look for useful mergeclauses (if any) */
mergeclauses = find_mergeclauses_for_pathkeys(root,
outerpath->pathkeys,
+ true,
mergeclause_list);
/*
@@ -532,15 +505,9 @@ match_unsorted_outer(PlannerInfo *root,
continue;
/* Compute the required ordering of the inner path */
- innersortkeys = make_pathkeys_for_mergeclauses(root,
- mergeclauses,
- innerrel);
-
- /* Build opfamily info for execution */
- build_mergejoin_strat_arrays(mergeclauses,
- &mergefamilies,
- &mergestrategies,
- &mergenullsfirst);
+ innersortkeys = make_inner_pathkeys_for_merge(root,
+ mergeclauses,
+ outerpath->pathkeys);
/*
* Generate a mergejoin on the basis of sorting the cheapest inner.
@@ -557,9 +524,6 @@ match_unsorted_outer(PlannerInfo *root,
restrictlist,
merge_pathkeys,
mergeclauses,
- mergefamilies,
- mergestrategies,
- mergenullsfirst,
NIL,
innersortkeys));
@@ -613,18 +577,12 @@ match_unsorted_outer(PlannerInfo *root,
newclauses =
find_mergeclauses_for_pathkeys(root,
trialsortkeys,
+ false,
mergeclauses);
Assert(newclauses != NIL);
}
else
newclauses = mergeclauses;
-
- /* Build opfamily info for execution */
- build_mergejoin_strat_arrays(newclauses,
- &mergefamilies,
- &mergestrategies,
- &mergenullsfirst);
-
add_path(joinrel, (Path *)
create_mergejoin_path(root,
joinrel,
@@ -634,9 +592,6 @@ match_unsorted_outer(PlannerInfo *root,
restrictlist,
merge_pathkeys,
newclauses,
- mergefamilies,
- mergestrategies,
- mergenullsfirst,
NIL,
NIL));
cheapest_total_inner = innerpath;
@@ -666,19 +621,13 @@ match_unsorted_outer(PlannerInfo *root,
newclauses =
find_mergeclauses_for_pathkeys(root,
trialsortkeys,
+ false,
mergeclauses);
Assert(newclauses != NIL);
}
else
newclauses = mergeclauses;
}
-
- /* Build opfamily info for execution */
- build_mergejoin_strat_arrays(newclauses,
- &mergefamilies,
- &mergestrategies,
- &mergenullsfirst);
-
add_path(joinrel, (Path *)
create_mergejoin_path(root,
joinrel,
@@ -688,9 +637,6 @@ match_unsorted_outer(PlannerInfo *root,
restrictlist,
merge_pathkeys,
newclauses,
- mergefamilies,
- mergestrategies,
- mergenullsfirst,
NIL,
NIL));
}
@@ -909,6 +855,10 @@ best_appendrel_indexscan(PlannerInfo *root, RelOptInfo *rel,
* Select mergejoin clauses that are usable for a particular join.
* Returns a list of RestrictInfo nodes for those clauses.
*
+ * We also mark each selected RestrictInfo to show which side is currently
+ * being considered as outer. These are transient markings that are only
+ * good for the duration of the current add_paths_to_joinrel() call!
+ *
* We examine each restrictinfo clause known for the join to see
* if it is mergejoinable and involves vars from the two sub-relations
* currently of interest.
@@ -939,7 +889,7 @@ select_mergejoin_clauses(RelOptInfo *joinrel,
continue;
if (!restrictinfo->can_join ||
- restrictinfo->mergejoinoperator == InvalidOid)
+ restrictinfo->mergeopfamilies == NIL)
{
have_nonmergeable_joinclause = true;
continue; /* not mergejoinable */
@@ -954,11 +904,13 @@ select_mergejoin_clauses(RelOptInfo *joinrel,
bms_is_subset(restrictinfo->right_relids, innerrel->relids))
{
/* righthand side is inner */
+ restrictinfo->outer_is_left = true;
}
else if (bms_is_subset(restrictinfo->left_relids, innerrel->relids) &&
bms_is_subset(restrictinfo->right_relids, outerrel->relids))
{
/* lefthand side is inner */
+ restrictinfo->outer_is_left = false;
}
else
{
@@ -966,7 +918,7 @@ select_mergejoin_clauses(RelOptInfo *joinrel,
continue; /* no good for these input relations */
}
- result_list = lcons(restrictinfo, result_list);
+ result_list = lappend(result_list, restrictinfo);
}
/*
@@ -995,46 +947,3 @@ select_mergejoin_clauses(RelOptInfo *joinrel,
return result_list;
}
-
-/*
- * Temporary hack to build opfamily and strategy info needed for mergejoin
- * by the executor. We need to rethink the planner's handling of merge
- * planning so that it can deal with multiple possible merge orders, but
- * that's not done yet.
- */
-static void
-build_mergejoin_strat_arrays(List *mergeclauses,
- Oid **mergefamilies,
- int **mergestrategies,
- bool **mergenullsfirst)
-{
- int nClauses = list_length(mergeclauses);
- int i;
- ListCell *l;
-
- *mergefamilies = (Oid *) palloc(nClauses * sizeof(Oid));
- *mergestrategies = (int *) palloc(nClauses * sizeof(int));
- *mergenullsfirst = (bool *) palloc(nClauses * sizeof(bool));
-
- i = 0;
- foreach(l, mergeclauses)
- {
- RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
-
- /*
- * We do not need to worry about whether the mergeclause will be
- * commuted at runtime --- it's the same opfamily either way.
- */
- (*mergefamilies)[i] = restrictinfo->mergeopfamily;
- /*
- * For the moment, strategy must always be LessThan --- see
- * hack version of get_op_mergejoin_info
- */
- (*mergestrategies)[i] = BTLessStrategyNumber;
-
- /* And we only allow NULLS LAST, too */
- (*mergenullsfirst)[i] = false;
-
- i++;
- }
-}
diff --git a/src/backend/optimizer/path/joinrels.c b/src/backend/optimizer/path/joinrels.c
index f1fe34e5393..70e16eeb8e8 100644
--- a/src/backend/optimizer/path/joinrels.c
+++ b/src/backend/optimizer/path/joinrels.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/optimizer/path/joinrels.c,v 1.83 2007/01/05 22:19:31 momjian Exp $
+ * $PostgreSQL: pgsql/src/backend/optimizer/path/joinrels.c,v 1.84 2007/01/20 20:45:39 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -72,7 +72,7 @@ make_rels_by_joins(PlannerInfo *root, int level, List **joinrels)
other_rels = list_head(joinrels[1]); /* consider all initial
* rels */
- if (old_rel->joininfo != NIL)
+ if (old_rel->joininfo != NIL || old_rel->has_eclass_joins)
{
/*
* Note that if all available join clauses for this rel require
@@ -152,7 +152,8 @@ make_rels_by_joins(PlannerInfo *root, int level, List **joinrels)
* outer joins --- then we might have to force a bushy outer
* join. See have_relevant_joinclause().
*/
- if (old_rel->joininfo == NIL && root->oj_info_list == NIL)
+ if (old_rel->joininfo == NIL && !old_rel->has_eclass_joins &&
+ root->oj_info_list == NIL)
continue;
if (k == other_level)
@@ -251,8 +252,7 @@ make_rels_by_joins(PlannerInfo *root, int level, List **joinrels)
/*
* make_rels_by_clause_joins
* Build joins between the given relation 'old_rel' and other relations
- * that are mentioned within old_rel's joininfo list (i.e., relations
- * that participate in join clauses that 'old_rel' also participates in).
+ * that participate in join clauses that 'old_rel' also participates in.
* The join rel nodes are returned in a list.
*
* 'old_rel' is the relation entry for the relation to be joined
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index 4fc5a5f1250..15793b4ef99 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -11,687 +11,180 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/optimizer/path/pathkeys.c,v 1.81 2007/01/09 02:14:12 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/optimizer/path/pathkeys.c,v 1.82 2007/01/20 20:45:39 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
+#include "access/skey.h"
#include "nodes/makefuncs.h"
+#include "nodes/plannodes.h"
#include "optimizer/clauses.h"
#include "optimizer/pathnode.h"
#include "optimizer/paths.h"
-#include "optimizer/planmain.h"
#include "optimizer/tlist.h"
-#include "optimizer/var.h"
#include "parser/parsetree.h"
#include "parser/parse_expr.h"
#include "utils/lsyscache.h"
-#include "utils/memutils.h"
-
-
-static PathKeyItem *makePathKeyItem(Node *key, Oid sortop, bool nulls_first,
- bool checkType);
-static void generate_outer_join_implications(PlannerInfo *root,
- List *equi_key_set,
- Relids *relids);
-static void sub_generate_join_implications(PlannerInfo *root,
- List *equi_key_set, Relids *relids,
- Node *item1, Oid sortop1,
- Relids item1_relids);
-static void process_implied_const_eq(PlannerInfo *root,
- List *equi_key_set, Relids *relids,
- Node *item1, Oid sortop1,
- Relids item1_relids,
- bool delete_it);
-static List *make_canonical_pathkey(PlannerInfo *root, PathKeyItem *item);
-static Var *find_indexkey_var(PlannerInfo *root, RelOptInfo *rel,
- AttrNumber varattno);
/*
- * makePathKeyItem
- * create a PathKeyItem node
+ * If an EC contains a const and isn't below-outer-join, any PathKey depending
+ * on it must be redundant, since there's only one possible value of the key.
*/
-static PathKeyItem *
-makePathKeyItem(Node *key, Oid sortop, bool nulls_first, bool checkType)
-{
- PathKeyItem *item = makeNode(PathKeyItem);
-
- /*
- * Some callers pass expressions that are not necessarily of the same type
- * as the sort operator expects as input (for example when dealing with an
- * index that uses binary-compatible operators). We must relabel these
- * with the correct type so that the key expressions will be seen as
- * equal() to expressions that have been correctly labeled.
- */
- if (checkType)
- {
- Oid lefttype,
- righttype;
+#define MUST_BE_REDUNDANT(eclass) \
+ ((eclass)->ec_has_const && !(eclass)->ec_below_outer_join)
+
+static PathKey *makePathKey(EquivalenceClass *eclass, Oid opfamily,
+ int strategy, bool nulls_first);
+static PathKey *make_canonical_pathkey(PlannerInfo *root,
+ EquivalenceClass *eclass, Oid opfamily,
+ int strategy, bool nulls_first);
+static bool pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys);
+static PathKey *make_pathkey_from_sortinfo(PlannerInfo *root,
+ Expr *expr, Oid ordering_op,
+ bool nulls_first,
+ bool canonicalize);
+static Var *find_indexkey_var(PlannerInfo *root, RelOptInfo *rel,
+ AttrNumber varattno);
- op_input_types(sortop, &lefttype, &righttype);
- if (exprType(key) != lefttype)
- key = (Node *) makeRelabelType((Expr *) key,
- lefttype, -1,
- COERCE_DONTCARE);
- }
- item->key = key;
- item->sortop = sortop;
- item->nulls_first = nulls_first;
- return item;
-}
+/****************************************************************************
+ * PATHKEY CONSTRUCTION AND REDUNDANCY TESTING
+ ****************************************************************************/
/*
- * add_equijoined_keys
- * The given clause has a mergejoinable operator, so its two sides
- * can be considered equal after restriction clause application; in
- * particular, any pathkey mentioning one side (with the correct sortop)
- * can be expanded to include the other as well. Record the exprs and
- * associated sortops in the query's equi_key_list for future use.
+ * makePathKey
+ * create a PathKey node
*
- * The query's equi_key_list field points to a list of sublists of PathKeyItem
- * nodes, where each sublist is a set of two or more exprs+sortops that have
- * been identified as logically equivalent (and, therefore, we may consider
- * any two in a set to be equal). As described above, we will subsequently
- * use direct pointers to one of these sublists to represent any pathkey
- * that involves an equijoined variable.
+ * This does not promise to create a canonical PathKey, it's merely a
+ * convenience routine to build the specified node.
*/
-void
-add_equijoined_keys(PlannerInfo *root, RestrictInfo *restrictinfo)
+static PathKey *
+makePathKey(EquivalenceClass *eclass, Oid opfamily,
+ int strategy, bool nulls_first)
{
- Expr *clause = restrictinfo->clause;
- PathKeyItem *item1 = makePathKeyItem(get_leftop(clause),
- restrictinfo->left_sortop,
- false, /* XXX nulls_first? */
- false);
- PathKeyItem *item2 = makePathKeyItem(get_rightop(clause),
- restrictinfo->right_sortop,
- false, /* XXX nulls_first? */
- false);
- List *newset;
- ListCell *cursetlink;
-
- /* We might see a clause X=X; don't make a single-element list from it */
- if (equal(item1, item2))
- return;
-
- /*
- * Our plan is to make a two-element set, then sweep through the existing
- * equijoin sets looking for matches to item1 or item2. When we find one,
- * we remove that set from equi_key_list and union it into our new set.
- * When done, we add the new set to the front of equi_key_list.
- *
- * It may well be that the two items we're given are already known to be
- * equijoin-equivalent, in which case we don't need to change our data
- * structure. If we find both of them in the same equivalence set to
- * start with, we can quit immediately.
- *
- * This is a standard UNION-FIND problem, for which there exist better
- * data structures than simple lists. If this code ever proves to be a
- * bottleneck then it could be sped up --- but for now, simple is
- * beautiful.
- */
- newset = NIL;
-
- /* cannot use foreach here because of possible lremove */
- cursetlink = list_head(root->equi_key_list);
- while (cursetlink)
- {
- List *curset = (List *) lfirst(cursetlink);
- bool item1here = list_member(curset, item1);
- bool item2here = list_member(curset, item2);
-
- /* must advance cursetlink before lremove possibly pfree's it */
- cursetlink = lnext(cursetlink);
-
- if (item1here || item2here)
- {
- /*
- * If find both in same equivalence set, no need to do any more
- */
- if (item1here && item2here)
- {
- /* Better not have seen only one in an earlier set... */
- Assert(newset == NIL);
- return;
- }
-
- /* Build the new set only when we know we must */
- if (newset == NIL)
- newset = list_make2(item1, item2);
-
- /* Found a set to merge into our new set */
- newset = list_concat_unique(newset, curset);
-
- /*
- * Remove old set from equi_key_list.
- */
- root->equi_key_list = list_delete_ptr(root->equi_key_list, curset);
- list_free(curset); /* might as well recycle old cons cells */
- }
- }
+ PathKey *pk = makeNode(PathKey);
- /* Build the new set only when we know we must */
- if (newset == NIL)
- newset = list_make2(item1, item2);
+ pk->pk_eclass = eclass;
+ pk->pk_opfamily = opfamily;
+ pk->pk_strategy = strategy;
+ pk->pk_nulls_first = nulls_first;
- root->equi_key_list = lcons(newset, root->equi_key_list);
+ return pk;
}
/*
- * generate_implied_equalities
- * Scan the completed equi_key_list for the query, and generate explicit
- * qualifications (WHERE clauses) for all the pairwise equalities not
- * already mentioned in the quals; or remove qualifications found to be
- * redundant.
- *
- * Adding deduced equalities is useful because the additional clauses help
- * the selectivity-estimation code and may allow better joins to be chosen;
- * and in fact it's *necessary* to ensure that sort keys we think are
- * equivalent really are (see src/backend/optimizer/README for more info).
- *
- * If an equi_key_list set includes any constants then we adopt a different
- * strategy: we record all the "var = const" deductions we can make, and
- * actively remove all the "var = var" clauses that are implied by the set
- * (including the clauses that originally gave rise to the set!). The reason
- * is that given input like "a = b AND b = 42", once we have deduced "a = 42"
- * there is no longer any need to apply the clause "a = b"; not only is
- * it a waste of time to check it, but we will misestimate selectivity if the
- * clause is left in. So we must remove it. For this purpose, any pathkey
- * item that mentions no Vars of the current level can be taken as a constant.
- * (The only case where this would be risky is if the item contains volatile
- * functions; but we will never consider such an expression to be a pathkey
- * at all, because check_mergejoinable() will reject it.)
- *
- * Also, when we have constants in an equi_key_list we can try to propagate
- * the constants into outer joins; see generate_outer_join_implications
- * for discussion.
+ * make_canonical_pathkey
+ * Given the parameters for a PathKey, find any pre-existing matching
+ * pathkey in the query's list of "canonical" pathkeys. Make a new
+ * entry if there's not one already.
*
- * This routine just walks the equi_key_list to find all pairwise equalities.
- * We call process_implied_equality (in plan/initsplan.c) to adjust the
- * restrictinfo datastructures for each pair.
+ * Note that this function must not be used until after we have completed
+ * merging EquivalenceClasses.
*/
-void
-generate_implied_equalities(PlannerInfo *root)
+static PathKey *
+make_canonical_pathkey(PlannerInfo *root,
+ EquivalenceClass *eclass, Oid opfamily,
+ int strategy, bool nulls_first)
{
- ListCell *cursetlink;
-
- foreach(cursetlink, root->equi_key_list)
- {
- List *curset = (List *) lfirst(cursetlink);
- int nitems = list_length(curset);
- Relids *relids;
- bool have_consts;
- ListCell *ptr1;
- int i1;
+ PathKey *pk;
+ ListCell *lc;
+ MemoryContext oldcontext;
- /*
- * A set containing only two items cannot imply any equalities beyond
- * the one that created the set, so we can skip it --- unless outer
- * joins appear in the query.
- */
- if (nitems < 3 && !root->hasOuterJoins)
- continue;
+ /* The passed eclass might be non-canonical, so chase up to the top */
+ while (eclass->ec_merged)
+ eclass = eclass->ec_merged;
- /*
- * Collect info about relids mentioned in each item. For this routine
- * we only really care whether there are any at all in each item, but
- * process_implied_equality() needs the exact sets, so we may as well
- * pull them here.
- */
- relids = (Relids *) palloc(nitems * sizeof(Relids));
- have_consts = false;
- i1 = 0;
- foreach(ptr1, curset)
- {
- PathKeyItem *item1 = (PathKeyItem *) lfirst(ptr1);
+ foreach(lc, root->canon_pathkeys)
+ {
+ pk = (PathKey *) lfirst(lc);
+ if (eclass == pk->pk_eclass &&
+ opfamily == pk->pk_opfamily &&
+ strategy == pk->pk_strategy &&
+ nulls_first == pk->pk_nulls_first)
+ return pk;
+ }
- relids[i1] = pull_varnos(item1->key);
- if (bms_is_empty(relids[i1]))
- have_consts = true;
- i1++;
- }
+ /*
+ * Be sure canonical pathkeys are allocated in the main planning context.
+ * Not an issue in normal planning, but it is for GEQO.
+ */
+ oldcontext = MemoryContextSwitchTo(root->planner_cxt);
- /*
- * Match each item in the set with all that appear after it (it's
- * sufficient to generate A=B, need not process B=A too).
- *
- * A set containing only two items cannot imply any equalities beyond
- * the one that created the set, so we can skip this processing in
- * that case.
- */
- if (nitems >= 3)
- {
- i1 = 0;
- foreach(ptr1, curset)
- {
- PathKeyItem *item1 = (PathKeyItem *) lfirst(ptr1);
- bool i1_is_variable = !bms_is_empty(relids[i1]);
- ListCell *ptr2;
- int i2 = i1 + 1;
+ pk = makePathKey(eclass, opfamily, strategy, nulls_first);
+ root->canon_pathkeys = lappend(root->canon_pathkeys, pk);
- for_each_cell(ptr2, lnext(ptr1))
- {
- PathKeyItem *item2 = (PathKeyItem *) lfirst(ptr2);
- bool i2_is_variable = !bms_is_empty(relids[i2]);
-
- /*
- * If it's "const = const" then just ignore it altogether.
- * There is no place in the restrictinfo structure to
- * store it. (If the two consts are in fact unequal, then
- * propagating the comparison to Vars will cause us to
- * produce zero rows out, as expected.)
- */
- if (i1_is_variable || i2_is_variable)
- {
- /*
- * Tell process_implied_equality to delete the clause,
- * not add it, if it's "var = var" and we have
- * constants present in the list.
- */
- bool delete_it = (have_consts &&
- i1_is_variable &&
- i2_is_variable);
-
- process_implied_equality(root,
- item1->key, item2->key,
- item1->sortop, item2->sortop,
- relids[i1], relids[i2],
- delete_it);
- }
- i2++;
- }
- i1++;
- }
- }
+ MemoryContextSwitchTo(oldcontext);
- /*
- * If we have constant(s) and outer joins, try to propagate the
- * constants through outer-join quals.
- */
- if (have_consts && root->hasOuterJoins)
- generate_outer_join_implications(root, curset, relids);
- }
+ return pk;
}
/*
- * generate_outer_join_implications
- * Generate clauses that can be deduced in outer-join situations.
+ * pathkey_is_redundant
+ * Is a pathkey redundant with one already in the given list?
*
- * When we have mergejoinable clauses A = B that are outer-join clauses,
- * we can't blindly combine them with other clauses A = C to deduce B = C,
- * since in fact the "equality" A = B won't necessarily hold above the
- * outer join (one of the variables might be NULL instead). Nonetheless
- * there are cases where we can add qual clauses using transitivity.
+ * Both the given pathkey and the list members must be canonical for this
+ * to work properly. We detect two cases:
*
- * One case that we look for here is an outer-join clause OUTERVAR = INNERVAR
- * combined with a pushed-down (valid everywhere) clause OUTERVAR = CONSTANT.
- * It is safe and useful to push a clause INNERVAR = CONSTANT into the
- * evaluation of the inner (nullable) relation, because any inner rows not
- * meeting this condition will not contribute to the outer-join result anyway.
- * (Any outer rows they could join to will be eliminated by the pushed-down
- * clause.)
+ * 1. If the new pathkey's equivalence class contains a constant, and isn't
+ * below an outer join, then we can disregard it as a sort key. An example:
+ * SELECT ... WHERE x = 42 ORDER BY x, y;
+ * We may as well just sort by y. Note that because of opfamily matching,
+ * this is semantically correct: we know that the equality constraint is one
+ * that actually binds the variable to a single value in the terms of any
+ * ordering operator that might go with the eclass. This rule not only lets
+ * us simplify (or even skip) explicit sorts, but also allows matching index
+ * sort orders to a query when there are don't-care index columns.
*
- * Note that the above rule does not work for full outer joins, nor for
- * pushed-down restrictions on an inner-side variable; nor is it very
- * interesting to consider cases where the pushed-down clause involves
- * relations entirely outside the outer join, since such clauses couldn't
- * be pushed into the inner side's scan anyway. So the restriction to
- * outervar = pseudoconstant is not really giving up anything.
+ * 2. If the new pathkey's equivalence class is the same as that of any
+ * existing member of the pathkey list, then it is redundant. Some examples:
+ * SELECT ... ORDER BY x, x;
+ * SELECT ... ORDER BY x, x DESC;
+ * SELECT ... WHERE x = y ORDER BY x, y;
+ * In all these cases the second sort key cannot distinguish values that are
+ * considered equal by the first, and so there's no point in using it.
+ * Note in particular that we need not compare opfamily (all the opfamilies
+ * of the EC have the same notion of equality) nor sort direction.
*
- * For full-join cases, we can only do something useful if it's a FULL JOIN
- * USING and a merged column has a restriction MERGEDVAR = CONSTANT. By
- * the time it gets here, the restriction will look like
- * COALESCE(LEFTVAR, RIGHTVAR) = CONSTANT
- * and we will have a join clause LEFTVAR = RIGHTVAR that we can match the
- * COALESCE expression to. In this situation we can push LEFTVAR = CONSTANT
- * and RIGHTVAR = CONSTANT into the input relations, since any rows not
- * meeting these conditions cannot contribute to the join result.
- *
- * Again, there isn't any traction to be gained by trying to deal with
- * clauses comparing a mergedvar to a non-pseudoconstant. So we can make
- * use of the equi_key_lists to quickly find the interesting pushed-down
- * clauses. The interesting outer-join clauses were accumulated for us by
- * distribute_qual_to_rels.
- *
- * equi_key_set: a list of PathKeyItems that are known globally equivalent,
- * at least one of which is a pseudoconstant.
- * relids: an array of Relids sets showing the relation membership of each
- * PathKeyItem in equi_key_set.
+ * Because the equivclass.c machinery forms only one copy of any EC per query,
+ * pointer comparison is enough to decide whether canonical ECs are the same.
*/
-static void
-generate_outer_join_implications(PlannerInfo *root,
- List *equi_key_set,
- Relids *relids)
+static bool
+pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys)
{
- ListCell *l;
- int i = 0;
+ EquivalenceClass *new_ec = new_pathkey->pk_eclass;
+ ListCell *lc;
- /* Process each non-constant element of equi_key_set */
- foreach(l, equi_key_set)
- {
- PathKeyItem *item1 = (PathKeyItem *) lfirst(l);
+ /* Assert we've been given canonical pathkeys */
+ Assert(!new_ec->ec_merged);
- if (!bms_is_empty(relids[i]))
- {
- sub_generate_join_implications(root, equi_key_set, relids,
- item1->key,
- item1->sortop,
- relids[i]);
- }
- i++;
- }
-}
-
-/*
- * sub_generate_join_implications
- * Propagate a constant equality through outer join clauses.
- *
- * The item described by item1/sortop1/item1_relids has been determined
- * to be equal to the constant(s) listed in equi_key_set. Recursively
- * trace out the implications of this.
- *
- * equi_key_set and relids are as for generate_outer_join_implications.
- */
-static void
-sub_generate_join_implications(PlannerInfo *root,
- List *equi_key_set, Relids *relids,
- Node *item1, Oid sortop1, Relids item1_relids)
-
-{
- ListCell *l;
+ /* Check for EC containing a constant --- unconditionally redundant */
+ if (MUST_BE_REDUNDANT(new_ec))
+ return true;
- /*
- * Examine each mergejoinable outer-join clause with OUTERVAR on left,
- * looking for an OUTERVAR identical to item1
- */
- foreach(l, root->left_join_clauses)
+ /* If same EC already used in list, then redundant */
+ foreach(lc, pathkeys)
{
- RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
- Node *leftop = get_leftop(rinfo->clause);
-
- if (equal(leftop, item1) && rinfo->left_sortop == sortop1)
- {
- /*
- * Match, so find constant member(s) of set and generate implied
- * INNERVAR = CONSTANT
- */
- Node *rightop = get_rightop(rinfo->clause);
+ PathKey *old_pathkey = (PathKey *) lfirst(lc);
- process_implied_const_eq(root, equi_key_set, relids,
- rightop,
- rinfo->right_sortop,
- rinfo->right_relids,
- false);
+ /* Assert we've been given canonical pathkeys */
+ Assert(!old_pathkey->pk_eclass->ec_merged);
- /*
- * We can remove explicit tests of this outer-join qual, too,
- * since we now have tests forcing each of its sides to the same
- * value.
- */
- process_implied_equality(root,
- leftop, rightop,
- rinfo->left_sortop, rinfo->right_sortop,
- rinfo->left_relids, rinfo->right_relids,
- true);
-
- /*
- * And recurse to see if we can deduce anything from INNERVAR =
- * CONSTANT
- */
- sub_generate_join_implications(root, equi_key_set, relids,
- rightop,
- rinfo->right_sortop,
- rinfo->right_relids);
- }
- }
-
- /* The same, looking at clauses with OUTERVAR on right */
- foreach(l, root->right_join_clauses)
- {
- RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
- Node *rightop = get_rightop(rinfo->clause);
-
- if (equal(rightop, item1) && rinfo->right_sortop == sortop1)
- {
- /*
- * Match, so find constant member(s) of set and generate implied
- * INNERVAR = CONSTANT
- */
- Node *leftop = get_leftop(rinfo->clause);
-
- process_implied_const_eq(root, equi_key_set, relids,
- leftop,
- rinfo->left_sortop,
- rinfo->left_relids,
- false);
-
- /*
- * We can remove explicit tests of this outer-join qual, too,
- * since we now have tests forcing each of its sides to the same
- * value.
- */
- process_implied_equality(root,
- leftop, rightop,
- rinfo->left_sortop, rinfo->right_sortop,
- rinfo->left_relids, rinfo->right_relids,
- true);
-
- /*
- * And recurse to see if we can deduce anything from INNERVAR =
- * CONSTANT
- */
- sub_generate_join_implications(root, equi_key_set, relids,
- leftop,
- rinfo->left_sortop,
- rinfo->left_relids);
- }
- }
-
- /*
- * Only COALESCE(x,y) items can possibly match full joins
- */
- if (IsA(item1, CoalesceExpr))
- {
- CoalesceExpr *cexpr = (CoalesceExpr *) item1;
- Node *cfirst;
- Node *csecond;
-
- if (list_length(cexpr->args) != 2)
- return;
- cfirst = (Node *) linitial(cexpr->args);
- csecond = (Node *) lsecond(cexpr->args);
-
- /*
- * Examine each mergejoinable full-join clause, looking for a clause
- * of the form "x = y" matching the COALESCE(x,y) expression
- */
- foreach(l, root->full_join_clauses)
- {
- RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
- Node *leftop = get_leftop(rinfo->clause);
- Node *rightop = get_rightop(rinfo->clause);
-
- /*
- * We can assume the COALESCE() inputs are in the same order as
- * the join clause, since both were automatically generated in the
- * cases we care about.
- *
- * XXX currently this may fail to match in cross-type cases
- * because the COALESCE will contain typecast operations while the
- * join clause may not (if there is a cross-type mergejoin
- * operator available for the two column types). Is it OK to strip
- * implicit coercions from the COALESCE arguments? What of the
- * sortops in such cases?
- */
- if (equal(leftop, cfirst) &&
- equal(rightop, csecond) &&
- rinfo->left_sortop == sortop1 &&
- rinfo->right_sortop == sortop1)
- {
- /*
- * Match, so find constant member(s) of set and generate
- * implied LEFTVAR = CONSTANT
- */
- process_implied_const_eq(root, equi_key_set, relids,
- leftop,
- rinfo->left_sortop,
- rinfo->left_relids,
- false);
- /* ... and RIGHTVAR = CONSTANT */
- process_implied_const_eq(root, equi_key_set, relids,
- rightop,
- rinfo->right_sortop,
- rinfo->right_relids,
- false);
- /* ... and remove COALESCE() = CONSTANT */
- process_implied_const_eq(root, equi_key_set, relids,
- item1,
- sortop1,
- item1_relids,
- true);
-
- /*
- * We can remove explicit tests of this outer-join qual, too,
- * since we now have tests forcing each of its sides to the
- * same value.
- */
- process_implied_equality(root,
- leftop, rightop,
- rinfo->left_sortop,
- rinfo->right_sortop,
- rinfo->left_relids,
- rinfo->right_relids,
- true);
-
- /*
- * And recurse to see if we can deduce anything from LEFTVAR =
- * CONSTANT
- */
- sub_generate_join_implications(root, equi_key_set, relids,
- leftop,
- rinfo->left_sortop,
- rinfo->left_relids);
- /* ... and RIGHTVAR = CONSTANT */
- sub_generate_join_implications(root, equi_key_set, relids,
- rightop,
- rinfo->right_sortop,
- rinfo->right_relids);
-
- }
- }
- }
-}
-
-/*
- * process_implied_const_eq
- * Apply process_implied_equality with the given item and each
- * pseudoconstant member of equi_key_set.
- *
- * equi_key_set and relids are as for generate_outer_join_implications,
- * the other parameters as for process_implied_equality.
- */
-static void
-process_implied_const_eq(PlannerInfo *root, List *equi_key_set, Relids *relids,
- Node *item1, Oid sortop1, Relids item1_relids,
- bool delete_it)
-{
- ListCell *l;
- bool found = false;
- int i = 0;
-
- foreach(l, equi_key_set)
- {
- PathKeyItem *item2 = (PathKeyItem *) lfirst(l);
-
- if (bms_is_empty(relids[i]))
- {
- process_implied_equality(root,
- item1, item2->key,
- sortop1, item2->sortop,
- item1_relids, NULL,
- delete_it);
- found = true;
- }
- i++;
+ if (new_ec == old_pathkey->pk_eclass)
+ return true;
}
- /* Caller screwed up if no constants in list */
- Assert(found);
-}
-/*
- * exprs_known_equal
- * Detect whether two expressions are known equal due to equijoin clauses.
- *
- * Note: does not bother to check for "equal(item1, item2)"; caller must
- * check that case if it's possible to pass identical items.
- */
-bool
-exprs_known_equal(PlannerInfo *root, Node *item1, Node *item2)
-{
- ListCell *cursetlink;
-
- foreach(cursetlink, root->equi_key_list)
- {
- List *curset = (List *) lfirst(cursetlink);
- bool item1member = false;
- bool item2member = false;
- ListCell *ptr;
-
- foreach(ptr, curset)
- {
- PathKeyItem *pitem = (PathKeyItem *) lfirst(ptr);
-
- if (equal(item1, pitem->key))
- item1member = true;
- else if (equal(item2, pitem->key))
- item2member = true;
- /* Exit as soon as equality is proven */
- if (item1member && item2member)
- return true;
- }
- }
return false;
}
-
-/*
- * make_canonical_pathkey
- * Given a PathKeyItem, find the equi_key_list subset it is a member of,
- * if any. If so, return a pointer to that sublist, which is the
- * canonical representation (for this query) of that PathKeyItem's
- * equivalence set. If it is not found, add a singleton "equivalence set"
- * to the equi_key_list and return that --- see compare_pathkeys.
- *
- * Note that this function must not be used until after we have completed
- * scanning the WHERE clause for equijoin operators.
- */
-static List *
-make_canonical_pathkey(PlannerInfo *root, PathKeyItem *item)
-{
- List *newset;
- ListCell *cursetlink;
-
- foreach(cursetlink, root->equi_key_list)
- {
- List *curset = (List *) lfirst(cursetlink);
-
- if (list_member(curset, item))
- return curset;
- }
- newset = list_make1(item);
- root->equi_key_list = lcons(newset, root->equi_key_list);
- return newset;
-}
-
/*
* canonicalize_pathkeys
* Convert a not-necessarily-canonical pathkeys list to canonical form.
*
* Note that this function must not be used until after we have completed
- * scanning the WHERE clause for equijoin operators.
+ * merging EquivalenceClasses.
*/
List *
canonicalize_pathkeys(PlannerInfo *root, List *pathkeys)
@@ -701,55 +194,116 @@ canonicalize_pathkeys(PlannerInfo *root, List *pathkeys)
foreach(l, pathkeys)
{
- List *pathkey = (List *) lfirst(l);
- PathKeyItem *item;
- List *cpathkey;
+ PathKey *pathkey = (PathKey *) lfirst(l);
+ EquivalenceClass *eclass;
+ PathKey *cpathkey;
- /*
- * It's sufficient to look at the first entry in the sublist; if there
- * are more entries, they're already part of an equivalence set by
- * definition.
- */
- Assert(pathkey != NIL);
- item = (PathKeyItem *) linitial(pathkey);
- cpathkey = make_canonical_pathkey(root, item);
+ /* Find the canonical (merged) EquivalenceClass */
+ eclass = pathkey->pk_eclass;
+ while (eclass->ec_merged)
+ eclass = eclass->ec_merged;
/*
- * Eliminate redundant ordering requests --- ORDER BY A,A is the same
- * as ORDER BY A. We want to check this only after we have
- * canonicalized the keys, so that equivalent-key knowledge is used
- * when deciding if an item is redundant.
+ * If we can tell it's redundant just from the EC, skip.
+ * pathkey_is_redundant would notice that, but we needn't even bother
+ * constructing the node...
*/
- new_pathkeys = list_append_unique_ptr(new_pathkeys, cpathkey);
+ if (MUST_BE_REDUNDANT(eclass))
+ continue;
+
+ /* OK, build a canonicalized PathKey struct */
+ cpathkey = make_canonical_pathkey(root,
+ eclass,
+ pathkey->pk_opfamily,
+ pathkey->pk_strategy,
+ pathkey->pk_nulls_first);
+
+ /* Add to list unless redundant */
+ if (!pathkey_is_redundant(cpathkey, new_pathkeys))
+ new_pathkeys = lappend(new_pathkeys, cpathkey);
}
return new_pathkeys;
}
-
/*
- * count_canonical_peers
- * Given a PathKeyItem, find the equi_key_list subset it is a member of,
- * if any. If so, return the number of other members of the set.
- * If not, return 0 (without actually adding it to our equi_key_list).
+ * make_pathkey_from_sortinfo
+ * Given an expression, a sortop, and a nulls-first flag, create
+ * a PathKey. If canonicalize = true, the result is a "canonical"
+ * PathKey, otherwise not. (But note it might be redundant anyway.)
*
- * This is a hack to support the rather bogus heuristics in
- * convert_subquery_pathkeys.
+ * canonicalize should always be TRUE after EquivalenceClass merging has
+ * been performed, but FALSE if we haven't done EquivalenceClass merging yet.
*/
-static int
-count_canonical_peers(PlannerInfo *root, PathKeyItem *item)
+static PathKey *
+make_pathkey_from_sortinfo(PlannerInfo *root,
+ Expr *expr, Oid ordering_op,
+ bool nulls_first,
+ bool canonicalize)
{
- ListCell *cursetlink;
+ Oid equality_op;
+ List *opfamilies;
+ Oid opfamily,
+ lefttype,
+ righttype;
+ int strategy;
+ ListCell *lc;
+ EquivalenceClass *eclass;
- foreach(cursetlink, root->equi_key_list)
- {
- List *curset = (List *) lfirst(cursetlink);
+ /*
+ * An ordering operator fully determines the behavior of its opfamily,
+ * so could only meaningfully appear in one family --- or perhaps two
+ * if one builds a reverse-sort opfamily, but there's not much point in
+ * that anymore. But EquivalenceClasses need to contain opfamily lists
+ * based on the family membership of equality operators, which could
+ * easily be bigger. So, look up the equality operator that goes with
+ * the ordering operator (this should be unique) and get its membership.
+ */
+ equality_op = get_equality_op_for_ordering_op(ordering_op);
+ if (!OidIsValid(equality_op)) /* shouldn't happen */
+ elog(ERROR, "could not find equality operator for ordering operator %u",
+ ordering_op);
+ opfamilies = get_mergejoin_opfamilies(equality_op);
+ if (!opfamilies) /* certainly should find some */
+ elog(ERROR, "could not find opfamilies for ordering operator %u",
+ ordering_op);
- if (list_member(curset, item))
- return list_length(curset) - 1;
+ /*
+ * Next we have to determine the strategy number to put into the pathkey.
+ * In the presence of reverse-sort opclasses there might be two answers.
+ * We prefer the one associated with the first opfamilies member that
+ * this ordering_op appears in (this will be consistently defined in
+ * normal system operation; see comments for get_mergejoin_opfamilies()).
+ */
+ opfamily = InvalidOid;
+ strategy = 0;
+ foreach(lc, opfamilies)
+ {
+ opfamily = lfirst_oid(lc);
+ strategy = get_op_opfamily_strategy(ordering_op, opfamily);
+ if (strategy)
+ break;
}
- return 0;
+ if (!(strategy == BTLessStrategyNumber ||
+ strategy == BTGreaterStrategyNumber))
+ elog(ERROR, "ordering operator %u is has wrong strategy number %d",
+ ordering_op, strategy);
+
+ /* Need the declared input type of the operator, too */
+ op_input_types(ordering_op, &lefttype, &righttype);
+ Assert(lefttype == righttype);
+
+ /* Now find or create a matching EquivalenceClass */
+ eclass = get_eclass_for_sort_expr(root, expr, lefttype, opfamilies);
+
+ /* And finally we can find or create a PathKey node */
+ if (canonicalize)
+ return make_canonical_pathkey(root, eclass, opfamily,
+ strategy, nulls_first);
+ else
+ return makePathKey(eclass, opfamily, strategy, nulls_first);
}
+
/****************************************************************************
* PATHKEY COMPARISONS
****************************************************************************/
@@ -760,7 +314,7 @@ count_canonical_peers(PlannerInfo *root, PathKeyItem *item)
* one is "better" than the other.
*
* This function may only be applied to canonicalized pathkey lists.
- * In the canonical representation, sublists can be checked for equality
+ * In the canonical representation, pathkeys can be checked for equality
* by simple pointer comparison.
*/
PathKeysComparison
@@ -771,33 +325,25 @@ compare_pathkeys(List *keys1, List *keys2)
forboth(key1, keys1, key2, keys2)
{
- List *subkey1 = (List *) lfirst(key1);
- List *subkey2 = (List *) lfirst(key2);
+ PathKey *pathkey1 = (PathKey *) lfirst(key1);
+ PathKey *pathkey2 = (PathKey *) lfirst(key2);
/*
* XXX would like to check that we've been given canonicalized input,
* but PlannerInfo not accessible here...
*/
#ifdef NOT_USED
- Assert(list_member_ptr(root->equi_key_list, subkey1));
- Assert(list_member_ptr(root->equi_key_list, subkey2));
+ Assert(list_member_ptr(root->canon_pathkeys, pathkey1));
+ Assert(list_member_ptr(root->canon_pathkeys, pathkey2));
#endif
- /*
- * We will never have two subkeys where one is a subset of the other,
- * because of the canonicalization process. Either they are equal or
- * they ain't. Furthermore, we only need pointer comparison to detect
- * equality.
- */
- if (subkey1 != subkey2)
+ if (pathkey1 != pathkey2)
return PATHKEYS_DIFFERENT; /* no need to keep looking */
}
/*
* If we reached the end of only one list, the other is longer and
- * therefore not a subset. (We assume the additional sublist(s) of the
- * other list are not NIL --- no pathkey list should ever have a NIL
- * sublist.)
+ * therefore not a subset.
*/
if (key1 == NULL && key2 == NULL)
return PATHKEYS_EQUAL;
@@ -912,11 +458,8 @@ get_cheapest_fractional_path_for_pathkeys(List *paths,
* If 'scandir' is BackwardScanDirection, attempt to build pathkeys
* representing a backwards scan of the index. Return NIL if can't do it.
*
- * If 'canonical' is TRUE, we remove duplicate pathkeys (which can occur
- * if two index columns are equijoined, eg WHERE x = 1 AND y = 1). This
- * is required if the result is to be compared directly to a canonical query
- * pathkeys list. However, some callers want a list with exactly one entry
- * per index column, and they must pass FALSE.
+ * The result is canonical, meaning that redundant pathkeys are removed;
+ * it may therefore have fewer entries than there are index columns.
*
* We generate the full pathkeys list whether or not all are useful for the
* current query. Caller should do truncate_useless_pathkeys().
@@ -924,8 +467,7 @@ get_cheapest_fractional_path_for_pathkeys(List *paths,
List *
build_index_pathkeys(PlannerInfo *root,
IndexOptInfo *index,
- ScanDirection scandir,
- bool canonical)
+ ScanDirection scandir)
{
List *retval = NIL;
ListCell *indexprs_item = list_head(index->indexprs);
@@ -936,9 +478,8 @@ build_index_pathkeys(PlannerInfo *root,
Oid sortop;
bool nulls_first;
int ikey;
- Node *indexkey;
- PathKeyItem *item;
- List *cpathkey;
+ Expr *indexkey;
+ PathKey *cpathkey;
if (ScanDirectionIsBackward(scandir))
{
@@ -958,25 +499,26 @@ build_index_pathkeys(PlannerInfo *root,
if (ikey != 0)
{
/* simple index column */
- indexkey = (Node *) find_indexkey_var(root, index->rel, ikey);
+ indexkey = (Expr *) find_indexkey_var(root, index->rel, ikey);
}
else
{
/* expression --- assume we need not copy it */
if (indexprs_item == NULL)
elog(ERROR, "wrong number of index expressions");
- indexkey = (Node *) lfirst(indexprs_item);
+ indexkey = (Expr *) lfirst(indexprs_item);
indexprs_item = lnext(indexprs_item);
}
- /* OK, make a sublist for this sort key */
- item = makePathKeyItem(indexkey, sortop, nulls_first, true);
- cpathkey = make_canonical_pathkey(root, item);
+ /* OK, make a canonical pathkey for this sort key */
+ cpathkey = make_pathkey_from_sortinfo(root,
+ indexkey,
+ sortop,
+ nulls_first,
+ true);
- /* Eliminate redundant ordering info if requested */
- if (canonical)
- retval = list_append_unique_ptr(retval, cpathkey);
- else
+ /* Add to list unless redundant */
+ if (!pathkey_is_redundant(cpathkey, retval))
retval = lappend(retval, cpathkey);
}
@@ -1042,31 +584,31 @@ convert_subquery_pathkeys(PlannerInfo *root, RelOptInfo *rel,
foreach(i, subquery_pathkeys)
{
- List *sub_pathkey = (List *) lfirst(i);
+ PathKey *sub_pathkey = (PathKey *) lfirst(i);
+ EquivalenceClass *sub_eclass = sub_pathkey->pk_eclass;
+ PathKey *best_pathkey = NULL;
+ int best_score = -1;
ListCell *j;
- PathKeyItem *best_item = NULL;
- int best_score = 0;
- List *cpathkey;
/*
- * The sub_pathkey could contain multiple elements (representing
- * knowledge that multiple items are effectively equal). Each element
- * might match none, one, or more of the output columns that are
- * visible to the outer query. This means we may have multiple
- * possible representations of the sub_pathkey in the context of the
- * outer query. Ideally we would generate them all and put them all
- * into a pathkey list of the outer query, thereby propagating
- * equality knowledge up to the outer query. Right now we cannot do
- * so, because the outer query's canonical pathkey sets are already
- * frozen when this is called. Instead we prefer the one that has the
- * highest "score" (number of canonical pathkey peers, plus one if it
- * matches the outer query_pathkeys). This is the most likely to be
- * useful in the outer query.
+ * The sub_pathkey's EquivalenceClass could contain multiple elements
+ * (representing knowledge that multiple items are effectively equal).
+ * Each element might match none, one, or more of the output columns
+ * that are visible to the outer query. This means we may have
+ * multiple possible representations of the sub_pathkey in the context
+ * of the outer query. Ideally we would generate them all and put
+ * them all into an EC of the outer query, thereby propagating
+ * equality knowledge up to the outer query. Right now we cannot do
+ * so, because the outer query's EquivalenceClasses are already frozen
+ * when this is called. Instead we prefer the one that has the highest
+ * "score" (number of EC peers, plus one if it matches the outer
+ * query_pathkeys). This is the most likely to be useful in the outer
+ * query.
*/
- foreach(j, sub_pathkey)
+ foreach(j, sub_eclass->ec_members)
{
- PathKeyItem *sub_item = (PathKeyItem *) lfirst(j);
- Node *sub_key = sub_item->key;
+ EquivalenceMember *sub_member = (EquivalenceMember *) lfirst(j);
+ Expr *sub_expr = sub_member->em_expr;
Expr *rtarg;
ListCell *k;
@@ -1076,26 +618,27 @@ convert_subquery_pathkeys(PlannerInfo *root, RelOptInfo *rel,
* entry. (The latter case is worth extra code because it arises
* frequently in connection with varchar fields.)
*/
- if (IsA(sub_key, RelabelType))
- rtarg = ((RelabelType *) sub_key)->arg;
+ if (IsA(sub_expr, RelabelType))
+ rtarg = ((RelabelType *) sub_expr)->arg;
else
rtarg = NULL;
foreach(k, sub_tlist)
{
TargetEntry *tle = (TargetEntry *) lfirst(k);
- Node *outer_expr;
- PathKeyItem *outer_item;
+ Expr *outer_expr;
+ EquivalenceClass *outer_ec;
+ PathKey *outer_pk;
int score;
/* resjunk items aren't visible to outer query */
if (tle->resjunk)
continue;
- if (equal(tle->expr, sub_key))
+ if (equal(tle->expr, sub_expr))
{
/* Exact match */
- outer_expr = (Node *)
+ outer_expr = (Expr *)
makeVar(rel->relid,
tle->resno,
exprType((Node *) tle->expr),
@@ -1105,35 +648,40 @@ convert_subquery_pathkeys(PlannerInfo *root, RelOptInfo *rel,
else if (rtarg && equal(tle->expr, rtarg))
{
/* Match after discarding RelabelType */
- outer_expr = (Node *)
+ outer_expr = (Expr *)
makeVar(rel->relid,
tle->resno,
exprType((Node *) tle->expr),
exprTypmod((Node *) tle->expr),
0);
- outer_expr = (Node *)
+ outer_expr = (Expr *)
makeRelabelType((Expr *) outer_expr,
- ((RelabelType *) sub_key)->resulttype,
- ((RelabelType *) sub_key)->resulttypmod,
- ((RelabelType *) sub_key)->relabelformat);
+ ((RelabelType *) sub_expr)->resulttype,
+ ((RelabelType *) sub_expr)->resulttypmod,
+ ((RelabelType *) sub_expr)->relabelformat);
}
else
continue;
- /* Found a representation for this sub_key */
- outer_item = makePathKeyItem(outer_expr,
- sub_item->sortop,
- sub_item->nulls_first,
- true);
- /* score = # of mergejoin peers */
- score = count_canonical_peers(root, outer_item);
+ /* Found a representation for this sub_pathkey */
+ outer_ec = get_eclass_for_sort_expr(root,
+ outer_expr,
+ sub_member->em_datatype,
+ sub_eclass->ec_opfamilies);
+ outer_pk = make_canonical_pathkey(root,
+ outer_ec,
+ sub_pathkey->pk_opfamily,
+ sub_pathkey->pk_strategy,
+ sub_pathkey->pk_nulls_first);
+ /* score = # of equivalence peers */
+ score = list_length(outer_ec->ec_members) - 1;
/* +1 if it matches the proper query_pathkeys item */
if (retvallen < outer_query_keys &&
- list_member(list_nth(root->query_pathkeys, retvallen), outer_item))
+ list_nth(root->query_pathkeys, retvallen) == outer_pk)
score++;
if (score > best_score)
{
- best_item = outer_item;
+ best_pathkey = outer_pk;
best_score = score;
}
}
@@ -1143,19 +691,16 @@ convert_subquery_pathkeys(PlannerInfo *root, RelOptInfo *rel,
* If we couldn't find a representation of this sub_pathkey, we're
* done (we can't use the ones to its right, either).
*/
- if (!best_item)
+ if (!best_pathkey)
break;
- /* Canonicalize the chosen item (we did not before) */
- cpathkey = make_canonical_pathkey(root, best_item);
-
/*
* Eliminate redundant ordering info; could happen if outer query
- * equijoins subquery keys...
+ * equivalences subquery keys...
*/
- if (!list_member_ptr(retval, cpathkey))
+ if (!pathkey_is_redundant(best_pathkey, retval))
{
- retval = lappend(retval, cpathkey);
+ retval = lappend(retval, best_pathkey);
retvallen++;
}
}
@@ -1166,19 +711,14 @@ convert_subquery_pathkeys(PlannerInfo *root, RelOptInfo *rel,
/*
* build_join_pathkeys
* Build the path keys for a join relation constructed by mergejoin or
- * nestloop join. These keys should include all the path key vars of the
- * outer path (since the join will retain the ordering of the outer path)
- * plus any vars of the inner path that are equijoined to the outer vars.
- *
- * Per the discussion in backend/optimizer/README, equijoined inner vars
- * can be considered path keys of the result, just the same as the outer
- * vars they were joined with; furthermore, it doesn't matter what kind
- * of join algorithm is actually used.
+ * nestloop join. This is normally the same as the outer path's keys.
*
* EXCEPTION: in a FULL or RIGHT join, we cannot treat the result as
* having the outer path's path keys, because null lefthand rows may be
* inserted at random points. It must be treated as unsorted.
*
+ * We truncate away any pathkeys that are uninteresting for higher joins.
+ *
* 'joinrel' is the join relation that paths are being formed for
* 'jointype' is the join type (inner, left, full, etc)
* 'outer_pathkeys' is the list of the current outer path's path keys
@@ -1197,8 +737,7 @@ build_join_pathkeys(PlannerInfo *root,
/*
* This used to be quite a complex bit of code, but now that all pathkey
* sublists start out life canonicalized, we don't have to do a darn thing
- * here! The inner-rel vars we used to need to add are *already* part of
- * the outer pathkey!
+ * here!
*
* We do, however, need to truncate the pathkeys list, since it may
* contain pathkeys that were useful for forming this joinrel but are
@@ -1216,18 +755,21 @@ build_join_pathkeys(PlannerInfo *root,
* Generate a pathkeys list that represents the sort order specified
* by a list of SortClauses (GroupClauses will work too!)
*
- * NB: the result is NOT in canonical form, but must be passed through
- * canonicalize_pathkeys() before it can be used for comparisons or
- * labeling relation sort orders. (We do things this way because
- * grouping_planner needs to be able to construct requested pathkeys
- * before the pathkey equivalence sets have been created for the query.)
+ * If canonicalize is TRUE, the resulting PathKeys are all in canonical form;
+ * otherwise not. canonicalize should always be TRUE after EquivalenceClass
+ * merging has been performed, but FALSE if we haven't done EquivalenceClass
+ * merging yet. (We provide this option because grouping_planner() needs to
+ * be able to represent requested pathkeys before the equivalence classes have
+ * been created for the query.)
*
* 'sortclauses' is a list of SortClause or GroupClause nodes
* 'tlist' is the targetlist to find the referenced tlist entries in
*/
List *
-make_pathkeys_for_sortclauses(List *sortclauses,
- List *tlist)
+make_pathkeys_for_sortclauses(PlannerInfo *root,
+ List *sortclauses,
+ List *tlist,
+ bool canonicalize)
{
List *pathkeys = NIL;
ListCell *l;
@@ -1235,19 +777,24 @@ make_pathkeys_for_sortclauses(List *sortclauses,
foreach(l, sortclauses)
{
SortClause *sortcl = (SortClause *) lfirst(l);
- Node *sortkey;
- PathKeyItem *pathkey;
-
- sortkey = get_sortgroupclause_expr(sortcl, tlist);
- pathkey = makePathKeyItem(sortkey, sortcl->sortop, sortcl->nulls_first,
- true);
-
- /*
- * The pathkey becomes a one-element sublist, for now;
- * canonicalize_pathkeys() might replace it with a longer sublist
- * later.
- */
- pathkeys = lappend(pathkeys, list_make1(pathkey));
+ Expr *sortkey;
+ PathKey *pathkey;
+
+ sortkey = (Expr *) get_sortgroupclause_expr(sortcl, tlist);
+ pathkey = make_pathkey_from_sortinfo(root,
+ sortkey,
+ sortcl->sortop,
+ sortcl->nulls_first,
+ canonicalize);
+
+ /* Canonical form eliminates redundant ordering keys */
+ if (canonicalize)
+ {
+ if (!pathkey_is_redundant(pathkey, pathkeys))
+ pathkeys = lappend(pathkeys, pathkey);
+ }
+ else
+ pathkeys = lappend(pathkeys, pathkey);
}
return pathkeys;
}
@@ -1257,49 +804,44 @@ make_pathkeys_for_sortclauses(List *sortclauses,
****************************************************************************/
/*
- * cache_mergeclause_pathkeys
- * Make the cached pathkeys valid in a mergeclause restrictinfo.
+ * cache_mergeclause_eclasses
+ * Make the cached EquivalenceClass links valid in a mergeclause
+ * restrictinfo.
*
- * RestrictInfo contains fields in which we may cache the result
- * of looking up the canonical pathkeys for the left and right sides
- * of the mergeclause. (Note that in normal cases they will be the
- * same, but not if the mergeclause appears above an OUTER JOIN.)
- * This is a worthwhile savings because these routines will be invoked
- * many times when dealing with a many-relation query.
- *
- * We have to be careful that the cached values are palloc'd in the same
- * context the RestrictInfo node itself is in. This is not currently a
- * problem for normal planning, but it is an issue for GEQO planning.
+ * RestrictInfo contains fields in which we may cache pointers to
+ * EquivalenceClasses for the left and right inputs of the mergeclause.
+ * (If the mergeclause is a true equivalence clause these will be the
+ * same EquivalenceClass, otherwise not.)
*/
void
-cache_mergeclause_pathkeys(PlannerInfo *root, RestrictInfo *restrictinfo)
+cache_mergeclause_eclasses(PlannerInfo *root, RestrictInfo *restrictinfo)
{
- Node *key;
- PathKeyItem *item;
- MemoryContext oldcontext;
-
- Assert(restrictinfo->mergejoinoperator != InvalidOid);
+ Assert(restrictinfo->mergeopfamilies != NIL);
- if (restrictinfo->left_pathkey == NIL)
- {
- oldcontext = MemoryContextSwitchTo(GetMemoryChunkContext(restrictinfo));
- key = get_leftop(restrictinfo->clause);
- item = makePathKeyItem(key, restrictinfo->left_sortop,
- false, /* XXX nulls_first? */
- false);
- restrictinfo->left_pathkey = make_canonical_pathkey(root, item);
- MemoryContextSwitchTo(oldcontext);
- }
- if (restrictinfo->right_pathkey == NIL)
+ /* the cached values should be either both set or both not */
+ if (restrictinfo->left_ec == NULL)
{
- oldcontext = MemoryContextSwitchTo(GetMemoryChunkContext(restrictinfo));
- key = get_rightop(restrictinfo->clause);
- item = makePathKeyItem(key, restrictinfo->right_sortop,
- false, /* XXX nulls_first? */
- false);
- restrictinfo->right_pathkey = make_canonical_pathkey(root, item);
- MemoryContextSwitchTo(oldcontext);
+ Expr *clause = restrictinfo->clause;
+ Oid lefttype,
+ righttype;
+
+ /* Need the declared input types of the operator */
+ op_input_types(((OpExpr *) clause)->opno, &lefttype, &righttype);
+
+ /* Find or create a matching EquivalenceClass for each side */
+ restrictinfo->left_ec =
+ get_eclass_for_sort_expr(root,
+ (Expr *) get_leftop(clause),
+ lefttype,
+ restrictinfo->mergeopfamilies);
+ restrictinfo->right_ec =
+ get_eclass_for_sort_expr(root,
+ (Expr *) get_rightop(clause),
+ righttype,
+ restrictinfo->mergeopfamilies);
}
+ else
+ Assert(restrictinfo->right_ec != NULL);
}
/*
@@ -1309,72 +851,82 @@ cache_mergeclause_pathkeys(PlannerInfo *root, RestrictInfo *restrictinfo)
* If successful, it returns a list of mergeclauses.
*
* 'pathkeys' is a pathkeys list showing the ordering of an input path.
- * It doesn't matter whether it is for the inner or outer path.
+ * 'outer_keys' is TRUE if these keys are for the outer input path,
+ * FALSE if for inner.
* 'restrictinfos' is a list of mergejoinable restriction clauses for the
* join relation being formed.
*
+ * The restrictinfos must be marked (via outer_is_left) to show which side
+ * of each clause is associated with the current outer path. (See
+ * select_mergejoin_clauses())
+ *
* The result is NIL if no merge can be done, else a maximal list of
* usable mergeclauses (represented as a list of their restrictinfo nodes).
- *
- * XXX Ideally we ought to be considering context, ie what path orderings
- * are available on the other side of the join, rather than just making
- * an arbitrary choice among the mergeclauses that will work for this side
- * of the join.
*/
List *
find_mergeclauses_for_pathkeys(PlannerInfo *root,
List *pathkeys,
+ bool outer_keys,
List *restrictinfos)
{
List *mergeclauses = NIL;
ListCell *i;
- /* make sure we have pathkeys cached in the clauses */
+ /* make sure we have eclasses cached in the clauses */
foreach(i, restrictinfos)
{
- RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(i);
+ RestrictInfo *rinfo = (RestrictInfo *) lfirst(i);
- cache_mergeclause_pathkeys(root, restrictinfo);
+ cache_mergeclause_eclasses(root, rinfo);
}
foreach(i, pathkeys)
{
- List *pathkey = (List *) lfirst(i);
+ PathKey *pathkey = (PathKey *) lfirst(i);
+ EquivalenceClass *pathkey_ec = pathkey->pk_eclass;
List *matched_restrictinfos = NIL;
ListCell *j;
- /*
- * We can match a pathkey against either left or right side of any
- * mergejoin clause. (We examine both sides since we aren't told if
- * the given pathkeys are for inner or outer input path; no confusion
- * is possible.) Furthermore, if there are multiple matching clauses,
- * take them all. In plain inner-join scenarios we expect only one
- * match, because redundant-mergeclause elimination will have removed
- * any redundant mergeclauses from the input list. However, in
- * outer-join scenarios there might be multiple matches. An example is
+ /*----------
+ * A mergejoin clause matches a pathkey if it has the same EC.
+ * If there are multiple matching clauses, take them all. In plain
+ * inner-join scenarios we expect only one match, because
+ * equivalence-class processing will have removed any redundant
+ * mergeclauses. However, in outer-join scenarios there might be
+ * multiple matches. An example is
*
- * select * from a full join b on a.v1 = b.v1 and a.v2 = b.v2 and a.v1
- * = b.v2;
+ * select * from a full join b
+ * on a.v1 = b.v1 and a.v2 = b.v2 and a.v1 = b.v2;
*
- * Given the pathkeys ((a.v1), (a.v2)) it is okay to return all three
+ * Given the pathkeys ({a.v1}, {a.v2}) it is okay to return all three
* clauses (in the order a.v1=b.v1, a.v1=b.v2, a.v2=b.v2) and indeed
* we *must* do so or we will be unable to form a valid plan.
+ *
+ * We expect that the given pathkeys list is canonical, which means
+ * no two members have the same EC, so it's not possible for this
+ * code to enter the same mergeclause into the result list twice.
+ *
+ * XXX it's possible that multiple matching clauses might have
+ * different ECs on the other side, in which case the order we put
+ * them into our result makes a difference in the pathkeys required
+ * for the other input path. However this routine hasn't got any info
+ * about which order would be best, so for now we disregard that case
+ * (which is probably a corner case anyway).
+ *----------
*/
foreach(j, restrictinfos)
{
- RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(j);
+ RestrictInfo *rinfo = (RestrictInfo *) lfirst(j);
+ EquivalenceClass *clause_ec;
- /*
- * We can compare canonical pathkey sublists by simple pointer
- * equality; see compare_pathkeys.
- */
- if ((pathkey == restrictinfo->left_pathkey ||
- pathkey == restrictinfo->right_pathkey) &&
- !list_member_ptr(mergeclauses, restrictinfo))
- {
- matched_restrictinfos = lappend(matched_restrictinfos,
- restrictinfo);
- }
+ if (outer_keys)
+ clause_ec = rinfo->outer_is_left ?
+ rinfo->left_ec : rinfo->right_ec;
+ else
+ clause_ec = rinfo->outer_is_left ?
+ rinfo->right_ec : rinfo->left_ec;
+ if (clause_ec == pathkey_ec)
+ matched_restrictinfos = lappend(matched_restrictinfos, rinfo);
}
/*
@@ -1396,63 +948,270 @@ find_mergeclauses_for_pathkeys(PlannerInfo *root,
}
/*
- * make_pathkeys_for_mergeclauses
+ * select_outer_pathkeys_for_merge
+ * Builds a pathkey list representing a possible sort ordering
+ * that can be used with the given mergeclauses.
+ *
+ * 'mergeclauses' is a list of RestrictInfos for mergejoin clauses
+ * that will be used in a merge join.
+ * 'joinrel' is the join relation we are trying to construct.
+ *
+ * The restrictinfos must be marked (via outer_is_left) to show which side
+ * of each clause is associated with the current outer path. (See
+ * select_mergejoin_clauses())
+ *
+ * Returns a pathkeys list that can be applied to the outer relation.
+ *
+ * Since we assume here that a sort is required, there is no particular use
+ * in matching any available ordering of the outerrel. (joinpath.c has an
+ * entirely separate code path for considering sort-free mergejoins.) Rather,
+ * it's interesting to try to match the requested query_pathkeys so that a
+ * second output sort may be avoided; and failing that, we try to list "more
+ * popular" keys (those with the most unmatched EquivalenceClass peers)
+ * earlier, in hopes of making the resulting ordering useful for as many
+ * higher-level mergejoins as possible.
+ */
+List *
+select_outer_pathkeys_for_merge(PlannerInfo *root,
+ List *mergeclauses,
+ RelOptInfo *joinrel)
+{
+ List *pathkeys = NIL;
+ int nClauses = list_length(mergeclauses);
+ EquivalenceClass **ecs;
+ int *scores;
+ int necs;
+ ListCell *lc;
+ int j;
+
+ /* Might have no mergeclauses */
+ if (nClauses == 0)
+ return NIL;
+
+ /*
+ * Make arrays of the ECs used by the mergeclauses (dropping any
+ * duplicates) and their "popularity" scores.
+ */
+ ecs = (EquivalenceClass **) palloc(nClauses * sizeof(EquivalenceClass *));
+ scores = (int *) palloc(nClauses * sizeof(int));
+ necs = 0;
+
+ foreach(lc, mergeclauses)
+ {
+ RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
+ EquivalenceClass *oeclass;
+ int score;
+ ListCell *lc2;
+
+ /* get the outer eclass */
+ cache_mergeclause_eclasses(root, rinfo);
+
+ if (rinfo->outer_is_left)
+ oeclass = rinfo->left_ec;
+ else
+ oeclass = rinfo->right_ec;
+
+ /* reject duplicates */
+ for (j = 0; j < necs; j++)
+ {
+ if (ecs[j] == oeclass)
+ break;
+ }
+ if (j < necs)
+ continue;
+
+ /* compute score */
+ score = 0;
+ foreach(lc2, oeclass->ec_members)
+ {
+ EquivalenceMember *em = (EquivalenceMember *) lfirst(lc2);
+
+ /* Potential future join partner? */
+ if (!em->em_is_const && !em->em_is_child &&
+ !bms_overlap(em->em_relids, joinrel->relids))
+ score++;
+ }
+
+ ecs[necs] = oeclass;
+ scores[necs] = score;
+ necs++;
+ }
+
+ /*
+ * Find out if we have all the ECs mentioned in query_pathkeys; if so
+ * we can generate a sort order that's also useful for final output.
+ * There is no percentage in a partial match, though, so we have to
+ * have 'em all.
+ */
+ if (root->query_pathkeys)
+ {
+ foreach(lc, root->query_pathkeys)
+ {
+ PathKey *query_pathkey = (PathKey *) lfirst(lc);
+ EquivalenceClass *query_ec = query_pathkey->pk_eclass;
+
+ for (j = 0; j < necs; j++)
+ {
+ if (ecs[j] == query_ec)
+ break; /* found match */
+ }
+ if (j >= necs)
+ break; /* didn't find match */
+ }
+ /* if we got to the end of the list, we have them all */
+ if (lc == NULL)
+ {
+ /* copy query_pathkeys as starting point for our output */
+ pathkeys = list_copy(root->query_pathkeys);
+ /* mark their ECs as already-emitted */
+ foreach(lc, root->query_pathkeys)
+ {
+ PathKey *query_pathkey = (PathKey *) lfirst(lc);
+ EquivalenceClass *query_ec = query_pathkey->pk_eclass;
+
+ for (j = 0; j < necs; j++)
+ {
+ if (ecs[j] == query_ec)
+ {
+ scores[j] = -1;
+ break;
+ }
+ }
+ }
+ }
+ }
+
+ /*
+ * Add remaining ECs to the list in popularity order, using a default
+ * sort ordering. (We could use qsort() here, but the list length is
+ * usually so small it's not worth it.)
+ */
+ for (;;)
+ {
+ int best_j;
+ int best_score;
+ EquivalenceClass *ec;
+ PathKey *pathkey;
+
+ best_j = 0;
+ best_score = scores[0];
+ for (j = 1; j < necs; j++)
+ {
+ if (scores[j] > best_score)
+ {
+ best_j = j;
+ best_score = scores[j];
+ }
+ }
+ if (best_score < 0)
+ break; /* all done */
+ ec = ecs[best_j];
+ scores[best_j] = -1;
+ pathkey = make_canonical_pathkey(root,
+ ec,
+ linitial_oid(ec->ec_opfamilies),
+ BTLessStrategyNumber,
+ false);
+ /* can't be redundant because no duplicate ECs */
+ Assert(!pathkey_is_redundant(pathkey, pathkeys));
+ pathkeys = lappend(pathkeys, pathkey);
+ }
+
+ pfree(ecs);
+ pfree(scores);
+
+ return pathkeys;
+}
+
+/*
+ * make_inner_pathkeys_for_merge
* Builds a pathkey list representing the explicit sort order that
- * must be applied to a path in order to make it usable for the
+ * must be applied to an inner path to make it usable with the
* given mergeclauses.
*
* 'mergeclauses' is a list of RestrictInfos for mergejoin clauses
* that will be used in a merge join.
- * 'rel' is the relation the pathkeys will apply to (ie, either the inner
- * or outer side of the proposed join rel).
+ * 'outer_pathkeys' are the already-known canonical pathkeys for the outer
+ * side of the join.
*
- * Returns a pathkeys list that can be applied to the indicated relation.
+ * The restrictinfos must be marked (via outer_is_left) to show which side
+ * of each clause is associated with the current outer path. (See
+ * select_mergejoin_clauses())
+ *
+ * Returns a pathkeys list that can be applied to the inner relation.
*
* Note that it is not this routine's job to decide whether sorting is
* actually needed for a particular input path. Assume a sort is necessary;
* just make the keys, eh?
*/
List *
-make_pathkeys_for_mergeclauses(PlannerInfo *root,
- List *mergeclauses,
- RelOptInfo *rel)
+make_inner_pathkeys_for_merge(PlannerInfo *root,
+ List *mergeclauses,
+ List *outer_pathkeys)
{
List *pathkeys = NIL;
- ListCell *l;
+ EquivalenceClass *lastoeclass;
+ PathKey *opathkey;
+ ListCell *lc;
+ ListCell *lop;
- foreach(l, mergeclauses)
+ lastoeclass = NULL;
+ opathkey = NULL;
+ lop = list_head(outer_pathkeys);
+
+ foreach(lc, mergeclauses)
{
- RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
- List *pathkey;
+ RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
+ EquivalenceClass *oeclass;
+ EquivalenceClass *ieclass;
+ PathKey *pathkey;
- cache_mergeclause_pathkeys(root, restrictinfo);
+ cache_mergeclause_eclasses(root, rinfo);
- if (bms_is_subset(restrictinfo->left_relids, rel->relids))
+ if (rinfo->outer_is_left)
{
- /* Rel is left side of mergeclause */
- pathkey = restrictinfo->left_pathkey;
+ oeclass = rinfo->left_ec;
+ ieclass = rinfo->right_ec;
}
- else if (bms_is_subset(restrictinfo->right_relids, rel->relids))
+ else
{
- /* Rel is right side of mergeclause */
- pathkey = restrictinfo->right_pathkey;
+ oeclass = rinfo->right_ec;
+ ieclass = rinfo->left_ec;
}
- else
+
+ /* outer eclass should match current or next pathkeys */
+ /* we check this carefully for debugging reasons */
+ if (oeclass != lastoeclass)
{
- elog(ERROR, "could not identify which side of mergeclause to use");
- pathkey = NIL; /* keep compiler quiet */
+ if (!lop)
+ elog(ERROR, "too few pathkeys for mergeclauses");
+ opathkey = (PathKey *) lfirst(lop);
+ lop = lnext(lop);
+ lastoeclass = opathkey->pk_eclass;
+ if (oeclass != lastoeclass)
+ elog(ERROR, "outer pathkeys do not match mergeclause");
}
/*
- * When we are given multiple merge clauses, it's possible that some
- * clauses refer to the same vars as earlier clauses. There's no
- * reason for us to specify sort keys like (A,B,A) when (A,B) will do
- * --- and adding redundant sort keys makes add_path think that this
- * sort order is different from ones that are really the same, so
- * don't do it. Since we now have a canonicalized pathkey, a simple
- * ptrMember test is sufficient to detect redundant keys.
+ * Often, we'll have same EC on both sides, in which case the outer
+ * pathkey is also canonical for the inner side, and we can skip a
+ * useless search.
+ */
+ if (ieclass == oeclass)
+ pathkey = opathkey;
+ else
+ pathkey = make_canonical_pathkey(root,
+ ieclass,
+ opathkey->pk_opfamily,
+ opathkey->pk_strategy,
+ opathkey->pk_nulls_first);
+
+ /*
+ * Don't generate redundant pathkeys (can happen if multiple
+ * mergeclauses refer to same EC).
*/
- pathkeys = list_append_unique_ptr(pathkeys, pathkey);
+ if (!pathkey_is_redundant(pathkey, pathkeys))
+ pathkeys = lappend(pathkeys, pathkey);
}
return pathkeys;
@@ -1471,7 +1230,7 @@ make_pathkeys_for_mergeclauses(PlannerInfo *root,
/*
* pathkeys_useful_for_merging
* Count the number of pathkeys that may be useful for mergejoins
- * above the given relation (by looking at its joininfo list).
+ * above the given relation.
*
* We consider a pathkey potentially useful if it corresponds to the merge
* ordering of either side of any joinclause for the rel. This might be
@@ -1487,27 +1246,39 @@ pathkeys_useful_for_merging(PlannerInfo *root, RelOptInfo *rel, List *pathkeys)
foreach(i, pathkeys)
{
- List *pathkey = (List *) lfirst(i);
+ PathKey *pathkey = (PathKey *) lfirst(i);
bool matched = false;
ListCell *j;
- foreach(j, rel->joininfo)
+ /*
+ * First look into the EquivalenceClass of the pathkey, to see if
+ * there are any members not yet joined to the rel. If so, it's
+ * surely possible to generate a mergejoin clause using them.
+ */
+ if (rel->has_eclass_joins &&
+ eclass_useful_for_merging(pathkey->pk_eclass, rel))
+ matched = true;
+ else
{
- RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(j);
-
- if (restrictinfo->mergejoinoperator == InvalidOid)
- continue;
- cache_mergeclause_pathkeys(root, restrictinfo);
-
/*
- * We can compare canonical pathkey sublists by simple pointer
- * equality; see compare_pathkeys.
+ * Otherwise search the rel's joininfo list, which contains
+ * non-EquivalenceClass-derivable join clauses that might
+ * nonetheless be mergejoinable.
*/
- if (pathkey == restrictinfo->left_pathkey ||
- pathkey == restrictinfo->right_pathkey)
+ foreach(j, rel->joininfo)
{
- matched = true;
- break;
+ RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(j);
+
+ if (restrictinfo->mergeopfamilies == NIL)
+ continue;
+ cache_mergeclause_eclasses(root, restrictinfo);
+
+ if (pathkey->pk_eclass == restrictinfo->left_ec ||
+ pathkey->pk_eclass == restrictinfo->right_ec)
+ {
+ matched = true;
+ break;
+ }
}
}