diff options
Diffstat (limited to 'src/backend/optimizer/path')
-rw-r--r-- | src/backend/optimizer/path/Makefile | 4 | ||||
-rw-r--r-- | src/backend/optimizer/path/allpaths.c | 12 | ||||
-rw-r--r-- | src/backend/optimizer/path/costsize.c | 54 | ||||
-rw-r--r-- | src/backend/optimizer/path/equivclass.c | 1662 | ||||
-rw-r--r-- | src/backend/optimizer/path/indxpath.c | 457 | ||||
-rw-r--r-- | src/backend/optimizer/path/joinpath.c | 187 | ||||
-rw-r--r-- | src/backend/optimizer/path/joinrels.c | 10 | ||||
-rw-r--r-- | src/backend/optimizer/path/pathkeys.c | 1523 |
8 files changed, 2552 insertions, 1357 deletions
diff --git a/src/backend/optimizer/path/Makefile b/src/backend/optimizer/path/Makefile index 7b5fce139de..833b3f59a37 100644 --- a/src/backend/optimizer/path/Makefile +++ b/src/backend/optimizer/path/Makefile @@ -4,7 +4,7 @@ # Makefile for optimizer/path # # IDENTIFICATION -# $PostgreSQL: pgsql/src/backend/optimizer/path/Makefile,v 1.17 2007/01/20 17:16:11 petere Exp $ +# $PostgreSQL: pgsql/src/backend/optimizer/path/Makefile,v 1.18 2007/01/20 20:45:38 tgl Exp $ # #------------------------------------------------------------------------- @@ -12,7 +12,7 @@ subdir = src/backend/optimizer/path top_builddir = ../../../.. include $(top_builddir)/src/Makefile.global -OBJS = allpaths.o clausesel.o costsize.o indxpath.o \ +OBJS = allpaths.o clausesel.o costsize.o equivclass.o indxpath.o \ joinpath.o joinrels.o orindxpath.o pathkeys.o tidpath.o all: SUBSYS.o diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index 01178d93ddd..cbe78c3abd5 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/path/allpaths.c,v 1.156 2007/01/09 02:14:12 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/path/allpaths.c,v 1.157 2007/01/20 20:45:38 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -326,6 +326,16 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, appinfo); /* + * We have to make child entries in the EquivalenceClass data + * structures as well. + */ + if (rel->has_eclass_joins) + { + add_child_rel_equivalences(root, appinfo, rel, childrel); + childrel->has_eclass_joins = true; + } + + /* * Copy the parent's attr_needed data as well, with appropriate * adjustment of relids and attribute numbers. */ diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index e1c003b5047..22cbf8e2b92 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -54,7 +54,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.174 2007/01/10 18:06:03 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.175 2007/01/20 20:45:38 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -1258,8 +1258,6 @@ cost_mergejoin(MergePath *path, PlannerInfo *root) Path *outer_path = path->jpath.outerjoinpath; Path *inner_path = path->jpath.innerjoinpath; List *mergeclauses = path->path_mergeclauses; - Oid *mergeFamilies = path->path_mergeFamilies; - int *mergeStrategies = path->path_mergeStrategies; List *outersortkeys = path->outersortkeys; List *innersortkeys = path->innersortkeys; Cost startup_cost = 0; @@ -1268,7 +1266,6 @@ cost_mergejoin(MergePath *path, PlannerInfo *root) Selectivity merge_selec; QualCost merge_qual_cost; QualCost qp_qual_cost; - RestrictInfo *firstclause; double outer_path_rows = PATH_ROWS(outer_path); double inner_path_rows = PATH_ROWS(inner_path); double outer_rows, @@ -1347,32 +1344,47 @@ cost_mergejoin(MergePath *path, PlannerInfo *root) * inputs that will actually need to be scanned. We use only the first * (most significant) merge clause for this purpose. * - * Since this calculation is somewhat expensive, and will be the same for - * all mergejoin paths associated with the merge clause, we cache the - * results in the RestrictInfo node. XXX that won't work anymore once - * we support multiple possible orderings! + * XXX mergejoinscansel is a bit expensive, can we cache its results? */ if (mergeclauses && path->jpath.jointype != JOIN_FULL) { - firstclause = (RestrictInfo *) linitial(mergeclauses); - if (firstclause->left_mergescansel < 0) /* not computed yet? */ - mergejoinscansel(root, (Node *) firstclause->clause, - mergeFamilies[0], - mergeStrategies[0], - &firstclause->left_mergescansel, - &firstclause->right_mergescansel); - - if (bms_is_subset(firstclause->left_relids, outer_path->parent->relids)) + RestrictInfo *firstclause = (RestrictInfo *) linitial(mergeclauses); + List *opathkeys; + List *ipathkeys; + PathKey *opathkey; + PathKey *ipathkey; + Selectivity leftscansel, + rightscansel; + + /* Get the input pathkeys to determine the sort-order details */ + opathkeys = outersortkeys ? outersortkeys : outer_path->pathkeys; + ipathkeys = innersortkeys ? innersortkeys : inner_path->pathkeys; + Assert(opathkeys); + Assert(ipathkeys); + opathkey = (PathKey *) linitial(opathkeys); + ipathkey = (PathKey *) linitial(ipathkeys); + /* debugging check */ + if (opathkey->pk_opfamily != ipathkey->pk_opfamily || + opathkey->pk_strategy != ipathkey->pk_strategy || + opathkey->pk_nulls_first != ipathkey->pk_nulls_first) + elog(ERROR, "left and right pathkeys do not match in mergejoin"); + + mergejoinscansel(root, (Node *) firstclause->clause, + opathkey->pk_opfamily, opathkey->pk_strategy, + &leftscansel, &rightscansel); + + if (bms_is_subset(firstclause->left_relids, + outer_path->parent->relids)) { /* left side of clause is outer */ - outerscansel = firstclause->left_mergescansel; - innerscansel = firstclause->right_mergescansel; + outerscansel = leftscansel; + innerscansel = rightscansel; } else { /* left side of clause is inner */ - outerscansel = firstclause->right_mergescansel; - innerscansel = firstclause->left_mergescansel; + outerscansel = rightscansel; + innerscansel = leftscansel; } if (path->jpath.jointype == JOIN_LEFT) outerscansel = 1.0; diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c new file mode 100644 index 00000000000..063e8d5d014 --- /dev/null +++ b/src/backend/optimizer/path/equivclass.c @@ -0,0 +1,1662 @@ +/*------------------------------------------------------------------------- + * + * equivclass.c + * Routines for managing EquivalenceClasses + * + * See src/backend/optimizer/README for discussion of EquivalenceClasses. + * + * + * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * $PostgreSQL: pgsql/src/backend/optimizer/path/equivclass.c,v 1.1 2007/01/20 20:45:39 tgl Exp $ + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/skey.h" +#include "optimizer/clauses.h" +#include "optimizer/cost.h" +#include "optimizer/paths.h" +#include "optimizer/planmain.h" +#include "optimizer/prep.h" +#include "optimizer/var.h" +#include "utils/lsyscache.h" + + +static void add_eq_member(EquivalenceClass *ec, Expr *expr, Relids relids, + bool is_child, Oid datatype); +static void generate_base_implied_equalities_const(PlannerInfo *root, + EquivalenceClass *ec); +static void generate_base_implied_equalities_no_const(PlannerInfo *root, + EquivalenceClass *ec); +static void generate_base_implied_equalities_broken(PlannerInfo *root, + EquivalenceClass *ec); +static List *generate_join_implied_equalities_normal(PlannerInfo *root, + EquivalenceClass *ec, + RelOptInfo *joinrel, + RelOptInfo *outer_rel, + RelOptInfo *inner_rel); +static List *generate_join_implied_equalities_broken(PlannerInfo *root, + EquivalenceClass *ec, + RelOptInfo *joinrel, + RelOptInfo *outer_rel, + RelOptInfo *inner_rel); +static Oid select_equality_operator(EquivalenceClass *ec, + Oid lefttype, Oid righttype); +static void reconsider_outer_join_clause(PlannerInfo *root, + RestrictInfo *rinfo, + bool outer_on_left); +static void reconsider_full_join_clause(PlannerInfo *root, + RestrictInfo *rinfo); + + +/* + * process_equivalence + * The given clause has a mergejoinable operator and can be applied without + * any delay by an outer join, so its two sides can be considered equal + * anywhere they are both computable; moreover that equality can be + * extended transitively. Record this knowledge in the EquivalenceClass + * data structure. Returns TRUE if successful, FALSE if not (in which + * case caller should treat the clause as ordinary, not an equivalence). + * + * If below_outer_join is true, then the clause was found below the nullable + * side of an outer join, so its sides might validly be both NULL rather than + * strictly equal. We can still deduce equalities in such cases, but we take + * care to mark an EquivalenceClass if it came from any such clauses. Also, + * we have to check that both sides are either pseudo-constants or strict + * functions of Vars, else they might not both go to NULL above the outer + * join. (This is the reason why we need a failure return. It's more + * convenient to check this case here than at the call sites...) + * + * Note: constructing merged EquivalenceClasses is a standard UNION-FIND + * problem, for which there exist better data structures than simple lists. + * If this code ever proves to be a bottleneck then it could be sped up --- + * but for now, simple is beautiful. + * + * Note: this is only called during planner startup, not during GEQO + * exploration, so we need not worry about whether we're in the right + * memory context. + */ +bool +process_equivalence(PlannerInfo *root, RestrictInfo *restrictinfo, + bool below_outer_join) +{ + Expr *clause = restrictinfo->clause; + Oid opno, + item1_type, + item2_type; + Expr *item1; + Expr *item2; + Relids item1_relids, + item2_relids; + List *opfamilies; + EquivalenceClass *ec1, + *ec2; + ListCell *lc1; + + /* Extract info from given clause */ + Assert(is_opclause(clause)); + opno = ((OpExpr *) clause)->opno; + item1 = (Expr *) get_leftop(clause); + item2 = (Expr *) get_rightop(clause); + item1_relids = restrictinfo->left_relids; + item2_relids = restrictinfo->right_relids; + + /* + * If below outer join, check for strictness, else reject. + */ + if (below_outer_join) + { + if (!bms_is_empty(item1_relids) && + contain_nonstrict_functions((Node *) item1)) + return false; /* LHS is non-strict but not constant */ + if (!bms_is_empty(item2_relids) && + contain_nonstrict_functions((Node *) item2)) + return false; /* RHS is non-strict but not constant */ + } + + /* + * We use the declared input types of the operator, not exprType() of + * the inputs, as the nominal datatypes for opfamily lookup. This + * presumes that btree operators are always registered with amoplefttype + * and amoprighttype equal to their declared input types. We will need + * this info anyway to build EquivalenceMember nodes, and by extracting + * it now we can use type comparisons to short-circuit some equal() tests. + */ + op_input_types(opno, &item1_type, &item2_type); + + opfamilies = restrictinfo->mergeopfamilies; + + /* + * Sweep through the existing EquivalenceClasses looking for matches + * to item1 and item2. These are the possible outcomes: + * + * 1. We find both in the same EC. The equivalence is already known, + * so there's nothing to do. + * + * 2. We find both in different ECs. Merge the two ECs together. + * + * 3. We find just one. Add the other to its EC. + * + * 4. We find neither. Make a new, two-entry EC. + * + * Note: since all ECs are built through this process, it's impossible + * that we'd match an item in more than one existing EC. It is possible + * to match more than once within an EC, if someone fed us something silly + * like "WHERE X=X". (However, we can't simply discard such clauses, + * since they should fail when X is null; so we will build a 2-member + * EC to ensure the correct restriction clause gets generated. Hence + * there is no shortcut here for item1 and item2 equal.) + */ + ec1 = ec2 = NULL; + foreach(lc1, root->eq_classes) + { + EquivalenceClass *cur_ec = (EquivalenceClass *) lfirst(lc1); + ListCell *lc2; + + /* Never match to a volatile EC */ + if (cur_ec->ec_has_volatile) + continue; + + /* + * A "match" requires matching sets of btree opfamilies. Use of + * equal() for this test has implications discussed in the comments + * for get_mergejoin_opfamilies(). + */ + if (!equal(opfamilies, cur_ec->ec_opfamilies)) + continue; + + foreach(lc2, cur_ec->ec_members) + { + EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc2); + + Assert(!cur_em->em_is_child); /* no children yet */ + + /* + * If below an outer join, don't match constants: they're not + * as constant as they look. + */ + if ((below_outer_join || cur_ec->ec_below_outer_join) && + cur_em->em_is_const) + continue; + + if (!ec1 && + item1_type == cur_em->em_datatype && + equal(item1, cur_em->em_expr)) + { + ec1 = cur_ec; + if (ec2) + break; + } + + if (!ec2 && + item2_type == cur_em->em_datatype && + equal(item2, cur_em->em_expr)) + { + ec2 = cur_ec; + if (ec1) + break; + } + } + + if (ec1 && ec2) + break; + } + + /* Sweep finished, what did we find? */ + + if (ec1 && ec2) + { + /* If case 1, nothing to do, except add to sources */ + if (ec1 == ec2) + { + ec1->ec_sources = lappend(ec1->ec_sources, restrictinfo); + ec1->ec_below_outer_join |= below_outer_join; + return true; + } + + /* + * Case 2: need to merge ec1 and ec2. We add ec2's items to ec1, + * then set ec2's ec_merged link to point to ec1 and remove ec2 + * from the eq_classes list. We cannot simply delete ec2 because + * that could leave dangling pointers in existing PathKeys. We + * leave it behind with a link so that the merged EC can be found. + */ + ec1->ec_members = list_concat(ec1->ec_members, ec2->ec_members); + ec1->ec_sources = list_concat(ec1->ec_sources, ec2->ec_sources); + ec1->ec_relids = bms_join(ec1->ec_relids, ec2->ec_relids); + ec1->ec_has_const |= ec2->ec_has_const; + /* can't need to set has_volatile */ + ec1->ec_below_outer_join |= ec2->ec_below_outer_join; + ec2->ec_merged = ec1; + root->eq_classes = list_delete_ptr(root->eq_classes, ec2); + /* just to avoid debugging confusion w/ dangling pointers: */ + ec2->ec_members = NIL; + ec2->ec_sources = NIL; + ec2->ec_relids = NULL; + ec1->ec_sources = lappend(ec1->ec_sources, restrictinfo); + ec1->ec_below_outer_join |= below_outer_join; + } + else if (ec1) + { + /* Case 3: add item2 to ec1 */ + add_eq_member(ec1, item2, item2_relids, false, item2_type); + ec1->ec_sources = lappend(ec1->ec_sources, restrictinfo); + ec1->ec_below_outer_join |= below_outer_join; + } + else if (ec2) + { + /* Case 3: add item1 to ec2 */ + add_eq_member(ec2, item1, item1_relids, false, item1_type); + ec2->ec_sources = lappend(ec2->ec_sources, restrictinfo); + ec2->ec_below_outer_join |= below_outer_join; + } + else + { + /* Case 4: make a new, two-entry EC */ + EquivalenceClass *ec = makeNode(EquivalenceClass); + + ec->ec_opfamilies = opfamilies; + ec->ec_members = NIL; + ec->ec_sources = list_make1(restrictinfo); + ec->ec_relids = NULL; + ec->ec_has_const = false; + ec->ec_has_volatile = false; + ec->ec_below_outer_join = below_outer_join; + ec->ec_broken = false; + ec->ec_merged = NULL; + add_eq_member(ec, item1, item1_relids, false, item1_type); + add_eq_member(ec, item2, item2_relids, false, item2_type); + + root->eq_classes = lappend(root->eq_classes, ec); + } + + return true; +} + +/* + * add_eq_member - build a new EquivalenceMember and add it to an EC + */ +static void +add_eq_member(EquivalenceClass *ec, Expr *expr, Relids relids, + bool is_child, Oid datatype) +{ + EquivalenceMember *em = makeNode(EquivalenceMember); + + em->em_expr = expr; + em->em_relids = relids; + em->em_is_const = false; + em->em_is_child = is_child; + em->em_datatype = datatype; + + if (bms_is_empty(relids)) + { + /* + * No Vars, assume it's a pseudoconstant. This is correct for + * entries generated from process_equivalence(), because a WHERE + * clause can't contain aggregates and non-volatility was checked + * before process_equivalence() ever got called. But + * get_eclass_for_sort_expr() has to work harder. We put the tests + * there not here to save cycles in the equivalence case. + */ + Assert(!is_child); + em->em_is_const = true; + ec->ec_has_const = true; + /* it can't affect ec_relids */ + } + else if (!is_child) /* child members don't add to ec_relids */ + { + ec->ec_relids = bms_add_members(ec->ec_relids, relids); + } + ec->ec_members = lappend(ec->ec_members, em); +} + + +/* + * get_eclass_for_sort_expr + * Given an expression and opfamily info, find an existing equivalence + * class it is a member of; if none, build a new single-member + * EquivalenceClass for it. + * + * This can be used safely both before and after EquivalenceClass merging; + * since it never causes merging it does not invalidate any existing ECs + * or PathKeys. + * + * Note: opfamilies must be chosen consistently with the way + * process_equivalence() would do; that is, generated from a mergejoinable + * equality operator. Else we might fail to detect valid equivalences, + * generating poor (but not incorrect) plans. + */ +EquivalenceClass * +get_eclass_for_sort_expr(PlannerInfo *root, + Expr *expr, + Oid expr_datatype, + List *opfamilies) +{ + EquivalenceClass *newec; + ListCell *lc1; + MemoryContext oldcontext; + + /* + * Scan through the existing EquivalenceClasses for a match + */ + foreach(lc1, root->eq_classes) + { + EquivalenceClass *cur_ec = (EquivalenceClass *) lfirst(lc1); + ListCell *lc2; + + /* we allow matching to a volatile EC here */ + + if (!equal(opfamilies, cur_ec->ec_opfamilies)) + continue; + + foreach(lc2, cur_ec->ec_members) + { + EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc2); + + /* + * If below an outer join, don't match constants: they're not + * as constant as they look. + */ + if (cur_ec->ec_below_outer_join && + cur_em->em_is_const) + continue; + + if (expr_datatype == cur_em->em_datatype && + equal(expr, cur_em->em_expr)) + return cur_ec; /* Match! */ + } + } + + /* + * No match, so build a new single-member EC + * + * Here, we must be sure that we construct the EC in the right context. + * We can assume, however, that the passed expr is long-lived. + */ + oldcontext = MemoryContextSwitchTo(root->planner_cxt); + + newec = makeNode(EquivalenceClass); + newec->ec_opfamilies = list_copy(opfamilies); + newec->ec_members = NIL; + newec->ec_sources = NIL; + newec->ec_relids = NULL; + newec->ec_has_const = false; + newec->ec_has_volatile = contain_volatile_functions((Node *) expr); + newec->ec_below_outer_join = false; + newec->ec_broken = false; + newec->ec_merged = NULL; + add_eq_member(newec, expr, pull_varnos((Node *) expr), + false, expr_datatype); + + /* + * add_eq_member doesn't check for volatile functions or aggregates, + * but such could appear in sort expressions, so we have to check + * whether its const-marking was correct. + */ + if (newec->ec_has_const) + { + if (newec->ec_has_volatile || contain_agg_clause((Node *) expr)) + { + newec->ec_has_const = false; + ((EquivalenceMember *) linitial(newec->ec_members))->em_is_const = false; + } + } + + root->eq_classes = lappend(root->eq_classes, newec); + + MemoryContextSwitchTo(oldcontext); + + return newec; +} + + +/* + * generate_base_implied_equalities + * Generate any restriction clauses that we can deduce from equivalence + * classes. + * + * When an EC contains pseudoconstants, our strategy is to generate + * "member = const1" clauses where const1 is the first constant member, for + * every other member (including other constants). If we are able to do this + * then we don't need any "var = var" comparisons because we've successfully + * constrained all the vars at their points of creation. If we fail to + * generate any of these clauses due to lack of cross-type operators, we fall + * back to the "ec_broken" strategy described below. (XXX if there are + * multiple constants of different types, it's possible that we might succeed + * in forming all the required clauses if we started from a different const + * member; but this seems a sufficiently hokey corner case to not be worth + * spending lots of cycles on.) + * + * For ECs that contain no pseudoconstants, we generate derived clauses + * "member1 = member2" for each pair of members belonging to the same base + * relation (actually, if there are more than two for the same base relation, + * we only need enough clauses to link each to each other). This provides + * the base case for the recursion: each row emitted by a base relation scan + * will constrain all computable members of the EC to be equal. As each + * join path is formed, we'll add additional derived clauses on-the-fly + * to maintain this invariant (see generate_join_implied_equalities). + * + * If the opfamilies used by the EC do not provide complete sets of cross-type + * equality operators, it is possible that we will fail to generate a clause + * that must be generated to maintain the invariant. (An example: given + * "WHERE a.x = b.y AND b.y = a.z", the scheme breaks down if we cannot + * generate "a.x = a.z" as a restriction clause for A.) In this case we mark + * the EC "ec_broken" and fall back to regurgitating its original source + * RestrictInfos at appropriate times. We do not try to retract any derived + * clauses already generated from the broken EC, so the resulting plan could + * be poor due to bad selectivity estimates caused by redundant clauses. But + * the correct solution to that is to fix the opfamilies ... + * + * Equality clauses derived by this function are passed off to + * process_implied_equality (in plan/initsplan.c) to be inserted into the + * restrictinfo datastructures. Note that this must be called after initial + * scanning of the quals and before Path construction begins. + */ +void +generate_base_implied_equalities(PlannerInfo *root) +{ + ListCell *lc; + Index rti; + + foreach(lc, root->eq_classes) + { + EquivalenceClass *ec = (EquivalenceClass *) lfirst(lc); + + Assert(ec->ec_merged == NULL); /* else shouldn't be in list */ + Assert(!ec->ec_broken); /* not yet anyway... */ + + /* Single-member ECs won't generate any deductions */ + if (list_length(ec->ec_members) <= 1) + continue; + + if (ec->ec_has_const) + generate_base_implied_equalities_const(root, ec); + else + generate_base_implied_equalities_no_const(root, ec); + + /* Recover if we failed to generate required derived clauses */ + if (ec->ec_broken) + generate_base_implied_equalities_broken(root, ec); + } + + /* + * This is also a handy place to mark base rels (which should all + * exist by now) with flags showing whether they have pending eclass + * joins. + */ + for (rti = 1; rti < root->simple_rel_array_size; rti++) + { + RelOptInfo *brel = root->simple_rel_array[rti]; + + if (brel == NULL) + continue; + + brel->has_eclass_joins = has_relevant_eclass_joinclause(root, brel); + } +} + +/* + * generate_base_implied_equalities when EC contains pseudoconstant(s) + */ +static void +generate_base_implied_equalities_const(PlannerInfo *root, + EquivalenceClass *ec) +{ + EquivalenceMember *const_em = NULL; + ListCell *lc; + + /* Find the constant member to use */ + foreach(lc, ec->ec_members) + { + EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc); + + if (cur_em->em_is_const) + { + const_em = cur_em; + break; + } + } + Assert(const_em != NULL); + + /* Generate a derived equality against each other member */ + foreach(lc, ec->ec_members) + { + EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc); + Oid eq_op; + + Assert(!cur_em->em_is_child); /* no children yet */ + if (cur_em == const_em) + continue; + eq_op = select_equality_operator(ec, + cur_em->em_datatype, + const_em->em_datatype); + if (!OidIsValid(eq_op)) + { + /* failed... */ + ec->ec_broken = true; + break; + } + process_implied_equality(root, eq_op, + cur_em->em_expr, const_em->em_expr, + ec->ec_relids, + ec->ec_below_outer_join, + cur_em->em_is_const); + } +} + +/* + * generate_base_implied_equalities when EC contains no pseudoconstants + */ +static void +generate_base_implied_equalities_no_const(PlannerInfo *root, + EquivalenceClass *ec) +{ + EquivalenceMember **prev_ems; + ListCell *lc; + + /* + * We scan the EC members once and track the last-seen member for each + * base relation. When we see another member of the same base relation, + * we generate "prev_mem = cur_mem". This results in the minimum number + * of derived clauses, but it's possible that it will fail when a different + * ordering would succeed. XXX FIXME: use a UNION-FIND algorithm similar + * to the way we build merged ECs. (Use a list-of-lists for each rel.) + */ + prev_ems = (EquivalenceMember **) + palloc0(root->simple_rel_array_size * sizeof(EquivalenceMember *)); + + foreach(lc, ec->ec_members) + { + EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc); + int relid; + + Assert(!cur_em->em_is_child); /* no children yet */ + if (bms_membership(cur_em->em_relids) != BMS_SINGLETON) + continue; + relid = bms_singleton_member(cur_em->em_relids); + Assert(relid < root->simple_rel_array_size); + + if (prev_ems[relid] != NULL) + { + EquivalenceMember *prev_em = prev_ems[relid]; + Oid eq_op; + + eq_op = select_equality_operator(ec, + prev_em->em_datatype, + cur_em->em_datatype); + if (!OidIsValid(eq_op)) + { + /* failed... */ + ec->ec_broken = true; + break; + } + process_implied_equality(root, eq_op, + prev_em->em_expr, cur_em->em_expr, + ec->ec_relids, + ec->ec_below_outer_join, + false); + } + prev_ems[relid] = cur_em; + } + + pfree(prev_ems); + + /* + * We also have to make sure that all the Vars used in the member + * clauses will be available at any join node we might try to reference + * them at. For the moment we force all the Vars to be available at + * all join nodes for this eclass. Perhaps this could be improved by + * doing some pre-analysis of which members we prefer to join, but it's + * no worse than what happened in the pre-8.3 code. + */ + foreach(lc, ec->ec_members) + { + EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc); + List *vars = pull_var_clause((Node *) cur_em->em_expr, false); + + add_vars_to_targetlist(root, vars, ec->ec_relids); + list_free(vars); + } +} + +/* + * generate_base_implied_equalities cleanup after failure + * + * What we must do here is push any zero- or one-relation source RestrictInfos + * of the EC back into the main restrictinfo datastructures. Multi-relation + * clauses will be regurgitated later by generate_join_implied_equalities(). + * (We do it this way to maintain continuity with the case that ec_broken + * becomes set only after we've gone up a join level or two.) + */ +static void +generate_base_implied_equalities_broken(PlannerInfo *root, + EquivalenceClass *ec) +{ + ListCell *lc; + + foreach(lc, ec->ec_sources) + { + RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(lc); + + if (bms_membership(restrictinfo->required_relids) != BMS_MULTIPLE) + distribute_restrictinfo_to_rels(root, restrictinfo); + } +} + + +/* + * generate_join_implied_equalities + * Generate any join clauses that we can deduce from equivalence classes. + * + * At a join node, we must enforce restriction clauses sufficient to ensure + * that all equivalence-class members computable at that node are equal. + * Since the set of clauses to enforce can vary depending on which subset + * relations are the inputs, we have to compute this afresh for each join + * path pair. Hence a fresh List of RestrictInfo nodes is built and passed + * back on each call. + * + * The results are sufficient for use in merge, hash, and plain nestloop join + * methods. We do not worry here about selecting clauses that are optimal + * for use in a nestloop-with-inner-indexscan join, however. indxpath.c makes + * its own selections of clauses to use, and if the ones we pick here are + * redundant with those, the extras will be eliminated in createplan.c. + */ +List * +generate_join_implied_equalities(PlannerInfo *root, + RelOptInfo *joinrel, + RelOptInfo *outer_rel, + RelOptInfo *inner_rel) +{ + List *result = NIL; + ListCell *lc; + + foreach(lc, root->eq_classes) + { + EquivalenceClass *ec = (EquivalenceClass *) lfirst(lc); + List *sublist = NIL; + + /* ECs containing consts do not need any further enforcement */ + if (ec->ec_has_const) + continue; + + /* Single-member ECs won't generate any deductions */ + if (list_length(ec->ec_members) <= 1) + continue; + + /* We can quickly ignore any that don't overlap the join, too */ + if (!bms_overlap(ec->ec_relids, joinrel->relids)) + continue; + + if (!ec->ec_broken) + sublist = generate_join_implied_equalities_normal(root, + ec, + joinrel, + outer_rel, + inner_rel); + + /* Recover if we failed to generate required derived clauses */ + if (ec->ec_broken) + sublist = generate_join_implied_equalities_broken(root, + ec, + joinrel, + outer_rel, + inner_rel); + + result = list_concat(result, sublist); + } + + return result; +} + +/* + * generate_join_implied_equalities for a still-valid EC + */ +static List * +generate_join_implied_equalities_normal(PlannerInfo *root, + EquivalenceClass *ec, + RelOptInfo *joinrel, + RelOptInfo *outer_rel, + RelOptInfo *inner_rel) +{ + List *result = NIL; + List *new_members = NIL; + List *outer_members = NIL; + List *inner_members = NIL; + ListCell *lc1; + + /* + * First, scan the EC to identify member values that are computable + * at the outer rel, at the inner rel, or at this relation but not in + * either input rel. The outer-rel members should already be enforced + * equal, likewise for the inner-rel members. We'll need to create + * clauses to enforce that any newly computable members are all equal + * to each other as well as to at least one input member, plus enforce + * at least one outer-rel member equal to at least one inner-rel member. + */ + foreach(lc1, ec->ec_members) + { + EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc1); + + if (cur_em->em_is_child) + continue; /* ignore children here */ + if (!bms_is_subset(cur_em->em_relids, joinrel->relids)) + continue; /* ignore --- not computable yet */ + + if (bms_is_subset(cur_em->em_relids, outer_rel->relids)) + outer_members = lappend(outer_members, cur_em); + else if (bms_is_subset(cur_em->em_relids, inner_rel->relids)) + inner_members = lappend(inner_members, cur_em); + else + new_members = lappend(new_members, cur_em); + } + + /* + * First, select the joinclause if needed. We can equate any one outer + * member to any one inner member, but we have to find a datatype + * combination for which an opfamily member operator exists. If we + * have choices, we prefer simple Var members (possibly with RelabelType) + * since these are (a) cheapest to compute at runtime and (b) most likely + * to have useful statistics. Also, if enable_hashjoin is on, we prefer + * operators that are also hashjoinable. + */ + if (outer_members && inner_members) + { + EquivalenceMember *best_outer_em = NULL; + EquivalenceMember *best_inner_em = NULL; + Oid best_eq_op = InvalidOid; + int best_score = -1; + RestrictInfo *rinfo; + + foreach(lc1, outer_members) + { + EquivalenceMember *outer_em = (EquivalenceMember *) lfirst(lc1); + ListCell *lc2; + + foreach(lc2, inner_members) + { + EquivalenceMember *inner_em = (EquivalenceMember *) lfirst(lc2); + Oid eq_op; + int score; + + eq_op = select_equality_operator(ec, + outer_em->em_datatype, + inner_em->em_datatype); + if (!OidIsValid(eq_op)) + continue; + score = 0; + if (IsA(outer_em->em_expr, Var) || + (IsA(outer_em->em_expr, RelabelType) && + IsA(((RelabelType *) outer_em->em_expr)->arg, Var))) + score++; + if (IsA(inner_em->em_expr, Var) || + (IsA(inner_em->em_expr, RelabelType) && + IsA(((RelabelType *) inner_em->em_expr)->arg, Var))) + score++; + if (!enable_hashjoin || op_hashjoinable(eq_op)) + score++; + if (score > best_score) + { + best_outer_em = outer_em; + best_inner_em = inner_em; + best_eq_op = eq_op; + best_score = score; + if (best_score == 3) + break; /* no need to look further */ + } + } + if (best_score == 3) + break; /* no need to look further */ + } + if (best_score < 0) + { + /* failed... */ + ec->ec_broken = true; + return NIL; + } + + rinfo = build_implied_join_equality(best_eq_op, + best_outer_em->em_expr, + best_inner_em->em_expr, + ec->ec_relids); + /* mark restrictinfo as redundant with other joinclauses */ + rinfo->parent_ec = ec; + /* we can set these too, rather than letting them be looked up later */ + rinfo->left_ec = ec; + rinfo->right_ec = ec; + + result = lappend(result, rinfo); + } + + /* + * Now deal with building restrictions for any expressions that involve + * Vars from both sides of the join. We have to equate all of these to + * each other as well as to at least one old member (if any). + * + * XXX as in generate_base_implied_equalities_no_const, we could be a + * lot smarter here to avoid unnecessary failures in cross-type situations. + * For now, use the same left-to-right method used there. + */ + if (new_members) + { + List *old_members = list_concat(outer_members, inner_members); + EquivalenceMember *prev_em = NULL; + RestrictInfo *rinfo; + + /* For now, arbitrarily take the first old_member as the one to use */ + if (old_members) + new_members = lappend(new_members, linitial(old_members)); + + foreach(lc1, new_members) + { + EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc1); + + if (prev_em != NULL) + { + Oid eq_op; + + eq_op = select_equality_operator(ec, + prev_em->em_datatype, + cur_em->em_datatype); + if (!OidIsValid(eq_op)) + { + /* failed... */ + ec->ec_broken = true; + return NIL; + } + rinfo = build_implied_join_equality(eq_op, + prev_em->em_expr, + cur_em->em_expr, + ec->ec_relids); + + /* do NOT set parent_ec, this qual is not redundant! */ + + /* we can set these, though */ + rinfo->left_ec = ec; + rinfo->right_ec = ec; + + result = lappend(result, rinfo); + } + prev_em = cur_em; + } + } + + return result; +} + +/* + * generate_join_implied_equalities cleanup after failure + * + * Return any original RestrictInfos that are enforceable at this join. + */ +static List * +generate_join_implied_equalities_broken(PlannerInfo *root, + EquivalenceClass *ec, + RelOptInfo *joinrel, + RelOptInfo *outer_rel, + RelOptInfo *inner_rel) +{ + List *result = NIL; + ListCell *lc; + + foreach(lc, ec->ec_sources) + { + RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(lc); + + if (bms_is_subset(restrictinfo->required_relids, joinrel->relids) && + !bms_is_subset(restrictinfo->required_relids, outer_rel->relids) && + !bms_is_subset(restrictinfo->required_relids, inner_rel->relids)) + result = lappend(result, restrictinfo); + } + + return result; +} + + +/* + * select_equality_operator + * Select a suitable equality operator for comparing two EC members + * + * Returns InvalidOid if no operator can be found for this datatype combination + */ +static Oid +select_equality_operator(EquivalenceClass *ec, Oid lefttype, Oid righttype) +{ + ListCell *lc; + + foreach(lc, ec->ec_opfamilies) + { + Oid opfamily = lfirst_oid(lc); + Oid opno; + + opno = get_opfamily_member(opfamily, lefttype, righttype, + BTEqualStrategyNumber); + if (OidIsValid(opno)) + return opno; + } + return InvalidOid; +} + + +/* + * reconsider_outer_join_clauses + * Re-examine any outer-join clauses that were set aside by + * distribute_qual_to_rels(), and either create EquivalenceClasses + * to replace them or push them out into the regular join-clause lists. + * + * When we have mergejoinable clauses A = B that are outer-join clauses, + * we can't blindly combine them with other clauses A = C to deduce B = C, + * since in fact the "equality" A = B won't necessarily hold above the + * outer join (one of the variables might be NULL instead). Nonetheless + * there are cases where we can add qual clauses using transitivity. + * + * One case that we look for here is an outer-join clause OUTERVAR = INNERVAR + * for which there is also an equivalence clause OUTERVAR = CONSTANT. + * It is safe and useful to push a clause INNERVAR = CONSTANT into the + * evaluation of the inner (nullable) relation, because any inner rows not + * meeting this condition will not contribute to the outer-join result anyway. + * (Any outer rows they could join to will be eliminated by the pushed-down + * equivalence clause.) + * + * Note that the above rule does not work for full outer joins; nor is it + * very interesting to consider cases where the equivalence clause involves + * relations entirely outside the outer join, since such clauses couldn't + * be pushed into the inner side's scan anyway. So the restriction to + * outervar = pseudoconstant is not really giving up anything. + * + * For full-join cases, we can only do something useful if it's a FULL JOIN + * USING and a merged column has an equivalence MERGEDVAR = CONSTANT. + * By the time it gets here, the merged column will look like + * COALESCE(LEFTVAR, RIGHTVAR) + * and we will have a full-join clause LEFTVAR = RIGHTVAR that we can match + * the COALESCE expression to. In this situation we can push LEFTVAR = CONSTANT + * and RIGHTVAR = CONSTANT into the input relations, since any rows not + * meeting these conditions cannot contribute to the join result. + * + * Again, there isn't any traction to be gained by trying to deal with + * clauses comparing a mergedvar to a non-pseudoconstant. So we can make + * use of the EquivalenceClasses to search for matching variables that were + * equivalenced to constants. The interesting outer-join clauses were + * accumulated for us by distribute_qual_to_rels. + * + * When we find one of these cases, we implement the changes we want by + * generating a new equivalence clause INNERVAR = CONSTANT (or LEFTVAR, etc) + * and pushing it into the EquivalenceClass structures. This is because we + * may already know that INNERVAR is equivalenced to some other var(s), and + * we'd like the constant to propagate to them too. Note that it would be + * unsafe to merge any existing EC for INNERVAR with the OUTERVAR's EC --- + * that could result in propagating constant restrictions from + * INNERVAR to OUTERVAR, which would be very wrong. + * + * If we don't find any match for a set-aside outer join clause, we must + * throw it back into the regular joinclause processing by passing it to + * distribute_restrictinfo_to_rels(). + */ +void +reconsider_outer_join_clauses(PlannerInfo *root) +{ + ListCell *lc; + + /* Process the LEFT JOIN clauses */ + foreach(lc, root->left_join_clauses) + { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc); + + reconsider_outer_join_clause(root, rinfo, true); + } + /* And the RIGHT JOIN clauses */ + foreach(lc, root->right_join_clauses) + { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc); + + reconsider_outer_join_clause(root, rinfo, false); + } + /* And the FULL JOIN clauses */ + foreach(lc, root->full_join_clauses) + { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc); + + reconsider_full_join_clause(root, rinfo); + } +} + +/* + * reconsider_outer_join_clauses for a single LEFT/RIGHT JOIN clause + */ +static void +reconsider_outer_join_clause(PlannerInfo *root, RestrictInfo *rinfo, + bool outer_on_left) +{ + Expr *outervar, + *innervar; + Oid left_type, + right_type, + inner_datatype; + ListCell *lc1; + + /* Extract needed info from the clause */ + Assert(is_opclause(rinfo->clause)); + op_input_types(((OpExpr *) rinfo->clause)->opno, + &left_type, &right_type); + if (outer_on_left) + { + outervar = (Expr *) get_leftop(rinfo->clause); + innervar = (Expr *) get_rightop(rinfo->clause); + inner_datatype = right_type; + } + else + { + outervar = (Expr *) get_rightop(rinfo->clause); + innervar = (Expr *) get_leftop(rinfo->clause); + inner_datatype = left_type; + } + + /* Scan EquivalenceClasses for a match to outervar */ + foreach(lc1, root->eq_classes) + { + EquivalenceClass *cur_ec = (EquivalenceClass *) lfirst(lc1); + bool match; + ListCell *lc2; + + /* Ignore EC unless it contains pseudoconstants */ + if (!cur_ec->ec_has_const) + continue; + /* Never match to a volatile EC */ + if (cur_ec->ec_has_volatile) + continue; + /* It has to match the outer-join clause as to opfamilies, too */ + if (!equal(rinfo->mergeopfamilies, cur_ec->ec_opfamilies)) + continue; + /* Does it contain a match to outervar? */ + match = false; + foreach(lc2, cur_ec->ec_members) + { + EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc2); + + if (equal(outervar, cur_em->em_expr)) + { + match = true; + break; + } + } + if (!match) + continue; /* no match, so ignore this EC */ + + /* + * Yes it does! Try to generate a clause INNERVAR = CONSTANT for + * each CONSTANT in the EC. Note that we must succeed with at + * least one constant before we can decide to throw away the + * outer-join clause. + */ + match = false; + foreach(lc2, cur_ec->ec_members) + { + EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc2); + Oid eq_op; + RestrictInfo *newrinfo; + + if (!cur_em->em_is_const) + continue; /* ignore non-const members */ + eq_op = select_equality_operator(cur_ec, + inner_datatype, + cur_em->em_datatype); + if (!OidIsValid(eq_op)) + continue; /* can't generate equality */ + newrinfo = build_implied_join_equality(eq_op, + innervar, + cur_em->em_expr, + cur_ec->ec_relids); + if (process_equivalence(root, newrinfo, true)) + match = true; + } + + /* + * If we were able to equate INNERVAR to any constant, we're done, and + * we can throw away the outer-join clause as redundant. Otherwise, + * fall out of the search loop, since we know the OUTERVAR appears in + * at most one EC. + */ + if (match) + return; + else + break; + } + + /* We did not find a match, so throw it back into regular processing */ + distribute_restrictinfo_to_rels(root, rinfo); +} + +/* + * reconsider_outer_join_clauses for a single FULL JOIN clause + */ +static void +reconsider_full_join_clause(PlannerInfo *root, RestrictInfo *rinfo) +{ + Expr *leftvar; + Expr *rightvar; + Oid left_type, + right_type; + ListCell *lc1; + + /* Extract needed info from the clause */ + Assert(is_opclause(rinfo->clause)); + leftvar = (Expr *) get_leftop(rinfo->clause); + rightvar = (Expr *) get_rightop(rinfo->clause); + op_input_types(((OpExpr *) rinfo->clause)->opno, + &left_type, &right_type); + + foreach(lc1, root->eq_classes) + { + EquivalenceClass *cur_ec = (EquivalenceClass *) lfirst(lc1); + EquivalenceMember *coal_em = NULL; + bool match; + bool matchleft; + bool matchright; + ListCell *lc2; + + /* Ignore EC unless it contains pseudoconstants */ + if (!cur_ec->ec_has_const) + continue; + /* Never match to a volatile EC */ + if (cur_ec->ec_has_volatile) + continue; + /* It has to match the outer-join clause as to opfamilies, too */ + if (!equal(rinfo->mergeopfamilies, cur_ec->ec_opfamilies)) + continue; + + /* + * Does it contain a COALESCE(leftvar, rightvar) construct? + * + * We can assume the COALESCE() inputs are in the same order as + * the join clause, since both were automatically generated in the + * cases we care about. + * + * XXX currently this may fail to match in cross-type cases + * because the COALESCE will contain typecast operations while the + * join clause may not (if there is a cross-type mergejoin + * operator available for the two column types). Is it OK to strip + * implicit coercions from the COALESCE arguments? + */ + match = false; + foreach(lc2, cur_ec->ec_members) + { + coal_em = (EquivalenceMember *) lfirst(lc2); + if (IsA(coal_em->em_expr, CoalesceExpr)) + { + CoalesceExpr *cexpr = (CoalesceExpr *) coal_em->em_expr; + Node *cfirst; + Node *csecond; + + if (list_length(cexpr->args) != 2) + continue; + cfirst = (Node *) linitial(cexpr->args); + csecond = (Node *) lsecond(cexpr->args); + + if (equal(leftvar, cfirst) && equal(rightvar, csecond)) + { + match = true; + break; + } + } + } + if (!match) + continue; /* no match, so ignore this EC */ + + /* + * Yes it does! Try to generate clauses LEFTVAR = CONSTANT and + * RIGHTVAR = CONSTANT for each CONSTANT in the EC. Note that we + * must succeed with at least one constant for each var before + * we can decide to throw away the outer-join clause. + */ + matchleft = matchright = false; + foreach(lc2, cur_ec->ec_members) + { + EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc2); + Oid eq_op; + RestrictInfo *newrinfo; + + if (!cur_em->em_is_const) + continue; /* ignore non-const members */ + eq_op = select_equality_operator(cur_ec, + left_type, + cur_em->em_datatype); + if (OidIsValid(eq_op)) + { + newrinfo = build_implied_join_equality(eq_op, + leftvar, + cur_em->em_expr, + cur_ec->ec_relids); + if (process_equivalence(root, newrinfo, true)) + matchleft = true; + } + eq_op = select_equality_operator(cur_ec, + right_type, + cur_em->em_datatype); + if (OidIsValid(eq_op)) + { + newrinfo = build_implied_join_equality(eq_op, + rightvar, + cur_em->em_expr, + cur_ec->ec_relids); + if (process_equivalence(root, newrinfo, true)) + matchright = true; + } + } + + /* + * If we were able to equate both vars to constants, we're done, and + * we can throw away the full-join clause as redundant. Moreover, + * we can remove the COALESCE entry from the EC, since the added + * restrictions ensure it will always have the expected value. + * (We don't bother trying to update ec_relids or ec_sources.) + */ + if (matchleft && matchright) + { + cur_ec->ec_members = list_delete_ptr(cur_ec->ec_members, coal_em); + return; + } + /* + * Otherwise, fall out of the search loop, since we know the COALESCE + * appears in at most one EC (XXX might stop being true if we allow + * stripping of coercions above?) + */ + break; + } + + /* We did not find a match, so throw it back into regular processing */ + distribute_restrictinfo_to_rels(root, rinfo); +} + + +/* + * exprs_known_equal + * Detect whether two expressions are known equal due to equivalence + * relationships. + * + * Actually, this only shows that the expressions are equal according + * to some opfamily's notion of equality --- but we only use it for + * selectivity estimation, so a fuzzy idea of equality is OK. + * + * Note: does not bother to check for "equal(item1, item2)"; caller must + * check that case if it's possible to pass identical items. + */ +bool +exprs_known_equal(PlannerInfo *root, Node *item1, Node *item2) +{ + ListCell *lc1; + + foreach(lc1, root->eq_classes) + { + EquivalenceClass *ec = (EquivalenceClass *) lfirst(lc1); + bool item1member = false; + bool item2member = false; + ListCell *lc2; + + /* Never match to a volatile EC */ + if (ec->ec_has_volatile) + continue; + + foreach(lc2, ec->ec_members) + { + EquivalenceMember *em = (EquivalenceMember *) lfirst(lc2); + + if (equal(item1, em->em_expr)) + item1member = true; + else if (equal(item2, em->em_expr)) + item2member = true; + /* Exit as soon as equality is proven */ + if (item1member && item2member) + return true; + } + } + return false; +} + + +/* + * add_child_rel_equivalences + * Search for EC members that reference (only) the parent_rel, and + * add transformed members referencing the child_rel. + * + * We only need to do this for ECs that could generate join conditions, + * since the child members are only used for creating inner-indexscan paths. + * + * parent_rel and child_rel could be derived from appinfo, but since the + * caller has already computed them, we might as well just pass them in. + */ +void +add_child_rel_equivalences(PlannerInfo *root, + AppendRelInfo *appinfo, + RelOptInfo *parent_rel, + RelOptInfo *child_rel) +{ + ListCell *lc1; + + foreach(lc1, root->eq_classes) + { + EquivalenceClass *cur_ec = (EquivalenceClass *) lfirst(lc1); + ListCell *lc2; + + /* + * Won't generate joinclauses if const or single-member (the latter + * test covers the volatile case too) + */ + if (cur_ec->ec_has_const || list_length(cur_ec->ec_members) <= 1) + continue; + + /* No point in searching if parent rel not mentioned in eclass */ + if (!bms_is_subset(parent_rel->relids, cur_ec->ec_relids)) + continue; + + foreach(lc2, cur_ec->ec_members) + { + EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc2); + + /* Does it reference (only) parent_rel? */ + if (bms_equal(cur_em->em_relids, parent_rel->relids)) + { + /* Yes, generate transformed child version */ + Expr *child_expr; + + child_expr = (Expr *) + adjust_appendrel_attrs((Node *) cur_em->em_expr, + appinfo); + add_eq_member(cur_ec, child_expr, child_rel->relids, + true, cur_em->em_datatype); + } + } + } +} + + +/* + * find_eclass_clauses_for_index_join + * Create joinclauses usable for a nestloop-with-inner-indexscan + * scanning the given inner rel with the specified set of outer rels. + */ +List * +find_eclass_clauses_for_index_join(PlannerInfo *root, RelOptInfo *rel, + Relids outer_relids) +{ + List *result = NIL; + bool is_child_rel = (rel->reloptkind == RELOPT_OTHER_MEMBER_REL); + ListCell *lc1; + + foreach(lc1, root->eq_classes) + { + EquivalenceClass *cur_ec = (EquivalenceClass *) lfirst(lc1); + ListCell *lc2; + + /* + * Won't generate joinclauses if const or single-member (the latter + * test covers the volatile case too) + */ + if (cur_ec->ec_has_const || list_length(cur_ec->ec_members) <= 1) + continue; + + /* + * No point in searching if rel not mentioned in eclass (but we + * can't tell that for a child rel). + */ + if (!is_child_rel && + !bms_is_subset(rel->relids, cur_ec->ec_relids)) + continue; + /* ... nor if no overlap with outer_relids */ + if (!bms_overlap(outer_relids, cur_ec->ec_relids)) + continue; + + /* Scan members, looking for indexable columns */ + foreach(lc2, cur_ec->ec_members) + { + EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc2); + EquivalenceMember *best_outer_em = NULL; + Oid best_eq_op = InvalidOid; + ListCell *lc3; + + if (!bms_equal(cur_em->em_relids, rel->relids) || + !eclass_matches_any_index(cur_ec, cur_em, rel)) + continue; + + /* + * Found one, so try to generate a join clause. This is like + * generate_join_implied_equalities_normal, except simpler + * since our only preference item is to pick a Var on the + * outer side. We only need one join clause per index col. + */ + foreach(lc3, cur_ec->ec_members) + { + EquivalenceMember *outer_em = (EquivalenceMember *) lfirst(lc3); + Oid eq_op; + + if (!bms_is_subset(outer_em->em_relids, outer_relids)) + continue; + eq_op = select_equality_operator(cur_ec, + cur_em->em_datatype, + outer_em->em_datatype); + if (!OidIsValid(eq_op)) + continue; + best_outer_em = outer_em; + best_eq_op = eq_op; + if (IsA(outer_em->em_expr, Var) || + (IsA(outer_em->em_expr, RelabelType) && + IsA(((RelabelType *) outer_em->em_expr)->arg, Var))) + break; /* no need to look further */ + } + + if (best_outer_em) + { + /* Found a suitable joinclause */ + RestrictInfo *rinfo; + + rinfo = build_implied_join_equality(best_eq_op, + cur_em->em_expr, + best_outer_em->em_expr, + cur_ec->ec_relids); + /* mark restrictinfo as redundant with other joinclauses */ + rinfo->parent_ec = cur_ec; + /* we can set these too */ + rinfo->left_ec = cur_ec; + rinfo->right_ec = cur_ec; + + result = lappend(result, rinfo); + /* + * Note: we keep scanning here because we want to provide + * a clause for every possible indexcol. + */ + } + } + } + + return result; +} + + +/* + * have_relevant_eclass_joinclause + * Detect whether there is an EquivalenceClass that could produce + * a joinclause between the two given relations. + * + * This is essentially a very cut-down version of + * generate_join_implied_equalities(). Note it's OK to occasionally say "yes" + * incorrectly. Hence we don't bother with details like whether the lack of a + * cross-type operator might prevent the clause from actually being generated. + */ +bool +have_relevant_eclass_joinclause(PlannerInfo *root, + RelOptInfo *rel1, RelOptInfo *rel2) +{ + ListCell *lc1; + + foreach(lc1, root->eq_classes) + { + EquivalenceClass *ec = (EquivalenceClass *) lfirst(lc1); + bool has_rel1; + bool has_rel2; + ListCell *lc2; + + /* + * Won't generate joinclauses if const or single-member (the latter + * test covers the volatile case too) + */ + if (ec->ec_has_const || list_length(ec->ec_members) <= 1) + continue; + + /* + * Note we don't test ec_broken; if we did, we'd need a separate code + * path to look through ec_sources. Checking the members anyway is OK + * as a possibly-overoptimistic heuristic. + */ + + /* Needn't scan if it couldn't contain members from each rel */ + if (!bms_overlap(rel1->relids, ec->ec_relids) || + !bms_overlap(rel2->relids, ec->ec_relids)) + continue; + + /* Scan the EC to see if it has member(s) in each rel */ + has_rel1 = has_rel2 = false; + foreach(lc2, ec->ec_members) + { + EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc2); + + if (cur_em->em_is_child) + continue; /* ignore children here */ + if (bms_is_subset(cur_em->em_relids, rel1->relids)) + { + has_rel1 = true; + if (has_rel2) + break; + } + if (bms_is_subset(cur_em->em_relids, rel2->relids)) + { + has_rel2 = true; + if (has_rel1) + break; + } + } + + if (has_rel1 && has_rel2) + return true; + } + + return false; +} + + +/* + * has_relevant_eclass_joinclause + * Detect whether there is an EquivalenceClass that could produce + * a joinclause between the given relation and anything else. + * + * This is the same as have_relevant_eclass_joinclause with the other rel + * implicitly defined as "everything else in the query". + */ +bool +has_relevant_eclass_joinclause(PlannerInfo *root, RelOptInfo *rel1) +{ + ListCell *lc1; + + foreach(lc1, root->eq_classes) + { + EquivalenceClass *ec = (EquivalenceClass *) lfirst(lc1); + bool has_rel1; + bool has_rel2; + ListCell *lc2; + + /* + * Won't generate joinclauses if const or single-member (the latter + * test covers the volatile case too) + */ + if (ec->ec_has_const || list_length(ec->ec_members) <= 1) + continue; + + /* + * Note we don't test ec_broken; if we did, we'd need a separate code + * path to look through ec_sources. Checking the members anyway is OK + * as a possibly-overoptimistic heuristic. + */ + + /* Needn't scan if it couldn't contain members from each rel */ + if (!bms_overlap(rel1->relids, ec->ec_relids) || + bms_is_subset(ec->ec_relids, rel1->relids)) + continue; + + /* Scan the EC to see if it has member(s) in each rel */ + has_rel1 = has_rel2 = false; + foreach(lc2, ec->ec_members) + { + EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc2); + + if (cur_em->em_is_child) + continue; /* ignore children here */ + if (bms_is_subset(cur_em->em_relids, rel1->relids)) + { + has_rel1 = true; + if (has_rel2) + break; + } + if (!bms_overlap(cur_em->em_relids, rel1->relids)) + { + has_rel2 = true; + if (has_rel1) + break; + } + } + + if (has_rel1 && has_rel2) + return true; + } + + return false; +} + + +/* + * eclass_useful_for_merging + * Detect whether the EC could produce any mergejoinable join clauses + * against the specified relation. + * + * This is just a heuristic test and doesn't have to be exact; it's better + * to say "yes" incorrectly than "no". Hence we don't bother with details + * like whether the lack of a cross-type operator might prevent the clause + * from actually being generated. + */ +bool +eclass_useful_for_merging(EquivalenceClass *eclass, + RelOptInfo *rel) +{ + ListCell *lc; + + Assert(!eclass->ec_merged); + + /* + * Won't generate joinclauses if const or single-member (the latter + * test covers the volatile case too) + */ + if (eclass->ec_has_const || list_length(eclass->ec_members) <= 1) + return false; + + /* + * Note we don't test ec_broken; if we did, we'd need a separate code + * path to look through ec_sources. Checking the members anyway is OK + * as a possibly-overoptimistic heuristic. + */ + + /* If rel already includes all members of eclass, no point in searching */ + if (bms_is_subset(eclass->ec_relids, rel->relids)) + return false; + + /* To join, we need a member not in the given rel */ + foreach(lc, eclass->ec_members) + { + EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc); + + if (!cur_em->em_is_child && + !bms_overlap(cur_em->em_relids, rel->relids)) + return true; + } + + return false; +} diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c index 3fd52d48500..0146cacf4e5 100644 --- a/src/backend/optimizer/path/indxpath.c +++ b/src/backend/optimizer/path/indxpath.c @@ -9,7 +9,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/path/indxpath.c,v 1.215 2007/01/09 02:14:12 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/path/indxpath.c,v 1.216 2007/01/20 20:45:39 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -32,7 +32,6 @@ #include "optimizer/var.h" #include "utils/builtins.h" #include "utils/lsyscache.h" -#include "utils/memutils.h" #include "utils/pg_locale.h" #include "utils/selfuncs.h" @@ -72,21 +71,11 @@ static bool match_rowcompare_to_indexcol(IndexOptInfo *index, Oid opfamily, RowCompareExpr *clause, Relids outer_relids); -static Relids indexable_outerrelids(RelOptInfo *rel); +static Relids indexable_outerrelids(PlannerInfo *root, RelOptInfo *rel); static bool matches_any_index(RestrictInfo *rinfo, RelOptInfo *rel, Relids outer_relids); static List *find_clauses_for_join(PlannerInfo *root, RelOptInfo *rel, Relids outer_relids, bool isouterjoin); -static ScanDirection match_variant_ordering(PlannerInfo *root, - IndexOptInfo *index, - List *restrictclauses); -static List *identify_ignorable_ordering_cols(PlannerInfo *root, - IndexOptInfo *index, - List *restrictclauses); -static bool match_index_to_query_keys(PlannerInfo *root, - IndexOptInfo *index, - ScanDirection indexscandir, - List *ignorables); static bool match_boolean_index_clause(Node *clause, int indexcol, IndexOptInfo *index); static bool match_special_index_operator(Expr *clause, Oid opfamily, @@ -157,7 +146,7 @@ create_index_paths(PlannerInfo *root, RelOptInfo *rel) * participate in such join clauses. We'll use this set later to * recognize outer rels that are equivalent for joining purposes. */ - rel->index_outer_relids = indexable_outerrelids(rel); + rel->index_outer_relids = indexable_outerrelids(root, rel); /* * Find all the index paths that are directly usable for this relation @@ -351,8 +340,7 @@ find_usable_indexes(PlannerInfo *root, RelOptInfo *rel, if (index_is_ordered && istoplevel && outer_rel == NULL) { index_pathkeys = build_index_pathkeys(root, index, - ForwardScanDirection, - true); + ForwardScanDirection); useful_pathkeys = truncate_useless_pathkeys(root, rel, index_pathkeys); } @@ -378,23 +366,21 @@ find_usable_indexes(PlannerInfo *root, RelOptInfo *rel, } /* - * 4. If the index is ordered, and there is a requested query ordering - * that we failed to match, consider variant ways of achieving the - * ordering. Again, this is only interesting at top level. + * 4. If the index is ordered, a backwards scan might be + * interesting. Again, this is only interesting at top level. */ - if (index_is_ordered && istoplevel && outer_rel == NULL && - root->query_pathkeys != NIL && - pathkeys_useful_for_ordering(root, useful_pathkeys) == 0) + if (index_is_ordered && istoplevel && outer_rel == NULL) { - ScanDirection scandir; - - scandir = match_variant_ordering(root, index, restrictclauses); - if (!ScanDirectionIsNoMovement(scandir)) + index_pathkeys = build_index_pathkeys(root, index, + BackwardScanDirection); + useful_pathkeys = truncate_useless_pathkeys(root, rel, + index_pathkeys); + if (useful_pathkeys != NIL) { ipath = create_index_path(root, index, restrictclauses, - root->query_pathkeys, - scandir, + useful_pathkeys, + BackwardScanDirection, outer_rel); result = lappend(result, ipath); } @@ -1207,19 +1193,6 @@ check_partial_indexes(PlannerInfo *root, RelOptInfo *rel) List *restrictinfo_list = rel->baserestrictinfo; ListCell *ilist; - /* - * Note: if Postgres tried to optimize queries by forming equivalence - * classes over equi-joined attributes (i.e., if it recognized that a - * qualification such as "where a.b=c.d and a.b=5" could make use of an - * index on c.d), then we could use that equivalence class info here with - * joininfo lists to do more complete tests for the usability of a partial - * index. For now, the test only uses restriction clauses (those in - * baserestrictinfo). --Nels, Dec '92 - * - * XXX as of 7.1, equivalence class info *is* available. Consider - * improving this code as foreseen by Nels. - */ - foreach(ilist, rel->indexlist) { IndexOptInfo *index = (IndexOptInfo *) lfirst(ilist); @@ -1242,18 +1215,19 @@ check_partial_indexes(PlannerInfo *root, RelOptInfo *rel) * for the specified table. Returns a set of relids. */ static Relids -indexable_outerrelids(RelOptInfo *rel) +indexable_outerrelids(PlannerInfo *root, RelOptInfo *rel) { Relids outer_relids = NULL; - ListCell *l; + bool is_child_rel = (rel->reloptkind == RELOPT_OTHER_MEMBER_REL); + ListCell *lc1; /* * Examine each joinclause in the joininfo list to see if it matches any * key of any index. If so, add the clause's other rels to the result. */ - foreach(l, rel->joininfo) + foreach(lc1, rel->joininfo) { - RestrictInfo *joininfo = (RestrictInfo *) lfirst(l); + RestrictInfo *joininfo = (RestrictInfo *) lfirst(lc1); Relids other_rels; other_rels = bms_difference(joininfo->required_relids, rel->relids); @@ -1263,6 +1237,71 @@ indexable_outerrelids(RelOptInfo *rel) bms_free(other_rels); } + /* + * We also have to look through the query's EquivalenceClasses to see + * if any of them could generate indexable join conditions for this rel. + */ + if (rel->has_eclass_joins) + { + foreach(lc1, root->eq_classes) + { + EquivalenceClass *cur_ec = (EquivalenceClass *) lfirst(lc1); + Relids other_rels = NULL; + bool found_index = false; + ListCell *lc2; + + /* + * Won't generate joinclauses if const or single-member (the latter + * test covers the volatile case too) + */ + if (cur_ec->ec_has_const || list_length(cur_ec->ec_members) <= 1) + continue; + + /* + * Note we don't test ec_broken; if we did, we'd need a separate + * code path to look through ec_sources. Checking the members + * anyway is OK as a possibly-overoptimistic heuristic. + */ + + /* + * No point in searching if rel not mentioned in eclass (but we + * can't tell that for a child rel). + */ + if (!is_child_rel && + !bms_is_subset(rel->relids, cur_ec->ec_relids)) + continue; + + /* + * Scan members, looking for both an index match and join + * candidates + */ + foreach(lc2, cur_ec->ec_members) + { + EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc2); + + /* Join candidate? */ + if (!cur_em->em_is_child && + !bms_overlap(cur_em->em_relids, rel->relids)) + { + other_rels = bms_add_members(other_rels, + cur_em->em_relids); + continue; + } + + /* Check for index match (only need one) */ + if (!found_index && + bms_equal(cur_em->em_relids, rel->relids) && + eclass_matches_any_index(cur_ec, cur_em, rel)) + found_index = true; + } + + if (found_index) + outer_relids = bms_join(outer_relids, other_rels); + else + bms_free(other_rels); + } + } + return outer_relids; } @@ -1340,6 +1379,42 @@ matches_any_index(RestrictInfo *rinfo, RelOptInfo *rel, Relids outer_relids) } /* + * eclass_matches_any_index + * Workhorse for indexable_outerrelids: see if an EquivalenceClass member + * can be matched to any index column of the given rel. + * + * This is also exported for use by find_eclass_clauses_for_index_join. + */ +bool +eclass_matches_any_index(EquivalenceClass *ec, EquivalenceMember *em, + RelOptInfo *rel) +{ + ListCell *l; + + foreach(l, rel->indexlist) + { + IndexOptInfo *index = (IndexOptInfo *) lfirst(l); + int indexcol = 0; + Oid *families = index->opfamily; + + do + { + Oid curFamily = families[0]; + + if (list_member_oid(ec->ec_opfamilies, curFamily) && + match_index_to_operand((Node *) em->em_expr, indexcol, index)) + return true; + + indexcol++; + families++; + } while (!DoneMatchingIndexKeys(families)); + } + + return false; +} + + +/* * best_inner_indexscan * Finds the best available inner indexscan for a nestloop join * with the given rel on the inside and the given outer_rel outside. @@ -1393,12 +1468,12 @@ best_inner_indexscan(PlannerInfo *root, RelOptInfo *rel, return NULL; /* - * Otherwise, we have to do path selection in the memory context of the - * given rel, so that any created path can be safely attached to the rel's - * cache of best inner paths. (This is not currently an issue for normal - * planning, but it is an issue for GEQO planning.) + * Otherwise, we have to do path selection in the main planning context, + * so that any created path can be safely attached to the rel's cache of + * best inner paths. (This is not currently an issue for normal planning, + * but it is an issue for GEQO planning.) */ - oldcontext = MemoryContextSwitchTo(GetMemoryChunkContext(rel)); + oldcontext = MemoryContextSwitchTo(root->planner_cxt); /* * Intersect the given outer relids with index_outer_relids to find the @@ -1539,7 +1614,12 @@ find_clauses_for_join(PlannerInfo *root, RelOptInfo *rel, Relids join_relids; ListCell *l; - /* Look for joinclauses that are usable with given outer_relids */ + /* + * Look for joinclauses that are usable with given outer_relids. Note + * we'll take anything that's applicable to the join whether it has + * anything to do with an index or not; since we're only building a list, + * it's not worth filtering more finely here. + */ join_relids = bms_union(rel->relids, outer_relids); foreach(l, rel->joininfo) @@ -1557,276 +1637,27 @@ find_clauses_for_join(PlannerInfo *root, RelOptInfo *rel, bms_free(join_relids); - /* if no join clause was matched then forget it, per comments above */ + /* + * Also check to see if any EquivalenceClasses can produce a relevant + * joinclause. Since all such clauses are effectively pushed-down, + * this doesn't apply to outer joins. + */ + if (!isouterjoin && rel->has_eclass_joins) + clause_list = list_concat(clause_list, + find_eclass_clauses_for_index_join(root, + rel, + outer_relids)); + + /* If no join clause was matched then forget it, per comments above */ if (clause_list == NIL) return NIL; - /* - * We can also use any plain restriction clauses for the rel. We put - * these at the front of the clause list for the convenience of - * remove_redundant_join_clauses, which can never remove non-join clauses - * and hence won't be able to get rid of a non-join clause if it appears - * after a join clause it is redundant with. - */ + /* We can also use any plain restriction clauses for the rel */ clause_list = list_concat(list_copy(rel->baserestrictinfo), clause_list); - /* - * We may now have clauses that are known redundant. Get rid of 'em. - */ - if (list_length(clause_list) > 1) - { - clause_list = remove_redundant_join_clauses(root, - clause_list, - isouterjoin); - } - return clause_list; } -/**************************************************************************** - * ---- ROUTINES TO HANDLE PATHKEYS ---- - ****************************************************************************/ - -/* - * match_variant_ordering - * Try to match an index's ordering to the query's requested ordering - * - * This is used when the index is ordered but a naive comparison fails to - * match its ordering (pathkeys) to root->query_pathkeys. It may be that - * we need to scan the index backwards. Also, a less naive comparison can - * help for both forward and backward indexscans. Columns of the index - * that have an equality restriction clause can be ignored in the match; - * that is, an index on (x,y) can be considered to match the ordering of - * ... WHERE x = 42 ORDER BY y; - * - * Note: it would be possible to similarly ignore useless ORDER BY items; - * that is, an index on just y could be considered to match the ordering of - * ... WHERE x = 42 ORDER BY x, y; - * But proving that this is safe would require finding a btree opfamily - * containing both the = operator and the < or > operator in the ORDER BY - * item. That's significantly more expensive than what we do here, since - * we'd have to look at restriction clauses unrelated to the current index - * and search for opfamilies without any hint from the index. The practical - * use-cases seem to be mostly covered by ignoring index columns, so that's - * all we do for now. - * - * Inputs: - * 'index' is the index of interest. - * 'restrictclauses' is the list of sublists of restriction clauses - * matching the columns of the index (NIL if none) - * - * If able to match the requested query pathkeys, returns either - * ForwardScanDirection or BackwardScanDirection to indicate the proper index - * scan direction. If no match, returns NoMovementScanDirection. - */ -static ScanDirection -match_variant_ordering(PlannerInfo *root, - IndexOptInfo *index, - List *restrictclauses) -{ - List *ignorables; - - /* - * Forget the whole thing if not a btree index; our check for ignorable - * columns assumes we are dealing with btree opfamilies. (It'd be possible - * to factor out just the try for backwards indexscan, but considering - * that we presently have no orderable indexes except btrees anyway, it's - * hardly worth contorting this code for that case.) - * - * Note: if you remove this, you probably need to put in a check on - * amoptionalkey to prevent possible clauseless scan on an index that - * won't cope. - */ - if (index->relam != BTREE_AM_OID) - return NoMovementScanDirection; - - /* - * Figure out which index columns can be optionally ignored because they - * have an equality constraint. This is the same set for either forward - * or backward scan, so we do it just once. - */ - ignorables = identify_ignorable_ordering_cols(root, index, - restrictclauses); - - /* - * Try to match to forward scan, then backward scan. However, we can skip - * the forward-scan case if there are no ignorable columns, because - * find_usable_indexes() would have found the match already. - */ - if (ignorables && - match_index_to_query_keys(root, index, ForwardScanDirection, - ignorables)) - return ForwardScanDirection; - - if (match_index_to_query_keys(root, index, BackwardScanDirection, - ignorables)) - return BackwardScanDirection; - - return NoMovementScanDirection; -} - -/* - * identify_ignorable_ordering_cols - * Determine which index columns can be ignored for ordering purposes - * - * Returns an integer List of column numbers (1-based) of ignorable - * columns. The ignorable columns are those that have equality constraints - * against pseudoconstants. - */ -static List * -identify_ignorable_ordering_cols(PlannerInfo *root, - IndexOptInfo *index, - List *restrictclauses) -{ - List *result = NIL; - int indexcol = 0; /* note this is 0-based */ - ListCell *l; - - /* restrictclauses is either NIL or has a sublist per column */ - foreach(l, restrictclauses) - { - List *sublist = (List *) lfirst(l); - Oid opfamily = index->opfamily[indexcol]; - ListCell *l2; - - foreach(l2, sublist) - { - RestrictInfo *rinfo = (RestrictInfo *) lfirst(l2); - OpExpr *clause = (OpExpr *) rinfo->clause; - Oid clause_op; - int op_strategy; - bool varonleft; - bool ispc; - - /* First check for boolean-index cases. */ - if (IsBooleanOpfamily(opfamily)) - { - if (match_boolean_index_clause((Node *) clause, indexcol, - index)) - { - /* - * The clause means either col = TRUE or col = FALSE; we - * do not care which, it's an equality constraint either - * way. - */ - result = lappend_int(result, indexcol + 1); - break; - } - } - - /* Otherwise, ignore if not a binary opclause */ - if (!is_opclause(clause) || list_length(clause->args) != 2) - continue; - - /* Determine left/right sides and check the operator */ - clause_op = clause->opno; - if (match_index_to_operand(linitial(clause->args), indexcol, - index)) - { - /* clause_op is correct */ - varonleft = true; - } - else - { - Assert(match_index_to_operand(lsecond(clause->args), indexcol, - index)); - /* Must flip operator to get the opfamily member */ - clause_op = get_commutator(clause_op); - varonleft = false; - } - if (!OidIsValid(clause_op)) - continue; /* ignore non match, per next comment */ - op_strategy = get_op_opfamily_strategy(clause_op, opfamily); - - /* - * You might expect to see Assert(op_strategy != 0) here, but you - * won't: the clause might contain a special indexable operator - * rather than an ordinary opfamily member. Currently none of the - * special operators are very likely to expand to an equality - * operator; we do not bother to check, but just assume no match. - */ - if (op_strategy != BTEqualStrategyNumber) - continue; - - /* Now check that other side is pseudoconstant */ - if (varonleft) - ispc = is_pseudo_constant_clause_relids(lsecond(clause->args), - rinfo->right_relids); - else - ispc = is_pseudo_constant_clause_relids(linitial(clause->args), - rinfo->left_relids); - if (ispc) - { - result = lappend_int(result, indexcol + 1); - break; - } - } - indexcol++; - } - return result; -} - -/* - * match_index_to_query_keys - * Check a single scan direction for "intelligent" match to query keys - * - * 'index' is the index of interest. - * 'indexscandir' is the scan direction to consider - * 'ignorables' is an integer list of indexes of ignorable index columns - * - * Returns TRUE on successful match (ie, the query_pathkeys can be considered - * to match this index). - */ -static bool -match_index_to_query_keys(PlannerInfo *root, - IndexOptInfo *index, - ScanDirection indexscandir, - List *ignorables) -{ - List *index_pathkeys; - ListCell *index_cell; - int index_col; - ListCell *r; - - /* Get the pathkeys that exactly describe the index */ - index_pathkeys = build_index_pathkeys(root, index, indexscandir, false); - - /* - * Can we match to the query's requested pathkeys? The inner loop skips - * over ignorable index columns while trying to match. - */ - index_cell = list_head(index_pathkeys); - index_col = 0; - - foreach(r, root->query_pathkeys) - { - List *rsubkey = (List *) lfirst(r); - - for (;;) - { - List *isubkey; - - if (index_cell == NULL) - return false; - isubkey = (List *) lfirst(index_cell); - index_cell = lnext(index_cell); - index_col++; /* index_col is now 1-based */ - - /* - * Since we are dealing with canonicalized pathkeys, pointer - * comparison is sufficient to determine a match. - */ - if (rsubkey == isubkey) - break; /* matched current query pathkey */ - - if (!list_member_int(ignorables, index_col)) - return false; /* definite failure to match */ - /* otherwise loop around and try to match to next index col */ - } - } - - return true; -} /**************************************************************************** * ---- PATH CREATION UTILITIES ---- diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c index 2885b021546..3a68d79ca9e 100644 --- a/src/backend/optimizer/path/joinpath.c +++ b/src/backend/optimizer/path/joinpath.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/path/joinpath.c,v 1.110 2007/01/10 18:06:03 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/path/joinpath.c,v 1.111 2007/01/20 20:45:39 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -16,7 +16,6 @@ #include <math.h> -#include "access/skey.h" #include "optimizer/cost.h" #include "optimizer/pathnode.h" #include "optimizer/paths.h" @@ -40,10 +39,6 @@ static List *select_mergejoin_clauses(RelOptInfo *joinrel, RelOptInfo *innerrel, List *restrictlist, JoinType jointype); -static void build_mergejoin_strat_arrays(List *mergeclauses, - Oid **mergefamilies, - int **mergestrategies, - bool **mergenullsfirst); /* @@ -205,9 +200,9 @@ sort_inner_and_outer(PlannerInfo *root, * * Actually, it's not quite true that every mergeclause ordering will * generate a different path order, because some of the clauses may be - * redundant. Therefore, what we do is convert the mergeclause list to a - * list of canonical pathkeys, and then consider different orderings of - * the pathkeys. + * partially redundant (refer to the same EquivalenceClasses). Therefore, + * what we do is convert the mergeclause list to a list of canonical + * pathkeys, and then consider different orderings of the pathkeys. * * Generating a path for *every* permutation of the pathkeys doesn't seem * like a winning strategy; the cost in planning time is too high. For @@ -216,75 +211,58 @@ sort_inner_and_outer(PlannerInfo *root, * mergejoin without re-sorting against any other possible mergejoin * partner path. But if we've not guessed the right ordering of secondary * keys, we may end up evaluating clauses as qpquals when they could have - * been done as mergeclauses. We need to figure out a better way. (Two - * possible approaches: look at all the relevant index relations to - * suggest plausible sort orders, or make just one output path and somehow - * mark it as having a sort-order that can be rearranged freely.) + * been done as mergeclauses. (In practice, it's rare that there's more + * than two or three mergeclauses, so expending a huge amount of thought + * on that is probably not worth it.) + * + * The pathkey order returned by select_outer_pathkeys_for_merge() has + * some heuristics behind it (see that function), so be sure to try it + * exactly as-is as well as making variants. */ - all_pathkeys = make_pathkeys_for_mergeclauses(root, - mergeclause_list, - outerrel); + all_pathkeys = select_outer_pathkeys_for_merge(root, + mergeclause_list, + joinrel); foreach(l, all_pathkeys) { List *front_pathkey = (List *) lfirst(l); - List *cur_pathkeys; List *cur_mergeclauses; - Oid *mergefamilies; - int *mergestrategies; - bool *mergenullsfirst; List *outerkeys; List *innerkeys; List *merge_pathkeys; - /* Make a pathkey list with this guy first. */ + /* Make a pathkey list with this guy first */ if (l != list_head(all_pathkeys)) - cur_pathkeys = lcons(front_pathkey, - list_delete_ptr(list_copy(all_pathkeys), - front_pathkey)); + outerkeys = lcons(front_pathkey, + list_delete_ptr(list_copy(all_pathkeys), + front_pathkey)); else - cur_pathkeys = all_pathkeys; /* no work at first one... */ + outerkeys = all_pathkeys; /* no work at first one... */ - /* - * Select mergeclause(s) that match this sort ordering. If we had - * redundant merge clauses then we will get a subset of the original - * clause list. There had better be some match, however... - */ + /* Sort the mergeclauses into the corresponding ordering */ cur_mergeclauses = find_mergeclauses_for_pathkeys(root, - cur_pathkeys, + outerkeys, + true, mergeclause_list); - Assert(cur_mergeclauses != NIL); - /* Forget it if can't use all the clauses in right/full join */ - if (useallclauses && - list_length(cur_mergeclauses) != list_length(mergeclause_list)) - continue; + /* Should have used them all... */ + Assert(list_length(cur_mergeclauses) == list_length(mergeclause_list)); - /* - * Build sort pathkeys for both sides. - * - * Note: it's possible that the cheapest paths will already be sorted - * properly. create_mergejoin_path will detect that case and suppress - * an explicit sort step, so we needn't do so here. - */ - outerkeys = make_pathkeys_for_mergeclauses(root, - cur_mergeclauses, - outerrel); - innerkeys = make_pathkeys_for_mergeclauses(root, - cur_mergeclauses, - innerrel); - /* Build pathkeys representing output sort order. */ + /* Build sort pathkeys for the inner side */ + innerkeys = make_inner_pathkeys_for_merge(root, + cur_mergeclauses, + outerkeys); + + /* Build pathkeys representing output sort order */ merge_pathkeys = build_join_pathkeys(root, joinrel, jointype, outerkeys); - /* Build opfamily info for execution */ - build_mergejoin_strat_arrays(cur_mergeclauses, - &mergefamilies, - &mergestrategies, - &mergenullsfirst); - /* * And now we can make the path. + * + * Note: it's possible that the cheapest paths will already be sorted + * properly. create_mergejoin_path will detect that case and suppress + * an explicit sort step, so we needn't do so here. */ add_path(joinrel, (Path *) create_mergejoin_path(root, @@ -295,9 +273,6 @@ sort_inner_and_outer(PlannerInfo *root, restrictlist, merge_pathkeys, cur_mergeclauses, - mergefamilies, - mergestrategies, - mergenullsfirst, outerkeys, innerkeys)); } @@ -427,9 +402,6 @@ match_unsorted_outer(PlannerInfo *root, Path *outerpath = (Path *) lfirst(l); List *merge_pathkeys; List *mergeclauses; - Oid *mergefamilies; - int *mergestrategies; - bool *mergenullsfirst; List *innersortkeys; List *trialsortkeys; Path *cheapest_startup_inner; @@ -510,6 +482,7 @@ match_unsorted_outer(PlannerInfo *root, /* Look for useful mergeclauses (if any) */ mergeclauses = find_mergeclauses_for_pathkeys(root, outerpath->pathkeys, + true, mergeclause_list); /* @@ -532,15 +505,9 @@ match_unsorted_outer(PlannerInfo *root, continue; /* Compute the required ordering of the inner path */ - innersortkeys = make_pathkeys_for_mergeclauses(root, - mergeclauses, - innerrel); - - /* Build opfamily info for execution */ - build_mergejoin_strat_arrays(mergeclauses, - &mergefamilies, - &mergestrategies, - &mergenullsfirst); + innersortkeys = make_inner_pathkeys_for_merge(root, + mergeclauses, + outerpath->pathkeys); /* * Generate a mergejoin on the basis of sorting the cheapest inner. @@ -557,9 +524,6 @@ match_unsorted_outer(PlannerInfo *root, restrictlist, merge_pathkeys, mergeclauses, - mergefamilies, - mergestrategies, - mergenullsfirst, NIL, innersortkeys)); @@ -613,18 +577,12 @@ match_unsorted_outer(PlannerInfo *root, newclauses = find_mergeclauses_for_pathkeys(root, trialsortkeys, + false, mergeclauses); Assert(newclauses != NIL); } else newclauses = mergeclauses; - - /* Build opfamily info for execution */ - build_mergejoin_strat_arrays(newclauses, - &mergefamilies, - &mergestrategies, - &mergenullsfirst); - add_path(joinrel, (Path *) create_mergejoin_path(root, joinrel, @@ -634,9 +592,6 @@ match_unsorted_outer(PlannerInfo *root, restrictlist, merge_pathkeys, newclauses, - mergefamilies, - mergestrategies, - mergenullsfirst, NIL, NIL)); cheapest_total_inner = innerpath; @@ -666,19 +621,13 @@ match_unsorted_outer(PlannerInfo *root, newclauses = find_mergeclauses_for_pathkeys(root, trialsortkeys, + false, mergeclauses); Assert(newclauses != NIL); } else newclauses = mergeclauses; } - - /* Build opfamily info for execution */ - build_mergejoin_strat_arrays(newclauses, - &mergefamilies, - &mergestrategies, - &mergenullsfirst); - add_path(joinrel, (Path *) create_mergejoin_path(root, joinrel, @@ -688,9 +637,6 @@ match_unsorted_outer(PlannerInfo *root, restrictlist, merge_pathkeys, newclauses, - mergefamilies, - mergestrategies, - mergenullsfirst, NIL, NIL)); } @@ -909,6 +855,10 @@ best_appendrel_indexscan(PlannerInfo *root, RelOptInfo *rel, * Select mergejoin clauses that are usable for a particular join. * Returns a list of RestrictInfo nodes for those clauses. * + * We also mark each selected RestrictInfo to show which side is currently + * being considered as outer. These are transient markings that are only + * good for the duration of the current add_paths_to_joinrel() call! + * * We examine each restrictinfo clause known for the join to see * if it is mergejoinable and involves vars from the two sub-relations * currently of interest. @@ -939,7 +889,7 @@ select_mergejoin_clauses(RelOptInfo *joinrel, continue; if (!restrictinfo->can_join || - restrictinfo->mergejoinoperator == InvalidOid) + restrictinfo->mergeopfamilies == NIL) { have_nonmergeable_joinclause = true; continue; /* not mergejoinable */ @@ -954,11 +904,13 @@ select_mergejoin_clauses(RelOptInfo *joinrel, bms_is_subset(restrictinfo->right_relids, innerrel->relids)) { /* righthand side is inner */ + restrictinfo->outer_is_left = true; } else if (bms_is_subset(restrictinfo->left_relids, innerrel->relids) && bms_is_subset(restrictinfo->right_relids, outerrel->relids)) { /* lefthand side is inner */ + restrictinfo->outer_is_left = false; } else { @@ -966,7 +918,7 @@ select_mergejoin_clauses(RelOptInfo *joinrel, continue; /* no good for these input relations */ } - result_list = lcons(restrictinfo, result_list); + result_list = lappend(result_list, restrictinfo); } /* @@ -995,46 +947,3 @@ select_mergejoin_clauses(RelOptInfo *joinrel, return result_list; } - -/* - * Temporary hack to build opfamily and strategy info needed for mergejoin - * by the executor. We need to rethink the planner's handling of merge - * planning so that it can deal with multiple possible merge orders, but - * that's not done yet. - */ -static void -build_mergejoin_strat_arrays(List *mergeclauses, - Oid **mergefamilies, - int **mergestrategies, - bool **mergenullsfirst) -{ - int nClauses = list_length(mergeclauses); - int i; - ListCell *l; - - *mergefamilies = (Oid *) palloc(nClauses * sizeof(Oid)); - *mergestrategies = (int *) palloc(nClauses * sizeof(int)); - *mergenullsfirst = (bool *) palloc(nClauses * sizeof(bool)); - - i = 0; - foreach(l, mergeclauses) - { - RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l); - - /* - * We do not need to worry about whether the mergeclause will be - * commuted at runtime --- it's the same opfamily either way. - */ - (*mergefamilies)[i] = restrictinfo->mergeopfamily; - /* - * For the moment, strategy must always be LessThan --- see - * hack version of get_op_mergejoin_info - */ - (*mergestrategies)[i] = BTLessStrategyNumber; - - /* And we only allow NULLS LAST, too */ - (*mergenullsfirst)[i] = false; - - i++; - } -} diff --git a/src/backend/optimizer/path/joinrels.c b/src/backend/optimizer/path/joinrels.c index f1fe34e5393..70e16eeb8e8 100644 --- a/src/backend/optimizer/path/joinrels.c +++ b/src/backend/optimizer/path/joinrels.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/path/joinrels.c,v 1.83 2007/01/05 22:19:31 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/path/joinrels.c,v 1.84 2007/01/20 20:45:39 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -72,7 +72,7 @@ make_rels_by_joins(PlannerInfo *root, int level, List **joinrels) other_rels = list_head(joinrels[1]); /* consider all initial * rels */ - if (old_rel->joininfo != NIL) + if (old_rel->joininfo != NIL || old_rel->has_eclass_joins) { /* * Note that if all available join clauses for this rel require @@ -152,7 +152,8 @@ make_rels_by_joins(PlannerInfo *root, int level, List **joinrels) * outer joins --- then we might have to force a bushy outer * join. See have_relevant_joinclause(). */ - if (old_rel->joininfo == NIL && root->oj_info_list == NIL) + if (old_rel->joininfo == NIL && !old_rel->has_eclass_joins && + root->oj_info_list == NIL) continue; if (k == other_level) @@ -251,8 +252,7 @@ make_rels_by_joins(PlannerInfo *root, int level, List **joinrels) /* * make_rels_by_clause_joins * Build joins between the given relation 'old_rel' and other relations - * that are mentioned within old_rel's joininfo list (i.e., relations - * that participate in join clauses that 'old_rel' also participates in). + * that participate in join clauses that 'old_rel' also participates in. * The join rel nodes are returned in a list. * * 'old_rel' is the relation entry for the relation to be joined diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c index 4fc5a5f1250..15793b4ef99 100644 --- a/src/backend/optimizer/path/pathkeys.c +++ b/src/backend/optimizer/path/pathkeys.c @@ -11,687 +11,180 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/path/pathkeys.c,v 1.81 2007/01/09 02:14:12 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/path/pathkeys.c,v 1.82 2007/01/20 20:45:39 tgl Exp $ * *------------------------------------------------------------------------- */ #include "postgres.h" +#include "access/skey.h" #include "nodes/makefuncs.h" +#include "nodes/plannodes.h" #include "optimizer/clauses.h" #include "optimizer/pathnode.h" #include "optimizer/paths.h" -#include "optimizer/planmain.h" #include "optimizer/tlist.h" -#include "optimizer/var.h" #include "parser/parsetree.h" #include "parser/parse_expr.h" #include "utils/lsyscache.h" -#include "utils/memutils.h" - - -static PathKeyItem *makePathKeyItem(Node *key, Oid sortop, bool nulls_first, - bool checkType); -static void generate_outer_join_implications(PlannerInfo *root, - List *equi_key_set, - Relids *relids); -static void sub_generate_join_implications(PlannerInfo *root, - List *equi_key_set, Relids *relids, - Node *item1, Oid sortop1, - Relids item1_relids); -static void process_implied_const_eq(PlannerInfo *root, - List *equi_key_set, Relids *relids, - Node *item1, Oid sortop1, - Relids item1_relids, - bool delete_it); -static List *make_canonical_pathkey(PlannerInfo *root, PathKeyItem *item); -static Var *find_indexkey_var(PlannerInfo *root, RelOptInfo *rel, - AttrNumber varattno); /* - * makePathKeyItem - * create a PathKeyItem node + * If an EC contains a const and isn't below-outer-join, any PathKey depending + * on it must be redundant, since there's only one possible value of the key. */ -static PathKeyItem * -makePathKeyItem(Node *key, Oid sortop, bool nulls_first, bool checkType) -{ - PathKeyItem *item = makeNode(PathKeyItem); - - /* - * Some callers pass expressions that are not necessarily of the same type - * as the sort operator expects as input (for example when dealing with an - * index that uses binary-compatible operators). We must relabel these - * with the correct type so that the key expressions will be seen as - * equal() to expressions that have been correctly labeled. - */ - if (checkType) - { - Oid lefttype, - righttype; +#define MUST_BE_REDUNDANT(eclass) \ + ((eclass)->ec_has_const && !(eclass)->ec_below_outer_join) + +static PathKey *makePathKey(EquivalenceClass *eclass, Oid opfamily, + int strategy, bool nulls_first); +static PathKey *make_canonical_pathkey(PlannerInfo *root, + EquivalenceClass *eclass, Oid opfamily, + int strategy, bool nulls_first); +static bool pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys); +static PathKey *make_pathkey_from_sortinfo(PlannerInfo *root, + Expr *expr, Oid ordering_op, + bool nulls_first, + bool canonicalize); +static Var *find_indexkey_var(PlannerInfo *root, RelOptInfo *rel, + AttrNumber varattno); - op_input_types(sortop, &lefttype, &righttype); - if (exprType(key) != lefttype) - key = (Node *) makeRelabelType((Expr *) key, - lefttype, -1, - COERCE_DONTCARE); - } - item->key = key; - item->sortop = sortop; - item->nulls_first = nulls_first; - return item; -} +/**************************************************************************** + * PATHKEY CONSTRUCTION AND REDUNDANCY TESTING + ****************************************************************************/ /* - * add_equijoined_keys - * The given clause has a mergejoinable operator, so its two sides - * can be considered equal after restriction clause application; in - * particular, any pathkey mentioning one side (with the correct sortop) - * can be expanded to include the other as well. Record the exprs and - * associated sortops in the query's equi_key_list for future use. + * makePathKey + * create a PathKey node * - * The query's equi_key_list field points to a list of sublists of PathKeyItem - * nodes, where each sublist is a set of two or more exprs+sortops that have - * been identified as logically equivalent (and, therefore, we may consider - * any two in a set to be equal). As described above, we will subsequently - * use direct pointers to one of these sublists to represent any pathkey - * that involves an equijoined variable. + * This does not promise to create a canonical PathKey, it's merely a + * convenience routine to build the specified node. */ -void -add_equijoined_keys(PlannerInfo *root, RestrictInfo *restrictinfo) +static PathKey * +makePathKey(EquivalenceClass *eclass, Oid opfamily, + int strategy, bool nulls_first) { - Expr *clause = restrictinfo->clause; - PathKeyItem *item1 = makePathKeyItem(get_leftop(clause), - restrictinfo->left_sortop, - false, /* XXX nulls_first? */ - false); - PathKeyItem *item2 = makePathKeyItem(get_rightop(clause), - restrictinfo->right_sortop, - false, /* XXX nulls_first? */ - false); - List *newset; - ListCell *cursetlink; - - /* We might see a clause X=X; don't make a single-element list from it */ - if (equal(item1, item2)) - return; - - /* - * Our plan is to make a two-element set, then sweep through the existing - * equijoin sets looking for matches to item1 or item2. When we find one, - * we remove that set from equi_key_list and union it into our new set. - * When done, we add the new set to the front of equi_key_list. - * - * It may well be that the two items we're given are already known to be - * equijoin-equivalent, in which case we don't need to change our data - * structure. If we find both of them in the same equivalence set to - * start with, we can quit immediately. - * - * This is a standard UNION-FIND problem, for which there exist better - * data structures than simple lists. If this code ever proves to be a - * bottleneck then it could be sped up --- but for now, simple is - * beautiful. - */ - newset = NIL; - - /* cannot use foreach here because of possible lremove */ - cursetlink = list_head(root->equi_key_list); - while (cursetlink) - { - List *curset = (List *) lfirst(cursetlink); - bool item1here = list_member(curset, item1); - bool item2here = list_member(curset, item2); - - /* must advance cursetlink before lremove possibly pfree's it */ - cursetlink = lnext(cursetlink); - - if (item1here || item2here) - { - /* - * If find both in same equivalence set, no need to do any more - */ - if (item1here && item2here) - { - /* Better not have seen only one in an earlier set... */ - Assert(newset == NIL); - return; - } - - /* Build the new set only when we know we must */ - if (newset == NIL) - newset = list_make2(item1, item2); - - /* Found a set to merge into our new set */ - newset = list_concat_unique(newset, curset); - - /* - * Remove old set from equi_key_list. - */ - root->equi_key_list = list_delete_ptr(root->equi_key_list, curset); - list_free(curset); /* might as well recycle old cons cells */ - } - } + PathKey *pk = makeNode(PathKey); - /* Build the new set only when we know we must */ - if (newset == NIL) - newset = list_make2(item1, item2); + pk->pk_eclass = eclass; + pk->pk_opfamily = opfamily; + pk->pk_strategy = strategy; + pk->pk_nulls_first = nulls_first; - root->equi_key_list = lcons(newset, root->equi_key_list); + return pk; } /* - * generate_implied_equalities - * Scan the completed equi_key_list for the query, and generate explicit - * qualifications (WHERE clauses) for all the pairwise equalities not - * already mentioned in the quals; or remove qualifications found to be - * redundant. - * - * Adding deduced equalities is useful because the additional clauses help - * the selectivity-estimation code and may allow better joins to be chosen; - * and in fact it's *necessary* to ensure that sort keys we think are - * equivalent really are (see src/backend/optimizer/README for more info). - * - * If an equi_key_list set includes any constants then we adopt a different - * strategy: we record all the "var = const" deductions we can make, and - * actively remove all the "var = var" clauses that are implied by the set - * (including the clauses that originally gave rise to the set!). The reason - * is that given input like "a = b AND b = 42", once we have deduced "a = 42" - * there is no longer any need to apply the clause "a = b"; not only is - * it a waste of time to check it, but we will misestimate selectivity if the - * clause is left in. So we must remove it. For this purpose, any pathkey - * item that mentions no Vars of the current level can be taken as a constant. - * (The only case where this would be risky is if the item contains volatile - * functions; but we will never consider such an expression to be a pathkey - * at all, because check_mergejoinable() will reject it.) - * - * Also, when we have constants in an equi_key_list we can try to propagate - * the constants into outer joins; see generate_outer_join_implications - * for discussion. + * make_canonical_pathkey + * Given the parameters for a PathKey, find any pre-existing matching + * pathkey in the query's list of "canonical" pathkeys. Make a new + * entry if there's not one already. * - * This routine just walks the equi_key_list to find all pairwise equalities. - * We call process_implied_equality (in plan/initsplan.c) to adjust the - * restrictinfo datastructures for each pair. + * Note that this function must not be used until after we have completed + * merging EquivalenceClasses. */ -void -generate_implied_equalities(PlannerInfo *root) +static PathKey * +make_canonical_pathkey(PlannerInfo *root, + EquivalenceClass *eclass, Oid opfamily, + int strategy, bool nulls_first) { - ListCell *cursetlink; - - foreach(cursetlink, root->equi_key_list) - { - List *curset = (List *) lfirst(cursetlink); - int nitems = list_length(curset); - Relids *relids; - bool have_consts; - ListCell *ptr1; - int i1; + PathKey *pk; + ListCell *lc; + MemoryContext oldcontext; - /* - * A set containing only two items cannot imply any equalities beyond - * the one that created the set, so we can skip it --- unless outer - * joins appear in the query. - */ - if (nitems < 3 && !root->hasOuterJoins) - continue; + /* The passed eclass might be non-canonical, so chase up to the top */ + while (eclass->ec_merged) + eclass = eclass->ec_merged; - /* - * Collect info about relids mentioned in each item. For this routine - * we only really care whether there are any at all in each item, but - * process_implied_equality() needs the exact sets, so we may as well - * pull them here. - */ - relids = (Relids *) palloc(nitems * sizeof(Relids)); - have_consts = false; - i1 = 0; - foreach(ptr1, curset) - { - PathKeyItem *item1 = (PathKeyItem *) lfirst(ptr1); + foreach(lc, root->canon_pathkeys) + { + pk = (PathKey *) lfirst(lc); + if (eclass == pk->pk_eclass && + opfamily == pk->pk_opfamily && + strategy == pk->pk_strategy && + nulls_first == pk->pk_nulls_first) + return pk; + } - relids[i1] = pull_varnos(item1->key); - if (bms_is_empty(relids[i1])) - have_consts = true; - i1++; - } + /* + * Be sure canonical pathkeys are allocated in the main planning context. + * Not an issue in normal planning, but it is for GEQO. + */ + oldcontext = MemoryContextSwitchTo(root->planner_cxt); - /* - * Match each item in the set with all that appear after it (it's - * sufficient to generate A=B, need not process B=A too). - * - * A set containing only two items cannot imply any equalities beyond - * the one that created the set, so we can skip this processing in - * that case. - */ - if (nitems >= 3) - { - i1 = 0; - foreach(ptr1, curset) - { - PathKeyItem *item1 = (PathKeyItem *) lfirst(ptr1); - bool i1_is_variable = !bms_is_empty(relids[i1]); - ListCell *ptr2; - int i2 = i1 + 1; + pk = makePathKey(eclass, opfamily, strategy, nulls_first); + root->canon_pathkeys = lappend(root->canon_pathkeys, pk); - for_each_cell(ptr2, lnext(ptr1)) - { - PathKeyItem *item2 = (PathKeyItem *) lfirst(ptr2); - bool i2_is_variable = !bms_is_empty(relids[i2]); - - /* - * If it's "const = const" then just ignore it altogether. - * There is no place in the restrictinfo structure to - * store it. (If the two consts are in fact unequal, then - * propagating the comparison to Vars will cause us to - * produce zero rows out, as expected.) - */ - if (i1_is_variable || i2_is_variable) - { - /* - * Tell process_implied_equality to delete the clause, - * not add it, if it's "var = var" and we have - * constants present in the list. - */ - bool delete_it = (have_consts && - i1_is_variable && - i2_is_variable); - - process_implied_equality(root, - item1->key, item2->key, - item1->sortop, item2->sortop, - relids[i1], relids[i2], - delete_it); - } - i2++; - } - i1++; - } - } + MemoryContextSwitchTo(oldcontext); - /* - * If we have constant(s) and outer joins, try to propagate the - * constants through outer-join quals. - */ - if (have_consts && root->hasOuterJoins) - generate_outer_join_implications(root, curset, relids); - } + return pk; } /* - * generate_outer_join_implications - * Generate clauses that can be deduced in outer-join situations. + * pathkey_is_redundant + * Is a pathkey redundant with one already in the given list? * - * When we have mergejoinable clauses A = B that are outer-join clauses, - * we can't blindly combine them with other clauses A = C to deduce B = C, - * since in fact the "equality" A = B won't necessarily hold above the - * outer join (one of the variables might be NULL instead). Nonetheless - * there are cases where we can add qual clauses using transitivity. + * Both the given pathkey and the list members must be canonical for this + * to work properly. We detect two cases: * - * One case that we look for here is an outer-join clause OUTERVAR = INNERVAR - * combined with a pushed-down (valid everywhere) clause OUTERVAR = CONSTANT. - * It is safe and useful to push a clause INNERVAR = CONSTANT into the - * evaluation of the inner (nullable) relation, because any inner rows not - * meeting this condition will not contribute to the outer-join result anyway. - * (Any outer rows they could join to will be eliminated by the pushed-down - * clause.) + * 1. If the new pathkey's equivalence class contains a constant, and isn't + * below an outer join, then we can disregard it as a sort key. An example: + * SELECT ... WHERE x = 42 ORDER BY x, y; + * We may as well just sort by y. Note that because of opfamily matching, + * this is semantically correct: we know that the equality constraint is one + * that actually binds the variable to a single value in the terms of any + * ordering operator that might go with the eclass. This rule not only lets + * us simplify (or even skip) explicit sorts, but also allows matching index + * sort orders to a query when there are don't-care index columns. * - * Note that the above rule does not work for full outer joins, nor for - * pushed-down restrictions on an inner-side variable; nor is it very - * interesting to consider cases where the pushed-down clause involves - * relations entirely outside the outer join, since such clauses couldn't - * be pushed into the inner side's scan anyway. So the restriction to - * outervar = pseudoconstant is not really giving up anything. + * 2. If the new pathkey's equivalence class is the same as that of any + * existing member of the pathkey list, then it is redundant. Some examples: + * SELECT ... ORDER BY x, x; + * SELECT ... ORDER BY x, x DESC; + * SELECT ... WHERE x = y ORDER BY x, y; + * In all these cases the second sort key cannot distinguish values that are + * considered equal by the first, and so there's no point in using it. + * Note in particular that we need not compare opfamily (all the opfamilies + * of the EC have the same notion of equality) nor sort direction. * - * For full-join cases, we can only do something useful if it's a FULL JOIN - * USING and a merged column has a restriction MERGEDVAR = CONSTANT. By - * the time it gets here, the restriction will look like - * COALESCE(LEFTVAR, RIGHTVAR) = CONSTANT - * and we will have a join clause LEFTVAR = RIGHTVAR that we can match the - * COALESCE expression to. In this situation we can push LEFTVAR = CONSTANT - * and RIGHTVAR = CONSTANT into the input relations, since any rows not - * meeting these conditions cannot contribute to the join result. - * - * Again, there isn't any traction to be gained by trying to deal with - * clauses comparing a mergedvar to a non-pseudoconstant. So we can make - * use of the equi_key_lists to quickly find the interesting pushed-down - * clauses. The interesting outer-join clauses were accumulated for us by - * distribute_qual_to_rels. - * - * equi_key_set: a list of PathKeyItems that are known globally equivalent, - * at least one of which is a pseudoconstant. - * relids: an array of Relids sets showing the relation membership of each - * PathKeyItem in equi_key_set. + * Because the equivclass.c machinery forms only one copy of any EC per query, + * pointer comparison is enough to decide whether canonical ECs are the same. */ -static void -generate_outer_join_implications(PlannerInfo *root, - List *equi_key_set, - Relids *relids) +static bool +pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys) { - ListCell *l; - int i = 0; + EquivalenceClass *new_ec = new_pathkey->pk_eclass; + ListCell *lc; - /* Process each non-constant element of equi_key_set */ - foreach(l, equi_key_set) - { - PathKeyItem *item1 = (PathKeyItem *) lfirst(l); + /* Assert we've been given canonical pathkeys */ + Assert(!new_ec->ec_merged); - if (!bms_is_empty(relids[i])) - { - sub_generate_join_implications(root, equi_key_set, relids, - item1->key, - item1->sortop, - relids[i]); - } - i++; - } -} - -/* - * sub_generate_join_implications - * Propagate a constant equality through outer join clauses. - * - * The item described by item1/sortop1/item1_relids has been determined - * to be equal to the constant(s) listed in equi_key_set. Recursively - * trace out the implications of this. - * - * equi_key_set and relids are as for generate_outer_join_implications. - */ -static void -sub_generate_join_implications(PlannerInfo *root, - List *equi_key_set, Relids *relids, - Node *item1, Oid sortop1, Relids item1_relids) - -{ - ListCell *l; + /* Check for EC containing a constant --- unconditionally redundant */ + if (MUST_BE_REDUNDANT(new_ec)) + return true; - /* - * Examine each mergejoinable outer-join clause with OUTERVAR on left, - * looking for an OUTERVAR identical to item1 - */ - foreach(l, root->left_join_clauses) + /* If same EC already used in list, then redundant */ + foreach(lc, pathkeys) { - RestrictInfo *rinfo = (RestrictInfo *) lfirst(l); - Node *leftop = get_leftop(rinfo->clause); - - if (equal(leftop, item1) && rinfo->left_sortop == sortop1) - { - /* - * Match, so find constant member(s) of set and generate implied - * INNERVAR = CONSTANT - */ - Node *rightop = get_rightop(rinfo->clause); + PathKey *old_pathkey = (PathKey *) lfirst(lc); - process_implied_const_eq(root, equi_key_set, relids, - rightop, - rinfo->right_sortop, - rinfo->right_relids, - false); + /* Assert we've been given canonical pathkeys */ + Assert(!old_pathkey->pk_eclass->ec_merged); - /* - * We can remove explicit tests of this outer-join qual, too, - * since we now have tests forcing each of its sides to the same - * value. - */ - process_implied_equality(root, - leftop, rightop, - rinfo->left_sortop, rinfo->right_sortop, - rinfo->left_relids, rinfo->right_relids, - true); - - /* - * And recurse to see if we can deduce anything from INNERVAR = - * CONSTANT - */ - sub_generate_join_implications(root, equi_key_set, relids, - rightop, - rinfo->right_sortop, - rinfo->right_relids); - } - } - - /* The same, looking at clauses with OUTERVAR on right */ - foreach(l, root->right_join_clauses) - { - RestrictInfo *rinfo = (RestrictInfo *) lfirst(l); - Node *rightop = get_rightop(rinfo->clause); - - if (equal(rightop, item1) && rinfo->right_sortop == sortop1) - { - /* - * Match, so find constant member(s) of set and generate implied - * INNERVAR = CONSTANT - */ - Node *leftop = get_leftop(rinfo->clause); - - process_implied_const_eq(root, equi_key_set, relids, - leftop, - rinfo->left_sortop, - rinfo->left_relids, - false); - - /* - * We can remove explicit tests of this outer-join qual, too, - * since we now have tests forcing each of its sides to the same - * value. - */ - process_implied_equality(root, - leftop, rightop, - rinfo->left_sortop, rinfo->right_sortop, - rinfo->left_relids, rinfo->right_relids, - true); - - /* - * And recurse to see if we can deduce anything from INNERVAR = - * CONSTANT - */ - sub_generate_join_implications(root, equi_key_set, relids, - leftop, - rinfo->left_sortop, - rinfo->left_relids); - } - } - - /* - * Only COALESCE(x,y) items can possibly match full joins - */ - if (IsA(item1, CoalesceExpr)) - { - CoalesceExpr *cexpr = (CoalesceExpr *) item1; - Node *cfirst; - Node *csecond; - - if (list_length(cexpr->args) != 2) - return; - cfirst = (Node *) linitial(cexpr->args); - csecond = (Node *) lsecond(cexpr->args); - - /* - * Examine each mergejoinable full-join clause, looking for a clause - * of the form "x = y" matching the COALESCE(x,y) expression - */ - foreach(l, root->full_join_clauses) - { - RestrictInfo *rinfo = (RestrictInfo *) lfirst(l); - Node *leftop = get_leftop(rinfo->clause); - Node *rightop = get_rightop(rinfo->clause); - - /* - * We can assume the COALESCE() inputs are in the same order as - * the join clause, since both were automatically generated in the - * cases we care about. - * - * XXX currently this may fail to match in cross-type cases - * because the COALESCE will contain typecast operations while the - * join clause may not (if there is a cross-type mergejoin - * operator available for the two column types). Is it OK to strip - * implicit coercions from the COALESCE arguments? What of the - * sortops in such cases? - */ - if (equal(leftop, cfirst) && - equal(rightop, csecond) && - rinfo->left_sortop == sortop1 && - rinfo->right_sortop == sortop1) - { - /* - * Match, so find constant member(s) of set and generate - * implied LEFTVAR = CONSTANT - */ - process_implied_const_eq(root, equi_key_set, relids, - leftop, - rinfo->left_sortop, - rinfo->left_relids, - false); - /* ... and RIGHTVAR = CONSTANT */ - process_implied_const_eq(root, equi_key_set, relids, - rightop, - rinfo->right_sortop, - rinfo->right_relids, - false); - /* ... and remove COALESCE() = CONSTANT */ - process_implied_const_eq(root, equi_key_set, relids, - item1, - sortop1, - item1_relids, - true); - - /* - * We can remove explicit tests of this outer-join qual, too, - * since we now have tests forcing each of its sides to the - * same value. - */ - process_implied_equality(root, - leftop, rightop, - rinfo->left_sortop, - rinfo->right_sortop, - rinfo->left_relids, - rinfo->right_relids, - true); - - /* - * And recurse to see if we can deduce anything from LEFTVAR = - * CONSTANT - */ - sub_generate_join_implications(root, equi_key_set, relids, - leftop, - rinfo->left_sortop, - rinfo->left_relids); - /* ... and RIGHTVAR = CONSTANT */ - sub_generate_join_implications(root, equi_key_set, relids, - rightop, - rinfo->right_sortop, - rinfo->right_relids); - - } - } - } -} - -/* - * process_implied_const_eq - * Apply process_implied_equality with the given item and each - * pseudoconstant member of equi_key_set. - * - * equi_key_set and relids are as for generate_outer_join_implications, - * the other parameters as for process_implied_equality. - */ -static void -process_implied_const_eq(PlannerInfo *root, List *equi_key_set, Relids *relids, - Node *item1, Oid sortop1, Relids item1_relids, - bool delete_it) -{ - ListCell *l; - bool found = false; - int i = 0; - - foreach(l, equi_key_set) - { - PathKeyItem *item2 = (PathKeyItem *) lfirst(l); - - if (bms_is_empty(relids[i])) - { - process_implied_equality(root, - item1, item2->key, - sortop1, item2->sortop, - item1_relids, NULL, - delete_it); - found = true; - } - i++; + if (new_ec == old_pathkey->pk_eclass) + return true; } - /* Caller screwed up if no constants in list */ - Assert(found); -} -/* - * exprs_known_equal - * Detect whether two expressions are known equal due to equijoin clauses. - * - * Note: does not bother to check for "equal(item1, item2)"; caller must - * check that case if it's possible to pass identical items. - */ -bool -exprs_known_equal(PlannerInfo *root, Node *item1, Node *item2) -{ - ListCell *cursetlink; - - foreach(cursetlink, root->equi_key_list) - { - List *curset = (List *) lfirst(cursetlink); - bool item1member = false; - bool item2member = false; - ListCell *ptr; - - foreach(ptr, curset) - { - PathKeyItem *pitem = (PathKeyItem *) lfirst(ptr); - - if (equal(item1, pitem->key)) - item1member = true; - else if (equal(item2, pitem->key)) - item2member = true; - /* Exit as soon as equality is proven */ - if (item1member && item2member) - return true; - } - } return false; } - -/* - * make_canonical_pathkey - * Given a PathKeyItem, find the equi_key_list subset it is a member of, - * if any. If so, return a pointer to that sublist, which is the - * canonical representation (for this query) of that PathKeyItem's - * equivalence set. If it is not found, add a singleton "equivalence set" - * to the equi_key_list and return that --- see compare_pathkeys. - * - * Note that this function must not be used until after we have completed - * scanning the WHERE clause for equijoin operators. - */ -static List * -make_canonical_pathkey(PlannerInfo *root, PathKeyItem *item) -{ - List *newset; - ListCell *cursetlink; - - foreach(cursetlink, root->equi_key_list) - { - List *curset = (List *) lfirst(cursetlink); - - if (list_member(curset, item)) - return curset; - } - newset = list_make1(item); - root->equi_key_list = lcons(newset, root->equi_key_list); - return newset; -} - /* * canonicalize_pathkeys * Convert a not-necessarily-canonical pathkeys list to canonical form. * * Note that this function must not be used until after we have completed - * scanning the WHERE clause for equijoin operators. + * merging EquivalenceClasses. */ List * canonicalize_pathkeys(PlannerInfo *root, List *pathkeys) @@ -701,55 +194,116 @@ canonicalize_pathkeys(PlannerInfo *root, List *pathkeys) foreach(l, pathkeys) { - List *pathkey = (List *) lfirst(l); - PathKeyItem *item; - List *cpathkey; + PathKey *pathkey = (PathKey *) lfirst(l); + EquivalenceClass *eclass; + PathKey *cpathkey; - /* - * It's sufficient to look at the first entry in the sublist; if there - * are more entries, they're already part of an equivalence set by - * definition. - */ - Assert(pathkey != NIL); - item = (PathKeyItem *) linitial(pathkey); - cpathkey = make_canonical_pathkey(root, item); + /* Find the canonical (merged) EquivalenceClass */ + eclass = pathkey->pk_eclass; + while (eclass->ec_merged) + eclass = eclass->ec_merged; /* - * Eliminate redundant ordering requests --- ORDER BY A,A is the same - * as ORDER BY A. We want to check this only after we have - * canonicalized the keys, so that equivalent-key knowledge is used - * when deciding if an item is redundant. + * If we can tell it's redundant just from the EC, skip. + * pathkey_is_redundant would notice that, but we needn't even bother + * constructing the node... */ - new_pathkeys = list_append_unique_ptr(new_pathkeys, cpathkey); + if (MUST_BE_REDUNDANT(eclass)) + continue; + + /* OK, build a canonicalized PathKey struct */ + cpathkey = make_canonical_pathkey(root, + eclass, + pathkey->pk_opfamily, + pathkey->pk_strategy, + pathkey->pk_nulls_first); + + /* Add to list unless redundant */ + if (!pathkey_is_redundant(cpathkey, new_pathkeys)) + new_pathkeys = lappend(new_pathkeys, cpathkey); } return new_pathkeys; } - /* - * count_canonical_peers - * Given a PathKeyItem, find the equi_key_list subset it is a member of, - * if any. If so, return the number of other members of the set. - * If not, return 0 (without actually adding it to our equi_key_list). + * make_pathkey_from_sortinfo + * Given an expression, a sortop, and a nulls-first flag, create + * a PathKey. If canonicalize = true, the result is a "canonical" + * PathKey, otherwise not. (But note it might be redundant anyway.) * - * This is a hack to support the rather bogus heuristics in - * convert_subquery_pathkeys. + * canonicalize should always be TRUE after EquivalenceClass merging has + * been performed, but FALSE if we haven't done EquivalenceClass merging yet. */ -static int -count_canonical_peers(PlannerInfo *root, PathKeyItem *item) +static PathKey * +make_pathkey_from_sortinfo(PlannerInfo *root, + Expr *expr, Oid ordering_op, + bool nulls_first, + bool canonicalize) { - ListCell *cursetlink; + Oid equality_op; + List *opfamilies; + Oid opfamily, + lefttype, + righttype; + int strategy; + ListCell *lc; + EquivalenceClass *eclass; - foreach(cursetlink, root->equi_key_list) - { - List *curset = (List *) lfirst(cursetlink); + /* + * An ordering operator fully determines the behavior of its opfamily, + * so could only meaningfully appear in one family --- or perhaps two + * if one builds a reverse-sort opfamily, but there's not much point in + * that anymore. But EquivalenceClasses need to contain opfamily lists + * based on the family membership of equality operators, which could + * easily be bigger. So, look up the equality operator that goes with + * the ordering operator (this should be unique) and get its membership. + */ + equality_op = get_equality_op_for_ordering_op(ordering_op); + if (!OidIsValid(equality_op)) /* shouldn't happen */ + elog(ERROR, "could not find equality operator for ordering operator %u", + ordering_op); + opfamilies = get_mergejoin_opfamilies(equality_op); + if (!opfamilies) /* certainly should find some */ + elog(ERROR, "could not find opfamilies for ordering operator %u", + ordering_op); - if (list_member(curset, item)) - return list_length(curset) - 1; + /* + * Next we have to determine the strategy number to put into the pathkey. + * In the presence of reverse-sort opclasses there might be two answers. + * We prefer the one associated with the first opfamilies member that + * this ordering_op appears in (this will be consistently defined in + * normal system operation; see comments for get_mergejoin_opfamilies()). + */ + opfamily = InvalidOid; + strategy = 0; + foreach(lc, opfamilies) + { + opfamily = lfirst_oid(lc); + strategy = get_op_opfamily_strategy(ordering_op, opfamily); + if (strategy) + break; } - return 0; + if (!(strategy == BTLessStrategyNumber || + strategy == BTGreaterStrategyNumber)) + elog(ERROR, "ordering operator %u is has wrong strategy number %d", + ordering_op, strategy); + + /* Need the declared input type of the operator, too */ + op_input_types(ordering_op, &lefttype, &righttype); + Assert(lefttype == righttype); + + /* Now find or create a matching EquivalenceClass */ + eclass = get_eclass_for_sort_expr(root, expr, lefttype, opfamilies); + + /* And finally we can find or create a PathKey node */ + if (canonicalize) + return make_canonical_pathkey(root, eclass, opfamily, + strategy, nulls_first); + else + return makePathKey(eclass, opfamily, strategy, nulls_first); } + /**************************************************************************** * PATHKEY COMPARISONS ****************************************************************************/ @@ -760,7 +314,7 @@ count_canonical_peers(PlannerInfo *root, PathKeyItem *item) * one is "better" than the other. * * This function may only be applied to canonicalized pathkey lists. - * In the canonical representation, sublists can be checked for equality + * In the canonical representation, pathkeys can be checked for equality * by simple pointer comparison. */ PathKeysComparison @@ -771,33 +325,25 @@ compare_pathkeys(List *keys1, List *keys2) forboth(key1, keys1, key2, keys2) { - List *subkey1 = (List *) lfirst(key1); - List *subkey2 = (List *) lfirst(key2); + PathKey *pathkey1 = (PathKey *) lfirst(key1); + PathKey *pathkey2 = (PathKey *) lfirst(key2); /* * XXX would like to check that we've been given canonicalized input, * but PlannerInfo not accessible here... */ #ifdef NOT_USED - Assert(list_member_ptr(root->equi_key_list, subkey1)); - Assert(list_member_ptr(root->equi_key_list, subkey2)); + Assert(list_member_ptr(root->canon_pathkeys, pathkey1)); + Assert(list_member_ptr(root->canon_pathkeys, pathkey2)); #endif - /* - * We will never have two subkeys where one is a subset of the other, - * because of the canonicalization process. Either they are equal or - * they ain't. Furthermore, we only need pointer comparison to detect - * equality. - */ - if (subkey1 != subkey2) + if (pathkey1 != pathkey2) return PATHKEYS_DIFFERENT; /* no need to keep looking */ } /* * If we reached the end of only one list, the other is longer and - * therefore not a subset. (We assume the additional sublist(s) of the - * other list are not NIL --- no pathkey list should ever have a NIL - * sublist.) + * therefore not a subset. */ if (key1 == NULL && key2 == NULL) return PATHKEYS_EQUAL; @@ -912,11 +458,8 @@ get_cheapest_fractional_path_for_pathkeys(List *paths, * If 'scandir' is BackwardScanDirection, attempt to build pathkeys * representing a backwards scan of the index. Return NIL if can't do it. * - * If 'canonical' is TRUE, we remove duplicate pathkeys (which can occur - * if two index columns are equijoined, eg WHERE x = 1 AND y = 1). This - * is required if the result is to be compared directly to a canonical query - * pathkeys list. However, some callers want a list with exactly one entry - * per index column, and they must pass FALSE. + * The result is canonical, meaning that redundant pathkeys are removed; + * it may therefore have fewer entries than there are index columns. * * We generate the full pathkeys list whether or not all are useful for the * current query. Caller should do truncate_useless_pathkeys(). @@ -924,8 +467,7 @@ get_cheapest_fractional_path_for_pathkeys(List *paths, List * build_index_pathkeys(PlannerInfo *root, IndexOptInfo *index, - ScanDirection scandir, - bool canonical) + ScanDirection scandir) { List *retval = NIL; ListCell *indexprs_item = list_head(index->indexprs); @@ -936,9 +478,8 @@ build_index_pathkeys(PlannerInfo *root, Oid sortop; bool nulls_first; int ikey; - Node *indexkey; - PathKeyItem *item; - List *cpathkey; + Expr *indexkey; + PathKey *cpathkey; if (ScanDirectionIsBackward(scandir)) { @@ -958,25 +499,26 @@ build_index_pathkeys(PlannerInfo *root, if (ikey != 0) { /* simple index column */ - indexkey = (Node *) find_indexkey_var(root, index->rel, ikey); + indexkey = (Expr *) find_indexkey_var(root, index->rel, ikey); } else { /* expression --- assume we need not copy it */ if (indexprs_item == NULL) elog(ERROR, "wrong number of index expressions"); - indexkey = (Node *) lfirst(indexprs_item); + indexkey = (Expr *) lfirst(indexprs_item); indexprs_item = lnext(indexprs_item); } - /* OK, make a sublist for this sort key */ - item = makePathKeyItem(indexkey, sortop, nulls_first, true); - cpathkey = make_canonical_pathkey(root, item); + /* OK, make a canonical pathkey for this sort key */ + cpathkey = make_pathkey_from_sortinfo(root, + indexkey, + sortop, + nulls_first, + true); - /* Eliminate redundant ordering info if requested */ - if (canonical) - retval = list_append_unique_ptr(retval, cpathkey); - else + /* Add to list unless redundant */ + if (!pathkey_is_redundant(cpathkey, retval)) retval = lappend(retval, cpathkey); } @@ -1042,31 +584,31 @@ convert_subquery_pathkeys(PlannerInfo *root, RelOptInfo *rel, foreach(i, subquery_pathkeys) { - List *sub_pathkey = (List *) lfirst(i); + PathKey *sub_pathkey = (PathKey *) lfirst(i); + EquivalenceClass *sub_eclass = sub_pathkey->pk_eclass; + PathKey *best_pathkey = NULL; + int best_score = -1; ListCell *j; - PathKeyItem *best_item = NULL; - int best_score = 0; - List *cpathkey; /* - * The sub_pathkey could contain multiple elements (representing - * knowledge that multiple items are effectively equal). Each element - * might match none, one, or more of the output columns that are - * visible to the outer query. This means we may have multiple - * possible representations of the sub_pathkey in the context of the - * outer query. Ideally we would generate them all and put them all - * into a pathkey list of the outer query, thereby propagating - * equality knowledge up to the outer query. Right now we cannot do - * so, because the outer query's canonical pathkey sets are already - * frozen when this is called. Instead we prefer the one that has the - * highest "score" (number of canonical pathkey peers, plus one if it - * matches the outer query_pathkeys). This is the most likely to be - * useful in the outer query. + * The sub_pathkey's EquivalenceClass could contain multiple elements + * (representing knowledge that multiple items are effectively equal). + * Each element might match none, one, or more of the output columns + * that are visible to the outer query. This means we may have + * multiple possible representations of the sub_pathkey in the context + * of the outer query. Ideally we would generate them all and put + * them all into an EC of the outer query, thereby propagating + * equality knowledge up to the outer query. Right now we cannot do + * so, because the outer query's EquivalenceClasses are already frozen + * when this is called. Instead we prefer the one that has the highest + * "score" (number of EC peers, plus one if it matches the outer + * query_pathkeys). This is the most likely to be useful in the outer + * query. */ - foreach(j, sub_pathkey) + foreach(j, sub_eclass->ec_members) { - PathKeyItem *sub_item = (PathKeyItem *) lfirst(j); - Node *sub_key = sub_item->key; + EquivalenceMember *sub_member = (EquivalenceMember *) lfirst(j); + Expr *sub_expr = sub_member->em_expr; Expr *rtarg; ListCell *k; @@ -1076,26 +618,27 @@ convert_subquery_pathkeys(PlannerInfo *root, RelOptInfo *rel, * entry. (The latter case is worth extra code because it arises * frequently in connection with varchar fields.) */ - if (IsA(sub_key, RelabelType)) - rtarg = ((RelabelType *) sub_key)->arg; + if (IsA(sub_expr, RelabelType)) + rtarg = ((RelabelType *) sub_expr)->arg; else rtarg = NULL; foreach(k, sub_tlist) { TargetEntry *tle = (TargetEntry *) lfirst(k); - Node *outer_expr; - PathKeyItem *outer_item; + Expr *outer_expr; + EquivalenceClass *outer_ec; + PathKey *outer_pk; int score; /* resjunk items aren't visible to outer query */ if (tle->resjunk) continue; - if (equal(tle->expr, sub_key)) + if (equal(tle->expr, sub_expr)) { /* Exact match */ - outer_expr = (Node *) + outer_expr = (Expr *) makeVar(rel->relid, tle->resno, exprType((Node *) tle->expr), @@ -1105,35 +648,40 @@ convert_subquery_pathkeys(PlannerInfo *root, RelOptInfo *rel, else if (rtarg && equal(tle->expr, rtarg)) { /* Match after discarding RelabelType */ - outer_expr = (Node *) + outer_expr = (Expr *) makeVar(rel->relid, tle->resno, exprType((Node *) tle->expr), exprTypmod((Node *) tle->expr), 0); - outer_expr = (Node *) + outer_expr = (Expr *) makeRelabelType((Expr *) outer_expr, - ((RelabelType *) sub_key)->resulttype, - ((RelabelType *) sub_key)->resulttypmod, - ((RelabelType *) sub_key)->relabelformat); + ((RelabelType *) sub_expr)->resulttype, + ((RelabelType *) sub_expr)->resulttypmod, + ((RelabelType *) sub_expr)->relabelformat); } else continue; - /* Found a representation for this sub_key */ - outer_item = makePathKeyItem(outer_expr, - sub_item->sortop, - sub_item->nulls_first, - true); - /* score = # of mergejoin peers */ - score = count_canonical_peers(root, outer_item); + /* Found a representation for this sub_pathkey */ + outer_ec = get_eclass_for_sort_expr(root, + outer_expr, + sub_member->em_datatype, + sub_eclass->ec_opfamilies); + outer_pk = make_canonical_pathkey(root, + outer_ec, + sub_pathkey->pk_opfamily, + sub_pathkey->pk_strategy, + sub_pathkey->pk_nulls_first); + /* score = # of equivalence peers */ + score = list_length(outer_ec->ec_members) - 1; /* +1 if it matches the proper query_pathkeys item */ if (retvallen < outer_query_keys && - list_member(list_nth(root->query_pathkeys, retvallen), outer_item)) + list_nth(root->query_pathkeys, retvallen) == outer_pk) score++; if (score > best_score) { - best_item = outer_item; + best_pathkey = outer_pk; best_score = score; } } @@ -1143,19 +691,16 @@ convert_subquery_pathkeys(PlannerInfo *root, RelOptInfo *rel, * If we couldn't find a representation of this sub_pathkey, we're * done (we can't use the ones to its right, either). */ - if (!best_item) + if (!best_pathkey) break; - /* Canonicalize the chosen item (we did not before) */ - cpathkey = make_canonical_pathkey(root, best_item); - /* * Eliminate redundant ordering info; could happen if outer query - * equijoins subquery keys... + * equivalences subquery keys... */ - if (!list_member_ptr(retval, cpathkey)) + if (!pathkey_is_redundant(best_pathkey, retval)) { - retval = lappend(retval, cpathkey); + retval = lappend(retval, best_pathkey); retvallen++; } } @@ -1166,19 +711,14 @@ convert_subquery_pathkeys(PlannerInfo *root, RelOptInfo *rel, /* * build_join_pathkeys * Build the path keys for a join relation constructed by mergejoin or - * nestloop join. These keys should include all the path key vars of the - * outer path (since the join will retain the ordering of the outer path) - * plus any vars of the inner path that are equijoined to the outer vars. - * - * Per the discussion in backend/optimizer/README, equijoined inner vars - * can be considered path keys of the result, just the same as the outer - * vars they were joined with; furthermore, it doesn't matter what kind - * of join algorithm is actually used. + * nestloop join. This is normally the same as the outer path's keys. * * EXCEPTION: in a FULL or RIGHT join, we cannot treat the result as * having the outer path's path keys, because null lefthand rows may be * inserted at random points. It must be treated as unsorted. * + * We truncate away any pathkeys that are uninteresting for higher joins. + * * 'joinrel' is the join relation that paths are being formed for * 'jointype' is the join type (inner, left, full, etc) * 'outer_pathkeys' is the list of the current outer path's path keys @@ -1197,8 +737,7 @@ build_join_pathkeys(PlannerInfo *root, /* * This used to be quite a complex bit of code, but now that all pathkey * sublists start out life canonicalized, we don't have to do a darn thing - * here! The inner-rel vars we used to need to add are *already* part of - * the outer pathkey! + * here! * * We do, however, need to truncate the pathkeys list, since it may * contain pathkeys that were useful for forming this joinrel but are @@ -1216,18 +755,21 @@ build_join_pathkeys(PlannerInfo *root, * Generate a pathkeys list that represents the sort order specified * by a list of SortClauses (GroupClauses will work too!) * - * NB: the result is NOT in canonical form, but must be passed through - * canonicalize_pathkeys() before it can be used for comparisons or - * labeling relation sort orders. (We do things this way because - * grouping_planner needs to be able to construct requested pathkeys - * before the pathkey equivalence sets have been created for the query.) + * If canonicalize is TRUE, the resulting PathKeys are all in canonical form; + * otherwise not. canonicalize should always be TRUE after EquivalenceClass + * merging has been performed, but FALSE if we haven't done EquivalenceClass + * merging yet. (We provide this option because grouping_planner() needs to + * be able to represent requested pathkeys before the equivalence classes have + * been created for the query.) * * 'sortclauses' is a list of SortClause or GroupClause nodes * 'tlist' is the targetlist to find the referenced tlist entries in */ List * -make_pathkeys_for_sortclauses(List *sortclauses, - List *tlist) +make_pathkeys_for_sortclauses(PlannerInfo *root, + List *sortclauses, + List *tlist, + bool canonicalize) { List *pathkeys = NIL; ListCell *l; @@ -1235,19 +777,24 @@ make_pathkeys_for_sortclauses(List *sortclauses, foreach(l, sortclauses) { SortClause *sortcl = (SortClause *) lfirst(l); - Node *sortkey; - PathKeyItem *pathkey; - - sortkey = get_sortgroupclause_expr(sortcl, tlist); - pathkey = makePathKeyItem(sortkey, sortcl->sortop, sortcl->nulls_first, - true); - - /* - * The pathkey becomes a one-element sublist, for now; - * canonicalize_pathkeys() might replace it with a longer sublist - * later. - */ - pathkeys = lappend(pathkeys, list_make1(pathkey)); + Expr *sortkey; + PathKey *pathkey; + + sortkey = (Expr *) get_sortgroupclause_expr(sortcl, tlist); + pathkey = make_pathkey_from_sortinfo(root, + sortkey, + sortcl->sortop, + sortcl->nulls_first, + canonicalize); + + /* Canonical form eliminates redundant ordering keys */ + if (canonicalize) + { + if (!pathkey_is_redundant(pathkey, pathkeys)) + pathkeys = lappend(pathkeys, pathkey); + } + else + pathkeys = lappend(pathkeys, pathkey); } return pathkeys; } @@ -1257,49 +804,44 @@ make_pathkeys_for_sortclauses(List *sortclauses, ****************************************************************************/ /* - * cache_mergeclause_pathkeys - * Make the cached pathkeys valid in a mergeclause restrictinfo. + * cache_mergeclause_eclasses + * Make the cached EquivalenceClass links valid in a mergeclause + * restrictinfo. * - * RestrictInfo contains fields in which we may cache the result - * of looking up the canonical pathkeys for the left and right sides - * of the mergeclause. (Note that in normal cases they will be the - * same, but not if the mergeclause appears above an OUTER JOIN.) - * This is a worthwhile savings because these routines will be invoked - * many times when dealing with a many-relation query. - * - * We have to be careful that the cached values are palloc'd in the same - * context the RestrictInfo node itself is in. This is not currently a - * problem for normal planning, but it is an issue for GEQO planning. + * RestrictInfo contains fields in which we may cache pointers to + * EquivalenceClasses for the left and right inputs of the mergeclause. + * (If the mergeclause is a true equivalence clause these will be the + * same EquivalenceClass, otherwise not.) */ void -cache_mergeclause_pathkeys(PlannerInfo *root, RestrictInfo *restrictinfo) +cache_mergeclause_eclasses(PlannerInfo *root, RestrictInfo *restrictinfo) { - Node *key; - PathKeyItem *item; - MemoryContext oldcontext; - - Assert(restrictinfo->mergejoinoperator != InvalidOid); + Assert(restrictinfo->mergeopfamilies != NIL); - if (restrictinfo->left_pathkey == NIL) - { - oldcontext = MemoryContextSwitchTo(GetMemoryChunkContext(restrictinfo)); - key = get_leftop(restrictinfo->clause); - item = makePathKeyItem(key, restrictinfo->left_sortop, - false, /* XXX nulls_first? */ - false); - restrictinfo->left_pathkey = make_canonical_pathkey(root, item); - MemoryContextSwitchTo(oldcontext); - } - if (restrictinfo->right_pathkey == NIL) + /* the cached values should be either both set or both not */ + if (restrictinfo->left_ec == NULL) { - oldcontext = MemoryContextSwitchTo(GetMemoryChunkContext(restrictinfo)); - key = get_rightop(restrictinfo->clause); - item = makePathKeyItem(key, restrictinfo->right_sortop, - false, /* XXX nulls_first? */ - false); - restrictinfo->right_pathkey = make_canonical_pathkey(root, item); - MemoryContextSwitchTo(oldcontext); + Expr *clause = restrictinfo->clause; + Oid lefttype, + righttype; + + /* Need the declared input types of the operator */ + op_input_types(((OpExpr *) clause)->opno, &lefttype, &righttype); + + /* Find or create a matching EquivalenceClass for each side */ + restrictinfo->left_ec = + get_eclass_for_sort_expr(root, + (Expr *) get_leftop(clause), + lefttype, + restrictinfo->mergeopfamilies); + restrictinfo->right_ec = + get_eclass_for_sort_expr(root, + (Expr *) get_rightop(clause), + righttype, + restrictinfo->mergeopfamilies); } + else + Assert(restrictinfo->right_ec != NULL); } /* @@ -1309,72 +851,82 @@ cache_mergeclause_pathkeys(PlannerInfo *root, RestrictInfo *restrictinfo) * If successful, it returns a list of mergeclauses. * * 'pathkeys' is a pathkeys list showing the ordering of an input path. - * It doesn't matter whether it is for the inner or outer path. + * 'outer_keys' is TRUE if these keys are for the outer input path, + * FALSE if for inner. * 'restrictinfos' is a list of mergejoinable restriction clauses for the * join relation being formed. * + * The restrictinfos must be marked (via outer_is_left) to show which side + * of each clause is associated with the current outer path. (See + * select_mergejoin_clauses()) + * * The result is NIL if no merge can be done, else a maximal list of * usable mergeclauses (represented as a list of their restrictinfo nodes). - * - * XXX Ideally we ought to be considering context, ie what path orderings - * are available on the other side of the join, rather than just making - * an arbitrary choice among the mergeclauses that will work for this side - * of the join. */ List * find_mergeclauses_for_pathkeys(PlannerInfo *root, List *pathkeys, + bool outer_keys, List *restrictinfos) { List *mergeclauses = NIL; ListCell *i; - /* make sure we have pathkeys cached in the clauses */ + /* make sure we have eclasses cached in the clauses */ foreach(i, restrictinfos) { - RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(i); + RestrictInfo *rinfo = (RestrictInfo *) lfirst(i); - cache_mergeclause_pathkeys(root, restrictinfo); + cache_mergeclause_eclasses(root, rinfo); } foreach(i, pathkeys) { - List *pathkey = (List *) lfirst(i); + PathKey *pathkey = (PathKey *) lfirst(i); + EquivalenceClass *pathkey_ec = pathkey->pk_eclass; List *matched_restrictinfos = NIL; ListCell *j; - /* - * We can match a pathkey against either left or right side of any - * mergejoin clause. (We examine both sides since we aren't told if - * the given pathkeys are for inner or outer input path; no confusion - * is possible.) Furthermore, if there are multiple matching clauses, - * take them all. In plain inner-join scenarios we expect only one - * match, because redundant-mergeclause elimination will have removed - * any redundant mergeclauses from the input list. However, in - * outer-join scenarios there might be multiple matches. An example is + /*---------- + * A mergejoin clause matches a pathkey if it has the same EC. + * If there are multiple matching clauses, take them all. In plain + * inner-join scenarios we expect only one match, because + * equivalence-class processing will have removed any redundant + * mergeclauses. However, in outer-join scenarios there might be + * multiple matches. An example is * - * select * from a full join b on a.v1 = b.v1 and a.v2 = b.v2 and a.v1 - * = b.v2; + * select * from a full join b + * on a.v1 = b.v1 and a.v2 = b.v2 and a.v1 = b.v2; * - * Given the pathkeys ((a.v1), (a.v2)) it is okay to return all three + * Given the pathkeys ({a.v1}, {a.v2}) it is okay to return all three * clauses (in the order a.v1=b.v1, a.v1=b.v2, a.v2=b.v2) and indeed * we *must* do so or we will be unable to form a valid plan. + * + * We expect that the given pathkeys list is canonical, which means + * no two members have the same EC, so it's not possible for this + * code to enter the same mergeclause into the result list twice. + * + * XXX it's possible that multiple matching clauses might have + * different ECs on the other side, in which case the order we put + * them into our result makes a difference in the pathkeys required + * for the other input path. However this routine hasn't got any info + * about which order would be best, so for now we disregard that case + * (which is probably a corner case anyway). + *---------- */ foreach(j, restrictinfos) { - RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(j); + RestrictInfo *rinfo = (RestrictInfo *) lfirst(j); + EquivalenceClass *clause_ec; - /* - * We can compare canonical pathkey sublists by simple pointer - * equality; see compare_pathkeys. - */ - if ((pathkey == restrictinfo->left_pathkey || - pathkey == restrictinfo->right_pathkey) && - !list_member_ptr(mergeclauses, restrictinfo)) - { - matched_restrictinfos = lappend(matched_restrictinfos, - restrictinfo); - } + if (outer_keys) + clause_ec = rinfo->outer_is_left ? + rinfo->left_ec : rinfo->right_ec; + else + clause_ec = rinfo->outer_is_left ? + rinfo->right_ec : rinfo->left_ec; + if (clause_ec == pathkey_ec) + matched_restrictinfos = lappend(matched_restrictinfos, rinfo); } /* @@ -1396,63 +948,270 @@ find_mergeclauses_for_pathkeys(PlannerInfo *root, } /* - * make_pathkeys_for_mergeclauses + * select_outer_pathkeys_for_merge + * Builds a pathkey list representing a possible sort ordering + * that can be used with the given mergeclauses. + * + * 'mergeclauses' is a list of RestrictInfos for mergejoin clauses + * that will be used in a merge join. + * 'joinrel' is the join relation we are trying to construct. + * + * The restrictinfos must be marked (via outer_is_left) to show which side + * of each clause is associated with the current outer path. (See + * select_mergejoin_clauses()) + * + * Returns a pathkeys list that can be applied to the outer relation. + * + * Since we assume here that a sort is required, there is no particular use + * in matching any available ordering of the outerrel. (joinpath.c has an + * entirely separate code path for considering sort-free mergejoins.) Rather, + * it's interesting to try to match the requested query_pathkeys so that a + * second output sort may be avoided; and failing that, we try to list "more + * popular" keys (those with the most unmatched EquivalenceClass peers) + * earlier, in hopes of making the resulting ordering useful for as many + * higher-level mergejoins as possible. + */ +List * +select_outer_pathkeys_for_merge(PlannerInfo *root, + List *mergeclauses, + RelOptInfo *joinrel) +{ + List *pathkeys = NIL; + int nClauses = list_length(mergeclauses); + EquivalenceClass **ecs; + int *scores; + int necs; + ListCell *lc; + int j; + + /* Might have no mergeclauses */ + if (nClauses == 0) + return NIL; + + /* + * Make arrays of the ECs used by the mergeclauses (dropping any + * duplicates) and their "popularity" scores. + */ + ecs = (EquivalenceClass **) palloc(nClauses * sizeof(EquivalenceClass *)); + scores = (int *) palloc(nClauses * sizeof(int)); + necs = 0; + + foreach(lc, mergeclauses) + { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc); + EquivalenceClass *oeclass; + int score; + ListCell *lc2; + + /* get the outer eclass */ + cache_mergeclause_eclasses(root, rinfo); + + if (rinfo->outer_is_left) + oeclass = rinfo->left_ec; + else + oeclass = rinfo->right_ec; + + /* reject duplicates */ + for (j = 0; j < necs; j++) + { + if (ecs[j] == oeclass) + break; + } + if (j < necs) + continue; + + /* compute score */ + score = 0; + foreach(lc2, oeclass->ec_members) + { + EquivalenceMember *em = (EquivalenceMember *) lfirst(lc2); + + /* Potential future join partner? */ + if (!em->em_is_const && !em->em_is_child && + !bms_overlap(em->em_relids, joinrel->relids)) + score++; + } + + ecs[necs] = oeclass; + scores[necs] = score; + necs++; + } + + /* + * Find out if we have all the ECs mentioned in query_pathkeys; if so + * we can generate a sort order that's also useful for final output. + * There is no percentage in a partial match, though, so we have to + * have 'em all. + */ + if (root->query_pathkeys) + { + foreach(lc, root->query_pathkeys) + { + PathKey *query_pathkey = (PathKey *) lfirst(lc); + EquivalenceClass *query_ec = query_pathkey->pk_eclass; + + for (j = 0; j < necs; j++) + { + if (ecs[j] == query_ec) + break; /* found match */ + } + if (j >= necs) + break; /* didn't find match */ + } + /* if we got to the end of the list, we have them all */ + if (lc == NULL) + { + /* copy query_pathkeys as starting point for our output */ + pathkeys = list_copy(root->query_pathkeys); + /* mark their ECs as already-emitted */ + foreach(lc, root->query_pathkeys) + { + PathKey *query_pathkey = (PathKey *) lfirst(lc); + EquivalenceClass *query_ec = query_pathkey->pk_eclass; + + for (j = 0; j < necs; j++) + { + if (ecs[j] == query_ec) + { + scores[j] = -1; + break; + } + } + } + } + } + + /* + * Add remaining ECs to the list in popularity order, using a default + * sort ordering. (We could use qsort() here, but the list length is + * usually so small it's not worth it.) + */ + for (;;) + { + int best_j; + int best_score; + EquivalenceClass *ec; + PathKey *pathkey; + + best_j = 0; + best_score = scores[0]; + for (j = 1; j < necs; j++) + { + if (scores[j] > best_score) + { + best_j = j; + best_score = scores[j]; + } + } + if (best_score < 0) + break; /* all done */ + ec = ecs[best_j]; + scores[best_j] = -1; + pathkey = make_canonical_pathkey(root, + ec, + linitial_oid(ec->ec_opfamilies), + BTLessStrategyNumber, + false); + /* can't be redundant because no duplicate ECs */ + Assert(!pathkey_is_redundant(pathkey, pathkeys)); + pathkeys = lappend(pathkeys, pathkey); + } + + pfree(ecs); + pfree(scores); + + return pathkeys; +} + +/* + * make_inner_pathkeys_for_merge * Builds a pathkey list representing the explicit sort order that - * must be applied to a path in order to make it usable for the + * must be applied to an inner path to make it usable with the * given mergeclauses. * * 'mergeclauses' is a list of RestrictInfos for mergejoin clauses * that will be used in a merge join. - * 'rel' is the relation the pathkeys will apply to (ie, either the inner - * or outer side of the proposed join rel). + * 'outer_pathkeys' are the already-known canonical pathkeys for the outer + * side of the join. * - * Returns a pathkeys list that can be applied to the indicated relation. + * The restrictinfos must be marked (via outer_is_left) to show which side + * of each clause is associated with the current outer path. (See + * select_mergejoin_clauses()) + * + * Returns a pathkeys list that can be applied to the inner relation. * * Note that it is not this routine's job to decide whether sorting is * actually needed for a particular input path. Assume a sort is necessary; * just make the keys, eh? */ List * -make_pathkeys_for_mergeclauses(PlannerInfo *root, - List *mergeclauses, - RelOptInfo *rel) +make_inner_pathkeys_for_merge(PlannerInfo *root, + List *mergeclauses, + List *outer_pathkeys) { List *pathkeys = NIL; - ListCell *l; + EquivalenceClass *lastoeclass; + PathKey *opathkey; + ListCell *lc; + ListCell *lop; - foreach(l, mergeclauses) + lastoeclass = NULL; + opathkey = NULL; + lop = list_head(outer_pathkeys); + + foreach(lc, mergeclauses) { - RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l); - List *pathkey; + RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc); + EquivalenceClass *oeclass; + EquivalenceClass *ieclass; + PathKey *pathkey; - cache_mergeclause_pathkeys(root, restrictinfo); + cache_mergeclause_eclasses(root, rinfo); - if (bms_is_subset(restrictinfo->left_relids, rel->relids)) + if (rinfo->outer_is_left) { - /* Rel is left side of mergeclause */ - pathkey = restrictinfo->left_pathkey; + oeclass = rinfo->left_ec; + ieclass = rinfo->right_ec; } - else if (bms_is_subset(restrictinfo->right_relids, rel->relids)) + else { - /* Rel is right side of mergeclause */ - pathkey = restrictinfo->right_pathkey; + oeclass = rinfo->right_ec; + ieclass = rinfo->left_ec; } - else + + /* outer eclass should match current or next pathkeys */ + /* we check this carefully for debugging reasons */ + if (oeclass != lastoeclass) { - elog(ERROR, "could not identify which side of mergeclause to use"); - pathkey = NIL; /* keep compiler quiet */ + if (!lop) + elog(ERROR, "too few pathkeys for mergeclauses"); + opathkey = (PathKey *) lfirst(lop); + lop = lnext(lop); + lastoeclass = opathkey->pk_eclass; + if (oeclass != lastoeclass) + elog(ERROR, "outer pathkeys do not match mergeclause"); } /* - * When we are given multiple merge clauses, it's possible that some - * clauses refer to the same vars as earlier clauses. There's no - * reason for us to specify sort keys like (A,B,A) when (A,B) will do - * --- and adding redundant sort keys makes add_path think that this - * sort order is different from ones that are really the same, so - * don't do it. Since we now have a canonicalized pathkey, a simple - * ptrMember test is sufficient to detect redundant keys. + * Often, we'll have same EC on both sides, in which case the outer + * pathkey is also canonical for the inner side, and we can skip a + * useless search. + */ + if (ieclass == oeclass) + pathkey = opathkey; + else + pathkey = make_canonical_pathkey(root, + ieclass, + opathkey->pk_opfamily, + opathkey->pk_strategy, + opathkey->pk_nulls_first); + + /* + * Don't generate redundant pathkeys (can happen if multiple + * mergeclauses refer to same EC). */ - pathkeys = list_append_unique_ptr(pathkeys, pathkey); + if (!pathkey_is_redundant(pathkey, pathkeys)) + pathkeys = lappend(pathkeys, pathkey); } return pathkeys; @@ -1471,7 +1230,7 @@ make_pathkeys_for_mergeclauses(PlannerInfo *root, /* * pathkeys_useful_for_merging * Count the number of pathkeys that may be useful for mergejoins - * above the given relation (by looking at its joininfo list). + * above the given relation. * * We consider a pathkey potentially useful if it corresponds to the merge * ordering of either side of any joinclause for the rel. This might be @@ -1487,27 +1246,39 @@ pathkeys_useful_for_merging(PlannerInfo *root, RelOptInfo *rel, List *pathkeys) foreach(i, pathkeys) { - List *pathkey = (List *) lfirst(i); + PathKey *pathkey = (PathKey *) lfirst(i); bool matched = false; ListCell *j; - foreach(j, rel->joininfo) + /* + * First look into the EquivalenceClass of the pathkey, to see if + * there are any members not yet joined to the rel. If so, it's + * surely possible to generate a mergejoin clause using them. + */ + if (rel->has_eclass_joins && + eclass_useful_for_merging(pathkey->pk_eclass, rel)) + matched = true; + else { - RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(j); - - if (restrictinfo->mergejoinoperator == InvalidOid) - continue; - cache_mergeclause_pathkeys(root, restrictinfo); - /* - * We can compare canonical pathkey sublists by simple pointer - * equality; see compare_pathkeys. + * Otherwise search the rel's joininfo list, which contains + * non-EquivalenceClass-derivable join clauses that might + * nonetheless be mergejoinable. */ - if (pathkey == restrictinfo->left_pathkey || - pathkey == restrictinfo->right_pathkey) + foreach(j, rel->joininfo) { - matched = true; - break; + RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(j); + + if (restrictinfo->mergeopfamilies == NIL) + continue; + cache_mergeclause_eclasses(root, restrictinfo); + + if (pathkey->pk_eclass == restrictinfo->left_ec || + pathkey->pk_eclass == restrictinfo->right_ec) + { + matched = true; + break; + } } } |