diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/backend/optimizer/geqo/geqo_eval.c | 234 | ||||
-rw-r--r-- | src/backend/optimizer/geqo/geqo_main.c | 5 | ||||
-rw-r--r-- | src/backend/optimizer/geqo/geqo_pool.c | 28 | ||||
-rw-r--r-- | src/backend/optimizer/geqo/geqo_recombination.c | 14 |
4 files changed, 147 insertions, 134 deletions
diff --git a/src/backend/optimizer/geqo/geqo_eval.c b/src/backend/optimizer/geqo/geqo_eval.c index 226d64a94d9..ea74abdde5e 100644 --- a/src/backend/optimizer/geqo/geqo_eval.c +++ b/src/backend/optimizer/geqo/geqo_eval.c @@ -6,7 +6,7 @@ * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/backend/optimizer/geqo/geqo_eval.c,v 1.89 2009/07/16 20:55:44 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/geqo/geqo_eval.c,v 1.90 2009/07/19 21:00:43 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -32,6 +32,15 @@ #include "utils/memutils.h" +/* A "clump" of already-joined relations within gimme_tree */ +typedef struct +{ + RelOptInfo *joinrel; /* joinrel for the set of relations */ + int size; /* number of input relations in clump */ +} Clump; + +static List *merge_clump(PlannerInfo *root, List *clumps, Clump *new_clump, + bool force); static bool desirable_join(PlannerInfo *root, RelOptInfo *outer_rel, RelOptInfo *inner_rel); @@ -52,20 +61,6 @@ geqo_eval(PlannerInfo *root, Gene *tour, int num_gene) struct HTAB *savehash; /* - * Because gimme_tree considers both left- and right-sided trees, there is - * no difference between a tour (a,b,c,d,...) and a tour (b,a,c,d,...) --- - * the same join orders will be considered. To avoid redundant cost - * calculations, we simply reject tours where tour[0] > tour[1], assigning - * them an artificially bad fitness. - * - * init_tour() is aware of this rule and so we should never reject a tour - * during the initial filling of the pool. It seems difficult to persuade - * the recombination logic never to break the rule, however. - */ - if (num_gene >= 2 && tour[0] > tour[1]) - return DBL_MAX; - - /* * Create a private memory context that will hold all temp storage * allocated inside gimme_tree(). * @@ -108,10 +103,7 @@ geqo_eval(PlannerInfo *root, Gene *tour, int num_gene) * XXX geqo does not currently support optimization for partial result * retrieval --- how to fix? */ - if (joinrel) - fitness = joinrel->cheapest_total_path->total_cost; - else - fitness = DBL_MAX; + fitness = joinrel->cheapest_total_path->total_cost; /* * Restore join_rel_list to its former state, and put back original @@ -136,83 +128,113 @@ geqo_eval(PlannerInfo *root, Gene *tour, int num_gene) * 'tour' is the proposed join order, of length 'num_gene' * * Returns a new join relation whose cheapest path is the best plan for - * this join order. NB: will return NULL if join order is invalid. + * this join order. * * The original implementation of this routine always joined in the specified * order, and so could only build left-sided plans (and right-sided and * mixtures, as a byproduct of the fact that make_join_rel() is symmetric). * It could never produce a "bushy" plan. This had a couple of big problems, - * of which the worst was that as of 7.4, there are situations involving IN - * subqueries where the only valid plans are bushy. + * of which the worst was that there are situations involving join order + * restrictions where the only valid plans are bushy. * * The present implementation takes the given tour as a guideline, but - * postpones joins that seem unsuitable according to some heuristic rules. - * This allows correct bushy plans to be generated at need, and as a nice - * side-effect it seems to materially improve the quality of the generated - * plans. + * postpones joins that are illegal or seem unsuitable according to some + * heuristic rules. This allows correct bushy plans to be generated at need, + * and as a nice side-effect it seems to materially improve the quality of the + * generated plans. */ RelOptInfo * gimme_tree(PlannerInfo *root, Gene *tour, int num_gene) { GeqoPrivateData *private = (GeqoPrivateData *) root->join_search_private; - RelOptInfo **stack; - int stack_depth; - RelOptInfo *joinrel; + List *clumps; int rel_count; /* - * Create a stack to hold not-yet-joined relations. + * Sometimes, a relation can't yet be joined to others due to heuristics + * or actual semantic restrictions. We maintain a list of "clumps" of + * successfully joined relations, with larger clumps at the front. + * Each new relation from the tour is added to the first clump it can + * be joined to; if there is none then it becomes a new clump of its own. + * When we enlarge an existing clump we check to see if it can now be + * merged with any other clumps. After the tour is all scanned, we + * forget about the heuristics and try to forcibly join any remaining + * clumps. Some forced joins might still fail due to semantics, but + * we should always be able to find some join order that works. */ - stack = (RelOptInfo **) palloc(num_gene * sizeof(RelOptInfo *)); - stack_depth = 0; + clumps = NIL; - /* - * Push each relation onto the stack in the specified order. After - * pushing each relation, see whether the top two stack entries are - * joinable according to the desirable_join() heuristics. If so, join - * them into one stack entry, and try again to combine with the next stack - * entry down (if any). When the stack top is no longer joinable, - * continue to the next input relation. After we have pushed the last - * input relation, the heuristics are disabled and we force joining all - * the remaining stack entries. - * - * If desirable_join() always returns true, this produces a straight - * left-to-right join just like the old code. Otherwise we may produce a - * bushy plan or a left/right-sided plan that really corresponds to some - * tour other than the one given. To the extent that the heuristics are - * helpful, however, this will be a better plan than the raw tour. - * - * Also, when a join attempt fails (because of OJ or IN constraints), we - * may be able to recover and produce a workable plan, where the old code - * just had to give up. This case acts the same as a false result from - * desirable_join(). - */ for (rel_count = 0; rel_count < num_gene; rel_count++) { int cur_rel_index; + RelOptInfo *cur_rel; + Clump *cur_clump; - /* Get the next input relation and push it */ + /* Get the next input relation */ cur_rel_index = (int) tour[rel_count]; - stack[stack_depth] = (RelOptInfo *) list_nth(private->initial_rels, - cur_rel_index - 1); - stack_depth++; - - /* - * While it's feasible, pop the top two stack entries and replace with - * their join. - */ - while (stack_depth >= 2) + cur_rel = (RelOptInfo *) list_nth(private->initial_rels, + cur_rel_index - 1); + + /* Make it into a single-rel clump */ + cur_clump = (Clump *) palloc(sizeof(Clump)); + cur_clump->joinrel = cur_rel; + cur_clump->size = 1; + + /* Merge it into the clumps list, using only desirable joins */ + clumps = merge_clump(root, clumps, cur_clump, false); + } + + if (list_length(clumps) > 1) + { + /* Force-join the remaining clumps in some legal order */ + List *fclumps; + ListCell *lc; + + fclumps = NIL; + foreach(lc, clumps) { - RelOptInfo *outer_rel = stack[stack_depth - 2]; - RelOptInfo *inner_rel = stack[stack_depth - 1]; + Clump *clump = (Clump *) lfirst(lc); - /* - * Don't pop if heuristics say not to join now. However, once we - * have exhausted the input, the heuristics can't prevent popping. - */ - if (rel_count < num_gene - 1 && - !desirable_join(root, outer_rel, inner_rel)) - break; + fclumps = merge_clump(root, fclumps, clump, true); + } + clumps = fclumps; + } + + /* Did we succeed in forming a single join relation? */ + if (list_length(clumps) != 1) + elog(ERROR, "failed to join all relations together"); + + return ((Clump *) linitial(clumps))->joinrel; +} + +/* + * Merge a "clump" into the list of existing clumps for gimme_tree. + * + * We try to merge the clump into some existing clump, and repeat if + * successful. When no more merging is possible, insert the clump + * into the list, preserving the list ordering rule (namely, that + * clumps of larger size appear earlier). + * + * If force is true, merge anywhere a join is legal, even if it causes + * a cartesian join to be performed. When force is false, do only + * "desirable" joins. + */ +static List * +merge_clump(PlannerInfo *root, List *clumps, Clump *new_clump, bool force) +{ + ListCell *prev; + ListCell *lc; + + /* Look for a clump that new_clump can join to */ + prev = NULL; + foreach(lc, clumps) + { + Clump *old_clump = (Clump *) lfirst(lc); + + if (force || + desirable_join(root, old_clump->joinrel, new_clump->joinrel)) + { + RelOptInfo *joinrel; /* * Construct a RelOptInfo representing the join of these two input @@ -220,30 +242,60 @@ gimme_tree(PlannerInfo *root, Gene *tour, int num_gene) * root->join_rel_list yet, and so the paths constructed for it * will only include the ones we want. */ - joinrel = make_join_rel(root, outer_rel, inner_rel); - - /* Can't pop stack here if join order is not valid */ - if (!joinrel) - break; - - /* Find and save the cheapest paths for this rel */ - set_cheapest(joinrel); - - /* Pop the stack and replace the inputs with their join */ - stack_depth--; - stack[stack_depth - 1] = joinrel; + joinrel = make_join_rel(root, + old_clump->joinrel, + new_clump->joinrel); + + /* Keep searching if join order is not valid */ + if (joinrel) + { + /* Find and save the cheapest paths for this joinrel */ + set_cheapest(joinrel); + + /* Absorb new clump into old */ + old_clump->joinrel = joinrel; + old_clump->size += new_clump->size; + pfree(new_clump); + + /* Remove old_clump from list */ + clumps = list_delete_cell(clumps, lc, prev); + + /* + * Recursively try to merge the enlarged old_clump with + * others. When no further merge is possible, we'll reinsert + * it into the list. + */ + return merge_clump(root, clumps, old_clump, force); + } } + prev = lc; } - /* Did we succeed in forming a single join relation? */ - if (stack_depth == 1) - joinrel = stack[0]; - else - joinrel = NULL; + /* + * No merging is possible, so add new_clump as an independent clump, in + * proper order according to size. We can be fast for the common case + * where it has size 1 --- it should always go at the end. + */ + if (clumps == NIL || new_clump->size == 1) + return lappend(clumps, new_clump); + + /* Check if it belongs at the front */ + lc = list_head(clumps); + if (new_clump->size > ((Clump *) lfirst(lc))->size) + return lcons(new_clump, clumps); - pfree(stack); + /* Else search for the place to insert it */ + for (;;) + { + ListCell *nxt = lnext(lc); + + if (nxt == NULL || new_clump->size > ((Clump *) lfirst(nxt))->size) + break; /* it belongs after 'lc', before 'nxt' */ + lc = nxt; + } + lappend_cell(clumps, lc, new_clump); - return joinrel; + return clumps; } /* diff --git a/src/backend/optimizer/geqo/geqo_main.c b/src/backend/optimizer/geqo/geqo_main.c index 824ae7b6108..5b1368bbb25 100644 --- a/src/backend/optimizer/geqo/geqo_main.c +++ b/src/backend/optimizer/geqo/geqo_main.c @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/backend/optimizer/geqo/geqo_main.c,v 1.57 2009/07/16 20:55:44 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/geqo/geqo_main.c,v 1.58 2009/07/19 21:00:43 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -257,9 +257,6 @@ geqo(PlannerInfo *root, int number_of_rels, List *initial_rels) best_rel = gimme_tree(root, best_tour, pool->string_length); - if (best_rel == NULL) - elog(ERROR, "failed to make a valid plan"); - /* DBG: show the query plan */ #ifdef NOT_USED print_plan(best_plan, root); diff --git a/src/backend/optimizer/geqo/geqo_pool.c b/src/backend/optimizer/geqo/geqo_pool.c index 26137272f70..c7eda949aa3 100644 --- a/src/backend/optimizer/geqo/geqo_pool.c +++ b/src/backend/optimizer/geqo/geqo_pool.c @@ -6,7 +6,7 @@ * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/backend/optimizer/geqo/geqo_pool.c,v 1.34 2009/07/16 20:55:44 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/geqo/geqo_pool.c,v 1.35 2009/07/19 21:00:43 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -92,37 +92,13 @@ random_init_pool(PlannerInfo *root, Pool *pool) { Chromosome *chromo = (Chromosome *) pool->data; int i; - int bad = 0; - /* - * We immediately discard any invalid individuals (those that geqo_eval - * returns DBL_MAX for), thereby not wasting pool space on them. - * - * If we fail to make any valid individuals after 10000 tries, give up; - * this probably means something is broken, and we shouldn't just let - * ourselves get stuck in an infinite loop. - */ - i = 0; - while (i < pool->size) + for (i = 0; i < pool->size; i++) { init_tour(root, chromo[i].string, pool->string_length); pool->data[i].worth = geqo_eval(root, chromo[i].string, pool->string_length); - if (pool->data[i].worth < DBL_MAX) - i++; - else - { - bad++; - if (i == 0 && bad >= 10000) - elog(ERROR, "failed to make a valid plan"); - } } - -#ifdef GEQO_DEBUG - if (bad > 0) - elog(DEBUG1, "%d invalid tours found while selecting %d pool entries", - bad, pool->size); -#endif } /* diff --git a/src/backend/optimizer/geqo/geqo_recombination.c b/src/backend/optimizer/geqo/geqo_recombination.c index ebdd2203839..b31c7634b0f 100644 --- a/src/backend/optimizer/geqo/geqo_recombination.c +++ b/src/backend/optimizer/geqo/geqo_recombination.c @@ -3,7 +3,7 @@ * geqo_recombination.c * misc recombination procedures * -* $PostgreSQL: pgsql/src/backend/optimizer/geqo/geqo_recombination.c,v 1.16 2009/07/16 20:55:44 tgl Exp $ +* $PostgreSQL: pgsql/src/backend/optimizer/geqo/geqo_recombination.c,v 1.17 2009/07/19 21:00:43 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -61,18 +61,6 @@ init_tour(PlannerInfo *root, Gene *tour, int num_gene) remainder--; } - /* - * Since geqo_eval() will reject tours where tour[0] > tour[1], we may as - * well switch the two to make it a valid tour. - */ - if (num_gene >= 2 && tour[0] > tour[1]) - { - Gene gtmp = tour[0]; - - tour[0] = tour[1]; - tour[1] = gtmp; - } - pfree(tmp); } |