diff options
author | Dean Rasheed | 2020-12-03 10:03:49 +0000 |
---|---|---|
committer | Dean Rasheed | 2020-12-03 10:03:49 +0000 |
commit | 25a9e54d2db31b8031c2d8166114b187e8347098 (patch) | |
tree | 72c1f1052e27080d2658ae234a286e18a7a86749 /src/include | |
parent | b5913f6120792465f4394b93c15c2e2ac0c08376 (diff) |
Improve estimation of OR clauses using extended statistics.
Formerly we only applied extended statistics to an OR clause as part
of the clauselist_selectivity() code path for an OR clause appearing
in an implicitly-ANDed list of clauses. This meant that it could only
use extended statistics if all sub-clauses of the OR clause were
covered by a single extended statistics object.
Instead, teach clause_selectivity() how to apply extended statistics
to an OR clause by handling its ORed list of sub-clauses in a similar
manner to an implicitly-ANDed list of sub-clauses, but with different
combination rules. This allows one or more extended statistics objects
to be used to estimate all or part of the list of sub-clauses. Any
remaining sub-clauses are then treated as if they are independent.
Additionally, to avoid double-application of extended statistics, this
introduces "extended" versions of clause_selectivity() and
clauselist_selectivity(), which include an option to ignore extended
statistics. This replaces the old clauselist_selectivity_simple()
function which failed to completely ignore extended statistics when
called from the extended statistics code.
A known limitation of the current infrastructure is that an AND clause
under an OR clause is not treated as compatible with extended
statistics (because we don't build RestrictInfos for such sub-AND
clauses). Thus, for example, "(a=1 AND b=1) OR (a=2 AND b=2)" will
currently be treated as two independent AND clauses (each of which may
be estimated using extended statistics), but extended statistics will
not currently be used to account for any possible overlap between
those clauses. Improving that is left as a task for the future.
Original patch by Tomas Vondra, with additional improvements by me.
Discussion: https://postgr.es/m/20200113230008.g67iyk4cs3xbnjju@development
Diffstat (limited to 'src/include')
-rw-r--r-- | src/include/optimizer/optimizer.h | 18 | ||||
-rw-r--r-- | src/include/statistics/extended_stats_internal.h | 15 | ||||
-rw-r--r-- | src/include/statistics/statistics.h | 3 |
3 files changed, 29 insertions, 7 deletions
diff --git a/src/include/optimizer/optimizer.h b/src/include/optimizer/optimizer.h index 3e4171056e8..dea0e7338d5 100644 --- a/src/include/optimizer/optimizer.h +++ b/src/include/optimizer/optimizer.h @@ -58,17 +58,23 @@ extern Selectivity clause_selectivity(PlannerInfo *root, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo); -extern Selectivity clauselist_selectivity_simple(PlannerInfo *root, - List *clauses, - int varRelid, - JoinType jointype, - SpecialJoinInfo *sjinfo, - Bitmapset *estimatedclauses); +extern Selectivity clause_selectivity_ext(PlannerInfo *root, + Node *clause, + int varRelid, + JoinType jointype, + SpecialJoinInfo *sjinfo, + bool use_extended_stats); extern Selectivity clauselist_selectivity(PlannerInfo *root, List *clauses, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo); +extern Selectivity clauselist_selectivity_ext(PlannerInfo *root, + List *clauses, + int varRelid, + JoinType jointype, + SpecialJoinInfo *sjinfo, + bool use_extended_stats); /* in path/costsize.c: */ diff --git a/src/include/statistics/extended_stats_internal.h b/src/include/statistics/extended_stats_internal.h index 61e69696cfe..02bf6a05027 100644 --- a/src/include/statistics/extended_stats_internal.h +++ b/src/include/statistics/extended_stats_internal.h @@ -99,6 +99,11 @@ extern SortItem *build_sorted_items(int numrows, int *nitems, HeapTuple *rows, extern bool examine_clause_args(List *args, Var **varp, Const **cstp, bool *varonleftp); +extern Selectivity mcv_combine_selectivities(Selectivity simple_sel, + Selectivity mcv_sel, + Selectivity mcv_basesel, + Selectivity mcv_totalsel); + extern Selectivity mcv_clauselist_selectivity(PlannerInfo *root, StatisticExtInfo *stat, List *clauses, @@ -109,4 +114,14 @@ extern Selectivity mcv_clauselist_selectivity(PlannerInfo *root, Selectivity *basesel, Selectivity *totalsel); +extern Selectivity mcv_clause_selectivity_or(PlannerInfo *root, + StatisticExtInfo *stat, + MCVList *mcv, + Node *clause, + bool 
**or_matches, + Selectivity *basesel, + Selectivity *overlap_mcvsel, + Selectivity *overlap_basesel, + Selectivity *totalsel); + #endif /* EXTENDED_STATS_INTERNAL_H */ diff --git a/src/include/statistics/statistics.h b/src/include/statistics/statistics.h index 50fce4935f3..c9ed21155cd 100644 --- a/src/include/statistics/statistics.h +++ b/src/include/statistics/statistics.h @@ -116,7 +116,8 @@ extern Selectivity statext_clauselist_selectivity(PlannerInfo *root, JoinType jointype, SpecialJoinInfo *sjinfo, RelOptInfo *rel, - Bitmapset **estimatedclauses); + Bitmapset **estimatedclauses, + bool is_or); extern bool has_stats_of_kind(List *stats, char requiredkind); extern StatisticExtInfo *choose_best_statistics(List *stats, char requiredkind, Bitmapset **clause_attnums, |